vdj_pipe
pipeline for processing DNA sequence data
sequence_file_map.hpp
Go to the documentation of this file.
1 
7 #ifndef SEQUENCE_FILE_MAP_HPP_
8 #define SEQUENCE_FILE_MAP_HPP_
9 #include <utility>
10 #include "boost/assert.hpp"
11 #include "boost/multi_index_container.hpp"
12 #include "boost/multi_index/mem_fun.hpp"
13 #include "boost/multi_index/member.hpp"
14 #include "boost/multi_index/ordered_index.hpp"
16 #include "vdj_pipe/object_ids.hpp"
18 #include "vdj_pipe/exception.hpp"
19 
20 namespace vdj_pipe{
21 
24 class Seq_file_map {
25  typedef boost::multi_index_container<
26  Seq_file,
27  boost::multi_index::indexed_by<
28  boost::multi_index::ordered_unique<
29  boost::multi_index::tag<struct id_tag>,
30  boost::multi_index::const_mem_fun<
31  Seq_file, Path_id, &Seq_file::id
32  >
33  >,
34  boost::multi_index::ordered_unique<
35  boost::multi_index::tag<struct path_tag>,
36  boost::multi_index::const_mem_fun<
37  File, std::string const&, &Seq_file::path
38  >
39  >
40  >
41  > file_map;
42 
43  typedef file_map::index<id_tag>::type id_index;
44  typedef id_index::const_iterator id_iterator;
45  typedef file_map::index<path_tag>::type path_index;
46  typedef path_index::const_iterator path_iterator;
47 
48  //do not use std::pair - it causes problems with MSVC
49  struct Pair {
50  Pair(const Path_id one, const Path_id two): one_(one), two_(two) {}
51  Path_id one_, two_;
52  };
53 
54  typedef boost::multi_index_container<
55  Pair,
56  boost::multi_index::indexed_by<
57  boost::multi_index::ordered_unique<
58  boost::multi_index::member<Pair, Path_id, &Pair::one_>
59  >,
60  boost::multi_index::ordered_unique<
61  boost::multi_index::member<Pair, Path_id, &Pair::two_>
62  >
63  >
64  > pair_map;
65  typedef pair_map::nth_index<0>::type index1;
66  typedef index1::const_iterator citer1;
67  typedef pair_map::nth_index<1>::type index2;
68  typedef index2::const_iterator citer2;
69 
70 public:
71  typedef Seq_file value_type;
72  typedef file_map::iterator iterator;
73  typedef file_map::const_iterator const_iterator;
74  struct Err : public base_exception {};
75 
76  Seq_file_map() : i_(Path_id(1)) {}
77  std::size_t size() const { return map_.size(); }
78  const_iterator begin() const {return map_.begin();}
79  const_iterator end() const {return map_.end();}
80  bool empty() const {return map_.empty();}
81 
82  Seq_file const& operator[](const Path_id id) const {
83  id_index const& ind = map_.get<id_tag>();
84  const id_iterator i = ind.find(id);
85  BOOST_ASSERT(i != ind.end());
86  return *i;
87  }
88 
89  Seq_file const& at(const Path_id id) const {
90  id_index const& ind = map_.get<id_tag>();
91  const id_iterator i = ind.find(id);
92  if(i == ind.end()) BOOST_THROW_EXCEPTION(
93  Err()
94  << Err::msg_t("invalid path ID")
95  << Err::int1_t(id())
96  );
97  return *i;
98  }
99 
100  Seq_file const* find(std::string const& path) const {
101  path_index const& ind = map_.get<path_tag>();
102  const path_iterator i = ind.find(path);
103  if( i == ind.end() ) return 0;
104  return &(*i);
105  }
106 
107  void erase(const Path_id id) {map_.erase(id);}
108  void clear() {map_.clear();}
109 
110  std::pair<Path_id, bool> insert(Seq_file const& file) {
111  typedef std::pair<Path_id,bool> pair;
112  if( Seq_file const* sfp = find(file.path()) ) return pair(sfp->id(), false);
113  const Path_id id = *i_++;
114  Seq_file sf = file;
115  sf.pid_ = id;
116  map_.emplace(sf);
117  return pair(id, true);
118  }
119 
120  void set_seq_qual(const Path_id seq_id, const Path_id qual_id) {
121  if( (!seq_id) || (!qual_id) || seq_id == qual_id ) BOOST_THROW_EXCEPTION(
122  Err()
123  << Err::msg_t("files should be valid and distinct")
124  );
125 
126  if( (*this)[seq_id].format() != format::Fasta ) BOOST_THROW_EXCEPTION(
127  Err()
128  << Err::msg_t("sequence file should be in FASTA format")
129  );
130 
131  if( (*this)[qual_id].format() != format::Qual ) BOOST_THROW_EXCEPTION(
132  Err()
133  << Err::msg_t("quality file should be in QUAL format")
134  );
135 
136  sqm_.insert(Pair(seq_id, qual_id));
137  }
138 
139  void set_paired(const Path_id frw_id, const Path_id rev_id) {
140  if( (!frw_id) || (!rev_id) || frw_id == rev_id ) BOOST_THROW_EXCEPTION(
141  Err()
142  << Err::msg_t("files should be valid and distinct")
143  );
144 
145  if( (*this)[frw_id].format() != (*this)[rev_id].format() ) BOOST_THROW_EXCEPTION(
146  Err()
147  << Err::msg_t("paired reads files should be in same format")
148  );
149 
150  if(
151  (*this)[frw_id].format() != format::Fasta &&
152  (*this)[frw_id].format() != format::Fastq
153  ) BOOST_THROW_EXCEPTION(
154  Err()
155  << Err::msg_t("paired reads files should be in FASTA or FASTQ formats")
156  );
157 
158  frm_.insert(Pair(frw_id, rev_id));
159  }
160 
161  void set_mid(const Path_id seq_id, const Path_id mid_id) {
162  if( (!seq_id) || (!mid_id) || seq_id == mid_id ) BOOST_THROW_EXCEPTION(
163  Err()
164  << Err::msg_t("files should be valid and distinct")
165  );
166 
167  if( (*this)[seq_id].format() != (*this)[mid_id].format() ) BOOST_THROW_EXCEPTION(
168  Err()
169  << Err::msg_t("sequence and eMID files should be in same format")
170  );
171 
172  if(
173  (*this)[seq_id].format() != format::Fasta &&
174  (*this)[seq_id].format() != format::Fastq
175  ) BOOST_THROW_EXCEPTION(
176  Err()
177  << Err::msg_t("sequence files should be in FASTA or FASTQ formats")
178  );
179 
180  smm_.insert(Pair(seq_id, mid_id));
181  }
182 
183  Path_id qual2seq(const Path_id qual_id) const {return get<1>(qual_id, sqm_);}
184  Path_id seq2qual(const Path_id seq_id) const {return get<0>(seq_id, sqm_);}
185  Path_id rev2frw(const Path_id rev_id) const {return get<1>(rev_id, frm_);}
186  Path_id frw2rev(const Path_id frw_id) const {return get<0>(frw_id, frm_);}
187  Path_id mid2seq(const Path_id mid_id) const {return get<1>(mid_id, smm_);}
188  Path_id seq2mid(const Path_id seq_id) const {return get<0>(seq_id, smm_);}
189 
190 private:
192  file_map map_;
193  pair_map sqm_;
194  pair_map frm_;
195  pair_map smm_;
197  template<int N> static
198  Path_id get(const Path_id pid, pair_map const& pm) {
199  typedef typename pair_map::nth_index<N>::type index;
200  typedef typename index::const_iterator iter;
201  index const& ind = pm.get<N>();
202  iter i = ind.find(pid);
203  if( i == ind.end() ) return Path_id();
204  return N ? i->one_ : i->two_;
205  }
206 };
207 
208 }//namespace vdj_pipe
209 #endif /* SEQUENCE_FILE_MAP_HPP_ */
Definition: file.hpp:62
Main namespace of vdj_pipe library.
Definition: sequence_file.hpp:14
Definition: sequence_file_map.hpp:24
Definition: sequence_file_map.hpp:74
Definition: sequence_file.hpp:19
Definition: exception.hpp:23