vdj_pipe
pipeline for processing DNA sequence data
sequence_store.hpp
Go to the documentation of this file.
1 
7 #ifndef SEQUENCE_STORE_HPP_
8 #define SEQUENCE_STORE_HPP_
9 #include "boost/assert.hpp"
10 #include "boost/foreach.hpp"
11 #include "boost/multi_index_container.hpp"
12 #include "boost/multi_index/hashed_index.hpp"
13 #include "boost/multi_index/ordered_index.hpp"
14 #include "boost/range.hpp"
15 #include "boost/shared_ptr.hpp"
16 
21 #include "vdj_pipe/exception.hpp"
22 #include "vdj_pipe/object_ids.hpp"
25 
26 namespace vdj_pipe{
27 
// Store sequences and related information.
//
// Entries live in a shared seq_map (sm_). Their Seq_id-s are additionally kept
// in a Boost.MultiIndex container (si_) with two indices: a hashed unique index
// keyed by the entry's sequence string and an ordered non-unique index keyed by
// the entry's size. Two further maps relate sequences and reads: s2i_ (indexed
// by Seq_id) and i2s_ (indexed by Read_id).
//
// NOTE(review): this listing skips some original line numbers (32, 75-76,
// 79-80, 129), so the declarations of seq_map, sub_seq, super_seq,
// sub_seq_set, super_seq_set, seq_to_info and info_to_seq are not visible
// here -- confirm against the real header.
30 class Seq_store {
31 public:
    // Shared-ownership handle to the underlying sequence map.
33  typedef boost::shared_ptr<seq_map> seq_map_ptr;
34 
35 private:
    // Key extractor for the hashed index: parameterised on seq_map, Seq_id,
    // Seq_entry and &Seq_entry::sequence with result type std::string const&.
    // (detail::Getter is defined elsewhere; presumably it looks the entry up
    // by ID and applies the member function -- TODO confirm.)
36  typedef detail::Getter<
37  seq_map, Seq_id, Seq_entry,
38  std::string const&, &Seq_entry::sequence
39  > get_seq;
40 
    // Key extractor for the ordered index: same scheme, using
    // &Seq_entry::size with result type unsigned.
41  typedef detail::Getter<
42  seq_map, Seq_id, Seq_entry,
43  unsigned, &Seq_entry::size
44  > get_size;
45 
    // Container of Seq_id values with two indices:
    //  - seq_tag:  hashed, unique, keyed via get_seq
    //  - size_tag: ordered, non-unique, keyed via get_size
46  typedef boost::multi_index_container<
47  Seq_id,
48  boost::multi_index::indexed_by<
49  boost::multi_index::hashed_unique<
50  boost::multi_index::tag<struct seq_tag>,
51  get_seq
52  >,
53  boost::multi_index::ordered_non_unique<
54  boost::multi_index::tag<struct size_tag>,
55  get_size
56  >
57  >
58  > seq_mi_t;
59  typedef seq_mi_t::index<seq_tag>::type seq_index;
60  typedef seq_mi_t::index<size_tag>::type size_index;
61 
    // Build the constructor arguments for seq_mi_t: one tuple per index.
    // Both key extractors are constructed from the given seq_map, so the
    // indices read their keys from that map.
    // Hashed index tuple: (0, key extractor, hash, equality); in
    // Boost.MultiIndex the leading integer is the bucket-count argument.
    // Ordered index tuple: (key extractor, comparison) -- std::less, so the
    // size index is in ascending size order.
62  static seq_mi_t::ctor_args_list seq_index_init(seq_map const& sm) {
63  return boost::make_tuple(
64  boost::make_tuple(
65  0,
66  get_seq(sm),
67  boost::hash<std::string>(),
68  std::equal_to<std::string>()
69  ),
70  boost::make_tuple(get_size(sm), std::less<unsigned>())
71  );
72  }
73 
74 public:
77 
78 private:
    // Iterator / range types over the size-ordered index.
81  typedef size_index::const_iterator size_iterator;
82  typedef boost::iterator_range<size_iterator> size_range;
83 
84 public:
    // Iteration over the store goes through the sequence (hashed) index.
85  typedef seq_index::iterator iterator;
86  typedef seq_index::const_iterator const_iterator;
87 
    // Create an empty store. The sequence map is allocated first because the
    // index key extractors are constructed from *sm_ (member declaration
    // order below matches this initialisation order).
    // NOTE(review): the Seq_id(1) / Read_id(1) constructor arguments are
    // presumably the first ID to hand out -- confirm against the map types.
88  Seq_store()
89  : sm_(new seq_map(Seq_id(1))),
90  si_(seq_index_init(*sm_)),
91  s2i_(Seq_id(1)),
92  i2s_(Read_id(1))
93  {}
94 
    // Number of Seq_id-s held in the index container.
95  std::size_t size() const {return si_.size();}
    // Begin/end of the stored Seq_id-s (see const_iterator above).
96  const_iterator begin() const {return si_.begin();}
97  const_iterator end() const {return si_.end();}
    // True when the index container holds no Seq_id-s.
98  bool empty() const {return si_.empty();}
    // Access the Seq_entry stored in the sequence map under sid.
99  Seq_entry const& operator[](const Seq_id sid) const {return (*sm_)[sid];}
    // Shared pointer to the underlying sequence map.
100  boost::shared_ptr<seq_map> sequence_map() {return sm_;}
101 
    // Range over all Seq_id-s in the order of the size index.
103  size_range by_size() const {return boost::make_iterator_range(si_.get<size_tag>());}
104 
    // Range over the Seq_id-s whose size key equals `size`.
106  size_range by_size(const unsigned size) const {
107  return boost::make_iterator_range(
108  si_.get<size_tag>().equal_range(size)
109  );
110  }
111 
    // Range over the Seq_id-s whose size key lies in [from, to], both ends
    // inclusive (lower_bound(from) .. upper_bound(to)). `to` defaults to the
    // largest unsigned value, i.e. no upper limit.
113  size_range by_size_range(
114  const unsigned from,
115  const unsigned to = std::numeric_limits<unsigned>::max()
116  ) const {
117  return boost::make_iterator_range(
118  si_.get<size_tag>().lower_bound(from),
119  si_.get<size_tag>().upper_bound(to)
120  );
121  }
122 
    // Insert a read and its sequence, checking for duplicate sequences.
    // If the sequence is already indexed its existing Seq_id is reused;
    // otherwise a new Seq_entry(seq, rid) is added to the sequence map and
    // its ID is added to the index container. In both cases the read is
    // linked to the sequence at position 0 in s2i_ and i2s_.
    // Returns the Seq_id of the (new or existing) sequence.
124  Seq_id insert(const Read_id rid, const boost::string_ref seq) {
125  seq_index& ind = si_.get<seq_tag>();
    // Look-up with a string_ref key and a matching hash functor.
    // NOTE(review): original line 129 is absent from this listing; it
    // presumably supplied the equality predicate required as the last
    // argument of find() -- restore it from the real header.
126  const const_iterator i = ind.find(
127  seq,
128  boost::hash<boost::string_ref>(),
130  );
131 
132  Seq_id sid;
133  if( i == ind.end() ) {
    // New sequence: store the entry first, then index the ID it was given.
134  sid = sm_->insert(Seq_entry(seq, rid));
135  si_.insert(sid);
136  } else {
    // Known sequence: reuse its ID.
137  sid = *i;
138  }
    // Record the read <-> sequence relation in both directions, offset 0.
139  s2i_.insert(sid).insert(sub_seq(rid, 0));
140  i2s_.insert(rid).insert(super_seq(sid, 0));
141  return sid;
142  }
143 
    // Remove sequence sid from the store, re-mapping every read recorded for
    // it in s2i_ onto the sequences listed in ss.
    // ss is iterated with BOOST_FOREACH as super_seq elements; each element's
    // id_ and pos_ are used. A read's new position is its position in sid
    // plus the element's pos_.
144  template<class Super_seqs> void
145  remove_subsequence(const Seq_id sid, Super_seqs const& ss) {
146  //re-map all Read_id-s to sequences in ss and back
147  BOOST_FOREACH(sub_seq const& subs, s2i_[sid]) {
148  const Read_id rid = subs.id_;
149  super_seq_set& super_ss = i2s_[rid];
    // The read must currently be mapped to sid (checked in debug builds).
150  BOOST_ASSERT(super_ss.find(sid));
151  super_ss.erase(sid);
152  BOOST_FOREACH(super_seq const& supers, ss) {
153  const unsigned pos = subs.pos_ + supers.pos_;
154  super_ss.insert(super_seq(supers.id_, pos));
155  s2i_[supers.id_].insert(sub_seq(rid, pos));
156  }
157  }
158  //remove sid
159  s2i_.erase(sid);
    // Erase from the sequence index by key while the entry still exists in
    // *sm_: the key string is read from (*sm_)[sid] on this line.
160  si_.get<seq_tag>().erase((*sm_)[sid].sequence());
161  sm_->erase(sid);
162  }
163 
    // Element of i2s_ stored under read iid.
164  super_seq_set const& maps_to(const Read_id iid) const {return i2s_[iid];}
    // Element of s2i_ stored under sequence sid.
165  sub_seq_set const& maps_from(const Seq_id sid) const {return s2i_[sid];}
166 
167 private:
    // Declaration order matters: si_ is initialised from *sm_.
168  seq_map_ptr sm_;
169  seq_mi_t si_;
170  seq_to_info s2i_;
171  info_to_seq i2s_;
172 };
173 
174 }//namespace vdj_pipe
175 #endif /* SEQUENCE_STORE_HPP_ */
value_type const * find(CompatType const &t) const
Definition: vector_set.hpp:69
size_range by_size(const unsigned size) const
Definition: sequence_store.hpp:106
Collection of unique objects stored in an ordered vector.
Definition: vector_set.hpp:18
size_range by_size() const
Definition: sequence_store.hpp:103
Main namespace of vdj_pipe library.
Definition: sequence_file.hpp:14
size_range by_size_range(const unsigned from, const unsigned to=std::numeric_limits< unsigned >::max()) const
Definition: sequence_store.hpp:113
Definition: id_map.hpp:20
Definition: sequence_position.hpp:15
Definition: string_ref.hpp:23
Extract object by its ID and apply member function.
Definition: get_by_id.hpp:25
Store sequence and related information.
Definition: sequence_store.hpp:30
Seq_id insert(const Read_id rid, const boost::string_ref seq)
Insert a read ID and its sequence, checking for duplicate sequences.
Definition: sequence_store.hpp:124
Definition: sequence_entry.hpp:19