vdj_pipe
pipeline for processing DNA sequence data
sequence_store.hpp
Go to the documentation of this file.
1 
7 #ifndef SEQUENCE_STORE_HPP_
8 #define SEQUENCE_STORE_HPP_
9 #include "boost/assert.hpp"
10 #include "boost/foreach.hpp"
11 #include "boost/multi_index_container.hpp"
12 #include "boost/multi_index/hashed_index.hpp"
13 #include "boost/multi_index/ordered_index.hpp"
14 #include "boost/range.hpp"
15 #include "boost/shared_ptr.hpp"
16 
21 #include "vdj_pipe/exception.hpp"
22 #include "vdj_pipe/object_ids.hpp"
25 
26 namespace vdj_pipe{
27 
// Store sequence and related information.
//
// The visible members are a shared map from Seq_id to Seq_entry (sm_), a
// Boost.MultiIndex container of Seq_id values (si_) that is keyed both by
// the sequence string and by the sequence size, and two maps (s2i_, i2s_)
// relating sequence IDs and read IDs to each other.
//
// NOTE(review): this listing omits several numbered lines (e.g. 32, 39, 44,
// 58, 75-76, 79-80, 88, 90, 129, 169). Where the page index at the end of
// this file names the missing declaration, the comments below quote it.
30 class Seq_store {
31 public:
// Listing line 32 is not shown; the page index gives it as
// `detail::Id_map< Seq_id, Seq_entry > seq_map`.
// Shared-ownership pointer to that map.
33  typedef boost::shared_ptr<seq_map> seq_map_ptr;
34 
35 private:
// Key extractor type: given a Seq_id, yields Seq_entry::sequence() of the
// entry stored under that ID in a seq_map (presumably what detail::Getter
// does — "Extract object by its ID and apply member function"; TODO confirm
// in get_by_id.hpp). The closing line 39 is not shown; the page index names
// this alias `get_seq`.
36  typedef detail::Getter<
37  seq_map, Seq_id, Seq_entry,
38  std::string const&, &Seq_entry::sequence
40 
// Same kind of key extractor, but yielding Seq_entry::size() (unsigned).
// The closing line 44 is not shown; the page index names this alias
// `get_size`.
41  typedef detail::Getter<
42  seq_map, Seq_id, Seq_entry,
43  unsigned, &Seq_entry::size
45 
// Container of Seq_id values with two indices:
//  - seq_tag:  hashed, unique, keyed by get_seq (the sequence string);
//  - size_tag: ordered, non-unique, keyed by get_size (the sequence size).
// The closing line 58 is not shown; the page index names this alias
// `seq_mi_t`.
46  typedef boost::multi_index_container<
47  Seq_id,
48  boost::multi_index::indexed_by<
49  boost::multi_index::hashed_unique<
50  boost::multi_index::tag<struct seq_tag>,
51  get_seq
52  >,
53  boost::multi_index::ordered_non_unique<
54  boost::multi_index::tag<struct size_tag>,
55  get_size
56  >
57  >
// Convenience names for the two index views of seq_mi_t.
59  typedef seq_mi_t::index<seq_tag>::type seq_index;
60  typedef seq_mi_t::index<size_tag>::type size_index;
61 
// Build the constructor arguments for seq_mi_t: one inner tuple per index.
// Both key extractors are constructed from the seq_map `sm`, so the keys of
// the container are read from that map.
62  static seq_mi_t::ctor_args_list seq_index_init(seq_map const& sm) {
63  return boost::make_tuple(
// Hashed index arguments: bucket-count request, key extractor, hash, equality.
64  boost::make_tuple(
65  0,
66  get_seq(sm),
67  boost::hash<std::string>(),
68  std::equal_to<std::string>()
69  ),
// Ordered index arguments: key extractor and ordering (ascending size).
70  boost::make_tuple(get_size(sm), std::less<unsigned>())
71  );
72  }
73 
74 public:
// Listing lines 75-76 are not shown; the page index gives them as
// `detail::Vector_set< sub_seq > sub_seq_set` and
// `detail::Vector_set< super_seq > super_seq_set`.
77 
78 private:
// Listing lines 79-80 are not shown; the page index gives them as
// `detail::Id_map< Seq_id, sub_seq_set > seq_to_info` and
// `detail::Id_map< Read_id, super_seq_set > info_to_seq`.
// Iterator and range types over the size-ordered index.
81  typedef size_index::const_iterator size_iterator;
82  typedef boost::iterator_range<size_iterator> size_range;
83 
84 public:
// Iterator types over the sequence-keyed (hashed) index.
85  typedef seq_index::iterator iterator;
86  typedef seq_index::const_iterator const_iterator;
87 
// Member initializers of the default constructor `Seq_store()` (its header,
// listing line 88, is not shown). Listing line 90 is also absent —
// presumably the initializer of si_; TODO confirm against the real header.
// The maps are constructed from Seq_id(1) / Read_id(1); presumably that is
// the first ID they hand out — verify in id_map.hpp.
89  : sm_(new seq_map(Seq_id(1))),
91  s2i_(Seq_id(1)),
92  i2s_(Read_id(1))
93  {}
94 
// Number of Seq_id values held in the index container si_.
95  std::size_t size() const {return si_.size();}
// Begin/end of si_ through its first (sequence-keyed) index.
96  const_iterator begin() const {return si_.begin();}
97  const_iterator end() const {return si_.end();}
// True when si_ holds no Seq_id.
98  bool empty() const {return si_.empty();}
// Entry stored in the sequence map under `sid`.
99  Seq_entry const& operator[](const Seq_id sid) const {return (*sm_)[sid];}
// Returns a copy of the shared pointer to the sequence map (the caller then
// shares ownership of the map with this object).
100  boost::shared_ptr<seq_map> sequence_map() {return sm_;}
101 
// Range over every Seq_id, traversed through the size-ordered index.
103  size_range by_size() const {return boost::make_iterator_range(si_.get<size_tag>());}
104 
// Range over the Seq_id values whose size key equals `size`.
106  size_range by_size(const unsigned size) const {
107  return boost::make_iterator_range(
108  si_.get<size_tag>().equal_range(size)
109  );
110  }
111 
// Range over the Seq_id values whose size key lies in [from, to], both ends
// included (lower_bound(from) up to upper_bound(to)). `to` defaults to the
// largest unsigned value, i.e. no upper limit.
113  size_range by_size_range(
114  const unsigned from,
115  const unsigned to = std::numeric_limits<unsigned>::max()
116  ) const {
117  return boost::make_iterator_range(
118  si_.get<size_tag>().lower_bound(from),
119  si_.get<size_tag>().upper_bound(to)
120  );
121  }
122 
// Register sequence `seq` for read `rid`, reusing the existing Seq_id when
// an equal sequence is already indexed. Returns the Seq_id now associated
// with the sequence.
124  Seq_id insert(const Read_id rid, const boost::string_ref seq) {
125  seq_index& ind = si_.get<seq_tag>();
// Look the sequence up by string_ref with an explicit hash, so the lookup
// key is not the index's own std::string key type.
// NOTE(review): listing line 129 (the remaining argument of find(),
// presumably the equality predicate) is not shown here.
126  const const_iterator i = ind.find(
127  seq,
128  boost::hash<boost::string_ref>(),
130  );
131 
132  Seq_id sid;
133  if( i == ind.end() ) {
// Not seen before: store the entry in the map first, then index its new ID
// (the index's key extractors read from the map).
134  sid = sm_->insert(Seq_entry(seq, rid));
135  si_.insert(sid);
136  } else {
// Already stored: reuse the existing ID.
137  sid = *i;
138  }
// Record the relation in both directions, at position 0.
139  s2i_.insert(sid).insert(sub_seq(rid, 0));
140  i2s_.insert(rid).insert(super_seq(sid, 0));
141  return sid;
142  }
143 
// Remove sequence `sid` and re-attach every read that mapped to it to the
// sequences listed in `ss`.
// `ss` is iterated as a collection of super_seq (ID plus position); each
// read's new position is its position in `sid` plus the position given for
// the super sequence.
144  template<class Super_seqs> void
145  remove_subsequence(const Seq_id sid, Super_seqs const& ss) {
146  //re-map all Read_id-s to sequences in ss and back
147  BOOST_FOREACH(sub_seq const& subs, s2i_[sid]) {
148  const Read_id rid = subs.id_;
149  super_seq_set& super_ss = i2s_[rid];
// The read must currently map to sid (find() yields a pointer, null if absent).
150  BOOST_ASSERT(super_ss.find(sid));
151  super_ss.erase(sid);
152  BOOST_FOREACH(super_seq const& supers, ss) {
153  const unsigned pos = subs.pos_ + supers.pos_;
154  super_ss.insert(super_seq(supers.id_, pos));
// NOTE(review): if `ss` could ever contain `sid` itself, this insert would
// modify the set the outer loop is iterating — confirm callers never do that.
155  s2i_[supers.id_].insert(sub_seq(rid, pos));
156  }
157  }
158  //remove sid
159  s2i_.erase(sid);
// The erase key is read from (*sm_)[sid], so this has to run before the
// entry is erased from the map on the next line.
160  si_.get<seq_tag>().erase((*sm_)[sid].sequence());
161  sm_->erase(sid);
162  }
163 
// Set of (sequence ID, position) pairs recorded for read `iid`.
164  super_seq_set const& maps_to(const Read_id iid) const {return i2s_[iid];}
// Set of (read ID, position) pairs recorded for sequence `sid`.
165  sub_seq_set const& maps_from(const Seq_id sid) const {return s2i_[sid];}
166 
167 private:
// Shared map from Seq_id to Seq_entry.
168  seq_map_ptr sm_;
// Listing line 169 is not shown; the page index gives it as `seq_mi_t si_`.
// Sequence ID -> reads mapped to it.
170  seq_to_info s2i_;
// Read ID -> sequences it maps to.
171  info_to_seq i2s_;
172 };
173 
174 }//namespace vdj_pipe
175 #endif /* SEQUENCE_STORE_HPP_ */
seq_to_info s2i_
Definition: sequence_store.hpp:170
boost::iterator_range< size_iterator > size_range
Definition: sequence_store.hpp:82
detail::Vector_set< super_seq > super_seq_set
Definition: sequence_store.hpp:76
super_seq_set const & maps_to(const Read_id iid) const
Definition: sequence_store.hpp:164
Id id_
Definition: sequence_position.hpp:20
unsigned size() const
Definition: sequence_entry.hpp:29
bool empty() const
Definition: sequence_store.hpp:98
value_type const * find(CompatType const &t) const
Definition: vector_set.hpp:69
unsigned pos_
Definition: sequence_position.hpp:21
size_range by_size(const unsigned size) const
Definition: sequence_store.hpp:106
Collection of unique objects stored in an ordered vector.
Definition: vector_set.hpp:18
size_range by_size() const
Definition: sequence_store.hpp:103
boost::multi_index_container< Seq_id, boost::multi_index::indexed_by< boost::multi_index::hashed_unique< boost::multi_index::tag< struct seq_tag >, get_seq >, boost::multi_index::ordered_non_unique< boost::multi_index::tag< struct size_tag >, get_size > > > seq_mi_t
Definition: sequence_store.hpp:58
detail::Vector_set< sub_seq > sub_seq_set
Definition: sequence_store.hpp:75
Seq_pos< Seq_id > super_seq
Definition: sequence_position.hpp:25
Seq_pos< Read_id > sub_seq
Definition: sequence_position.hpp:24
seq_index::const_iterator const_iterator
Definition: sequence_store.hpp:86
seq_mi_t si_
Definition: sequence_store.hpp:169
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
static seq_mi_t::ctor_args_list seq_index_init(seq_map const &sm)
Definition: sequence_store.hpp:62
void insert(Iter i1, Iter i2)
Definition: vector_set.hpp:40
seq_index::iterator iterator
Definition: sequence_store.hpp:85
std::size_t size() const
Definition: sequence_store.hpp:95
size_range by_size_range(const unsigned from, const unsigned to=std::numeric_limits< unsigned >::max()) const
Definition: sequence_store.hpp:113
detail::Id_map< Seq_id, sub_seq_set > seq_to_info
Definition: sequence_store.hpp:79
Definition: id_map.hpp:20
std::string const & sequence() const
Definition: sequence_entry.hpp:30
const_iterator end() const
Definition: sequence_store.hpp:97
boost::shared_ptr< seq_map > sequence_map()
Definition: sequence_store.hpp:100
seq_map_ptr sm_
Definition: sequence_store.hpp:168
detail::Getter< seq_map, Seq_id, Seq_entry, unsigned,&Seq_entry::size > get_size
Definition: sequence_store.hpp:44
Seq_store()
Definition: sequence_store.hpp:88
seq_mi_t::index< seq_tag >::type seq_index
Definition: sequence_store.hpp:59
Seq_entry const & operator[](const Seq_id sid) const
Definition: sequence_store.hpp:99
seq_mi_t::index< size_tag >::type size_index
Definition: sequence_store.hpp:60
Definition: sequence_position.hpp:15
Definition: string_ref.hpp:23
detail::Id_map< Read_id, super_seq_set > info_to_seq
Definition: sequence_store.hpp:80
detail::Id_map< Seq_id, Seq_entry > seq_map
Definition: sequence_store.hpp:32
boost::shared_ptr< seq_map > seq_map_ptr
Definition: sequence_store.hpp:33
void remove_subsequence(const Seq_id sid, Super_seqs const &ss)
Definition: sequence_store.hpp:145
void erase(const id_type id)
Definition: id_map.hpp:109
size_index::const_iterator size_iterator
Definition: sequence_store.hpp:81
sub_seq_set const & maps_from(const Seq_id sid) const
Definition: sequence_store.hpp:165
Extract object by its ID and apply member function.
Definition: get_by_id.hpp:25
info_to_seq i2s_
Definition: sequence_store.hpp:171
id_type insert(value_type const &obj)
Definition: id_map.hpp:83
Store sequence and related information.
Definition: sequence_store.hpp:30
Seq_id insert(const Read_id rid, const boost::string_ref seq)
insert read ID and sequence, checking for duplicates
Definition: sequence_store.hpp:124
const_iterator begin() const
Definition: sequence_store.hpp:96
detail::Getter< seq_map, Seq_id, Seq_entry, std::string const &,&Seq_entry::sequence > get_seq
Definition: sequence_store.hpp:39
unsigned erase(CompatType const &t)
Definition: vector_set.hpp:58
Definition: sequence_entry.hpp:19