vdj_pipe
pipeline for processing DNA sequence data
me_combination.hpp
Go to the documentation of this file.
1 
7 #ifndef ME_COMBINATION_HPP_
8 #define ME_COMBINATION_HPP_
9 #include "boost/assert.hpp"
10 #include "boost/foreach.hpp"
11 #include "boost/unordered_map.hpp"
12 #include "me_types.hpp"
13 #include "sequence_map_types.hpp"
15 #include "vdj_pipe/exception.hpp"
16 #include "vdj_pipe/object_ids.hpp"
17 #include "vdj_pipe/value_map.hpp"
18 
19 namespace vdj_pipe{ namespace match{
20 
// Member type aliases used by the lookup tables below.
// seq_map: sequence string -> Seq_id
24  typedef boost::unordered_map<std::string, Seq_id> seq_map;
// read-only iterator into a seq_map
25  typedef seq_map::const_iterator map_iter;
// collection of seq_map objects (smv_ holds one per matching element)
26  typedef std::vector<seq_map> seq_map_vector;
// ordered list of Seq_id values; used as the key of name_map
27  typedef std::vector<Seq_id> id_vector;
// name_map: combination of sequence ids -> combination name
28  typedef boost::unordered_map<id_vector, std::string> name_map;
// read-only iterator into a name_map
29  typedef name_map::const_iterator name_iter;
// non-owning view of a character sequence
30  typedef boost::string_ref sequence;
31 
32 public:
// Exception type thrown by the constructor when the combination table is
// empty or contains a duplicate combination.
33  struct Err : public base_exception{};
34 
// NOTE(review): listing line 35 (the line that names this constructor and
// opens its parameter list) is missing from this extract; the parameters
// below belong to Match_combination(...).
//
// Builds the lookup tables from the combination table st.
//   vm       - value map; used to initialize vm_
//   val_name - name registered through vm_.insert_new_name(); the returned
//              id is kept in val_id_
//   mev      - matching-element indices; copied into mev_ and incremented
//              by one at the end of the body
//   st       - table of string rows; in each row sv[0] is the combination
//              name and sv[1] .. sv[mev.size()] are the sequences, one per
//              entry of mev
// Throws Err when st is empty or when two rows produce the same id vector.
36  Value_map const& vm,
37  std::string const& val_name,
38  std::vector<std::size_t> const& mev,
39  detail::string_table const& st
40  )
41  : vm_(vm),
42  val_id_(vm_.insert_new_name(val_name)),
43  mev_(mev),
// one (initially empty) sequence map per matching element
44  smv_(mev_.size()),
45  nm_()
46  {
47  if( st.empty() ) BOOST_THROW_EXCEPTION(
48  Err()
49  << Err::msg_t("no combination sequences found")
50  );
51  BOOST_FOREACH(detail::string_vector const& sv, st) {
// Row layout is checked with BOOST_ASSERT only; no exception is thrown
// here for a row of the wrong length.
52  BOOST_ASSERT(sv.size() == mev.size() + 1);
53  std::string const& name = sv[0];
54  id_vector idv(mev.size());
55  for(std::size_t n = 0; n != mev.size(); ++n) {
56  seq_map& sm = smv_[n];
// A sequence already present in sm keeps its existing id; a new sequence
// gets sm.size() + 1, so the ids within each map start at 1.
57  map_iter i = sm.emplace(sv[n + 1], Seq_id(sm.size() + 1)).first;
58  idv[n] = i->second;
59  }
// p.second is false when this id combination was already registered.
60  const std::pair<name_iter,bool> p = nm_.emplace(idv, name);
61  if( ! p.second ) BOOST_THROW_EXCEPTION(
62  Err()
63  << Err::msg_t("duplicate combination")
// the name is passed through sanitize() before being attached to the error
64  << Err::str1_t(sanitize(name))
65  );
66  }
67 
68  // interval_vector starts with the whole read interval followed by
69  // the intervals produced by each of the matching elements
70  // Therefore mev_ indices should be 1-based
71  BOOST_FOREACH(std::size_t& n, mev_) ++n;
72  }
73 
// NOTE(review): listing line 74 (the declaration line of this function) is
// missing from this extract; the parameters below belong to operator().
//
// Finds the name of the combination formed by the subsequences located at
// the matching elements' intervals.
//   seq - character sequence from which the subsequences are cut
//   iv  - intervals; indexed here with the values stored in mev_
// On success stores the combination name in vm_[val_id_] and returns iv[0].
// Returns sequence_interval::empty() when an interval is not valid, when a
// subsequence is not present in the corresponding map, or when the
// resulting id combination has no registered name.
75  const sequence seq,
76  interval_vector const& iv
77  ) {
78  id_vector idv(mev_.size());
79  for(std::size_t n = 0; n != mev_.size(); ++n) {
// NOTE(review): iv is indexed without a visible range check.
80  sequence_interval const& si = iv[mev_[n]];
81  if( ! is_valid(si) ) return sequence_interval::empty();
// view of the part of seq covered by the interval
82  const sequence s = seq.substr(si.lower(), width(si));
// Lookup keyed directly by the string view, with the hash supplied
// explicitly.
// NOTE(review): listing line 86, the argument that follows the hash, is
// missing from this extract - presumably an equality predicate; confirm
// against the original header.
83  map_iter i = smv_[n].find(
84  s,
85  boost::hash<sequence>(),
87  );
88  if( i == smv_[n].end() ) return sequence_interval::empty();
89  idv[n] = i->second;
90  }
91  name_iter i = nm_.find(idv);
92  if( i == nm_.end() ) return sequence_interval::empty();
// record the matched combination name under the id obtained in the
// constructor
93  vm_[val_id_] = i->second;
94  return iv[0];
95  }
96 
97 private:
// NOTE(review): listing line 98 is missing from this extract; the reference
// list at the end of the page gives it as `Value_map vm_`.
// id returned by vm_.insert_new_name(val_name) in the constructor
99  Val_id val_id_;
// matching-element indices; incremented by one at the end of the constructor
100  std::vector<std::size_t> mev_;
// one sequence -> Seq_id map per matching element
101  seq_map_vector smv_;
// id combination -> combination name
102  name_map nm_;
103 };
104 
105 }//namespace match
106 }//namespace vdj_pipe
107 #endif /* ME_COMBINATION_HPP_ */
Match_combination(Value_map const &vm, std::string const &val_name, std::vector< std::size_t > const &mev, detail::string_table const &st)
Definition: me_combination.hpp:35
Err
Definition: me_combination.hpp:33
sequence_interval operator()(const sequence seq, interval_vector const &iv)
Definition: me_combination.hpp:74
Value_map vm_
Definition: me_combination.hpp:98
boost::unordered_map< std::string, Seq_id > seq_map
Definition: me_combination.hpp:24
name_map nm_
Definition: me_combination.hpp:102
boost::string_ref sequence
Definition: me_combination.hpp:30
std::vector< std::string > string_vector
Definition: sequence_map_types.hpp:16
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
Val_id val_id_
Definition: me_combination.hpp:99
std::vector< Seq_id > id_vector
Definition: me_combination.hpp:27
std::vector< string_vector > string_table
Definition: sequence_map_types.hpp:17
seq_map::const_iterator map_iter
Definition: me_combination.hpp:25
std::vector< sequence_interval > interval_vector
Definition: me_types.hpp:34
Match_combination
Definition: me_combination.hpp:23
name_map::const_iterator name_iter
Definition: me_combination.hpp:29
boost::error_info< struct errinfo_str1_, std::string > str1_t
Definition: exception.hpp:25
seq_map_vector smv_
Definition: me_combination.hpp:101
Definition: string_ref.hpp:23
const std::size_t n
Definition: vector_set_test.cpp:26
std::vector< seq_map > seq_map_vector
Definition: me_combination.hpp:26
boost::numeric::interval< int, boost::numeric::interval_lib::policies< boost::numeric::interval_lib::rounded_math< int >, detail::Interval_checking_policy< int > > > sequence_interval
Definition: sequence_interval.hpp:40
Definition: exception.hpp:23
std::vector< std::size_t > mev_
Definition: me_combination.hpp:100
boost::unordered_map< id_vector, std::string > name_map
Definition: me_combination.hpp:28
boost::error_info< struct errinfo_message_, std::string > msg_t
Definition: exception.hpp:24
bool is_valid(vdj_pipe::sequence_interval const &si)
Definition: sequence_interval.hpp:62
std::string sanitize(const char c)
Definition: sanitize_string.cpp:53
Store values mapped against name strings and value IDs.
Definition: value_map.hpp:23