vdj_pipe
pipeline for processing DNA sequence data
me_aligned.hpp
Go to the documentation of this file.
1 
7 #ifndef ME_ALIGNED_HPP_
8 #define ME_ALIGNED_HPP_
9 #include "boost/lexical_cast.hpp"
10 #include "boost/utility/string_ref.hpp"
11 #include "me_relative_interval.hpp"
12 #include "me_types.hpp"
13 #include "sequence_map_aligned.hpp"
14 #include "sequence_map_types.hpp"
15 #include "vdj_pipe/exception.hpp"
16 #include "vdj_pipe/object_ids.hpp"
19 #include "vdj_pipe/value_map.hpp"
20 
21 namespace vdj_pipe{ namespace match{
22 
26 public:
// Exception type of this matcher; derives from base_exception.
// The constructor throws it (with an Err::msg_t message) when the list of
// sequences to match against is empty.
27  struct Err : public base_exception{};
28  typedef boost::string_ref sequence;
30 
// Constructor parameter list, initializers and body. The declarator line
// itself (listing line 31) is absent from this extract.
//
// Registers the three value names in the value map via insert_new_name(),
// stores the relative interval and the matching thresholds, and loads every
// entry of `sv` into the aligned sequence map gsm_.
//
// Throws Err with message "no sequences provided" when `sv` is empty.
32  Value_map const& vm,
33  std::string const& match_value_name,
34  std::string const& score_value_name,
35  std::string const& identity_value_name,
36  Relative_interval const& ri,
37  detail::name_seq_vector const& sv,
38  const int min_score,
39  const unsigned min_match_len,
40  const bool track_mismatches
41  )
42  : vm_(vm),
    // The IDs below are obtained from the member vm_ (initialized just above),
    // not from the constructor argument vm.
43  name_val_id_(vm_.insert_new_name(match_value_name)),
44  score_val_id_(vm_.insert_new_name(score_value_name)),
45  identity_val_id_(vm_.insert_new_name(identity_value_name)),
46  gsm_(),
47  ri_(ri),
48  min_score_(min_score),
49  min_match_len_(min_match_len),
50  track_mismatches_(track_mismatches)
51  {
    // Reject an empty sequence list up front.
52  if( sv.empty() ) BOOST_THROW_EXCEPTION(
53  Err()
54  << Err::msg_t("no sequences provided")
55  );
56 
    // Insert each (name, sequence) pair into gsm_. An entry whose name
    // (ms.first) is empty is stored under a generated name: the prefix "seq_"
    // followed by its 1-based position in sv. The third insert() argument
    // (declared as `reverse` in Seq_map_aligned::insert) is always false here.
57  for(std::size_t n = 0; n != sv.size(); ++n) {
58  static const std::string seq = "seq_";
59  detail::match_seq const& ms = sv[n];
60  if( ms.first.empty() ) {
61  gsm_.insert(
62  seq + boost::lexical_cast<std::string>(n+1),
63  ms.second,
64  false
65  );
66  } else {
67  gsm_.insert(
68  ms.first,
69  ms.second,
70  false
71  );
72  }
73  }
74  }
75 
// Parameters and body of the matching call operator. The declarator line and
// several statements (listing lines 76, 81, 87, 89-90, 95, 102) are absent
// from this extract, so the comments below describe only the visible lines.
//
// The quality argument is unnamed and therefore unused in this function.
77  const sequence seq,
78  quality const&,
79  sequence_interval const& si
80  ) {
    // Early exit: an invalid input interval yields res as it was initialized
    // (the initialization line is not visible here).
82  if( ! is_valid(si) ) return res;
    // si1 is computed by the relative interval ri_ from si and the length of seq.
83  const sequence_interval si1 = ri_(si, seq.size());
    // Early exit when si1 is invalid or narrower than min_match_len_.
84  if( ! is_valid(si1) || (std::size_t)width(si1) < min_match_len_ ) return res;
85 
    // Only the part of seq covered by si1 is passed on for matching.
86  const Seq_map_aligned::Match m =
88  seq.substr(si1.lower(), width(si1)),
91  );
92 
    // The score is written to the value map only when it is positive and
    // score_val_id_ evaluates to true.
93  if( m.score_ > 0 ) {
94  if( score_val_id_ ) vm_[score_val_id_] = (long)m.score_;
96  }
97 
    // Acceptance test. With track_mismatches_ set, min_score_ is compared
    // against the mismatch count (match accepted when n_mismatches_ does not
    // exceed it); otherwise it is compared against the score (match accepted
    // when score_ is at least min_score_).
    // NOTE(review): the (unsigned) cast converts a negative min_score_ into a
    // very large value, so in mismatch mode every match would be accepted --
    // confirm that callers never pass a negative threshold in that mode.
98  if(
99  ( track_mismatches_ && ( (unsigned)min_score_ >= m.n_mismatches_ ) ) ||
100  ( ! track_mismatches_ && ( min_score_ <= m.score_ ) )
101  ) {
    // Offset the matched interval by si1.lower(); presumably this maps it from
    // substring coordinates back to coordinates of seq -- TODO confirm.
103  res = m.si_ + si1.lower();
104  }
105 
106  return res;
107  }
108 
// No-op: the body is empty, so calling it has no effect on this object.
// Presumably present to satisfy an interface shared with other match
// elements -- TODO confirm against callers.
109  void finish() {}
110 
111 private:
113  Val_id name_val_id_;
119  std::size_t min_match_len_;
121 };
122 
123 }//namespace match
124 }//namespace vdj_pipe
125 #endif /* ME_ALIGNED_HPP_ */
Seq_id id_
Definition: sequence_map_aligned.hpp:49
Match best_match(const boost::string_ref seq, const std::size_t min_match, const bool track_mismatches) const
Definition: sequence_map_aligned.hpp:102
Qual_record::quality quality
Definition: me_aligned.hpp:29
sequence_interval si_
Definition: sequence_map_aligned.hpp:53
std::string const & seq_id(const Seq_id sid) const
Definition: sequence_map_aligned.hpp:97
void finish()
Definition: me_aligned.hpp:109
bool track_mismatches_
Definition: me_aligned.hpp:120
Definition: sequence_record.hpp:35
Definition: sequence_map_aligned.hpp:39
Value_map vm_
Definition: me_aligned.hpp:112
std::size_t min_match_len_
Definition: me_aligned.hpp:119
Definition: sequence_map_aligned.hpp:30
Val_id name_val_id_
Definition: me_aligned.hpp:113
Identify DNA sequence interval by finding the best alignment.
Definition: me_aligned.hpp:25
int min_score_
Definition: me_aligned.hpp:118
unsigned n_mismatches_
Definition: sequence_map_aligned.hpp:51
Val_id score_val_id_
Definition: me_aligned.hpp:114
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
Seq_map_aligned gsm_
Definition: me_aligned.hpp:116
sequence_interval sequence_interval_invalid()
Definition: sequence_interval.hpp:44
void insert(std::string const &id, std::string seq, const bool reverse)
Definition: sequence_map_aligned.hpp:74
Match_element_aligned(Value_map const &vm, std::string const &match_value_name, std::string const &score_value_name, std::string const &identity_value_name, Relative_interval const &ri, detail::name_seq_vector const &sv, const int min_score, const unsigned min_match_len, const bool track_mismatches)
Definition: me_aligned.hpp:31
boost::string_ref sequence
Definition: me_aligned.hpp:28
const std::size_t n
Definition: vector_set_test.cpp:26
Relative_interval ri_
Definition: me_aligned.hpp:117
Definition: me_aligned.hpp:27
sequence_interval operator()(const sequence seq, quality const &, sequence_interval const &si)
Definition: me_aligned.hpp:76
boost::numeric::interval< int, boost::numeric::interval_lib::policies< boost::numeric::interval_lib::rounded_math< int >, detail::Interval_checking_policy< int > > > sequence_interval
Definition: sequence_interval.hpp:40
Definition: exception.hpp:23
boost::error_info< struct errinfo_message_, std::string > msg_t
Definition: exception.hpp:24
bool is_valid(vdj_pipe::sequence_interval const &si)
Definition: sequence_interval.hpp:62
Val_id identity_val_id_
Definition: me_aligned.hpp:115
std::pair< std::string, std::string > match_seq
Definition: sequence_map_types.hpp:14
double identity_
Definition: sequence_map_aligned.hpp:52
Store values mapped against name strings and value IDs.
Definition: value_map.hpp:23
const sequence_interval si1(0, 18)
std::vector< match_seq > name_seq_vector
Definition: sequence_map_types.hpp:15
int score_
Definition: sequence_map_aligned.hpp:50
Definition: me_relative_interval.hpp:15