vdj_pipe
pipeline for processing DNA sequence data
me_short.hpp
Go to the documentation of this file.
1 
7 #ifndef ME_SHORT_HPP_
8 #define ME_SHORT_HPP_
9 #include "boost/foreach.hpp"
10 #include "boost/utility/string_ref.hpp"
11 
12 #include "me_relative_interval.hpp"
13 #include "me_types.hpp"
14 #include "sequence_map_short.hpp"
15 #include "sequence_map_types.hpp"
16 #include "vdj_pipe/exception.hpp"
17 #include "vdj_pipe/object_ids.hpp"
19 #include "vdj_pipe/value_map.hpp"
20 
21 namespace vdj_pipe{ namespace match{
22 
28 public:
29  struct Err : public base_exception{}; // exception type of this class; thrown by the constructor when no sequences are provided
30  typedef boost::string_ref sequence; // sequence type taken by operator(); a boost::string_ref refers to characters without owning them
32 
34  Value_map const& vm,
35  std::string const& match_value_name,
36  std::string const& score_value_name,
37  Relative_interval const& ri,
38  detail::name_seq_vector const& sv,
39  const int min_score,
40  const bool ignore_dups,
41  const bool require_best,
42  const bool track_mismatches
43  )
44  : vm_(vm),
45  name_val_id_(vm_.insert_new_name(match_value_name)),
46  score_val_id_(vm_.insert_new_name(score_value_name)),
47  sm_(ignore_dups),
48  ri_(ri),
50  track_mismatches ?
51  &scoring_matrix<0,-1,0,0>() :
52  &scoring_matrix<2,-2,1,0>()
53  ),
54  min_score_(track_mismatches ? - min_score : min_score),
55  require_best_(require_best)
56  {
57  if( sv.empty() ) BOOST_THROW_EXCEPTION(
58  Err()
59  << Err::msg_t("no sequences provided")
60  );
61 
62  if( ! ri_.length_defined() ) {
63  ri_ = Relative_interval(ri_.position(), sv.front().second.size());
64  }
65 
66  BOOST_FOREACH(detail::match_seq const& ms, sv) {
67  if( ms.first.empty() ) {
68  sm_.insert(ms.second, ms.second);
69  } else {
70  sm_.insert(ms.first, ms.second);
71  }
72  }
73  }
74 
76  const sequence seq,
77  quality const& qual,
78  sequence_interval const& si
79  ) {
81  if( ! is_valid(si) ) return res;
82  const sequence_interval si1 = ri_(si, seq.size());
83  if( ! is_valid(si1) || (std::size_t)width(si1) < sm_.seq_size() ) {
84  return res;
85  }
86 
87  std::size_t best_pos = 0;
89 
90  for(
91  std::size_t n = si1.lower(), max = si1.upper() - sm_.seq_size() + 1;
92  n != max;
93  ++n
94  ) {
95  const sequence s = seq.substr(n, sm_.seq_size());
96  if( sm_.find_closest(s, m, *scoring_matrix_) ) {
97  best_pos = n;
98  }
99  }
100 
101  if( m.has_score() && score_val_id_ ) {
102  vm_[score_val_id_] = (long)m.score1();
103  }
104 
106  if( name_val_id_ ) vm_[name_val_id_] = sm_.name(m.id1());
107  res.assign(best_pos, best_pos + sm_.seq_size());
108  }
109 
110  return res;
111  }
112 
113  void finish() {} // intentionally empty: this function does nothing
114 
115 private:
117  Val_id name_val_id_;
124 };
125 
126 }//namespace match
127 }//namespace vdj_pipe
128 #endif /* ME_SHORT_HPP_ */
std::string const & name(const Mid_id id) const
Definition: sequence_map_short.hpp:52
Find the best match between a DNA sequence interval and a set of short sequences, without gaps...
Definition: me_short.hpp:27
bool length_defined() const
Definition: me_relative_interval.hpp:42
Value_map vm_
Definition: me_short.hpp:116
std::size_t seq_size() const
Definition: sequence_map_short.hpp:53
Definition: best_match_pair.hpp:16
Definition: sequence_record.hpp:35
sequence_interval operator()(const sequence seq, quality const &qual, sequence_interval const &si)
Definition: me_short.hpp:75
scoring_matrix_t const & scoring_matrix()
Definition: nucleotide_index.hpp:307
Relative_position const & position() const
Definition: me_relative_interval.hpp:43
Definition: find_shared.hpp:22
bool require_best_
Definition: me_short.hpp:123
boost::string_ref sequence
Definition: me_short.hpp:30
Relative_interval ri_
Definition: me_short.hpp:120
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
Qual_record::quality qual
Definition: match_element_run.cpp:26
scoring_matrix_t const * scoring_matrix_
Definition: me_short.hpp:121
sequence_interval sequence_interval_invalid()
Definition: sequence_interval.hpp:44
Mid_id insert(std::string name, std::string const &seq)
Definition: sequence_map_short.hpp:80
Match_element_short(Value_map const &vm, std::string const &match_value_name, std::string const &score_value_name, Relative_interval const &ri, detail::name_seq_vector const &sv, const int min_score, const bool ignore_dups, const bool require_best, const bool track_mismatches)
Definition: me_short.hpp:33
int min_score_
Definition: me_short.hpp:122
const std::size_t n
Definition: vector_set_test.cpp:26
bool is_acceptable(const score_type min_score, const bool require_best) const
Definition: best_match_pair.hpp:35
id_type const & id1() const
Definition: best_match_pair.hpp:29
Val_id name_val_id_
Definition: me_short.hpp:117
boost::numeric::interval< int, boost::numeric::interval_lib::policies< boost::numeric::interval_lib::rounded_math< int >, detail::Interval_checking_policy< int > > > sequence_interval
Definition: sequence_interval.hpp:40
Definition: exception.hpp:23
void finish()
Definition: me_short.hpp:113
boost::error_info< struct errinfo_message_, std::string > msg_t
Definition: exception.hpp:24
bool is_valid(vdj_pipe::sequence_interval const &si)
Definition: sequence_interval.hpp:62
score_type score1() const
Definition: best_match_pair.hpp:32
Seq_map_short sm_
Definition: me_short.hpp:119
std::pair< std::string, std::string > match_seq
Definition: sequence_map_types.hpp:14
Store values mapped against name strings and value IDs.
Definition: value_map.hpp:23
Definition: sequence_map_short.hpp:25
Qual_record::quality quality
Definition: me_short.hpp:31
Val_id score_val_id_
Definition: me_short.hpp:118
const sequence_interval si1(0, 18)
std::vector< match_seq > name_seq_vector
Definition: sequence_map_types.hpp:15
bool find_closest(const Seq s, match_type &m, scoring_matrix_t const &sm) const
Definition: sequence_map_short.hpp:63
Definition: me_relative_interval.hpp:15
bool has_score() const
Definition: best_match_pair.hpp:31