vdj_pipe
pipeline for processing DNA sequence data
find_interval.hpp
Go to the documentation of this file.
1 
7 #ifndef FIND_INTERVAL_HPP_
8 #define FIND_INTERVAL_HPP_
9 #include <utility>
10 #include <deque>
13 
14 namespace vdj_pipe{
15 
20 inline std::pair<std::size_t,std::size_t> longest_average_interval(
21  Qual_record::quality const& q,
22  const std::size_t min_q,
23  const std::size_t win
24 ) {
25  std::pair<std::size_t,std::size_t> p(0,0);
26  if( q.size() < win ) return p;
27  std::size_t cqs = 0; //current sum of scores
28  for( std::size_t i = 0; i != win; ++i ) cqs += q[i];
29 
30  std::size_t lo = 0, up = win;
31  bool expanding = cqs >= min_q;
32  for( ; up != q.size(); ++up ) {
33  cqs -= q[up - win];
34  cqs += q[up];
35  if( cqs < min_q ) {
36  if( expanding ) {
37  if( p.second < up - lo ) {
38  p.first = lo;
39  p.second = up - lo;
40  }
41  expanding = false;
42  }
43  } else {
44  if( ! expanding ) {
45  lo = up - win + 1;
46  expanding = true;
47  }
48  }
49  }
50  if( expanding && p.second < up - lo ) {
51  p.first = lo;
52  p.second = up - lo;
53  }
54  return p;
55 }
56 
61 inline std::pair<std::size_t,std::size_t> longest_min_interval(
62  Qual_record::quality const& q,
64 ) {
65  std::pair<std::size_t,std::size_t> p(0,0);
66  std::size_t lo = 0, up = 0;
67  bool expanding = false;
68  for( ; up != q.size(); ++up) {
69  if( q[up] < min_q ) {
70  if( expanding ) {
71  if( p.second < up - lo ) {
72  p.first = lo;
73  p.second = up - lo;
74  }
75  expanding = false;
76  }
77  } else {
78  if( ! expanding ) {
79  lo = up;
80  expanding = true;
81  }
82  }
83  }
84  if( expanding && p.second < up - lo ) {
85  p.first = lo;
86  p.second = up - lo;
87  }
88  return p;
89 }
90 
95 inline std::pair<std::size_t,std::size_t> longest_unambiguous_interval(
96  Seq_record::sequence const& seq,
97  const std::size_t max
98 ) {
99  std::pair<std::size_t,std::size_t> p(0,0);
100  std::size_t lo = 0, up = 0;
101 
102  if( max ) {
103  std::deque<std::size_t> amb_pos;
104  for( ; up != seq.size(); ++up) {
105  if( is_ambiguous(seq[up]) ) {
106  if( amb_pos.size() == max ) {
107  if( p.second < up - lo ) {
108  p.first = lo;
109  p.second = up - lo;
110  }
111  lo = amb_pos.front() + 1;
112  amb_pos.pop_front();
113  }
114  amb_pos.push_back(up);
115  }
116  }
117  } else {
118  for( ; up != seq.size(); ++up) {
119  if( is_ambiguous(seq[up]) ) {
120  if( p.second < up - lo ) {
121  p.first = lo;
122  p.second = up - lo;
123  }
124  lo = up + 1;
125  }
126  }
127  }
128 
129  if( p.second < up - lo ) {
130  p.first = lo;
131  p.second = up - lo;
132  }
133  return p;
134 }
135 
136 }//namespace vdj_pipe
137 #endif /* FIND_INTERVAL_HPP_ */
Definition: sequence_record.hpp:35
std::pair< std::size_t, std::size_t > longest_average_interval(Qual_record::quality const &q, const std::size_t min_q, const std::size_t win)
Definition: find_interval.hpp:20
bool is_ambiguous(const char c)
Definition: nucleotide_index.hpp:106
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
unsigned char value_type
Definition: sequence_record.hpp:37
std::string sequence
Definition: sequence_record.hpp:29
std::pair< std::size_t, std::size_t > longest_min_interval(Qual_record::quality const &q, const Qual_record::quality::value_type min_q)
Definition: find_interval.hpp:61
std::pair< std::size_t, std::size_t > longest_unambiguous_interval(Seq_record::sequence const &seq, const std::size_t max)
Definition: find_interval.hpp:95