vdj_pipe
pipeline for processing DNA sequence data
filter_step.hpp
Go to the documentation of this file.
1 
7 #ifndef FILTER_STEP_HPP_
8 #define FILTER_STEP_HPP_
9 #include <iosfwd>
10 #include <string>
11 #include <vector>
12 #include "boost/property_tree/ptree_fwd.hpp"
15 
16 namespace vdj_pipe{
17 
21  struct kwds;
22 public:
23  VDJ_PIPE_STATIC_STRING_METHOD(name, "character_filter")
24  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
25  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
27  comment,
28  "discard reads that have nucleotides other than specified"
29  )
30 
32  Vm_access_single const& vma,
35  );
36 
37  void run();
38  void finish() {}
39 
40 private:
41  std::vector<char> ch_;
42 };
43 
47  struct kwds;
48 public:
49  VDJ_PIPE_STATIC_STRING_METHOD(name, "length_filter")
50  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
51  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
53  comment,
54  "discard reads with length outside of [min, max] range"
55  )
56 
58  Vm_access_single const& vma,
61  );
62 
63  void run();
64  void finish() {}
65 
66 private:
67  unsigned min_;
68  unsigned max_;
69  bool trim_;
70 };
71 
75  struct kwds;
76 public:
77  VDJ_PIPE_STATIC_STRING_METHOD(name, "homopolymer_filter")
78  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
79  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
81  comment,
82  "discard reads that contain a homopolymer longer than minimal length"
83  )
84 
86  Vm_access_single const& vma,
89  );
90 
91  void run();
92  void finish() {}
93 
94 private:
95  unsigned min_;
96 };
97 
101  struct kwds;
103 public:
104  VDJ_PIPE_STATIC_STRING_METHOD(name, "min_quality_filter")
105  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
106  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
108  comment,
109  "discard reads that contain a quality score lower than minimal"
110  )
111 
113  Vm_access_single const& vma,
114  boost::property_tree::ptree const& pt,
116  );
117 
118  void run();
119  void finish() {}
120 
121 private:
122  value_type min_;
123 };
124 
128  struct kwds;
129 public:
130  VDJ_PIPE_STATIC_STRING_METHOD(name, "average_quality_filter")
131  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
132  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
134  comment,
135  "discard reads with average quality score lower than minimal"
136  )
137 
139  Vm_access_single const& vma,
142  );
143 
144  void run();
145  void finish() {}
146 
147 private:
148  double min_;
149 };
150 
154  struct kwds;
155 public:
156  VDJ_PIPE_STATIC_STRING_METHOD(name, "min_quality_window_filter")
157  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
158  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
160  comment,
161  "find longest interval with each quality score greater than "
162  "minimal; discard reads where the interval found is shorter "
163  "than minimal"
164  )
165 
167  Vm_access_single const& vma,
170  );
171 
172  void run();
173  void finish() {}
174 
175 private:
177  std::size_t min_len_;
178 };
179 
183  struct kwds;
184 public:
185  VDJ_PIPE_STATIC_STRING_METHOD(name, "average_quality_window_filter")
186  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
187  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
189  comment,
190  "find longest interval with average quality score greater than "
191  "minimal; discard reads where the interval found is shorter "
192  "than minimal"
193  )
194 
196  Vm_access_single const& vma,
199  );
200 
201  void run();
202  void finish() {}
203 
204 private:
206  std::size_t win_len_;
207 
209  std::size_t min_qual_l_;
210 
212  std::size_t min_len_;
213 };
214 
218  struct kwds;
219 public:
220  VDJ_PIPE_STATIC_STRING_METHOD(name, "ambiguous_window_filter")
221  VDJ_PIPE_STATIC_STRING_METHOD(category, "filter")
222 
224  comment,
225  "Truncate reads to bring number of ambiguous bases below maximum"
226  )
227 
228  VDJ_PIPE_STATIC_STRING_METHOD(description,
229  "Find read interval at least min_len nucleotides long that has "
230  "at most max_ambiguous ambiguous nucleotides. "
231  "Discard reads where such interval cannot be found. "
232  "If min_len is zero, require that the whole read contains at most "
233  "max_ambiguous ambiguous nucleotides."
234  )
235 
237  Vm_access_single const& vma,
238  boost::property_tree::ptree const& pt,
240  );
241 
242  void run();
243  void finish() {}
244 
245 private:
246  std::size_t min_len_;
247  std::size_t max_ambiguous_;
248 };
249 
250 }//namespace vdj_pipe
251 #endif /* FILTER_STEP_HPP_ */
VDJ_PIPE_STATIC_STRING_METHOD(comment,"discard reads that have nucleotides other than specified") Character_filter(Vm_access_single const &vma
void finish()
Definition: filter_step.hpp:38
Definition: filter_step.hpp:217
Definition: sanitize_string.cpp:15
Definition: filter_base.hpp:17
std::size_t min_len_
Definition: filter_step.hpp:212
std::size_t min_qual_l_
Definition: filter_step.hpp:209
void finish()
Definition: filter_step.hpp:145
std::size_t win_len_
Definition: filter_step.hpp:206
double min_
Definition: filter_step.hpp:148
value_type min_
Definition: filter_step.hpp:122
Definition: filter_step.hpp:127
Definition: value_map_access_single.hpp:16
Definition: filter_step.hpp:182
unsigned min_
Definition: filter_step.hpp:95
Qual_record::quality::value_type min_qual_
Definition: filter_step.hpp:176
Qual_record::quality::value_type value_type
Definition: filter_step.hpp:101
void finish()
Definition: filter_step.hpp:92
Definition: filter_base.hpp:51
void finish()
Definition: filter_step.hpp:202
Definition: filter_step.hpp:100
std::vector< char > ch_
Definition: filter_step.hpp:41
void finish()
Definition: filter_step.hpp:64
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
Definition: filter_step.hpp:20
Definition: filter_step.hpp:153
std::size_t min_len_
Definition: filter_step.hpp:246
Definition: pipe_environment.hpp:26
Definition: filter_step.hpp:74
unsigned char value_type
Definition: sequence_record.hpp:37
boost::property_tree::ptree const & pt
Definition: filter_step.hpp:33
void finish()
Definition: filter_step.hpp:173
Definition: filter_step.hpp:46
std::size_t max_ambiguous_
Definition: filter_step.hpp:247
unsigned max_
Definition: filter_step.hpp:68
unsigned min_
Definition: filter_step.hpp:67
bool trim_
Definition: filter_step.hpp:69
void run()
Definition: filter_step.cpp:71
bpt::ptree ptree
Definition: processing_step_utils.hpp:19
std::size_t min_len_
Definition: filter_step.hpp:177
boost::property_tree::ptree const Pipe_environment & pe
Definition: filter_step.hpp:33