vdj_pipe
pipeline for processing DNA sequence data
parser_fastq.hpp
Go to the documentation of this file.
1 
7 #ifndef PARSER_FASTQ_HPP_
8 #define PARSER_FASTQ_HPP_
9 #include <vector>
10 #include "boost/foreach.hpp"
13 
14 namespace vdj_pipe{
15 
// Position of the parser inside the current FASTQ record, as tracked by state_.
// Within this class: Def is set by next_record(); Seq is set by get_id(),
// get_defstr() and get_record(); Qual is set by get_sequence(); End is set by
// get_qual(InsertIter) and is the initial value given by both constructors.
20  enum State {Def, Seq, Qual, End};
21 public:
// Default value of the offset that get_qual() adds to every quality character.
// With -33 the lowest accepted character '!' (ASCII 33) maps to 0.
25  static const char qual_offset = -33;
26 
27  explicit Parser_fastq(File_input const& fi, const char offset = qual_offset)
28  : detail::Parser_line(fi), offset_(offset), state_(End)
29  {
30  if( fi.format() != format::Fastq ) BOOST_THROW_EXCEPTION(
31  Err()
32  << Err::msg_t("wrong file format for FASTQ parser")
33  << Err::str1_t(sanitize(fi.path()))
34  << Err::int1_t(fi.format())
35  );
36  next_record();
37  }
38 
39  explicit Parser_fastq(
40  std::istream& is,
42  const char offset = qual_offset
43  )
44  : detail::Parser_line(is, compr), offset_(offset), state_(End)
45  {
46  next_record();
47  }
48 
49  void next_record() {
50  if( ! has_next() ) return;
51  switch (state_) {
52  case Def:
53  skip_line();
54  if( ! has_next() ) return;
55  /* no break */
56  case Seq:
57  skip_line();
58  if( ! has_next() ) return;
59  Parser_line::seek_line('+');
60  if( ! has_next() ) return;
61  /* no break */
62  case Qual:
63  skip_line();
64  if( ! has_next() ) return;
65  skip_line();
66  if( ! has_next() ) return;
67  /* no break */
68  case End:
69  state_ = Def;
70  }
71  Parser_line::seek_line('@');
72  }
73 
74  const boost::string_ref get_id() {
75  if( state_ != Def ) next_record();
76  if( ! has_next() ) return "";
77  state_ = Seq;
78  return Parser_line::get_id('@');
79  }
80 
81  const boost::string_ref get_defstr() {
82  if( state_ != Def ) next_record();
83  if( ! has_next() ) return "";
84  state_ = Seq;
85  return Parser_line::get_defstr('@');
86  }
87 
88  sequence get_sequence() {
89  if( ! has_next() ) return "";
90  switch (state_) {
91  case Qual:
92  case End:
93  next_record();
94  if( ! has_next() ) return "";
95  /* no break */
96  case Def:
97  skip_line();
98  if( ! has_next() ) return "";
99  /* no break */
100  case Seq: break;
101  }
102  state_ = Qual;
103  return Parser_line::get_sequence('+');
104  }
105 
106  quality get_qual() {
107  quality q;
108  get_qual(back_inserter(q));
109  return q;
110  }
111 
112  template<class InsertIter> void get_qual(InsertIter i) {
113  if( ! has_next() ) return;
114  switch (state_) {
115  case End:
116  next_record();
117  if( ! has_next() ) return;
118  /* no break */
119  case Def:
120  skip_line();
121  if( ! has_next() ) return;
122  /* no break */
123  case Seq:
124  skip_line();
125  if( ! has_next() ) return;
126  Parser_line::seek_line('+');
127  if( ! has_next() ) return;
128  /* no break */
129  case Qual:
130  skip_line();
131  if( ! has_next() ) return;
132  }
133  getline(fis_.istream(), str_);
134  ++line_;
135  BOOST_FOREACH(const char c, str_) {
136  if( c < '!' || c > '~') BOOST_THROW_EXCEPTION(
137  Err()
138  << Err::msg_t("invalid quality character")
139  << Err::str1_t(sanitize(c))
140  << Err::line_t(line_num() - 1)
141  );
142  i = c + offset_;
143  }
144  state_ = End;
145  }
146 
147  record get_record() {
148  record qr;
149  set_meta(qr, '@');
150  state_ = Seq;
151  qr.seq_ = get_sequence();
152  get_qual(back_inserter(qr.qual_));
153 
154  if( qr.seq_.size() != qr.qual_.size() ) BOOST_THROW_EXCEPTION(
155  Err()
156  << Err::msg_t("sequence-quality size mismatch")
157  << Err::str1_t(sanitize(qr.id_, 60))
158  << Err::int1_t(qr.seq_.size())
159  << Err::int2_t(qr.qual_.size())
160  << Err::line_t(line_num() - 1)
161  );
162  return qr;
163  }
164 
165 private:
// Value added to every quality character in get_qual(); fixed at construction
// from the constructors' `offset` argument (default qual_offset).
166  const char offset_;
168 };
169 
170 }//namespace vdj_pipe
171 #endif /* PARSER_FASTQ_HPP_ */
State
Definition: parser_fastq.hpp:20
Parser_fastq(std::istream &is, const compression::Compression compr=compression::none, const char offset=qual_offset)
Definition: parser_fastq.hpp:39
bool has_next() const
Definition: parser_line.hpp:31
record::sequence sequence
Definition: parser_fastq.hpp:23
Definition: sequence_record.hpp:35
format::Format format() const
Definition: file.hpp:75
Parser for FASTQ files.
Definition: parser_fastq.hpp:19
File_istream fis_
Definition: parser_line.hpp:103
std::string str_
Definition: parser_line.hpp:104
record::quality quality
Definition: parser_fastq.hpp:24
Seq_record::sequence sequence
Definition: sequence_record.hpp:78
Compression
File compression types.
Definition: file_properties.hpp:19
const boost::string_ref get_defstr()
Definition: parser_fastq.hpp:81
boost::error_info< struct errinfo_int1_, int > int1_t
Definition: exception.hpp:28
Definition: sequence_record.hpp:77
Definition: parser_line.hpp:26
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
File target is supposed to exist at construction time.
Definition: file.hpp:93
void get_qual(InsertIter i)
Definition: parser_fastq.hpp:112
static const char qual_offset
Definition: parser_fastq.hpp:25
Parser_line(File_input const &fi)
Definition: parser_line.hpp:34
std::string const & path() const
Definition: file.hpp:74
const boost::string_ref get_id()
Definition: parser_fastq.hpp:74
sequence seq_
Definition: sequence_record.hpp:82
boost::error_info< struct errinfo_int2_, int > int2_t
Definition: exception.hpp:29
boost::error_info< struct errinfo_str1_, std::string > str1_t
Definition: exception.hpp:25
State state_
Definition: parser_fastq.hpp:167
std::string id_
Definition: sequence_record.hpp:20
void set_meta(Seq_meta &sm, const char tag)
Definition: parser_line.hpp:70
quality get_qual()
Definition: parser_fastq.hpp:106
void next_record()
Definition: parser_fastq.hpp:49
int line_num() const
Definition: parser_line.hpp:30
quality qual_
Definition: sequence_record.hpp:85
Definition: parser_fastq.hpp:20
Definition: file_properties.hpp:43
Seq_qual_record record
Definition: parser_fastq.hpp:22
boost::error_info< struct errinfo_message_, std::string > msg_t
Definition: exception.hpp:24
Definition: parser_fastq.hpp:20
record get_record()
Definition: parser_fastq.hpp:147
std::string sanitize(const char c)
Definition: sanitize_string.cpp:53
Basic line-based parser; use to derive other parsers.
Definition: parser_line.hpp:23
std::istream & istream()
Definition: file_stream.hpp:102
boost::error_info< struct errinfo_line_n_, int > line_t
Definition: parser_line.hpp:27
Definition: parser_fastq.hpp:20
Parser_fastq(File_input const &fi, const char offset=qual_offset)
Definition: parser_fastq.hpp:27
Definition: file_properties.hpp:20
const char offset_
Definition: parser_fastq.hpp:166
sequence get_sequence()
Definition: parser_fastq.hpp:88
Definition: parser_fastq.hpp:20
int line_
Definition: parser_line.hpp:105
void skip_line()
Definition: parser_line.hpp:53