7 #ifndef PARSER_FASTQ_HPP_ 8 #define PARSER_FASTQ_HPP_ 10 #include "boost/foreach.hpp" 20 enum State {Def, Seq, Qual, End};
23 typedef record::sequence sequence;
25 static const char qual_offset = -33;
30 if( fi.format() != format::Fastq ) BOOST_THROW_EXCEPTION(
32 << Err::msg_t(
"wrong file format for FASTQ parser")
33 << Err::str1_t(sanitize(fi.path()))
34 << Err::int1_t(fi.format())
42 const char offset = qual_offset
50 if( ! has_next() )
return;
54 if( ! has_next() )
return;
58 if( ! has_next() )
return;
59 Parser_line::seek_line(
'+');
60 if( ! has_next() )
return;
64 if( ! has_next() )
return;
66 if( ! has_next() )
return;
71 Parser_line::seek_line(
'@');
74 const boost::string_ref get_id() {
75 if( state_ != Def ) next_record();
76 if( ! has_next() )
return "";
78 return Parser_line::get_id(
'@');
81 const boost::string_ref get_defstr() {
82 if( state_ != Def ) next_record();
83 if( ! has_next() )
return "";
85 return Parser_line::get_defstr(
'@');
88 sequence get_sequence() {
89 if( ! has_next() )
return "";
94 if( ! has_next() )
return "";
98 if( ! has_next() )
return "";
103 return Parser_line::get_sequence(
'+');
108 get_qual(back_inserter(q));
112 template<
class InsertIter>
void get_qual(InsertIter i) {
113 if( ! has_next() )
return;
117 if( ! has_next() )
return;
121 if( ! has_next() )
return;
125 if( ! has_next() )
return;
126 Parser_line::seek_line(
'+');
127 if( ! has_next() )
return;
131 if( ! has_next() )
return;
133 getline(fis_.istream(), str_);
135 BOOST_FOREACH(
const char c, str_) {
136 if( c < '!' || c >
'~') BOOST_THROW_EXCEPTION(
138 << Err::msg_t(
"invalid quality character")
139 << Err::str1_t(sanitize(c))
140 << Err::line_t(line_num() - 1)
147 record get_record() {
151 qr.
seq_ = get_sequence();
152 get_qual(back_inserter(qr.
qual_));
154 if( qr.
seq_.size() != qr.
qual_.size() ) BOOST_THROW_EXCEPTION(
156 << Err::msg_t(
"sequence-quality size mismatch")
157 << Err::str1_t(sanitize(qr.
id_, 60))
158 << Err::int1_t(qr.
seq_.size())
159 << Err::int2_t(qr.
qual_.size())
160 << Err::line_t(line_num() - 1)
Definition: sequence_record.hpp:35
Parser for FASTQ files.
Definition: parser_fastq.hpp:19
Compression
File compression types.
Definition: file_properties.hpp:19
Definition: sequence_record.hpp:77
Definition: parser_line.hpp:26
Main namespace of vdj_pipe library.
Definition: sequence_file.hpp:14
sequence seq_
Definition: sequence_record.hpp:82
quality qual_
Definition: sequence_record.hpp:85
Basic line-based parser; use to derive other parsers.
Definition: parser_line.hpp:23