vdj_pipe
pipeline for processing DNA sequence data
parser_line.hpp
Go to the documentation of this file.
1 
7 #ifndef PARSER_LINE_HPP_
8 #define PARSER_LINE_HPP_
9 #include <istream>
10 #include <limits>
11 #include <cctype>
12 #include "boost/assert.hpp"
13 #include "boost/foreach.hpp"
15 #include "vdj_pipe/exception.hpp"
16 #include "vdj_pipe/file_stream.hpp"
18 
19 namespace vdj_pipe{ namespace detail{
20 
23 class Parser_line {
24 
25 public:
26  struct Err : public base_exception {
27  typedef boost::error_info<struct errinfo_line_n_, int> line_t;
28  };
29 
30  int line_num() const {return line_;}
31  bool has_next() const {return fis_.good();}
32 
33 protected:
34  explicit Parser_line(File_input const& fi)
35  : fis_(fi), str_(), line_(1)
36  {
37  if( ! fis_.good() ) BOOST_THROW_EXCEPTION(
38  Err()
39  << Err::msg_t("error reading")
40  << Err::str1_t(sanitize(fi.path()))
41  );
42  }
43 
44  explicit Parser_line(std::istream& is, const compression::Compression compr)
45  : fis_(is, compr), str_(), line_(1)
46  {
47  if( ! fis_.good() ) BOOST_THROW_EXCEPTION(
48  Err()
49  << Err::msg_t("error reading")
50  );
51  }
52 
53  void skip_line() {
54  fis_.istream().ignore(std::numeric_limits<std::streamsize>::max(), '\n');
55  ++line_;
56  }
57 
59  void seek_line(const char tag) {
60  while(fis_.istream() && fis_.istream().peek() != tag) skip_line();
61  }
62 
63  const boost::string_ref get_id(const char tag) {
64  get_defstr(tag);
65  std::size_t n = 0;
66  for(; n != str_.length() && str_[n] != ' ' && str_[n] != '\t'; ++n){}
67  return boost::string_ref(str_.data(), n);
68  }
69 
70  void set_meta(Seq_meta& sm, const char tag) {
71  get_defstr(tag);
72  std::size_t n = 0;
73  for(; n != str_.size() && str_[n] != ' ' && str_[n] != '\t'; ++n){}
74  sm.id_ = str_.substr(0, n);
75  for(; n != str_.size() && (str_[n] == ' ' || str_[n] == '\t'); ++n){}
76  sm.comm_ = str_.substr(n);
77  }
78 
79  const boost::string_ref get_defstr(const char tag) {
80  if( ! fis_.istream() || fis_.istream().peek() != tag ) BOOST_THROW_EXCEPTION(
81  Err()
82  << Err::msg_t("parsing error")
83  << Err::line_t(line_)
84  );
85  fis_.istream().get();
86  getline(fis_.istream(), str_);
87  ++line_;
88  return str_;
89  }
90 
91  std::string get_sequence(const char tag) {
92  std::string seq;
93  while( fis_.istream().peek() != tag && getline(fis_.istream(), str_) ) {
94  ++line_;
95  BOOST_FOREACH(const char c, str_) {
96  if( std::isalpha(c) ) seq.push_back(c);
97  }
98  }
99  return seq;
100  }
101 
102 protected:
104  std::string str_;
105  int line_;
106 
107  void check() const {
108  if( ! fis_.good() ) BOOST_THROW_EXCEPTION(
109  Err()
110  << Err::msg_t("error reading")
111  );
112  }
113 };
114 
115 }//namespace detail
116 }//namespace vdj_pipe
117 #endif /* PARSER_LINE_HPP_ */
bool has_next() const
Definition: parser_line.hpp:31
Definition: file_stream.hpp:82
void seek_line(const char tag)
Definition: parser_line.hpp:59
File_istream fis_
Definition: parser_line.hpp:103
std::string str_
Definition: parser_line.hpp:104
Compression
File compression types.
Definition: file_properties.hpp:19
Seq_meta
Definition: sequence_record.hpp:18
Parser_line::Err
Definition: parser_line.hpp:26
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
File target is supposed to exist at construction time.
Definition: file.hpp:93
Parser_line(File_input const &fi)
Definition: parser_line.hpp:34
std::string const & path() const
Definition: file.hpp:74
boost::error_info< struct errinfo_str1_, std::string > str1_t
Definition: exception.hpp:25
std::string id_
Definition: sequence_record.hpp:20
void set_meta(Seq_meta &sm, const char tag)
Definition: parser_line.hpp:70
bool good() const
Definition: file_stream.hpp:103
const std::size_t n
Definition: vector_set_test.cpp:26
int line_num() const
Definition: parser_line.hpp:30
const boost::string_ref get_id(const char tag)
Definition: parser_line.hpp:63
Definition: exception.hpp:23
boost::error_info< struct errinfo_message_, std::string > msg_t
Definition: exception.hpp:24
void check() const
Definition: parser_line.hpp:107
std::string sanitize(const char c)
Definition: sanitize_string.cpp:53
std::string get_sequence(const char tag)
Definition: parser_line.hpp:91
Basic line-based parser; use to derive other parsers.
Definition: parser_line.hpp:23
std::istream & istream()
Definition: file_stream.hpp:102
boost::error_info< struct errinfo_line_n_, int > line_t
Definition: parser_line.hpp:27
Parser_line(std::istream &is, const compression::Compression compr)
Definition: parser_line.hpp:44
std::string comm_
Definition: sequence_record.hpp:23
const boost::string_ref get_defstr(const char tag)
Definition: parser_line.hpp:79
int line_
Definition: parser_line.hpp:105
void skip_line()
Definition: parser_line.hpp:53