vdj_pipe
pipeline for processing DNA sequence data
input_manager_impl.hpp
Go to the documentation of this file.
1 
7 #ifndef INPUT_MANAGER_IMPL_HPP_
8 #define INPUT_MANAGER_IMPL_HPP_
9 #include <algorithm>
10 #include <map>
11 #include <set>
12 #include "boost/foreach.hpp"
13 #include "boost/lexical_cast.hpp"
14 #include "boost/property_tree/ptree.hpp"
15 #include "boost/range.hpp"
16 namespace bpt = boost::property_tree;
17 
18 #include "keywords_variable.hpp"
19 #include "vdj_pipe/exception.hpp"
20 #include "vdj_pipe/value_map.hpp"
23 
24 namespace vdj_pipe{ namespace input_detail{
25 
27 typedef val_map_t::value_type val_pair_t;
28 typedef std::vector<val_map_t> vvmap_t;
29 
31  input_kwds,
32  (forward_seq)
33  (reverse_seq)
34  (forward_mid)
35  (reverse_mid)
36  (forward_qual)
37  (reverse_qual)
38 );
39 
42 val_map_t prepare_values(bpt::ptree const& pt, Value_map& vm) {
43  typedef Seq_file_entry::Err Err;
44  val_map_t m;
45  std::set<std::string> name_set;
46  bpt::ptree::value_type const* sequence = 0;
47  bpt::ptree::value_type const* quality = 0;
48  bpt::ptree::value_type const* reverse = 0;
49  bpt::ptree::value_type const* forward_seq = 0;
50  bpt::ptree::value_type const* reverse_seq = 0;
51  bpt::ptree::value_type const* forward_mid = 0;
52  bpt::ptree::value_type const* reverse_mid = 0;
53  bpt::ptree::value_type const* forward_qual = 0;
54  bpt::ptree::value_type const* reverse_qual = 0;
55 
56  BOOST_FOREACH(bpt::ptree::value_type const& vt, pt) {
57 
58  if( ! name_set.insert(vt.first).second ) BOOST_THROW_EXCEPTION(
59  Err()
60  << Err::msg_t("duplicate value name")
61  << Err::str1_t(vt.first)
62  );
63 
64  if( vt.first == kwds::Single::sequence() ) {
65  sequence = &vt;
66 
67  } else if( vt.first == kwds::Single::quality() ) {
68  quality = &vt;
69 
70  } else if( vt.first == kwds::Single::is_reverse() ) {
71  reverse = &vt;
72 
73  } else if( vt.first == input_kwds::forward_seq() ) {
74  forward_seq = &vt;
75 
76  } else if( vt.first == input_kwds::reverse_seq() ) {
77  reverse_seq = &vt;
78 
79  } else if( vt.first == input_kwds::forward_mid() ) {
80  forward_mid = &vt;
81 
82  } else if( vt.first == input_kwds::reverse_mid() ) {
83  reverse_mid = &vt;
84 
85  } else if( vt.first == input_kwds::forward_qual() ) {
86  forward_qual = &vt;
87 
88  } else if( vt.first == input_kwds::reverse_qual() ) {
89  reverse_qual = &vt;
90 
91  } else if( vt.first.size() ) {
92  const Val_id vid = vm.insert_name(vt.first);
93  const value_variant vv = parse_variant(vt.second.data());
94  m.insert(val_pair_t(vid, vv));
95 
96  } else {
97  BOOST_THROW_EXCEPTION( Err() << Err::msg_t("empty value name") );
98  }
99  }
100 
101  // check for correct file name combinations
102  // and translate them to variable map names
103  if( sequence ) {
104  if(
105  forward_seq || reverse_seq || forward_mid ||
106  reverse_mid || forward_qual || reverse_qual
107  ) {
108  BOOST_THROW_EXCEPTION(
109  Err()
110  << Err::msg_t(
111  "\"forward_\" and \"reverse_\" cannot be used with "
112  "\"sequence\""
113  )
114  );
115  }
116  const Val_id vid1 = vm.value_id(kwds::Single::seq_file_path());
117  m.insert(val_pair_t(vid1, sequence->second.get_value<std::string>()));
118 
119  if( quality ) {
120  const Val_id vid = vm.value_id(kwds::Single::qual_file_path());
121  m.insert(val_pair_t(vid, quality->second.get_value<std::string>()));
122  }
123 
124  const Val_id vid2 = vm.value_id(kwds::Single::is_reverse());
125  if( reverse ) {
126  m.insert(val_pair_t(vid2, reverse->second.get_value<bool>()));
127  } else {
128  m.insert(val_pair_t(vid2, false));
129  }
130 
131  return m;
132  }
133 
134  if( quality || reverse ) BOOST_THROW_EXCEPTION(
135  Err()
136  << Err::msg_t(
137  "\"quality\" and \"reverse\" should be used with "
138  "\"sequence\" keyword"
139  )
140  );
141 
142  if( forward_seq && reverse_seq ) {
143 
144  if( (bool)forward_qual != (bool)reverse_qual ) BOOST_THROW_EXCEPTION(
145  Err()
146  << Err::msg_t(
147  "both forward and reverse quality scores should be "
148  "provided"
149  )
150  );
151 
152  if( (bool)forward_mid != (bool)reverse_mid ) BOOST_THROW_EXCEPTION(
153  Err()
154  << Err::msg_t(
155  "both forward and reverse eMID files should be provided"
156  )
157  );
158 
159  const Val_id vid1 = vm.value_id(kwds::Forward::seq_file_path_fwd());
160  m.insert(val_pair_t(vid1, forward_seq->second.get_value<std::string>()));
161  const Val_id vid2 = vm.value_id(kwds::Reverse::seq_file_path_rev());
162  m.insert(val_pair_t(vid2, reverse_seq->second.get_value<std::string>()));
163 
164  if( forward_qual ) {
165  const Val_id vid1 = vm.value_id(kwds::Forward::qual_file_path_fwd());
166  m.insert(val_pair_t(vid1, forward_qual->second.get_value<std::string>()));
167  const Val_id vid2 = vm.value_id(kwds::Reverse::qual_file_path_rev());
168  m.insert(val_pair_t(vid2, reverse_qual->second.get_value<std::string>()));
169  }
170 
171  if( forward_mid ) {
172  const Val_id vid1 = vm.value_id(kwds::Emid::emid_file_path_fwd());
173  m.insert(val_pair_t(vid1, forward_mid->second.get_value<std::string>()));
174  const Val_id vid2 = vm.value_id(kwds::Emid::emid_file_path_rev());
175  m.insert(val_pair_t(vid2, reverse_mid->second.get_value<std::string>()));
176  }
177 
178  return m;
179  }
180 
181  if( forward_seq ) {
182  const Val_id vid1 = vm.value_id(kwds::Single::seq_file_path());
183  m.insert(val_pair_t(vid1, forward_seq->second.get_value<std::string>()));
184 
185  const Val_id vid2 = vm.value_id(kwds::Single::is_reverse());
186  m.insert(val_pair_t(vid2, false));
187 
188  if( forward_qual ) {
189  const Val_id vid1 = vm.value_id(kwds::Single::qual_file_path());
190  m.insert(val_pair_t(vid1, forward_qual->second.get_value<std::string>()));
191  }
192 
193  return m;
194  }
195 
196  if( reverse_seq ) {
197  const Val_id vid1 = vm.value_id(kwds::Single::seq_file_path());
198  m.insert(val_pair_t(vid1, reverse_seq->second.get_value<std::string>()));
199 
200  const Val_id vid2 = vm.value_id(kwds::Single::is_reverse());
201  m.insert(val_pair_t(vid2, true));
202 
203  if( reverse_qual ) {
204  const Val_id vid1 = vm.value_id(kwds::Single::qual_file_path());
205  m.insert(val_pair_t(vid1, reverse_qual->second.get_value<std::string>()));
206  }
207 
208  return m;
209  }
210 
211  BOOST_THROW_EXCEPTION(
212  Err()
213  << Err::msg_t("no sequence information found")
214  );
215 
216  return m;
217 }
218 
219 typedef std::map<Val_id,int> type_map_t;
220 
226  val_map_t& entry_vars,
227  type_map_t& curr_types,
228  type_map_t& change_to,
229  std::set<Val_id> const& ign,
230  Value_map const& vm
231 ) {
232  typedef Seq_file_entry::Err Err;
233  BOOST_FOREACH(type_map_t::value_type& vp, curr_types) {
234  BOOST_ASSERT(vp.first);
235 
236  if( ign.find(vp.first) != ign.end() ) continue;
237 
238  if( entry_vars.find(vp.first) == entry_vars.end() ) BOOST_THROW_EXCEPTION(
239  Err()
240  << Err::msg_t("value not present in all input entries")
241  << Err::str1_t(vm.name(vp.first))
242  );
243 
244  value_variant& vv = entry_vars[vp.first];
245  if( vv.which() == vp.second ) continue;
246 
247  //current type is int and entry has float
248  if(
249  vp.second == Type_index<long>::value &&
250  vv.which() == Type_index<double>::value
251  ) {
252  change_to[vp.first] = Type_index<double>::value;
253  vp.second = Type_index<double>::value;
254  continue;
255  }
256 
257  //current type is float and entry has int
258  if(
259  vp.second == Type_index<double>::value &&
260  vv.which() == Type_index<long>::value
261  ) {
262  vv = (double)boost::get<long>(vv);
263  continue;
264  }
265 
266  //current type is string
267  if( vp.second == Type_index<std::string>::value ) {
268  vv = boost::lexical_cast<std::string>(vv);
269  continue;
270  }
271 
272  //convert both types to string
273  if( vv.which() != Type_index<std::string>::value ) {
274  vv = boost::lexical_cast<std::string>(vv);
275  }
276  change_to[vp.first] = Type_index<std::string>::value;
277  vp.second = Type_index<std::string>::value;
278  }
279 }
280 
284  val_map_t& entry_vars,
285  type_map_t const& change_to,
286  Value_map const& vm
287 ) {
288  typedef Seq_file_entry::Err Err;
289  BOOST_FOREACH(type_map_t::value_type const& vp, change_to) {
290  value_variant& vv = entry_vars[vp.first];
291  if( vv.which() == vp.second ) continue;
292 
293  //current type is float and entry has int
294  if(
295  vp.second == Type_index<double>::value &&
296  vv.which() == Type_index<long>::value
297  ) {
298  vv = (double)boost::get<long>(vv);
299  continue;
300  }
301 
302  //current type is string
303  if( vp.second == Type_index<std::string>::value ) {
304  vv = boost::lexical_cast<std::string>(vv);
305  continue;
306  }
307 
308  BOOST_THROW_EXCEPTION(
309  Err()
310  << Err::msg_t("value type conversion error")
311  << Err::str1_t(vm.name(vp.first))
312  << Err::str2_t(variable_type_str(vv))
313  << Err::str3_t(variable_type_str(vp.second))
314  );
315  }
316 
317 }
318 
324 void harmonize_variables(vvmap_t& vvm, Value_map const& vm) {
325  typedef Seq_file_entry::Err Err;
326 
327  if( vvm.empty() ) return;
328 
329  type_map_t curr_types;
330  BOOST_FOREACH(
331  val_map_t& entry_vars,
332  boost::make_iterator_range(++vvm.begin(), vvm.end())
333  ) {
334  typedef type_map_t::value_type pair_t;
335  BOOST_FOREACH(val_pair_t const& vp, entry_vars) {
336  curr_types.insert(pair_t(vp.first, vp.second.which()));
337  }
338  }
339 
340  //these variables may be set only in some input entries
341  std::set<Val_id> ign;
342 
343  if( Val_id const* id = vm.find_id(kwds::Single::qual_file_path()) ) {
344  ign.insert(*id);
345  }
346 
347  if( Val_id const* id = vm.find_id(kwds::Forward::qual_file_path_fwd()) ) {
348  ign.insert(*id);
349  }
350 
351  if( Val_id const* id = vm.find_id(kwds::Reverse::qual_file_path_rev()) ) {
352  ign.insert(*id);
353  }
354 
355  type_map_t change_to;
356  BOOST_FOREACH(
357  val_map_t& entry_vars,
358  boost::make_iterator_range(++vvm.begin(), vvm.end())
359  ) {
360  harmonize_variables_1(entry_vars, curr_types, change_to, ign, vm);
361  }
362 
363  BOOST_FOREACH(
364  val_map_t& entry_vars,
365  boost::make_iterator_range(++vvm.begin(), vvm.end())
366  ) {
367  harmonize_variables_2(entry_vars, change_to, vm);
368  }
369 }
370 
371 }//namespace input_detail
372 }//namespace vdj_pipe
373 #endif /* INPUT_MANAGER_IMPL_HPP_ */
value_variant parse_variant(std::string const &s)
Definition: value_variant.cpp:19
void harmonize_variables_1(val_map_t &entry_vars, type_map_t &curr_types, type_map_t &change_to, std::set< Val_id > const &ign, Value_map const &vm)
Definition: input_manager_impl.hpp:225
Definition: value_variant.hpp:69
std::string const & variable_type_str(const int which)
Definition: value_variant.hpp:54
VDJ_PIPE_KEYWORD_STRUCT(input_kwds,(forward_seq)(reverse_seq)(forward_mid)(reverse_mid)(forward_qual)(reverse_qual))
Val_id value_id(std::string const &name) const
Definition: value_map.hpp:87
val_map_t::value_type val_pair_t
Definition: input_manager_impl.hpp:27
std::vector< val_map_t > vvmap_t
Definition: input_manager_impl.hpp:28
std::map< Val_id, value_variant > map_t
Definition: sequence_file_entry.hpp:29
std::string const & name(const Val_id vid) const
Definition: value_map.hpp:71
Main namespace of vdj_pipe library.
Definition: keywords_variable.hpp:11
void harmonize_variables(vvmap_t &vvm, Value_map const &vm)
Definition: input_manager_impl.hpp:324
Val_id insert_name(std::string const &name)
Definition: value_map.hpp:75
boost::variant< Blank, bool, long, double, std::string, sequence_interval, Qual_record::quality > value_variant
Definition: value_variant.hpp:50
Definition: sequence_file_entry.hpp:34
val_map_t prepare_values(bpt::ptree const &pt, Value_map &vm)
Definition: input_manager_impl.hpp:42
Val_id const * find_id(std::string const &name) const
Definition: value_map.hpp:83
std::map< Val_id, int > type_map_t
Definition: input_manager_impl.hpp:219
void harmonize_variables_2(val_map_t &entry_vars, type_map_t const &change_to, Value_map const &vm)
Definition: input_manager_impl.hpp:283
Store values mapped against name strings and value IDs.
Definition: value_map.hpp:23
Seq_file_entry::map_t val_map_t
Definition: input_manager_impl.hpp:26
bpt::ptree ptree
Definition: processing_step_utils.hpp:19