vdj_pipe
pipeline for processing DNA sequence data
find_shared.hpp
Go to the documentation of this file.
1 
7 #ifndef FIND_SHARED_HPP_
8 #define FIND_SHARED_HPP_
9 #include <iosfwd>
10 #include <string>
11 #include "boost/property_tree/ptree_fwd.hpp"
12 #include "boost/scoped_ptr.hpp"
13 #include "boost/shared_ptr.hpp"
20 
// Forward declaration of the class template boost::multi_array, which takes
// two type parameters and one std::size_t parameter. Only the name is
// introduced here; no definition is provided in this header.
21 namespace boost{
22 template<typename, std::size_t, typename> class multi_array;
23 }//namespace boost
24 
25 namespace vdj_pipe{
// Forward declarations of vdj_pipe classes. In this header they appear only
// as smart-pointer template arguments (boost::scoped_ptr<File_ostream>,
// boost::shared_ptr<Seq_store>, boost::shared_ptr<Read_ginfo_store>), so
// declaring the names is sufficient at this point.
26 class File_ostream;
27 class Seq_store;
28 class Read_ginfo_store;
29 
33  struct kwds;
34  typedef detail::Queable_ofstream_types::value_type value_type;
35  typedef detail::Queable_ofstream_types::val_vector val_vector;
36  typedef detail::Queable_ofstream_types::val_ref_vector val_ref_vector;
38  typedef std::vector<unsigned> counts_v;
39  typedef std::vector<counts_v> hist_array_t;
40 
41 public:
42  VDJ_PIPE_STATIC_STRING_METHOD(name, "find_shared")
43  VDJ_PIPE_STATIC_STRING_METHOD(category, "post-processing")
44  VDJ_PIPE_STATIC_STRING_METHOD(description, "XXX")
45  VDJ_PIPE_STATIC_STRING_METHOD(
46  comment,
47  "output sequence reads present in multiple sets"
48  )
49 
51  Vm_access_single const& vma,
52  boost::property_tree::ptree const& pt,
54  );
55 
// Public operations of the step. run() and finish() take no arguments and
// return nothing.
// NOTE(review): presumably run() processes the current read and finish() is
// invoked once after all input has been seen - confirm in find_shared.cpp.
56  void run();
57  void finish();
// Const member taking the output stream `os`.
// NOTE(review): presumably prints a human-readable summary to `os` - confirm.
58  void summary(std::ostream& os) const;
59 
60 private:
61  Get_match_length gml_;
62  std::size_t min_duplicates_;
63 
65  std::size_t n_unique_;
66  bool trim_;
67  bool reverse_;
68  bool consensus_trim_;
69  val_map_t vm_;
70  boost::shared_ptr<Read_ginfo_store> rgs_;
71  boost::shared_ptr<Seq_store> ss_;
72  counts_v cv_;
73  std::size_t unique_count_;
74  std::string redund_hist_;
75  std::string summ_;
76  value_type unset_val_;
77  Variable_path g_unique_;
78  Variable_path g_dups_;
79  Variable_path unique_;
80  Variable_path dups_;
81  std::vector<Val_id> ids_;
82 
84 
// Const helper taking an output stream, a counts vector (counts_v is
// std::vector<unsigned>), an xstats object and a vector of Mapped_id.
// NOTE(review): presumably writes a matrix of shared-sequence counts to `os`,
// ordered by `sorted_vi` - confirm against the implementation.
85  void write_sharing_matrix(
86  std::ostream& os,
87  counts_v const& cv,
88  xstats const& xs,
89  std::vector<Mapped_id> const& sorted_vi
90  ) const;
91 
// Const helper taking an output stream, two counts vectors (`uc`, `guc`;
// counts_v is std::vector<unsigned>) and a vector of Mapped_id.
// NOTE(review): `uc`/`guc` presumably hold unique and group-unique counts
// (cf. the members unique_ and g_unique_) - confirm against the implementation.
92  void write_summary(
93  std::ostream& os,
94  counts_v const& uc,
95  counts_v const& guc,
96  std::vector<Mapped_id> const& sn
97  ) const;
98 
// Const helper taking an output stream, a histogram array (hist_array_t is
// std::vector<counts_v>) and a vector of Mapped_id.
// NOTE(review): presumably writes the redundancy histogram to `os` in the
// order given by `sorted_vi` - confirm against the implementation.
99  void write_redundancy(
100  std::ostream& os,
101  hist_array_t const& ha,
102  std::vector<Mapped_id> const& sorted_vi
103  ) const;
104 
// Const helper with the same parameter list as write_redundancy: an output
// stream, a histogram array (std::vector<counts_v>) and a vector of Mapped_id.
// NOTE(review): presumably writes a condensed summary of the redundancy
// histogram - confirm against the implementation.
105  void write_redundancy_summ(
106  std::ostream& os,
107  hist_array_t const& ha,
108  std::vector<Mapped_id> const& sorted_vi
109  ) const;
110 
111 
// Private, non-const, takes no arguments and returns nothing.
// NOTE(review): presumably computes and writes per-group statistics - confirm.
112  void group_stats();
113 
// Private, non-const, takes no arguments and returns nothing.
// NOTE(review): presumably the ungrouped counterpart of group_stats() - confirm.
114  void bulk_stats();
115 
// Const member returning a vector of Mapped_id by value.
// NOTE(review): the sort key and order are not visible in this header - confirm.
116  std::vector<Mapped_id> sorted_ids() const;
117 
// Const helper taking a reference to a boost::scoped_ptr<File_ostream>, a
// Variable_path and a format::Format value; the parameters are unnamed here.
// NOTE(review): listing line 121 is absent from this view (numbering jumps
// from 120 to 122), so one parameter may be missing from this listing -
// check the original header before relying on this signature.
118  void init_stream(
119  boost::scoped_ptr<File_ostream>&,
120  Variable_path const&,
122  const format::Format
123  ) const;
124 };
125 
126 }//namespace vdj_pipe
127 #endif /* FIND_SHARED_HPP_ */
Format
File format types.
Definition: file_properties.hpp:42
Definition: sequence_interval.hpp:58
Compression
File compression types.
Definition: file_properties.hpp:19
Definition: value_map_access_single.hpp:16
Definition: find_shared.hpp:22
Definition: find_shared.hpp:32
Main namespace of vdj_pipe library.
Definition: sequence_file.hpp:14
Definition: pipe_environment.hpp:26
Set a fraction of sequence length to match.
Definition: min_match_length.hpp:93
Definition: variable_path.hpp:38
Definition: step_base_single.hpp:16