pipeline for processing DNA sequence data
Namespaces | Classes | Typedefs | Enumerations | Functions
vdj_pipe Namespace Reference

Main namespace of vdj_pipe library. More...


 Namespace for unit-tests.


class  Ambiguous_window_filter
class  Apply_many
 Apply enclosed processing step multiple times to sequence reads of specified kind: forward, reverse, or merged. More...
class  Apply_one
 Apply enclosed processing step once to a sequence read of specified kind: forward, reverse, or merged. More...
class  Average_quality_filter
class  Average_quality_window_filter
struct  base_exception
class  Best_match_pair
struct  Blank
class  Blank_step
class  Character_filter
class  Command_line_options
class  Composition_stats
struct  Config_paired_emid_reads
struct  Config_paired_reads
struct  Config_single_reads
class  External_mid_infile
 Look for external MIDs in separate files. More...
class  External_mid_inline
 Look for external MIDs in FASTA/Q description lines. More...
class  File
class  File_input
 File target is supposed to exist at construction time. More...
class  File_istream
class  File_ostream
class  File_ostream_queue
 stack of recently used output file streams More...
class  File_ostream_variant
 Select output file based on variables in value map. More...
class  File_output
 File target is created if needed at construction time. More...
class  Find_shared
struct  Finish_visitor
class  Get_match_length
 Set a fraction of sequence length to match. More...
class  Histogram_1d
 simple unsigned integer-based histogram More...
class  Histogram_step
class  Homopolymer_filter
class  Id_iterator
struct  Identity
 why is it not in STL? More...
class  Input_manager
struct  Is_ambiguous
class  Length_filter
struct  Lib_info
 library info More...
class  Match_fraction_length
 Set a fraction of sequence length to match. More...
struct  Match_full_length
 Require full sequence to match. More...
class  Match_ignore_ends
 Set a minimal sequence length to match allowing for some mismatch at ends. More...
class  Match_min_length
 Set a minimal sequence length to match. More...
class  Match_step
class  Merge_paired
struct  Merge_result
class  Min_quality_filter
class  Min_quality_window_filter
class  Output_manager
class  Parser_fasta
class  Parser_fastq
 Parser for FASTQ files. More...
class  Parser_qual
class  Pipe_environment
class  Pipe_paired_emid_read
 process paired reads with external MIDs More...
class  Pipe_paired_read
 process paired reads More...
class  Pipe_single_read
class  Pipeline
class  Pipeline_input
struct  Qual_record
class  Qual_stats
class  Quality
struct  Queable_ofstream
class  Read_info
class  Read_info_store
 Store sequence-related information. More...
struct  Run_visitor
class  Seq_entry
class  Seq_file
class  Seq_file_entry
class  Seq_file_map
class  Seq_fls
 store short sequence in an integer More...
struct  Seq_match
struct  Seq_meta
struct  Seq_pos
struct  Seq_qual_record
struct  Seq_record
class  Seq_store
 Store sequence and related information. More...
class  Step_variant_store
class  Summary_visitor
class  Summary_visitor2
struct  Type_index
class  Unambiguous_interval_iter
class  Value_ids_emid
 Provides access to standard eMID values. More...
class  Value_ids_paired
 Provides access to standard values for paired read pipeline. More...
class  Value_ids_single
 Provides access to standard values for single read pipeline. More...
class  Value_map
 Store values mapped against name strings and value IDs. More...
struct  Value_names
class  Variable_path
class  Vm_access_paired
 Access to value map for paired read pipeline and processing steps. More...
class  Vm_access_paired_emid
 Access to value map for paired eMID read pipeline and processing steps. More...
class  Vm_access_single
class  Write_seq
class  Write_value


typedef boost::variant< Blank, bool, long, double, std::string, sequence_interval, Qual_record::quality > value_variant
typedef boost::numeric::interval< int, boost::numeric::interval_lib::policies< boost::numeric::interval_lib::rounded_math< int >, detail::Interval_checking_policy< int > > > sequence_interval
typedef Seq_pos< Read_id > sub_seq
typedef Seq_pos< Seq_id > super_seq
typedef boost::multi_array< int, 2 > scoring_matrix_t
typedef boost::make_recursive_variant_over< step::paired_read_vector >::type step_variant_paired
typedef boost::make_recursive_variant_over< step::paired_emid_read_vector >::type step_variant_paired_emid
typedef boost::make_recursive_variant_over< step::single_read_vector >::type step_variant_single
typedef boost::make_recursive_variant_over< step::all_steps_vector >::type step_variant_all


enum  Nucleotide {
  Adenine = 0, Cytosine = 1, Guanine = 2, Thymine = 3,
  Any = 4, Uracil = 5, Purine = 6, Pyrimidine = 7,
  Ketone = 8, Amine = 9, Strong = 10, Weak = 11,
  not_A = 12, not_C = 13, not_G = 14, not_T = 15
}


std::size_t hash_value (Blank const &)
template<class ChT , class Tr >
std::basic_ostream< ChT, Tr > & operator<< (std::basic_ostream< ChT, Tr > &os, Blank const &)
std::string const & variable_type_str (const int which)
bool is_blank (value_variant const &vv)
std::string const & variable_type_str (value_variant const &vv)
VDJ_PIPE_DECL value_variant parse_variant (std::string const &s)
VDJ_PIPE_DECL void store_values (Seq_file_entry const &sfe, Value_map &vm)
VDJ_PIPE_DECL compression::Compression extension_to_compression (std::string const &ext)
 guess file compression from lowercase extension string More...
VDJ_PIPE_DECL compression::Compression compression_magic (std::istream &is)
 guess file compression from magic number More...
VDJ_PIPE_DECL format::Format extension_to_format (std::string const &ext)
 guess file format from lowercase extension string More...
VDJ_PIPE_DECL void path_decompose (std::string const &path, std::vector< std::string > &tv, std::vector< std::string > &nv)
 separate path into non-name and name areas More...
VDJ_PIPE_DECL std::string path_assemble (std::vector< std::string > const &templ, detail::Queable_ofstream_types::val_ref_vector const &vals)
 assemble path from template and values More...
sequence_interval sequence_interval_invalid ()
template<class MinLength >
void remove_subsequences (Seq_store &ss, gdst::Gdst &st, MinLength const &min_len)
 identify unique sequences in sequence store, insert them into suffix tree, and remove non-unique ones
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_split (std::basic_ostream< Ch, Tr > &os, const boost::string_ref str, const Ch delim, const unsigned length)
 print string splitting it into parts of equal length More...
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_fasta (std::basic_ostream< Ch, Tr > &os, std::string const &name, Seq_record::sequence const &sequence, const unsigned length=80)
 print FASTA record More...
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_fasta (std::basic_ostream< Ch, Tr > &os, const std::string &name, const boost::string_ref sequence, const unsigned length=80)
 print FASTA record More...
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_qual (std::basic_ostream< Ch, Tr > &os, std::string const &name, Qual_record::quality const &qual, const unsigned length=80)
 print QUAL record More...
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_fastq (std::basic_ostream< Ch, Tr > &os, std::string const &name, Seq_record::sequence const &sequence, Qual_record::quality const &qual, const int offset=33)
 print FASTQ record More...
VDJ_PIPE_DECL std::string sanitize (const char c)
VDJ_PIPE_DECL std::string sanitize (std::string const &str)
VDJ_PIPE_DECL std::string sanitize (const boost::string_ref str, const std::size_t max_len)
template<class Seq >
int identity (Seq const &s1, const boost::string_ref s2, scoring_matrix_t const &sm, const std::size_t=0)
template<unsigned N, typename S >
int identity (const Seq_fls< N, S > seq1, const Seq_fls< N, S > seq2, scoring_matrix_t const &sm, const unsigned len=N)
template<class ChT , class Tr >
std::basic_ostream< ChT, Tr > & operator<< (std::basic_ostream< ChT, Tr > &os, Qual_record::quality const &qual)
bool operator== (Seq_entry const &se1, Seq_entry const &se2)
bool operator== (const boost::string_ref se1, Seq_entry const &se2)
bool operator== (Seq_entry const &se1, const boost::string_ref se2)
std::size_t hash_value (Seq_entry const &se)
std::pair< std::size_t, std::size_t > longest_average_interval (Qual_record::quality const &q, const std::size_t min_q, const std::size_t win)
std::pair< std::size_t, std::size_t > longest_min_interval (Qual_record::quality const &q, const Qual_record::quality::value_type min_q)
std::pair< std::size_t, std::size_t > longest_unambiguous_interval (Seq_record::sequence const &seq, const std::size_t max)
template<class Id >
bool operator== (Seq_pos< Id > const &rp1, Seq_pos< Id > const &rp2)
template<class Id >
bool operator< (Seq_pos< Id > const &rp1, Seq_pos< Id > const &rp2)
template<class Id >
bool operator< (Seq_pos< Id > const &rp1, const Id rp2)
template<class Id >
bool operator< (const Id rp1, Seq_pos< Id > const &rp2)
VDJ_PIPE_DECL compression::Compression guess_compression_ext (std::string const &path)
VDJ_PIPE_DECL compression::Compression guess_compression_magic (std::string const &path)
VDJ_PIPE_DECL std::pair< compression::Compression, format::Format > guess_compression_format (std::string const &path)
VDJ_PIPE_DECL format::Format guess_format (std::string const &path, const compression::Compression c)
VDJ_PIPE_DECL std::string ensure_path_writable (std::string const &path)
 create file if it does not exist, along with parent directories if needed
VDJ_PIPE_DECL std::string ensure_path_writable (std::string const &path, std::string const &header)
 if file does not exist, create and write header; create parent directories if needed
VDJ_PIPE_DECL bool is_path_readable (std::string const &path)
VDJ_PIPE_DECL std::string ensure_path_readable (std::string const &path)
VDJ_PIPE_DECL std::size_t hash_value (File const &f)
Nucleotide nucleotide_index (const char c)
bool is_ambiguous (const char c)
Nucleotide complement (const Nucleotide n)
char complement (const char c)
char to_capital (const Nucleotide n)
char to_small (const Nucleotide n)
template<int Match, int Mismatch, int Approximate, int Uncertain>
scoring_matrix_t const & scoring_matrix ()
int identity (const Nucleotide n1, const Nucleotide n2, scoring_matrix_t const &sm=scoring_matrix< 2,-2, 1, 0 >())
template<class Range >
std::string complement (Range const &r)
std::string transform (const boost::string_ref seq, sequence_interval const &si, const bool reverse)
 Reverse-complement (optionally) a portion of a sequence.
template<class Ostr >
Ostr & operator<< (Ostr &ostr, const Read_info si)
VDJ_PIPE_DECL std::ostream & stamp (std::ostream &os, std::string const &pref="## ", std::string const &delim="; ", std::string const &suff="\n")
VDJ_PIPE_DECL std::string stamp (std::string const &pref="## ", std::string const &delim="; ", std::string const &suff="\n")
VDJ_PIPE_DECL void process_options (boost::property_tree::ptree const &pt)
VDJ_PIPE_DECL void process_single_reads (boost::property_tree::ptree const &pt)
VDJ_PIPE_DECL void process_paired_reads (boost::property_tree::ptree const &pt)
VDJ_PIPE_DECL void process_paired_emid_reads (boost::property_tree::ptree const &pt)
VDJ_PIPE_DECL void merge (std::string const &s1, Seq_qual_record::quality const &qv1, std::string const &s2, Seq_qual_record::quality const &qv2, const unsigned min_score, Merge_result &mr)
template<class Id >
boost::iterator_range< Id_iterator< Id > > id_range (const Id id1, const Id id2)
template<typename T >
void unused_variable (T const &)
VDJ_PIPE_DECL step_variant_paired create_step_paired (Vm_access_paired const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
Config_paired_reads::processing_step create_step< Config_paired_reads > (Config_paired_reads::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
VDJ_PIPE_DECL step_variant_single create_step_single (Vm_access_single const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
Config_single_reads::processing_step create_step< Config_single_reads > (Config_single_reads::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
VDJ_PIPE_DECL step_variant_paired_emid create_step_paired_emid (Vm_access_paired_emid const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
Config_paired_emid_reads::processing_step create_step< Config_paired_emid_reads > (Config_paired_emid_reads::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
template<class Config >
Config::processing_step create_step (typename Config::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)

Detailed Description

Main namespace of vdj_pipe library.

Enumeration Type Documentation




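enum vdj_pipe::Nucleotide

Purine - R : A, G.


Pyrimidine - Y : C, T, U.


Ketone - K : G, T, U.


Amine - M : A, C.


Strong - S : C, G.


Weak - W : A, T, U.


not_A - B : C, G, T.


not_C - D : A, G, T.


not_G - H : A, C, T.


not_T - V : A, C, G.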

Function Documentation

VDJ_PIPE_DECL compression::Compression vdj_pipe::compression_magic ( std::istream &  is)

guess file compression from magic number

Example ".gz" ".bz2"

VDJ_PIPE_DECL std::string vdj_pipe::ensure_path_readable ( std::string const &  path)
Returns: canonical path name
Throws: if path is not readable
VDJ_PIPE_DECL compression::Compression vdj_pipe::extension_to_compression ( std::string const &  ext)

guess file compression from lowercase extension string

Example ".gz" ".bz2"

VDJ_PIPE_DECL format::Format vdj_pipe::extension_to_format ( std::string const &  ext)

guess file format from lowercase extension string

Example ".fq" ".fasta"

VDJ_PIPE_DECL compression::Compression vdj_pipe::guess_compression_ext ( std::string const &  path)
Returns: compression based on file extension
VDJ_PIPE_DECL std::pair<compression::Compression, format::Format> vdj_pipe::guess_compression_format ( std::string const &  path)
Returns: file compression and format based on file extension
VDJ_PIPE_DECL compression::Compression vdj_pipe::guess_compression_magic ( std::string const &  path)
Returns: compression based on file magic number
VDJ_PIPE_DECL format::Format vdj_pipe::guess_format ( std::string const &  path,
const compression::Compression  c )
Returns: file format based on file extension
template<class Seq >
int vdj_pipe::identity ( Seq const &  s1,
const boost::string_ref  s2,
scoring_matrix_t const &  sm,
const std::size_t  = 0 )

compute identity score

template<unsigned N, typename S >
int vdj_pipe::identity ( const Seq_fls< N, S >  seq1,
const Seq_fls< N, S >  seq2,
scoring_matrix_t const &  sm,
const unsigned  len = N )

compute identity score

VDJ_PIPE_DECL bool vdj_pipe::is_path_readable ( std::string const &  path)
Returns: true if readable
std::pair<std::size_t,std::size_t> vdj_pipe::longest_average_interval ( Qual_record::quality const &  q,
const std::size_t  min_q,
const std::size_t  win )
Returns: starting position and length of the longest interval in which the sum of every sub-interval of length win is at least min_q
std::pair<std::size_t,std::size_t> vdj_pipe::longest_min_interval ( Qual_record::quality const &  q,
const Qual_record::quality::value_type  min_q )
Returns: starting position and length of the longest interval in which every element is at least min_q
std::pair<std::size_t,std::size_t> vdj_pipe::longest_unambiguous_interval ( Seq_record::sequence const &  seq,
const std::size_t  max )
Returns: starting position and length of the longest interval that contains fewer than max ambiguous nucleotides
VDJ_PIPE_DECL std::string vdj_pipe::path_assemble ( std::vector< std::string > const &  templ,
detail::Queable_ofstream_types::val_ref_vector const &  vals )

assemble path from template and values

Example: out_dir/{name1}-dir/{name2}file.fna

VDJ_PIPE_DECL void vdj_pipe::path_decompose ( std::string const &  path,
std::vector< std::string > &  tv,
std::vector< std::string > &  nv )

separate path into non-name and name areas

Example: out_dir/{name1}-dir/{name2}file.fna

template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_fasta ( std::basic_ostream< Ch, Tr > &  os,
std::string const &  name,
Seq_record::sequence const &  sequence,
const unsigned  length = 80 )

print FASTA record

Parameters:
os: output stream
name: FASTA description string
sequence: FASTA sequence string
length: sequence line length
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_fasta ( std::basic_ostream< Ch, Tr > &  os,
const std::string &  name,
const boost::string_ref  sequence,
const unsigned  length = 80 )

print FASTA record

Parameters:
os: output stream
name: FASTA description string
sequence: FASTA sequence string
length: sequence line length
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_fastq ( std::basic_ostream< Ch, Tr > &  os,
std::string const &  name,
Seq_record::sequence const &  sequence,
Qual_record::quality const &  qual,
const int  offset = 33 )

print FASTQ record

Parameters:
os: output stream
name: FASTA description string
sequence: FASTA sequence string
qual: quality scores
offset: quality score to char conversion offset
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_qual ( std::basic_ostream< Ch, Tr > &  os,
std::string const &  name,
Qual_record::quality const &  qual,
const unsigned  length = 80 )

print QUAL record

Parameters:
os: output stream
name: FASTA description string
qual: quality scores
length: quality line length
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_split ( std::basic_ostream< Ch, Tr > &  os,
const boost::string_ref  str,
const Ch  delim,
const unsigned  length )

print string splitting it into parts of equal length

Parameters:
os: output stream
str: input string
delim: characters to insert between the parts of the string
length: string part length
vdj_pipe::VDJ_PIPE_OBJECT_ID ( Val_id  )

Mapped value ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Mid_id  )

Molecular identifier ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Path_id  )

Filesystem path ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Read_id  )

Sequencing read ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Seq_id  )

Sequence ID