vdj_pipe
pipeline for processing DNA sequence data
Namespaces | Classes | Typedefs | Enumerations | Functions
vdj_pipe Namespace Reference

Main namespace of vdj_pipe library. More...

Namespaces

 test
 Namespace for unit-tests.
 

Classes

class  Ambiguous_window_filter
 
class  Apply_many
 Apply enclosed processing step multiple times to sequence reads of specified kind: forward, reverse, or merged. More...
 
class  Apply_one
 Apply enclosed processing step once to a sequence read of specified kind: forward, reverse, or merged. More...
 
class  Average_quality_filter
 
class  Average_quality_window_filter
 
struct  base_exception
 
class  Best_match_pair
 
struct  Blank
 
class  Blank_step
 
class  Character_filter
 
class  Command_line_options
 
class  Composition_stats
 
struct  Config_paired_emid_reads
 
struct  Config_paired_reads
 
struct  Config_single_reads
 
class  External_mid_infile
 Look for external MIDs in separate files. More...
 
class  External_mid_inline
 Look for external MIDs in FASTA/Q description lines. More...
 
class  File
 
class  File_input
 File target is supposed to exist at construction time. More...
 
class  File_istream
 
class  File_ostream
 
class  File_ostream_queue
 stack of recently used output file streams More...
 
class  File_ostream_variant
 Select output file based on variables in value map. More...
 
class  File_output
 File target is created if needed at construction time. More...
 
class  Find_shared
 
struct  Finish_visitor
 
class  Get_match_length
 Set a fraction of sequence length to match. More...
 
class  Histogram_1d
 simple unsigned integer-based histogram More...
 
class  Histogram_step
 
class  Homopolymer_filter
 
class  Id_iterator
 
struct  Identity
 why is it not in STL? More...
 
class  Input_manager
 
struct  Is_ambiguous
 
class  Length_filter
 
struct  Lib_info
 library info More...
 
class  Match_fraction_length
 Set a fraction of sequence length to match. More...
 
struct  Match_full_length
 Require full sequence to match. More...
 
class  Match_ignore_ends
 Set a minimal sequence length to match allowing for some mismatch at ends. More...
 
class  Match_min_length
 Set a minimal sequence length to match. More...
 
class  Match_step
 
class  Merge_paired
 
struct  Merge_result
 
class  Min_quality_filter
 
class  Min_quality_window_filter
 
class  Output_manager
 
class  Parser_fasta
 
class  Parser_fastq
 Parser for FASTQ files. More...
 
class  Parser_qual
 
class  Pipe_environment
 
class  Pipe_paired_emid_read
 process paired reads with external MIDs More...
 
class  Pipe_paired_read
 process paired reads More...
 
class  Pipe_single_read
 
class  Pipeline
 
class  Pipeline_input
 
struct  Qual_record
 
class  Qual_stats
 
class  Quality
 
struct  Queable_ofstream
 
class  Read_info
 
class  Read_info_store
 Store sequence-related information. More...
 
struct  Run_visitor
 
class  Seq_entry
 
class  Seq_file
 
class  Seq_file_entry
 
class  Seq_file_map
 
class  Seq_fls
 store short sequence in an integer More...
 
struct  Seq_match
 
struct  Seq_meta
 
struct  Seq_pos
 
struct  Seq_qual_record
 
struct  Seq_record
 
class  Seq_store
 Store sequence and related information. More...
 
class  Step_variant_store
 
class  Summary_visitor
 
class  Summary_visitor2
 
struct  Type_index
 
class  Unambiguous_interval_iter
 
class  Value_ids_emid
 Provides access to standard eMID values. More...
 
class  Value_ids_paired
 Provides access to standard values for paired read pipeline. More...
 
class  Value_ids_single
 Provides access to standard values for single read pipeline. More...
 
class  Value_map
 Store values mapped against name strings and value IDs. More...
 
struct  Value_names
 
class  Variable_path
 
class  Vm_access_paired
 Access to value map for paired read pipeline and processing steps. More...
 
class  Vm_access_paired_emid
 Access to value map for paired eMID read pipeline and processing steps. More...
 
class  Vm_access_single
 
class  Write_seq
 
class  Write_value
 

Typedefs

typedef boost::variant< Blank, bool, long, double, std::string, sequence_interval, Qual_record::quality > value_variant
 
typedef boost::numeric::interval< int, boost::numeric::interval_lib::policies< boost::numeric::interval_lib::rounded_math< int >, detail::Interval_checking_policy< int > > > sequence_interval
 
typedef Seq_pos< Read_id > sub_seq
 
typedef Seq_pos< Seq_id > super_seq
 
typedef boost::multi_array< int, 2 > scoring_matrix_t
 
typedef boost::make_recursive_variant_over< step::paired_read_vector >::type step_variant_paired
 
typedef boost::make_recursive_variant_over< step::paired_emid_read_vector >::type step_variant_paired_emid
 
typedef boost::make_recursive_variant_over< step::single_read_vector >::type step_variant_single
 
typedef boost::make_recursive_variant_over< step::all_steps_vector >::type step_variant_all
 

Enumerations

enum  Nucleotide {
  Adenine = 0, Cytosine = 1, Guanine = 2, Thymine = 3,
  Any = 4, Uracil = 5, Purine = 6, Pyrimidine = 7,
  Ketone = 8, Amine = 9, Strong = 10, Weak = 11,
  not_A = 12, not_C = 13, not_G = 14, not_T = 15
}
 

Functions

std::size_t hash_value (Blank const &)
 
template<class ChT , class Tr >
std::basic_ostream< ChT, Tr > & operator<< (std::basic_ostream< ChT, Tr > &os, Blank const &)
 
std::string const & variable_type_str (const int which)
 
bool is_blank (value_variant const &vv)
 
std::string const & variable_type_str (value_variant const &vv)
 
VDJ_PIPE_DECL value_variant parse_variant (std::string const &s)
 
VDJ_PIPE_DECL void store_values (Seq_file_entry const &sfe, Value_map &vm)
 
VDJ_PIPE_DECL compression::Compression extension_to_compression (std::string const &ext)
 guess file compression from lowercase extension string More...
 
VDJ_PIPE_DECL compression::Compression compression_magic (std::istream &is)
 guess file compression from magic number More...
 
VDJ_PIPE_DECL format::Format extension_to_format (std::string const &ext)
 guess file format from lowercase extension string More...
 
VDJ_PIPE_DECL void path_decompose (std::string const &path, std::vector< std::string > &tv, std::vector< std::string > &nv)
 separate path into non-name and name areas More...
 
VDJ_PIPE_DECL std::string path_assemble (std::vector< std::string > const &templ, detail::Queable_ofstream_types::val_ref_vector const &vals)
 assemble path from template and values More...
 
sequence_interval sequence_interval_invalid ()
 
template<class MinLength >
void remove_subsequences (Seq_store &ss, gdst::Gdst &st, MinLength const &min_len)
 identify unique sequences in sequence store, insert them into suffix tree, and remove non-unique ones
 
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_split (std::basic_ostream< Ch, Tr > &os, const boost::string_ref str, const Ch delim, const unsigned length)
 print string splitting it into parts of equal length More...
 
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_fasta (std::basic_ostream< Ch, Tr > &os, std::string const &name, Seq_record::sequence const &sequence, const unsigned length=80)
 print FASTA record More...
 
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_fasta (std::basic_ostream< Ch, Tr > &os, const std::string &name, const boost::string_ref sequence, const unsigned length=80)
 print FASTA record More...
 
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_qual (std::basic_ostream< Ch, Tr > &os, std::string const &name, Qual_record::quality const &qual, const unsigned length=80)
 print QUAL record More...
 
template<class Ch , class Tr >
std::basic_ostream< Ch, Tr > & print_fastq (std::basic_ostream< Ch, Tr > &os, std::string const &name, Seq_record::sequence const &sequence, Qual_record::quality const &qual, const int offset=33)
 print FASTQ record More...
 
VDJ_PIPE_DECL std::string sanitize (const char c)
 
VDJ_PIPE_DECL std::string sanitize (std::string const &str)
 
VDJ_PIPE_DECL std::string sanitize (const boost::string_ref str, const std::size_t max_len)
 
template<class Seq >
int identity (Seq const &s1, const boost::string_ref s2, scoring_matrix_t const &sm, const std::size_t=0)
 
template<unsigned N, typename S >
int identity (const Seq_fls< N, S > seq1, const Seq_fls< N, S > seq2, scoring_matrix_t const &sm, const unsigned len=N)
 
template<class ChT , class Tr >
std::basic_ostream< ChT, Tr > & operator<< (std::basic_ostream< ChT, Tr > &os, Qual_record::quality const &qual)
 
bool operator== (Seq_entry const &se1, Seq_entry const &se2)
 
bool operator== (const boost::string_ref se1, Seq_entry const &se2)
 
bool operator== (Seq_entry const &se1, const boost::string_ref se2)
 
std::size_t hash_value (Seq_entry const &se)
 
std::pair< std::size_t, std::size_t > longest_average_interval (Qual_record::quality const &q, const std::size_t min_q, const std::size_t win)
 
std::pair< std::size_t, std::size_t > longest_min_interval (Qual_record::quality const &q, const Qual_record::quality::value_type min_q)
 
std::pair< std::size_t, std::size_t > longest_unambiguous_interval (Seq_record::sequence const &seq, const std::size_t max)
 
template<class Id >
bool operator== (Seq_pos< Id > const &rp1, Seq_pos< Id > const &rp2)
 
template<class Id >
bool operator< (Seq_pos< Id > const &rp1, Seq_pos< Id > const &rp2)
 
template<class Id >
bool operator< (Seq_pos< Id > const &rp1, const Id rp2)
 
template<class Id >
bool operator< (const Id rp1, Seq_pos< Id > const &rp2)
 
VDJ_PIPE_DECL compression::Compression guess_compression_ext (std::string const &path)
 
VDJ_PIPE_DECL compression::Compression guess_compression_magic (std::string const &path)
 
VDJ_PIPE_DECL std::pair< compression::Compression, format::Format > guess_compression_format (std::string const &path)
 
VDJ_PIPE_DECL format::Format guess_format (std::string const &path, const compression::Compression c)
 
VDJ_PIPE_DECL std::string ensure_path_writable (std::string const &path)
 create file if it does not exist, along with parent directories if needed
 
VDJ_PIPE_DECL std::string ensure_path_writable (std::string const &path, std::string const &header)
 if file does not exist, create and write header; create parent directories if needed
 
VDJ_PIPE_DECL bool is_path_readable (std::string const &path)
 
VDJ_PIPE_DECL std::string ensure_path_readable (std::string const &path)
 
VDJ_PIPE_DECL std::size_t hash_value (File const &f)
 
Nucleotide nucleotide_index (const char c)
 
bool is_ambiguous (const char c)
 
Nucleotide complement (const Nucleotide n)
 
char complement (const char c)
 
char to_capital (const Nucleotide n)
 
char to_small (const Nucleotide n)
 
template<int Match, int Mismatch, int Approximate, int Uncertain>
scoring_matrix_t const & scoring_matrix ()
 
int identity (const Nucleotide n1, const Nucleotide n2, scoring_matrix_t const &sm=scoring_matrix< 2,-2, 1, 0 >())
 
template<class Range >
std::string complement (Range const &r)
 
std::string transform (const boost::string_ref seq, sequence_interval const &si, const bool reverse)
 Reverse-complement (optionally) a portion of a sequence.
 
template<class Ostr >
Ostr & operator<< (Ostr &ostr, const Read_info si)
 
VDJ_PIPE_DECL std::ostream & stamp (std::ostream &os, std::string const &pref="## ", std::string const &delim="; ", std::string const &suff="\n")
 
VDJ_PIPE_DECL std::string stamp (std::string const &pref="## ", std::string const &delim="; ", std::string const &suff="\n")
 
VDJ_PIPE_DECL void process_options (boost::property_tree::ptree const &pt)
 
VDJ_PIPE_DECL void process_single_reads (boost::property_tree::ptree const &pt)
 
VDJ_PIPE_DECL void process_paired_reads (boost::property_tree::ptree const &pt)
 
VDJ_PIPE_DECL void process_paired_emid_reads (boost::property_tree::ptree const &pt)
 
VDJ_PIPE_DECL void merge (std::string const &s1, Seq_qual_record::quality const &qv1, std::string const &s2, Seq_qual_record::quality const &qv2, const unsigned min_score, Merge_result &mr)
 
 VDJ_PIPE_OBJECT_ID (Val_id)
 
 VDJ_PIPE_OBJECT_ID (Mid_id)
 
 VDJ_PIPE_OBJECT_ID (Path_id)
 
 VDJ_PIPE_OBJECT_ID (Read_id)
 
 VDJ_PIPE_OBJECT_ID (Seq_id)
 
 VDJ_PIPE_OBJECT_ID (Mapped_id)
 
template<class Id >
boost::iterator_range< Id_iterator< Id > > id_range (const Id id1, const Id id2)
 
template<typename T >
void unused_variable (T const &)
 
VDJ_PIPE_DECL step_variant_paired create_step_paired (Vm_access_paired const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 
template<>
Config_paired_reads::processing_step create_step< Config_paired_reads > (Config_paired_reads::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 
VDJ_PIPE_DECL step_variant_single create_step_single (Vm_access_single const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 
template<>
Config_single_reads::processing_step create_step< Config_single_reads > (Config_single_reads::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 
VDJ_PIPE_DECL step_variant_paired_emid create_step_paired_emid (Vm_access_paired_emid const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 
template<>
Config_paired_emid_reads::processing_step create_step< Config_paired_emid_reads > (Config_paired_emid_reads::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 
template<class Config >
Config::processing_step create_step (typename Config::value_map_access const &vma, boost::property_tree::ptree const &pt, Pipe_environment &pe)
 

Detailed Description

Main namespace of vdj_pipe library.

Enumeration Type Documentation

Enumerator
Uracil 

U.

Purine 

R : A, G.

Pyrimidine 

Y : C, T, U.

Ketone 

K : G, T, U.

Amine 

M : A, C.

Strong 

S : C, G.

Weak 

W : A, T, U.

not_A 

B : C G T.

not_C 

D : A G T.

not_G 

H : A C T.

not_T 

V : A C G.

Function Documentation

VDJ_PIPE_DECL compression::Compression vdj_pipe::compression_magic ( std::istream &  is)

guess file compression from magic number

Example ".gz" ".bz2"

VDJ_PIPE_DECL std::string vdj_pipe::ensure_path_readable ( std::string const &  path)
Returns
canonical path name
Exceptions
if path not readable
VDJ_PIPE_DECL compression::Compression vdj_pipe::extension_to_compression ( std::string const &  ext)

guess file compression from lowercase extension string

Example ".gz" ".bz2"

VDJ_PIPE_DECL format::Format vdj_pipe::extension_to_format ( std::string const &  ext)

guess file format from lowercase extension string

Example ".fq" ".fasta"

VDJ_PIPE_DECL compression::Compression vdj_pipe::guess_compression_ext ( std::string const &  path)
Returns
compression based on file extension
VDJ_PIPE_DECL std::pair<compression::Compression, format::Format> vdj_pipe::guess_compression_format ( std::string const &  path)
Returns
file compression and format based on file extension
VDJ_PIPE_DECL compression::Compression vdj_pipe::guess_compression_magic ( std::string const &  path)
Returns
compression based on file magic number
VDJ_PIPE_DECL format::Format vdj_pipe::guess_format ( std::string const &  path,
const compression::Compression  c 
)
Returns
file format based on file extension
template<class Seq >
int vdj_pipe::identity ( Seq const &  s1,
const boost::string_ref  s2,
scoring_matrix_t const &  sm,
const std::size_t  = 0 
)
inline

compute identity score

template<unsigned N, typename S >
int vdj_pipe::identity ( const Seq_fls< N, S >  seq1,
const Seq_fls< N, S >  seq2,
scoring_matrix_t const &  sm,
const unsigned  len = N 
)
inline

compute identity score

VDJ_PIPE_DECL bool vdj_pipe::is_path_readable ( std::string const &  path)
Returns
true if readable
std::pair<std::size_t,std::size_t> vdj_pipe::longest_average_interval ( Qual_record::quality const &  q,
const std::size_t  min_q,
const std::size_t  win 
)
inline
Returns
starting position and length of the longest interval in which the sum of every sub-interval of length win is at least min_q
std::pair<std::size_t,std::size_t> vdj_pipe::longest_min_interval ( Qual_record::quality const &  q,
const Qual_record::quality::value_type  min_q 
)
inline
Returns
starting position and length of the longest interval in which every element is at least min_q
std::pair<std::size_t,std::size_t> vdj_pipe::longest_unambiguous_interval ( Seq_record::sequence const &  seq,
const std::size_t  max 
)
inline
Returns
starting position and length of the longest interval that contains fewer than max ambiguous nucleotides
VDJ_PIPE_DECL std::string vdj_pipe::path_assemble ( std::vector< std::string > const &  templ,
detail::Queable_ofstream_types::val_ref_vector const &  vals 
)

assemble path from template and values

Example: out_dir/{name1}-dir/{name2}file.fna

VDJ_PIPE_DECL void vdj_pipe::path_decompose ( std::string const &  path,
std::vector< std::string > &  tv,
std::vector< std::string > &  nv 
)

separate path into non-name and name areas

Example: out_dir/{name1}-dir/{name2}file.fna

template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_fasta ( std::basic_ostream< Ch, Tr > &  os,
std::string const &  name,
Seq_record::sequence const &  sequence,
const unsigned  length = 80 
)
inline

print FASTA record

Parameters
os: output stream
name: FASTA description string
sequence: FASTA sequence string
length: sequence line length
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_fasta ( std::basic_ostream< Ch, Tr > &  os,
const std::string &  name,
const boost::string_ref  sequence,
const unsigned  length = 80 
)
inline

print FASTA record

Parameters
os: output stream
name: FASTA description string
sequence: FASTA sequence string
length: sequence line length
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_fastq ( std::basic_ostream< Ch, Tr > &  os,
std::string const &  name,
Seq_record::sequence const &  sequence,
Qual_record::quality const &  qual,
const int  offset = 33 
)
inline

print FASTQ record

Parameters
os: output stream
name: FASTQ description string
sequence: FASTQ sequence string
qual: quality scores
offset: quality score to char conversion offset
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_qual ( std::basic_ostream< Ch, Tr > &  os,
std::string const &  name,
Qual_record::quality const &  qual,
const unsigned  length = 80 
)
inline

print QUAL record

Parameters
os: output stream
name: FASTA description string
qual: quality scores
length: quality line length
template<class Ch , class Tr >
std::basic_ostream<Ch,Tr>& vdj_pipe::print_split ( std::basic_ostream< Ch, Tr > &  os,
const boost::string_ref  str,
const Ch  delim,
const unsigned  length 
)
inline

print string splitting it into parts of equal length

Parameters
os: output stream
str: input string
delim: characters to insert between the parts of the string
length: string part length
vdj_pipe::VDJ_PIPE_OBJECT_ID ( Val_id  )

Mapped value ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Mid_id  )

Molecular identifier ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Path_id  )

Filesystem path ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Read_id  )

Sequencing read ID

vdj_pipe::VDJ_PIPE_OBJECT_ID ( Seq_id  )

Sequence ID