vdj_pipe
pipeline for processing DNA sequence data
Classes | Public Types | Public Member Functions | Private Types | Private Member Functions | Private Attributes | Friends | List of all members
vdj_pipe::gdst::Gdst Class Reference

Generalized DNA suffix tree. More...

#include <gdst.hpp>

Collaboration diagram for vdj_pipe::gdst::Gdst:
Collaboration graph
[legend]

Classes

struct  Err
 

Public Types

typedef detail::Id_map< Seq_id, Seq_entry > seq_map
 
typedef boost::shared_ptr< seq_map > seq_map_ptr
 
typedef Seq_pos< Seq_id > match_type
 
typedef std::vector< match_type > match_vector
 

Public Member Functions

 Gdst ()
 
 Gdst (seq_map_ptr ss)
 
std::size_t size () const
 
Depth_iter depth_first () const
 
Branch_id root () const
 
seq_map const & sequence_map () const
 
seq_map & sequence_map ()
 
Branch const & operator[] (const Branch_id bid) const
 
Leaf const & operator[] (const Leaf_id lid) const
 
Seq_entry const & operator[] (const Seq_id sid) const
 
Leaf const & leaf (const Branch_id bid) const
 
const seq_type suffix (const Seq_id sid, const unsigned len) const
 
const seq_type suffix (const Leaf_id lid, const unsigned len) const
 
const seq_type suffix (Branch const &b) const
 
const seq_type suffix (const Branch_id bid) const
 
Nucleotide letter (const Branch_id bid, const unsigned i) const
 
Nucleotide letter (const Branch_id bid, const Nucleotide n, const unsigned i) const
 
Branch_id child (const Branch_id bid1, const Nucleotide n) const
 
Branch_id child (const Branch_id bid1, const seq_type seq) const
 
void child (const Branch_id bid1, const Nucleotide n, const Branch_id bid2)
 
Common_subseq find_longest (const seq_type seq, std::size_t min_len=0) const
 
void find_overlaping (const boost::string_ref seq, detail::Vector_set< Seq_id > &vs, std::size_t min_overlap=0) const
 
Match find (const seq_type seq, const Branch_id bid, unsigned min_d=0) const
 
void insert (const Seq_id sid)
 

Private Types

typedef detail::Id_map< Branch_id, Branch > branch_map
 
typedef detail::Id_map< Leaf_id, Leaf > leaf_map
 
typedef detail::Id_map< Children_id, Children > children_map
 
typedef boost::string_ref seq_type
 

Private Member Functions

void collect_sequences (const gdst::Branch_id nid, detail::Vector_set< Seq_id > &vs) const
 
void suffix_link (const Branch_id bid1, const Branch_id bid2)
 
Branch_id suffix_link (const Branch_id bid) const
 
unsigned edge_length (const Branch_id bid, const Nucleotide n) const
 
unsigned edge_length (const Branch_id bid, const seq_type suff) const
 
void check_edge (Branch_id &an, unsigned &aei, unsigned &al, const boost::string_ref seq) const
 
void add_to_leaf (const Branch_id bid, const Seq_id sid)
 
Branch_id leaf_from_branch (const Branch_id bid1, const Nucleotide n1, const unsigned i, const Seq_id sid)
 
Branch_id split_edge (const Branch_id bid1, const Nucleotide n1, const unsigned i, const Nucleotide n2)
 

Private Attributes

seq_map_ptr ss_
 
branch_map bm_
 
children_map cm_
 
leaf_map lm_
 
Branch_id root_
 

Friends

class Gdst_stats
 
class Common_substrings
 
class Ukkonen_inserter
 

Detailed Description

Generalized DNA suffix tree.

Member Typedef Documentation

typedef boost::shared_ptr<seq_map> vdj_pipe::gdst::Gdst::seq_map_ptr
typedef boost::string_ref vdj_pipe::gdst::Gdst::seq_type
private

Constructor & Destructor Documentation

vdj_pipe::gdst::Gdst::Gdst ( )
inline
vdj_pipe::gdst::Gdst::Gdst ( seq_map_ptr  ss)
inline explicit

Member Function Documentation

void vdj_pipe::gdst::Gdst::add_to_leaf ( const Branch_id  bid,
const Seq_id  sid 
)
inline private
void vdj_pipe::gdst::Gdst::check_edge ( Branch_id &  an,
unsigned &  aei,
unsigned &  al,
const boost::string_ref  seq 
) const
inline private
Branch_id vdj_pipe::gdst::Gdst::child ( const Branch_id  bid1,
const Nucleotide  n 
) const
inline
Branch_id vdj_pipe::gdst::Gdst::child ( const Branch_id  bid1,
const seq_type  seq 
) const
inline
void vdj_pipe::gdst::Gdst::child ( const Branch_id  bid1,
const Nucleotide  n,
const Branch_id  bid2 
)
inline
void vdj_pipe::gdst::Gdst::collect_sequences ( const gdst::Branch_id  nid,
detail::Vector_set< Seq_id > &  vs 
) const
inline private
Depth_iter vdj_pipe::gdst::Gdst::depth_first ( ) const
inline
unsigned vdj_pipe::gdst::Gdst::edge_length ( const Branch_id  bid,
const Nucleotide  n 
) const
inline private
unsigned vdj_pipe::gdst::Gdst::edge_length ( const Branch_id  bid,
const seq_type  suff 
) const
inline private
Match vdj_pipe::gdst::Gdst::find ( const seq_type  seq,
const Branch_id  bid,
unsigned  min_d = 0 
) const
inline
Common_subseq vdj_pipe::gdst::Gdst::find_longest ( const seq_type  seq,
std::size_t  min_len = 0 
) const

find longest common substring

Parameters
seq: DNA sequence, no ambiguous characters
min_len: minimal common substring length; if min_len == 0, only complete matches are considered
void vdj_pipe::gdst::Gdst::find_overlaping ( const boost::string_ref  seq,
detail::Vector_set< Seq_id > &  vs,
std::size_t  min_overlap = 0 
) const
Parameters
seq: sequence of non-ambiguous nucleotides
min_overlap: minimal overlap size; if min_overlap == 0, only fully matching sequences will be returned
Returns
set of sequence IDs that have overlaps with seq
void vdj_pipe::gdst::Gdst::insert ( const Seq_id  sid)
Leaf const& vdj_pipe::gdst::Gdst::leaf ( const Branch_id  bid) const
inline
Branch_id vdj_pipe::gdst::Gdst::leaf_from_branch ( const Branch_id  bid1,
const Nucleotide  n1,
const unsigned  i,
const Seq_id  sid 
)
inline private
Nucleotide vdj_pipe::gdst::Gdst::letter ( const Branch_id  bid,
const unsigned  i 
) const
inline
Nucleotide vdj_pipe::gdst::Gdst::letter ( const Branch_id  bid,
const Nucleotide  n,
const unsigned  i 
) const
inline
Branch const& vdj_pipe::gdst::Gdst::operator[] ( const Branch_id  bid) const
inline
Leaf const& vdj_pipe::gdst::Gdst::operator[] ( const Leaf_id  lid) const
inline
Seq_entry const& vdj_pipe::gdst::Gdst::operator[] ( const Seq_id  sid) const
inline
Branch_id vdj_pipe::gdst::Gdst::root ( ) const
inline
seq_map const& vdj_pipe::gdst::Gdst::sequence_map ( ) const
inline
seq_map& vdj_pipe::gdst::Gdst::sequence_map ( )
inline
std::size_t vdj_pipe::gdst::Gdst::size ( ) const
inline
Branch_id vdj_pipe::gdst::Gdst::split_edge ( const Branch_id  bid1,
const Nucleotide  n1,
const unsigned  i,
const Nucleotide  n2 
)
inline private
const seq_type vdj_pipe::gdst::Gdst::suffix ( const Seq_id  sid,
const unsigned  len 
) const
inline
const seq_type vdj_pipe::gdst::Gdst::suffix ( const Leaf_id  lid,
const unsigned  len 
) const
inline
const seq_type vdj_pipe::gdst::Gdst::suffix ( Branch const &  b) const
inline
const seq_type vdj_pipe::gdst::Gdst::suffix ( const Branch_id  bid) const
inline
void vdj_pipe::gdst::Gdst::suffix_link ( const Branch_id  bid1,
const Branch_id  bid2 
)
inline private
Branch_id vdj_pipe::gdst::Gdst::suffix_link ( const Branch_id  bid) const
inline private

Friends And Related Function Documentation

friend class Common_substrings
friend
friend class Gdst_stats
friend
friend class Ukkonen_inserter
friend

Member Data Documentation

branch_map vdj_pipe::gdst::Gdst::bm_
private
children_map vdj_pipe::gdst::Gdst::cm_
private
leaf_map vdj_pipe::gdst::Gdst::lm_
private
Branch_id vdj_pipe::gdst::Gdst::root_
private
seq_map_ptr vdj_pipe::gdst::Gdst::ss_
private

The documentation for this class was generated from the following files: