vdj_pipe
pipeline for processing DNA sequence data
Classes | Public Types | Public Member Functions | Friends | List of all members
vdj_pipe::gdst::Gdst Class Reference

Generalized DNA suffix tree. More...

#include <gdst.hpp>

Classes

struct  Err
 

Public Types

typedef detail::Id_map< Seq_id, Seq_entry > seq_map
 
typedef boost::shared_ptr< seq_map > seq_map_ptr
 
typedef Seq_pos< Seq_id > match_type
 
typedef std::vector< match_type > match_vector
 

Public Member Functions

 Gdst (seq_map_ptr ss)
 
std::size_t size () const
 
Depth_iter depth_first () const
 
Branch_id root () const
 
seq_map const & sequence_map () const
 
seq_map & sequence_map ()
 
Branch const & operator[] (const Branch_id bid) const
 
Leaf const & operator[] (const Leaf_id lid) const
 
Seq_entry const & operator[] (const Seq_id sid) const
 
Leaf const & leaf (const Branch_id bid) const
 
const seq_type suffix (const Seq_id sid, const unsigned len) const
 
const seq_type suffix (const Leaf_id lid, const unsigned len) const
 
const seq_type suffix (Branch const &b) const
 
const seq_type suffix (const Branch_id bid) const
 
Nucleotide letter (const Branch_id bid, const unsigned i) const
 
Nucleotide letter (const Branch_id bid, const Nucleotide n, const unsigned i) const
 
Branch_id child (const Branch_id bid1, const Nucleotide n) const
 
Branch_id child (const Branch_id bid1, const seq_type seq) const
 
void child (const Branch_id bid1, const Nucleotide n, const Branch_id bid2)
 
Common_subseq find_longest (const seq_type seq, std::size_t min_len=0) const
 
void find_overlaping (const boost::string_ref seq, detail::Vector_set< Seq_id > &vs, std::size_t min_overlap=0) const
 
Match find (const seq_type seq, const Branch_id bid, unsigned min_d=0) const
 
void insert (const Seq_id sid)
 

Friends

class Gdst_stats
 
class Common_substrings
 
class Ukkonen_inserter
 

Detailed Description

Generalized DNA suffix tree.

Member Function Documentation

Common_subseq vdj_pipe::gdst::Gdst::find_longest ( const seq_type  seq,
std::size_t  min_len = 0 
) const

find longest common substring

Parameters
seq: DNA sequence, no ambiguous characters
min_len: minimal common substring length; if min_len == 0, only complete matches are considered
void vdj_pipe::gdst::Gdst::find_overlaping ( const boost::string_ref  seq,
detail::Vector_set< Seq_id > &  vs,
std::size_t  min_overlap = 0 
) const
Parameters
seq: sequence of non-ambiguous nucleotides
min_overlap: minimal overlap size; if min_overlap == 0, only fully matching sequences will be returned
Returns
set of sequence IDs that have overlaps with seq

The documentation for this class was generated from the following file: