7 #ifndef SEQUENCE_FLS_HPP_ 8 #define SEQUENCE_FLS_HPP_ 11 #include "boost/assert.hpp" 12 #include "boost/cstdint.hpp" 13 #include "boost/integer/static_log2.hpp" 24 template<
unsigned N = 4,
typename S = boost::u
int_least32_t>
class Seq_fls {
25 static const unsigned BITS_PER_CHAR = boost::static_log2<N>::value;
26 static const unsigned MAX_CHARS =
sizeof(S) * 8 / BITS_PER_CHAR;
27 static const unsigned MASK = ~(~0 << BITS_PER_CHAR);
30 static const unsigned alphabet_size = N;
31 static std::size_t length() {
return MAX_CHARS;}
36 explicit Seq_fls(
const boost::string_ref seq) : val_(0) {
40 Seq_fls(self_type
const& ss) : val_(ss.val_) {}
41 std::size_t size()
const {
return MAX_CHARS;}
42 self_type& operator=(self_type
const& ss) {
47 self_type& operator=(
const boost::string_ref seq) {
53 bool operator==(self_type
const& sf)
const {
return val_ == sf.val_;}
54 bool operator!=(self_type
const& sf)
const {
return !(*
this == sf);}
56 Nucleotide operator[](
const std::size_t i)
const {
57 BOOST_ASSERT(i < MAX_CHARS);
58 return Nucleotide((val_>> i * BITS_PER_CHAR) & MASK);
61 void set(
const unsigned i,
const Nucleotide n) {
62 BOOST_ASSERT(i < MAX_CHARS);
63 BOOST_ASSERT(n < alphabet_size);
64 val_ &= ~(MASK << i * BITS_PER_CHAR);
65 val_ |= n << i * BITS_PER_CHAR;
68 std::string to_string(std::size_t n = self_type::length())
const {
69 std::string s(n,
'X');
70 n = std::min(self_type::length(), n);
71 for(std::size_t i = 0; i != n; ++i) {
72 s[i] = to_capital(this->
operator[](i));
79 std::size_t n = self_type::length()
81 n = std::min(self_type::length(), n);
82 for(std::size_t i = 0; i != n; ++i) {
83 os << to_capital(this->
operator[](i));
91 void assign(
const boost::string_ref seq) {
92 if( seq.size() > size() ) BOOST_THROW_EXCEPTION(
94 << base_exception::msg_t(
"sequence too long")
95 << base_exception::int1_t(seq.size())
96 << base_exception::int2_t(size())
98 for(
unsigned i = seq.size(); i != 0; --i) {
99 const Nucleotide n = nucleotide_index(seq[i-1]);
100 if( n >= alphabet_size ) {
104 BOOST_THROW_EXCEPTION(
106 << base_exception::msg_t(
"character not supported")
107 << base_exception::str1_t(sanitize(seq[i-1]))
110 val_ <<= BITS_PER_CHAR;
115 friend inline std::size_t hash_value(self_type
const& seq) {
return seq.val_;}
117 friend std::ostream& operator<<(std::ostream& os, self_type
const& seq) {
118 return seq.print(os);
128 friend bool assign(self_type& seq1,
const boost::string_ref seq2) {
129 if( seq2.size() > length() )
return false;
131 for(
unsigned i = seq2.size(); i != 0; --i) {
132 const Nucleotide n = nucleotide_index(seq2[i-1]);
133 if( n >= alphabet_size )
return false;
134 seq1.val_ <<= BITS_PER_CHAR;
143 template<
class Seq>
inline int 146 const boost::string_ref s2,
148 const std::size_t = 0
151 for(std::size_t n = 0; n != s1.size() && n != s2.size(); ++n) {
153 const Nucleotide n2 = nucleotide_index(s2[n]);
161 template<
unsigned N,
typename S>
inline int 166 const unsigned len = N
169 for(
unsigned j = 0; j != len; ++j ) {
170 i +=
identity(seq1[j], seq2[j], sm);
store short sequence in an integer
Definition: sequence_fls.hpp:24
int identity(Seq const &s1, const boost::string_ref s2, scoring_matrix_t const &sm, const std::size_t=0)
Definition: sequence_fls.hpp:144
friend bool assign(self_type &seq1, const boost::string_ref seq2)
Definition: sequence_fls.hpp:128
Definition: find_shared.hpp:22
Nucleotide
Definition: nucleotide_index.hpp:24
Main namespace of vdj_pipe library.
Definition: sequence_file.hpp:14
Definition: exception.hpp:23