7 #ifndef NUCLEOTIDE_INDEX_HPP_ 8 #define NUCLEOTIDE_INDEX_HPP_ 9 #include "boost/algorithm/string/predicate_facade.hpp" 10 #include "boost/assert.hpp" 14 #define BOOST_DISABLE_ASSERTS 16 #include "boost/multi_array.hpp" 20 #define VDJ_PIPE_N_NUCLEOTIDES 16 45 inline Nucleotide nucleotide_index(
const char c) {
96 BOOST_THROW_EXCEPTION(
98 << base_exception::msg_t(
"invalid character")
99 << base_exception::str1_t(sanitize(c))
/// Classifies the nucleotide character @p c and returns a bool.
///
/// NOTE(review): the classification logic itself (listing lines 107-142) is
/// not visible in this extract; only the header and the error tail are.
/// The visible tail raises an exception through BOOST_THROW_EXCEPTION carrying
/// the message "invalid character" and the sanitized offending character.
/// Presumably this is the fall-through for characters that are not
/// recognized nucleotide symbols - confirm against the full source.
106 inline bool is_ambiguous(
const char c) {
// Error path: attach a fixed message plus sanitize(c) to the exception.
143 BOOST_THROW_EXCEPTION(
145 << base_exception::msg_t(
"invalid character")
146 << base_exception::str1_t(sanitize(c))
154 BOOST_ASSERT((
int)n < VDJ_PIPE_N_NUCLEOTIDES);
156 case Adenine:
return Thymine;
157 case Cytosine:
return Guanine;
158 case Guanine:
return Cytosine;
159 case Thymine:
return Adenine;
160 case Uracil:
return Adenine;
171 case Any:
return Any;
/// Maps a nucleotide character @p c to the character of its complement,
/// preserving letter case (upper-case input yields upper-case output,
/// lower-case yields lower-case).
///
/// Visible mappings: A<->T, C<->G, U->A, R<->Y, K<->M, S<->W, N->N.
/// Note that 'U'/'u' map to 'A'/'a', while 'A'/'a' map to 'T'/'t', so
/// applying the function twice to 'U' gives 'T', not 'U'.
///
/// Any input not matched by a case reaches BOOST_THROW_EXCEPTION with the
/// message "invalid nucleotide symbol" and the sanitized character.
///
/// NOTE(review): the visible cases (listing lines 180-203, which are
/// contiguous) contain no entries for 'B', 'D', 'H', 'V' or their lower-case
/// forms, although other tables in this file emit those symbols. As written,
/// such characters appear to fall through to the throw. If that is not
/// intended, cases B<->V and D<->H should be added - confirm against the
/// full source.
178 inline char complement(
const char c) {
// Canonical bases.
180 case 'A':
return 'T';
181 case 'a':
return 't';
182 case 'T':
return 'A';
183 case 't':
return 'a';
184 case 'C':
return 'G';
185 case 'c':
return 'g';
186 case 'G':
return 'C';
187 case 'g':
return 'c';
// Uracil complements to adenine (one-way: 'A' maps back to 'T').
188 case 'U':
return 'A';
189 case 'u':
return 'a';
// Two-base ambiguity codes: each swaps with its partner code.
190 case 'R':
return 'Y';
191 case 'r':
return 'y';
192 case 'Y':
return 'R';
193 case 'y':
return 'r';
194 case 'K':
return 'M';
195 case 'k':
return 'm';
196 case 'M':
return 'K';
197 case 'm':
return 'k';
198 case 'S':
return 'W';
199 case 's':
return 'w';
200 case 'W':
return 'S';
201 case 'w':
return 's';
// 'N' maps to itself.
202 case 'N':
return 'N';
203 case 'n':
return 'n';
// Error path (presumably the default branch; listing line 204 is not visible).
205 BOOST_THROW_EXCEPTION(
207 << base_exception::msg_t(
"invalid nucleotide symbol")
208 << base_exception::str1_t(sanitize(c))
217 BOOST_ASSERT((
int)n < VDJ_PIPE_N_NUCLEOTIDES);
219 case Adenine:
return 'A';
220 case Cytosine:
return 'C';
221 case Guanine:
return 'G';
222 case Thymine:
return 'T';
227 case Amine:
return 'M';
229 case Weak:
return 'W';
230 case not_A:
return 'B';
231 case not_C:
return 'D';
232 case not_G:
return 'H';
233 case not_T:
return 'V';
234 case Any:
return 'N';
242 BOOST_ASSERT((
int)n < VDJ_PIPE_N_NUCLEOTIDES);
244 case Adenine:
return 'a';
245 case Cytosine:
return 'c';
246 case Guanine:
return 'g';
247 case Thymine:
return 't';
252 case Amine:
return 'm';
254 case Weak:
return 'w';
255 case not_A:
return 'b';
256 case not_C:
return 'd';
257 case not_G:
return 'h';
258 case not_T:
return 'v';
259 case Any:
return 'n';
/// Builds a VDJ_PIPE_N_NUCLEOTIDES x VDJ_PIPE_N_NUCLEOTIDES scoring matrix
/// whose cells are filled from the four integer template parameters.
///
/// scoring_matrix() instantiates this template as
/// make_sm<Match, Mismatch, Approximate, Uncertain>, so here
/// A = match score, I = mismatch score, P = approximate-match score and
/// U = uncertain score.
///
/// The table below is written row by row and copied verbatim into the
/// matrix storage. It is symmetric; row/column 4 is entirely U, and
/// rows/columns 3 and 5 are identical.
/// NOTE(review): rows and columns are presumably indexed by the Nucleotide
/// enumeration values - confirm against the enum declaration. The
/// declarations of `m` and of the array `n`, and the return statement, are
/// not visible in this extract.
268 template<
int A,
int I,
int P,
int U>
269 inline scoring_matrix_t make_sm() {
// Two-dimensional extents: one row and one column per nucleotide index.
271 m(boost::extents[VDJ_PIPE_N_NUCLEOTIDES][VDJ_PIPE_N_NUCLEOTIDES]);
// Total number of cells; used as the length of the flat table below.
273 static const int p = VDJ_PIPE_N_NUCLEOTIDES * VDJ_PIPE_N_NUCLEOTIDES;
// Flat 16 x 16 score table, one source line per matrix row.
279 A, I, I, I, U, I, P, I, I, P, I, P, I, P, P, P,
280 I, A, I, I, U, I, I, P, I, P, P, I, P, I, P, P,
281 I, I, A, I, U, I, P, I, P, I, P, I, P, P, I, P,
282 I, I, I, A, U, A, I, P, P, I, I, P, P, P, P, I,
283 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
284 I, I, I, A, U, A, I, P, P, I, I, P, P, P, P, I,
285 P, I, P, I, U, I, P, I, P, P, P, P, P, P, P, P,
286 I, P, I, P, U, P, I, P, P, P, P, P, P, P, P, P,
287 I, I, P, P, U, P, P, P, P, I, P, P, P, P, P, P,
288 P, P, I, I, U, I, P, P, I, P, P, P, P, P, P, P,
289 I, P, P, I, U, I, P, P, P, P, P, I, P, P, P, P,
290 P, I, I, P, U, P, P, P, P, P, I, P, P, P, P, P,
291 I, P, P, P, U, P, P, P, P, P, P, P, P, P, P, P,
292 P, I, P, P, U, P, P, P, P, P, P, P, P, P, P, P,
293 P, P, I, P, U, P, P, P, P, P, P, P, P, P, P, P,
294 P, P, P, I, U, I, P, P, P, P, P, P, P, P, P, P
// Copy the p table entries into the matrix's underlying storage.
298 std::copy(n, n + p, m.data());
/// Provides access to a scoring matrix for the given score parameters.
///
/// @tparam Match        forwarded as the first argument of detail::make_sm
/// @tparam Mismatch     forwarded as the second argument of detail::make_sm
/// @tparam Approximate  forwarded as the third argument of detail::make_sm
/// @tparam Uncertain    forwarded as the fourth argument of detail::make_sm
///
/// The matrix is held in a function-local `static const` object initialized
/// from detail::make_sm, so each distinct combination of template arguments
/// has its own instance. The declared return type is a const reference.
/// NOTE(review): the return statement is not visible in this extract;
/// presumably it returns `m`.
306 template<
int Match,
int Mismatch,
int Approximate,
int Uncertain>
307 inline scoring_matrix_t
const& scoring_matrix() {
// Built on first use and kept for the lifetime of the program.
308 static const scoring_matrix_t m =
309 detail::make_sm<Match,Mismatch,Approximate,Uncertain>();
318 scoring_matrix_t
const& sm = scoring_matrix<2,-2,1,0>()
321 sm.shape()[0] == VDJ_PIPE_N_NUCLEOTIDES &&
322 sm.shape()[1] == VDJ_PIPE_N_NUCLEOTIDES
324 BOOST_ASSERT((
int)n1 < VDJ_PIPE_N_NUCLEOTIDES);
325 BOOST_ASSERT((
int)n2 < VDJ_PIPE_N_NUCLEOTIDES);
326 BOOST_ASSERT(sm[n1][n2] == sm[n2][n1]);
/// Function-object form of vdj_pipe::is_ambiguous(char).
///
/// Derives from boost::algorithm::predicate_facade<Is_ambiguous> and exposes
/// `result_type` as bool. The call operator simply forwards its character
/// argument to vdj_pipe::is_ambiguous and returns that result, so any
/// exception raised there propagates to the caller.
/// NOTE(review): the closing `};` of the struct is not visible in this extract.
330 struct Is_ambiguous :
public boost::algorithm::predicate_facade<Is_ambiguous> {
332 typedef bool result_type;
// Forwards to the free function; the functor itself holds no state.
333 bool operator() (
const char c)
const {
return vdj_pipe::is_ambiguous(c);}
int identity(Seq const &s1, const boost::string_ref s2, scoring_matrix_t const &sm, const std::size_t=0)
Definition: sequence_fls.hpp:144
R : A, G.
Definition: nucleotide_index.hpp:31
U.
Definition: nucleotide_index.hpp:30
Definition: find_shared.hpp:22
Nucleotide
Definition: nucleotide_index.hpp:24
K : G, T, U.
Definition: nucleotide_index.hpp:33
M : A, C.
Definition: nucleotide_index.hpp:34
V : A C G.
Definition: nucleotide_index.hpp:40
Definition: nucleotide_index.hpp:330
Main namespace of vdj_pipe library.
Definition: sequence_file.hpp:14
D : A G T.
Definition: nucleotide_index.hpp:38
S : C, G.
Definition: nucleotide_index.hpp:35
W : A, T, U.
Definition: nucleotide_index.hpp:36
Definition: exception.hpp:23
H : A C T.
Definition: nucleotide_index.hpp:39
B : C G T.
Definition: nucleotide_index.hpp:37
Y : C, T, U.
Definition: nucleotide_index.hpp:32