libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
aastringcodec.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/aastringcodec.cpp
3 * \date 09/05/2023
4 * \author Olivier Langella
5 * \brief encode an amino acid string to an integer and decode it back
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2023 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "aastringcodec.h"
29#include <QDebug>
30
31using namespace pappso;
32
33AaStringCodec::AaStringCodec(const AaCode &aaCode) : m_aaCode(aaCode)
34{
35
36 m_base = m_aaCode.getSize() + 1;
37 m_units.resize(10);
38 uint32_t unit = 1;
39 for(auto &this_unit : m_units)
40 {
41 this_unit = unit;
42 unit *= m_base;
43 }
44}
45
47 : m_aaCode(other.m_aaCode)
48{
49 m_base = other.m_base;
50 m_units = other.m_units;
51}
52
56
57
58uint32_t
59pappso::AaStringCodec::code(const QString &aa_str) const
60{
61
62 std::size_t pos = 0;
63 uint32_t code = 0;
64 for(auto &aa_char : aa_str)
65 {
66 code += m_aaCode.getAaCode(aa_char.toLatin1()) * m_units[pos];
67 pos++;
68 }
69 return code;
70}
71
72uint32_t
73pappso::AaStringCodec::codeLlc(const QString &aa_str) const
74{
75 std::vector<uint8_t> llc_vec;
76
77 for(auto &aa_char : aa_str)
78 {
79 llc_vec.push_back(m_aaCode.getAaCode(aa_char.toLatin1()));
80 }
81 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
82
83
84 std::size_t pos = 0;
85 uint32_t code = 0;
86 for(auto &aa_code : llc_vec)
87 {
88 code += (uint32_t)aa_code * m_units[pos];
89 pos++;
90 }
91 return code;
92}
93
94uint32_t
95pappso::AaStringCodec::codeLlc(std::vector<uint8_t>::const_iterator it_begin,
96 std::size_t size) const
97{
98 std::vector<uint8_t> llc_vec;
99
100 for(std::size_t i = 0; i < size; i++)
101 {
102 llc_vec.push_back(*it_begin);
103 it_begin++;
104 }
105 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
106
107
108 std::size_t pos = 0;
109 uint32_t code = 0;
110 for(auto &aa_code : llc_vec)
111 {
112 code += (uint32_t)aa_code * m_units[pos];
113 pos++;
114 }
115 return code;
116}
117
118
119QString
121{
122 QString aa_suite;
123
124 while(code > 0)
125 {
126 aa_suite.append(m_aaCode.getAa((uint8_t)(code % m_base)).getLetter());
127 code /= m_base;
128 }
129
130 // qDebug() << aa_suite;
131
132 return aa_suite;
133}
134
135double
137{
138 double mass = 0;
139
140 while(code > 0)
141 {
142 mass += m_aaCode.getMass((uint8_t)(code % m_base));
143 code /= m_base;
144 }
145
146 return mass;
147}
148
149
150std::vector<CodeToMass>
152 std::size_t size) const
153{
154 std::vector<CodeToMass> llc_list;
155 if(size == 0)
156 return llc_list;
157 std::vector<uint8_t> model;
158 for(uint8_t p = 1; p <= size; p++)
159 {
160 model.resize(p, 0);
161
162 for(uint8_t i = 1; i < m_base; i++)
163 {
164 model[0] = i;
165 if(p == 1)
166 {
167 llc_list.push_back(generateCodeMassFromModel(model));
168 }
169 else
170 {
171 recGenerateModel(llc_list, model, 1);
172 }
173 }
174 }
175 return llc_list;
176}
177
178
179std::vector<CodeToMass>
181 std::size_t size) const
182{
183 std::vector<CodeToMass> llc_list;
184 if(size == 0)
185 return llc_list;
186 std::vector<uint8_t> model;
187 model.resize(size, 0);
188
189 for(uint8_t i = 1; i < m_base; i++)
190 {
191 model[0] = i;
192 recGenerateModel(llc_list, model, 1);
193 }
194 return llc_list;
195}
196
197void
198pappso::AaStringCodec::recGenerateModel(std::vector<CodeToMass> &glist,
199 std::vector<uint8_t> &model,
200 std::size_t position) const
201{
202 if(position == model.size())
203 return;
204
205 if(position == model.size() - 1)
206 {
207 uint8_t max = model[position - 1];
208 for(uint8_t i = 1; i <= max; i++)
209 {
210 model[position] = i;
211 glist.push_back(generateCodeMassFromModel(model));
212 }
213 }
214 else
215 {
216 uint8_t max = model[position - 1];
217 for(uint8_t i = 1; i <= max; i++)
218 {
219 model[position] = i;
220 recGenerateModel(glist, model, position + 1);
221 }
222 }
223}
224
227 const std::vector<uint8_t> &model) const
228{
229 CodeToMass code_mass;
230 std::size_t pos = 0;
231 for(auto aacode : model)
232 {
233 code_mass.mass += m_aaCode.getMass(aacode);
234
235 code_mass.code += (uint32_t)aacode * m_units[pos];
236 pos++;
237 }
238
239 // qDebug() << code_mass.code << " " << code_mass.mass;
240 return code_mass;
241}
242
243
244std::size_t
246{
247
248 std::size_t code = 0;
249 for(std::size_t pos = 0; pos < size; pos++)
250 {
251 code += (std::size_t)(m_base - 1) * (std::size_t)m_units[pos];
252 }
253 return code;
254}
255
256bool
258 const std::vector<uint8_t> &aa_ok) const
259{
260
261 while(code > 0)
262 {
263 if(std::find(aa_ok.begin(), aa_ok.end(), (uint8_t)(code % m_base)) ==
264 aa_ok.end())
265 return false;
266
267 code /= m_base;
268 }
269 return true;
270}
271
272const pappso::AaCode &
274{
275 return m_aaCode;
276}
encode an amino acid string to an integer and decode it back
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by their mass (...
Definition aacode.h:43
std::size_t getSize() const
Definition aacode.cpp:74
std::size_t getLimitMax(std::size_t size) const
get the maximum code number for a given peptide size
double getMass(uint32_t code) const
const AaCode & getAaCode() const
QString decode(uint32_t code) const
uint32_t codeLlc(const QString &aa_str) const
get the lowest common denominator integer from an amino acid suite string
void recGenerateModel(std::vector< CodeToMass > &glist, std::vector< uint8_t > &model, std::size_t position) const
recursive method to generate models
std::vector< uint32_t > m_units
uint32_t code(const QString &aa_str) const
get integer from an amino acid suite string
const AaCode & m_aaCode
CodeToMass generateCodeMassFromModel(const std::vector< uint8_t > &model) const
AaStringCodec(const AaCode &aaCode)
std::vector< CodeToMass > generateLlcCodeListByMaxPeptideSize(std::size_t size) const
generates all possible combination of llc code mass llc : the lowest common code denominator for a gi...
bool codeOnlyContains(uint32_t code, const std::vector< uint8_t > &aa_ok) const
std::vector< CodeToMass > generateLlcCodeListUpToMaxPeptideSize(std::size_t size) const
generates all possible combination of llc code mass llc : the lowest common code denominator for a gi...
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ max
maximum of intensities