libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
10#include "timsmsfilereader.h"
11#include "xymsfilereader.h"
12
13
14#include "../exception/exceptionnotfound.h"
15#include "../exception/exceptionnotpossible.h"
16#include "../msrun/msrunid.h"
17#include "../msrun/private/timsframesmsrunreader.h"
18
19#include "../msrun/private/pwizmsrunreader.h"
20#include "../msrun/private/timsmsrunreader.h"
21#include "../msrun/private/timsmsrunreaderms2.h"
22#include "../msrun/xymsrunreader.h"
23
24#include "../utils.h"
25
26
27namespace pappso
28{
29
30
// Constructs an accessor for the MS data file `file_name`.
// Stores `file_name` and `xml_prefix` in m_fileName / m_xmlPrefix, then checks
// that the file exists on disk.
// Throws ExceptionNotFound (message carries the absolute file path) when the
// file does not exist.
// NOTE(review): listing lines 41 and 43 are absent from this extract; the two
// string literals at 44-45 are the tail of a statement that starts on line 43.
31MsFileAccessor::MsFileAccessor(const QString &file_name,
32 const QString &xml_prefix)
33 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
34{
35 QFile file(file_name);
36 if(!file.exists())
37 throw(ExceptionNotFound(QObject::tr("File %1 not found.")
38 .arg(QFileInfo(file_name).absoluteFilePath())));
39
40
// Initialise the nativeID-format term with its "no nativeID format" name.
42 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
44 "No nativeID format indicates that the file tagged with this term does not "
45 "contain spectra that can have a nativeID format.";
46}
47
48
// Member-initializer list that copies the file name, XML prefix, file format
// and file reader type from `other` (presumably the copy constructor; its
// signature line is absent from this extract — TODO confirm).
// NOTE(review): listing line 55, the only statement in the body, is also
// absent from this extract.
50 : m_fileName(other.m_fileName),
51 m_xmlPrefix(other.m_xmlPrefix),
52 m_fileFormat(other.m_fileFormat),
53 m_fileReaderType(other.m_fileReaderType)
54{
56}
57
61
62
63const QString &
65{
66 return m_fileName;
67}
68
69
75
// Builds and returns (by value) an OboPsiModTerm describing the file format
// currently stored in m_fileFormat.
// Each populated switch branch fills the accession, name and definition of the
// term; branches that only `break`, and the `default` branch, leave the term
// as default-constructed.
// NOTE(review): in this extract every `case` label except MsDataFormat::xy is
// absent, so the format that maps to each accession must be checked against
// the original source.
76const OboPsiModTerm
78{
79 OboPsiModTerm term;
80
81 // is_a: MS:1000560 ! mass spectrometer file format
82 switch(m_fileFormat)
83 {
85 term.m_accession = "MS:1001560";
86 term.m_name = "SCIEX TOF/TOF T2D format";
87 term.m_definition =
88 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
89 "export format.";
90 break;
92 term.m_accession = "MS:1000562";
93 term.m_name = "ABI WIFF format";
94 term.m_definition = "Applied Biosystems WIFF file format.";
95 break;
97 term.m_accession = "MS:1001509";
98 term.m_name = "Agilent MassHunter format";
99 term.m_definition =
100 "A data file format found in an Agilent MassHunter directory which "
101 "contains raw data acquired by an Agilent mass spectrometer.";
102 break;
// Branch without a term: nothing is assigned here.
104 break;
106 term.m_accession = "MS:1000825";
107 term.m_name = "Bruker FID format";
108 term.m_definition = "Bruker FID file format.";
109 break;
111 term.m_accession = "MS:1002817";
112 term.m_name = "Bruker TDF format";
113 term.m_definition = "Bruker TDF raw file format.";
114 break;
116 term.m_accession = "MS:1000567";
117 term.m_name = "Bruker/Agilent YEP format";
118 term.m_definition = "Bruker/Agilent YEP file format.";
119 break;
121 term.m_accession = "MS:1001062";
122 term.m_name = "Mascot MGF format";
123 term.m_definition = "Mascot MGF file format.";
124 break;
// Branch without a term: nothing is assigned here.
126 break;
128 term.m_accession = "MS:1001881";
129 term.m_name = "mz5 format";
130 term.m_definition = "mz5 file format, modelled after mzML.";
131 break;
133 term.m_accession = "MS:1000584";
134 term.m_name = "mzML format";
135 term.m_definition =
136 "Proteomics Standards Inititative mzML file format.";
137 break;
139 term.m_accession = "MS:1000566";
140 term.m_name = "ISB mzXML format";
141 term.m_definition = "Institute of Systems Biology mzXML file format.";
142 break;
// Branch without a term: nothing is assigned here.
144 break;
146
147 term.m_accession = "MS:1000563";
148 term.m_name = "Thermo RAW format";
149 term.m_definition = "Thermo Scientific RAW file format.";
150 break;
// Branch without a term: nothing is assigned here.
152 break;
154 term.m_accession = "MS:1000526";
155 term.m_name = "Waters raw format";
156 term.m_definition =
157 "Waters data file format found in a Waters RAW directory, generated "
158 "from an MS acquisition.";
159 break;
160 case MsDataFormat::xy:
161 term.m_accession = "MS:1001369";
162 term.m_name = "text format";
163 term.m_definition =
164 "Simple text file format of \"m/z [intensity]\" values for a PMF (or "
165 "single MS2) search.";
166 break;
167 default:
168 break;
169 }
170
171 return term;
172}
173
174
175const OboPsiModTerm &
182
183
// Probes the file with up to three readers, in this order, and returns the
// MsRunId list of the first reader that yields a non-empty list:
//   1. PwizMsFileReader on m_fileName,
//   2. TimsMsFileReader on m_fileName if it is a directory, otherwise on the
//      directory that contains it,
//   3. XyMsFileReader on m_fileName.
// On success m_fileFormat is set from the winning reader. For the first two
// readers, m_fileReaderType is replaced when `pref` points to an entry of
// m_preferredFileReaderTypeMap. If no reader succeeds the (empty) list from
// the last attempt is returned; nothing is thrown by the code visible here.
// NOTE(review): listing lines 185, 199, 203, 232, 234 and 258 (the signature
// and, presumably, the default reader-type assignments and the `pref`
// look-ups) are absent from this extract.
184std::vector<MsRunIdCstSPtr>
186{
187 // qDebug();
188
189 // Try the PwizMsFileReader
190
191 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
192
193 std::vector<MsRunIdCstSPtr> ms_run_ids =
194 pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
195 if(ms_run_ids.size())
196 {
197 // qDebug() << "Might well be handled using the Pwiz code.";
198 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
200
201 // But the user might have configured one preferred reader type.
202
204 if(pref != m_preferredFileReaderTypeMap.end())
205 {
206 m_fileReaderType = pref->second;
207 }
208
209 return ms_run_ids;
210 }
211
212 // qDebug() << "The Pwiz reader did not work.";
213
214 // Try the TimsData reader
215
// The Tims reader is handed a directory: when m_fileName is a plain file its
// parent directory is used instead.
216 QString tims_dir = m_fileName;
217 if(!QFileInfo(tims_dir).isDir())
218 {
219 tims_dir = QFileInfo(m_fileName).absolutePath();
220 }
221
222 TimsMsFileReader tims_file_reader(tims_dir);
223
224 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
225
226 if(ms_run_ids.size())
227 {
228 // qDebug() << "Might well be handled using the Bruker code";
229
// Side effect: from here on the accessor's file name is the Tims directory.
230 m_fileName = tims_dir;
231 m_fileFormat = tims_file_reader.getFileFormat();
233
235 if(pref != m_preferredFileReaderTypeMap.end())
236 {
237 m_fileReaderType = pref->second;
238 }
239
240 // qDebug() << "Returning Bruker::tims ms run(s)."
241 // << "with preferred reader type:"
242 // << Utils::fileReaderTypeAsString(m_fileReaderType);
243
244 return ms_run_ids;
245 }
246
247 // qDebug() << "The Tims reader did not work.";
248
249 // At this point try the XyMsFileReader
250
251 XyMsFileReader xy_ms_file_reader(m_fileName);
252
253 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
254
255 if(ms_run_ids.size())
256 {
257 // qDebug() << "Might well be handled using the XY code";
259
260 m_fileFormat = xy_ms_file_reader.getFileFormat();
261
262 return ms_run_ids;
263 }
264
265 // qDebug() << "The XY reader did not work.";
266
// Nothing matched: ms_run_ids is empty at this point.
267 return ms_run_ids;
268}
269
270
271void
273 FileReaderType reader_type)
274{
275 // qDebug();
276
277 auto ret = m_preferredFileReaderTypeMap.insert(
278 std::pair<MsDataFormat, FileReaderType>(format, reader_type));
279
280 if(!ret.second)
281 {
282 // replace
283 ret.first->second = reader_type;
284 }
285}
286
287
290{
291 // qDebug();
292
293 auto ret = m_preferredFileReaderTypeMap.find(format);
294
295 if(ret != m_preferredFileReaderTypeMap.end())
296 {
297 return ret->second;
298 }
299
300 return m_fileReaderType;
301}
302
303
309
310
311void
313{
315}
316
317
318std::size_t
323
324
// Body of the selected-MsRunId getter (presumably getSelectedMsRunId(); its
// signature lines are absent from this extract — TODO confirm).
// Throws PappsoException when m_selectedMsRunIdIndex is not a valid index into
// the list returned by getMsRunIds().
// NOTE(review): the return statement (listing line 331) is absent from this
// extract.
327{
328 if(m_selectedMsRunIdIndex >= getMsRunIds().size())
329 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
330
332}
333
// Body of the dedicated Tims MS2 reader builder (presumably
// buildTimsMsRunReaderMs2SPtr(); its signature lines are absent from this
// extract — TODO confirm).
// Runs TimsMsFileReader on m_fileName when it is a directory, otherwise on the
// directory that contains it. When at least one MsRunId is found, m_fileFormat
// and m_fileName are updated and a TimsMsRunReaderMs2 built on the first
// MsRunId is returned.
// NOTE(review): listing lines 351 and 359 are absent; the else branch
// presumably throws with the "Unable to read mz data directory" message.
336{
337 // try TimsData reader
338 QString tims_dir = m_fileName;
339 if(!QFileInfo(tims_dir).isDir())
340 {
341 tims_dir = QFileInfo(m_fileName).absolutePath();
342 }
343 TimsMsFileReader tims_file_reader(tims_dir);
344
345 std::vector<MsRunIdCstSPtr> ms_run_ids =
346 tims_file_reader.getMsRunIds(m_xmlPrefix);
347
348 if(ms_run_ids.size())
349 {
350 // qDebug() << "Might well be handled using the Bruker code";
352 m_fileFormat = tims_file_reader.getFileFormat();
// Side effect: the accessor's file name becomes the Tims directory.
353 m_fileName = tims_dir;
354
355 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
356 }
357 else
358 {
360 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.")
361 .arg(tims_dir)));
362 }
363}
364
365
// Body of the reader factory taking an MsRunId (presumably
// msRunReaderSPtr(MsRunIdCstSPtr ms_run_id); its signature lines are absent
// from this extract — TODO confirm).
// First compares m_fileName with ms_run_id->getFileName(); then, depending on
// branch conditions, returns one of: PwizMsRunReader, XyMsRunReader,
// TimsMsRunReader, TimsFramesMsRunReader or TimsMsRunReaderMs2.
// One branch picks XyMsRunReader when the MsRunId's data format is
// MsDataFormat::xy and PwizMsRunReader otherwise. The final else throws
// PappsoException("No file format was found.").
// NOTE(review): every branch condition (listing lines 379, 387, 393, 399-400,
// 406, 412) and lines 375, 383, 422 are absent from this extract, so which
// reader type selects which branch cannot be confirmed here.
// NOTE(review): the message at 376 reads "the name file name"; "the same file
// name" was probably intended.
368{
369 // qDebug();
370
371 // We want to return a MsRunReader that accounts for the configuration that
372 // the user might have set.
373
374 if(m_fileName != ms_run_id->getFileName())
376 QObject::tr("The MsRunId instance must have the name file name as the "
377 "MsFileAccessor.")));
378
380 {
381 // qDebug() << "Returning a PwizMsRunReader.";
382 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
384 pwiz_reader->getOboPsiModTermNativeIDFormat();
385 return pwiz_reader;
386 }
388 {
389 // qDebug() << "Returning a XyMsRunReader.";
390
391 return std::make_shared<XyMsRunReader>(ms_run_id);
392 }
394 {
395 // qDebug() << "Returning a TimsMsRunReader.";
396
397 return std::make_shared<TimsMsRunReader>(ms_run_id);
398 }
401 {
402 // qDebug() << "Returning a TimsFramesMsRunReader.";
403
404 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
405 }
407 {
408 // qDebug() << "Returning a TimsMsRunReaderMs2.";
409
410 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
411 }
413 {
414 if(ms_run_id.get()->getMsDataFormat() == MsDataFormat::xy)
415 {
416 return std::make_shared<XyMsRunReader>(ms_run_id);
417 }
418 else
419 {
420
421 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
423 pwiz_reader->getOboPsiModTermNativeIDFormat();
424 return pwiz_reader;
425 }
426 }
427 else
428 {
429 throw PappsoException(QObject::tr("No file format was found."));
430 }
431
// NOTE(review): appears unreachable if every branch above returns or throws.
432 return nullptr;
433}
434
435
437MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
438{
439 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
440 if(ms_run_id_index >= ms_run_ids.size())
441 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
442
443 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
444}
445
446
449{
450 // qDebug();
451
452 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
453
454 if(m_selectedMsRunIdIndex >= ms_run_ids.size())
455 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
456
457 return msRunReaderSPtr(ms_run_ids.at(m_selectedMsRunIdIndex));
458}
459
460
466
// Reader factory driven only by an MsRunId and a preferred reader type
// (the lines carrying the return type and function name are absent from this
// extract; presumably an overload of buildMsRunReaderSPtr — TODO confirm).
//
// ms_run_id                   run whose file name and data format drive the choice
// preferred_file_reader_type  consulted only for MsDataFormat::brukerTims
//
// Throws ExceptionNotFound when the file named by ms_run_id does not exist,
// and PappsoException when the data format is MsDataFormat::unknown.
// Selection: xy -> XyMsRunReader; brukerTims -> TimsMsRunReader,
// TimsMsRunReaderMs2 or TimsFramesMsRunReader according to the preferred type
// (TimsMsRunReader when it is none of tims/tims_ms2/tims_frames); any other
// format -> PwizMsRunReader.
469 MsRunIdCstSPtr ms_run_id, pappso::FileReaderType preferred_file_reader_type)
470{
471 QFile file(ms_run_id.get()->getFileName());
472 if(!file.exists())
473 throw(ExceptionNotFound(
474 QObject::tr("unable to build a reader : file %1 not found.")
475 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
476
477 MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
478
479 if(file_format == MsDataFormat::xy)
480 {
481 // qDebug() << "Returning a XyMsRunReader.";
482
483 return std::make_shared<XyMsRunReader>(ms_run_id);
484 }
485 else if(file_format == MsDataFormat::unknown)
486 {
487 throw(PappsoException(
488 QObject::tr("unable to build a reader for %1 : unknown file format")
489 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
490 }
491
492 else if(file_format == MsDataFormat::brukerTims)
493 {
494 if(preferred_file_reader_type == pappso::FileReaderType::tims)
495 {
496 return std::make_shared<TimsMsRunReader>(ms_run_id);
497 }
498 else if(preferred_file_reader_type == pappso::FileReaderType::tims_ms2)
499 {
500 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
501 }
502 else if(preferred_file_reader_type == pappso::FileReaderType::tims_frames)
503 {
// NOTE(review): this is the only qDebug() in the function that is not
// commented out — confirm it is meant to stay active.
504 qDebug()
505 << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
506 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
507 }
508 // qDebug() << "by default, build a TimsMsRunReader.";
509 return std::make_shared<TimsMsRunReader>(ms_run_id);
510 }
511 else
512 {
513 // qDebug() << "Returning a PwizMsRunReader .";
514 auto sp_pwiz = std::make_shared<PwizMsRunReader>(ms_run_id);
515 return sp_pwiz;
516 }
517}
518
519
522 const QString &xml_id)
523{
524 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
525 MsRunReaderSPtr reader_sp;
526 for(MsRunIdCstSPtr &original_run_id : run_list)
527 {
528 if(original_run_id.get()->getRunId() == run_id)
529 {
530 MsRunId new_run_id(*original_run_id.get());
531 new_run_id.setXmlId(xml_id);
532
533 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
534 }
535 }
536
537 if((run_id.isEmpty()) && (run_list.size() == 1))
538 {
539 MsRunId new_run_id(*run_list[0].get());
540 new_run_id.setXmlId(xml_id);
541
542 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
543 }
544
545
546 if(reader_sp == nullptr)
547 {
548 throw(
549 ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
550 .arg(run_id)
551 .arg(QFileInfo(m_fileName).absoluteFilePath())));
552 }
553 return reader_sp;
554}
555
556
557} // namespace pappso
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunIdIndex()
void setSelectedMsRunIdIndex(std::size_t index)
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
std::map< MsDataFormat, FileReaderType > m_preferredFileReaderTypeMap
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
std::size_t m_selectedMsRunIdIndex
std::size_t getSelectedMsRunIdIndex() const
void setPreferredFileReaderType(MsDataFormat format, FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
FileReaderType getpreferredFileReaderType(MsDataFormat format)
MsRunIdCstSPtr getSelectedMsRunId()
FileReaderType getFileReaderType() const
get the file reader type
MsDataFormat getFileFormat() const
get the raw format of mz data
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
FileReaderType m_fileReaderType
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an msrun reader by finding the run_id in file
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, directly builds a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity: MsRunId identifies an MS run with a unique ID (XmlId) and possibly contains informa...
Definition msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition msrunid.cpp:137
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition msrunreader.h:56
MsDataFormat
Definition types.h:120
@ xy
(x,y) format
@ unknown
unknown format
@ SQLite3
SQLite3 format.
@ MGF
Mascot format.
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
FileReaderType
Definition types.h:146
MSrun file reader for native Bruker TimsTOF raw data.