libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pwizmsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3 * \date 29/05/2018
4 * \author Olivier Langella
5 * \brief MSrun file reader based on the ProteoWizard library
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31
32#include <QDebug>
33
34#include "pwizmsrunreader.h"
35
36#include <pwiz/data/msdata/DefaultReaderList.hpp>
37
38
39#include "../../utils.h"
40#include "../../pappsoexception.h"
41#include "../../exception/exceptionnotfound.h"
42#include "../../exception/exceptionnotpossible.h"
43
44
45// int pwizMsRunReaderMetaTypeId =
46// qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47
48
49namespace pappso
50{
51
52
54 : MsRunReader(msrun_id_csp)
55{
56 // The initialization needs to be done immediately so that we get the pwiz
57 // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58 // pointer will be set to msp_msData.
59
60 initialize();
61}
62
63
64void
66{
67 std::string file_name_std =
69
70 // Make a backup of the current locale
71 std::string env_backup = setlocale(LC_ALL, "");
72 // struct lconv *lc = localeconv();
73
74 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75 //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76 //<< lc->decimal_point;
77
78 // Now actually search for the useful MSDataPtr and store it in the member variable.
79
80 pwiz::msdata::DefaultReaderList defaultReaderList;
81
82 std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83
84 try
85 {
86 defaultReaderList.read(file_name_std, msDataPtrVector);
87 }
88 catch(std::exception &error)
89 {
90 qDebug() << QString("Failed to read the data from file %1")
91 .arg(QString::fromStdString(file_name_std));
92
93 throw(PappsoException(
94 QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
95 .arg(mcsp_msRunId->getFileName())
96 .arg(mcsp_msRunId.get()->toString())
97 .arg(error.what())));
98 }
99
100 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
101 //<< "The number of runs is:" << msDataPtrVector.size()
102 //<< "The number of spectra in first run is:"
103 //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
104
105 // Single-run file handling here.
106
107 // Specific case of the MGF data format: we do not have a run id for that kind
108 // of data. In this case there must be a single run!
109
110 if(mcsp_msRunId->getRunId().isEmpty())
111 {
112 if(msDataPtrVector.size() != 1)
113 throw(
114 ExceptionNotPossible("For the kind of file at hand there can only be "
115 "one run in the file."));
116
117 // At this point we know the single msDataPtr is the one we are looking
118 // for.
119
120 msp_msData = msDataPtrVector.front();
121 }
122 else
123 {
124 // Multi-run file handling here.
125 for(auto &msDataPtr : msDataPtrVector)
126 {
127 if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
128 {
129 msp_msData = msDataPtr;
130
131 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
132 //<< "Found the right MSDataPtr for run id.";
133
134 break;
135 }
136 }
137 }
138
139 if(msp_msData == nullptr)
140 {
142 QString("Could not find a MSDataPtr matching the requested run id : %1")
143 .arg(mcsp_msRunId.get()->toString())));
144 }
145
146
147 // check if this MS run can be used with scan numbers
148 // MS:1000490 Agilent instrument model
149 pwiz::cv::CVID native_id_format =
150 pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
151
152 // msp_msData.get()->getDefaultNativeIDFormat();
153
154 if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
155 {
156 m_hasScanNumbers = true;
157 }
158 else
159 {
160 m_hasScanNumbers = false;
161 }
162
163 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::mzXML)
164 {
165 m_hasScanNumbers = true;
166 }
167}
168
172
173
174const OboPsiModTerm
176{
177
178 OboPsiModTerm term;
179
180 term.m_accession = "MS:1000824";
181 term.m_name = "no nativeID format";
182 term.m_definition =
183 "No nativeID format indicates that the file tagged with this term does not "
184 "contain spectra that can have a nativeID format.";
185
186
187 pwiz::cv::CVID cvid =
188 pwiz::msdata::id::getDefaultNativeIDFormat(*(msp_msData.get()));
189
190 switch(cvid)
191 {
192 case pwiz::cv::MS_Thermo_nativeID_format:
193 term.m_accession = "MS:1000768";
194 term.m_name = "Thermo nativeID format";
195 term.m_definition =
196 "Native format defined by controllerType=xsd:nonNegativeInteger "
197 "controllerNumber=xsd:positiveInteger scan=xsd:positiveInteger.";
198 break;
199 default:
200 break;
201 }
202 return term;
203}
204
205pwiz::msdata::SpectrumPtr
206PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
207 std::size_t spectrum_index,
208 bool want_binary_data) const
209{
210 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
211
212 try
213 {
214 native_pwiz_spectrum_sp =
215 p_spectrum_list->spectrum(spectrum_index, want_binary_data);
216 }
217 catch(std::runtime_error &error)
218 {
219 qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
220 << typeid(error).name();
221
222 throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
223 "MS file std::runtime_error :\n%2")
224 .arg(spectrum_index)
225 .arg(error.what()));
226 }
227 catch(std::exception &error)
228 {
229 qDebug() << "getPwizSpectrumPtr error " << error.what()
230 << typeid(error).name();
231
232 throw ExceptionNotFound(
233 QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
234 .arg(spectrum_index)
235 .arg(error.what()));
236 }
237
238 if(native_pwiz_spectrum_sp.get() == nullptr)
239 {
240 throw ExceptionNotFound(
241 QObject::tr(
242 "Pwiz spectrum index %1 not found in MS file : null pointer")
243 .arg(spectrum_index));
244 }
245
246 return native_pwiz_spectrum_sp;
247}
248
249
250bool
252 pwiz::msdata::Spectrum *spectrum_p,
253 QualifiedMassSpectrum &qualified_mass_spectrum) const
254{
255
256 // We now have to set the retention time at which this mass spectrum
257 // was acquired. This is the scan start time.
258
259 if(!spectrum_p->scanList.scans[0].hasCVParam(
260 pwiz::msdata::MS_scan_start_time))
261 {
262 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
263 { // MGF could not have scan start time
264 qualified_mass_spectrum.setRtInSeconds(-1);
265 }
266 else
267 {
269 "The spectrum has no scan start time value set."));
270 }
271 }
272 else
273 {
274 pwiz::data::CVParam retention_time_cv_param =
275 spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
276
277 // Try to get the units of the retention time value.
278
279 std::string unit_name = retention_time_cv_param.unitsName();
280 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
281 //<< "Unit name for the retention time:"
282 //<< QString::fromStdString(unit_name);
283
284 if(unit_name == "second")
285 {
286 qualified_mass_spectrum.setRtInSeconds(
287 retention_time_cv_param.valueAs<double>());
288 }
289 else if(unit_name == "minute")
290 {
291 qualified_mass_spectrum.setRtInSeconds(
292 retention_time_cv_param.valueAs<double>() * 60);
293 }
294 else
295 throw(
296 ExceptionNotPossible("Could not determine the unit for the "
297 "scan start time value."));
298 }
299
300 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
301 //<< "Retention time for spectrum is:"
302 //<< qualified_mass_spectrum.getRtInSeconds();
303
304 // Old version not checking unit (by default unit is minutes for RT,
305 // not seconds)
306 //
307 // pappso_double retentionTime =
308 // QString(spectrum_p->scanList.scans[0]
309 //.cvParam(pwiz::msdata::MS_scan_start_time)
310 //.value.c_str())
311 //.toDouble();
312 // qualified_mass_spectrum.setRtInSeconds(retentionTime);
313
314 return true;
315}
316
317
318bool
320 pwiz::msdata::Spectrum *spectrum_p,
321 QualifiedMassSpectrum &qualified_mass_spectrum) const
322{
323 // Not all the acquisitions have ion mobility data. We need to test
324 // that:
325
326 if(spectrum_p->scanList.scans[0].hasCVParam(
327 pwiz::msdata::MS_ion_mobility_drift_time))
328 {
329
330 // qDebug() << "as strings:"
331 //<< QString::fromStdString(
332 // spectrum_p->scanList.scans[0]
333 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
334 //.valueAs<std::string>());
335
336 pappso_double driftTime =
337 spectrum_p->scanList.scans[0]
338 .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
339 .valueAs<double>();
340
341 // qDebug() << "driftTime:" << driftTime;
342
343 // Old version requiring use of QString.
344 // pappso_double driftTime =
345 // QString(spectrum_p->scanList.scans[0]
346 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
347 //.value.c_str())
348 //.toDouble();
349
350 // Now make positively sure that the obtained value is correct.
351 // Note that I suffered a lot with Waters Synapt data that
352 // contained apparently correct drift time XML element that in
353 // fact contained either NaN or inf. When such mass spectra were
354 // encountered, the mz,i data were bogus and crashed the data
355 // loading functions. We just want to skip this kind of bogus mass
356 // spectrum by letting the caller know that the drift time was
357 // bogus ("I" is Filippo Rusconi).
358
359 if(std::isnan(driftTime) || std::isinf(driftTime))
360 {
361 // qDebug() << "detected as nan or inf.";
362
363 return false;
364 }
365 else
366 {
367 // The mzML standard stipulates that drift times are in
368 // milliseconds.
369 qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
370 }
371 }
372 // End of
373 // if(spectrum_p->scanList.scans[0].hasCVParam(
374 // pwiz::msdata::MS_ion_mobility_drift_time))
375 else
376 {
377 // Not a bogus mass spectrum but also not a drift spectrum, set -1
378 // as the drift time value.
379 qualified_mass_spectrum.setDtInMilliSeconds(-1);
380 }
381
382 return true;
383}
384
385
388 const MassSpectrumId &massSpectrumId,
389 pwiz::msdata::Spectrum *spectrum_p,
390 bool want_binary_data,
391 bool &ok) const
392{
393 // qDebug();
394
395 std::string env;
396 env = setlocale(LC_ALL, "");
397 setlocale(LC_ALL, "C");
398
399 QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
400
401 try
402 {
403
404 // We want to store the ms level for this spectrum
405
406 int msLevel =
407 (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
408
409 qualified_mass_spectrum.setMsLevel(msLevel);
410
411 if(!spectrum_p->scanList.scans[0].hasCVParam(
412 pwiz::msdata::MS_peak_list_scans))
413 {
414
415 // qDebug() << spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
416 // .valueAs<double>();
417 qualified_mass_spectrum.setParameterValue(
419 spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
420 .valueAs<double>());
421 }
422 // We want to know if this spectrum is a fragmentation spectrum obtained
423 // from a selected precursor ion.
424
425 std::size_t precursor_list_size = spectrum_p->precursors.size();
426
427 // qDebug() << "For spectrum at index:" <<
428 // massSpectrumId.getSpectrumIndex()
429 //<< "msLevel:" << msLevel
430 //<< "with number of precursors:" << precursor_list_size;
431
432 if(precursor_list_size > 0)
433 {
434
435 // Sanity check
436 if(msLevel < 2)
437 {
438 qDebug() << "Going to throw: msLevel cannot be less than two for "
439 "a spectrum that has items in its Precursor list.";
440
442 "msLevel cannot be less than two for "
443 "a spectrum that has items in its Precursor list."));
444 }
445
446 // See what is the first precursor in the list.
447
448 for(auto &precursor : spectrum_p->precursors)
449 {
450
451 // Set this variable ready as we need that default value in
452 // certain circumstances.
453
454 std::size_t precursor_spectrum_index =
455 std::numeric_limits<std::size_t>::max();
456
457 // The spectrum ID of the precursor might be empty.
458
459 if(precursor.spectrumID.empty())
460 {
461 // qDebug() << "The precursor's spectrum ID is empty.";
462
463 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
464 {
465 // qDebug()
466 //<< "Format is MGF, precursor's spectrum ID can be
467 // empty.";
468 }
469 else
470 {
471 // When performing Lumos Fusion fragmentation experiments
472 // in Tune mode and with recording, the first spectrum of
473 // the list is a fragmentation spectrum (ms level 2) that
474 // has no identity for the precursor spectrum because
475 // there is no full scan acquisition.
476 }
477 }
478 // End of
479 // if(precursor.spectrumID.empty())
480 else
481 {
482 // We could get a native precursor spectrum id, so convert
483 // that native id to a spectrum index.
484
485 qualified_mass_spectrum.setPrecursorNativeId(
486 QString::fromStdString(precursor.spectrumID));
487
488 if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
489 {
490 // qDebug() << "The native id of the precursor spectrum is
491 // empty.";
492 }
493
494 // Get the spectrum index of the spectrum that contained the
495 // precursor ion.
496
497 precursor_spectrum_index =
498 msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
499
500 // Note that the Mascot MGF format has a peculiar handling of
501 // the precursor ion stuff so we cannot throw.
502 if(precursor_spectrum_index ==
503 msp_msData->run.spectrumListPtr->size())
504 {
505 if(mcsp_msRunId.get()->getMsDataFormat() !=
507 {
509 "Failed to find the index of the "
510 "precursor ion's spectrum."));
511 }
512 }
513
514 qualified_mass_spectrum.setPrecursorSpectrumIndex(
515 precursor_spectrum_index);
516
517 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
518 // "()"
519 //<< "Set the precursor spectrum index to:"
520 //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
521 //<< "for qualified mass spectrum:"
522 //<< &qualified_mass_spectrum;
523 }
524
525 if(!precursor.selectedIons.size())
526 {
527 qDebug()
528 << "Going to throw The spectrum has msLevel > 1 but the "
529 "precursor ions's selected ions list is empty..";
530
531 throw(
532 ExceptionNotPossible("The spectrum has msLevel > 1 but the "
533 "precursor ions's selected ions "
534 "list is empty."));
535 }
536
537 pwiz::msdata::SelectedIon &ion =
538 *(precursor.selectedIons.begin());
539
540 // selected ion m/z
541
542 pappso_double selected_ion_mz =
543 QString(
544 ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
545 .toDouble();
546
547 // selected ion peak intensity
548 //<cvParam cvRef="MS" accession="MS:1000042"
549 // value="910663.949707031" name="peak intensity"
550 // unitAccession="MS:1000131" unitName="number of detector counts"
551 // unitCvRef="MS" />
552
553 pappso_double selected_ion_peak_intensity =
554 QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
555 .toDouble();
556
557 // charge state
558
559 unsigned int selected_ion_charge_state =
560 QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
561 .toUInt();
562
563 // At this point we can craft a new PrecursorIonData instance and
564 // push it back to the vector.
565
566 PrecursorIonData precursor_ion_data(selected_ion_mz,
567 selected_ion_charge_state,
568 selected_ion_peak_intensity);
569
570 qualified_mass_spectrum.appendPrecursorIonData(
571 precursor_ion_data);
572
573 // General sum-up
574
575 // qDebug()
576 //<< "Appended new PrecursorIonData:"
577 //<< "mz:"
578 //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
579 //<< "charge:"
580 //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
581 //<< "intensity:"
582 //<< qualified_mass_spectrum.getPrecursorIonData()
583 //.back()
584 //.intensity;
585 }
586 // End of
587 // for(auto &precursor : spectrum_p->precursors)
588 }
589 // End of
590 // if(precursor_list_size > 0)
591 else
592 {
593 // Sanity check
594
595 // Unfortunately, logic here is defeated by some vendors that have
596 // files with MS2 spectra without <precursorList>. Thus we have
597 // spectrum_p->precursors.size() == 0 and msLevel > 1.
598
599 // if(msLevel != 1)
600 //{
601 // throw(
602 // ExceptionNotPossible("msLevel cannot be different than 1 if "
603 //"there is not a single precursor ion."));
604 //}
605 }
606
607 // Sanity check.
608
609 if(precursor_list_size !=
610 qualified_mass_spectrum.getPrecursorIonData().size())
611 {
612 qDebug() << "Going to throw The number of precursors in the file is "
613 "different from the number of precursors in memory.";
614
616 QObject::tr("The number of precursors in the file is different "
617 "from the number of precursors in memory."));
618 }
619
620 // if(precursor_list_size == 1)
621 //{
622 // qDebug() << "Trying to get the mz value of the unique precursor ion:"
623 //<< qualified_mass_spectrum.getPrecursorMz();
624 //}
625
626 processRetentionTime(spectrum_p, qualified_mass_spectrum);
627
628 processDriftTime(spectrum_p, qualified_mass_spectrum);
629
630 // for(pwiz::data::CVParam cv_param : ion.cvParams)
631 //{
632 // pwiz::msdata::CVID param_id = cv_param.cvid;
633 // qDebug() << param_id;
634 // qDebug() << cv_param.cvid.c_str();
635 // qDebug() << cv_param.name().c_str();
636 // qDebug() << cv_param.value.c_str();
637 //}
638
639 if(want_binary_data)
640 {
641
642 // Fill-in MZIntensityPair vector for convenient access to binary
643 // data
644
645 std::vector<pwiz::msdata::MZIntensityPair> pairs;
646 spectrum_p->getMZIntensityPairs(pairs);
647
648 MassSpectrum spectrum;
649 double tic = 0;
650 // std::size_t iterCount = 0;
651
652 // Iterate through the m/z-intensity pairs
653 for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
654 it = pairs.begin(),
655 end = pairs.end();
656 it != end;
657 ++it)
658 {
659 //++iterCount;
660
661 // qDebug() << "it->mz " << it->mz << " it->intensity" <<
662 // it->intensity;
663 if(it->intensity)
664 {
665 spectrum.push_back(DataPoint(it->mz, it->intensity));
666 tic += it->intensity;
667 }
668 }
669
670 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
671 {
672 // Sort peaks by mz
673 spectrum.sortMz();
674 }
675
676 // lc = localeconv ();
677 // qDebug() << " env=" << localeconv () << " lc->decimal_point "
678 // << lc->decimal_point;
679 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
680 // "<< spectrum.size();
681 MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
682 qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
683
684 // double sumY =
685 // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
686 // <<
687 // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
688 //<< "iterCount:" << iterCount << "Spectrum size "
689 //<< spectrum.size() << "with tic:" << tic
690 //<< "and sumY:" << sumY;
691 }
692 else
693 qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
694 }
695 catch(PappsoException &errorp)
696 {
697 qDebug() << "Going to throw";
698
700 QObject::tr("Error reading data using the proteowizard library: %1")
701 .arg(errorp.qwhat()));
702 }
703 catch(std::exception &error)
704 {
705 qDebug() << "Going to throw";
706
708 QObject::tr("Error reading data using the proteowizard library: %1")
709 .arg(error.what()));
710 }
711
712 // setlocale(LC_ALL, env.c_str());
713
714 ok = true;
715
716 // qDebug() << "QualifiedMassSpectrum: " <<
717 // qualified_mass_spectrum.toString();
718 return qualified_mass_spectrum;
719}
720
721
724 bool want_binary_data,
725 bool &ok) const
726{
727
728 std::string env;
729 env = setlocale(LC_ALL, "");
730 // struct lconv *lc = localeconv();
731
732 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
733 //<< "env=" << env.c_str()
734 //<< "lc->decimal_point:" << lc->decimal_point;
735
736 setlocale(LC_ALL, "C");
737
738 MassSpectrumId massSpectrumId(mcsp_msRunId);
739
740 if(msp_msData == nullptr)
741 {
742 setlocale(LC_ALL, env.c_str());
743 return (QualifiedMassSpectrum(massSpectrumId));
744 }
745
746 // const bool want_binary_data = true;
747
748 pwiz::msdata::SpectrumListPtr spectrum_list_p =
749 msp_msData->run.spectrumListPtr;
750
751 if(spectrum_index == spectrum_list_p.get()->size())
752 {
753 setlocale(LC_ALL, env.c_str());
754 throw ExceptionNotFound(
755 QObject::tr("The spectrum index cannot be equal to the size of the "
756 "spectrum list."));
757 }
758
759 // At this point we know the spectrum index might be sane, so store it in
760 // the mass spec id object.
761 massSpectrumId.setSpectrumIndex(spectrum_index);
762
763 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
764 getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
765
766 setlocale(LC_ALL, env.c_str());
767
768 massSpectrumId.setNativeId(
769 QString::fromStdString(native_pwiz_spectrum_sp->id));
770
772 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
773}
774
775
776bool
777PwizMsRunReader::accept(const QString &file_name) const
778{
779 // We want to know if we can handle the file_name.
780 pwiz::msdata::ReaderList reader_list;
781
782 std::string reader_type = reader_list.identify(file_name.toStdString());
783
784 if(!reader_type.empty())
785 return true;
786
787 return false;
788}
789
790
792PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
793{
794 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
795 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
796}
797
799PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
800{
801 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
802 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
803}
804
806PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
807 bool want_binary_data) const
808{
809
810 QualifiedMassSpectrum spectrum;
811 bool ok = false;
812
813 spectrum =
814 qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
815
816 if(mcsp_msRunId->getMsDataFormat() == pappso::MsDataFormat::MGF)
817 {
818 if(spectrum.getRtInSeconds() == 0)
819 {
820 // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
821 }
822 }
823
824 // if(!ok)
825 // qDebug() << "Encountered a mass spectrum for which the status is bad.";
826
827 return spectrum;
828}
829
830
831void
837
838void
840 [[maybe_unused]] const MsRunReadConfig &config,
842{
843 qDebug();
845}
846
847void
849 SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
850{
851
853 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
854
855 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
856 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
857 // spectrum has been fully qualified (that is, the member data have been
858 // set), it is transferred to the handler passed as parameter to this
859 // function for the consumer to do what it wants with it.
860
861 // Does the handler consuming the mass spectra read from file want these
862 // mass spectra to hold the binary data arrays (mz/i vectors)?
863
864 const bool want_binary_data = handler.needPeakList();
865
866
867 std::string env;
868 env = setlocale(LC_ALL, "");
869 setlocale(LC_ALL, "C");
870
871
872 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
873 // run member of msp_msData.
874
875 pwiz::msdata::SpectrumListPtr spectrum_list_p =
876 msp_msData->run.spectrumListPtr;
877
878 // We'll need it to perform the looping in the spectrum list.
879 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
880
881 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
882
883 // Inform the handler of the spectrum list so that it can handle feedback to
884 // the user.
885 handler.spectrumListHasSize(spectrum_list_size);
886
887 // Iterate in the full list of spectra.
888
889 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
890 {
891
892 // If the user of this reader instance wants to stop reading the
893 // spectra, then break this loop.
894 if(handler.shouldStop())
895 {
896 qDebug() << "The operation was cancelled. Breaking the loop.";
897 break;
898 }
899
900 // Get the native pwiz-spectrum from the spectrum list.
901 // Note that this pointer is a shared pointer from pwiz.
902
903 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
904 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
905
906 /*
907 * we want to load metadata of the spectrum even if it does not contain
908 peaks
909
910 * if(!native_pwiz_spectrum_sp->hasBinaryData())
911 {
912 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
913 "
914 ()"
915 //<< "native pwiz spectrum is empty, continuing.";
916 continue;
917 }
918 */
919
920 // Instantiate the mass spectrum id that will hold critical information
921 // like the native id string and the spectrum index.
922
923 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
924
925 // Get the spectrum native id as a QString to store it in the mass
926 // spectrum id class. This will allow later to refer to the same
927 // spectrum starting back from the file.
928
929 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
930 massSpectrumId.setNativeId(native_id);
931
932 // Finally, instantiate the qualified mass spectrum with its id. This
933 // function will continue performing pappso-spectrum detailed
934 // qualification.
935
936 bool ok = false;
937
938 QualifiedMassSpectrum qualified_mass_spectrum =
940 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
941
942 if(!ok)
943 {
944 // qDebug() << "Encountered a mass spectrum for which the returned "
945 //"status is bad.";
946 continue;
947 }
948
949 // Before handing the mass spectrum out to the handler, see if the
950 // native mass spectrum was empty or not.
951
952 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
953 // qDebug() << "The mass spectrum has not defaultArrayLength";
954
955 qualified_mass_spectrum.setEmptyMassSpectrum(
956 !native_pwiz_spectrum_sp->defaultArrayLength);
957
958 // The handler will receive the index of the mass spectrum in the
959 // current run via the mass spectrum id member datum.
960 if(ms_level == 0)
961 {
962 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
963 }
964 else
965 {
966 if(qualified_mass_spectrum.getMsLevel() == ms_level)
967 {
968 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
969 }
970 }
971 }
972
973 setlocale(LC_ALL, env.c_str());
974 // End of
975 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
976
977 // Now let the loading handler know that the loading of the data has ended.
978 // The handler might need this "signal" to perform additional tasks or to
979 // cleanup cruft.
980
981 // qDebug() << "Loading ended";
982 handler.loadingEnded();
983}
984
985void
988{
989 qDebug();
991 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
992
993 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
994 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
995 // spectrum has been fully qualified (that is, the member data have been
996 // set), it is transferred to the handler passed as parameter to this
997 // function for the consumer to do what it wants with it.
998
999 // Does the handler consuming the mass spectra read from file want these
1000 // mass spectra to hold the binary data arrays (mz/i vectors)?
1001
1002 const bool want_binary_data = config.needPeakList();
1003
1004
1005 std::string env;
1006 env = setlocale(LC_ALL, "");
1007 setlocale(LC_ALL, "C");
1008
1009
1010 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
1011 // run member of msp_msData.
1012
1013 pwiz::msdata::SpectrumListPtr spectrum_list_p =
1014 msp_msData->run.spectrumListPtr;
1015
1016 // We'll need it to perform the looping in the spectrum list.
1017 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
1018
1019 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
1020
1021 // Inform the handler of the spectrum list so that it can handle feedback to
1022 // the user.
1023 handler.spectrumListHasSize(spectrum_list_size);
1024
1025 // Iterate in the full list of spectra.
1026
1027 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1028 {
1029
1030
1031 // If the user of this reader instance wants to stop reading the
1032 // spectra, then break this loop.
1033 if(handler.shouldStop())
1034 {
1035 qDebug() << "The operation was cancelled. Breaking the loop.";
1036 break;
1037 }
1038
1039 // Get the native pwiz-spectrum from the spectrum list.
1040 // Note that this pointer is a shared pointer from pwiz.
1041
1042 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
1043 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
1044
1045 /*
1046 * we want to load metadata of the spectrum even if it does not contain
1047 peaks
1048
1049 * if(!native_pwiz_spectrum_sp->hasBinaryData())
1050 {
1051 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
1052 "
1053 ()"
1054 //<< "native pwiz spectrum is empty, continuing.";
1055 continue;
1056 }
1057 */
1058
1059 // Instantiate the mass spectrum id that will hold critical information
1060 // like the native id string and the spectrum index.
1061
1062 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
1063
1064 // Get the spectrum native id as a QString to store it in the mass
1065 // spectrum id class. This will allow later to refer to the same
1066 // spectrum starting back from the file.
1067
1068 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
1069 massSpectrumId.setNativeId(native_id);
1070
1071 // Finally, instantiate the qualified mass spectrum with its id. This
1072 // function will continue performing pappso-spectrum detailed
1073 // qualification.
1074
1075 bool ok = false;
1076
1077 QualifiedMassSpectrum qualified_mass_spectrum =
1079 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
1080
1081 if(!ok)
1082 {
1083 // qDebug() << "Encountered a mass spectrum for which the returned "
1084 //"status is bad.";
1085 continue;
1086 }
1087
1088 // Before handing the mass spectrum out to the handler, see if the
1089 // native mass spectrum was empty or not.
1090
1091 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
1092 // qDebug() << "The mass spectrum has not defaultArrayLength";
1093
1094 qualified_mass_spectrum.setEmptyMassSpectrum(
1095 !native_pwiz_spectrum_sp->defaultArrayLength);
1096
1097 // The handler will receive the index of the mass spectrum in the
1098 // current run via the mass spectrum id member datum.
1099
1100 if(config.acceptMsLevel(qualified_mass_spectrum.getMsLevel()))
1101 {
1103 qualified_mass_spectrum.getRtInSeconds()))
1104 {
1105 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
1106 }
1107 }
1108 }
1109
1110 setlocale(LC_ALL, env.c_str());
1111 // End of
1112 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1113
1114 // Now let the loading handler know that the loading of the data has ended.
1115 // The handler might need this "signal" to perform additional tasks or to
1116 // cleanup cruft.
1117
1118 // qDebug() << "Loading ended";
1119 handler.loadingEnded();
1120}
1121
1122std::size_t
1124{
1125 return msp_msData->run.spectrumListPtr.get()->size();
1126}
1127
1128bool
1130{
1131 return m_hasScanNumbers;
1132}
1133
1134bool
1136{
1137 msp_msData = nullptr;
1138 return true;
1139}
1140
1141bool
1143{
1144 if(msp_msData == nullptr)
1145 {
1146 initialize();
1147 }
1148 return true;
1149}
1150
1151
1154 std::size_t spectrum_index, pappso::PrecisionPtr precision) const
1155{
1156
1157 QualifiedMassSpectrum mass_spectrum =
1158 qualifiedMassSpectrum(spectrum_index, false);
1159
1160 return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
1161}
1162
1165 const pappso::QualifiedMassSpectrum &mass_spectrum,
1166 pappso::PrecisionPtr precision) const
1167{
1168 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
1169
1170 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
1171
1172 xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
1173
1174 return xic_coord;
1175}
1176
1177} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
bool acceptMsLevel(std::size_t ms_level) const
bool acceptRetentionTimeInSeconds(double retention_time_in_seconds) const
base class to read an MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition msrunreader.h:63
MsRunIdCstSPtr mcsp_msRunId
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
virtual void readSpectrumCollectionWithMsrunReadConfig(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its spectrum index
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file; must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
release data back end device; if the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
const OboPsiModTerm getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setParameterValue(QualifiedMassSpectrumParameter parameter, const QVariant &value)
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectra from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition utils.cpp:143
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
std::shared_ptr< XicCoord > XicCoordSPtr
Definition xiccoord.h:43
MSrun file reader based on the ProteoWizard library.