Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
PeakPickerIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERITERATIVE_H
36 #define OPENMS_TRANSFORMATIONS_RAW2PEAK_PEAKPICKERITERATIVE_H
37 
41 
42 // #define DEBUG_PEAK_PICKING
43 
44 namespace OpenMS
45 {
46 
52  {
53  int index;
55 
57  double leftWidth;
58  double rightWidth;
59  float mz;
60  };
61 
62  bool sort_peaks_by_intensity(const PeakCandidate& a, const PeakCandidate& b); // prototype
64  {
66  }
67 
97  class OPENMS_DLLAPI PeakPickerIterative :
98  public DefaultParamHandler,
99  public ProgressLogger
100  {
101 
102 private:
104  double peak_width_;
108  double sn_win_len_;
110 
111 public:
112 
115  DefaultParamHandler("PeakPickerIterative"),
117  {
118  defaults_.setValue("signal_to_noise_", 1.0, "Signal to noise value, each peak is required to be above this value (turn off by setting it to 0.0)");
119  defaults_.setValue("peak_width", 0.0, "Expected peak width half width in Dalton - peaks will be extended until this half width is reached (even if the intensitity is increasing). In conjunction with check_width_internally it will also be used to remove peaks whose spacing is larger than this value.");
120 
121 
122  defaults_.setValue("spacing_difference", 1.5, "Difference between peaks in multiples of the minimal difference to continue. The higher this value is set, the further apart peaks are allowed to be to still extend a peak. E.g. if the value is set to 1.5 and in a current peak the minimal spacing between peaks is 10 mDa, then only peaks at most 15 mDa apart will be added to the peak.", ListUtils::create<String>("advanced"));
123  defaults_.setValue("sn_bin_count_", 30, "Bin count for the Signal to Noise estimation.", ListUtils::create<String>("advanced"));
124  defaults_.setValue("nr_iterations_", 5, "Nr of iterations to perform (how many times the peaks are re-centered).", ListUtils::create<String>("advanced"));
125  defaults_.setMinInt("nr_iterations_", 1);
126  defaults_.setValue("sn_win_len_", 20.0, "Window length for the Signal to Noise estimation.", ListUtils::create<String>("advanced"));
127 
128  defaults_.setValue("check_width_internally", "false", "Delete peaks where the spacing is larger than the peak width (should be set to true to avoid artefacts)", ListUtils::create<String>("advanced"));
129  defaults_.setValidStrings("check_width_internally", ListUtils::create<String>("true,false"));
130 
131  defaults_.setValue("ms1_only", "false", "Only do MS1");
132  defaults_.setValidStrings("ms1_only", ListUtils::create<String>("true,false"));
133  defaults_.setValue("clear_meta_data", "false", "Delete meta data about peak width");
134  defaults_.setValidStrings("clear_meta_data", ListUtils::create<String>("true,false"));
135 
136  // write defaults into Param object param_
137  defaultsToParam_();
138  }
139 
141  {
142  signal_to_noise_ = (double)param_.getValue("signal_to_noise_");
143  peak_width_ = (double)param_.getValue("peak_width");
144  spacing_difference_ = (double)param_.getValue("spacing_difference");
145  sn_bin_count_ = (double)param_.getValue("sn_bin_count_");
146  nr_iterations_ = (double)param_.getValue("nr_iterations_");
147  sn_win_len_ = (double)param_.getValue("sn_win_len_");
148 
149  check_width_internally_ = param_.getValue("check_width_internally").toBool();
150  }
151 
154 
155 private:
156 
157  /*
158  * This will re-center the peaks by using the seeds (ordered by intensity) to
159  * find raw signals that may belong to this peak. Then the peak is centered
160  * using a weighted average.
161  * Signals are added to the peak as long as they are still inside the
162  * peak_width or as long as the signal intensity keeps falling. Also the
163  * distance to the previous signal and the whether the signal is below the
164  * noise level is taken into account.
165  * This function implements a single iteration of this algorithm.
166  *
167  */
168  template <typename PeakType>
170  std::vector<PeakCandidate>& PeakCandidates,
172  {
173  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
174  {
175  int i = PeakCandidates[peak_it].index;
176  double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
177  double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
178  double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
179 
180  // MZ spacing sanity checks
181  double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
182  double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
183  double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
184  double est_peak_width = peak_width_;
185 
186  if (check_width_internally_ && (left_to_central > est_peak_width || central_to_right > est_peak_width))
187  {
188  // something has gone wrong, the points are further away than the peak width -> delete this peak
189  PeakCandidates[peak_it].integrated_intensity = -1;
190  PeakCandidates[peak_it].leftWidth = -1;
191  PeakCandidates[peak_it].rightWidth = -1;
192  PeakCandidates[peak_it].mz = -1;
193  continue;
194  }
195 
196  std::map<double, double> peak_raw_data;
197 
198  peak_raw_data[central_peak_mz] = central_peak_int;
199  peak_raw_data[left_neighbor_mz] = left_neighbor_int;
200  peak_raw_data[right_neighbor_mz] = right_neighbor_int;
201 
202  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
203  // COPY - from PeakPickerHiRes
204  // peak core found, now extend it to the left
205  Size k = 2;
206  while ((i - k + 1) > 0
207  && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < spacing_difference_ * min_spacing
208  && (input[i - k].getIntensity() < peak_raw_data.begin()->second
209  || std::fabs(input[i - k].getMZ() - central_peak_mz) < est_peak_width)
210  )
211  {
212  if (signal_to_noise_ > 0.0)
213  {
214  if (snt.getSignalToNoise(input[i - k]) < signal_to_noise_)
215  {
216  break;
217  }
218  }
219  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
220  ++k;
221  }
222  double leftborder = input[i - k + 1].getMZ();
223 
224  // to the right
225  k = 2;
226  while ((i + k) < input.size()
227  && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference_ * min_spacing
228  && (input[i + k].getIntensity() < peak_raw_data.rbegin()->second
229  || std::fabs(input[i + k].getMZ() - central_peak_mz) < est_peak_width)
230  )
231  {
232  if (signal_to_noise_ > 0.0)
233  {
234  if (snt.getSignalToNoise(input[i + k]) < signal_to_noise_)
235  {
236  break;
237  }
238  }
239 
240  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
241  ++k;
242  }
243  // END COPY - from PeakPickerHiRes
244  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
245 
246  double rightborder = input[i + k - 1].getMZ();
247 
248  double weighted_mz = 0;
249  double integrated_intensity = 0;
250  for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
251  {
252  weighted_mz += map_it->first * map_it->second;
253  integrated_intensity += map_it->second;
254  }
255  weighted_mz /= integrated_intensity;
256 
257  // store the data
258  PeakCandidates[peak_it].integrated_intensity = integrated_intensity;
259  PeakCandidates[peak_it].leftWidth = leftborder;
260  PeakCandidates[peak_it].rightWidth = rightborder;
261  PeakCandidates[peak_it].mz = weighted_mz;
262 
263  // find the closest raw signal peak to where we just put our peak and store it
264  double min_diff = std::fabs(weighted_mz - input[i].getMZ());
265  int min_i = i;
266 
267  // Search to the left
268  for (int m = 1; i - m > 0 && leftborder < input[i - m].getMZ(); m++)
269  {
270  if (std::fabs(weighted_mz - input[i - m].getMZ()) < min_diff)
271  {
272  min_diff = std::fabs(weighted_mz - input[i - m].getMZ());
273  min_i = i - m;
274  }
275  }
276  // Search to the right
277  for (int m = 1; i - m > 0 && rightborder > input[i + m].getMZ(); m++)
278  {
279  if (std::fabs(weighted_mz - input[i + m].getMZ()) < min_diff)
280  {
281  min_diff = std::fabs(weighted_mz - input[i + m].getMZ());
282  min_i = i + m;
283  }
284  }
285  PeakCandidates[peak_it].index = min_i;
286  }
287  }
288 
289 public:
290 
291  /*
292  * This will pick one single spectrum. The PeakPickerHiRes is used to
293  * generate seeds, these seeds are then used to re-center the mass and
294  * compute peak width and integrated intensity of the peak.
295  *
296  * Finally, other peaks that would fall within the primary peak are
297  * discarded
298  *
299  * The output are the remaining peaks.
300  */
301  template <typename PeakType>
302  void pick(const MSSpectrum<PeakType>& input, MSSpectrum<PeakType>& output)
303  {
304  // don't pick a spectrum with less than 3 data points
305  if (input.size() < 3) return;
306 
307  // copy meta data of the input spectrum
308  output.clear(true);
309  output.SpectrumSettings::operator=(input);
310  output.MetaInfoInterface::operator=(input);
311  output.setRT(input.getRT());
312  output.setMSLevel(input.getMSLevel());
313  output.setName(input.getName());
315  output.getFloatDataArrays().clear();
316 
317  std::vector<PeakCandidate> PeakCandidates;
318  MSSpectrum<PeakType> picked_spectrum;
319 
320  // Use the PeakPickerHiRes to find candidates ...
322  Param pepi_param = OpenMS::PeakPickerHiRes().getDefaults();
323  pepi_param.setValue("signal_to_noise", signal_to_noise_);
324  pepi_param.setValue("spacing_difference", spacing_difference_);
325  pp.setParameters(pepi_param);
326  pp.pick(input, picked_spectrum);
327 
328  // after picking peaks, we store the closest index of the raw spectrum and the picked intensity
329  std::vector<PeakCandidate> newPeakCandidates_;
330  Size j = 0;
331  LOG_DEBUG << "Candidates " << picked_spectrum.size() << std::endl;
332  for (Size k = 0; k < input.size() && j < picked_spectrum.size(); k++)
333  {
334  if (input[k].getMZ() > picked_spectrum[j].getMZ())
335  {
336  LOG_DEBUG << "got a value " << k << " @ " << input[k] << std::endl;
337  PeakCandidate pc = { /*.index=*/ k, /*.intensity=*/ picked_spectrum[j].getIntensity(), -1, -1, -1, -1};
338  newPeakCandidates_.push_back(pc);
339  j++;
340  }
341  }
342 
343  PeakCandidates = newPeakCandidates_;
344  std::sort(PeakCandidates.begin(), PeakCandidates.end(), sort_peaks_by_intensity);
345 
346  // signal-to-noise estimation
348  if (signal_to_noise_ > 0.0)
349  {
350  Param snt_parameters = snt.getParameters();
351  snt_parameters.setValue("win_len", sn_win_len_);
352  snt_parameters.setValue("bin_count", sn_bin_count_);
353  snt.setParameters(snt_parameters);
354  snt.init(input);
355  }
356 
357  // The peak candidates are re-centered and the width is computed for each peak
358  for (int i = 0; i < nr_iterations_; i++)
359  {
360  pickRecenterPeaks_(input, PeakCandidates, snt);
361  }
362 
363  output.getFloatDataArrays().resize(3);
364  output.getFloatDataArrays()[0].setName("IntegratedIntensity");
365  output.getFloatDataArrays()[1].setName("leftWidth");
366  output.getFloatDataArrays()[2].setName("rightWidth");
367 
368  // Go through all candidates and exclude all lower-intensity candidates
369  // that are within the borders of another peak
370  LOG_DEBUG << "Will now merge candidates" << std::endl;
371  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
372  {
373  if (PeakCandidates[peak_it].leftWidth < 0) continue;
374 
375  //Remove all peak candidates that are enclosed by this peak
376  for (Size m = peak_it + 1; m < PeakCandidates.size(); m++)
377  {
378  if (PeakCandidates[m].mz >= PeakCandidates[peak_it].leftWidth && PeakCandidates[m].mz <= PeakCandidates[peak_it].rightWidth)
379  {
380  LOG_DEBUG << "Remove peak " << m << " : " << PeakCandidates[m].mz << " " <<
381  PeakCandidates[m].peak_apex_intensity << " (too close to " << PeakCandidates[peak_it].mz <<
382  " " << PeakCandidates[peak_it].peak_apex_intensity << ")" << std::endl;
383  PeakCandidates[m].leftWidth = PeakCandidates[m].rightWidth = -1;
384  }
385  }
386 
387  PeakType peak;
388  peak.setMZ(PeakCandidates[peak_it].mz);
389  peak.setIntensity(PeakCandidates[peak_it].integrated_intensity);
390  output.push_back(peak);
391 
392  LOG_DEBUG << "Push peak " << peak_it << " " << peak << std::endl;
393  output.getFloatDataArrays()[0].push_back(PeakCandidates[peak_it].integrated_intensity);
394  output.getFloatDataArrays()[1].push_back(PeakCandidates[peak_it].leftWidth);
395  output.getFloatDataArrays()[2].push_back(PeakCandidates[peak_it].rightWidth);
396  }
397 
398  LOG_DEBUG << "Found seeds: " << PeakCandidates.size() << " / Found peaks: " << output.size() << std::endl;
399  output.sortByPosition();
400  }
401 
402  template <typename PeakType>
404  {
405  // make sure that output is clear
406  output.clear(true);
407 
408  // copy experimental settings
409  static_cast<ExperimentalSettings&>(output) = input;
410 
411  // resize output with respect to input
412  output.resize(input.size());
413 
414  bool ms1_only = param_.getValue("ms1_only").toBool();
415  bool clear_meta_data = param_.getValue("clear_meta_data").toBool();
416 
417  Size progress = 0;
418  startProgress(0, input.size(), "picking peaks");
419  for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
420  {
421  if (ms1_only && (input[scan_idx].getMSLevel() != 1))
422  {
423  output[scan_idx] = input[scan_idx];
424  }
425  else
426  {
427  pick(input[scan_idx], output[scan_idx]);
428  if (clear_meta_data) {output[scan_idx].getFloatDataArrays().clear();}
429  }
430  setProgress(progress++);
431  }
432  endProgress();
433  }
434 
435  };
436 
437 }
438 
439 #endif
This class implements a peak-picking algorithm for high-resolution MS data (specifically designed for...
Definition: PeakPickerIterative.h:97
~PeakPickerIterative()
Destructor.
Definition: PeakPickerIterative.h:153
const double k
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
const Param & getDefaults() const
Non-mutable access to the default parameters.
double spacing_difference_
Definition: PeakPickerIterative.h:105
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:55
UInt getMSLevel() const
Returns the MS level.
Definition: MSSpectrum.h:259
Size size() const
Definition: MSExperiment.h:117
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:197
void sortByPosition()
Lexicographically sorts the peaks by their position.
Definition: MSSpectrum.h:419
void pickRecenterPeaks_(const MSSpectrum< PeakType > &input, std::vector< PeakCandidate > &PeakCandidates, SignalToNoiseEstimatorMedian< MSSpectrum< PeakType > > &snt)
Definition: PeakPickerIterative.h:169
double signal_to_noise_
Definition: PeakPickerIterative.h:103
double leftWidth
Definition: PeakPickerIterative.h:57
int index
Definition: PeakPickerIterative.h:53
const String & getName() const
Returns the name.
Definition: MSSpectrum.h:271
void setName(const String &name)
Sets the name.
Definition: MSSpectrum.h:277
void resize(Size s)
Definition: MSExperiment.h:122
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setParameters(const Param &param)
Sets the parameters.
float mz
Definition: PeakPickerIterative.h:59
int nr_iterations_
Definition: PeakPickerIterative.h:107
#define LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:459
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:167
double integrated_intensity
Definition: PeakPickerIterative.h:56
double peak_width_
Definition: PeakPickerIterative.h:104
void pick(const MSSpectrum< PeakType > &input, MSSpectrum< PeakType > &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
Definition: PeakPickerHiRes.h:102
int sn_bin_count_
Definition: PeakPickerIterative.h:106
double getRT() const
Definition: MSSpectrum.h:243
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: MSSpectrum.h:265
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: PeakPickerIterative.h:140
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSSpectrum.h:635
void setRT(double rt)
Sets the absolute retention time (is seconds)
Definition: MSSpectrum.h:249
Management and storage of parameters / INI files.
Definition: Param.h:75
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:69
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:72
bool sort_peaks_by_intensity(const PeakCandidate &a, const PeakCandidate &b)
Definition: PeakPickerIterative.h:63
PeakPickerIterative()
Constructor.
Definition: PeakPickerIterative.h:114
void setType(SpectrumType type)
sets the spectrum type
void pick(const MSSpectrum< PeakType > &input, MSSpectrum< PeakType > &output)
Definition: PeakPickerIterative.h:302
double rightWidth
Definition: PeakPickerIterative.h:58
double sn_win_len_
Definition: PeakPickerIterative.h:108
A small structure to hold peak candidates.
Definition: PeakPickerIterative.h:51
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:850
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
double peak_apex_intensity
Definition: PeakPickerIterative.h:54
void pickExperiment(const MSExperiment< PeakType > &input, MSExperiment< PeakType > &output)
Definition: PeakPickerIterative.h:403
bool check_width_internally_
Definition: PeakPickerIterative.h:109
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
Definition: MSSpectrum.h:298
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition: PeakPickerHiRes.h:75
Description of the experimental settings.
Definition: ExperimentalSettings.h:59

OpenMS / TOPP release 2.0.0 Documentation generated on Fri May 29 2015 17:20:28 using doxygen 1.8.9.1