Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SignalToNoiseEstimatorMedian.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 //
35 
36 #ifndef OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
37 #define OPENMS_FILTERING_NOISEESTIMATION_SIGNALTONOISEESTIMATORMEDIAN_H
38 
39 
44 #include <vector>
45 
46 namespace OpenMS
47 {
71  template <typename Container = MSSpectrum<> >
73  public SignalToNoiseEstimator<Container>
74  {
75 
76 public:
77 
80 
87 
90 
92 
95  {
96  //set the name for DefaultParamHandler error messages
97  this->setName("SignalToNoiseEstimatorMedian");
98 
99  defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
100  " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
101  " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin." \
102  " If you choose 'max_intensity' too small, the noise estimate might be too small as well. " \
103  " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)." \
104  " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", ListUtils::create<String>("advanced"));
105  defaults_.setMinInt("max_intensity", -1);
106 
107  defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", ListUtils::create<String>("advanced"));
108  defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
109  defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
110 
111  defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", ListUtils::create<String>("advanced"));
112  defaults_.setMinInt("auto_max_percentile", 0);
113  defaults_.setMaxInt("auto_max_percentile", 100);
114 
115  defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", ListUtils::create<String>("advanced"));
116  defaults_.setMinInt("auto_mode", -1);
117  defaults_.setMaxInt("auto_mode", 1);
118 
119  defaults_.setValue("win_len", 200.0, "window length in Thomson");
120  defaults_.setMinFloat("win_len", 1.0);
121 
122  defaults_.setValue("bin_count", 30, "number of bins for intensity values");
123  defaults_.setMinInt("bin_count", 3);
124 
125  defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
126  defaults_.setMinInt("min_required_elements", 1);
127 
128  defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", ListUtils::create<String>("advanced"));
129 
130 
132  }
133 
136  SignalToNoiseEstimator<Container>(source)
137  {
138  updateMembers_();
139  }
140 
146  {
147  if (&source == this) return *this;
148 
150  updateMembers_();
151  return *this;
152  }
153 
155 
156 
159  {}
160 
161 
162 protected:
163 
164 
170  void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_)
171  {
172  // reset counter for sparse windows
173  double sparse_window_percent = 0;
174  // reset counter for histogram overflow
175  double histogram_oob_percent = 0;
176 
177  // reset the results
178  stn_estimates_.clear();
179 
180  // maximal range of histogram needs to be calculated first
181  if (auto_mode_ == AUTOMAXBYSTDEV)
182  {
183  // use MEAN+auto_max_intensity_*STDEV as threshold
184  GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
185  max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
186  }
187  else if (auto_mode_ == AUTOMAXBYPERCENT)
188  {
189  // get value at "auto_max_percentile_"th percentile
190  // we use a histogram approach here as well.
191  if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
192  {
194  throw Exception::InvalidValue(__FILE__,
195  __LINE__,
196  __PRETTY_FUNCTION__,
197  "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
198  s);
199  }
200 
201  std::vector<int> histogram_auto(100, 0);
202 
203  // find maximum of current scan
204  int size = 0;
205  typename PeakType::IntensityType maxInt = 0;
206  PeakIterator run = scan_first_;
207  while (run != scan_last_)
208  {
209  maxInt = std::max(maxInt, (*run).getIntensity());
210  ++size;
211  ++run;
212  }
213 
214  double bin_size = maxInt / 100;
215 
216  // fill histogram
217  run = scan_first_;
218  while (run != scan_last_)
219  {
220  ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
221  ++run;
222  }
223 
224  // add up element counts in histogram until ?th percentile is reached
225  int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
226  int elements_seen = 0;
227  int i = -1;
228  run = scan_first_;
229 
230  while (run != scan_last_ && elements_seen < elements_below_percentile)
231  {
232  ++i;
233  elements_seen += histogram_auto[i];
234  ++run;
235  }
236 
237  max_intensity_ = (((double)i) + 0.5) * bin_size;
238  }
239  else //if (auto_mode_ == MANUAL)
240  {
241  if (max_intensity_ <= 0)
242  {
244  throw Exception::InvalidValue(__FILE__,
245  __LINE__,
246  __PRETTY_FUNCTION__,
247  "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
248  s);
249  }
250  }
251 
252  if (max_intensity_ < 0)
253  {
254  std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
255  return;
256  }
257 
258  PeakIterator window_pos_center = scan_first_;
259  PeakIterator window_pos_borderleft = scan_first_;
260  PeakIterator window_pos_borderright = scan_first_;
261 
262  double window_half_size = win_len_ / 2;
263  double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
264  int bin_count_minus_1 = bin_count_ - 1;
265 
266  std::vector<int> histogram(bin_count_, 0);
267  std::vector<double> bin_value(bin_count_, 0);
268  // calculate average intensity that is represented by a bin
269  for (int bin = 0; bin < bin_count_; bin++)
270  {
271  histogram[bin] = 0;
272  bin_value[bin] = (bin + 0.5) * bin_size;
273  }
274  // bin in which a datapoint would fall
275  int to_bin = 0;
276 
277  // index of bin where the median is located
278  int median_bin = 0;
279  // additive number of elements from left to x in histogram
280  int element_inc_count = 0;
281 
282  // tracks elements in current window, which may vary because of unevenly spaced data
283  int elements_in_window = 0;
284  // number of windows
285  int window_count = 0;
286 
287  // number of elements where we find the median
288  int element_in_window_half = 0;
289 
290  double noise; // noise value of a datapoint
291 
292  // determine how many elements we need to estimate (for progress estimation)
293  int windows_overall = 0;
294  PeakIterator run = scan_first_;
295  while (run != scan_last_)
296  {
297  ++windows_overall;
298  ++run;
299  }
300  SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");
301 
302  // MAIN LOOP
303  while (window_pos_center != scan_last_)
304  {
305 
306  // erase all elements from histogram that will leave the window on the LEFT side
307  while ((*window_pos_borderleft).getMZ() < (*window_pos_center).getMZ() - window_half_size)
308  {
309  to_bin = std::max(std::min<int>((int)((*window_pos_borderleft).getIntensity() / bin_size), bin_count_minus_1), 0);
310  --histogram[to_bin];
311  --elements_in_window;
312  ++window_pos_borderleft;
313  }
314 
315  // add all elements to histogram that will enter the window on the RIGHT side
316  while ((window_pos_borderright != scan_last_)
317  && ((*window_pos_borderright).getMZ() <= (*window_pos_center).getMZ() + window_half_size))
318  {
319  //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
320  to_bin = std::max(std::min<int>((int)((*window_pos_borderright).getIntensity() / bin_size), bin_count_minus_1), 0);
321  ++histogram[to_bin];
322  ++elements_in_window;
323  ++window_pos_borderright;
324  }
325 
326  if (elements_in_window < min_required_elements_)
327  {
328  noise = noise_for_empty_window_;
329  ++sparse_window_percent;
330  }
331  else
332  {
333  // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
334  median_bin = -1;
335  element_inc_count = 0;
336  element_in_window_half = (elements_in_window + 1) / 2;
337  while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half)
338  {
339  ++median_bin;
340  element_inc_count += histogram[median_bin];
341  }
342 
343  // increase the error count
344  if (median_bin == bin_count_minus_1) {++histogram_oob_percent; }
345 
346  // just avoid division by 0
347  noise = std::max(1.0, bin_value[median_bin]);
348  }
349 
350  // store result
351  stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
352 
353 
354  // advance the window center by one datapoint
355  ++window_pos_center;
356  ++window_count;
357  // update progress
359 
360  } // end while
361 
363 
364  sparse_window_percent = sparse_window_percent * 100 / window_count;
365  histogram_oob_percent = histogram_oob_percent * 100 / window_count;
366 
367  // warn if percentage of sparse windows is above 20%
368  if (sparse_window_percent > 20)
369  {
370  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
371  << sparse_window_percent
372  << "% of all windows were sparse. You should consider increasing 'win_len' or decreasing 'min_required_elements'"
373  << std::endl;
374  }
375 
376  // warn if percentage of possibly wrong median estimates is above 1%
377  if (histogram_oob_percent > 1)
378  {
379  LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
380  << histogram_oob_percent
381  << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
382  << "You should consider increasing 'max_intensity' (and maybe 'bin_count' with it, to keep bin width reasonable)"
383  << std::endl;
384  }
385 
386  } // end of shiftWindow_
387 
390  {
391  max_intensity_ = (double)param_.getValue("max_intensity");
392  auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
393  auto_max_percentile_ = param_.getValue("auto_max_percentile");
394  auto_mode_ = param_.getValue("auto_mode");
395  win_len_ = (double)param_.getValue("win_len");
396  bin_count_ = param_.getValue("bin_count");
397  min_required_elements_ = param_.getValue("min_required_elements");
398  noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
399  is_result_valid_ = false;
400  }
401 
411  double win_len_;
419 
420 
421 
422  };
423 
424 } // namespace OpenMS
425 
426 #endif //OPENMS_FILTERING_NOISEESTIMATION_DSIGNALTONOISEESTIMATORMEDIAN_H
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:157
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition: SignalToNoiseEstimatorMedian.h:89
A more convenient string class.
Definition: String.h:57
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_" percentile or a stdev ...
Definition: SignalToNoiseEstimatorMedian.h:407
Definition: SignalToNoiseEstimatorMedian.h:79
int auto_mode_
determines which method shall be used for estimating "max_intensity_". valid are MANUAL=-1, AUTOMAXBYSTDEV=0 or AUTOMAXBYPERCENT=1
Definition: SignalToNoiseEstimatorMedian.h:409
Param param_
Container for current parameters.
Definition: DefaultParamHandler.h:150
Definition: SignalToNoiseEstimatorMedian.h:79
void computeSTN_(const PeakIterator &scan_first_, const PeakIterator &scan_last_)
Definition: SignalToNoiseEstimatorMedian.h:170
int bin_count_
number of bins in the histogram
Definition: SignalToNoiseEstimatorMedian.h:413
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition: SignalToNoiseEstimator.h:92
double win_len_
range of data points which belong to a window in Thomson
Definition: SignalToNoiseEstimatorMedian.h:411
Container::const_iterator PeakIterator
Definition: SignalToNoiseEstimator.h:65
double max_intensity_
maximal intensity considered during binning (values above get discarded)
Definition: SignalToNoiseEstimatorMedian.h:403
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition: SignalToNoiseEstimatorMedian.h:91
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition: SignalToNoiseEstimator.h:175
SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian &source)
Copy Constructor.
Definition: SignalToNoiseEstimatorMedian.h:135
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
bool is_result_valid_
flag: set to true if SignalToNoise estimates are calculated and none of the params were changed...
Definition: SignalToNoiseEstimator.h:215
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key.
void updateMembers_()
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed ...
Definition: SignalToNoiseEstimatorMedian.h:389
protected struct to store parameters my, sigma for a Gaussian distribution
Definition: SignalToNoiseEstimator.h:167
void endProgress() const
Ends the progress display.
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier ...
Definition: SignalToNoiseEstimatorMedian.h:405
void setMaxFloat(const String &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
PeakIterator::value_type PeakType
Definition: SignalToNoiseEstimator.h:66
double noise_for_empty_window_
Definition: SignalToNoiseEstimatorMedian.h:418
Invalid value exception.
Definition: Exception.h:336
SignalToNoiseEstimatorMedian & operator=(const SignalToNoiseEstimatorMedian &source)
Definition: SignalToNoiseEstimatorMedian.h:145
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:72
SignalToNoiseEstimatorMedian()
default constructor
Definition: SignalToNoiseEstimatorMedian.h:94
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition: SignalToNoiseEstimatorMedian.h:415
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key.
Definition: SignalToNoiseEstimatorMedian.h:79
This class represents the abstract base class of a signal to noise estimator.
Definition: SignalToNoiseEstimator.h:57
std::map< PeakType, double, typename PeakType::PositionLess > stn_estimates_
stores the noise estimate for each peak
Definition: SignalToNoiseEstimator.h:208
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void setProgress(SignedSize value) const
Sets the current progress.
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation ...
Definition: SignalToNoiseEstimatorMedian.h:79
void setName(const String &name)
Mutable access to the name.
virtual ~SignalToNoiseEstimatorMedian()
Destructor.
Definition: SignalToNoiseEstimatorMedian.h:158
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition: SignalToNoiseEstimatorMedian.h:88
void setMinFloat(const String &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.

OpenMS / TOPP release 2.0.0 Documentation generated on Fri May 29 2015 17:20:30 using doxygen 1.8.9.1