Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
LinearRegression.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Clemens Groepl $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_MATH_STATISTICS_LINEARREGRESSION_H
36 #define OPENMS_MATH_STATISTICS_LINEARREGRESSION_H
37 
38 #include <OpenMS/CONCEPT/Types.h>
41 
42 #include "Wm5Vector2.h"
43 #include "Wm5ApprLineFit2.h"
44 #include "Wm5LinearSystem.h"
45 
46 #include <cmath>
47 #include <vector>
48 
49 using std::pow;
50 
51 namespace OpenMS
52 {
53  namespace Math
54  {
71  class OPENMS_DLLAPI LinearRegression
72  {
73 public:
74 
77  intercept_(0),
78  slope_(0),
79  x_intercept_(0),
80  lower_(0),
81  upper_(0),
82  t_star_(0),
83  r_squared_(0),
84  stand_dev_residuals_(0),
85  mean_residuals_(0),
86  stand_error_slope_(0),
87  chi_squared_(0),
88  rsd_(0)
89  {
90  }
91 
94  {
95  }
96 
113  template <typename Iterator>
114  void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin);
115 
132  template <typename Iterator>
133  void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin);
134 
136  double getIntercept() const;
138  double getSlope() const;
140  double getXIntercept() const;
142  double getLower() const;
144  double getUpper() const;
146  double getTValue() const;
148  double getRSquared() const;
150  double getStandDevRes() const;
152  double getMeanRes() const;
154  double getStandErrSlope() const;
156  double getChiSquared() const;
158  double getRSD() const;
159 
160 protected:
161 
163  double intercept_;
165  double slope_;
167  double x_intercept_;
169  double lower_;
171  double upper_;
173  double t_star_;
175  double r_squared_;
183  double chi_squared_;
185  double rsd_;
186 
187 
189  void computeGoodness_(const std::vector<Wm5::Vector2d>& points, double confidence_interval_P);
190 
192  template <typename Iterator>
193  double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept);
194 
196  template <typename Iterator>
197  double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept);
198 
199 private:
200 
204  LinearRegression& operator=(const LinearRegression& arg);
205 
206  }; //class
207 
208 
209  namespace
210  {
211  //given x compute y = slope * x + intercept
212  double computePointY(double x, double slope, double intercept)
213  {
214  return slope * x + intercept;
215  }
216 
217  } //namespace
218 
219  //x, y, w must be of same size
220  template <typename Iterator>
221  double LinearRegression::computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
222  {
223  double chi_squared = 0.0;
224  Iterator xIter = x_begin;
225  Iterator yIter = y_begin;
226  for (; xIter != x_end; ++xIter, ++yIter)
227  {
228  chi_squared += std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
229  }
230 
231  return chi_squared;
232  }
233 
234  //x, y, w must be of same size
235  template <typename Iterator>
236  double LinearRegression::computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
237  {
238  double chi_squared = 0.0;
239  Iterator xIter = x_begin;
240  Iterator yIter = y_begin;
241  Iterator wIter = w_begin;
242  for (; xIter != x_end; ++xIter, ++yIter, ++wIter)
243  {
244  chi_squared += *wIter * std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
245  }
246 
247  return chi_squared;
248  }
249 
250  template <typename Iterator>
251  void LinearRegression::computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin)
252  {
253  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
254 
255  // Compute the unweighted linear fit.
256  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
257  // and the value of Chi squared (sum( (y - evel(x))^2)
258  bool pass = Wm5::HeightLineFit2<double>(static_cast<int>(points.size()), &points.front(), slope_, intercept_);
259  chi_squared_ = computeChiSquare(x_begin, x_end, y_begin, slope_, intercept_);
260 
261  if (pass)
262  {
263  computeGoodness_(points, confidence_interval_P);
264  }
265  else
266  {
267  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-LinearRegression", "Could not fit a linear model to the data");
268  }
269  }
270 
271  template <typename Iterator>
272  void LinearRegression::computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin)
273  {
274  // Compute the weighted linear fit.
275  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
276  // and the value of Chi squared, the covariances of the intercept and the slope
277  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
278  // Compute sums for linear system. copy&paste from GeometricTools Wm5ApprLineFit2.cpp
279  // and modified to allow weights
280  int numPoints = points.size();
281  double sumX = 0, sumY = 0;
282  double sumXX = 0, sumXY = 0;
283  double sumW = 0;
284  Iterator wIter = w_begin;
285 
286  for (int i = 0; i < numPoints; ++i, ++wIter)
287  {
288  sumX += (*wIter) * points[i].X();
289  sumY += (*wIter) * points[i].Y();
290  sumXX += (*wIter) * points[i].X() * points[i].X();
291  sumXY += (*wIter) * points[i].X() * points[i].Y();
292  sumW += (*wIter);
293  }
294  //create matrices to solve Ax = B
295  double A[2][2] =
296  {
297  {sumXX, sumX},
298  {sumX, sumW}
299  };
300  double B[2] =
301  {
302  sumXY,
303  sumY
304  };
305  double X[2];
306 
307  bool nonsingular = Wm5::LinearSystem<double>().Solve2(A, B, X);
308  if (nonsingular)
309  {
310  slope_ = X[0];
311  intercept_ = X[1];
312  }
313  chi_squared_ = computeWeightedChiSquare(x_begin, x_end, y_begin, w_begin, slope_, intercept_);
314 
315  if (nonsingular)
316  {
317  computeGoodness_(points, confidence_interval_P);
318  }
319  else
320  {
321  throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToFit-LinearRegression", "Could not fit a linear model to the data");
322  }
323  }
324 
325  } // namespace Math
326 } // namespace OpenMS
327 
328 
329 #endif
virtual ~LinearRegression()
Destructor.
Definition: LinearRegression.h:93
std::vector< Wm5::Vector2d > iteratorRange2Wm5Vectors(Iterator x_begin, Iterator x_end, Iterator y_begin)
Copies the distance(x_begin,x_end) elements starting at x_begin and y_begin into the Wm5::Vector...
Definition: RegressionUtils.h:45
LinearRegression()
Constructor.
Definition: LinearRegression.h:76
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double rsd_
the relative standard deviation
Definition: LinearRegression.h:185
double upper_
The upper bound of the confidence interval.
Definition: LinearRegression.h:171
double chi_squared_
The value of the Chi Squared statistic.
Definition: LinearRegression.h:183
double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
Compute the chi squared of a linear fit.
Definition: LinearRegression.h:221
void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin)
This function computes the best-fit linear regression coefficients of the model for the weighted da...
Definition: LinearRegression.h:272
This class offers functions to perform least-squares fits to a straight line model, .
Definition: LinearRegression.h:71
double x_intercept_
The intercept of the fitted line with the x-axis.
Definition: LinearRegression.h:167
double t_star_
The value of the t-statistic.
Definition: LinearRegression.h:173
double stand_error_slope_
The standard error of the slope.
Definition: LinearRegression.h:181
double mean_residuals_
Mean of residuals.
Definition: LinearRegression.h:179
double slope_
The slope of the fitted line.
Definition: LinearRegression.h:165
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:662
double stand_dev_residuals_
The standard deviation of the residuals.
Definition: LinearRegression.h:177
void computeGoodness_(const std::vector< Wm5::Vector2d > &points, double confidence_interval_P)
Computes the goodness of the fitted regression line.
double lower_
The lower bound of the confidence interval.
Definition: LinearRegression.h:169
double r_squared_
The squared correlation coefficient (Pearson)
Definition: LinearRegression.h:175
double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
Compute the chi squared of a weighted linear fit.
Definition: LinearRegression.h:236
double intercept_
The intercept of the fitted line with the y-axis.
Definition: LinearRegression.h:163
void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin)
This function computes the best-fit linear regression coefficients of the model for the dataset ...
Definition: LinearRegression.h:251

OpenMS / TOPP release 2.0.0 Documentation generated on Fri May 29 2015 17:20:25 using doxygen 1.8.9.1