SimpleLinearRegression.java
/*
* Copyright 2013 University of Glasgow.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package broadwick.statistics.regression;
import org.apache.commons.math3.stat.regression.SimpleRegression;
/**
* Estimates an ordinary least squares regression model with one independent variable. y = intercept + slope * x
*/
public class SimpleLinearRegression {
/**
* Create an empty SimpleLinearRegression instance.
*/
public SimpleLinearRegression() {
regression = new SimpleRegression();
}
/**
* Adds the observation (x,y) to the regression data set. Uses updating formulas for means and sums of squares
* defined in "Algorithms for Computing the Sample Variance: Analysis and Recommendations", Chan, T.F., Golub, G.H.,
* and LeVeque, R.J. 1983, American Statistician, vol. 37, pp. 242-247, referenced in Weisberg, S. "Applied Linear
* Regression". 2nd Ed. 1985. Note: this uses the apache commons math library.
* @param x independent variable value
* @param y dependent variable value
*/
public final void addData(final double x, final double y) {
regression.addData(x, y);
}
/**
* Removes the observation (x,y) from the regression data set, mirroring the addData method.
* The method has no effect if there are no points of data (i.e. n=0)
* @param x independent variable value
* @param y dependent variable value
*/
public final void removeData(final double x, final double y) {
regression.removeData(x, y);
}
/**
* Returns the slope of the estimated regression line. At least two observations (with at least two different x
* values) must have been added before invoking this method. If this method is invoked before a model can be
* estimated,
* <code>Double,NaN</code> is returned.
* @return the slope of the regression line
*/
public final double getSlope() {
return regression.getSlope();
}
/**
* Returns the intercept of the estimated regression line. At least two observations (with at least two different x
* values) must have been added before invoking this method. If this method is invoked before a model can be
* estimated,
* <code>Double,NaN</code> is returned.
* @return the intercept of the regression line
*/
public final double getIntercept() {
return regression.getIntercept();
}
/**
* Returns the number of observations that have been added to the model.
* @return n number of observations that have been added.
*/
public final long getN() {
return regression.getN();
}
/**
* Returns the "predicted"
* <code>y</code> value associated with the supplied
* <code>x</code> value, based on the data that has been added to the model when this method is activated.
* <p>
* <code> predict(x) = intercept + slope * x </code>
* </p>
* At least two observations (with at least two different x values) must have been added before invoking this
* method. If this method is invoked before a model can be estimated,
* <code>Double,NaN</code> is returned.
* @param x input <code>x</code> value
* @return predicted <code>y</code> value
*/
public final double predict(final double x) {
return regression.predict(x);
}
/**
* Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html"> Pearson's product moment correlation
* coefficient</a>, usually denoted r. At least two observations (with at least two different x values) must have
* been added before invoking this method. If this method is invoked before a model can be estimated,
* <code>Double,NaN</code> is returned.
* <p/>
* @return Pearson's r
*/
public final double getR() {
return regression.getR();
}
/**
* Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
* standard error of the intercept estimate</a>, usually denoted s(b0). If there are fewer that
* <strong>three</strong> observations in the model, or if there is no variation in x, this returns
* <code>Double.NaN</code>.
* @return standard error associated with intercept estimate
*/
public final double getInterceptStdErr() {
return regression.getInterceptStdErr();
}
/**
* Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard error of the slope estimate</a>. If there
* are fewer that <strong>three</strong> data pairs in the model, or if there is no variation in x, this returns
* <code>Double.NaN</code>.
* @return standard error associated with slope estimate
*/
public final double getSlopeStdErr() {
return regression.getSlopeStdErr();
}
@Override
public final String toString() {
return String.format("Slope [%f], Intercept [%f]. N=%d", getSlope(), getIntercept(), getN());
}
private SimpleRegression regression;
}