001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2011, by Object Refinery Limited and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Regression.java 029 * --------------- 030 * (C) Copyright 2002-2009, by Object Refinery Limited. 031 * 032 * Original Author: David Gilbert (for Object Refinery Limited); 033 * Contributor(s): Peter Kolb (patch 2795746); 034 * 035 * Changes 036 * ------- 037 * 30-Sep-2002 : Version 1 (DG); 038 * 18-Aug-2003 : Added 'abstract' (DG); 039 * 15-Jul-2004 : Switched getX() with getXValue() and getY() with 040 * getYValue() (DG); 041 * 29-May-2009 : Added support for polynomial regression, see patch 2795746 042 * by Peter Kolb (DG); 043 * 044 */ 045 046package org.jfree.data.statistics; 047 048import org.jfree.data.xy.XYDataset; 049 050/** 051 * A utility class for fitting regression curves to data. 052 */ 053public abstract class Regression { 054 055 /** 056 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 057 * the data using ordinary least squares regression. The result is 058 * returned as a double[], where result[0] --> a, and result[1] --> b. 059 * 060 * @param data the data. 061 * 062 * @return The parameters. 063 */ 064 public static double[] getOLSRegression(double[][] data) { 065 066 int n = data.length; 067 if (n < 2) { 068 throw new IllegalArgumentException("Not enough data."); 069 } 070 071 double sumX = 0; 072 double sumY = 0; 073 double sumXX = 0; 074 double sumXY = 0; 075 for (int i = 0; i < n; i++) { 076 double x = data[i][0]; 077 double y = data[i][1]; 078 sumX += x; 079 sumY += y; 080 double xx = x * x; 081 sumXX += xx; 082 double xy = x * y; 083 sumXY += xy; 084 } 085 double sxx = sumXX - (sumX * sumX) / n; 086 double sxy = sumXY - (sumX * sumY) / n; 087 double xbar = sumX / n; 088 double ybar = sumY / n; 089 090 double[] result = new double[2]; 091 result[1] = sxy / sxx; 092 result[0] = ybar - result[1] * xbar; 093 094 return result; 095 096 } 097 098 /** 099 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 100 * the data using ordinary least squares regression. The result is returned 101 * as a double[], where result[0] --> a, and result[1] --> b. 102 * 103 * @param data the data. 104 * @param series the series (zero-based index). 105 * 106 * @return The parameters. 107 */ 108 public static double[] getOLSRegression(XYDataset data, int series) { 109 110 int n = data.getItemCount(series); 111 if (n < 2) { 112 throw new IllegalArgumentException("Not enough data."); 113 } 114 115 double sumX = 0; 116 double sumY = 0; 117 double sumXX = 0; 118 double sumXY = 0; 119 for (int i = 0; i < n; i++) { 120 double x = data.getXValue(series, i); 121 double y = data.getYValue(series, i); 122 sumX += x; 123 sumY += y; 124 double xx = x * x; 125 sumXX += xx; 126 double xy = x * y; 127 sumXY += xy; 128 } 129 double sxx = sumXX - (sumX * sumX) / n; 130 double sxy = sumXY - (sumX * sumY) / n; 131 double xbar = sumX / n; 132 double ybar = sumY / n; 133 134 double[] result = new double[2]; 135 result[1] = sxy / sxx; 136 result[0] = ybar - result[1] * xbar; 137 138 return result; 139 140 } 141 142 /** 143 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 144 * the data using a power regression equation. The result is returned as 145 * an array, where double[0] --> a, and double[1] --> b. 146 * 147 * @param data the data. 148 * 149 * @return The parameters. 150 */ 151 public static double[] getPowerRegression(double[][] data) { 152 153 int n = data.length; 154 if (n < 2) { 155 throw new IllegalArgumentException("Not enough data."); 156 } 157 158 double sumX = 0; 159 double sumY = 0; 160 double sumXX = 0; 161 double sumXY = 0; 162 for (int i = 0; i < n; i++) { 163 double x = Math.log(data[i][0]); 164 double y = Math.log(data[i][1]); 165 sumX += x; 166 sumY += y; 167 double xx = x * x; 168 sumXX += xx; 169 double xy = x * y; 170 sumXY += xy; 171 } 172 double sxx = sumXX - (sumX * sumX) / n; 173 double sxy = sumXY - (sumX * sumY) / n; 174 double xbar = sumX / n; 175 double ybar = sumY / n; 176 177 double[] result = new double[2]; 178 result[1] = sxy / sxx; 179 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 180 181 return result; 182 183 } 184 185 /** 186 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 187 * the data using a power regression equation. The result is returned as 188 * an array, where double[0] --> a, and double[1] --> b. 189 * 190 * @param data the data. 191 * @param series the series to fit the regression line against. 192 * 193 * @return The parameters. 194 */ 195 public static double[] getPowerRegression(XYDataset data, int series) { 196 197 int n = data.getItemCount(series); 198 if (n < 2) { 199 throw new IllegalArgumentException("Not enough data."); 200 } 201 202 double sumX = 0; 203 double sumY = 0; 204 double sumXX = 0; 205 double sumXY = 0; 206 for (int i = 0; i < n; i++) { 207 double x = Math.log(data.getXValue(series, i)); 208 double y = Math.log(data.getYValue(series, i)); 209 sumX += x; 210 sumY += y; 211 double xx = x * x; 212 sumXX += xx; 213 double xy = x * y; 214 sumXY += xy; 215 } 216 double sxx = sumXX - (sumX * sumX) / n; 217 double sxy = sumXY - (sumX * sumY) / n; 218 double xbar = sumX / n; 219 double ybar = sumY / n; 220 221 double[] result = new double[2]; 222 result[1] = sxy / sxx; 223 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 224 225 return result; 226 227 } 228 229 /** 230 * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 231 * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n, 232 * fitted to the data using a polynomial regression equation. 233 * The result is returned as an array with a length of n + 2, 234 * where double[0] --> a0, double[1] --> a1, .., double[n] --> an. 235 * and double[n + 1] is the correlation coefficient R2 236 * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german 237 * edition), pp. 243ff and 327ff. 238 * 239 * @param dataset the dataset (<code>null</code> not permitted). 240 * @param series the series to fit the regression line against (the series 241 * must have at least order + 1 non-NaN items). 242 * @param order the order of the function (> 0). 243 * 244 * @return The parameters. 245 * 246 * @since 1.0.14 247 */ 248 public static double[] getPolynomialRegression(XYDataset dataset, int series, int order) { 249 if (dataset == null) { 250 throw new IllegalArgumentException("Null 'dataset' argument."); 251 } 252 int itemCount = dataset.getItemCount(series); 253 if (itemCount < order + 1) { 254 throw new IllegalArgumentException("Not enough data."); 255 } 256 int validItems = 0; 257 double[][] data = new double[2][itemCount]; 258 for(int item = 0; item < itemCount; item++){ 259 double x = dataset.getXValue(series, item); 260 double y = dataset.getYValue(series, item); 261 if (!Double.isNaN(x) && !Double.isNaN(y)){ 262 data[0][validItems] = x; 263 data[1][validItems] = y; 264 validItems++; 265 } 266 } 267 if (validItems < order + 1) { 268 throw new IllegalArgumentException("Not enough data."); 269 } 270 int equations = order + 1; 271 int coefficients = order + 2; 272 double[] result = new double[equations + 1]; 273 double[][] matrix = new double[equations][coefficients]; 274 double sumX = 0.0; 275 double sumY = 0.0; 276 277 for(int item = 0; item < validItems; item++){ 278 sumX += data[0][item]; 279 sumY += data[1][item]; 280 for(int eq = 0; eq < equations; eq++){ 281 for(int coe = 0; coe < coefficients - 1; coe++){ 282 matrix[eq][coe] += Math.pow(data[0][item],eq + coe); 283 } 284 matrix[eq][coefficients - 1] += data[1][item] 285 * Math.pow(data[0][item],eq); 286 } 287 } 288 double[][] subMatrix = calculateSubMatrix(matrix); 289 for (int eq = 1; eq < equations; eq++) { 290 matrix[eq][0] = 0; 291 for (int coe = 1; coe < coefficients; coe++) { 292 matrix[eq][coe] = subMatrix[eq - 1][coe - 1]; 293 } 294 } 295 for (int eq = equations - 1; eq > -1; eq--) { 296 double value = matrix[eq][coefficients - 1]; 297 for (int coe = eq; coe < coefficients -1; coe++) { 298 value -= matrix[eq][coe] * result[coe]; 299 } 300 result[eq] = value / matrix[eq][eq]; 301 } 302 double meanY = sumY / validItems; 303 double yObsSquare = 0.0; 304 double yRegSquare = 0.0; 305 for (int item = 0; item < validItems; item++) { 306 double yCalc = 0; 307 for (int eq = 0; eq < equations; eq++) { 308 yCalc += result[eq] * Math.pow(data[0][item],eq); 309 } 310 yRegSquare += Math.pow(yCalc - meanY, 2); 311 yObsSquare += Math.pow(data[1][item] - meanY, 2); 312 } 313 double rSquare = yRegSquare / yObsSquare; 314 result[equations] = rSquare; 315 return result; 316 } 317 318 /** 319 * Returns a matrix with the following features: (1) the number of rows 320 * and columns is 1 less than that of the original matrix; (2)the matrix 321 * is triangular, i.e. all elements a (row, column) with column > row are 322 * zero. This method is used for calculating a polynomial regression. 323 * 324 * @param matrix the start matrix. 325 * 326 * @return The new matrix. 327 */ 328 private static double[][] calculateSubMatrix(double[][] matrix){ 329 int equations = matrix.length; 330 int coefficients = matrix[0].length; 331 double[][] result = new double[equations - 1][coefficients - 1]; 332 for (int eq = 1; eq < equations; eq++) { 333 double factor = matrix[0][0] / matrix[eq][0]; 334 for (int coe = 1; coe < coefficients; coe++) { 335 result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe] 336 * factor; 337 } 338 } 339 if (equations == 1) { 340 return result; 341 } 342 // check for zero pivot element 343 if (result[0][0] == 0) { 344 boolean found = false; 345 for (int i = 0; i < result.length; i ++) { 346 if (result[i][0] != 0) { 347 found = true; 348 double[] temp = result[0]; 349 for (int j = 0; j < result[i].length; j++) { 350 result[0][j] = result[i][j]; 351 } 352 for (int j = 0; j < temp.length; j++) { 353 result[i][j] = temp[j]; 354 } 355 break; 356 } 357 } 358 if (!found) { 359 System.out.println("Equation has no solution!"); 360 return new double[equations - 1][coefficients - 1]; 361 } 362 } 363 double[][] subMatrix = calculateSubMatrix(result); 364 for (int eq = 1; eq < equations - 1; eq++) { 365 result[eq][0] = 0; 366 for (int coe = 1; coe < coefficients - 1; coe++) { 367 result[eq][coe] = subMatrix[eq - 1][coe - 1]; 368 } 369 } 370 return result; 371 } 372 373}