/LinearRegression.py
Python | 69 lines | 57 code | 3 blank | 9 comment | 3 complexity | 29ae04c6d543ae0543f963fb0fa9ab09 MD5 | raw file
- import pandas
- import numpy
class LinearRegression:
    '''
    Ordinary least-squares linear regression over a pandas DataFrame.

    The instance holds a DataFrame whose columns are the variables. One
    column is chosen as the dependent variable when calling regress();
    every other column is used as an independent variable.
    '''

    def __init__(self, data_frame):
        '''
        Stores the data to regress on.

        data_frame: pandas DataFrame. Data should have relevant column
        labels, since they are used to label the returned coefficients.
        '''
        self.data = data_frame
        # Defined up front so get_last_error() returns None before any
        # regression has run instead of raising AttributeError.
        self.last_error = None

    def get_data(self):
        '''
        Returns data as DataFrame object
        '''
        return self.data

    def add_constant_term(self):
        '''
        Void function. Adds a column of 1's with 'const' as label.
        Allows for linear regression with a constant term.
        Appends column to right end of table.

        Raises whatever DataFrame.join raises if a 'const' column is
        already present (overlapping column labels).
        '''
        ones = pandas.Series(1., index=self.data.index, name='const')
        # join() returns a new frame, so the DataFrame object handed to
        # __init__ is not modified in place.
        self.data = self.data.join(ones)

    def regress(self, col_label):
        '''
        Fits col_label as a linear combination of all other columns.

        col_label: label of the column holding the dependent variable.

        Returns the coefficients as a DataFrame with a single 'coeffs'
        column, indexed by the labels of the independent-variable columns.
        Also stores the residual norm ||A*x - b||, retrievable through
        get_last_error().

        Raises KeyError if col_label is not a column of the data, and
        numpy.linalg.LinAlgError if the Cholesky factorisation of A^t*A
        fails (for example, linearly dependent columns).
        '''
        # Split off the dependent variable; drop() leaves self.data intact.
        target = self.data[col_label]
        features = self.data.drop(col_label, axis=1)

        # Work in floating point so integer-typed columns cannot overflow
        # when forming A^t*A. b is kept as an (n, 1) column vector.
        A = numpy.asarray(features, dtype=float)
        b = numpy.asarray(target, dtype=float).reshape(-1, 1)

        # Solve Ax=b in the least-squares sense via A^t*A x = A^t*b
        M = A.T.dot(A)
        b_new = A.T.dot(b)

        # Cholesky: M = L*L^t, so solve L*y = b_new, then L^t*x = y.
        # x is equal to the linear regression coefficients.
        L = numpy.linalg.cholesky(M)
        y = numpy.linalg.solve(L, b_new)
        x = numpy.linalg.solve(L.T, y)

        # Stores the approximation error from our regression
        self.last_error = numpy.linalg.norm(A.dot(x) - b)

        # Returns coefficients as a DataFrame object
        return pandas.DataFrame(x, index=features.columns, columns=['coeffs'])

    def get_last_error(self):
        '''
        Returns the error term for the previously performed regression,
        or None if regress() has not been called successfully yet.
        '''
        return self.last_error