PageRenderTime 106ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/LinearRegression.py

https://bitbucket.org/jiefujie/hw2_1_9815
Python | 69 lines | 57 code | 3 blank | 9 comment | 3 complexity | 29ae04c6d543ae0543f963fb0fa9ab09 MD5 | raw file
  1. import pandas
  2. import numpy
  3. class LinearRegression:
  4. def __init__(self,data_frame):
  5. '''
  6. Reads data in DataFrame form. Data should have relevant column labels.
  7. '''
  8. self.data = data_frame
  9. def get_data(self):
  10. '''
  11. Returns data as DataFrame object
  12. '''
  13. return self.data
  14. def add_constant_term(self):
  15. '''
  16. Void function. Adds a column of 1's with 'const' as label
  17. Allows for linear regression with a constant term
  18. Appends column to right end of table.
  19. '''
  20. const = pandas.Series(1., index = self.data.index)
  21. const = pandas.DataFrame(const, index = const.index, columns = ['const'])
  22. self.data = self.data.join(const)
  23. def regress(self, col_label):
  24. '''
  25. Removes the dependent variable from the data and creates a separate DataFrame from it
  26. '''
  27. data = self.data.copy()
  28. col = pandas.DataFrame(data[col_label], index = data.index, columns = [col_label])
  29. del data[col_label]
  30. '''
  31. Converts data into a matrix
  32. '''
  33. A = numpy.matrix(data)
  34. b = numpy.matrix(col)
  35. '''
  36. Solve Ax=b by solving A^t*Ax = A^t*b
  37. '''
  38. M = A.T*A
  39. b_new = A.T*b
  40. '''
  41. Performs Cholesky decomposition on A^t*A and solves for x
  42. x is equal to the linear regression coefficients
  43. '''
  44. L = numpy.linalg.cholesky(M)
  45. y = numpy.linalg.solve(L,b_new)
  46. x = numpy.linalg.solve(L.T,y)
  47. '''
  48. Stores the approximation error from our regression
  49. '''
  50. self.last_error = numpy.linalg.norm(A*x-b)
  51. '''
  52. Returns coefficients as a DataFrame object
  53. '''
  54. return pandas.DataFrame(x, index = data.columns, columns = ['coeffs'])
  55. def get_last_error(self):
  56. '''
  57. Returns the error term for the previously performed regression
  58. '''
  59. return self.last_error