LinearRegression.py | searchcode

/LinearRegression.py

https://bitbucket.org/jiefujie/hw2_1_9815 · Python · 69 lines · 28 code · 3 blank · 38 comment · 7 complexity · 29ae04c6d543ae0543f963fb0fa9ab09 MD5 · raw file


import pandas
import numpy

class LinearRegression:
    '''
    Ordinary least-squares regression over the columns of a pandas DataFrame.

    One column is chosen as the dependent variable; all remaining columns are
    used as regressors. The coefficients are found by solving the normal
    equations A^t*A x = A^t*b with a Cholesky factorisation of A^t*A.
    '''

    def __init__(self, data_frame):
        '''
        Reads data in DataFrame form. Data should have relevant column labels.
        '''
        self.data = data_frame
        # Defined up front so get_last_error() can be called safely before any
        # regression has been run (it then returns None).
        self.last_error = None

    def get_data(self):
        '''
        Returns data as DataFrame object
        '''
        return self.data

    def add_constant_term(self):
        '''
        Void function. Adds a column of 1's with 'const' as label
        Allows for linear regression with a constant term
        Appends column to right end of table.

        The stored data is rebound to a new DataFrame; the frame originally
        passed in is not modified. pandas' join raises ValueError if a
        'const' column is already present.
        '''
        ones = pandas.Series(1., index=self.data.index, name='const')
        self.data = self.data.join(ones)

    def regress(self, col_label):
        '''
        Regresses the column col_label on all other columns of the data.

        col_label: label of the dependent-variable column.

        Returns the regression coefficients as a one-column DataFrame
        (column 'coeffs') indexed by the labels of the regressor columns.
        The residual norm ||A*x - b|| is stored and can be read back with
        get_last_error(). The stored data itself is left untouched.

        Raises KeyError if col_label is not a column of the data, and
        numpy.linalg.LinAlgError if A^t*A is not positive definite
        (e.g. linearly dependent regressor columns).
        '''
        # Separate the dependent variable (b) from the regressors (A).
        # drop() returns a new frame, so self.data is not mutated.
        response = self.data[col_label]
        regressors = self.data.drop(col_label, axis=1)

        # Plain float ndarrays instead of numpy.matrix, which NumPy no longer
        # recommends; b is kept as an (n, 1) column vector.
        A = numpy.asarray(regressors, dtype=float)
        b = numpy.asarray(response, dtype=float).reshape(-1, 1)

        # Solve Ax=b in the least-squares sense via A^t*A x = A^t*b.
        M = A.T.dot(A)
        b_new = A.T.dot(b)

        # Cholesky: M = L*L^t. Solve L*y = A^t*b, then L^t*x = y.
        # x holds the linear regression coefficients.
        L = numpy.linalg.cholesky(M)
        y = numpy.linalg.solve(L, b_new)
        x = numpy.linalg.solve(L.T, y)

        # Store the approximation error of this regression.
        self.last_error = numpy.linalg.norm(A.dot(x) - b)

        return pandas.DataFrame(x, index=regressors.columns, columns=['coeffs'])

    def get_last_error(self):
        '''
        Returns the error term for the previously performed regression,
        or None if no regression has been performed yet.
        '''
        return self.last_error

Tech Fingerprint

NumPy
Pandas

Alerts (3)

Complexity hotspot; lines 19 to 20 (total complexity: 3)
19 20
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
33