import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing as fch
from clean2 import *
# Load the California-housing dataset, attach the target as a 'Price' column,
# then hand the frame to the project-local `manipulate` preprocessor (clean2),
# which exposes standardized data plus a train/test split.
housevalue = fch()
data = pd.DataFrame(housevalue.data, columns=housevalue.feature_names)
data['Price'] = housevalue.target  # fixed: escaped quotes were a syntax error

m = manipulate(data)  # NOTE(review): `manipulate` comes from `clean2` — behavior not visible here
std_data = m.data
train_data, test_data = m.train_data, m.test_data
class LRegression:
    """Linear/ridge regression solved in closed form via the normal equations.

    The last column of the input DataFrame is treated as the target; every
    other column is a feature.
    """

    def __init__(self, intercept=True):
        # Whether to prepend a column of ones (bias term) to the design matrix.
        self.intercept = intercept

    def fit(self, data, lam=0):
        """Fit theta = (X'X + lam*I)^-1 X'y on `data`.

        Parameters
        ----------
        data : DataFrame whose last column is the target.
        lam : ridge penalty; 0 gives ordinary least squares.

        Returns
        -------
        (SSE, coef_) where coef_ is a flat array of coefficients
        (intercept first when enabled), or None if X'X is singular.
        """
        X = data.iloc[:, :-1].values
        Y = data.iloc[:, -1].values
        if self.intercept:
            # Bias column of ones so theta[0] is the intercept.
            X = np.column_stack((np.ones(X.shape[0]), X))
        XTX = X.T @ X + np.eye(X.shape[1]) * lam
        if np.linalg.det(XTX) == 0:
            print('X is linear dependent, exiting now')
            return
        # Solve the normal equations directly instead of forming an inverse
        # (np.matrix / .I is deprecated and less numerically stable).
        theta = np.linalg.solve(XTX, X.T @ Y).reshape(-1, 1)
        Y_pred = X @ theta
        SSE = np.power(Y.reshape(-1, 1) - Y_pred, 2).sum()
        self.coef_ = theta  # column vector, kept for predict()
        return SSE, np.ravel(theta)

    def predict(self, data):
        """Predict on `data` (same layout as fit: last column is the target).

        Returns (Y_pred, SSE_pred): column vector of predictions and the
        sum of squared errors against the target column.
        """
        X = data.iloc[:, :-1].values
        Y = data.iloc[:, -1].values
        # Bug fix: only add the intercept column when the model was fit
        # with one — the original added it unconditionally, breaking
        # prediction for intercept=False models.
        if self.intercept:
            X = np.column_stack((np.ones(X.shape[0]), X))
        Y_pred = X @ self.coef_
        SSE_pred = np.power(Y - np.ravel(Y_pred), 2).sum()
        return Y_pred, SSE_pred
def solution(p, lam):
    """Soft-thresholding operator: shrink `p` toward zero by `lam`.

    Returns p + lam when p < -lam, p - lam when p > lam, and 0 inside
    the [-lam, lam] band. Used as the coordinate-wise lasso update.
    """
    if p < -lam:
        return p + lam
    if p > lam:
        return p - lam
    return 0
def lasso_regression(data, lam=0.1, num_inter=1000):
    """Lasso fit by cyclic coordinate descent with soft-thresholding.

    The last column of `data` is the target; an intercept column of ones
    is prepended. Columns are scaled to unit Euclidean norm so each
    coordinate update is the closed-form soft-threshold; coefficients are
    rescaled back before returning.

    Parameters
    ----------
    data : DataFrame whose last column is the target.
    lam : soft-threshold level (L1 penalty strength).
    num_inter : number of full coordinate sweeps.

    Returns
    -------
    1-D array of coefficients on the original column scale
    (intercept first).
    """
    X = data.iloc[:, :-1].values
    Y = data.iloc[:, -1].values
    X = np.column_stack((np.ones(X.shape[0]), X))
    # Per-column Euclidean norms; normalizing makes X_j' X_j == 1 so the
    # coordinate minimizer is exactly solution(p, lam).
    col_norm = np.sqrt(np.power(X, 2).sum(axis=0))
    X = X / col_norm
    theta = np.ones(X.shape[1])
    for _ in range(num_inter):
        for j in range(X.shape[1]):
            X_j = X[:, j]
            # Partial residual: leave coordinate j out of the prediction.
            residual = Y - X @ theta + theta[j] * X_j
            p = X_j @ residual
            # Soft-threshold update (inlined; replaces deprecated np.mat
            # algebra and the syntax-broken '\\' continuations).
            if p < -lam:
                theta[j] = p + lam
            elif p > lam:
                theta[j] = p - lam
            else:
                theta[j] = 0.0
    # Undo the column normalization.
    return theta / col_norm