08 实现我们自己的 Linear Regression
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import datetime;print("Run by CYJ,",datetime.datetime.now())
Run by CYJ, 2022-01-20 20:06:04.130127
boston = datasets.load_boston()X = boston.data
y = boston.targetX = X[y < 50.0]
y = y[y < 50.0]
X.shape
(490, 13)
from playML.model_selection import train_test_splitX_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
使用我们自己制作 Linear Regression
代码参见 这里from playML.LinearRegression import LinearRegressionreg = LinearRegression()
reg.fit_normal(X_train, y_train)
LinearRegression()
reg.coef_
array([-1.20354261e-01, 3.64423279e-02, -3.61493155e-02, 5.12978140e-02,-1.15775825e+01, 3.42740062e+00, -2.32311760e-02, -1.19487594e+00,2.60101728e-01, -1.40219119e-02, -8.35430488e-01, 7.80472852e-03,-3.80923751e-01])
reg.intercept_
34.11739972320593
reg.score(X_test, y_test)
0.8129794056212907
封装的LinearRegression.py
import numpy as np
from .metrics import r2_scoreclass LinearRegression:def __init__(self):"""初始化Linear Regression模型"""self.coef_ = Noneself.intercept_ = Noneself._theta = Nonedef fit_normal(self, X_train, y_train):"""根据训练数据集X_train, y_train训练Linear Regression模型"""assert X_train.shape[0] == y_train.shape[0], \"the size of X_train must be equal to the size of y_train"X_b = np.hstack([np.ones((len(X_train), 1)), X_train])self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)self.intercept_ = self._theta[0]self.coef_ = self._theta[1:]return selfdef predict(self, X_predict):"""给定待预测数据集X_predict,返回表示X_predict的结果向量"""assert self.intercept_ is not None and self.coef_ is not None, \"must fit before predict!"assert X_predict.shape[1] == len(self.coef_), \"the feature number of X_predict must be equal to X_train"X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])return X_b.dot(self._theta)def score(self, X_test, y_test):"""根据测试数据集 X_test 和 y_test 确定当前模型的准确度"""y_predict = self.predict(X_test)return r2_score(y_test, y_predict)def __repr__(self):return "LinearRegression()"
封装的metrics.py
import numpy as np
from math import sqrtdef accuracy_score(y_true, y_predict):"""计算y_true和y_predict之间的准确率"""assert len(y_true) == len(y_predict), \"the size of y_true must be equal to the size of y_predict"return np.sum(y_true == y_predict) / len(y_true)def mean_squared_error(y_true, y_predict):"""计算y_true和y_predict之间的MSE"""assert len(y_true) == len(y_predict), \"the size of y_true must be equal to the size of y_predict"return np.sum((y_true - y_predict)**2) / len(y_true)def root_mean_squared_error(y_true, y_predict):"""计算y_true和y_predict之间的RMSE"""return sqrt(mean_squared_error(y_true, y_predict))def mean_absolute_error(y_true, y_predict):"""计算y_true和y_predict之间的MAE"""assert len(y_true) == len(y_predict), \"the size of y_true must be equal to the size of y_predict"return np.sum(np.absolute(y_true - y_predict)) / len(y_true)def r2_score(y_true, y_predict):"""计算y_true和y_predict之间的R Square"""return 1 - mean_squared_error(y_true, y_predict)/np.var(y_true)