# NumPy Pandas Matplotlib
import numpy as np
import matplotlib.pyplot as plt
'''
双特征,矩阵化
'''
1. Min-Max 归一化及其逆操作
1.1 输入数据归一化
def normalize1(sample, data):
    """Min-max scale ``sample`` using the value range of ``data``.

    Args:
        sample: Value(s) to scale; anything that supports NumPy arithmetic
            (for example a scalar or an ndarray).
        data: Reference values whose minimum and maximum define the scale.

    Returns:
        ``(sample - min(data)) / (max(data) - min(data))``. Members of
        ``data`` itself are mapped into ``[0, 1]``.

    Note:
        The denominator is zero when every value in ``data`` is equal, so
        ``data`` must contain at least two distinct values.
    """
    max_value = np.max(data)
    min_value = np.min(data)
    return (sample - min_value) / (max_value - min_value)
1.2 预测结果去归一化
def d_normalize1(sample, data):
    """Undo min-max scaling, mapping ``sample`` back to the scale of ``data``.

    Args:
        sample: Min-max scaled value(s); a scalar or an ndarray.
        data: The reference values whose minimum and maximum defined the
            forward scaling.

    Returns:
        ``sample * (max(data) - min(data)) + min(data)``.
    """
    max_value = np.max(data)
    min_value = np.min(data)
    return sample * (max_value - min_value) + min_value
2. Z-score 标准化及其逆操作
2.1 输入数据标准化
def normalize2(sample, data):
    """Z-score standardize ``sample`` with the statistics of ``data``.

    Args:
        sample: Value(s) to standardize; a scalar or an ndarray.
        data: Reference values supplying the mean and standard deviation.

    Returns:
        ``(sample - mean(data)) / std(data)``, where ``std`` is NumPy's
        default (population, ``ddof=0``) standard deviation.

    Note:
        The standard deviation is zero when every value in ``data`` is
        equal, so ``data`` must contain at least two distinct values.
    """
    mean = np.mean(data)
    stds = np.std(data)
    return (sample - mean) / stds
2.2 预测结果去标准化
def d_normalize2(sample, data):
    """Undo Z-score standardization, mapping ``sample`` back to data scale.

    Args:
        sample: Standardized value(s); a scalar or an ndarray.
        data: The reference values whose mean and standard deviation
            defined the forward transform.

    Returns:
        ``sample * std(data) + mean(data)``, using NumPy's default
        (population, ``ddof=0``) standard deviation.
    """
    mean = np.mean(data)
    stds = np.std(data)
    return sample * stds + mean
3. 模型定义
def model(X, W, b):
    """Evaluate the linear model ``X @ W + b``.

    Args:
        X: Input matrix with one sample per row.
        W: Weight matrix whose row count equals the column count of ``X``.
        b: Bias, broadcast onto every row of the product.

    Returns:
        The matrix product of ``X`` and ``W`` with ``b`` added.
    """
    return X @ W + b
4. 损失函数
使用均方误差(MSE)作为损失函数。下面的函数返回逐元素的平方误差,对其取平均即得到 MSE:
def loss_func(predict, label):
    """Return the element-wise squared error between prediction and label.

    No reduction is applied: the result has the broadcast shape of the two
    inputs. Averaging it yields the mean squared error (MSE).

    Args:
        predict: Model output(s).
        label: Target value(s), broadcast-compatible with ``predict``.

    Returns:
        ``(predict - label) ** 2``.
    """
    return (predict - label) ** 2
5. 主程序
if __name__ == "__main__":
    # Fixed seed so the synthetic data set is identical on every run.
    np.random.seed(17)
    # Ten synthetic samples. Prices follow a linear ramp (step 5000) with
    # Gaussian noise scaled by 3000; years are the integers 2014..2023;
    # areas follow a linear ramp (step 6) with unit Gaussian noise.
    prices_raw = np.arange(37294, 87294, 5000) + np.random.randn(10) * 3000
    years_raw = np.arange(2014, 2024)
    areas_raw = np.arange(80, 140, 6) + np.random.randn(10)
    # Design matrix built from the raw (unscaled) features, left disabled:
    # X = np.vstack((areas_raw, years_raw)).T
5.1 数据标准化
# Z-score each series against its own mean and standard deviation, then stack
# the two feature series as columns of an (n_samples, 2) design matrix.
prices, years, areas = (
    normalize2(raw, raw) for raw in (prices_raw, years_raw, areas_raw)
)
X = np.vstack([areas, years]).T
5.2 模型初始化与训练参数设置
- 设置训练周期(epoch)为 100。
- 模型输入特征维度为 2,输出特征维度为 1。
- 初始化权重矩阵 W 为全零矩阵,偏置 b 为 0。
- 设置学习率 lr 为 0.01。
- 设定批量大小 batch_size 为 3(小批量梯度下降)。
# Model dimensions: two input features and a single output.
in_features, out_features = 2, 1
# Parameters start at zero: an (in_features, out_features) weight matrix and
# a scalar bias.
W = np.zeros(shape=(in_features, out_features))
b = 0.0
# Optimisation settings: number of passes over the data, step size, and the
# number of rows consumed per update.
epoch = 100
lr = 0.01
batch_size = 3
5.3 小批量梯度下降训练循环
# Mini-batch gradient descent: every epoch sweeps the samples in order and
# takes one update step per batch of at most ``batch_size`` rows.
for e in range(epoch):
    # range() stops as soon as every row has been consumed. The previous
    # ``while cursor <= len(X)`` ran one extra, empty batch whenever len(X)
    # was a multiple of batch_size, which turned the bias into NaN.
    for cursor in range(0, len(X), batch_size):
        batch_inputs = X[cursor: cursor + batch_size]
        # The final batch may be shorter; slicing clamps at the array end.
        actual_batchsize = len(batch_inputs)
        batch_labels = prices[cursor: cursor + batch_size].reshape(actual_batchsize, 1)

        predict = model(batch_inputs, W, b)
        loss = loss_func(predict, batch_labels)

        # Gradient of the batch-mean squared error w.r.t. the predictions.
        G = 2 * (predict - batch_labels) / actual_batchsize
        dW = batch_inputs.T @ G
        # The bias is shared by every row, so its gradient is the SUM of G.
        # G already carries the 1/batch factor; averaging here (as the old
        # ``db = G`` followed by ``np.mean(b)`` did) shrank the bias step by
        # the batch size a second time.
        db = np.sum(G)

        W -= lr * dW
        b -= lr * db
    # Per-sample squared errors of the epoch's final batch. Indentation was
    # lost in the source; reporting once per epoch is assumed.
    print("Loss: {}".format(loss))
5.4 预测功能
# Query the trained model for area 90 in year 2025: standardize the query with
# the training statistics, run the model, then map the output back to the raw
# price scale.
area_std = normalize2(90, areas_raw)
year_std = normalize2(2025, years_raw)
inputs = np.array((area_std, year_std)).reshape(1, 2)
predict = d_normalize2(model(inputs, W, b), prices_raw)
pass  # no-op kept from the original; presumably a breakpoint anchor
6. 完整代码
# -*- coding: utf-8 -*-
# NumPy Pandas Matplotlib
import numpy as np
import matplotlib.pyplot as plt

'''
双特征,矩阵化
'''


def normalize1(sample, data):
    """Min-max scale ``sample`` using the value range of ``data``.

    Returns ``(sample - min(data)) / (max(data) - min(data))``. The
    denominator is zero when every value in ``data`` is equal.
    """
    max_value = np.max(data)
    min_value = np.min(data)
    return (sample - min_value) / (max_value - min_value)


def d_normalize1(sample, data):
    """Undo min-max scaling: ``sample * (max - min) + min`` of ``data``."""
    max_value = np.max(data)
    min_value = np.min(data)
    return sample * (max_value - min_value) + min_value


def normalize2(sample, data):
    """Z-score standardize ``sample`` with the mean and std of ``data``.

    Returns ``(sample - mean(data)) / std(data)`` using NumPy's default
    (population) standard deviation, which is zero for constant ``data``.
    """
    mean = np.mean(data)
    stds = np.std(data)
    return (sample - mean) / stds


def d_normalize2(sample, data):
    """Undo Z-score standardization: ``sample * std(data) + mean(data)``."""
    mean = np.mean(data)
    stds = np.std(data)
    return sample * stds + mean


def model(X, W, b):
    """Evaluate the linear model ``X @ W + b`` (``b`` is broadcast)."""
    return X @ W + b


def loss_func(predict, label):
    """Return the element-wise squared error ``(predict - label) ** 2``.

    No reduction is applied; averaging the result gives the MSE.
    """
    return (predict - label) ** 2


if __name__ == "__main__":
    # Fixed seed so the synthetic data set is identical on every run.
    np.random.seed(17)
    # Ten synthetic samples: linear ramps with Gaussian noise on prices and
    # areas, and the integer years 2014..2023.
    prices_raw = np.arange(37294, 87294, 5000) + np.random.randn(10) * 3000
    years_raw = np.arange(2014, 2024)
    areas_raw = np.arange(80, 140, 6) + np.random.randn(10)
    # Design matrix built from the raw (unscaled) features, left disabled:
    # X = np.vstack((areas_raw, years_raw)).T

    # Standardize every series with its own statistics, then stack the two
    # features as columns of an (n_samples, 2) design matrix.
    prices = normalize2(prices_raw, prices_raw)
    years = normalize2(years_raw, years_raw)
    areas = normalize2(areas_raw, areas_raw)
    X = np.vstack((areas, years)).T

    # Hyper-parameters and zero-initialized parameters.
    epoch = 100
    in_features = 2
    out_features = 1
    W = np.zeros((in_features, out_features))
    b = 0.
    lr = 0.01
    batch_size = 3

    # Mini-batch gradient descent, one update step per batch.
    for e in range(epoch):
        # range() stops once every row has been consumed. The previous
        # ``while cursor <= len(X)`` ran one extra, empty batch whenever
        # len(X) was a multiple of batch_size, turning the bias into NaN.
        for cursor in range(0, len(X), batch_size):
            batch_inputs = X[cursor: cursor + batch_size]
            # The final batch may be shorter; slicing clamps at the end.
            actual_batchsize = len(batch_inputs)
            batch_labels = prices[cursor: cursor + batch_size].reshape(actual_batchsize, 1)

            predict = model(batch_inputs, W, b)
            loss = loss_func(predict, batch_labels)

            # Gradient of the batch-mean squared error w.r.t. predictions.
            G = 2 * (predict - batch_labels) / actual_batchsize
            dW = batch_inputs.T @ G
            # The bias gradient is the SUM of G: G already carries the
            # 1/batch factor, so averaging (the old ``db = G`` followed by
            # ``np.mean(b)``) shrank the bias step a second time.
            db = np.sum(G)

            W -= lr * dW
            b -= lr * db
        # Per-sample squared errors of the epoch's final batch. Indentation
        # was lost in the source; reporting once per epoch is assumed.
        print("Loss: {}".format(loss))

    # Predict for area 90 in year 2025: standardize the query with the
    # training statistics, then map the output back to the raw price scale.
    inputs = np.array((normalize2(90, areas_raw), normalize2(2025, years_raw))).reshape(1, 2)
    predict = model(inputs, W, b)
    predict = d_normalize2(predict, prices_raw)
    pass  # no-op kept from the original; presumably a breakpoint anchor
7. 运行结果