1. SGD梯度下降公式
当梯度大于0时,变小,往左边找梯度接近0的值。
当梯度小于0时,减去一个负数会变大,往右边找梯度接近0的值,此时梯度从负数到0上升
2.Adam优化器实现原理
#coding:utf8import torch
import torch.nn as nn
import numpy as np
import copy"""
基于pytorch的网络编写
手动实现梯度计算和反向传播
加入激活函数
"""class TorchModel(nn.Module):def __init__(self, hidden_size):super(TorchModel, self).__init__()self.layer = nn.Linear(hidden_size, hidden_size, bias=False) #w = hidden_size * hidden_size wx+b -> wxself.activation = torch.sigmoidself.loss = nn.functional.mse_loss #loss采用均方差损失#当输入真实标签,返回loss值;无真实标签,返回预测值def forward(self, x, y=None):y_pred = self.layer(x)y_pred = self.activation(y_pred)if y is not None:return self.loss(y_pred, y)else:return y_pred#自定义模型,接受一个参数矩阵作为入参
class DiyModel:def __init__(self, weight):self.weight = weightdef forward(self, x, y=None):x = np.dot(x, self.weight.T)y_pred = self.diy_sigmoid(x)if y is not None:return self.diy_mse_loss(y_pred, y)else:return y_pred#sigmoiddef diy_sigmoid(self, x):return 1 / (1 + np.exp(-x))#手动实现mse,均方差lossdef diy_mse_loss(self, y_pred, y_true):return np.sum(np.square(y_pred - y_true)) / len(y_pred)#手动实现梯度计算def calculate_grad(self, y_pred, y_true, x):#前向过程# wx = np.dot(self.weight, x)# sigmoid_wx = self.diy_sigmoid(wx)# loss = self.diy_mse_loss(sigmoid_wx, y_true)#反向过程# 均方差函数 (y_pred - y_true) ^ 2 / n 的导数 = 2 * (y_pred - y_true) / n , 结果为2维向量grad_mse = 2/len(x) * (y_pred - y_true)# sigmoid函数 y = 1/(1+e^(-x)) 的导数 = y * (1 - y), 结果为2维向量grad_sigmoid = y_pred * (1 - y_pred)# wx矩阵运算,见ppt拆解, wx = [w11*x0 + w21*x1, w12*x0 + w22*x1]#导数链式相乘grad_w11 = grad_mse[0] * grad_sigmoid[0] * x[0]grad_w12 = grad_mse[1] * grad_sigmoid[1] * x[0]grad_w21 = grad_mse[0] * grad_sigmoid[0] * x[1]grad_w22 = grad_mse[1] * grad_sigmoid[1] * x[1]grad = np.array([[grad_w11, grad_w12],[grad_w21, grad_w22]])#由于pytorch存储做了转置,输出时也做转置处理return grad.T#梯度更新
def diy_sgd(grad, weight, learning_rate):return weight - learning_rate * grad#adam梯度更新
def diy_adam(grad, weight):#参数应当放在外面,此处为保持后方代码整洁简单实现一步alpha = 1e-3 #学习率beta1 = 0.9 #超参数beta2 = 0.999 #超参数eps = 1e-8 #超参数t = 0 #初始化mt = 0 #初始化vt = 0 #初始化#开始计算t = t + 1gt = gradmt = beta1 * mt + (1 - beta1) * gtvt = beta2 * vt + (1 - beta2) * gt ** 2mth = mt / (1 - beta1 ** t)vth = vt / (1 - beta2 ** t)weight = weight - (alpha * mth/ (np.sqrt(vth) + eps))return weightx = np.array([-0.5, 0.1]) #输入
y = np.array([0.1, 0.2]) #预期输出#torch实验
torch_model = TorchModel(2)
torch_model_w = torch_model.state_dict()["layer.weight"]
print(torch_model_w, "初始化权重")
numpy_model_w = copy.deepcopy(torch_model_w.numpy())
#numpy array -> torch tensor, unsqueeze的目的是增加一个batchsize维度
torch_x = torch.from_numpy(x).float().unsqueeze(0)
torch_y = torch.from_numpy(y).float().unsqueeze(0)
#torch的前向计算过程,得到loss
torch_loss = torch_model(torch_x, torch_y)
print("torch模型计算loss:", torch_loss)
# #手动实现loss计算
diy_model = DiyModel(numpy_model_w)
diy_loss = diy_model.forward(x, y)
print("diy模型计算loss:", diy_loss)# # #设定优化器
learning_rate = 0.1
# optimizer = torch.optim.SGD(torch_model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(torch_model.parameters())
# optimizer.zero_grad()
# #
# # #pytorch的反向传播操作
torch_loss.backward()
print(torch_model.layer.weight.grad, "torch 计算梯度") #查看某层权重的梯度# # #手动实现反向传播
grad = diy_model.calculate_grad(diy_model.forward(x), y, x)
print(grad, "diy 计算梯度")
# #
# #torch梯度更新
optimizer.step()
# # #查看更新后权重
update_torch_model_w = torch_model.state_dict()["layer.weight"]
print(update_torch_model_w, "torch更新后权重")
# #
# # #手动梯度更新
# diy_update_w = diy_sgd(grad, numpy_model_w, learning_rate)
diy_update_w = diy_adam(grad, numpy_model_w)
print(diy_update_w, "diy更新权重")
3. RNN
#coding:utf8import torch
import torch.nn as nn
import numpy as np"""
手动实现简单的神经网络
使用pytorch实现RNN
手动实现RNN
对比
"""class TorchRNN(nn.Module):def __init__(self, input_size, hidden_size):super(TorchRNN, self).__init__()self.layer = nn.RNN(input_size, hidden_size, bias=False, batch_first=True)def forward(self, x):return self.layer(x)#自定义RNN模型
class DiyModel:def __init__(self, w_ih, w_hh, hidden_size):self.w_ih = w_ihself.w_hh = w_hhself.hidden_size = hidden_sizedef forward(self, x):ht = np.zeros((self.hidden_size))output = []for xt in x:ux = np.dot(self.w_ih, xt)wh = np.dot(self.w_hh, ht)ht_next = np.tanh(ux + wh)output.append(ht_next)ht = ht_nextreturn np.array(output), htx = np.array([[1, 2, 3],[3, 4, 5],[5, 6, 7]]) #网络输入#torch实验
hidden_size = 4
torch_model = TorchRNN(3, hidden_size)# print(torch_model.state_dict())
w_ih = torch_model.state_dict()["layer.weight_ih_l0"]
w_hh = torch_model.state_dict()["layer.weight_hh_l0"]
print(w_ih, w_ih.shape)
print(w_hh, w_hh.shape)
#
torch_x = torch.FloatTensor([x])
output, h = torch_model.forward(torch_x)
print(h)
print(output.detach().numpy(), "torch模型预测结果")
print(h.detach().numpy(), "torch模型预测隐含层结果")
print("---------------")
diy_model = DiyModel(w_ih, w_hh, hidden_size)
output, h = diy_model.forward(x)
print(output, "diy模型预测结果")
print(h, "diy模型预测隐含层结果")
#coding:utf8import torch
import torch.nn as nn
import numpy as np"""
手动实现简单的神经网络
使用pytorch实现CNN
手动实现CNN
对比
"""
#一个二维卷积
class TorchCNN(nn.Module):def __init__(self, in_channel, out_channel, kernel):super(TorchCNN, self).__init__()self.layer = nn.Conv2d(in_channel, out_channel, kernel, bias=False)def forward(self, x):return self.layer(x)#自定义CNN模型
class DiyModel:def __init__(self, input_height, input_width, weights, kernel_size):self.height = input_heightself.width = input_widthself.weights = weightsself.kernel_size = kernel_sizedef forward(self, x):output = []for kernel_weight in self.weights:kernel_weight = kernel_weight.squeeze().numpy() #shape : 2x2kernel_output = np.zeros((self.height - kernel_size + 1, self.width - kernel_size + 1))for i in range(self.height - kernel_size + 1):for j in range(self.width - kernel_size + 1):window = x[i:i+kernel_size, j:j+kernel_size]kernel_output[i, j] = np.sum(kernel_weight * window) # np.dot(a, b) != a * boutput.append(kernel_output)return np.array(output)x = np.array([[0.1, 0.2, 0.3, 0.4],[-3, -4, -5, -6],[5.1, 6.2, 7.3, 8.4],[-0.7, -0.8, -0.9, -1]]) #网络输入#torch实验
in_channel = 1
out_channel = 3
kernel_size = 2
torch_model = TorchCNN(in_channel, out_channel, kernel_size)
print(torch_model.state_dict())
torch_w = torch_model.state_dict()["layer.weight"]
# print(torch_w.numpy().shape)
torch_x = torch.FloatTensor([[x]])
output = torch_model.forward(torch_x)
output = output.detach().numpy()
print(output, output.shape, "torch模型预测结果\n")
print("---------------")
diy_model = DiyModel(x.shape[0], x.shape[1], torch_w, kernel_size)
output = diy_model.forward(x)
print(output, "diy模型预测结果")