Tushare接口+LSTM模型预测股票走势
Tushare ID:423115
Tushare接口优势以及使用方法
Tushare 是国内较为热门的财经数据接口,数据源稳定、不易出错,速度较快,能够满足开发需求。下面介绍它的基本使用方法。
- 注册账号:注册后即可获取个人专属的 token;
- 在 Python 中 `import tushare`,设置 token 并初始化接口:
# Import the Tushare SDK.
import tushare as ts
# Register the personal token obtained after signing up (replace the placeholder).
ts.set_token('你的token码')
# Create the Pro API client; the later snippets query data through `pro`.
pro = ts.pro_api()
数据预处理
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tushare as ts

# Project-local settings module. This script reads `STOCKCODE` and `timestep`
# from it; the original snippet used the name without importing it.
import stock_config

ts.set_token('你的token码')
pro = ts.pro_api()

# Fetch the data set: daily quotes for the configured stock code, starting
# from 2016-07-01.
ts_code_temp = stock_config.STOCKCODE
df = pro.daily(ts_code=ts_code_temp, start_date='20160701')

# Put the rows in chronological order (oldest first). Sorting on the
# `trade_date` column (YYYYMMDD strings) is explicit, instead of reversing the
# index and relying on the order in which the API happens to return rows.
df = df.sort_values('trade_date')
df_close = df.reset_index(drop=True)[['trade_date', 'close']]

# Normalise the closing prices into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
df_min = scaler.fit_transform(np.array(df_close['close']).reshape(-1, 1))

# The most recent `timestep` scaled values, kept as the input window for
# actual predictions.
df_lasttendays = df_min[-stock_config.timestep:]
LSTM模型
# LSTM
import data_tushare
import numpy
import numpy as np
import matplotlib.pyplot as plt

T = 10  # number of days in each learning (input) window
N = 3  # number of days to predict

# Scaled closing prices prepared by the data_tushare module.
df2 = data_tushare.df_min

# Split the data set in order: the first 85% of rows for training, the
# remainder for testing.
training_size = int(len(df2) * 0.85)
test_size = len(df2) - training_size
train_data, test_data = df2[0:training_size, :], df2[training_size:len(df2), :1]
# convert an array of values into a dataset matrix
def create_dataset(dataset, time_step=1):
    """Convert a 2-D series into supervised (window, next value) samples.

    Args:
        dataset: 2-D array; only column 0 is read.
        time_step: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[i]`` holds rows
        ``i .. i + time_step - 1`` of column 0 and ``y[i]`` holds row
        ``i + time_step``. When ``dataset`` is too short to yield a sample,
        ``X`` has shape ``(0, time_step)`` and ``y`` has shape ``(0,)``.
    """
    # NOTE: the trailing "- 1" leaves the last possible window unused. It is
    # kept on purpose because the plotting offsets later in this script are
    # computed against exactly this sample count.
    n_samples = len(dataset) - time_step - 1
    if n_samples <= 0:
        # Keep the 2-D shape so that callers can still reshape the result.
        return np.empty((0, max(time_step, 0))), np.empty((0,))

    dataX = [dataset[i:i + time_step, 0] for i in range(n_samples)]
    dataY = [dataset[i + time_step, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)


# Reshape into X = t, t+1, ..., t+T-1 and y = t+T.
time_step = T
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

# Reshape input to [samples, time steps, features], which is required for LSTM.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Create the stacked LSTM model. The layers are imported from the public
# `tensorflow.keras` namespace rather than the private `tensorflow.python`
# package.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(T, 1)))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Start fitting; the test split is passed as validation data.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=300, batch_size=45, verbose=1)

# Run the prediction on both splits to check performance.
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Transform the predictions back to the original (unscaled) form.
train_predict = data_tushare.scaler.inverse_transform(train_predict)
test_predict = data_tushare.scaler.inverse_transform(test_predict)

# Calculate the RMSE performance metric.
import math
from sklearn.metrics import mean_squared_error

# Test-data RMSE. `y_test` is still in the scaler's 0-1 range while
# `test_predict` has been inverse-transformed, so the targets are brought back
# to the same scale before the two are compared.
y_test_actual = data_tushare.scaler.inverse_transform(y_test.reshape(-1, 1))
rmse = math.sqrt(mean_squared_error(y_test_actual, test_predict))
print(f'RMSE = {rmse}')

# Plotting.
look_back = T
# Place the train predictions at the rows of the values they predict; every
# other row stays NaN.
trainPredictPlot = np.empty_like(df2)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict
# Shift the test predictions for plotting, so they start after the train
# segment plus the test split's own look-back window.
testPredictPlot = np.empty_like(df2)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(train_predict) + (look_back * 2) + 1:len(df2) - 1, :] = test_predict

plt.plot(data_tushare.scaler.inverse_transform(df2), 'blue', label="All data")
plt.plot(trainPredictPlot, 'red', label="Train data")
plt.plot(testPredictPlot, 'green', label="Test data")
plt.legend()
plt.show()
以上是数据预处理和模型训练部分。之后只需拉取合适的最新数据(例如上文保存的 df_lasttendays),调用训练好的模型即可进行预测。