基于股票信息的数据分析与可视化
项目简介:采用皮尔逊相关系数研究A股开盘前十分钟成交量变化与当日收盘价变化的相关性,最后将数据导入到Excel中做可视化分析。
结论:大部分都没有很强的相关性。
import baostock as bs
import pandas as pd
import matplotlib.pyplot as plt
import xlwings as xw
from scipy.stats import pearsonr
# 设置显示最大行和列
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', None)
#### 登陆系统 ####
lg = bs.login()
# 显示登陆返回信息
print('login respond error_code:'+lg.error_code)
print('login respond error_msg:'+lg.error_msg)
def get_code(start_date):stock_rs = bs.query_all_stock(start_date)stock_df = stock_rs.get_data()stock_df.drop(stock_df[stock_df.code < 'sh.600000'].index, inplace=True)stock_df.drop(stock_df[stock_df.code > 'sz.399000'].index, inplace=True)return stock_df['code'].tolist()'''获得股票衍生变量数据'''
# 通过公式1获取成交量涨跌幅
def chg1(stock_table):stock_table['f_volume'] = stock_table['volume'].shift(-1)stock_table['chg_volume1(%)'] = (stock_table['volume']-stock_table['f_volume'])/stock_table['f_volume']*100return stock_table# 通过公式2获得成交量涨跌幅
def chg2(stock_table):ten_mean = stock_table['volume'].sort_index().rolling(10, min_periods=1).mean()stock_table['m10_volume'] = ten_meanstock_table['chg_volume2(%)'] = (stock_table['volume']-stock_table['m10_volume'])/stock_table['m10_volume']*100stock_table['ma5'] = stock_table['close'].sort_index().rolling(5).mean()return stock_tabledef get_timedataAll(start_date, end_date):stock_list = get_code(start_date)data = pd.DataFrame()i = 0for code in stock_list:i += 1#获取前十分钟数据rs = bs.query_history_k_data_plus(code, "date,time,code,open,high,low,close,volume,amount",start_date=start_date, end_date=end_date,frequency="5", adjustflag="3")#获取收盘涨跌幅rs1 = bs.query_history_k_data_plus(code, "date,code,open,high,low,close,volume,amount,pctChg",start_date=start_date, end_date=end_date, frequency="d",adjustflag="3")df1 = rs1.get_data()df = rs.get_data()if not df.empty:df = df[(df['time'].str[-9:] == '093500000') | (df['time'].str[-9:] == '094000000')]# 统计前10分钟成交量df['volume'] = df['volume'].apply(pd.to_numeric)df = df.groupby(['code', 'date'], as_index=False, sort=True)['volume'].sum()else:print(code, '为空')stock_table = pd.merge(df1[['code', 'date', 'open', 'close', 'pctChg']],df[['code', 'date', 'volume']], on=['code', 'date'])stock_table = stock_table.set_index('date')data1 = chg1(stock_table)data2 = chg2(stock_table)# 通过公式1计算的相关性stock_table['pctChg'] =stock_table['pctChg'].apply(pd.to_numeric)corr = pearsonr(stock_table['pctChg'][:-1], data1['chg_volume1(%)'][:-1])if corr:stock_table['corr1'] = corr[0]stock_table['p1'] = corr[1]# 通过公式2计算的相关性corr2 = pearsonr(stock_table['pctChg'][1:], data2['chg_volume2(%)'][1:])if corr2:stock_table['corr2'] = corr2[0]stock_table['p2'] = corr2[1]# 选取所需要的列target_columns = ['code', 'open', 'close', 'pctChg', 'volume', 'chg_volume2(%)', 'chg_volume1(%)', 'corr1','p1','corr2','p2']stock_table = stock_table[target_columns]data = data.append(stock_table, ignore_index=True)#股票数量太多,可只打印三个股票if(i>3):breakreturn datadef getChangeRate(start_date, end_date):return get_timedataAll(start_date, end_date)start_date = '2021-03-01'
end_date = '2021-04-15'
# print(getChangeRate(start_date, end_date))
final_table = getChangeRate(start_date, end_date)'''数据可视化呈现'''
# 创建一个Excel文件,并设置为不可见
app = xw.App(visible=False)
wb = app.books.add()# 新建一张Excel表,命名为stock_name,也即最开始定义的“万科A”
sht = wb.sheets.add('浦发银行')# 将之前生成的所有股票的指标选出自己想可视化的代码,导入到Excel中
final_table = final_table[final_table['code']=='sh.600000']
sht.range('A1').value = final_table# 数据可视化,并将图片导入到Excel当中
fig = plt.figure() # 设置一下要导入Excel中的图片
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 解决负号'-'显示为方块的问题# 绘制第一个折线图:股价涨跌幅(%)
plt.plot(final_table.index, final_table['pctChg'], label='股价涨跌幅(%)', color='red')
plt.legend(loc='upper left') # 设置图例位置# 绘制第二个折线图:10分钟成交量涨跌幅(%)
plt.twinx() # 生成双坐标轴
plt.plot(final_table.index, final_table['chg_volume2(%)'], label='10分钟成交量涨跌幅(%)', linestyle='--')
plt.legend(loc='upper right')# 设置图片标题,自动调整x坐标轴刻度的角度并展示图片
plt.title('浦发银行') # 设置标题
plt.gcf().autofmt_xdate() # 自动调整x坐标轴刻度的角度
# plt.show()# 把图片放到excel中
sht.pictures.add(fig, name='图1', update=True, left=500)wb.save(r'G:\code\股票量化分析10.xlsx')
wb.close()
app.quit()print('股票策略分析及Excel生成完毕')