1、摘要
本文主要内容:使用百度情绪分析接口评估股票近半年的新闻,评估新闻属于利好还是利空,最终统计利好和利空的比例,供选股做参考
本文福利:赠送百度AppID:应用的唯一标识AppKey:公匙(相当于账号)AppSecret:私匙(相当于密码)
2、主要思路
- 选择自己要评估的股票代码数组
- 从金融界行情中心获取股票新闻信息
- 得到页面的内容并保存
- 调用百度云自然语言处理接口,进行情感倾向分析
- 统计利好和利空的比例
3、代码
import os
import reimport lxml # 一个Python库,使用它可以轻松处理XML和HTML文件,还可以用于web爬取
import requests
from aip import AipNlp
from lxml import etree
from matplotlib import pyplotfrom utils.read_write import readTxt, writeOneCsv, readToStros.chdir(r'D:\项目\量化交易')
pyplot.rcParams['font.sans-serif'] = ['SimHei'] # 图形参数设置,用来正常显示中文标签,国标黑体
pyplot.rcParams['axes.unicode_minus'] = False # 用来正常显示负号# 得到最大页码的数值
def getallpage(url):pagedata = requests.get(url).content.decode("gbk")# print(pagedata)# lxml教程:https://www.cnblogs.com/zhangxinqi/p/9210211.html#_label2mytree = lxml.etree.HTML(pagedata)# 取所有的页码数if pagedata.find("page_newslib"):data = mytree.xpath("//*[@class=\"page_newslib\"]//a[last()-1]/text()")return dataelse:return ['1']# 得到页面的内容并保存
def everypagecontent(url, number):# 解决服务器延时的问题trytry:pagedata = requests.get(url).content.decode("gbk")mytree = lxml.etree.HTML(pagedata)# 取所有的内容datas = mytree.xpath("//*[@class = \"newlist\"]//li/span/a/text()")for data in datas:data = data + "\r\n"with open(number + ".txt", "a") as file:file.write(data)file.flush()return datasexcept:print("服务器超时")# 调用百度云自然语言处理接口,进行情感倾向分析
def analyze(number):""" 你的 APPID AK SK """# AppID:应用的唯一标识AppKey:公匙(相当于账号)AppSecret:私匙(相当于密码)APP_ID = '22793513'API_KEY = '0veui3PVDnoXSc3ZmGNI3Et5'SECRET_KEY = 'BGvXXlPQKigySi0MqG26i17bfnRPl8wy'pos = 0nav = 0avgpos = 0navavg = 0i = 0aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)lines = readTxt(number + ".txt")str = readToStr(number + ".txt")companys = re.findall(r"\[(.+?)\]", str)company = max(companys, key=companys.count)for line in lines:aline = line.replace("\r\n", "").strip()if len(aline) != 0:try:result = aipNlp.sentimentClassify(aline) # 调用百度接口positive = result['items'][0]['positive_prob']nagative = result['items'][0]['negative_prob']i += 1if positive >= nagative:pos += 1else:nav += 1avgpos = pos / inavavg = nav / iexcept Exception as e:passprint(company)print(avgpos)print(navavg)writeOneCsv([number, company, avgpos, navavg], '各种股票的情绪.csv')def getpageurl(url):pagenumber = getallpage(url)[0]for i in range(1, int(pagenumber) + 1):try:if i == 1:url = "http://stock.jrj.com.cn/share," + number + ",ggxw.shtml"else:url = "http://stock.jrj.com.cn/share," + number + ",ggxw_" + str(i) + ".shtml"except:passeverypagecontent(url, number)numbers = ['600519', '000661', '002241', '300677', '000860', '300750', '600436', '603939', '300347', '603429']# 得到股票名称,获取新闻信息生成文件
for number in numbers:print(number)url = "http://stock.jrj.com.cn/share," + number + ",ggxw.shtml"getpageurl(url)# 读取文件
for number in numbers:print(number)analyze(number)