爬取基金收盘价并用pyecharts进行展现
一、用到的第三方包
本文用到了几个第三方包,其中有些体积比较大,直接从官方 PyPI 源下载会比较慢,所以建议先配置国内镜像源,这里以清华大学的镜像源为例。
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip config set global.trusted_host pypi.tuna.tsinghua.edu.cn
安装第三方包(requests 用于抓取网页;pandas 读写 xlsx 文件时依赖 openpyxl)
pip install requests pandas bs4 pyecharts openpyxl
二、数据获取
从网页爬取数据
import requests
from bs4 import BeautifulSoup
# Fetch the page. The timeout keeps the script from waiting forever on a
# server that never answers (requests applies no timeout by default).
response = requests.get('url地址', timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
response.raise_for_status()
# Parse the HTML content.
soup = BeautifulSoup(response.text, 'html.parser')
# Keep and show the extracted text; a bare ``soup.get_text()`` expression
# throws its result away when this runs as a script.
page_text = soup.get_text()
print(page_text)
1、从网页爬取每日最新数据
2、将数据追加存储到excel
#!/usr/bin/python
# -*-coding:utf-8 -*-
"""Fetch the latest fund prices and append them to an Excel workbook.

File        : spider.py
Time        : 2024/1/17 10:00
Author      : 天选之子
Email       :
version     : python 3.10.11
Description :
"""
import datetime
import os

import numpy
import pandas
import requests
from bs4 import BeautifulSoup

# Base URL of the quote service; a fund code is appended as ``/?q=<code>``.
url = "https://qt.gtimg.cn"
# Fund codes to fetch on every run.
fund_code_list = ['sh510300', 'sz159995']
# Placeholder: replace with the path of the Excel workbook to store data in.
target_path = r'excel存储位置'
# Placeholder: name of the worksheet the data is read from and written to.
sheet_name = '存储的excel的sheet页'

# Value returned when no price can be extracted from a response.
_NOT_FOUND = '没找到'
# Zero-based position of the price within the '~'-separated response text.
# NOTE(review): confirm against the quote service which price this field holds.
_PRICE_FIELD_INDEX = 4


def get_fund_close_price(url_str):
    """Get the fund's closing price from the quote service.

    The response text is split on ``'~'`` and the field at index 4 is
    returned unchanged (as text).

    :param url_str: full query URL for a single fund
    :return: the price field as a string, or ``'没找到'`` when the response
        does not contain enough ``'~'``-separated fields
    :raises requests.RequestException: if the request fails or times out
    """
    # Without a timeout a stalled connection would block the run forever.
    response = requests.get(url_str, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    fields = soup.get_text().split('~')
    if len(fields) <= _PRICE_FIELD_INDEX:
        # Unexpected payload: report "not found" rather than raise IndexError.
        return _NOT_FOUND
    return fields[_PRICE_FIELD_INDEX]


def df_combiner(targetpath, dataframe, sheetname):
    """Merge newly fetched rows into the rows already stored in the workbook.

    :param targetpath: path of the target Excel workbook
    :param dataframe: new rows; must contain a ``'日期'`` column
    :param sheetname: worksheet of the target workbook to read
    :return: ``dataframe`` itself when the workbook does not exist yet; the
        stored rows unchanged when every date in ``dataframe`` is already
        stored; otherwise the stored rows followed by the new rows
    """
    if not os.path.exists(targetpath):
        # First run: nothing is stored yet, so the new rows are the result.
        return dataframe
    stored = pandas.read_excel(targetpath, sheet_name=sheetname, keep_default_na=False)
    stored_dates = set(stored['日期'].tolist())
    new_dates = set(dataframe['日期'].tolist())
    if new_dates.issubset(stored_dates):
        # These dates were already saved; avoid appending duplicates.
        return stored
    return pandas.concat([stored, dataframe], ignore_index=True)


def main():
    """Fetch the price of every configured fund and save it to the workbook."""
    # Take the timestamp once so every row of this run carries the same date.
    now = datetime.datetime.now()
    rows = []
    for fund_code in fund_code_list:
        fund_url = f'{url}/?q={fund_code}'
        result = get_fund_close_price(fund_url)
        print("该只基金的收盘价为:", result)
        print(now.strftime('%Y-%m-%d'))
        rows.append({'日期': now.strftime('%Y%m%d'), '基金代码': fund_code, '收盘价': result})
    df_all = pandas.DataFrame(rows, columns=['日期', '基金代码', '收盘价'])
    target_df = df_combiner(target_path, df_all, sheet_name)
    # Write to the same sheet that df_combiner reads, so the next run finds it.
    # The context manager closes the workbook even if writing fails.
    with pandas.ExcelWriter(target_path) as excel_writer:
        target_df.to_excel(excel_writer, index=False, sheet_name=sheet_name)


if __name__ == '__main__':
    main()
三、展示
将保存在 Excel 中的数据绘制成折线图,这里使用的是 pyecharts,完整代码如下:
# Quick start guide: https://pyecharts.org/#/zh-cn/quickstart
import numpy
import pandas
from pyecharts import options as opts
from pyecharts.charts import Line

# Placeholder: replace with the path of the Excel workbook to read.
target_path = r'excel位置'
sheet_name = '基金收盘价'
fund_code_list = ['sh510300', 'sz159995']
# Line and marker colour of each series, in the same order as fund_code_list.
series_colors = ['red', 'yellow']

tar_data_frame = pandas.read_excel(target_path, sheet_name=sheet_name, keep_default_na=False)

# De-duplicate the dates while keeping the order in which they are stored;
# a plain ``set`` would shuffle the x axis relative to the price series.
# The labels are passed as text so they are treated as categories.
data_date = [str(day) for day in dict.fromkeys(tar_data_frame['日期'].tolist())]

# Build the line chart with one series per fund.
# NOTE(review): each series is plotted positionally, so this assumes every
# fund has exactly one row per date, in the same order - confirm for your data.
line_chart = Line()
line_chart.add_xaxis(data_date)
for fund_code, color in zip(fund_code_list, series_colors):
    close_prices = tar_data_frame.loc[tar_data_frame['基金代码'] == fund_code, '收盘价'].tolist()
    line_chart.add_yaxis(
        series_name=fund_code,
        y_axis=close_prices,
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(color=color, width=2, is_show=True),
        itemstyle_opts=opts.ItemStyleOpts(color=color, border_width=1),
    )
line_chart.set_global_opts(
    title_opts=opts.TitleOpts(title="基金收盘价格走势", is_show=True, pos_left='center'),
    xaxis_opts=opts.AxisOpts(name="日期"),
    yaxis_opts=opts.AxisOpts(name="收盘价(元)", min_=0, max_=10),
    legend_opts=opts.LegendOpts(pos_left='right'),
)
# Render the chart to an HTML file.
line_chart.render("基金收盘价.html")
展示结果如下: