爬取网站
网易财经
创建的文件
List_url.py
from finance.code_list import CodeList

# URL of the NetEase Finance historical index page; the placeholders are
# filled with (code, year, season) in that order.
_URL_TEMPLATE = "http://quotes.money.163.com/trade/lsjysj_zhishu_{}.html?year={}&season={}"


def _prompt_int(prompt, is_valid, error_message):
    """Keep asking until the user types an integer accepted by ``is_valid``.

    Args:
        prompt: Text shown by ``input``.
        is_valid: Callable taking the parsed integer and returning a bool.
        error_message: Text printed after every rejected answer.

    Returns:
        The first integer for which ``is_valid`` returned true.
    """
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            # Non-numeric text is treated like any other invalid answer.
            print(error_message)
            continue
        if is_valid(value):
            return value
        print(error_message)


def _prompt_year_and_season():
    """Ask for a year in 1991..2020 and a season in 1..4; return both."""
    year = _prompt_int(
        "年份:",
        lambda y: 1991 <= y <= 2020,
        "您输入的年份有误,请重新输入!",
    )
    season = _prompt_int(
        "季度(1 2 3 4):",
        lambda s: 1 <= s <= 4,
        "您输入的季度错误,请重新输入!",
    )
    return year, season


def _is_int_in_range(text, low, high):
    """Return True when ``text`` parses as an integer within [low, high]."""
    try:
        return low <= int(text) <= high
    except ValueError:
        return False


def Shanghai_Stock_Index():
    """Interactively build the history-page URL for a Shanghai index.

    The security code must be an integer in 201000..900957. Each invalid
    answer prints the matching error message and the question is asked again
    (previously the function printed the message and then failed on an
    unbound ``url``).

    Returns:
        The formatted page URL as a string.
    """
    code = _prompt_int(
        "证券代号:",
        lambda c: 201000 <= c <= 900957,
        "您输入的证券代码有误,请重新输入!",
    )
    year, season = _prompt_year_and_season()
    return _URL_TEMPLATE.format(code, year, season)


def Shenzhen_Stock_Index():
    """Interactively build the history-page URL for a Shenzhen index.

    A code is accepted when it appears in ``CodeList()`` or when it parses as
    an integer in 131800..300790. The code is inserted into the URL exactly as
    typed. Invalid answers are re-prompted instead of crashing.

    Returns:
        The formatted page URL as a string.
    """
    known_codes = set(CodeList())  # built once; membership test is O(1)
    while True:
        code = input("证券代号:")
        if code in known_codes or _is_int_in_range(code, 131800, 300790):
            break
        print("您输入的证券代码有误,请重新输入!")
    year, season = _prompt_year_and_season()
    return _URL_TEMPLATE.format(code, year, season)


def exponent():
    """Ask which exchange to query and return the URL built for it.

    The prompt tells the user that 1 selects the Shenzhen index and 2 the
    Shanghai index, so the dispatch follows the prompt (the two branches were
    previously swapped). Any other answer is re-prompted.

    Returns:
        The page URL produced by the selected builder.
    """
    choice = _prompt_int(
        "请问您要执行深证指数操作还是上证指数操作(深证指数请输入:1 上证指数请输入:2):",
        lambda c: c in (1, 2),
        "输入错误,请重新输入!",
    )
    if choice == 1:
        return Shenzhen_Stock_Index()
    return Shanghai_Stock_Index()
code_list.py
def CodeList():
    """Return the list of six-character code strings, in a fixed order.

    The list is made of, in order: every number 1..999 zero-padded to six
    digits, five individually listed codes, every number 2000..2975
    zero-padded, four more individually listed codes, and 38014..38017
    zero-padded.
    """
    def padded(numbers):
        # Left-pad each number with zeros to a width of six characters.
        return [str(number).zfill(6) for number in numbers]

    codes = padded(range(1, 1000))
    codes += ["001696", "001872", "001896", "001965", "001979"]
    codes += padded(range(2000, 2976))
    codes += ["003816", "031005", "031007", "038011"]
    codes += padded(range(38014, 38018))
    return codes
pro_data.py
def Data_Convert(replace, replaced, type, data_list):
    """Clean each item's text form and convert it with the given callable.

    Args:
        replace: Substring to look for in ``str(item)``.
        replaced: Text substituted for every occurrence of ``replace``.
        type: Callable applied to the cleaned string (for example ``float``).
        data_list: Iterable of items to convert.

    Returns:
        A new list with one converted value per input item, in input order.
    """
    return [type(str(item).replace(replace, replaced)) for item in data_list]
Spider_Data.py
from urllib.request import Request, urlopen
from finance.pro_data import Data_Convert
from finance.List_url import exponent
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from pandas import DataFrame
import re

# Number of <td> cells that make up one table row (one per output column).
_ROW_WIDTH = 9


def html():
    """Download the page selected interactively via ``exponent()``.

    Returns:
        The response body decoded with the default codec of ``bytes.decode``.
    """
    headers = {'User-Agent': UserAgent().Chrome}
    url = exponent()
    request = Request(url, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with urlopen(request) as response:
        return response.read().decode()


def _chunk_rows(cells, width=_ROW_WIDTH):
    """Split a flat list of cell strings into rows of ``width`` cells.

    Raises:
        ValueError: If the number of cells is not a multiple of ``width``,
            which means the page did not have the expected layout.
    """
    if len(cells) % width:
        raise ValueError(
            "expected a multiple of {} table cells, got {}".format(width, len(cells))
        )
    return [cells[start:start + width] for start in range(0, len(cells), width)]


def get_Data():
    """Scrape the history table and return it as a DataFrame.

    Rows are read separately from ``<tr class="">`` and ``<tr class="dbrow">``
    elements and then interleaved (first plain row, first dbrow row, second
    plain row, ...). When one group has fewer rows than the other, the
    remaining rows of the longer group are still kept; previously an odd
    number of rows raised ``IndexError``.

    Returns:
        A DataFrame with nine columns. Prices, volume and turnover are
        converted to ``float`` after removing thousands separators; the date,
        change amount and change percentage columns stay as scraped strings.

    Raises:
        ValueError: If the scraped cells do not form complete nine-cell rows.
    """
    info = html()
    soup = BeautifulSoup(info, 'html.parser')
    con = soup.find_all('table', attrs={'class': 'table_bg001 border_box limit_sale'})
    table_html = str(con)

    plain_row_html = re.findall(r'(?<=<tr class="">).*?(?=</tr>)', table_html)
    dbrow_row_html = re.findall(r'(?<=<tr class="dbrow">).*?(?=</tr>)', table_html)

    # Both row groups use the same cell pattern, so compile it once.
    cell_pattern = re.compile(r'(?<=<td>).*?(?=</td>)')
    plain_rows = _chunk_rows(cell_pattern.findall(str(plain_row_html)))
    dbrow_rows = _chunk_rows(cell_pattern.findall(str(dbrow_row_html)))

    # Interleave the two groups without assuming they have equal length.
    rows = []
    for index in range(max(len(plain_rows), len(dbrow_rows))):
        for group in (plain_rows, dbrow_rows):
            if index < len(group):
                rows.append(group[index])

    # Transpose rows into columns; yields nine empty lists when no row matched.
    (Times, opening_price, max_price, min_price, closing_price,
     change_amount,    # change amount
     price_limit,      # change percentage
     trading_amount,   # trading volume
     trading_volume,   # turnover
     ) = [[row[position] for row in rows] for position in range(_ROW_WIDTH)]

    new_opening_price = Data_Convert(",", "", float, opening_price)
    new_max_price = Data_Convert(",", "", float, max_price)
    new_min_price = Data_Convert(",", "", float, min_price)
    new_closing_price = Data_Convert(",", "", float, closing_price)
    new_trading_amount = Data_Convert(",", "", float, trading_amount)
    new_trading_volume = Data_Convert(",", "", float, trading_volume)

    dic = {
        '日期': Times,
        '开盘价': new_opening_price,
        '最高价': new_max_price,
        '最低价': new_min_price,
        '收盘价': new_closing_price,
        '涨跌额': change_amount,
        '涨跌幅(%)': price_limit,
        '成交量(股)': new_trading_amount,
        '成交金额(元)': new_trading_volume,
    }
    DF = DataFrame(dic)
    return DF
main.py
from finance.Spider_Data import get_Data
import os
import sys


def save_data():
    """Ask for a file name and write the global ``Data`` frame as CSV.

    The file is written under ``./save_csv/``, which is created when missing.
    ``.csv`` is appended unless the name already ends with it. An empty name
    or a name containing a path component is rejected with an error message
    and nothing is written. An existing file of the same name is overwritten.
    """
    name = input("请输入您要导出的数据文件(.csv)名称:")
    # basename() differs from the input whenever a directory part is present,
    # which also covers the platform's own separator, not just '/'.
    if not name or os.path.basename(name) != name:
        print("输入错误,请重新输入!")
        return
    if not name.endswith('.csv'):
        name += '.csv'
    os.makedirs("./save_csv", exist_ok=True)
    save_path = "./save_csv/" + name
    # to_csv opens the target for writing, so no prior os.remove is needed.
    Data.to_csv(save_path, sep=',', index=False, header=True)


def _offer_download():
    """Ask whether to export the data and act on the answer."""
    answer = input("是否要下载数据:")
    if answer == '是':
        save_data()
    elif answer == '否':
        sys.exit(0)
    else:
        print("输入错误,请重新输入!")


def main():
    """Entry point: optionally print the data, then offer to export it.

    Answering anything other than '是' or '否' to the first question prints an
    error message and returns without asking about the export.
    """
    show = input("是否展示数据:")
    if show not in ('是', '否'):
        print("输入错误,请重新输入!")
        return
    if show == '是':
        print(Data)
    _offer_download()


if __name__ == '__main__':
    Data = get_Data()
    main()
运行
在main.py下运行,结果如下: