- 数据接口: https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?adCode=310000
- 其中,adCode 是地区地理编码,可参考:python 全国行政地区信息爬取-腾讯位置服务平台
- 请求方式: GET
- 返回数据类型: JSON
- 思路:先获取全国各省份的地理编码,再通过编码获取各省份的新冠历史数据
代码
import os
import json
import requests
import pandas as pd
import datetime

# Endpoints used by this script.
DISTRICT_LIST_URL = "https://apis.map.qq.com/ws/district/v1/list"
# NOTE: "pubished" is the spelling used in the endpoint path; do not "fix" it.
COVID_HISTORY_URL = "https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list"

# Seconds to wait for a response before giving up, so a stalled server
# cannot hang the script forever.
REQUEST_TIMEOUT = 15

# Columns kept from the history payload, and the Chinese headers they are
# renamed to in the exported CSV (same order).
HISTORY_SOURCE_COLUMNS = [
    'date', 'province', 'confirm', 'dead', 'heal', 'wzz',
    'newConfirm', 'all_local_confirm_add', 'newHeal', 'newDead', 'wzz_add',
]
HISTORY_OUTPUT_COLUMNS = [
    '日期', '省份', '累计确诊', '累计死亡', '累计治愈', '无症状',
    '新增确诊', '新增本土', '新增治愈', '新增死亡', '新增无症状',
]


def Get_ProvinceInfo(Key):
    """Fetch province-level info: name, code, pinyin, latitude and longitude.

    Args:
        Key: API key sent as the ``key`` query parameter of the district
            list endpoint.

    Returns:
        pandas.DataFrame built from the first element of the response's
        ``result`` list, with ``pinyin`` joined into one capitalized string,
        ``lat``/``lng`` extracted from ``location``, and the ``location``
        and ``cidx`` columns dropped.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the response does not have the expected fields.
    """
    url = DISTRICT_LIST_URL
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    }
    # The original read a global named ``key`` here, which only worked when
    # the caller happened to define one; use the actual argument instead.
    params = {"key": Key}
    res = requests.get(url, params=params, headers=headers,
                       timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    res.encoding = "utf-8"
    js_data = json.loads(res.text)

    province_df = pd.DataFrame(js_data['result'][0])
    # Each pinyin entry is a list of syllables; capitalize and join them.
    province_df['pinyin'] = [
        "".join([j.capitalize() for j in i]) for i in province_df['pinyin']
    ]
    province_df['lat'] = [i['lat'] for i in province_df['location']]
    province_df['lng'] = [i['lng'] for i in province_df['location']]
    province_df.drop(['location', 'cidx'], axis=1, inplace=True)
    return province_df


def Get_COVID19_History(df):
    """Fetch the COVID-19 history for every region code in ``df['id']``.

    Args:
        df: DataFrame with an ``id`` column of region codes (as returned by
            ``Get_ProvinceInfo``). Empty-string codes are skipped.

    Returns:
        pandas.DataFrame with one row per region and day, using the Chinese
        column headers plus a ``省份编码`` column holding the region code.
        An empty DataFrame is returned when nothing was fetched.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    frames = []
    for code in df['id']:
        if code == '':
            continue
        response = requests.get(COVID_HISTORY_URL,
                                params={'adCode': str(code)},
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        history_data = response.json()['data']
        if not history_data:
            # No records for this code: skip it instead of failing on the
            # missing columns of an empty frame.
            continue

        history_df = pd.DataFrame(history_data)
        # ``date`` only carries month and day; prepend the year before parsing.
        history_df['date'] = pd.to_datetime(
            history_df['year'].astype('str') + '.' + history_df['date'])
        history_df_use = history_df[HISTORY_SOURCE_COLUMNS].copy()
        history_df_use.columns = HISTORY_OUTPUT_COLUMNS
        history_df_use.insert(2, "省份编码", code)
        frames.append(history_df_use)

    # Concatenate once at the end rather than re-copying the accumulated
    # result on every iteration.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)


def CreateFolder(path):
    """Create the directory ``path`` (including parents) if it is missing.

    Prints a message saying whether the folder was created or already
    existed.

    Args:
        path: Directory path to create.
    """
    if not os.path.exists(path):
        os.makedirs(path)
        print('文件夹创建成功:', path)
    else:
        print('文件夹已经存在:', path)


def main():
    """Download all province histories and save them to a dated CSV file."""
    key = 'your key'
    province = Get_ProvinceInfo(key)
    covid19_province = Get_COVID19_History(province)

    now_day = datetime.datetime.now().strftime("%Y-%m-%d")
    out_dir = os.path.join(os.getcwd(), now_day)
    CreateFolder(out_dir)
    out_file = os.path.join(
        out_dir, "COVID19_History_province_{}.csv".format(now_day))
    covid19_province.to_csv(out_file, index=False)
    print("程序运行完成")


if __name__ == "__main__":
    main()
结果
数据存在的问题
- 2022-05之前的新增本土、新增无症状会出现缺失