Fiddler安装和设置
安装
Fiddler 安装包可以从这里获取,如果失效了可以自己网上找一个安装。
链接:https://pan.baidu.com/s/10tYQ-uL6HMddkOcIKnWEKQ?pwd=d1io
然后就是点击安装就好了,没什么好多说的。
启用HTTPS捕获
进入软件界面,点击 Tools -> Options -> HTTPS,勾选 Capture HTTPS CONNECTs 和 Decrypt HTTPS traffic,启用对 https 请求的捕获并解密。
证书信任
设置信任根证书,不然进行抓包捕获时,其他网页就访问不了了。
证书安装
有时候,如果证书安装不正确,可能导致抓取 https 失败。如果你发现上面已经设置以后,仍然抓取不到 https 的话,可以尝试使用工具重新生成证书。
可以下载 fiddlercertmaker.exe 自动生成证书,具体安装过程可参考:Fiddler死活抓不了HTTPS包解决办法_fiddler 抓包 itune 310 错误-CSDN博客
链接:https://pan.baidu.com/s/19G6aBHtxQU4ViSicWw2NOw?pwd=y3uh
设置自动转发
设置指定 url 自动转发到本地,我这里是自动把请求转发到了我本地一个 Flask 搭建的服务,设置好以后进行保存(转发地址记得和你服务的地址保持一致)。
之所以设置自动转发 https://search.weixin.qq.com/cgi-bin/wxaweb/wxindexfluctuations,主要是为了获取该请求参数中的 openid 和 search_key,因为我需要用这两个请求参数去构造新的 body。
Unmatched requests passthrough 一定要勾选上——也就是不影响其他未匹配的请求
开启捕获
可以从 File -> Capture Traffic 开启捕获,也可以用 F12 快捷键开启捕获,当左下角有 Capturing 字样时,表示捕获已开启。
然后就可以正常捕获抓取 https 请求了
数据抓取处理
搭建并启动本地服务
可以自己在本地简单写一个服务,接收 Fiddler 转发过来的请求并进行处理。我这里构造了两个 body,分别去获取 指数趋势 和 数据来源。
如果出现 Your proxy appears to only use HTTP and not HTTPS 报错,把代码中的转发 url(forward_url)由 https 修改成 http 即可。
# coding:utf-8
import csv
import datetime
import json
import os
import traceback

import pygal
from pygal.style import Style

import requests
import urllib3
from flask import Flask, request

app = Flask(__name__)

# Header row used when exporting index-trend rows to CSV:
# raw response key -> Chinese column title.
time_indexes_map = {
    "time": "日期",
    "score": "指数",
}

# Display names for the per-channel scores: used as the CSV header row and as
# the pie-chart slice labels.
channel_scores_map = {
    "finder_score": "视频号",
    "live_score": "直播",
    "mpdoc_score": "公众号",
    "query_score": "搜一搜",
    "extlink_score": "网页",
    "ad_score": "其他",
    "total_score": "总计",
    # No Chinese meaning was found for this field, so it maps to its raw key.
    "score_exp": "score_exp",
}

# Headers sent with every request to search.weixin.qq.com. The User-Agent and
# Referer identify the caller as the WeChat desktop mini-program client.
headers = {
    'Host': 'search.weixin.qq.com',
    'Connection': 'keep-alive',
    # 'Content-Length': '182',
    'xweb_xhr': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309092b) XWEB/9129',
    'Content-Type': 'application/json',
    'Accept': '*/*',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/53/page-frame.html',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}


# class ValueColors(pygal.style.Style):
#     value_colors = ("#f6c443", "#ff6146", "#7c160", "#4fadf8", "#a9e87a", "#eda150")


class ResultHandler:
    """Write the charts and CSV exports for one query into a single directory.

    The directory is created on construction if it does not exist yet.
    """

    def __init__(self, file_save_dir):
        """Remember *file_save_dir* and make sure it exists."""
        self.file_save_dir = file_save_dir
        self._init_file_save_dir()

    def _init_file_save_dir(self):
        """Create the output directory (no error if it is already there)."""
        os.makedirs(self.file_save_dir, exist_ok=True)

    def draw_line(self, title, time_indexes, last_day=7):
        """Render the last *last_day* entries of *time_indexes* to ``line.svg``.

        Each entry must provide a ``"time"`` and a ``"score"`` key. The first
        four characters of ``str(entry["time"])`` are dropped from the x label.
        """
        time_indexes = time_indexes[-last_day:]
        date_chart = pygal.StackedLine(
            fill=True,
            interpolate='hermite',
            x_label_rotation=-20,
            style=pygal.style.LightGreenStyle,
        )
        date_chart.x_labels = [str(x["time"])[4:] for x in time_indexes]
        date_chart.add(title, [x["score"] for x in time_indexes])
        file_path = os.path.join(self.file_save_dir, "line.svg")
        date_chart.render_to_file(file_path)

    def draw_pie(self, title, channel_scores, last_day=7):
        """Render the channel share over the last *last_day* entries to ``pie.svg``.

        Scores are summed per key across the selected entries and each channel
        is drawn as its percentage of the summed ``"total_score"``. The input
        dicts are never modified. Nothing is drawn when there are no entries
        or the summed total is zero.
        """
        # The colour mapping can be sampled with a colour picker such as
        # pyautogui's getpixel:
        # colors_map = {
        #     "ad_score": "#eda150",
        #     "extlink_score": "#a9e87a",
        #     "finder_score": "#f6c443",
        #     "live_score": "#ff6146",
        #     "mpdoc_score": "#7c160",
        #     "query_score": "#4fadf8"
        # }
        recent = channel_scores[-last_day:]
        if not recent:
            print("draw_pie: no channel scores, pie chart skipped")
            return

        # Accumulate into a copy: the caller still exports these dicts to CSV,
        # so summing into the last entry in place would corrupt that row.
        *earlier, latest = recent
        totals = dict(latest)
        for daily in earlier:
            for key, score in daily.items():
                totals[key] = totals.get(key, 0) + score

        total_score = totals.get("total_score", 0)
        if not total_score:
            print("draw_pie: total score is zero, pie chart skipped")
            return

        pie_chart = pygal.Pie(inner_radius=0.5)
        pie_chart.title = title
        for key, score in totals.items():
            if key in ("score_exp", "total_score"):
                continue
            percent = round(100 * score / total_score, 2)
            # Unknown channels are labelled with their raw key.
            pie_chart.add(channel_scores_map.get(key, key), percent)
        file_path = os.path.join(self.file_save_dir, "pie.svg")
        pie_chart.render_to_file(file_path)

    def write_csv(self, title, rows: list):
        """Write *rows* (a list of dicts) to ``<title>_<YYYYMMDD>.csv``.

        When the keys of the first row match ``time_indexes_map`` or
        ``channel_scores_map`` exactly, that map supplies the column order and
        the header row; otherwise the raw keys, sorted by length, are used.

        Returns:
            True on success or when *rows* is empty, False when writing failed
            (the error and its traceback are printed).
        """
        if not rows:
            return True

        fieldnames = sorted(rows[0].keys(), key=len)
        header = None
        for known in (time_indexes_map, channel_scores_map):
            if set(fieldnames) == set(known):
                fieldnames = list(known)
                header = known
                break

        file_name = title + "_" + datetime.datetime.now().strftime("%Y%m%d") + ".csv"
        file_path = os.path.join(self.file_save_dir, file_name)
        try:
            with open(file_path, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames)
                if header is None:
                    writer.writeheader()
                else:
                    writer.writerow(header)
                for row in rows:
                    writer.writerow(row)
        except Exception as e:
            # Best-effort export: report the failure instead of aborting the request.
            print(e)
            traceback.print_exc()
            return False
        return True


def _query_wxindex(payload):
    """POST *payload* to the wxindex endpoint and return the decoded JSON reply."""
    # forward_url = 'https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex'
    forward_url = 'http://search.weixin.qq.com/cgi-bin/wxaweb/wxindex'
    response = requests.post(
        forward_url,
        json=payload,
        headers=headers,
        verify=False,
        timeout=10,  # never hang the Flask worker on an unresponsive upstream
    )
    response.raise_for_status()
    return response.json()


@app.route('/post_data', methods=['POST'])
def post():
    """Fetch and export the index trend and channel breakdown for one query.

    Expects a JSON object body carrying ``openid``, ``search_key`` and
    ``query``. Issues a ``GetDefaultIndex`` and a ``GetMultiChannel`` request
    covering the last 365 days, then writes ``line.svg``, ``pie.svg`` and two
    CSV files under ``./files/<YYYYMMDD>``.

    Returns:
        ``{}`` on success, a 400 error object when the body is unusable, or a
        502 error object when the upstream call fails or its reply does not
        have the expected structure.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return {"error": "expected a JSON object body"}, 400

    openid = data.get("openid")
    search_key = data.get("search_key")
    if not openid or not search_key:
        return {"error": "openid and search_key are required"}, 400
    query = [data.get("query")]

    # Take one timestamp so the directory name and the date range cannot
    # disagree when the request straddles midnight.
    now = datetime.datetime.now()
    end_ymd = now.strftime("%Y%m%d")
    start_ymd = (now - datetime.timedelta(365)).strftime("%Y%m%d")
    result_handler = ResultHandler(f"./files/{end_ymd}")
    urllib3.disable_warnings()

    try:
        # Index trend
        trend_data = _query_wxindex({
            'openid': openid, 'search_key': search_key, 'cgi_name': 'GetDefaultIndex',
            'query': query, 'compound_word': [], 'start_ymd': start_ymd, 'end_ymd': end_ymd,
        })
        first_result = trend_data["content"]["resp_list"][0]
        title = first_result["query"]
        time_indexes = first_result["indexes"][0]["time_indexes"]
        print(time_indexes[:2])
        title_indexes = title + "_指数趋势"
        result_handler.draw_line(title_indexes, time_indexes, 30)
        result_handler.write_csv(title_indexes, time_indexes)

        # Data sources (per-channel scores)
        channel_data = _query_wxindex({
            'openid': openid, 'search_key': search_key, 'cgi_name': 'GetMultiChannel',
            'query': query, 'start_ymd': start_ymd, 'end_ymd': end_ymd,
        })
        result_list = channel_data["content"]["result_list"]
        channel_scores = [c["channel_score"] for c in result_list]
        print(channel_scores[:2])
        title_scores = title + "_数据来源"
        result_handler.draw_pie(title_scores, channel_scores, 30)
        result_handler.write_csv(title_scores, channel_scores)
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
        # Network failure, non-JSON reply, or a reply without the expected fields.
        print(exc)
        traceback.print_exc()
        return {"error": "wxindex request failed or returned an unexpected reply"}, 502

    return {}


if __name__ == '__main__':
    app.run(host="127.0.0.1", debug=True)
小程序搜索关键字
- 进入电脑端微信
- 搜索 微信指数 小程序
- 进入小程序,输入想要搜索的关键词(比如:和平精英)
数据图表展示
微信图表展示如下:
我们自己使用 pygal 画的图如下(svg 图用浏览器打开),对比发现,除了插值导致的光滑度不一样,图的整体走势是一致的: