利用python 使用 selenium调用豆包进行自动化问答以及信息提取整理
使用方法:
- 打开网页之后有40秒等待时间 此时进行登录
- 登录之后随便输入一个问题进行问答,进入对话界面
- 自动执行对话测试
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas
import os.path
import pandas as pd
from shapely.geometry import Polygon
from shapely.wkt import dumps
import argparse
from tqdm import tqdm,trange
import re
# 创建解析器
parser = argparse.ArgumentParser(description="这是一个示例程序")
# 添加位置参数(必须提供)
parser.add_argument("-i","--input",default="duplicate.xlsx", type=str, help="输入文件路径")
# 读取Excel文件的默认工作表
# 解析参数
args = parser.parse_args()# 创建Chrome浏览器的WebDriver实例
driver = webdriver.Chrome()def initDriver():# try:# 打开网页# 打开网页driver.get('https://www.doubao.com/chat/1807899615328258')# 等待页面加载time.sleep(40)# # 找到输入框元素,这里使用的是ID定位方式 /html/body/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div/div[2]/div/div/div[2]/div[2]/div[2]/div/div[2]/div[1]/textarea# input_box = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div/div[2]/div/div/div[2]/div[2]/div[2]/div/div[2]/div[1]/textarea')# # input_box.click()# text = "hello world"# if input_box is not None:# input_box.send_keys(text)# buttopn = driver.find_element(By.ID,"flow-end-msg-send")# if buttopn is not None:# buttopn.click()# time.sleep(10)# input_bbox = driver.find_element(By.XPATH,"/html/body/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div/div[3]/div/div/div/div[3]/div[2]/div[1]/div/div[2]/div/textarea")# if input_bbox is not None:# input_bbox.send_keys(text)# buttopn = driver.find_element(By.ID,"flow-end-msg-send")# if buttopn is not None:# buttopn.click()# 在输入框中输入值# input_box.send_keys('Selenium教程')# 等待一段时间,方便查看结果# time.sleep(10)def main():#打开浏览器initDriver()# 读取excelread_xlsx(args.input)# 把面积最大 改成合并之后的范围最大
def getBoundBox(text):import re# 使用正则表达式匹配方括号内的经纬度范围pattern = r'\[([- \d.]+)[,,]([- \d.]+)[,,]([- \d.]+)[,,]([- \d.]+)\]'matches = re.findall(pattern, text)# 存储所有匹配到的范围all_ranges = []for match in matches:min_lon = float(match[0])min_lat = float(match[1])max_lon = float(match[2])max_lat = float(match[3])all_ranges.append((min_lon, min_lat, max_lon, max_lat))# 如果有多个匹配,找出最大的范围min_lat_best = 1e6min_lon_best = 1e6max_lat_best = -1e6max_lon_best = -1e6if all_ranges:if len(all_ranges) > 1:for min_lon, min_lat, max_lon, max_lat in all_ranges:if min_lon < min_lon_best:min_lon_best = min_lonif min_lat < min_lat_best:min_lat_best = min_latif max_lat > max_lat_best:max_lat_best = max_latif max_lon > max_lon_best:max_lon_best = max_lonreturn [min_lon_best,min_lat_best,max_lon_best,max_lat_best]# # 计算每个范围的面积(粗略估算,用经度差乘以纬度差)# areas = [(max_lon - min_lon) * (max_lat - min_lat) for min_lon, min_lat, max_lon, max_lat in all_ranges]# # 找出面积最大的范围的索引# max_area_index = areas.index(max(areas))# max_range = all_ranges[max_area_index]else:max_range = all_ranges[0]print(f"最大范围的经纬度为: {max_range}")return max_rangeelse:print(f"未找到匹配的经纬度范围。 {text}")return Nonedef getWkt(minx,miny,maxx,maxy):# 创建一个几何多边形对象polygon = Polygon([(minx, miny), (minx, maxy), (maxx, maxy), (maxx, miny)])# 调用dumps方法,将几何对象转换为WKT字符串wkt_string = dumps(polygon)print(wkt_string)return wkt_string
def read_xlsx(path):#读取已有数据alreay_names=set()# df_csv =pd.read_csv("miss10ju_dealmrh.csv")# df_names = list(df_csv[:,0])if os.path.exists("missditu_dealmrh.csv"):with open("missditu_dealmrh.csv","r",encoding="utf-8") as f:for line in list(f.readlines()):line_str = str(line).split(";")alreay_names.add(line_str[0])ccc=0passfile_path = pathdf = pd.read_excel(file_path, sheet_name='Sheet2').valuesrow_nums, col_nums = df.shapedf = df[:,[12,12]]with open("missditu_dealmrh.csv","a",encoding="utf-8") as f:f.write("name;extent;center;wkt\n")for i in trange(row_nums):name = str(df[i][0]) #机场名字country = str(df[i][1]) # 国家country = "美国"extent=""if name == "nan":extent=""f.write(";;;\n")else:if name in alreay_names:print(f"alreay exists continue!")continue# extent = None# while(extent is None): /html/body/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div/div[3]/div/div/div/div[3]/div[2]/div[1]/div/div[2]/div/textareaprompt = f"{country} {name} 获取目标的十进制经纬度范围 以十进制经纬度 [最小经度,最小纬度,最大经度,最大纬度]格式 返回:"input_bbox = driver.find_element(By.XPATH,"/html/body/div[1]/div[1]/div/div[3]/div[1]/div[1]/div/div/div[3]/div/div/div/div[3]/div[2]/div[1]/div/div[2]/div/textarea")if input_bbox is not None:input_bbox.send_keys(prompt)buttopn = driver.find_element(By.ID,"flow-end-msg-send")if buttopn is not None:buttopn.click()# 在输入框中输入值# input_box.send_keys('Selenium教程')# 等待一段时间,方便查看结果time.sleep(30)elements = driver.find_elements(By.CLASS_NAME,'container-ncFTrL')eelement = elements[-1].textcontent = eelementextent = getBoundBox(eelement)wktstr = getWkt(*extent)center = [(extent[0] + extent[2]) / 2, (extent[1] + extent[3]) / 2]# 输出到文件extentStr = ",".join([str(_) for _ in extent])centerStr = ",".join([str(_) for _ in center])ccc=0f.write(f"{name};{extentStr};{centerStr};{wktstr}\n")print(f"success")#查找 经纬度范围f.flush()ccc=0ccc=0print(f"success!")if __name__=='__main__':main()