这篇文章旨在记录学习 Selenium 时遇到的问题和代码风格。
1、常见的登录问题、cookie、token、验证码、邮箱。
2、验证码工具:使用本地的 ONNX 格式微型模型进行图像识别,得到最终的验证码,速度还蛮快的。
使用的包:ddddocr,通过 ddddocr.DdddOcr() 创建识别器。
# Log in: submit the user name together with the OCR-decoded captcha, then
# the password.
# NOTE(review): the account and password are hard-coded below; consider
# loading them from configuration instead of keeping secrets in source.
# NOTE(review): the original snippet had lost its indentation; the password
# step is placed inside the loop because that is the only nesting under which
# it is reachable -- confirm against the real page flow.
from selenium.common.exceptions import NoSuchElementException

try:
    # find_element() raises NoSuchElementException rather than returning a
    # falsy value, so this loop only ends through that exception (handled
    # below), e.g. once the captcha image is no longer on the page.
    while self.driver.find_element(By.ID, "radCaptha_CaptchaImageUP"):
        user_box = self.driver.find_element(By.ID, "txtUser")
        user_box.clear()
        user_box.send_keys("huyue33@sic.picc.com.cn")

        # Screenshot the captcha element and decode it with ddddocr.
        image = self.driver.find_element(
            By.ID, "radCaptha_CaptchaImageUP").screenshot_as_png
        ocr = ddddocr.DdddOcr()
        text = ocr.classification(Image.open(BytesIO(image)))
        self.driver.find_element(
            By.ID, "txtUserValdiationCode").send_keys(f"{text}")
        print(text)
        self.driver.find_element(By.ID, "cmdSignIn").click()

        # Looking up the password box raises NoSuchElementException when it
        # is not on the page, which ends the attempt.
        pwd_box = self.driver.find_element(By.ID, "txtPwd")
        pwd_box.clear()
        pwd_box.send_keys("7&vQrPSr")
        self.driver.find_element(By.ID, "cmdSignIn").click()
except NoSuchElementException:
    # Expected exit path: one of the looked-up elements is not on the page.
    pass
except Exception as e:
    # Keep the original best-effort behaviour (do not crash the caller), but
    # surface unexpected failures instead of silently discarding them.
    print(f"login step failed: {e!r}")
# Create the Edge WebDriver.
# The options object must exist before arguments are added to it; the
# original called add_argument() on `options` one line before creating it.
options = webdriver.EdgeOptions()
options.add_argument("start-maximized")
service = webdriver.EdgeService(executable_path='./msedgedriver.exe')  # path to the Edge driver binary
self.driver = webdriver.Edge(options=options, service=service)
# Next: read the account/password and write the saved cookies into the browser.
# Restore saved cookies: "./cookiefile" holds one JSON-encoded cookie per line.
# Opened read-only ("r" instead of "r+") because nothing is written back.
with open("./cookiefile", "r") as f:
    for cookie in f:
        cookie = cookie.strip()
        if not cookie:
            # A blank line would make json.loads() raise; skip it.
            continue
        self.driver.add_cookie(json.loads(cookie))
# Read the account from "./login_info": line 1 is the user name, line 2 the
# password. splitlines() drops the line terminators that readlines() kept,
# so the values no longer end in "\n". Opened read-only ("r" instead of "r+")
# because nothing is written back.
with open("./login_info", "r") as f:
    # Variable name kept from the original snippet in case later code uses it.
    cookie_data_list = f.read().splitlines()
username = cookie_data_list[0]
password = cookie_data_list[1]
截图
# Wait up to 10 s, polling every 0.5 s, until the translation link is clickable.
translation_link = WebDriverWait(self.driver, 10, 0.5).until(
    expected_conditions.element_to_be_clickable((By.ID, "phb_vw1_hyTranslation")))
self.driver.set_window_size(1020, 2080)  # resize the browser window before the screenshot
temp_save_pic.append(f"{save_dir}/{time.time()}.png")  # timestamp-named screenshot path
self.driver.save_screenshot(temp_save_pic[-1])

# Remember the current tab and the open handles so the new tab can be
# identified without relying on the order of window_handles.
original_tab = self.driver.current_window_handle
handles_before = self.driver.window_handles
translation_link.click()
# The new tab may not exist yet right after the click; wait for it.
WebDriverWait(self.driver, 10, 0.5).until(
    expected_conditions.new_window_is_opened(handles_before))
tabs = self.driver.window_handles
new_tab = next(handle for handle in tabs if handle not in handles_before)
self.driver.switch_to.window(new_tab)  # switch to the newly opened tab
try:
    WebDriverWait(self.driver, 10, 0.5).until(
        expected_conditions.visibility_of_element_located((By.ID, "txtTranslation")))
    text.append(self.driver.find_element(By.ID, "txtTranslation").text)
finally:
    # Close the extra tab and return to the original one even when the wait
    # above times out, so the driver is not left on a stray tab.
    self.driver.close()
    self.driver.switch_to.window(original_tab)
# Save the collected pictures and translations into a .docx document.
def saveDoc(filename, save_dictss, save_dictws, save_dictws_translate):
    """Write one Word document for *filename* into ``./out_list``.

    The document gets *filename* as its title. For every key in
    *save_dictss* it then contains an ``"<index>-<key>"`` paragraph, the
    picture stored under that key, and, for each (picture, text) pair taken
    in lockstep from ``save_dictws[key]`` and ``save_dictws_translate[key]``,
    the picture followed by a "Translate" paragraph and the text at 6 pt.

    Args:
        filename: Source file name; used as the heading and, without its
            extension, as the output file name.
        save_dictss: Mapping of key -> picture passed to ``add_picture``.
        save_dictws: Mapping of key -> iterable of pictures.
        save_dictws_translate: Mapping of key -> iterable of texts, paired
            with ``save_dictws[key]``; extra items on either side are
            ignored by ``zip``.
    """
    import os

    list_save_dir = "./out_list"
    # Create the output directory up front so the final save cannot fail on a
    # missing folder after the whole document has been built.
    os.makedirs(list_save_dir, exist_ok=True)
    # Strip only the extension; split(".")[0] truncated names such as
    # "a.b.xlsx" to "a".
    header_name = os.path.splitext(filename)[0]

    doc = Document()
    doc.add_heading(filename, 0)
    for i, (k, v) in enumerate(save_dictss.items()):
        doc.add_paragraph(f'{i}-{k}')
        doc.add_picture(v, width=Pt(300))
        for pic, text in zip(save_dictws[k], save_dictws_translate[k]):
            print(pic)
            doc.add_picture(pic, width=Pt(300))
            doc.add_paragraph('Translate')
            run = doc.add_paragraph().add_run(text)
            run.font.size = Pt(6)
    doc.save(list_save_dir + f"/{header_name}.docx")
# Read the Excel file (no header row) and collect its first column as a list.
excel_path = f"{todolist}/{filename}"
df = pd.read_excel(excel_path, header=None, engine="openpyxl")
df_list = df.loc[:, 0].tolist()