PaddleOCR 截图自动文字识别

春节假期在家无聊，撸了三个小工具：PC截图+编辑/PC录屏(用于meeting录屏)/PC截屏文字识别。因为感觉这三个小工具是工作中常常需要用到的，github上也有很多开源的，不过总有点或多或少的小问题，不利于自己的使用。脚本的编写尽量减少对三方库的使用。

三个工具均已完成，本文介绍的是其中之一，后续会将三个工具集成到一个工具中。

import tkinter as tk
from tkinter import ttk, messagebox, font, filedialog
from PIL import Image, ImageTk, ImageGrab
import sys
import tempfile
import threading
from pathlib import Path
import ctypes  # 导入 ctypes 库
import logging.handlers  # 用于日志轮转# 最小化控制台窗口
def minimize_console():
    """Minimize the console window attached to this process, if any.

    The call goes through ``ctypes.windll``, which only exists on Windows.
    On other platforms, or when the process has no console window
    (``GetConsoleWindow`` returns 0), the function does nothing.
    """
    windll = getattr(ctypes, "windll", None)
    if windll is None:
        # Not running on Windows: there is no Win32 console to minimize.
        return
    console_hwnd = windll.kernel32.GetConsoleWindow()
    if console_hwnd:
        # 6 is the nCmdShow value the original script passed to ShowWindow
        # in order to minimize the window.
        windll.user32.ShowWindow(console_hwnd, 6)


minimize_console()  # Minimize the console as soon as the script starts.

# Get the directory that contains this script.
def get_script_directory():
    """Return the directory containing this script file as a ``Path``."""
    script_file = Path(__file__)
    return script_file.parent


# Configure the log file path and the log level.
log_file_path = get_script_directory() / 'ocr_errors.log'

# A single rotating handler (5 MiB per file, 3 backups) owns the log file.
# Previously basicConfig(filename=...) opened the same file with a second,
# non-rotating handler, so every record was written twice and the extra open
# handle could get in the way of rotation.
handler = logging.handlers.RotatingFileHandler(
    log_file_path,
    maxBytes=1024 * 1024 * 5,
    backupCount=3,
    encoding='utf-8',  # log messages contain Chinese text
)
handler.setFormatter(
    logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)

# Root logger, kept at DEBUG as before.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)

# Save a temporary image to disk.
def save_temp_image(image, suffix='.png'):
    """Save ``image`` to a new temporary file and return the file's path.

    Args:
        image: Object exposing ``save(path)`` (the caller passes the
            screenshot returned by ``ImageGrab.grab``).
        suffix: File name suffix for the temporary file.

    Returns:
        ``Path`` of the written file. The caller is responsible for
        deleting it.

    Raises:
        Exception: Whatever ``image.save`` raises; the temporary file is
            removed before the exception propagates.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_path = Path(temp_file.name)
    # The handle is closed before saving so the file is opened only once.
    try:
        image.save(str(temp_path))
    except Exception:
        # Do not leave an orphaned file behind when saving fails.
        if temp_path.exists():
            temp_path.unlink()
        raise
    return temp_path


class OCRApp:
    """Select a screen region, run OCR on it and show the recognized text.

    The OCR model is loaded on a background thread while the user is
    already selecting a region; the Tk main thread polls for the result.
    """

    # Delay in milliseconds between checks for the background model load.
    MODEL_POLL_INTERVAL_MS = 100
    # Selections narrower or shorter than this many pixels are rejected.
    MIN_SELECTION_SIZE = 10
    # Upper bound for the height (in text lines) of the result text box.
    MAX_TEXT_LINES = 15

    def __init__(self):
        """Create the hidden root window, start loading OCR, start selecting."""
        try:
            self.root = tk.Tk()
            self.root.withdraw()
            self.screenshot = None
            self.ocr_model = None  # Set by load_ocr_model on a worker thread.
            self.ocr_load_error = None  # Message of a failed model load.
            self.recognized_text = ""
            self.main_frame = None
            self.load_win = None
            self.selection_win = None
            # Load the model in the background so that the selection overlay
            # can appear immediately after the script is started.
            self._start_model_loading()
            self.start_selection()
        except Exception as e:
            logger.error(f"程序启动失败: {str(e)}")
            self.show_crash_message(f"程序启动失败: {str(e)}")
            sys.exit(1)

    def _start_model_loading(self):
        """Start (or restart) the background thread that loads the model."""
        self.ocr_load_error = None
        threading.Thread(target=self.load_ocr_model, daemon=True).start()

    def load_ocr_model(self):
        """Create the PaddleOCR model; runs on a worker thread.

        Only plain attributes are written here (no Tk calls). On failure the
        error message is stored in ``ocr_load_error`` so that
        ``check_ocr_model`` can stop waiting instead of polling forever.
        """
        try:
            # Imported lazily: the import itself may be slow or may fail.
            from paddleocr import PaddleOCR
            self.ocr_model = PaddleOCR(use_angle_cls=True, show_log=False, lang='ch')
        except Exception as e:
            logger.error(f"OCR模型加载失败: {str(e)}")
            self.ocr_load_error = str(e) or type(e).__name__

    def _close_selection_window(self):
        """Destroy the selection overlay if one currently exists."""
        win = getattr(self, "selection_win", None)
        if win is not None:
            win.destroy()
            self.selection_win = None

    def _close_loading_window(self):
        """Destroy the "loading" window if one currently exists."""
        if self.load_win is not None:
            self.load_win.destroy()
            self.load_win = None

    def start_selection(self):
        """Show the fullscreen, semi-transparent overlay for region selection."""
        # A previous overlay may still be open (e.g. after a rejected
        # selection); never stack overlays on top of each other.
        self._close_selection_window()

        self.selection_win = tk.Toplevel()
        self.selection_win.attributes("-fullscreen", True)
        self.selection_win.attributes("-alpha", 0.3)
        # ESC anywhere in the overlay quits the program.
        self.selection_win.bind("<Escape>", self.on_escape)
        # Ask for keyboard focus so the ESC binding receives key events.
        self.selection_win.focus_force()

        self.canvas = tk.Canvas(
            self.selection_win,
            cursor="cross",
            bg="gray30",
            highlightthickness=0,
        )
        self.canvas.pack(fill=tk.BOTH, expand=True)

        self.start_x = self.start_y = 0
        self.rect_id = None
        self.crosshair_ids = []

        self.canvas.bind("<Button-1>", self.on_mouse_down)
        self.canvas.bind("<B1-Motion>", self.on_mouse_drag)
        self.canvas.bind("<ButtonRelease-1>", self.on_mouse_up)
        self.canvas.bind("<Motion>", self.on_mouse_move)

        self.escape_label = tk.Label(
            self.selection_win,
            text="按ESC键退出截图",
            fg="yellow",
            bg="gray20",
            font=("Helvetica", 12, "bold"),
        )
        self.escape_label.place(x=10, y=10)

        self.update_crosshair(0, 0)

    def on_mouse_down(self, event):
        """Remember the drag origin and remove crosshair/old rectangle."""
        self.start_x = event.x
        self.start_y = event.y
        self.clear_crosshair()
        if self.rect_id:
            self.canvas.delete(self.rect_id)
            self.rect_id = None

    def on_mouse_drag(self, event):
        """Create or resize the selection rectangle while dragging."""
        if self.rect_id:
            self.canvas.coords(
                self.rect_id, self.start_x, self.start_y, event.x, event.y
            )
        else:
            self.rect_id = self.canvas.create_rectangle(
                self.start_x, self.start_y,
                event.x, event.y,
                outline="blue", width=2, fill="gray75", tags="rect",
            )

    def on_mouse_up(self, event):
        """Validate the selection, grab it from the screen and start OCR.

        A rejected selection or a failed grab is reported in a dialog and
        the selection is restarted.
        """
        try:
            x1, x2 = sorted((self.start_x, event.x))
            y1, y2 = sorted((self.start_y, event.y))
            width, height = x2 - x1, y2 - y1
            if width < self.MIN_SELECTION_SIZE or height < self.MIN_SELECTION_SIZE:
                raise ValueError("选区过小，请选择更大的区域")
            if width > self.canvas.winfo_width() or height > self.canvas.winfo_height():
                raise ValueError("选区过大，请选择更小的区域")
            # NOTE(review): the overlay is still mapped while grabbing, as in
            # the original. Pillow's Windows grab is documented to skip
            # layered windows by default - confirm before relying on this on
            # other platforms.
            self.screenshot = ImageGrab.grab(bbox=(x1, y1, x2, y2))
        except Exception as e:
            logger.error(f"截图错误: {str(e)}")
            # Parent the dialog to the overlay so it is shown above it.
            messagebox.showerror("截图错误", str(e), parent=self.selection_win)
            self.restart_selection()
            return
        self._close_selection_window()
        self.initialize_ocr_and_process()

    def initialize_ocr_and_process(self):
        """Run OCR now if the model is ready, otherwise wait for it."""
        try:
            if self.ocr_model is None:
                self.load_win = self.show_loading("OCR模型正在加载中，请稍后...")
                self.root.after(self.MODEL_POLL_INTERVAL_MS, self.check_ocr_model)
            else:
                self._show_results()
        except Exception as e:
            logger.error(f"OCR初始化失败: {str(e)}")
            self._close_loading_window()
            self.handle_ocr_init_error(str(e))

    def check_ocr_model(self):
        """Poll the background load; continue, report failure, or re-arm."""
        if self.ocr_model is not None:
            self._close_loading_window()
            self._show_results()
        elif self.ocr_load_error is not None:
            # The worker thread gave up; stop polling and ask the user.
            self._close_loading_window()
            self.handle_ocr_init_error(self.ocr_load_error)
        else:
            self.root.after(self.MODEL_POLL_INTERVAL_MS, self.check_ocr_model)

    def _show_results(self):
        """Run OCR and, only if it succeeded, build and show the main window."""
        if not self.process_ocr():
            # process_ocr already restarted the selection; showing the main
            # window now would put an empty result window behind the overlay.
            return
        self.setup_main_ui()
        self.root.deiconify()

    @staticmethod
    def _extract_text(result):
        """Join the recognized lines of the first image in ``result``.

        ``result[0]`` is expected to be a sequence of entries whose
        ``[1][0]`` element is the text. A missing or empty result, or a
        ``None``/empty first entry, yields an empty string instead of
        raising ``TypeError``.
        """
        if not result or not result[0]:
            return ""
        return "\n".join(line[1][0] for line in result[0])

    def process_ocr(self):
        """Run OCR on the current screenshot and store the text.

        Returns:
            ``True`` when ``recognized_text`` was updated; ``False`` when
            OCR failed (the error was shown and selection was restarted).
        """
        try:
            temp_image_path = save_temp_image(self.screenshot)
            try:
                result = self.ocr_model.ocr(str(temp_image_path), cls=True)
            finally:
                # Always remove the temporary file, even when OCR raised.
                try:
                    temp_image_path.unlink()
                except OSError as e:
                    logger.warning(f"临时文件删除失败: {str(e)}")
            self.recognized_text = self._extract_text(result)
        except Exception as e:
            logger.error(f"OCR处理失败: {str(e)}")
            messagebox.showerror("识别错误", f"OCR处理失败: {str(e)}")
            self.restart_selection()
            return False
        return True

    def setup_main_ui(self):
        """Build the main window on first use, then show the latest result."""
        if self.main_frame is None:
            self._build_main_ui()

        self.root.title("文字识别")
        self.update_image_display()
        self.text_area.delete("1.0", tk.END)
        self.text_area.insert(tk.END, self.recognized_text.strip())
        self.update_text_area_height()
        # Keep the result window above other windows.
        self.root.attributes('-topmost', True)

    def _build_main_ui(self):
        """Create the widgets of the main window (called once)."""
        self.main_frame = ttk.Frame(self.root, padding=20)
        self.main_frame.grid(row=0, column=0, sticky="nsew")
        self.root.grid_rowconfigure(0, weight=1)
        self.root.grid_columnconfigure(0, weight=1)

        # A vertical PanedWindow separates the image pane from the text pane.
        self.paned_window = ttk.PanedWindow(self.main_frame, orient=tk.VERTICAL)
        self.paned_window.grid(row=0, column=0, sticky="nsew")

        # Image pane: a canvas with a vertical scrollbar.
        self.image_frame = ttk.Frame(self.paned_window)
        self.image_canvas = tk.Canvas(
            self.image_frame,
            highlightbackground=self.root.cget("bg"),
            highlightthickness=0,
        )
        self.image_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self.image_scrollbar = ttk.Scrollbar(
            self.image_frame, orient=tk.VERTICAL, command=self.image_canvas.yview
        )
        self.image_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.image_canvas.config(yscrollcommand=self.image_scrollbar.set)
        self.image_canvas.bind("<Configure>", self.on_canvas_configure)

        self.image_container = ttk.Frame(self.image_canvas)
        self.image_container_id = self.image_canvas.create_window(
            (0, 0), window=self.image_container, anchor="nw"
        )
        self.img_label = ttk.Label(self.image_container)
        self.img_label.pack(fill=tk.BOTH, expand=True)

        # Text pane.
        custom_font = font.Font(family="Microsoft YaHei", size=9)
        self.text_area = tk.Text(
            self.paned_window,
            wrap=tk.WORD,
            font=custom_font,
            height=self.MAX_TEXT_LINES,  # initial height in lines
        )

        # The panes are managed by the PanedWindow itself, so they are not
        # packed separately.
        self.paned_window.add(self.image_frame)
        self.paned_window.add(self.text_area)

        btn_frame = ttk.Frame(self.main_frame)
        btn_frame.grid(row=1, column=0, sticky="ew", pady=10)

        # The pane row absorbs extra space; the button row keeps its height.
        self.main_frame.grid_rowconfigure(0, weight=1)
        self.main_frame.grid_rowconfigure(1, weight=0)
        self.main_frame.grid_columnconfigure(0, weight=1)

        ttk.Button(
            btn_frame, text="重新选择", command=self.restart_selection
        ).pack(side=tk.LEFT, padx=5)
        ttk.Button(
            btn_frame, text="复制结果", command=self.copy_result
        ).pack(side=tk.LEFT, padx=5)
        ttk.Button(
            btn_frame, text="退出", command=self.safe_exit
        ).pack(side=tk.RIGHT, padx=5)

    def update_image_display(self):
        """Show the current screenshot and size the canvas to fit it."""
        if self.screenshot is None:
            return
        photo = ImageTk.PhotoImage(self.screenshot)
        self.img_label.config(image=photo)
        # Keep a reference on the widget so the image is not garbage
        # collected while it is displayed.
        self.img_label.image = photo

        img_width, img_height = self.screenshot.size
        # The image pane never grows beyond half of the screen height.
        max_image_height = self.root.winfo_screenheight() // 2
        self.image_canvas.config(
            scrollregion=(0, 0, img_width, img_height),
            height=min(img_height, max_image_height),
        )

    def on_canvas_configure(self, event):
        """Recompute the scroll region when the image canvas is resized."""
        self.image_canvas.config(scrollregion=self.image_canvas.bbox("all"))

    def show_loading(self, message):
        """Open a small window with ``message`` and a busy indicator.

        Returns:
            The created ``Toplevel``; the caller destroys it when done.
        """
        load_win = tk.Toplevel()
        load_win.title("请稍候")
        frame = ttk.Frame(load_win, padding=20)
        frame.pack()
        ttk.Label(frame, text=message).pack(pady=10)
        progress = ttk.Progressbar(frame, mode='indeterminate')
        progress.pack(pady=5)
        progress.start()
        return load_win

    def handle_ocr_init_error(self, error_msg):
        """Ask whether to retry after an OCR initialization failure."""
        choice = messagebox.askretrycancel(
            "OCR初始化失败",
            f"{error_msg}\n\n是否重试？",
            icon='error',
        )
        if not choice:
            self.safe_exit()
            return
        # Only the model load goes back to a worker thread; everything that
        # touches Tk widgets stays on the thread running the main loop.
        if self.ocr_model is None:
            self._start_model_loading()
        self.initialize_ocr_and_process()

    def restart_selection(self):
        """Hide the main window, reset the result and select again."""
        if self.root.winfo_exists():
            self.root.withdraw()
        self.screenshot = None
        self.recognized_text = ""
        self.clear_ui()
        self.start_selection()

    def clear_ui(self):
        """Clear image and text widgets if the main window was built."""
        img_label = getattr(self, 'img_label', None)
        if img_label is not None:
            img_label.config(image='')
            img_label.image = None
        text_area = getattr(self, 'text_area', None)
        if text_area is not None:
            text_area.delete("1.0", tk.END)

    def copy_result(self):
        """Copy the recognized text to the clipboard and confirm it."""
        self.root.clipboard_clear()
        self.root.clipboard_append(self.recognized_text)
        messagebox.showinfo("成功", "已复制到剪贴板")

    def safe_exit(self):
        """Destroy the root window (if it still exists) and exit."""
        try:
            if self.root.winfo_exists():
                self.root.destroy()
        except tk.TclError:
            # The application was already destroyed; nothing left to close.
            pass
        sys.exit(0)

    def show_crash_message(self, message):
        """Show a fatal-error dialog using a temporary hidden root window."""
        crash_win = tk.Tk()
        crash_win.withdraw()
        messagebox.showerror("致命错误", message)
        crash_win.destroy()

    def on_escape(self, event):
        """Close the selection overlay and quit when ESC is pressed."""
        self._close_selection_window()
        self.safe_exit()

    def on_mouse_move(self, event):
        """Move the crosshair to the pointer position."""
        self.update_crosshair(event.x, event.y)

    def update_crosshair(self, x, y):
        """Redraw the horizontal and vertical crosshair lines at (x, y)."""
        self.clear_crosshair()
        self.crosshair_ids.append(
            self.canvas.create_line(
                0, y, self.canvas.winfo_width(), y,
                tags="crosshair", fill="yellow", width=2,
            )
        )
        self.crosshair_ids.append(
            self.canvas.create_line(
                x, 0, x, self.canvas.winfo_height(),
                tags="crosshair", fill="yellow", width=2,
            )
        )

    def clear_crosshair(self):
        """Delete the crosshair lines from the selection canvas."""
        for crosshair_id in self.crosshair_ids:
            self.canvas.delete(crosshair_id)
        self.crosshair_ids = []

    def save_image(self):
        """Ask for a file name and save the current screenshot there."""
        if self.screenshot is None:
            return
        file_path = filedialog.asksaveasfilename(
            defaultextension=".png",
            filetypes=[
                ("PNG files", "*.png"),
                ("JPEG files", "*.jpg"),
                ("All files", "*.*"),
            ],
        )
        if file_path:
            self.screenshot.save(file_path)
            messagebox.showinfo("保存成功", f"图片已保存到 {file_path}")

    def update_text_area_height(self):
        """Fit the text box height to its content, capped at MAX_TEXT_LINES."""
        # 'end-1c' is the position of the last character; its line number is
        # the number of lines currently in the widget.
        line_count = int(self.text_area.index('end-1c').split('.')[0])
        self.text_area.config(height=min(line_count, self.MAX_TEXT_LINES))

    def run(self):
        """Enter the Tk main loop."""
        self.root.mainloop()


if __name__ == "__main__":
    app = OCRApp()
    app.run()