需求背景
提供一个Word文档模板,使用python程序替换里边的占位符,替换内容包括文本和图片,然后输出docx或者PDF文件。
功能演示
输入示例
输出示例
实现程序
import os
import shutil
import subprocess
import time

from docx import Document
from docx.shared import Pt


class DocFiller:
    """Replace placeholder strings in a .docx template with text or images.

    ``placeholders`` maps a placeholder string to its replacement:

    * a ``str`` replaces the placeholder with that text;
    * a 3-tuple ``(image_path, width, height)`` replaces the placeholder with
      an inline picture, where ``width`` and ``height`` are given in points.

    Matching is done run by run, so a placeholder whose characters are spread
    over more than one run of a paragraph is not matched.
    """

    def __init__(self, doc_file_path, libreoffice_path=None):
        """Load the template.

        Args:
            doc_file_path: Path of the .docx template to open.
            libreoffice_path: Path of the LibreOffice ``soffice`` executable.
                Only needed for PDF output.
        """
        self.doc = Document(doc_file_path)
        self.libreoffice_path = libreoffice_path

    def _process_paragraphs(self, paragraphs, placeholders):
        """Apply the placeholder mapping to every run of every paragraph."""
        for paragraph in paragraphs:
            for run in paragraph.runs:
                self._process_run(run, placeholders)

    @staticmethod
    def _split_on_placeholders(text, keys):
        """Split ``text`` into ``(piece, is_placeholder)`` tuples.

        Pieces are returned in order and never empty. When several keys match
        at the same position the longest one wins.
        """
        pieces = []
        pos = 0
        while pos < len(text):
            hit_index, hit_key = -1, None
            for key in keys:
                index = text.find(key, pos)
                if index == -1:
                    continue
                earlier = hit_key is None or index < hit_index
                longer = index == hit_index and len(key) > len(hit_key)
                if earlier or longer:
                    hit_index, hit_key = index, key
            if hit_key is None:
                pieces.append((text[pos:], False))
                break
            if hit_index > pos:
                pieces.append((text[pos:hit_index], False))
            pieces.append((hit_key, True))
            pos = hit_index + len(hit_key)
        return pieces

    @staticmethod
    def _process_run(run, placeholders):
        """Substitute text and image placeholders inside a single run.

        Text placeholders are handled first: assigning ``run.text`` replaces
        the whole content of the run, so doing it after a picture has been
        inserted would delete that picture again.
        """
        for placeholder, replacement in placeholders.items():
            if not isinstance(replacement, str):
                continue
            # Only assign on an actual match; an unconditional assignment
            # would strip non-text content (e.g. pictures) from every run.
            if placeholder and placeholder in run.text:
                run.text = run.text.replace(placeholder, replacement)

        image_specs = {
            placeholder: replacement
            for placeholder, replacement in placeholders.items()
            if placeholder
            and isinstance(replacement, tuple)
            and len(replacement) == 3
        }
        if not image_specs:
            return

        current_text = run.text
        if not any(placeholder in current_text for placeholder in image_specs):
            return

        # Rebuild the run once, keeping the text around each placeholder and
        # inserting one picture per occurrence.
        pieces = DocFiller._split_on_placeholders(current_text, image_specs)
        run.clear()
        for piece, is_placeholder in pieces:
            if is_placeholder:
                image_path, width, height = image_specs[piece]
                run.add_picture(image_path, width=Pt(width), height=Pt(height))
            else:
                run.add_text(piece)

    def _process_tables(self, doc_tables, placeholders):
        """Apply the placeholder mapping to every cell, including nested tables."""
        for table in doc_tables:
            for row in table.rows:
                for cell in row.cells:
                    self._process_paragraphs(cell.paragraphs, placeholders)
                    # Tables placed inside a cell are not part of
                    # cell.paragraphs, so descend into them explicitly.
                    self._process_tables(cell.tables, placeholders)

    def fill(self, placeholders):
        """Replace placeholders in the body paragraphs and tables of the document."""
        self._process_paragraphs(self.doc.paragraphs, placeholders)
        self._process_tables(self.doc.tables, placeholders)

    def convert_to_pdf(self, docx_path, pdf_path):
        """Convert ``docx_path`` to ``pdf_path`` by running LibreOffice headless.

        Args:
            docx_path: Path of the .docx file to convert.
            pdf_path: Destination path of the PDF file.

        Returns:
            True when the conversion succeeded, False when LibreOffice exited
            with a non-zero status (the error is printed).

        Raises:
            ValueError: If no LibreOffice executable path was configured.
        """
        if not self.libreoffice_path:
            raise ValueError(
                'libreoffice_path must be set to convert a document to PDF'
            )

        # Use the absolute directory: dirname() of a bare file name is '',
        # which would be passed to --outdir as an empty argument.
        output_dir = os.path.dirname(os.path.abspath(pdf_path))
        os.makedirs(output_dir, exist_ok=True)

        # LibreOffice names its output after the input file; it is moved to
        # the requested path afterwards.
        temp_pdf_path = os.path.join(
            output_dir, os.path.basename(docx_path).rsplit('.', 1)[0] + '.pdf'
        )
        try:
            subprocess.run(
                [
                    self.libreoffice_path,
                    '--headless',
                    '--convert-to',
                    'pdf:writer_pdf_Export',
                    '--outdir',
                    output_dir,
                    docx_path,
                ],
                check=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"convert the document error: {e}")
            return False

        shutil.move(temp_pdf_path, pdf_path)
        print(f"conversion successful: {pdf_path}")
        return True

    def to_docx(self, outputfile):
        """Save the (filled) document as a .docx file."""
        self.doc.save(path_or_stream=outputfile)
        print('to docx complete')

    def to_pdf(self, outputfile):
        """Save the (filled) document as a PDF file.

        The document is first written to a temporary .docx file in the current
        working directory, converted, and the temporary file is removed again.
        """
        temp_file = f'{int(time.time() * 1e6)}.docx'
        self.to_docx(temp_file)
        try:
            converted = self.convert_to_pdf(temp_file, outputfile)
        finally:
            os.remove(temp_file)
        # Report completion only when the conversion actually succeeded.
        if converted:
            print('to pdf complete')


if __name__ == '__main__':
    libreoffice = 'D:/LibreOffice/program/soffice.exe'
    d = DocFiller('template.docx', libreoffice)
    data = {
        '{{meeting_time}}': '2024年12月12日 下午3点半',
        '{{meeting_address}}': '中央会议室',
        '{{host}}': '小白',
        '{{photo}}': ('./0.png', 80, 80),
        '{{image1}}': ('./1.jpg', 8, 8),
        '{{image2}}': ('./2.jpg', 300, 200),
    }
    d.fill(data)
    # Generate a docx file
    # d.to_docx('data/output.docx')
    # Generate a PDF file
    d.to_pdf('data/output.pdf')
说明
- 本程序依赖 python-docx 库和 LibreOffice,可以实现 docx 文件模板内容替换。
- 如果替换后只需生成 Word 文档,则无需安装 LibreOffice,也不用指定 LibreOffice 可执行文件路径。
- 如果需要生成 PDF 文件,则需要安装 LibreOffice,并将程序中的 LibreOffice 可执行文件路径修改为自己的路径。