Tesseract.js 是一个 JavaScript OCR 库,可以识别图像中几乎任何语言的文字,并支持将识别结果导出为 PDF,识别精准度较高。
1. 安装
npm install tesseract.js
2. 示例代码(Vue 3 版)
<template>
  <div class="container">
    <!-- Left column: image preview plus the action buttons. -->
    <div class="l_box">
      <el-image class="c_img" :src="url" fit="contain" />
      <div class="btn_box">
        <!-- Every action is disabled while a recognition run is in progress. -->
        <el-button type="primary" @click="getImgText" style="margin-right: 10px;" :disabled="loading">解 析</el-button>
        <!--
          auto-upload is off: the chosen file is only handed to handleChange, nothing is sent to a server.
          NOTE(review): with :limit="1" Element Plus may reject further selections after the first
          file (on-exceed hook) - confirm that picking a second image still triggers on-change.
        -->
        <el-upload
          class="upload-demo"
          :limit="1"
          :on-change="handleChange"
          accept=".jpg, .jpeg, .png, .bmp"
          :show-file-list="false"
          :auto-upload="false"
        >
          <el-button type="primary" :disabled="loading">上 传</el-button>
        </el-upload>
        <el-button type="primary" @click="download" style="margin-left: 10px;" :disabled="loading">下载PDF</el-button>
      </div>
    </div>
    <!-- Right column: recognized text; <pre> keeps the line breaks of the OCR output. -->
    <pre class="c_value" v-loading="loading">{{ word }}</pre>
  </div>
</template>
<script setup>
import { onMounted, onUnmounted, ref } from 'vue';
import { ElMessage } from 'element-plus';
import { createWorker } from 'tesseract.js';

// URL of the image that is previewed and recognized; starts with a sample image.
const url = ref('https://tesseract.projectnaptha.com/img/eng_bw.png');
// Text recognized from the current image.
const word = ref('');
// True while a recognition run is in progress; the template binds it to `disabled` and `v-loading`.
const loading = ref(false);
// Tesseract worker; stays null until init() has resolved.
const worker = ref(null);
// PDF data returned by the last successful recognition, or null.
const pdf = ref(null);

/**
 * Creates the Tesseract worker for the 'eng' and 'chi_sim' language data and
 * stores it in `worker`. Worker progress messages are logged to the console.
 *
 * @returns {Promise<void>} Resolves once the worker is stored; rejects if
 *   createWorker rejects.
 */
const init = async () => {
  // The second argument (1) is passed through to createWorker as the OCR engine mode.
  worker.value = await createWorker(['eng', 'chi_sim'], 1, {
    logger: (m) => console.log(m),
  });
};

onMounted(() => {
  // init() is asynchronous; report a failed start-up instead of leaving the
  // rejection unhandled.
  init().catch((error) => {
    console.error('Failed to create the Tesseract worker', error);
  });
});

onUnmounted(() => {
  // The worker may still be null if the component is unmounted before init() resolves.
  worker.value?.terminate();
});

// Recognize the text of the image behind the current URL
/**
 * Runs OCR on the image at `url` and publishes the result.
 *
 * On success `word` receives the recognized text and `pdf` the PDF data
 * returned by the worker. On failure (including a worker that is not ready
 * yet) the previous results are cleared so they cannot be mistaken for the
 * current image, the error is logged and a warning message is shown.
 * `loading` is true for the whole duration of the call.
 *
 * @returns {Promise<void>} Never rejects; failures are reported to the user.
 */
const getImgText = async () => {
  loading.value = true;
  try {
    const { data } = await worker.value.recognize(
      url.value,
      { pdfTitle: 'Example PDF' },
      // Ask the worker to also return the result as PDF data.
      { pdf: true },
    );
    pdf.value = data.pdf;
    word.value = data.text;
  } catch (error) {
    console.error(error);
    pdf.value = null;
    word.value = '';
    ElMessage({
      message: '解析失败',
      type: 'warning',
    });
  } finally {
    // Single exit point for the busy flag, whatever the outcome.
    loading.value = false;
  }
};
// Recognize the text of an uploaded file
/**
 * `on-change` handler of the upload component: previews the selected file and
 * runs OCR on it.
 *
 * @param {object} file - Upload entry; its `raw` property is turned into an
 *   object URL for the preview and the recognition.
 * @returns {Promise<void>} Resolves when the recognition run has finished.
 */
const handleChange = async (file) => {
  if (!file?.raw) {
    return;
  }

  const previousUrl = url.value;
  url.value = URL.createObjectURL(file.raw);

  // Release the object URL of the previously uploaded file; the initial
  // sample image is a regular https URL and must not be revoked.
  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  await getImgText();
};
// Download the PDF
const download = () => {const blob = new Blob([new Uint8Array(pdf.value)], { type: 'application/pdf' });const url = URL.createObjectURL(blob);const link = document.createElement('a');link.href = url;link.download = 'example.pdf';link.click();URL.revokeObjectURL(url);
}</script>
<style scoped lang="scss">
// Two-column flex layout: preview and buttons on the left, OCR text on the right.
.container {
  display: flex;
  justify-content: space-between;
  box-sizing: border-box;
  width: 100%;
  height: 100%;
  padding: 20px;

  // Left column.
  .l_box {
    width: 48%;
    height: 100%;
    margin-right: 2%;

    // NOTE(review): the image leaves 40px free while .btn_box is 50px tall - confirm this is intended.
    .c_img {
      width: 100%;
      height: calc(100% - 40px);
    }

    .btn_box {
      display: flex;
      align-items: center;
      height: 50px;
    }
  }

  // Right column: bordered box holding the recognized text.
  .c_value {
    width: 50%;
    min-height: 500px;
    padding: 20px;
    border: 1px solid #999;
    line-height: 30px;
  }
}
</style>
效果图
更多 API 可以前往官网 https://tesseract.projectnaptha.com 查看和尝试。