目录
效果
项目
代码
下载
效果
项目
代码
using Aspose.Cells;
using NLog;
using OpenCvSharp;
using OpenVINO.OCRService;
using Sdcb.OpenVINO;
using Sdcb.OpenVINO.PaddleOCR;
using Sdcb.OpenVINO.PaddleOCR.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace OpenVINO.OCR
{
public partial class frmMain : Form
{
// Folder the executable was launched from; assigned in frmMain_Load.
private string startupPath;
// Filter string handed to the spreadsheet open-file dialog.
private string excelFileFilter = "表格|*.xlsx;*.xls;";
private Logger log = NLog.LogManager.GetCurrentClassLogger();
// Cancellation source of the current download/OCR run; created in button1_Click.
private CancellationTokenSource cts;
// Records parsed from the spreadsheet that still have to be downloaded.
private ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
// Records whose images were downloaded and that now wait for OCR.
private ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();
// Snapshots of the "save image" / "save OCR text" check boxes, taken when a run starts.
private bool saveImg;
private bool saveOcr;
// Number of records whose OCR pass has finished.
private int ocrNum;
// Number of records that were queued when the run started.
private int totalCount;
// Number of records whose images have been downloaded.
private int downloadCount;
// Number of records flagged by one of the rule checks.
private int vioIDCount;

/// <summary>
/// Creates the designer-generated controls and asks NLog to re-initialise its
/// RichTextBox targets for this form.
/// </summary>
public frmMain()
{
    InitializeComponent();
    NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
}
/// <summary>
/// Form load handler: enforces the trial expiry date, builds the shared PaddleOCR engine
/// and loads the banned-word and phone-prefix rule lists.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void frmMain_Load(object sender, EventArgs e)
{
    // Trial build: refuse to run after the hard-coded expiry date.
    DateTime limitTime = new DateTime(2024, 08, 30, 00, 00, 00);
    if (DateTime.Now > limitTime)
    {
        MessageBox.Show("此软件试用期已过");
        Application.Exit();
        // Application.Exit() only requests shutdown; without this return the handler
        // would still go on to load the OCR models and the rule files.
        return;
    }

    // OCR engine initialisation. Model folders live under <startup>\inference.
    startupPath = System.Windows.Forms.Application.StartupPath;
    string inferenceDir = Path.Combine(startupPath, "inference");
    string detectionModelDir = Path.Combine(inferenceDir, "ch_PP-OCRv3_det_infer");
    string classificationModelDir = Path.Combine(inferenceDir, "ch_ppocr_mobile_v2.0_cls_infer");
    string recognitionModelDir = Path.Combine(inferenceDir, "ch_PP-OCRv3_rec_infer");
    string labelFilePath = Path.Combine(inferenceDir, "ppocr_keys.txt");
    FullOcrModel model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);

    PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions();
    paddleOcrOptions.DetectionDeviceOptions = new DeviceOptions("CPU");
    paddleOcrOptions.DetectionStaticSize = new OpenCvSharp.Size(800, 800);
    paddleOcrOptions.RecognitionStaticWidth = 512;

    Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
    Program.paddleOcr.Detector.UnclipRatio = 1.5f;
    Program.paddleOcr.AllowRotateDetection = true;      // allow text at an angle
    Program.paddleOcr.Enable180Classification = false;  // no handling of text rotated by more than 90 degrees

    ServicePointManager.Expect100Continue = false;
    ServicePointManager.DefaultConnectionLimit = 512;

    // Load the rule lists. The paths are relative to the current working directory.
    // Blank lines are skipped: an empty rule would match any text in a substring test.
    // NOTE(review): Common.RuleContainsCheck / RuleTelCheck are not visible here - confirm.
    Common.ltRuleContains.Clear();
    Common.ltRuleTel.Clear();

    string ruleContainsPath = "rules\\rule_contains.txt";
    if (File.Exists(ruleContainsPath))
    {
        Common.ltRuleContains = File.ReadAllLines(ruleContainsPath)
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToList();
    }
    StringBuilder sb = new StringBuilder();
    foreach (var item in Common.ltRuleContains)
    {
        sb.AppendLine(item);
    }
    log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "个违禁词,内容如下:\r\n" + sb.ToString());

    string ruleTelPath = "rules\\rule_tel.txt";
    if (File.Exists(ruleTelPath))
    {
        foreach (var item in File.ReadAllLines(ruleTelPath))
        {
            if (string.IsNullOrWhiteSpace(item))
            {
                continue;
            }
            // Prefixes are stored lower-cased.
            Common.ltRuleTel.Add(item.ToLower());
        }
    }
    sb.Clear();
    foreach (var item in Common.ltRuleTel)
    {
        sb.AppendLine(item);
    }
    log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "个号码前缀,内容如下:\r\n" + sb.ToString());
}
/// <summary>
/// "Select spreadsheet" button: lets the user pick an Excel file and turns every data row
/// of the first worksheet into an ImgInfo record (id, title and a list of image URLs).
/// </summary>
/// <remarks>
/// The new queue is published to <c>ltImgInfo</c> only after the whole sheet has been parsed,
/// so a failure half-way through leaves the previously loaded records untouched.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // Columns from this index on are passed through Common.GetScrUrl, which returns a list
    // of URLs; earlier columns are used as a URL directly.
    // NOTE(review): presumably these columns hold HTML with src attributes - confirm.
    const int firstMultiUrlColumn = 7;

    try
    {
        string excelPath;
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = excelFileFilter;
            if (ofd.ShowDialog() != DialogResult.OK)
            {
                return;
            }
            excelPath = ofd.FileName;
        }

        log.Info("解析中……");
        Application.DoEvents();
        Stopwatch sw = Stopwatch.StartNew();

        Workbook workbook = new Workbook(excelPath);
        Cells cells = workbook.Worksheets[0].Cells;
        int columnCount = cells.MaxColumn + 1;
        // Start at row 1 to skip the header row; MaxDataRow is then the number of rows to export.
        System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, columnCount);

        ConcurrentQueue<ImgInfo> parsed = new ConcurrentQueue<ImgInfo>();
        int imgCount = 0;
        foreach (DataRow row in dataTable1.Rows)
        {
            ImgInfo record = new ImgInfo();
            record.id = row[0].ToString();
            record.title = row[1].ToString();

            List<MatInfo> list = new List<MatInfo>();
            for (int i = 2; i < columnCount; i++)
            {
                string cellText = row[i].ToString();
                if (string.IsNullOrEmpty(cellText))
                {
                    continue;
                }
                if (i >= firstMultiUrlColumn)
                {
                    foreach (string url in Common.GetScrUrl(cellText))
                    {
                        list.Add(new MatInfo { url = url });
                    }
                }
                else
                {
                    list.Add(new MatInfo { url = cellText });
                }
            }

            record.images = list;
            imgCount += list.Count;
            parsed.Enqueue(record);
        }

        // Publish only a completely parsed queue.
        ltImgInfo = parsed;
        log.Info("解析完毕,一共[" + parsed.Count + "]条记录,[" + imgCount + "]张图片,耗时:" + sw.ElapsedMilliseconds + "毫秒");
    }
    catch (Exception ex)
    {
        log.Error("解析表格异常:" + ex.Message);
        MessageBox.Show("解析表格异常:" + ex.Message);
    }
}
/// <summary>
/// Updates the progress text box from a worker thread by marshalling onto the UI thread.
/// </summary>
/// <param name="total">Total number of records in the run.</param>
/// <param name="ocrNum">Number of records whose OCR pass has finished.</param>
/// <param name="downloadCount">Number of records whose images have been downloaded.</param>
/// <param name="time">Elapsed time of the run in milliseconds.</param>
/// <param name="vioIDCount">Number of records flagged as violations.</param>
private void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
{
    // Nothing to update once the form is closing; Invoke would throw on a control
    // without a handle and take the calling worker down with it.
    if (txtTotal.IsDisposed || !txtTotal.IsHandleCreated)
    {
        return;
    }

    try
    {
        txtTotal.Invoke(new Action(() =>
        {
            TimeSpan ts = TimeSpan.FromMilliseconds(time);
            txtTotal.Text = string.Format("下载完成:{0}/{1},识别完成:{2}/{3},违规ID数量:{5},用时:{4}"
                , downloadCount
                , total
                , ocrNum
                , total
                , ts.ToString()
                , vioIDCount
                );
        }));
    }
    catch (InvalidOperationException)
    {
        // The control was disposed between the check above and the Invoke call
        // (ObjectDisposedException derives from InvalidOperationException).
        // A lost progress update while shutting down is harmless.
    }
}
// How long an idle worker waits before polling its queue again.
private const int IdlePollMilliseconds = 20;

/// <summary>
/// Start button. After the user confirms, wipes the previous results and launches two
/// download workers plus one OCR worker over the records parsed from the spreadsheet.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    if (ltImgInfo.Count == 0)
    {
        MessageBox.Show("请先选择表格!");
        return;
    }
    DialogResult result = MessageBox.Show("确认开始下载识别?此操作会清空上一次的数据,请注意备份!", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
    if (result != DialogResult.Yes)
    {
        log.Info("取消开始下载识别!");
        return;
    }
    log.Info("确认开始下载识别!");

    PrepareOutputFolders();

    btnStop.Enabled = true;
    btnStart.Enabled = false;
    chkSaveImg.Enabled = false;
    chkSaveOcr.Enabled = false;
    saveImg = chkSaveImg.Checked;
    saveOcr = chkSaveOcr.Checked;
    Application.DoEvents();

    // Every run gets its own token and queues, captured in locals. Workers of an earlier,
    // cancelled run therefore keep seeing *their* cancelled token and *their* queues instead
    // of picking up the fields that this method is about to overwrite.
    cts = new CancellationTokenSource();
    CancellationToken token = cts.Token;
    ConcurrentQueue<ImgInfo> pending = ltImgInfo;
    ConcurrentQueue<ImgInfo> downloaded = new ConcurrentQueue<ImgInfo>();
    matQueue = downloaded;
    Stopwatch total = Stopwatch.StartNew();

    ocrNum = 0;
    downloadCount = 0;
    vioIDCount = 0;              // was not reset before, so violations accumulated across runs
    totalCount = pending.Count;  // number of records in this run

    const int downloadThreadNum = 2;
    for (int i = 0; i < downloadThreadNum; i++)
    {
        Task.Factory.StartNew(() => RunDownloadWorker(pending, downloaded, token), TaskCreationOptions.LongRunning);
    }
    Task.Factory.StartNew(() => RunOcrWorker(downloaded, total, token), TaskCreationOptions.LongRunning);
}

/// <summary>
/// Creates the output folders, deletes every file below "result" and writes the
/// column header of result.txt.
/// </summary>
private void PrepareOutputFolders()
{
    // CreateDirectory is a no-op for folders that already exist.
    Directory.CreateDirectory("img");
    Directory.CreateDirectory("ocr_result");
    Directory.CreateDirectory("result");
    Directory.CreateDirectory("result//img");

    foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
    {
        File.Delete(filePath);
    }

    // NOTE(review): "titel" looks like a typo for "title"; kept so the output format is unchanged.
    File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");
}

/// <summary>
/// Download worker: takes records from <paramref name="pending"/>, downloads all of their
/// images and hands the record to <paramref name="downloaded"/>.
/// </summary>
/// <param name="pending">Records waiting to be downloaded.</param>
/// <param name="downloaded">Queue read by the OCR worker.</param>
/// <param name="token">Cancellation token of the run this worker belongs to.</param>
private void RunDownloadWorker(ConcurrentQueue<ImgInfo> pending, ConcurrentQueue<ImgInfo> downloaded, CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        if (Volatile.Read(ref downloadCount) == totalCount)
        {
            log.Info("--------------------------------->下载完成!<----------------------------------");
            return;
        }

        ImgInfo imgInfo;
        if (!pending.TryDequeue(out imgInfo))
        {
            // The other worker is still busy with the last record; wait instead of spinning.
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        // Simple back-pressure: slow down while the OCR worker is behind.
        if (downloaded.Count > 50)
        {
            Thread.Sleep(1000);
        }
        if (downloaded.Count > 100)
        {
            Thread.Sleep(2000);
        }

        for (int j = 0; j < imgInfo.images.Count; j++)
        {
            if (token.IsCancellationRequested)
            {
                break;
            }
            DownloadImage(imgInfo, j);
        }

        if (token.IsCancellationRequested)
        {
            // Do not publish into (or count towards) a run that has been stopped.
            DisposeMats(imgInfo);
            return;
        }

        downloaded.Enqueue(imgInfo);
        Interlocked.Increment(ref downloadCount);
    }
}

/// <summary>
/// Downloads one image of a record and stores it as a BGR Mat on the record.
/// Failures are logged and leave the image's mat unset, which makes the OCR worker skip it.
/// </summary>
/// <param name="imgInfo">Record the image belongs to.</param>
/// <param name="index">Index of the image inside <c>imgInfo.images</c>.</param>
private void DownloadImage(ImgInfo imgInfo, int index)
{
    MatInfo image = imgInfo.images[index];
    try
    {
        Stopwatch sw = Stopwatch.StartNew();

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(image.url);
        request.KeepAlive = false;
        request.ServicePoint.Expect100Continue = false;
        request.Timeout = 2000;          // 2 seconds
        request.ReadWriteTimeout = 2000; // 2 seconds
        request.ServicePoint.UseNagleAlgorithm = false;
        request.ServicePoint.ConnectionLimit = 65500;
        request.AllowWriteStreamBuffering = false;
        request.Proxy = null;
        request.CookieContainer = new CookieContainer();
        request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(image.url).Host });

        // Image.FromStream requires its stream to stay open for the lifetime of the image,
        // so the response is buffered and the buffer outlives the bitmap.
        using (MemoryStream buffer = new MemoryStream())
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            {
                body.CopyTo(buffer);
            }
            buffer.Position = 0;

            using (Bitmap bmp = (Bitmap)System.Drawing.Image.FromStream(buffer))
            {
                sw.Stop();
                string name = imgInfo.id + "_" + index;
                if (saveImg)
                {
                    bmp.Save("img//" + name + ".jpg");
                }

                Mat mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);
                try
                {
                    if (mat.Channels() == 4)
                    {
                        Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
                    }
                }
                catch
                {
                    mat.Dispose();
                    throw;
                }

                image.mat = mat;
                image.name = name;
            }
        }

        log.Info(" " + image.name + "-->下载用时:" + sw.ElapsedMilliseconds + "毫秒");
    }
    catch (Exception ex)
    {
        log.Error("---->id:" + imgInfo.id + ",url[" + image.url + "],下载异常:" + ex.Message);
    }
}

/// <summary>
/// OCR worker: takes downloaded records, runs OCR and the rule checks on their images
/// until the first violation, then updates the progress display.
/// </summary>
/// <param name="downloaded">Queue filled by the download workers.</param>
/// <param name="total">Stopwatch started when the run began.</param>
/// <param name="token">Cancellation token of the run this worker belongs to.</param>
private void RunOcrWorker(ConcurrentQueue<ImgInfo> downloaded, Stopwatch total, CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        if (Volatile.Read(ref ocrNum) == totalCount)
        {
            log.Info("--------------------------------->识别完成!<----------------------------------");
            return;
        }

        ImgInfo imgInfo;
        if (!downloaded.TryDequeue(out imgInfo))
        {
            // Nothing downloaded yet; wait instead of spinning on a core.
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        Stopwatch perID = Stopwatch.StartNew();
        int imagesCount = imgInfo.images.Count;
        try
        {
            for (int j = 0; j < imagesCount; j++)
            {
                if (token.IsCancellationRequested)
                {
                    return;
                }
                // Only the first violating image of a record is reported.
                if (RecognizeImage(imgInfo, imgInfo.images[j]))
                {
                    break;
                }
            }
        }
        finally
        {
            // Releases every Mat of the record, including the ones skipped after a violation.
            DisposeMats(imgInfo);
        }
        perID.Stop();

        log.Info("---->id:" + imgInfo.id + ",图片张数[" + imagesCount + "],识别小计用时:" + perID.ElapsedMilliseconds + "毫秒");
        Interlocked.Increment(ref ocrNum);
        ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);
    }
}

/// <summary>
/// Runs OCR on one image and applies the banned-word check followed by the phone-number check.
/// </summary>
/// <param name="imgInfo">Record the image belongs to.</param>
/// <param name="image">Image to recognise; skipped when it has no usable mat.</param>
/// <returns><c>true</c> when a violation was found and recorded; otherwise <c>false</c>.</returns>
private bool RecognizeImage(ImgInfo imgInfo, MatInfo image)
{
    try
    {
        if (image.mat == null || image.mat.Empty())
        {
            return false;
        }

        Stopwatch sw = Stopwatch.StartNew();
        PaddleOcrResult ocrResult = Program.paddleOcr.Run(image.mat);
        sw.Stop();
        log.Info(" " + image.name + "---->识别用时:" + sw.ElapsedMilliseconds + "毫秒");

        // Keep confident regions only, ordered top-to-bottom and then left-to-right.
        string ocrInfo = string.Join("\n", ocrResult.Regions
            .Where(region => region.Score > 0.8)
            .OrderBy(region => region.Rect.Center.Y)
            .ThenBy(region => region.Rect.Center.X)
            .Select(region => region.Text));
        if (saveOcr)
        {
            File.WriteAllText("ocr_result//" + image.name + ".txt", ocrInfo);
        }

        // Rule checks work on the text with outer whitespace and all spaces removed.
        ocrInfo = ocrInfo.Trim().Replace(" ", "");
        string words;
        if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
        {
            ReportViolation(imgInfo, image, "包含违禁词:", words, ocrInfo);
            return true;
        }
        if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
        {
            ReportViolation(imgInfo, image, "疑似包含电话号码:", words, ocrInfo);
            return true;
        }
        return false;
    }
    catch (Exception ex)
    {
        log.Error(" " + image.name + "---->识别异常:" + ex.Message);
        return false;
    }
}

/// <summary>
/// Logs a violation, appends it to result.txt and result_detail.txt, saves the offending
/// image under result//img and bumps the violation counter.
/// </summary>
/// <param name="imgInfo">Record that violated a rule.</param>
/// <param name="image">Image the violation was found in.</param>
/// <param name="label">Violation kind prefix written in front of <paramref name="words"/>.</param>
/// <param name="words">Matched words reported by the rule check.</param>
/// <param name="ocrInfo">Normalised OCR text of the image.</param>
private void ReportViolation(ImgInfo imgInfo, MatInfo image, string label, string words, string ocrInfo)
{
    string resultInfo = "ID:" + imgInfo.id + ",Title:[" + imgInfo.title + "],------>" + label + words;
    log.Info(resultInfo);
    File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t" + label + words + "\r\n");
    File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");
    Cv2.ImWrite("result//img//" + image.name + ".jpg", image.mat);
    Interlocked.Increment(ref vioIDCount);
}

/// <summary>Disposes and clears every Mat still attached to a record.</summary>
/// <param name="imgInfo">Record whose image mats should be released.</param>
private static void DisposeMats(ImgInfo imgInfo)
{
    foreach (MatInfo image in imgInfo.images)
    {
        if (image.mat != null)
        {
            image.mat.Dispose();
            image.mat = null;
        }
    }
}
/// <summary>
/// Stop button: requests cancellation of the running download/OCR workers and
/// re-enables the start controls.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // cts stays null until the first run has been started.
    if (cts != null)
    {
        cts.Cancel();
    }
    btnStop.Enabled = false;
    btnStart.Enabled = true;
    chkSaveImg.Enabled = true;
    chkSaveOcr.Enabled = true;
}
}
}
using Aspose.Cells;
using NLog;
using OpenCvSharp;
using OpenVINO.OCRService;
using Sdcb.OpenVINO;
using Sdcb.OpenVINO.PaddleOCR;
using Sdcb.OpenVINO.PaddleOCR.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

// NOTE(review): this is a second copy of OpenVINO.OCR.frmMain; the same class is already
// defined earlier in this file. The copy had been flattened onto a few lines, which made
// every "//" comment swallow the code behind it. It is restored here as formatted code;
// one of the two copies should be removed.
namespace OpenVINO.OCR
{
    /// <summary>
    /// Main window: loads image URLs from a spreadsheet, downloads the images, runs PaddleOCR
    /// on them and records the entries whose text matches a banned-word or phone-number rule.
    /// </summary>
    public partial class frmMain : Form
    {
        // How long an idle worker waits before polling its queue again.
        private const int IdlePollMilliseconds = 20;

        // Folder the executable was launched from; assigned in frmMain_Load.
        private string startupPath;
        // Filter string handed to the spreadsheet open-file dialog.
        private string excelFileFilter = "表格|*.xlsx;*.xls;";
        private Logger log = NLog.LogManager.GetCurrentClassLogger();
        // Cancellation source of the current download/OCR run; created in button1_Click.
        private CancellationTokenSource cts;
        // Records parsed from the spreadsheet that still have to be downloaded.
        private ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
        // Records whose images were downloaded and that now wait for OCR.
        private ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();
        // Snapshots of the "save image" / "save OCR text" check boxes, taken when a run starts.
        private bool saveImg;
        private bool saveOcr;
        // Number of records whose OCR pass has finished.
        private int ocrNum;
        // Number of records that were queued when the run started.
        private int totalCount;
        // Number of records whose images have been downloaded.
        private int downloadCount;
        // Number of records flagged by one of the rule checks.
        private int vioIDCount;

        /// <summary>
        /// Creates the designer-generated controls and asks NLog to re-initialise its
        /// RichTextBox targets for this form.
        /// </summary>
        public frmMain()
        {
            InitializeComponent();
            NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
        }

        /// <summary>
        /// Form load handler: enforces the trial expiry date, builds the shared PaddleOCR engine
        /// and loads the banned-word and phone-prefix rule lists.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void frmMain_Load(object sender, EventArgs e)
        {
            // Trial build: refuse to run after the hard-coded expiry date.
            DateTime limitTime = new DateTime(2024, 08, 30, 00, 00, 00);
            if (DateTime.Now > limitTime)
            {
                MessageBox.Show("此软件试用期已过");
                Application.Exit();
                // Application.Exit() only requests shutdown; without this return the handler
                // would still go on to load the OCR models and the rule files.
                return;
            }

            // OCR engine initialisation. Model folders live under <startup>\inference.
            startupPath = System.Windows.Forms.Application.StartupPath;
            string inferenceDir = Path.Combine(startupPath, "inference");
            string detectionModelDir = Path.Combine(inferenceDir, "ch_PP-OCRv3_det_infer");
            string classificationModelDir = Path.Combine(inferenceDir, "ch_ppocr_mobile_v2.0_cls_infer");
            string recognitionModelDir = Path.Combine(inferenceDir, "ch_PP-OCRv3_rec_infer");
            string labelFilePath = Path.Combine(inferenceDir, "ppocr_keys.txt");
            FullOcrModel model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);

            PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions();
            paddleOcrOptions.DetectionDeviceOptions = new DeviceOptions("CPU");
            paddleOcrOptions.DetectionStaticSize = new OpenCvSharp.Size(800, 800);
            paddleOcrOptions.RecognitionStaticWidth = 512;

            Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
            Program.paddleOcr.Detector.UnclipRatio = 1.5f;
            Program.paddleOcr.AllowRotateDetection = true;      // allow text at an angle
            Program.paddleOcr.Enable180Classification = false;  // no handling of text rotated by more than 90 degrees

            ServicePointManager.Expect100Continue = false;
            ServicePointManager.DefaultConnectionLimit = 512;

            // Load the rule lists. The paths are relative to the current working directory.
            // Blank lines are skipped: an empty rule would match any text in a substring test.
            // NOTE(review): Common.RuleContainsCheck / RuleTelCheck are not visible here - confirm.
            Common.ltRuleContains.Clear();
            Common.ltRuleTel.Clear();

            string ruleContainsPath = "rules\\rule_contains.txt";
            if (File.Exists(ruleContainsPath))
            {
                Common.ltRuleContains = File.ReadAllLines(ruleContainsPath)
                    .Where(line => !string.IsNullOrWhiteSpace(line))
                    .ToList();
            }
            StringBuilder sb = new StringBuilder();
            foreach (var item in Common.ltRuleContains)
            {
                sb.AppendLine(item);
            }
            log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "个违禁词,内容如下:\r\n" + sb.ToString());

            string ruleTelPath = "rules\\rule_tel.txt";
            if (File.Exists(ruleTelPath))
            {
                foreach (var item in File.ReadAllLines(ruleTelPath))
                {
                    if (string.IsNullOrWhiteSpace(item))
                    {
                        continue;
                    }
                    // Prefixes are stored lower-cased.
                    Common.ltRuleTel.Add(item.ToLower());
                }
            }
            sb.Clear();
            foreach (var item in Common.ltRuleTel)
            {
                sb.AppendLine(item);
            }
            log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "个号码前缀,内容如下:\r\n" + sb.ToString());
        }

        /// <summary>
        /// "Select spreadsheet" button: lets the user pick an Excel file and turns every data row
        /// of the first worksheet into an ImgInfo record (id, title and a list of image URLs).
        /// </summary>
        /// <remarks>
        /// The new queue is published to <c>ltImgInfo</c> only after the whole sheet has been
        /// parsed, so a failure half-way through leaves the previously loaded records untouched.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // Columns from this index on are passed through Common.GetScrUrl, which returns a
            // list of URLs; earlier columns are used as a URL directly.
            // NOTE(review): presumably these columns hold HTML with src attributes - confirm.
            const int firstMultiUrlColumn = 7;

            try
            {
                string excelPath;
                using (OpenFileDialog ofd = new OpenFileDialog())
                {
                    ofd.Filter = excelFileFilter;
                    if (ofd.ShowDialog() != DialogResult.OK)
                    {
                        return;
                    }
                    excelPath = ofd.FileName;
                }

                log.Info("解析中……");
                Application.DoEvents();
                Stopwatch sw = Stopwatch.StartNew();

                Workbook workbook = new Workbook(excelPath);
                Cells cells = workbook.Worksheets[0].Cells;
                int columnCount = cells.MaxColumn + 1;
                // Start at row 1 to skip the header row; MaxDataRow is then the number of rows to export.
                System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, columnCount);

                ConcurrentQueue<ImgInfo> parsed = new ConcurrentQueue<ImgInfo>();
                int imgCount = 0;
                foreach (DataRow row in dataTable1.Rows)
                {
                    ImgInfo record = new ImgInfo();
                    record.id = row[0].ToString();
                    record.title = row[1].ToString();

                    List<MatInfo> list = new List<MatInfo>();
                    for (int i = 2; i < columnCount; i++)
                    {
                        string cellText = row[i].ToString();
                        if (string.IsNullOrEmpty(cellText))
                        {
                            continue;
                        }
                        if (i >= firstMultiUrlColumn)
                        {
                            foreach (string url in Common.GetScrUrl(cellText))
                            {
                                list.Add(new MatInfo { url = url });
                            }
                        }
                        else
                        {
                            list.Add(new MatInfo { url = cellText });
                        }
                    }

                    record.images = list;
                    imgCount += list.Count;
                    parsed.Enqueue(record);
                }

                // Publish only a completely parsed queue.
                ltImgInfo = parsed;
                log.Info("解析完毕,一共[" + parsed.Count + "]条记录,[" + imgCount + "]张图片,耗时:" + sw.ElapsedMilliseconds + "毫秒");
            }
            catch (Exception ex)
            {
                log.Error("解析表格异常:" + ex.Message);
                MessageBox.Show("解析表格异常:" + ex.Message);
            }
        }

        /// <summary>
        /// Updates the progress text box from a worker thread by marshalling onto the UI thread.
        /// </summary>
        /// <param name="total">Total number of records in the run.</param>
        /// <param name="ocrNum">Number of records whose OCR pass has finished.</param>
        /// <param name="downloadCount">Number of records whose images have been downloaded.</param>
        /// <param name="time">Elapsed time of the run in milliseconds.</param>
        /// <param name="vioIDCount">Number of records flagged as violations.</param>
        private void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
        {
            // Nothing to update once the form is closing; Invoke would throw on a control
            // without a handle and take the calling worker down with it.
            if (txtTotal.IsDisposed || !txtTotal.IsHandleCreated)
            {
                return;
            }

            try
            {
                txtTotal.Invoke(new Action(() =>
                {
                    TimeSpan ts = TimeSpan.FromMilliseconds(time);
                    txtTotal.Text = string.Format("下载完成:{0}/{1},识别完成:{2}/{3},违规ID数量:{5},用时:{4}"
                        , downloadCount
                        , total
                        , ocrNum
                        , total
                        , ts.ToString()
                        , vioIDCount
                        );
                }));
            }
            catch (InvalidOperationException)
            {
                // The control was disposed between the check above and the Invoke call
                // (ObjectDisposedException derives from InvalidOperationException).
                // A lost progress update while shutting down is harmless.
            }
        }

        /// <summary>
        /// Start button. After the user confirms, wipes the previous results and launches two
        /// download workers plus one OCR worker over the records parsed from the spreadsheet.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            if (ltImgInfo.Count == 0)
            {
                MessageBox.Show("请先选择表格!");
                return;
            }
            DialogResult result = MessageBox.Show("确认开始下载识别?此操作会清空上一次的数据,请注意备份!", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
            if (result != DialogResult.Yes)
            {
                log.Info("取消开始下载识别!");
                return;
            }
            log.Info("确认开始下载识别!");

            PrepareOutputFolders();

            btnStop.Enabled = true;
            btnStart.Enabled = false;
            chkSaveImg.Enabled = false;
            chkSaveOcr.Enabled = false;
            saveImg = chkSaveImg.Checked;
            saveOcr = chkSaveOcr.Checked;
            Application.DoEvents();

            // Every run gets its own token and queues, captured in locals. Workers of an earlier,
            // cancelled run therefore keep seeing *their* cancelled token and *their* queues
            // instead of picking up the fields that this method is about to overwrite.
            cts = new CancellationTokenSource();
            CancellationToken token = cts.Token;
            ConcurrentQueue<ImgInfo> pending = ltImgInfo;
            ConcurrentQueue<ImgInfo> downloaded = new ConcurrentQueue<ImgInfo>();
            matQueue = downloaded;
            Stopwatch total = Stopwatch.StartNew();

            ocrNum = 0;
            downloadCount = 0;
            vioIDCount = 0;              // was not reset before, so violations accumulated across runs
            totalCount = pending.Count;  // number of records in this run

            const int downloadThreadNum = 2;
            for (int i = 0; i < downloadThreadNum; i++)
            {
                Task.Factory.StartNew(() => RunDownloadWorker(pending, downloaded, token), TaskCreationOptions.LongRunning);
            }
            Task.Factory.StartNew(() => RunOcrWorker(downloaded, total, token), TaskCreationOptions.LongRunning);
        }

        /// <summary>
        /// Creates the output folders, deletes every file below "result" and writes the
        /// column header of result.txt.
        /// </summary>
        private void PrepareOutputFolders()
        {
            // CreateDirectory is a no-op for folders that already exist.
            Directory.CreateDirectory("img");
            Directory.CreateDirectory("ocr_result");
            Directory.CreateDirectory("result");
            Directory.CreateDirectory("result//img");

            foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
            {
                File.Delete(filePath);
            }

            // NOTE(review): "titel" looks like a typo for "title"; kept so the output format is unchanged.
            File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");
        }

        /// <summary>
        /// Download worker: takes records from <paramref name="pending"/>, downloads all of their
        /// images and hands the record to <paramref name="downloaded"/>.
        /// </summary>
        /// <param name="pending">Records waiting to be downloaded.</param>
        /// <param name="downloaded">Queue read by the OCR worker.</param>
        /// <param name="token">Cancellation token of the run this worker belongs to.</param>
        private void RunDownloadWorker(ConcurrentQueue<ImgInfo> pending, ConcurrentQueue<ImgInfo> downloaded, CancellationToken token)
        {
            while (!token.IsCancellationRequested)
            {
                if (Volatile.Read(ref downloadCount) == totalCount)
                {
                    log.Info("--------------------------------->下载完成!<----------------------------------");
                    return;
                }

                ImgInfo imgInfo;
                if (!pending.TryDequeue(out imgInfo))
                {
                    // The other worker is still busy with the last record; wait instead of spinning.
                    Thread.Sleep(IdlePollMilliseconds);
                    continue;
                }

                // Simple back-pressure: slow down while the OCR worker is behind.
                if (downloaded.Count > 50)
                {
                    Thread.Sleep(1000);
                }
                if (downloaded.Count > 100)
                {
                    Thread.Sleep(2000);
                }

                for (int j = 0; j < imgInfo.images.Count; j++)
                {
                    if (token.IsCancellationRequested)
                    {
                        break;
                    }
                    DownloadImage(imgInfo, j);
                }

                if (token.IsCancellationRequested)
                {
                    // Do not publish into (or count towards) a run that has been stopped.
                    DisposeMats(imgInfo);
                    return;
                }

                downloaded.Enqueue(imgInfo);
                Interlocked.Increment(ref downloadCount);
            }
        }

        /// <summary>
        /// Downloads one image of a record and stores it as a BGR Mat on the record.
        /// Failures are logged and leave the image's mat unset, which makes the OCR worker skip it.
        /// </summary>
        /// <param name="imgInfo">Record the image belongs to.</param>
        /// <param name="index">Index of the image inside <c>imgInfo.images</c>.</param>
        private void DownloadImage(ImgInfo imgInfo, int index)
        {
            MatInfo image = imgInfo.images[index];
            try
            {
                Stopwatch sw = Stopwatch.StartNew();

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(image.url);
                request.KeepAlive = false;
                request.ServicePoint.Expect100Continue = false;
                request.Timeout = 2000;          // 2 seconds
                request.ReadWriteTimeout = 2000; // 2 seconds
                request.ServicePoint.UseNagleAlgorithm = false;
                request.ServicePoint.ConnectionLimit = 65500;
                request.AllowWriteStreamBuffering = false;
                request.Proxy = null;
                request.CookieContainer = new CookieContainer();
                request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(image.url).Host });

                // Image.FromStream requires its stream to stay open for the lifetime of the
                // image, so the response is buffered and the buffer outlives the bitmap.
                using (MemoryStream buffer = new MemoryStream())
                {
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream body = response.GetResponseStream())
                    {
                        body.CopyTo(buffer);
                    }
                    buffer.Position = 0;

                    using (Bitmap bmp = (Bitmap)System.Drawing.Image.FromStream(buffer))
                    {
                        sw.Stop();
                        string name = imgInfo.id + "_" + index;
                        if (saveImg)
                        {
                            bmp.Save("img//" + name + ".jpg");
                        }

                        Mat mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);
                        try
                        {
                            if (mat.Channels() == 4)
                            {
                                Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
                            }
                        }
                        catch
                        {
                            mat.Dispose();
                            throw;
                        }

                        image.mat = mat;
                        image.name = name;
                    }
                }

                log.Info(" " + image.name + "-->下载用时:" + sw.ElapsedMilliseconds + "毫秒");
            }
            catch (Exception ex)
            {
                log.Error("---->id:" + imgInfo.id + ",url[" + image.url + "],下载异常:" + ex.Message);
            }
        }

        /// <summary>
        /// OCR worker: takes downloaded records, runs OCR and the rule checks on their images
        /// until the first violation, then updates the progress display.
        /// </summary>
        /// <param name="downloaded">Queue filled by the download workers.</param>
        /// <param name="total">Stopwatch started when the run began.</param>
        /// <param name="token">Cancellation token of the run this worker belongs to.</param>
        private void RunOcrWorker(ConcurrentQueue<ImgInfo> downloaded, Stopwatch total, CancellationToken token)
        {
            while (!token.IsCancellationRequested)
            {
                if (Volatile.Read(ref ocrNum) == totalCount)
                {
                    log.Info("--------------------------------->识别完成!<----------------------------------");
                    return;
                }

                ImgInfo imgInfo;
                if (!downloaded.TryDequeue(out imgInfo))
                {
                    // Nothing downloaded yet; wait instead of spinning on a core.
                    Thread.Sleep(IdlePollMilliseconds);
                    continue;
                }

                Stopwatch perID = Stopwatch.StartNew();
                int imagesCount = imgInfo.images.Count;
                try
                {
                    for (int j = 0; j < imagesCount; j++)
                    {
                        if (token.IsCancellationRequested)
                        {
                            return;
                        }
                        // Only the first violating image of a record is reported.
                        if (RecognizeImage(imgInfo, imgInfo.images[j]))
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    // Releases every Mat of the record, including the ones skipped after a violation.
                    DisposeMats(imgInfo);
                }
                perID.Stop();

                log.Info("---->id:" + imgInfo.id + ",图片张数[" + imagesCount + "],识别小计用时:" + perID.ElapsedMilliseconds + "毫秒");
                Interlocked.Increment(ref ocrNum);
                ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);
            }
        }

        /// <summary>
        /// Runs OCR on one image and applies the banned-word check followed by the phone-number check.
        /// </summary>
        /// <param name="imgInfo">Record the image belongs to.</param>
        /// <param name="image">Image to recognise; skipped when it has no usable mat.</param>
        /// <returns><c>true</c> when a violation was found and recorded; otherwise <c>false</c>.</returns>
        private bool RecognizeImage(ImgInfo imgInfo, MatInfo image)
        {
            try
            {
                if (image.mat == null || image.mat.Empty())
                {
                    return false;
                }

                Stopwatch sw = Stopwatch.StartNew();
                PaddleOcrResult ocrResult = Program.paddleOcr.Run(image.mat);
                sw.Stop();
                log.Info(" " + image.name + "---->识别用时:" + sw.ElapsedMilliseconds + "毫秒");

                // Keep confident regions only, ordered top-to-bottom and then left-to-right.
                string ocrInfo = string.Join("\n", ocrResult.Regions
                    .Where(region => region.Score > 0.8)
                    .OrderBy(region => region.Rect.Center.Y)
                    .ThenBy(region => region.Rect.Center.X)
                    .Select(region => region.Text));
                if (saveOcr)
                {
                    File.WriteAllText("ocr_result//" + image.name + ".txt", ocrInfo);
                }

                // Rule checks work on the text with outer whitespace and all spaces removed.
                ocrInfo = ocrInfo.Trim().Replace(" ", "");
                string words;
                if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
                {
                    ReportViolation(imgInfo, image, "包含违禁词:", words, ocrInfo);
                    return true;
                }
                if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
                {
                    ReportViolation(imgInfo, image, "疑似包含电话号码:", words, ocrInfo);
                    return true;
                }
                return false;
            }
            catch (Exception ex)
            {
                log.Error(" " + image.name + "---->识别异常:" + ex.Message);
                return false;
            }
        }

        /// <summary>
        /// Logs a violation, appends it to result.txt and result_detail.txt, saves the offending
        /// image under result//img and bumps the violation counter.
        /// </summary>
        /// <param name="imgInfo">Record that violated a rule.</param>
        /// <param name="image">Image the violation was found in.</param>
        /// <param name="label">Violation kind prefix written in front of <paramref name="words"/>.</param>
        /// <param name="words">Matched words reported by the rule check.</param>
        /// <param name="ocrInfo">Normalised OCR text of the image.</param>
        private void ReportViolation(ImgInfo imgInfo, MatInfo image, string label, string words, string ocrInfo)
        {
            string resultInfo = "ID:" + imgInfo.id + ",Title:[" + imgInfo.title + "],------>" + label + words;
            log.Info(resultInfo);
            File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t" + label + words + "\r\n");
            File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");
            Cv2.ImWrite("result//img//" + image.name + ".jpg", image.mat);
            Interlocked.Increment(ref vioIDCount);
        }

        /// <summary>Disposes and clears every Mat still attached to a record.</summary>
        /// <param name="imgInfo">Record whose image mats should be released.</param>
        private static void DisposeMats(ImgInfo imgInfo)
        {
            foreach (MatInfo image in imgInfo.images)
            {
                if (image.mat != null)
                {
                    image.mat.Dispose();
                    image.mat = null;
                }
            }
        }

        /// <summary>
        /// Stop button: requests cancellation of the running download/OCR workers and
        /// re-enables the start controls.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            // cts stays null until the first run has been started.
            if (cts != null)
            {
                cts.Cancel();
            }
            btnStop.Enabled = false;
            btnStart.Enabled = true;
            chkSaveImg.Enabled = true;
            chkSaveOcr.Enabled = true;
        }
    }
}
下载
源码下载