利用OpenCV的DNN模块加载onnx模型文件进行图片检测。
1、使用yolov5工程代码,调用export.py导出onnx模型。
2、下载opencv版本,https://opencv.org/releases/
使用opencv版本4.5.3或以上,本文使用的opencv4.6.0
3、使用vc2015编写调用代码。
// dnnUseOnnx.cpp : 定义控制台应用程序的入口点。#include <fstream>
#include <iostream>
#include <string>
#include <map>
#include <opencv2/opencv.hpp>struct DetectResult
{int classId;float score;cv::Rect box;
};class YOLOv5Detector
{
public:void initConfig(std::string onnxpath, int iw, int ih, float threshold, bool bIsEnableCuda);void detect(cv::Mat& frame, std::vector<DetectResult>& result);private:int input_w = 640;int input_h = 640;cv::dnn::Net net;int threshold_score = 0.25;
};void YOLOv5Detector::initConfig(std::string onnxpath, int iw, int ih, float threshold, bool bIsEnableCuda)
{this->input_w = iw;this->input_h = ih;this->threshold_score = threshold;try{this->net = cv::dnn::readNetFromONNX(onnxpath);//依据情况选定是否使用CUDAif (bIsEnableCuda){std::cout << "Attempty to use CUDA\n";net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);}else{std::cout << "Running on CPU\n";net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);}}catch (cv::Exception & e) {printf("exception %s\n", e.err.c_str());}
}void YOLOv5Detector::detect(cv::Mat& frame, std::vector<DetectResult>& results)
{// 图象预处理 - 格式化操作int w = frame.cols;int h = frame.rows;int _max = std::max(h, w);cv::Mat image = cv::Mat::zeros(cv::Size(_max, _max), CV_8UC3);if (frame.channels() == 1){cv::cvtColor(frame, frame, cv::COLOR_GRAY2BGR);}cv::Rect roi(0, 0, w, h);frame.copyTo(image(roi));float x_factor = image.cols / 640.0f;float y_factor = image.rows / 640.0f;cv::Mat blob = cv::dnn::blobFromImage(image, 1 / 255.0, cv::Size(this->input_w, this->input_h), cv::Scalar(0, 0, 0),true, false);this->net.setInput(blob);cv::Mat preds = this->net.forward("output0");//outputname,使用Netron看一下输出的名字,一般为output0或者output//如果preds里有Mat的维数大于2,那么设断点调试的时候,可以看到rows和cols都等于-1,当Mat的dims>2时,想要访问Mat的高和宽,可以通过size属性获取。如下:printf("output:%d,%d,%d\n", preds.size[0], preds.size[1], preds.size[2]);//打印输出:output:1,25200,85//YOLOV5的输出1,25200,85如何理解和解析//1、25200代表着检测框的数量,比如我们取出第一个检测框a,也就是[1,1,85],取出来之后我们解析85,前五个为box的中点坐标、长宽值以及置信,后面80我们取Max(80个类别)中最大值,类别的处于多少行对应于label class.txt别中的类是哪一类别。cv::Mat det_output(preds.size[1], preds.size[2], CV_32F, preds.ptr<float>());float confidence_threshold = 0.5;std::vector<cv::Rect> boxes;boxes.clear();std::vector<int> classIds;classIds.clear();std::vector<float> confidences;confidences.clear();for (int i = 0; i < det_output.rows; i++){float confidence = det_output.at<float>(i, 4);if (confidence < 0.45){continue;}cv::Mat classes_scores = det_output.row(i).colRange(5, preds.size[2]);//colRange(5, num_class);num_class:最大分类数cv::Point classIdPoint;double score;minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);// 置信度 0~1之间if (score > this->threshold_score){float cx = det_output.at<float>(i, 0);float cy = det_output.at<float>(i, 1);float ow = det_output.at<float>(i, 2);float oh = det_output.at<float>(i, 3);int x = static_cast<int>((cx - 0.5 * ow) * x_factor);int y = static_cast<int>((cy - 0.5 * oh) * y_factor);int width = static_cast<int>(ow * x_factor);int height = static_cast<int>(oh * y_factor);cv::Rect box;box.x = x;box.y = y;box.width = width;box.height = 
height;boxes.push_back(box);classIds.push_back(classIdPoint.x);confidences.push_back(score * confidence);}}// NMSstd::vector<int> indexes;cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);for (size_t i = 0; i < indexes.size(); i++){DetectResult dr;int index = indexes[i];int idx = classIds[index];dr.box = boxes[index];dr.classId = idx;dr.score = confidences[index];cv::rectangle(frame, boxes[index], cv::Scalar(0, 0, 255), 2, 8);cv::rectangle(frame, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 20),cv::Point(boxes[index].br().x, boxes[index].tl().y), cv::Scalar(0, 255, 255), -1);results.push_back(dr);}std::ostringstream ss;std::vector<double> layersTimings;double freq = cv::getTickFrequency() / 1000.0;double time = net.getPerfProfile(layersTimings) / freq;ss << "FPS: " << 1000 / time << " ; time : " << time << " ms";putText(frame, ss.str(), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
}std::map<int, std::string> classNames = { { 0, "person" },{ 1, "bicycle" },{ 2, "car" },{ 3, "motorcycle" } ,{ 4, "airplane" } ,{ 5, "bus" },{ 6, "train" },{ 7, "truck" },{ 8, "boat" },{ 9, "traffic light" },
{ 10, "fire hydrant" },{ 11, "stop sign'" },{ 12, "parking meter" },{ 13, "bench" } ,{ 14, "bird" } ,{ 15, "cat" },{ 16, "dog" },{ 17, "horse" },{ 18, "sheep" },{ 19, "cow" },
{ 20, "elephant" },{ 21, "bear" },{ 22, "zebra" },{ 23, "giraffe" } ,{ 24, "backpack" } ,{ 25, "umbrella" },{ 26, "handbag" },{ 27, "tie" },{ 28, "suitcase" },{ 29, "frisbee" },
{ 30, "skis" },{ 31, "snowboard" },{ 32, "sports ball" },{ 33, "kite" } ,{ 34, "baseball bat" } ,{ 35, "baseball glove" },{ 36, "skateboard" },{ 37, "surfboard" },{ 38, "tennis racket" },{ 39, "bottle" },
{ 40, "wine glass" },{ 41, "cup" },{ 42, "fork" },{ 43, "knife" } ,{ 44, "spoon" } ,{ 45, "bowl" },{ 46, "banana" },{ 47, "apple" },{ 48, "sandwich" },{ 49, "orange" },
{ 50, "broccoli" },{ 51, "carrot" },{ 52, "hot dog" },{ 53, "pizza" } ,{ 54, "donut" } ,{ 55, "cake" },{ 56, "chair" },{ 57, "couch" },{ 58, "potted plant" },{ 59, "bed" },
{ 60, "dining table" },{ 61, "toilet" },{ 62, "tv" },{ 63, "laptop" } ,{ 64, "mouse" } ,{ 65, "remote" },{ 66, "keyboard" },{ 67, "cell phone" },{ 68, "microwave" },{ 69, "oven" },
{ 70, "toaster" },{ 71, "sink" },{ 72, "refrigerator" },{ 73, "book" } ,{ 74, "clock" } ,{ 75, "vase" },{ 76, "scissors" },{ 77, "teddy bear" },{ 78, "hair drier" },{ 79, "toothbrush" }
};int main(int argc, char* argv[])
{std::shared_ptr<YOLOv5Detector> detector = std::make_shared<YOLOv5Detector>();detector->initConfig(R"(D:\python-project\yolov5\yolov5s.onnx)", 640, 640, 0.25f, false);cv::Mat frame = cv::imread(R"(D:\python-project\yolov5\data\images\bus.jpg)");std::vector<DetectResult> results;detector->detect(frame, results);for (DetectResult& dr : results){cv::Rect box = dr.box;cv::putText(frame, classNames[dr.classId]+ " "+ std::to_string(dr.score), cv::Point(box.tl().x, box.tl().y - 10), cv::FONT_HERSHEY_SIMPLEX,.5, cv::Scalar(0, 0, 0));}cv::imshow("OpenCV-DNN-yolov5", frame);cv::waitKey();results.clear();
}
运行效果:
注意事项:
1)、readNetFromONNX加载onnx模型出错。 interp_mode != "asymmetric",这个错误信息表明你在使用OpenCV的readNetFromONNX函数加载ONNX模型时,模型中的某些节点的插值模式(interp_mode)不是"asymmetric"。
解决方法:使用opencv版本4.5.3或以上,本文使用的opencv4.6.0
2)、检测到的目标比yolov5工程detect.py推理出来的目标少。
解决方法:det_output.row(i).colRange(5, endcol) 的第二个参数是结束列下标(不包含该列),应传入每行的总列数,即 5 + 类别数(preds.size[2]),而不是类别数本身,否则只有部分类别参与比较:
cv::Mat classes_scores = det_output.row(i).colRange(5, preds.size[2]);