Table of Contents
- Preface
- 1. False Positives (误报)
- 2. False Negatives (漏报)
- 3. Source Code
- Summary
Preface
This article records an approach, with source code, for checking false negatives (漏报) and false positives (误报) between manual annotations and model predictions.
1. False Positives (误报)
As I define it here, a false positive is a predicted box with no counterpart among the manually annotated boxes, i.e. the model returns more boxes than the annotator drew. This typically happens when the annotator was not careful and failed to label every object in the image. The logic is to add a box labeled 误报-<class name> ("false positive-<class name>") to the original annotation XML for each such prediction; a minimal sketch of the check follows.
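To make the matching rule concrete, here is a minimal sketch, assuming boxes are `(class_id, [x1, y1, x2, y2])` pairs. The helper names `iou` and `find_false_positives` are illustrative only; the full script in Section 3 implements the same idea with `box_iou_np`:

```python
def iou(box1, box2):
    # Axis-aligned IoU of two [x1, y1, x2, y2] boxes (assumed non-degenerate)
    xi1, yi1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    xi2, yi2 = min(box1[2], box2[2]), min(box1[3], box2[3])
    inter = max(0.0, xi2 - xi1) * max(0.0, yi2 - yi1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return inter / (area1 + area2 - inter)

def find_false_positives(predictions, annotations):
    # predictions / annotations: lists of (class_id, [x1, y1, x2, y2]).
    # A prediction with no same-class annotation overlapping it is a false positive.
    return [(cls, box) for cls, box in predictions
            if not any(a_cls == cls and iou(a_box, box) > 0
                       for a_cls, a_box in annotations)]
```

Note that any overlap at all (IoU > 0) counts as a match here, mirroring the `> 0` test used in the full script below.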
2. False Negatives (漏报)
As I define it here, a false negative is a manually annotated box that the model fails to predict. This typically happens when the annotator labeled something incorrectly or drew a box of poor quality (model performance also plays a role). The logic is to add a box labeled 漏报-<class name> ("false negative-<class name>") to the original annotation XML for each such annotation; the mirror-image sketch follows.
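The false-negative check is the mirror image of the previous one. A sketch under the same assumptions, reusing the `iou` helper from the snippet above (`find_false_negatives` is again an illustrative name):

```python
def find_false_negatives(annotations, predictions):
    # annotations / predictions: lists of (class_id, [x1, y1, x2, y2]).
    # An annotation with no same-class prediction overlapping it (IoU > 0)
    # is a false negative, i.e. the model missed it.
    return [(cls, box) for cls, box in annotations
            if not any(p_cls == cls and iou(p_box, box) > 0
                       for p_cls, p_box in predictions)]
```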
3. Source Code
```python
import argparse
import os
import time
import shutil
import warnings
from pathlib import Path
from xml.etree import ElementTree
from xml.etree.ElementTree import Element

import cv2
import numpy as np
import torch
import torchvision
from pascal_voc_writer import Writer

warnings.simplefilter(action='ignore', category=FutureWarning)

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def box_iou(box1, box2):
    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)
    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def cv_imread(file_path):
    # np.fromfile + imdecode handles non-ASCII paths; the decoded image is BGR
    cv_img = cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
    return cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


def preprocess_file(path, img_size, stride, auto):
    img_rgb_ = cv_imread(path)  # RGB
    assert img_rgb_ is not None, f'Image Not Found {path}'
    # Padded resize
    img_rgb = letterbox(img_rgb_, img_size, stride=stride, auto=auto)[0]
    # Convert
    img_rgb = img_rgb.transpose((2, 0, 1))  # HWC to CHW
    img_rgb = np.ascontiguousarray(img_rgb)  # make the array contiguous in memory for faster downstream ops
    return img_rgb, img_rgb_


def preprocess_mat(mat, img_size, stride, auto):
    img_bgr = mat  # BGR
    # Padded resize
    img_rgb = letterbox(img_bgr, img_size, stride=stride, auto=auto)[0]
    # Convert
    img_rgb = img_rgb.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    img_rgb = np.ascontiguousarray(img_rgb)
    return img_rgb, img_bgr


def clip_coords(boxes, shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def remove_name_elements(element):
    # Recursively strip a UTF-8 BOM from <name> element text
    name_element = element.find('name')
    if name_element is not None and name_element.text and name_element.text.startswith('\ufeff'):
        name_element.text = name_element.text.lstrip('\ufeff')
    for child in element:
        remove_name_elements(child)


def read_xml(xml_file: str, names):
    # Parse a Pascal VOC xml file into [[cls_id, [xmin, ymin, xmax, ymax]], ...]
    if os.path.getsize(xml_file) == 0:
        return []
    with open(xml_file, encoding='utf-8-sig') as in_file:
        tree = ElementTree.parse(in_file)
        root = tree.getroot()
        remove_name_elements(root)
        results = []
        obj: Element
        for obj in tree.findall("object"):
            xml_box = obj.find("bndbox")
            x_min = float(xml_box.find("xmin").text)
            y_min = float(xml_box.find("ymin").text)
            x_max = float(xml_box.find("xmax").text)
            y_max = float(xml_box.find("ymax").text)
            b = [x_min, y_min, x_max, y_max]
            cls_id = names.index(obj.find("name").text)
            results.append([cls_id, b])
        return results


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False,
                        multi_label=False, labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
        list of detections, one (n, 6) tensor per image [xyxy, conf, cls]
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 7680  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only: conf is the confidence, j the class index
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


class Detect:
    def __init__(self, weights, imgsz, conf_thres, iou_thres):
        self.device = 'cpu'
        self.weights = weights
        self.model = None
        self.imgsz = imgsz
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        if torch.cuda.is_available():  # use the first GPU whenever CUDA is available
            self.device = torch.device('cuda:0')
        self.init_model()
        self.stride = max(int(self.model.stride.max()), 32)

    def init_model(self):
        ckpt = torch.load(self.weights, map_location=self.device)  # load
        ckpt = (ckpt.get('ema', None) or ckpt['model']).float()  # FP32 model
        fuse = True
        # fuse() merges Conv and BatchNorm layers to speed up inference
        self.model = ckpt.fuse().eval() if fuse else ckpt.eval()
        self.model.float()

    def infer_image(self, image_path):
        im, im0 = preprocess_file(image_path, img_size=self.imgsz, stride=self.stride, auto=True)
        im = torch.from_numpy(im).to(self.device).float() / 255
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        # Inference
        pred = self.model(im, augment=False, visualize=False)[0]
        # NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, None, False, max_det=1000)
        det = pred[0]
        results = []
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                xyxy = (torch.tensor(xyxy).view(1, 4)).view(-1).tolist()  # xyxy in original-image pixels
                results.append([cls.item(), xyxy, conf.item()])
        return results

    def infer_mat(self, mat):
        im, im0 = preprocess_mat(mat, img_size=self.imgsz, stride=self.stride, auto=True)
        im = torch.from_numpy(im).to(self.device).float() / 255
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        # Inference
        pred = self.model(im, augment=False, visualize=False)[0]
        # NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, None, False, max_det=1000)
        det = pred[0]
        results = []
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                xyxy = (torch.tensor(xyxy).view(1, 4)).view(-1).tolist()  # xyxy in original-image pixels
                results.append([cls.item(), xyxy, conf.item()])
        return results


def box_iou_np(box1, box2):
    # IoU of two [x1, y1, x2, y2] boxes (numpy, single pair)
    x11, y11, x12, y12 = box1
    x21, y21, x22, y22 = box2
    width1 = np.maximum(0, x12 - x11)
    height1 = np.maximum(0, y12 - y11)
    width2 = np.maximum(0, x22 - x21)
    height2 = np.maximum(0, y22 - y21)
    area1 = width1 * height1
    area2 = width2 * height2
    # Intersection rectangle: left, top, right, bottom
    xi1 = np.maximum(x11, x21)
    yi1 = np.maximum(y11, y21)
    xi2 = np.minimum(x12, x22)
    yi2 = np.minimum(y12, y22)
    # Intersection area
    w = np.maximum(0, xi2 - xi1)
    h = np.maximum(0, yi2 - yi1)
    intersection = w * h
    # Union and IoU
    union = area1 + area2 - intersection
    iou = intersection / union
    return iou


def main(opt):
    if not os.path.exists(opt.output_path):
        os.makedirs(opt.output_path, exist_ok=True)  # exist_ok: don't raise if the directory already exists

    detect = Detect(opt.weights, opt.imgsz, opt.conf_thres, opt.iou_thres)

    # Collect all images under the input directory
    imgs = []
    for root, dirs, files in os.walk(opt.input_path):
        for file in files:
            if os.path.splitext(file)[1] in opt.extensions:
                imgs.append(os.path.join(root, file))

    total = len(imgs)
    for i, img in enumerate(imgs):
        print(f"{i + 1:>05d}/{total:>05d} {img}")
        mat = cv_imread(img)
        xml = os.path.splitext(img)[0] + '.xml'
        h, w, _ = mat.shape
        results = detect.infer_image(img)

        # Annotations
        anns = read_xml(xml, opt.names) if os.path.exists(xml) else []

        # Check false positives: predictions with no overlapping annotation of the same class
        fps = []
        if opt.fp:
            for result in results:
                result_cls, result_box, _ = result
                if result_cls in opt.verifynames:
                    found = False
                    for ann in anns:
                        ann_cls, ann_box = ann
                        if ann_cls == result_cls and box_iou_np(ann_box, result_box) > 0:
                            found = True
                            break
                    if not found:
                        fps.append([result_cls, result_box])

        # Check false negatives: annotations with no overlapping prediction of the same class
        fns = []
        if opt.fn:
            for ann in anns:
                ann_cls, ann_box = ann
                if ann_cls in opt.verifynames:
                    found = False
                    for result in results:
                        result_cls, result_box, _ = result
                        if ann_cls == result_cls and box_iou_np(ann_box, result_box) > 0:
                            found = True
                            break
                    if not found:
                        fns.append([ann_cls, ann_box])

        if len(fps) == 0 and len(fns) == 0:
            continue

        # Write the output xml
        writer = Writer(img, w, h)
        # Original annotations
        for ann in anns:
            ann_cls, ann_box = ann
            x_min, y_min, x_max, y_max = ann_box
            writer.addObject(opt.names[int(ann_cls)], x_min, y_min, x_max, y_max)
        # False positives, labeled "误报-<class>"
        if opt.fp:
            for ann in fps:
                ann_cls, ann_box = ann
                x_min, y_min, x_max, y_max = ann_box
                writer.addObject("误报-" + opt.names[int(ann_cls)], x_min, y_min, x_max, y_max)
        # False negatives, labeled "漏报-<class>"
        if opt.fn:
            for ann in fns:
                ann_cls, ann_box = ann
                x_min, y_min, x_max, y_max = ann_box
                writer.addObject("漏报-" + opt.names[int(ann_cls)], x_min, y_min, x_max, y_max)

        writer.save(os.path.join(opt.output_path, os.path.basename(xml)))
        shutil.copy2(img, os.path.join(opt.output_path, os.path.basename(img)))


def parse_opt(known):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'weights/best.pt', help='path to the model weights (.pt)')
    # Note: argparse's type=tuple / type=list would split a command-line string into characters,
    # so imgsz, names, verifynames and extensions are meant to be edited here as defaults.
    parser.add_argument('--imgsz', type=tuple, default=(1280, 1280), help='model input size')
    parser.add_argument('--conf_thres', type=float, default=0.25, help='model confidence threshold')
    parser.add_argument('--iou_thres', type=float, default=0.5,
                        help='NMS IoU threshold (FP/FN matching itself treats any overlap, IoU > 0, as a match)')
    parser.add_argument('--names', type=list, default=["键盘", "显示器", "鼠标", "桌子", "椅子", "人"],
                        help='all annotated class names to check')
    parser.add_argument('--verifynames', type=list, default=[0, 1], help='class indices to verify')
    parser.add_argument('--input_path', type=str, default=r'', help='input image and xml directory')
    parser.add_argument('--output_path', type=str, default=r'' + '核查', help='output image and xml directory')
    parser.add_argument('--extensions', type=list,
                        default=['.jpg', '.JPG', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.svg', '.pfg'])
    parser.add_argument('--fp', type=bool, default=True, help='whether to check false positives')
    parser.add_argument('--fn', type=bool, default=True, help='whether to check false negatives')
    # known=True uses parse_known_args, which tolerates unrecognized arguments;
    # known=False makes any unknown argument print an error message and exit.
    return parser.parse_known_args()[0] if known else parser.parse_args()


if __name__ == '__main__':
    opt = parse_opt(True)
    main(opt)
```
Summary
Install the required libraries, then adjust the command-line parameters weights, names, verifynames, input_path, and output_path, and the script is ready to use. (Note: place the script inside the corresponding yolov5 folder; loading the .pt checkpoint with torch.load requires the yolov5 model classes to be importable.)
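As an illustration, assuming the script is saved as `verify_fp_fn.py` (the original does not name the file) and that `/data/labeled` stands in for your dataset directory, an invocation might look like this. Remember that `names` and `verifynames` must be edited in the script's defaults, since argparse's `type=list` would split a command-line string into individual characters:

```bash
python verify_fp_fn.py --weights weights/best.pt --conf_thres 0.25 --iou_thres 0.5 \
    --input_path /data/labeled --output_path /data/labeled核查
```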