YOLOv5: A Walkthrough of common.py

Table of Contents

    • 1. Purpose
    • 2. Network model
    • 3. common.py analysis

1. Purpose

common.py defines the most critical model classes in YOLOv5: the building blocks from which every network configuration is assembled.

2. Network model

[Figure: YOLOv5 network architecture diagram (image not preserved in this copy)]

3. common.py analysis

# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""Common modules."""

import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparse

import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

# Import 'ultralytics' package or install if missing
try:
    import ultralytics

    assert hasattr(ultralytics, "__version__")  # verify package is not directory
except (ImportError, AssertionError):
    import os

    os.system("pip install -U ultralytics")
    import ultralytics

from ultralytics.utils.plotting import Annotator, colors, save_one_box

from utils import TryExcept
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (
    LOGGER,
    ROOT,
    Profile,
    check_requirements,
    check_suffix,
    check_version,
    colorstr,
    increment_path,
    is_jupyter,
    make_divisible,
    non_max_suppression,
    scale_boxes,
    xywh2xyxy,
    xyxy2xywh,
    yaml_load,
)
from utils.torch_utils import copy_attr, smart_inference_mode
# Compute a padding p that keeps the output spatial shape equal to the input ("same" padding)
def autopad(k, p=None, d=1):
    """Pads kernel to 'same' output shape, adjusting for optional dilation; returns padding size.

    `k`: kernel, `p`: padding, `d`: dilation.
    """
    if d > 1:
        # The kernel may be a scalar or a list; when d > 1 the effective kernel size grows with the dilation
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        # If no padding is given, default to half the kernel size (integer division)
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
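# A minimal sketch (not part of common.py) of how autopad behaves:
#   autopad(3)       -> 1       # 3x3 kernel, pad 1 keeps H and W unchanged at stride 1
#   autopad(3, d=2)  -> 2       # dilation 2 makes the effective kernel 5, so pad 2
#   autopad((1, 3))  -> [0, 1]  # per-dimension padding for rectangular kernels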
# Convolution class (Conv2d -> BatchNorm2d -> activation), inheriting from nn.Module
class Conv(nn.Module):
    """Applies a convolution, batch normalization, and activation function to an input tensor in a neural network."""

    # The default activation is SiLU
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initializes a standard convolution layer with optional batch normalization and activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # Activation setup: SiLU by default, any supplied nn.Module otherwise, or Identity (no-op)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Applies a convolution followed by batch normalization and an activation function to the input tensor `x`."""
        # convolution -> batch norm -> activation
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Applies a fused convolution and activation function to `x` (BN folded into the conv, so no bn step)."""
        return self.act(self.conv(x))
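# A minimal sketch (not part of common.py) of Conv in use:
#   m = Conv(3, 64, k=3, s=2)            # 3 -> 64 channels, stride 2
#   y = m(torch.zeros(1, 3, 640, 640))   # y.shape == torch.Size([1, 64, 320, 320])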
# Depth-wise convolution layer
class DWConv(Conv):
    """Implements a depth-wise convolution layer with optional activation for efficient spatial filtering."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
        """Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output
        channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act).
        """
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
# Depth-wise transpose convolution, used for upsampling
class DWConvTranspose2d(nn.ConvTranspose2d):
    """A depth-wise transpose convolutional layer for upsampling in neural networks, particularly in YOLOv5 models."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
        """Initializes a depth-wise transpose convolutional layer for YOLOv5; args: input channels (c1), output
        channels (c2), kernel size (k), stride (s), input padding (p1), output padding (p2).
        """
        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
# Transformer layer with multi-head attention
class TransformerLayer(nn.Module):
    """Transformer layer with multihead attention and linear layers, optimized by removing LayerNorm."""

    def __init__(self, c, num_heads):
        """Initializes a transformer layer, sans LayerNorm for performance, with multihead attention and linear
        layers, as described in https://arxiv.org/abs/2010.11929.
        """
        super().__init__()
        # Set up the q, k, v projections, the attention module, and two linear layers
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Performs forward pass using MultiheadAttention and two linear transformations with residual connections."""
        # Multi-head attention over the q/k/v projections, with a residual connection
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        # Two linear layers after attention, again with a residual connection
        x = self.fc2(self.fc1(x)) + x
        return x
# Transformer block for vision tasks: optional conv, learnable position embedding, stacked Transformer layers
class TransformerBlock(nn.Module):
    """A Transformer block for vision tasks with convolution, position embeddings, and Transformer layers."""

    def __init__(self, c1, c2, num_heads, num_layers):
        """Initializes a Transformer block for vision tasks, adapting dimensions if necessary and stacking specified
        layers.
        """
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        # Learnable position embedding
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        # Chain num_layers TransformerLayer modules into one sequential stack
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        """Processes input through an optional convolution, followed by Transformer layers and position embeddings for
        object detection.
        """
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # Flatten the spatial dimensions into a sequence: (b, c, w, h) -> (w*h, b, c)
        p = x.flatten(2).permute(2, 0, 1)
        # Add the position embedding, run the Transformer stack, then permute and reshape back to an image tensor
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
# Bottleneck: the core feature-extraction unit, with optional shortcut and group convolution
class Bottleneck(nn.Module):
    """A bottleneck layer with optional shortcut and group convolution for efficient feature extraction."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel
        expansion.
        """
        super().__init__()
        # Hidden channels: c2 scaled by the expansion factor e (half by default)
        c_ = int(c2 * e)  # hidden channels
        # Two convolutions: a 1x1 to shrink channels, then a 3x3 (grouped) to extract features
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        # Residual flag: True only when a shortcut is requested and c1 == c2
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Processes input through two convolutions, optionally adds shortcut if channel dimensions match; input is a
        tensor.
        """
        # If the residual flag is set, add the input to the two-convolution output; otherwise return the output alone
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
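# A minimal sketch (not part of common.py): the residual path only activates when c1 == c2:
#   Bottleneck(64, 64)(torch.zeros(1, 64, 80, 80)).shape   # -> (1, 64, 80, 80), add=True
#   Bottleneck(64, 128)                                     # add=False, plain two-conv path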
# CSP bottleneck: feature extraction with cross-stage partial connections and optional shortcuts
class BottleneckCSP(nn.Module):
    """CSP bottleneck layer for feature extraction with cross-stage partial connections and optional shortcuts."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initializes CSP bottleneck with optional shortcuts; args: ch_in, ch_out, number of repeats, shortcut bool,
        groups, expansion.
        """
        super().__init__()
        # Hidden channels
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        # n stacked Bottleneck modules
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        """Performs forward pass by applying layers, activation, and concatenation on input x, returning feature-
        enhanced output.
        """
        # Branch 1: 1x1 conv -> Bottleneck stack -> 1x1 conv, yielding c_ channels
        y1 = self.cv3(self.m(self.cv1(x)))
        # Branch 2: a direct 1x1 conv of the input, also c_ channels
        y2 = self.cv2(x)
        # Concatenate both branches on the channel dim, normalize, activate with SiLU, then restore c2 channels
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
# Cross convolution: a (1,k) then (k,1) kernel pair with downsampling, expansion, and optional shortcut
class CrossConv(nn.Module):
    """Implements a cross convolution layer with downsampling, expansion, and optional shortcut."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """Initializes CrossConv with downsampling, expanding, and optionally shortcutting; `c1` input, `c2` output
        channels.

        Inputs are ch_in, ch_out, kernel, stride, groups, expansion, shortcut.
        """
        super().__init__()
        # Hidden channels derived from c2 and the expansion factor
        c_ = int(c2 * e)  # hidden channels
        # Conv 1: (1, k) kernel with (1, s) stride
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        # Conv 2: (k, 1) kernel with (s, 1) stride
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        # Shortcut flag: set only when a shortcut is requested and c1 == c2
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Performs feature sampling, expanding, and applies shortcut if channels match; expects `x` input tensor."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
# C3 module: a CSP bottleneck built from three convolutions
class C3(nn.Module):
    """Implements a CSP Bottleneck module with three convolutions for enhanced feature extraction in neural networks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group
        convolutions, and expansion.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        """Performs forward propagation using concatenated outputs from two convolutions and a Bottleneck sequence."""
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
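# A minimal sketch (not part of common.py) of C3 as it appears in the yolov5s backbone:
#   c3 = C3(128, 128, n=3)                   # three stacked Bottlenecks on the cv1 branch
#   y = c3(torch.zeros(1, 128, 160, 160))    # y.shape == torch.Size([1, 128, 160, 160])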
# C3x: a C3 variant whose inner stack uses cross convolutions
class C3x(C3):
    """Extends the C3 module with cross-convolutions for enhanced feature extraction in neural networks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initializes C3x module with cross-convolutions, extending C3 with customizable channel dimensions, groups,
        and expansion.
        """
        # Let the parent class initialize the shared structure
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
# C3TR: a C3 variant whose inner stack is a TransformerBlock
class C3TR(C3):
    """C3 module with TransformerBlock for enhanced feature extraction in object detection models."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initializes C3 module with TransformerBlock for enhanced feature extraction, accepts channel sizes, shortcut
        config, group, and expansion.
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)
# C3SPP: a C3 variant whose inner stack is an SPP layer, for stronger spatial feature extraction
class C3SPP(C3):
    """Extends the C3 module with an SPP layer for enhanced spatial feature extraction and customizable channels."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        """Initializes a C3 module with SPP layer for advanced spatial feature extraction, given channel sizes, kernel
        sizes, shortcut, group, and expansion ratio.
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        # Uses the SPP module defined below
        self.m = SPP(c_, c_, k)
# C3Ghost: a C3 variant built from Ghost bottlenecks for cheaper feature extraction
class C3Ghost(C3):
    """Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in YOLOv5."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Initializes YOLOv5's C3 module with Ghost Bottlenecks for efficient feature extraction."""
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
# SPP: spatial pyramid pooling
class SPP(nn.Module):
    """Implements Spatial Pyramid Pooling (SPP) for feature extraction, ref: https://arxiv.org/abs/1406.4729."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Initializes SPP layer; args: c1 (input channels), c2 (output channels), k (a tuple of three kernel sizes,
        one per max-pooling branch).
        """
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        """Applies convolution and max pooling layers to the input tensor `x`, concatenates results, and returns output
        tensor.
        """
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
# SPPF: fast spatial pyramid pooling
class SPPF(nn.Module):
    """Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv5 models."""

    def __init__(self, c1, c2, k=5):
        """Initializes YOLOv5 SPPF layer with given channels and kernel size, combining convolution and max pooling.

        Equivalent to SPP(k=(5, 9, 13)).
        """
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        # A single max-pooling layer, applied three times in sequence
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Processes input through a series of convolutions and max pooling operations for feature extraction."""
        # 1x1 convolution first
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress torch 1.9.0 max_pool2d() warning
            # Pool once
            y1 = self.m(x)
            # Pool the pooled result again
            y2 = self.m(y1)
            # Concatenate x, y1, y2 and a third pooling of y2 on the channel dim, then fuse with cv2
            return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
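# A minimal sketch (not part of common.py) of why SPPF(k=5) equals SPP(k=(5, 9, 13)):
# two chained 5x5 max-pools (stride 1, pad 2) cover a 9x9 window and three cover 13x13,
# so the cascade reuses intermediate results instead of pooling three times from scratch.
#   m5, m9 = nn.MaxPool2d(5, 1, 2), nn.MaxPool2d(9, 1, 4)
#   x = torch.rand(1, 8, 40, 40)
#   assert torch.equal(m5(m5(x)), m9(x))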
# Focus: folds spatial information into the channel dimension via slicing, then applies one convolution
class Focus(nn.Module):
    """Focuses spatial information into channel space using slicing and convolution for efficient feature extraction."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Initializes Focus module to concentrate width-height info into channel space with configurable convolution
        parameters.
        """
        super().__init__()
        # A single convolution over the 4x-channel sliced input
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
        # self.contract = Contract(gain=2)

    def forward(self, x):
        """Processes input through Focus mechanism, reshaping (b,c,w,h) to (b,4c,w/2,h/2) then applies convolution."""
        # Slice every other pixel in each spatial direction, halving width and height and quadrupling channels
        # (a frequently cited point for modification)
        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
        # return self.conv(self.contract(x))
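# A minimal sketch (not part of common.py): the Focus slicing is lossless.
#   f = Focus(3, 64, k=3)
#   y = f(torch.zeros(1, 3, 640, 640))   # y.shape == torch.Size([1, 64, 320, 320])
# Every input pixel lands in exactly one of the four slices, so nothing is discarded before the conv.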
# Ghost convolution
class GhostConv(nn.Module):
    """Implements Ghost Convolution for efficient feature extraction, see https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """Initializes GhostConv with in/out channels, kernel size, stride, groups, and activation; halves out channels
        for efficiency.
        """
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)

    def forward(self, x):
        """Performs forward pass, concatenating outputs of two convolutions on input `x`: shape (B,C,H,W)."""
        y = self.cv1(x)
        # Concatenate the primary output with its cheap depth-wise transformation
        return torch.cat((y, self.cv2(y)), 1)
# Ghost bottleneck
class GhostBottleneck(nn.Module):
    """Efficient bottleneck layer using Ghost Convolutions, see https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=3, s=1):
        """Initializes GhostBottleneck with ch_in `c1`, ch_out `c2`, kernel size `k`, stride `s`; see
        https://github.com/huawei-noah/ghostnet.
        """
        super().__init__()
        c_ = c2 // 2
        # Main path: pointwise GhostConv, optional depth-wise downsampling, then a linear pointwise GhostConv
        self.conv = nn.Sequential(
            GhostConv(c1, c_, 1, 1),  # pw
            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
            GhostConv(c_, c2, 1, 1, act=False),  # pw-linear
        )
        # Shortcut path: depth-wise + pointwise convolutions when downsampling, identity otherwise
        self.shortcut = (
            nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
        )

    def forward(self, x):
        """Processes input through conv and shortcut layers, returning their summed output."""
        return self.conv(x) + self.shortcut(x)
# Contract: folds spatial dimensions into channels
class Contract(nn.Module):
    """Contracts spatial dimensions into channel dimensions for efficient processing in neural networks."""

    def __init__(self, gain=2):
        """Initializes a layer to contract spatial dimensions (width-height) into channels, e.g., input shape
        (1,64,80,80) to (1,256,40,40).
        """
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Processes input tensor to expand channel dimensions by contracting spatial dimensions, yielding output shape
        `(b, c*s*s, h//s, w//s)`.
        """
        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        # Reshape so each s*s spatial block gets its own axes
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        # Permute the block axes next to the channels and make memory contiguous
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        # Merge the block axes into the channel dimension
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)
# Expand: redistributes channels back into spatial dimensions (the inverse of Contract)
class Expand(nn.Module):
    """Expands spatial dimensions by redistributing channels, e.g., from (1,64,80,80) to (1,16,160,160)."""

    def __init__(self, gain=2):
        """Initializes the Expand module to increase spatial dimensions by redistributing channels, with an optional
        gain factor.

        Example: x(1,64,80,80) to x(1,16,160,160).
        """
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Processes input tensor x to expand spatial dimensions by redistributing channels, requiring
        C % gain^2 == 0.
        """
        b, c, h, w = x.size()  # assert c % s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s**2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s**2, h * s, w * s)  # x(1,16,160,160)
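# A minimal sketch (not part of common.py): Expand inverts Contract for the same gain.
#   x = torch.rand(1, 64, 80, 80)
#   assert torch.equal(Expand(2)(Contract(2)(x)), x)   # 64@80x80 -> 256@40x40 -> 64@80x80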
# Concat: concatenates a list of tensors along a given channel dimension
class Concat(nn.Module):
    """Concatenates tensors along a specified dimension for efficient tensor manipulation in neural networks."""

    def __init__(self, dimension=1):
        """Initializes a Concat module to concatenate tensors along a specified dimension."""
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Concatenates a list of tensors along a specified dimension; `x` is a list of tensors, `dimension` is an
        int.
        """
        return torch.cat(x, self.d)
# DetectMultiBackend: runs inference across multiple model backends
class DetectMultiBackend(nn.Module):
    """YOLOv5 MultiBackend class for inference on various backends including PyTorch, ONNX, TensorRT, and more."""

    def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
        """Initializes DetectMultiBackend with support for various inference backends, including PyTorch and ONNX."""
        #   PyTorch:              weights = *.pt
        #   TorchScript:                    *.torchscript
        #   ONNX Runtime:                   *.onnx
        #   ONNX OpenCV DNN:                *.onnx --dnn
        #   OpenVINO:                       *_openvino_model
        #   CoreML:                         *.mlpackage
        #   TensorRT:                       *.engine
        #   TensorFlow SavedModel:          *_saved_model
        #   TensorFlow GraphDef:            *.pb
        #   TensorFlow Lite:                *.tflite
        #   TensorFlow Edge TPU:            *_edgetpu.tflite
        #   PaddlePaddle:                   *_paddle_model
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        # Get the weights filename as a string
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine or triton  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
        stride = 32  # default stride
        # Check whether CUDA is available and requested
        cuda = torch.cuda.is_available() and device.type != "cpu"  # use CUDA
        if not (pt or triton):
            w = attempt_download(w)  # download if not local

        if pt:  # PyTorch (the branch used in this walkthrough)
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            # Maximum model stride
            stride = max(int(model.stride.max()), 32)  # model stride
            # Class names
            names = model.module.names if hasattr(model, "module") else model.names  # get class names
            # Downcast to half precision if requested
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f"Loading {w} for TorchScript inference...")
            extra_files = {"config.txt": ""}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files["config.txt"]:  # load metadata dict
                d = json.loads(
                    extra_files["config.txt"],
                    object_hook=lambda d: {int(k) if k.isdigit() else k: v for k, v in d.items()},
                )
                stride, names = int(d["stride"]), d["names"]
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
            check_requirements("opencv-python>=4.5.4")
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
            check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
            import onnxruntime

            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
            session = onnxruntime.InferenceSession(w, providers=providers)
            output_names = [x.name for x in session.get_outputs()]
            meta = session.get_modelmeta().custom_metadata_map  # metadata
            if "stride" in meta:
                stride, names = int(meta["stride"]), eval(meta["names"])
        elif xml:  # OpenVINO
            LOGGER.info(f"Loading {w} for OpenVINO inference...")
            check_requirements("openvino>=2023.0")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch

            core = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob("*.xml"))  # get *.xml file from *_openvino_model dir
            ov_model = core.read_model(model=w, weights=Path(w).with_suffix(".bin"))
            if ov_model.get_parameters()[0].get_layout().empty:
                ov_model.get_parameters()[0].set_layout(Layout("NCHW"))
            batch_dim = get_batch(ov_model)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            ov_compiled_model = core.compile_model(ov_model, device_name="AUTO")  # AUTO selects best available device
            stride, names = self._load_metadata(Path(w).with_suffix(".yaml"))  # load metadata
        elif engine:  # TensorRT
            LOGGER.info(f"Loading {w} for TensorRT inference...")
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download

            check_version(trt.__version__, "7.0.0", hard=True)  # require tensorrt>=7.0.0
            if device.type == "cpu":
                device = torch.device("cuda:0")
            Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, "rb") as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
            is_trt10 = not hasattr(model, "num_bindings")
            num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
            for i in num:
                if is_trt10:
                    name = model.get_tensor_name(i)
                    dtype = trt.nptype(model.get_tensor_dtype(name))
                    is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
                    if is_input:
                        if -1 in tuple(model.get_tensor_shape(name)):  # dynamic
                            dynamic = True
                            context.set_input_shape(name, tuple(model.get_profile_shape(name, 0)[2]))
                        if dtype == np.float16:
                            fp16 = True
                    else:  # output
                        output_names.append(name)
                    shape = tuple(context.get_tensor_shape(name))
                else:
                    name = model.get_binding_name(i)
                    dtype = trt.nptype(model.get_binding_dtype(i))
                    if model.binding_is_input(i):
                        if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                            dynamic = True
                            context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
                        if dtype == np.float16:
                            fp16 = True
                    else:  # output
                        output_names.append(name)
                    shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings["images"].shape[0]  # if dynamic, this is instead max batch size
        elif coreml:  # CoreML
            LOGGER.info(f"Loading {w} for CoreML inference...")
            import coremltools as ct

            model = ct.models.MLModel(w)
        elif saved_model:  # TF SavedModel
            LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
            import tensorflow as tf

            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                """Wraps a TensorFlow GraphDef for inference, returning a pruned function."""
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            def gd_outputs(gd):
                """Generates a sorted list of graph outputs excluding NoOp nodes and inputs, formatted as '<name>:0'."""
                name_list, input_list = [], []
                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
                    name_list.append(node.name)
                    input_list.extend(node.input)
                return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, "rb") as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf

                Interpreter, load_delegate = (
                    tf.lite.Interpreter,
                    tf.lite.experimental.load_delegate,
                )
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
                delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
                    platform.system()
                ]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # TFLite
                LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # load metadata
            with contextlib.suppress(zipfile.BadZipFile):
                with zipfile.ZipFile(w, "r") as model:
                    meta_file = model.namelist()[0]
                    meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
                    stride, names = int(meta["stride"]), meta["names"]
        elif tfjs:  # TF.js
            raise NotImplementedError("ERROR: YOLOv5 TF.js inference is not supported")
        elif paddle:  # PaddlePaddle
            LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle")
            import paddle.inference as pdi

            if not Path(w).is_file():  # if not *.pdmodel
                w = next(Path(w).rglob("*.pdmodel"))  # get *.pdmodel file from *_paddle_model dir
            weights = Path(w).with_suffix(".pdiparams")
            config = pdi.Config(str(w), str(weights))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
        elif triton:  # NVIDIA Triton Inference Server
            LOGGER.info(f"Using {w} as Triton Inference Server...")
            check_requirements("tritonclient[all]")
            from utils.triton import TritonRemoteModel

            model = TritonRemoteModel(url=w)
            nhwc = model.runtime.startswith("tensorflow")
        else:
            raise NotImplementedError(f"ERROR: {w} is not a supported format")

        # class names
        if "names" not in locals():
            names = yaml_load(data)["names"] if data else {i: f"class{i}" for i in range(999)}
        if names[0] == "n01440764" and len(names) == 1000:  # ImageNet
            names = yaml_load(ROOT / "data/ImageNet.yaml")["names"]  # human-readable names

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        """Performs YOLOv5 inference on input images with options for augmentation and visualization."""
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch (the path exercised in this walkthrough)
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.ov_compiled_model(im).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings["images"].shape:
                i = self.model.get_binding_index("images")
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings["images"].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs["images"] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype("uint8"))
            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({"image": im})  # coordinates are xywh normalized
            if "confidence" in y:
                box = xywh2xyxy(y["coordinates"] * [[w, h, w, h]])  # xyxy pixels
                conf, cls = y["confidence"].max(1), y["confidence"].argmax(1).astype(np.float32)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input = self.input_details[0]
                int8 = input["dtype"] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input["quantization"]
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input["index"], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output["index"])
                    if int8:
                        scale, zero_point = output["quantization"]
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)

    # Tensor conversion helper
    def from_numpy(self, x):
        """Converts a NumPy array to a torch tensor, maintaining device compatibility."""
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """Performs a single inference warmup to initialize model weights, accepting an `imgsz` tuple for image size."""
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != "cpu" or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    # Determine the backend from the weights path or URL
    @staticmethod
    def _model_type(p="path/to/model.pt"):
        """Determines model type from file path or URL, supporting various export formats.

        Example: path='path/to/model.onnx' -> type=onnx
        """
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from export import export_formats
        from utils.downloads import is_url

        # Export suffixes from the export-formats table
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]

    # Load metadata (stride and class names)
    @staticmethod
    def _load_metadata(f=Path("path/to/meta.yaml")):
        """Loads metadata from a YAML file, returning strides and names if the file exists, otherwise `None`."""
        if f.exists():
            d = yaml_load(f)
            return d["stride"], d["names"]  # assign stride, names
        return None, None
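# A minimal sketch (not part of common.py) of typical DetectMultiBackend usage,
# assuming a local yolov5s.pt checkpoint is available:
#   model = DetectMultiBackend("yolov5s.pt", device=torch.device("cpu"))
#   model.warmup(imgsz=(1, 3, 640, 640))     # dummy forward pass (skipped on CPU-only setups)
#   y = model(torch.zeros(1, 3, 640, 640))   # raw predictions, ready for non_max_suppression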
# AutoShape: input preprocessing + inference + NMS wrapper
class AutoShape(nn.Module):
    """AutoShape class for robust YOLOv5 inference with preprocessing, NMS, and support for various input formats."""

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model, verbose=True):
        """Initializes YOLOv5 model for inference, setting up attributes and preparing model for evaluation."""
        super().__init__()
        if verbose:
            LOGGER.info("Adding AutoShape... ")
        copy_attr(self, model, include=("yaml", "nc", "hyp", "names", "stride", "abc"), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values

    def _apply(self, fn):
        """Applies to(), cpu(), cuda(), half() etc. to model tensors excluding parameters or registered buffers."""
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @smart_inference_mode()
    def forward(self, ims, size=640, augment=False, profile=False):
        """Performs inference on inputs with optional augment & profiling.

        Supports various formats including file, URI, OpenCV, PIL, numpy, torch.
        """
        # For size(height=640, width=1280), RGB images example inputs are:
        #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        dt = (Profile(), Profile(), Profile())
        with dt[0]:
            if isinstance(size, int):  # expand an int size to (h, w)
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != "cpu")  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process: get the image count and the list of images
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f"image{i}"  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, "filename", f) or f
                files.append(Path(f).with_suffix(".jpg").name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                # Keep three channels if the image is 3-D; otherwise convert grayscale to BGR
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                # Record the original image shape
                shape0.append(s)  # image shape
                # Gain: target size over the larger image dimension
                g = max(size) / max(s)  # gain
                # shape1 stores the post-preprocessing (inference) shapes
                shape1.append([int(y * g) for y in s])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)]  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            # Stack and convert BHWC to BCHW
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
                y = non_max_suppression(
                    y if self.dmb else y[0],
                    self.conf,
                    self.iou,
                    self.classes,
                    self.agnostic,
                    self.multi_label,
                    max_det=self.max_det,
                )  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            # Package images, predictions, and timings into a Detections object
            return Detections(ims, y, files, dt, self.names, x.shape)


# Detections: container for inference results
class Detections:
    """Manages YOLOv5 detection results with methods for visualization, saving, cropping, and exporting detections."""

    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        """Initializes the YOLOv5 Detections class with image info, predictions, filenames, timing and normalization."""
        super().__init__()
        # Device of the predictions
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        # Predicted boxes
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1e3 for x in times)  # timestamps (ms)
        self.s = tuple(shape)  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path("")):
        """Executes model predictions, displaying and/or saving outputs with optional crops and labels."""
        s, crops = "", []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f"\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} "  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(", ")
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f"{self.names[int(cls)]} {conf:.2f}"
                        if crop:
                            file = save_dir / "crops" / self.names[int(cls)] / self.files[i] if save else None
                            crops.append(
                                {
                                    "box": box,
                                    "conf": conf,
                                    "cls": cls,
                                    "label": label,
                                    "im": save_one_box(box, im, file=file, save=save),
                                }
                            )
                        else:  # all others
                            # Draw the box with its label
                            annotator.box_label(box, label if labels else "", color=colors(cls))
                    im = annotator.im
            else:
                s += "(no detections)"

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                if is_jupyter():
                    from IPython.display import display

                    display(im)
                else:
                    im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip("\n")
            return f"{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}" % self.t
        if crop:
            if save:
                LOGGER.info(f"Saved results to {save_dir}\n")
            return crops

    @TryExcept("Showing images is not supported in this environment")
    def show(self, labels=True):
        """Displays detection results with optional labels. Usage: show(labels=True)."""
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir="runs/detect/exp", exist_ok=False):
        """Saves detection results with optional labels to a specified directory.

        Usage: save(labels=True, save_dir='runs/detect/exp', exist_ok=False)
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir="runs/detect/exp", exist_ok=False):
        """Crops detection results, optionally saves them to a directory.

        Args: save (bool), save_dir (str), exist_ok (bool).
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        """Renders detection results with optional labels on images; args: labels (bool) indicating label inclusion."""
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        """Returns detections as pandas DataFrames for various box formats (xyxy, xyxyn, xywh, xywhn).

        Example: print(results.pandas().xyxy[0]).
        """
        new = copy(self)  # return copy
        ca = "xmin", "ymin", "xmax", "ymax", "confidence", "class", "name"  # xyxy columns
        cb = "xcenter", "ycenter", "width", "height", "confidence", "class", "name"  # xywh columns
        for k, c in zip(["xyxy", "xyxyn", "xywh", "xywhn"], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Converts a Detections object into a list of individual detection results for iteration.

        Example: for result in results.tolist():
        """
        r = range(self.n)  # iterable
        return [
            Detections(
                [self.ims[i]],
                [self.pred[i]],
                [self.files[i]],
                self.times,
                self.names,
                self.s,
            )
            for i in r
        ]

    def print(self):
        """Logs the string representation of the current object's state via the LOGGER."""
        LOGGER.info(self.__str__())

    def __len__(self):
        """Returns the number of results stored, overrides the default len(results)."""
        return self.n

    def __str__(self):
        """Returns a string representation of the model's results, suitable for printing, overrides default
        print(results).
        """
        return self._run(pprint=True)  # print results

    def __repr__(self):
        """Returns a string representation of the YOLOv5 object, including its class and formatted results."""
        return f"YOLOv5 {self.__class__} instance\n" + self.__str__()


# Proto: mask prototype module for segmentation models
class Proto(nn.Module):
    """YOLOv5 mask Proto module for segmentation models, performing convolutions and upsampling on input tensors."""

    def __init__(self, c1, c_=256, c2=32):
        """Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration."""
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Performs a forward pass using convolutional layers and upsampling on input tensor `x`."""
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))
# Classify: YOLOv5 classification head
class Classify(nn.Module):
    """YOLOv5 classification head with convolution, pooling, and dropout layers for channel transformation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0):
        """Initializes YOLOv5 classification head; args: ch_in, ch_out, kernel, stride, padding, groups, dropout
        probability.
        """
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=dropout_p, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        """Processes input through conv, pool, drop, and linear layers; supports list concatenation input."""
        if isinstance(x, list):
            x = torch.cat(x, 1)
        return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
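To see how these pieces fit together, here is a minimal end-to-end sketch (assuming network access and the official ultralytics/yolov5 hub entry): torch.hub returns a DetectMultiBackend model already wrapped in AutoShape, and calling it yields a Detections object.

import torch

model = torch.hub.load("ultralytics/yolov5", "yolov5s")       # DetectMultiBackend wrapped in AutoShape
results = model("https://ultralytics.com/images/zidane.jpg")  # preprocess, forward, NMS
results.print()                                               # summary string built by Detections._run
print(results.pandas().xyxy[0])                               # boxes as a pandas DataFrame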

Overall, common.py is less complicated than it first appears: most of the file is small building-block modules (Conv, Bottleneck, C3, SPPF, and their variants), with the remainder being backend loading and inference plumbing.

