effidehead_lite.py
yolov6\models\heads\effidehead_lite.py
目录
effidehead_lite.py
1.所需的库和模块
2.class Detect(nn.Module):
3.def build_effidehead_layer(channels_list, num_anchors, num_classes, num_layers):
1.所需的库和模块
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from yolov6.layers.common import DPBlock
from yolov6.assigners.anchor_generator import generate_anchors
from yolov6.utils.general import dist2bbox
2.class Detect(nn.Module):
class Detect(nn.Module):
    """Efficient decoupled detection head (lite variant).

    With hardware-aware design, the decoupled head is optimized with
    hybrid-channel methods. ``head_layers`` supplies, for each detection
    level, five modules in order: stem, cls_conv, reg_conv, cls_pred,
    reg_pred.
    """

    def __init__(self, num_classes=80, num_layers=3, inplace=True, head_layers=None):  # detection layer
        super().__init__()
        assert head_layers is not None
        self.nc = num_classes          # number of classes
        self.no = num_classes + 5      # number of outputs per anchor
        self.nl = num_layers           # number of detection layers
        self.grid = [torch.zeros(1)] * num_layers
        self.prior_prob = 1e-2         # prior foreground probability used for cls bias init
        self.inplace = inplace
        # Strides of the feature maps fed into the head, one per level.
        stride = [8, 16, 32] if num_layers == 3 else [8, 16, 32, 64]
        self.stride = torch.tensor(stride)
        self.grid_cell_offset = 0.5
        self.grid_cell_size = 5.0

        # Decoupled head: split the flat head_layers sequence into
        # per-level module lists (5 modules per level).
        self.stems = nn.ModuleList()
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        self.cls_preds = nn.ModuleList()
        self.reg_preds = nn.ModuleList()
        for level in range(num_layers):
            base = level * 5
            self.stems.append(head_layers[base])
            self.cls_convs.append(head_layers[base + 1])
            self.reg_convs.append(head_layers[base + 2])
            self.cls_preds.append(head_layers[base + 3])
            self.reg_preds.append(head_layers[base + 4])

    def initialize_biases(self):
        """Initialize prediction-layer parameters.

        Classification biases are set so that sigmoid(bias) == prior_prob
        (a near-zero initial foreground score); regression biases are set
        to 1.0. All prediction weights start at zero.
        """
        cls_bias = -math.log((1 - self.prior_prob) / self.prior_prob)
        for preds, bias_value in ((self.cls_preds, cls_bias), (self.reg_preds, 1.0)):
            for conv in preds:
                flat_bias = conv.bias.view(-1, )
                flat_bias.data.fill_(bias_value)
                conv.bias = torch.nn.Parameter(flat_bias.view(-1), requires_grad=True)
                weight = conv.weight
                weight.data.fill_(0.)
                conv.weight = torch.nn.Parameter(weight, requires_grad=True)

    def forward(self, x):
        """Run the head over the list of per-level feature maps ``x``.

        Training: returns (stem features, class scores, regression
        distributions), with per-anchor rows concatenated over levels.
        Eval: returns decoded boxes (xywh, scaled back to input pixels),
        a constant objectness column of ones, and class scores,
        concatenated along the last axis.
        """
        if self.training:
            cls_scores = []
            reg_distris = []
            for i in range(self.nl):
                feat = self.stems[i](x[i])
                x[i] = feat
                cls_out = torch.sigmoid(self.cls_preds[i](self.cls_convs[i](feat)))
                reg_out = self.reg_preds[i](self.reg_convs[i](feat))
                # (b, c, h, w) -> (b, h*w, c)
                cls_scores.append(cls_out.flatten(2).permute((0, 2, 1)))
                reg_distris.append(reg_out.flatten(2).permute((0, 2, 1)))
            cls_scores = torch.cat(cls_scores, axis=1)
            reg_distris = torch.cat(reg_distris, axis=1)
            return x, cls_scores, reg_distris
        else:
            cls_scores = []
            reg_dists = []
            # Anchor-free ('af') anchor points and their strides for all levels.
            anchor_points, stride_tensor = generate_anchors(
                x, self.stride, self.grid_cell_size, self.grid_cell_offset,
                device=x[0].device, is_eval=True, mode='af')
            for i in range(self.nl):
                bs, _, fh, fw = x[i].shape
                hw = fh * fw
                feat = self.stems[i](x[i])
                x[i] = feat
                cls_out = torch.sigmoid(self.cls_preds[i](self.cls_convs[i](feat)))
                reg_out = self.reg_preds[i](self.reg_convs[i](feat))
                cls_scores.append(cls_out.reshape([bs, self.nc, hw]))
                reg_dists.append(reg_out.reshape([bs, 4, hw]))
            cls_scores = torch.cat(cls_scores, axis=-1).permute(0, 2, 1)
            reg_dists = torch.cat(reg_dists, axis=-1).permute(0, 2, 1)
            # Decode ltrb distances to xywh boxes and rescale by stride.
            pred_bboxes = dist2bbox(reg_dists, anchor_points, box_format='xywh')
            pred_bboxes *= stride_tensor
            return torch.cat(
                [
                    pred_bboxes,
                    torch.ones((bs, pred_bboxes.shape[1], 1),
                               device=pred_bboxes.device, dtype=pred_bboxes.dtype),
                    cls_scores
                ],
                axis=-1)
3.def build_effidehead_layer(channels_list, num_anchors, num_classes, num_layers):
def build_effidehead_layer(channels_list, num_anchors, num_classes, num_layers):
    """Build the flat module sequence consumed by ``Detect``.

    For each detection level ``i`` the returned ``nn.Sequential`` holds five
    modules in order — stem, cls_conv, reg_conv, cls_pred, reg_pred — sized
    for ``channels_list[i]``. The first three levels keep nn.Sequential's
    auto-generated names ('0'..'14'); when ``num_layers == 4`` a fourth level
    is appended with explicit names ('stem3', ..., 'reg_pred3'), matching the
    original layout so integer indexing in ``Detect`` is unchanged.

    Args:
        channels_list: per-level channel counts (at least ``num_layers`` entries).
        num_anchors: anchors per grid cell; scales prediction-channel counts.
        num_classes: number of object classes for the cls_pred output.
        num_layers: number of detection levels (3 or 4).

    Returns:
        nn.Sequential of 5 * num_layers modules.
    """

    def _level_modules(channels):
        # One detection level: stem / cls_conv / reg_conv are 5x5 DPBlocks,
        # the two prediction heads are 1x1 convolutions.
        return [
            # stem
            DPBlock(in_channel=channels, out_channel=channels, kernel_size=5, stride=1),
            # cls_conv
            DPBlock(in_channel=channels, out_channel=channels, kernel_size=5, stride=1),
            # reg_conv
            DPBlock(in_channel=channels, out_channel=channels, kernel_size=5, stride=1),
            # cls_pred
            nn.Conv2d(in_channels=channels, out_channels=num_classes * num_anchors, kernel_size=1),
            # reg_pred
            nn.Conv2d(in_channels=channels, out_channels=4 * num_anchors, kernel_size=1),
        ]

    # First three levels are always built (the backbone always provides them).
    modules = []
    for channels in channels_list[:3]:
        modules.extend(_level_modules(channels))
    head_layers = nn.Sequential(*modules)

    # Optional fourth level, added under explicit names as in the original code.
    if num_layers == 4:
        names = ('stem3', 'cls_conv3', 'reg_conv3', 'cls_pred3', 'reg_pred3')
        for name, module in zip(names, _level_modules(channels_list[3])):
            head_layers.add_module(name, module)

    return head_layers