最近在跑代码的时候需要可视化一些网络中间层特征来诊断网络,但是我的backbone是一个3D网络,一般的Grad-CAM都是在2D网络中应用更广泛,查了一下也只有几篇博文是关于3D Grad-CAM的介绍的。自己参照他们的代码试了一下,但是可视化的结果很怪异。
# Schematic overview of the three hook flavours. `callback_func` and
# `A_Module` are placeholders for your own callback and nn.Module subclass.

# 1) Tensor hook: called with the gradient of `tensor` during backward().
#    The tensor must require grad, otherwise register_hook raises.
tensor = torch.tensor([1.0, 2.0], requires_grad=True)
hook = tensor.register_hook(callback_func)


# 2) Module forward hook: called after forward() with the module's
#    inputs (a tuple) and its output.
def forward_hook(module, input, output):
    pass


module = A_Module()
hook = module.register_forward_hook(forward_hook)


# 3) Module backward hook: called during backward() with the gradients
#    w.r.t. the module's inputs and outputs (both tuples).
#    Recent PyTorch versions recommend register_full_backward_hook instead.
def backward_hook(module, grad_input, grad_output):
    pass


module = A_Module()
hook = module.register_backward_hook(backward_hook)
- 特征提取:通过获取和分析模块的输入和输出,可以提取中间层的特征表示,用于后续的可视化或分析。
- 网络理解:通过观察不同层次的特征表示,可以帮助研究人员理解模型的学习过程和决策依据,提高模型的可解释性。
- 诊断问题:在回调函数中检查输入和输出,可以识别潜在的问题,如数据异常、层间不匹配等。
import torch
import torch.nn as nn


class SimpleModel(nn.Module):
    """Tiny Conv2d -> MaxPool2d -> ReLU network used to demonstrate forward hooks."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 3, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.pool(x)
        x = self.relu(x)
        return x


model = SimpleModel()


def forward_hook(module, inputs, output):
    """Print the hooked module and the shapes of its first input and its output.

    Args:
        module: The module the hook is attached to.
        inputs: Tuple of positional inputs passed to the module's forward().
        output: Value returned by the module's forward().
    """
    print(f"Module: {module}")
    print(f"Input: {inputs[0].shape}")
    print(f"Output: {output.shape}")


# Attach the hook to the output of model.conv. To inspect the ReLU output
# instead, register the hook on model.relu.
hook = model.conv.register_forward_hook(forward_hook)
input_data = torch.randn(1, 1, 28, 28, requires_grad=True)
output = model(input_data)
# Detach the hook once the demo is done so later forward passes stay silent.
hook.remove()
Module: Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
Input: torch.Size([1, 1, 28, 28])
Output: torch.Size([1, 3, 26, 26])
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np


def feature_visualization_hook(module, inputs, output):
    """Forward hook that displays the hooked module's output as an image.

    The output is rescaled to the 0..255 range on a private copy, so the
    activations that continue through the network are left untouched.

    Args:
        module: The module the hook is attached to (used in the plot title).
        inputs: Tuple of positional inputs passed to the module (unused).
        output: Output tensor of the module, laid out as (N, C, H, W).
    """
    # Convert the output feature map into an RGB image: (N, C, H, W) -> (N, H, W, C).
    # Tensor.numpy() shares memory with the tensor, so astype() is used to get
    # an independent copy before any in-place arithmetic; .cpu() makes the
    # hook usable for CUDA activations as well.
    feature_map = (
        output.detach().permute(0, 2, 3, 1).squeeze().cpu().numpy().astype(np.float32)
    )
    feature_map -= feature_map.min()
    peak = feature_map.max()
    if peak > 0:  # a constant map would otherwise divide by zero
        feature_map /= peak
    feature_map = (feature_map * 255).astype(np.uint8)
    plt.imshow(feature_map, cmap='gray')
    plt.title(f"Feature Map at Module {module}")
    plt.show()


class SimpleModel(nn.Module):
    """Tiny Conv2d -> MaxPool2d -> ReLU network used to demonstrate feature visualization."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 3, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.pool(x)
        x = self.relu(x)
        return x


model = SimpleModel()
hook = model.conv.register_forward_hook(feature_visualization_hook)
input_data = torch.randn(1, 1, 28, 28, requires_grad=True)
output = model(input_data)
import torch
import torch.nn as nn


class SimpleModel(nn.Module):
    """Tiny Conv2d -> global average pool -> ReLU network used to demonstrate backward hooks."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 3, kernel_size=3)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)  # (4, 3, 26, 26) for the demo input below
        x = self.pool(x).squeeze(-1).squeeze(-1)  # (4, 3)
        x = self.relu(x)
        return x


model = SimpleModel()


def backward_hook(module, grad_input, grad_output):
    """Print the shapes of every gradient flowing into and out of the module.

    Args:
        module: The module the hook is attached to.
        grad_input: Tuple of gradients reported for the module's inputs;
            entries may be None.
        grad_output: Tuple of gradients w.r.t. the module's outputs;
            entries may be None.
    """
    print(f"Module: {module}")
    for grad in grad_input:
        if grad is None:
            continue
        print(f"Input Gradients: {grad.shape}")
    for grad in grad_output:
        if grad is None:
            continue
        print(f"Output Gradients: {grad.shape}")


# NOTE: register_backward_hook is the legacy API; newer PyTorch versions
# recommend register_full_backward_hook. The legacy call is kept here so the
# printed gradients match the output shown in the article.
hook = model.conv.register_backward_hook(backward_hook)
input_data = torch.randn(4, 1, 28, 28, requires_grad=True)
target_data = torch.randn(4, 1, requires_grad=True)
output = model(input_data)
loss = torch.mean((output - target_data) ** 2)
loss.backward()  # the hook only fires once backward() runs
# Detach the hook once the demo is done.
hook.remove()
Module: Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
Input Gradients: torch.Size([4, 1, 28, 28])
Input Gradients: torch.Size([3, 1, 3, 3])
Input Gradients: torch.Size([3])
Output Gradients: torch.Size([4, 3, 26, 26])
import torch

# Create a random tensor that tracks gradients.
x = torch.randn(3, 4, requires_grad=True)


# Define the callback; it receives the gradient of the hooked tensor.
def gradient_hook(grad):
    """Print the shape of the gradient that reaches the hooked tensor."""
    print(f"Gradient of x: {grad.shape}")


# Register the gradient hook on tensor x.
hook = x.register_hook(gradient_hook)

# Build a tensor y that depends on x and run the forward computation.
y = x ** 2
out = y.mean()

# The hook is only invoked while gradients are propagated back to x.
out.backward()
Gradient of x: torch.Size([3, 4])
3D ConvNet尝试
class Block3D(nn.Module):
    r""" 3D ConvNeXt Block that records the gradient of its depthwise-conv output.

    There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, T, H, W)
    (2) DwConv -> Permute to (N, T, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        inflate_len (int): Kernel size of the depthwise conv along the first
            (temporal) spatial axis. Default: 3
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """

    def __init__(self, dim, drop_path=0., inflate_len=3, layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv3d(dim, dim, kernel_size=(inflate_len, 7, 7),
                                padding=(inflate_len // 2, 3, 3), groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        # Gradients captured by gradient_hook, one entry per backward pass.
        # The list is never cleared here; callers read the latest via grad[-1].
        self.grad = []

    def gradient_hook(self, grad):
        """Tensor hook: log and store the gradient w.r.t. the depthwise-conv output."""
        print(f"Gradients: {grad.shape}")
        self.grad.append(grad)

    def forward(self, x):
        shortcut = x
        x = self.dwconv(x)
        # register_hook raises on tensors that do not require grad (for
        # example under torch.no_grad()), so only hook when autograd is active.
        if x.requires_grad:
            x.register_hook(self.gradient_hook)
        x = x.permute(0, 2, 3, 4, 1)  # (N, C, T, H, W) -> (N, T, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 4, 1, 2, 3)  # (N, T, H, W, C) -> (N, C, T, H, W)
        x = shortcut + self.drop_path(x)
        return x
Gradients: torch.Size([1, 768, 4, 7, 7])
Gradients: torch.Size([1, 768, 4, 7, 7])
Gradients: torch.Size([1, 768, 4, 7, 7])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 384, 4, 14, 14])
Gradients: torch.Size([1, 192, 4, 28, 28])
Gradients: torch.Size([1, 192, 4, 28, 28])
Gradients: torch.Size([1, 192, 4, 28, 28])
Gradients: torch.Size([1, 96, 4, 56, 56])
Gradients: torch.Size([1, 96, 4, 56, 56])
Gradients: torch.Size([1, 96, 4, 56, 56])
import torch
from torch.autograd import Variable
from torch.autograd import Function
from torchvision import models
from torchvision import utils
import cv2
import sys
import numpy as np
import argparse
from pytorchvideo.models.resnet import create_resnet
from conv import convnext_3d_tiny, Block3D


def preprocess_image(img):
    """Convert an H x W x 3 float image into a normalized 1 x 3 x H x W tensor.

    The channel order is reversed (OpenCV loads images as BGR), each channel
    is normalized with the per-channel mean/std below, and the result is
    returned as a tensor that requires grad.

    Args:
        img: H x W x 3 float array, expected to be scaled to [0, 1].

    Returns:
        torch.Tensor of shape (1, 3, H, W) with requires_grad=True.
    """
    means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    # Reverse the channel axis, then normalize all channels at once
    # (broadcast over the last axis); this produces a new array.
    normalized = (img[:, :, ::-1] - means) / stds
    # HWC -> CHW; from_numpy needs a contiguous buffer.
    chw = np.ascontiguousarray(np.transpose(normalized, (2, 0, 1)))
    tensor = torch.from_numpy(chw).unsqueeze(0)
    return tensor.requires_grad_(True)


if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    image_path = "dog-cat.png"
    model = convnext_3d_tiny().to(device)

    img = cv2.imread(image_path, 1)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = np.float32(cv2.resize(img, (224, 224))) / 255

    clip = preprocess_image(img)
    # Repeat the single frame 8 times to fake a video clip:
    # (1, 3, H, W) -> (8, 3, H, W) -> (1, 8, 3, H, W) -> (1, 3, 8, H, W)
    clip = clip.repeat(8, 1, 1, 1).unsqueeze(0).permute(0, 2, 1, 3, 4).to(device)

    output = model(clip)
    loss = output.mean()
    loss.backward()

    # Access gradients stored in the Block3D layers and print what the hooks captured.
    for stage_idx, stage in enumerate(model.stages):
        for block_idx, block in enumerate(stage):
            if not isinstance(block, Block3D):
                continue
            if block.grad:
                print(f"Stage {stage_idx}, Block {block_idx} - Gradient Shape: {block.grad[-1].shape}")
            else:
                print(f"Stage {stage_idx}, Block {block_idx} - No gradient recorded")
Stage 0, Block 0 - Gradient Shape: torch.Size([1, 96, 4, 56, 56])
Stage 0, Block 1 - Gradient Shape: torch.Size([1, 96, 4, 56, 56])
Stage 0, Block 2 - Gradient Shape: torch.Size([1, 96, 4, 56, 56])
Stage 1, Block 0 - Gradient Shape: torch.Size([1, 192, 4, 28, 28])
Stage 1, Block 1 - Gradient Shape: torch.Size([1, 192, 4, 28, 28])
Stage 1, Block 2 - Gradient Shape: torch.Size([1, 192, 4, 28, 28])
Stage 2, Block 0 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 1 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 2 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 3 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 4 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 5 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 6 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 7 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 2, Block 8 - Gradient Shape: torch.Size([1, 384, 4, 14, 14])
Stage 3, Block 0 - Gradient Shape: torch.Size([1, 768, 4, 7, 7])
Stage 3, Block 1 - Gradient Shape: torch.Size([1, 768, 4, 7, 7])
Stage 3, Block 2 - Gradient Shape: torch.Size([1, 768, 4, 7, 7])
class Block3D(nn.Module):
    r""" 3D ConvNeXt Block that records the depthwise-conv output and its gradient.

    There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, T, H, W)
    (2) DwConv -> Permute to (N, T, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        inflate_len (int): Kernel size of the depthwise conv along the first
            (temporal) spatial axis. Default: 3
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """

    def __init__(self, dim, drop_path=0., inflate_len=3, layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv3d(dim, dim, kernel_size=(inflate_len, 7, 7),
                                padding=(inflate_len // 2, 3, 3), groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        # Gradients captured by gradient_hook, one entry per backward pass.
        self.grad = []
        # Depthwise-conv outputs captured by save_features_hook, one entry per forward pass.
        # Neither list is cleared here; callers read the latest entry via [-1].
        self.features = []
        self.dwconv.register_forward_hook(self.save_features_hook)  # register the forward hook

    def save_features_hook(self, module, input, output):
        """Forward hook on dwconv: store a detached view of its output.

        Detaching keeps the stored activation from holding on to the autograd
        graph of every forward pass.
        """
        self.features.append(output.detach())

    def gradient_hook(self, grad):
        """Tensor hook: log and store the gradient w.r.t. the depthwise-conv output."""
        print(f"Gradients: {grad.shape}")
        self.grad.append(grad)

    def forward(self, x):
        shortcut = x
        x = self.dwconv(x)
        # register_hook raises on tensors that do not require grad (for
        # example under torch.no_grad()), so only hook when autograd is active.
        if x.requires_grad:
            x.register_hook(self.gradient_hook)
        x = x.permute(0, 2, 3, 4, 1)  # (N, C, T, H, W) -> (N, T, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 4, 1, 2, 3)  # (N, T, H, W, C) -> (N, C, T, H, W)
        x = shortcut + self.drop_path(x)
        return x
完成这些操作后,就可以根据grad进行特征加权,并且和原始图像进行叠加。因为我们采用的是一个3D网络,输入的一般是视频数据,大小为 $[b, c, t, w, h]$,我们这里统一选取 $t=0$ 作为示例。
def _grad_cam_for_frame(grad, feature, frame=0):
    """Return the ReLU'd Grad-CAM map of one frame.

    Args:
        grad: Gradient array of shape (C, T, H, W).
        feature: Activation array of shape (C, T, H, W).
        frame: Index along the T axis to visualize.

    Returns:
        Non-negative float32 array of shape (H, W).
    """
    # Channel weights: spatial mean of the gradient at the chosen frame -> (C,)
    weights = grad[:, frame].mean(axis=(1, 2))
    # Weighted sum of the channel activations -> (H, W)
    cam = np.tensordot(weights, feature[:, frame], axes=1)
    return np.maximum(cam, 0).astype(np.float32)


def _normalize_cam(cam):
    """Shift and scale a map to [0, 1]; a constant map is returned as all zeros."""
    cam = cam - np.min(cam)
    peak = np.max(cam)
    # An all-zero map (everything clipped by the ReLU) would otherwise become NaN.
    return cam / peak if peak > 0 else cam


if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    image_path = "dog-cat.png"
    model = convnext_3d_tiny().to(device)
    # Disable train-only stochastic layers (e.g. DropPath) so the maps are reproducible.
    model.eval()

    img = cv2.imread(image_path, 1)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = np.float32(cv2.resize(img, (224, 224))) / 255

    clip = preprocess_image(img)
    # Repeat the single frame 8 times to fake a video clip:
    # (1, 3, H, W) -> (8, 3, H, W) -> (1, 8, 3, H, W) -> (1, 3, 8, H, W)
    clip = clip.repeat(8, 1, 1, 1).unsqueeze(0).permute(0, 2, 1, 3, 4).to(device)

    output = model(clip)
    loss = output.mean()
    loss.backward()

    # Collect the latest gradient / activation pair recorded by every Block3D.
    grads = []
    features = []
    for stage_idx, stage in enumerate(model.stages):
        for block_idx, block in enumerate(stage):
            if isinstance(block, Block3D) and block.grad:
                last_grad = block.grad[-1]  # Get the last gradient
                last_feature = block.features[-1]
                grads.append(last_grad)
                features.append(last_feature)
                print(f"Stage {stage_idx}, Block {block_idx} - Gradient Shape: {last_grad.shape}")
                print(f"Stage {stage_idx}, Block {block_idx} - Feature Shape: {last_feature.shape}")

    # Build one heat map per block, using frame t=0 of the first sample.
    for index, (grad, feature) in enumerate(zip(grads, features)):
        grad_np = grad.detach().cpu().numpy()[0]        # (C, T, H, W)
        feature_np = feature.detach().cpu().numpy()[0]  # (C, T, H, W)
        cam = _grad_cam_for_frame(grad_np, feature_np, frame=0)
        cam = cv2.resize(cam, (224, 224))
        cam = _normalize_cam(cam)
        show_cam_on_image(img, cam, index)