OpenMMlab导出Mask R-CNN模型并用onnxruntime和tensorrt推理

onnxruntime推理

使用mmdeploy导出onnx模型:

from mmdeploy.apis import torch2onnx
from mmdeploy.backend.sdk.export_info import export2SDK

# Export a Mask R-CNN checkpoint to ONNX with mmdeploy and dump the SDK
# pipeline description next to it.

# Image passed to torch2onnx as the sample input for the export.
img = 'demo.JPEG'
work_dir = './work_dir/onnx/mask_rcnn'
save_file = './end2end.onnx'
# NOTE(review): this is a *detection* deploy config, while the inference code
# further down reads a third (mask) output and the TensorRT export uses an
# instance-seg config -- confirm this is the intended config.
deploy_cfg = 'mmdeploy/configs/mmdet/detection/detection_onnxruntime_dynamic.py'
model_cfg = 'mmdetection/configs/mask_rcnn/mask-rcnn_r50_fpn_ms-poly-3x_coco.py'
model_checkpoint = 'checkpoints/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth'
device = 'cpu'

# 1. convert model to onnx
torch2onnx(img, work_dir, save_file, deploy_cfg, model_cfg, model_checkpoint, device)

# 2. extract pipeline info for sdk use (dump-info)
export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device)

可以用netron查看onnx模型的结构:
(原文此处为onnx模型结构截图,图片未随文本保留)
也可以通过下面的代码尝试手动导出onnx模型:

import cv2
import torch
import torch.nn.functional as F
import torchvision
import numpy as np
from mmdet.apis import init_detector, inference_detector
from mmengine.config import Config
from mmcv.ops import batched_nms
from mmcv.ops.point_sample import bilinear_grid_sample
from mmdet.structures.bbox import bbox2roi
from mmdet.models.layers import multiclass_nms

config_file = './configs/mask_rcnn/mask-rcnn_r50_fpn_ms-poly-3x_coco.py'
checkpoint_file = '../checkpoints/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth'
# Fixed image meta used by every stage below: the export is traced for a
# single 800x800 input with no rescaling.
img_meta = {'batch_input_shape': (800, 800), 'pad_shape': (800, 800), 'ori_shape': (800, 800), 'scale_factor': (1.0, 1.0), 'img_shape': (800, 800)}
test_cfg = Config({'score_thr': 0.05, 'nms': {'type': 'nms', 'iou_threshold': 0.5}, 'max_per_img': 100, 'mask_thr_binary': 0.5})
num_classes = 80
# Maximum number of RPN candidates kept per level before NMS, and of
# proposals kept after NMS.
nms_pre = 1000


class MaskRCNN(torch.nn.Module):
    """Wrapper around an mmdet Mask R-CNN that re-implements post-processing.

    The backbone, neck, RPN head and RoI heads come from the detector loaded
    by ``init_detector``.  Anchor generation, proposal selection, multi-class
    NMS and mask pasting are rewritten here with plain torch / torchvision
    ops so that ``torch.onnx.export`` can trace the whole pipeline.
    """

    def __init__(self):
        super().__init__()
        self.model = init_detector(config_file, checkpoint_file, device='cpu')
        # Anchor generator settings, one entry per feature level.
        self.base_sizes = [4, 8, 16, 32, 64]
        self.ratios = torch.tensor([0.5000, 1.0000, 2.0000])
        self.scales = torch.tensor([8.])
        self.strides = [(4, 4), (8, 8), (16, 16), (32, 32), (64, 64)]
        self.base_anchors = self.gen_base_anchors()

    def gen_base_anchors(self):
        """Return the list of base anchors, one tensor per feature level."""
        return [
            self.gen_single_level_base_anchors(base_size, scales=self.scales, ratios=self.ratios)
            for base_size in self.base_sizes
        ]

    def gen_single_level_base_anchors(self, base_size, scales, ratios):
        """Build origin-centred anchors for one level.

        Returns a tensor whose last dimension is ``(x1, y1, x2, y2)``, with
        one row per (ratio, scale) combination.
        """
        w = base_size
        h = base_size
        h_ratios = torch.sqrt(ratios)
        w_ratios = 1 / h_ratios
        ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)
        hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)
        return torch.stack([-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs], dim=-1)

    def _meshgrid(self, x, y):
        """Return flattened x/y coordinate grids for the 1-D tensors x and y."""
        xx = x.repeat(y.shape[0])
        yy = y.view(-1, 1).repeat(1, x.shape[0]).view(-1)
        return xx, yy

    def single_level_grid_priors(self, featmap_size, level_idx, dtype: torch.dtype = torch.float32):
        """Place the base anchors of ``level_idx`` on every feature-map cell.

        Returns a ``(-1, 4)`` tensor of anchors.
        """
        base_anchors = self.base_anchors[level_idx].to(dtype)
        feat_h, feat_w = featmap_size
        stride_w, stride_h = self.strides[level_idx]
        shift_x = torch.arange(0, feat_w).to(dtype) * stride_w
        shift_y = torch.arange(0, feat_h).to(dtype) * stride_h
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
        # Broadcast: every shift is combined with every base anchor.
        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
        return all_anchors.view(-1, 4)

    def grid_priors(self, featmap_sizes, dtype):
        """Return the anchors of all levels as a list of ``(-1, 4)`` tensors."""
        return [
            self.single_level_grid_priors(featmap_size, level_idx=level_idx, dtype=dtype)
            for level_idx, featmap_size in enumerate(featmap_sizes)
        ]

    def predict_by_feat(self, cls_scores, bbox_preds):
        """Turn the RPN head outputs into proposals.

        Only the first element along dim 0 of each level's output is used.

        Returns:
            tuple: ``(bboxes, scores, keep_idxs)``.  ``bboxes`` and ``scores``
            are the NMS survivors truncated to ``nms_pre`` entries;
            ``keep_idxs`` is the untruncated index tensor returned by NMS.
        """
        featmap_sizes = [cls_score.shape[-2:] for cls_score in cls_scores]
        mlvl_priors = self.grid_priors(featmap_sizes, dtype=cls_scores[0].dtype)
        cls_score_list = [cls_score[0].detach() for cls_score in cls_scores]
        bbox_pred_list = [bbox_pred[0].detach() for bbox_pred in bbox_preds]

        mlvl_bbox_preds = []
        mlvl_valid_priors = []
        mlvl_scores = []
        level_ids = []
        for level_idx, (cls_score, bbox_pred, priors) in enumerate(zip(cls_score_list, bbox_pred_list, mlvl_priors)):
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            cls_score = cls_score.permute(1, 2, 0).reshape(-1, 1)
            scores = torch.squeeze(cls_score.sigmoid())
            if 0 < nms_pre < scores.shape[0]:
                # topk is used here; an equivalent sort-based selection was
                # tried by the author and left disabled.
                _, topk_inds = scores.topk(nms_pre)
                scores = scores[topk_inds]
                bbox_pred = bbox_pred[topk_inds, :]
                priors = priors[topk_inds]
            mlvl_bbox_preds.append(bbox_pred)
            mlvl_valid_priors.append(priors)
            mlvl_scores.append(scores)
            # Level index per candidate, so NMS is applied per level.
            level_ids.append(scores.new_full((scores.size(0), ), level_idx, dtype=torch.long))

        bbox_pred = torch.cat(mlvl_bbox_preds)
        priors = torch.cat(mlvl_valid_priors, dim=0)
        bboxes = self.model.rpn_head.bbox_coder.decode(priors, bbox_pred, max_shape=img_meta['img_shape'])
        scores = torch.cat(mlvl_scores)
        level_ids = torch.cat(level_ids)
        keep_idxs = torchvision.ops.batched_nms(bboxes, scores, level_ids, 0.7)
        bboxes = bboxes[keep_idxs][:nms_pre]
        scores = scores[keep_idxs][:nms_pre]
        return bboxes, scores, keep_idxs

    def multiclass_nms(self, multi_bboxes, multi_scores, score_thr, nms_cfg, max_num, box_dim):
        """Score-threshold and class-wise NMS over per-class boxes.

        The last column of ``multi_scores`` is dropped before thresholding.
        The IoU threshold is read from the module-level ``test_cfg``;
        ``nms_cfg`` is accepted for signature compatibility but not used.

        Returns:
            tuple: ``(dets, labels)`` where each row of ``dets`` is a box
            followed by its score, limited to ``max_num`` rows.
        """
        num_classes = multi_scores.size(1) - 1
        bboxes = multi_bboxes.view(multi_scores.size(0), -1, box_dim)
        scores = multi_scores[:, :-1]
        labels = torch.arange(num_classes, dtype=torch.long, device=scores.device)
        labels = labels.view(1, -1).expand_as(scores)

        bboxes = bboxes.reshape(-1, box_dim)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)

        valid_mask = scores > score_thr
        inds = valid_mask.nonzero(as_tuple=False).squeeze(1)
        bboxes, scores, labels = bboxes[inds], scores[inds], labels[inds]

        # torchvision's batched_nms is used in place of mmcv.ops.batched_nms.
        keep = torchvision.ops.batched_nms(bboxes, scores, labels, test_cfg['nms']['iou_threshold'])
        dets = torch.cat([bboxes[keep], torch.unsqueeze(scores[keep], 1)], -1)
        if keep.shape[0] > max_num:
            dets = dets[:max_num]
            keep = keep[:max_num]
        return dets, labels[keep]

    def predict_bbox(self, x, rpn_results_list):
        """Run the bbox head on the proposals and apply multi-class NMS.

        Args:
            x: Feature maps produced by the neck.
            rpn_results_list: Output of ``predict_by_feat``; only element 0
                (the proposal boxes) is used.

        Returns:
            tuple: ``(boxes, scores, labels)`` of the final detections.
        """
        proposals = [rpn_results_list[0]]
        rois = bbox2roi(proposals)
        bbox_results = self.model.roi_head._bbox_forward(x, rois)
        cls_scores = bbox_results['cls_score']
        bbox_preds = bbox_results['bbox_pred']
        scores = F.softmax(cls_scores, dim=-1)
        img_shape = img_meta['img_shape']
        num_rois = rois.size(0)
        # Repeat every RoI num_classes times so it lines up with the
        # per-class regression deltas.
        roi = rois.repeat(1, num_classes).view(-1, rois.shape[1])
        bbox_pred = bbox_preds.view(-1, 4)
        bboxes = self.model.roi_head.bbox_head.bbox_coder.decode(roi[..., 1:], bbox_pred, max_shape=img_shape)
        box_dim = bboxes.size(-1)
        bboxes = bboxes.view(num_rois, -1)
        # Uses the local multiclass_nms method instead of the mmdet function.
        det_bboxes, det_labels = self.multiclass_nms(bboxes, scores, test_cfg['score_thr'], test_cfg['nms'], test_cfg['max_per_img'], box_dim)
        return det_bboxes[:, :-1], det_bboxes[:, -1], det_labels

    def _do_paste_mask(self, masks, boxes, img_h, img_w):
        """Resample mask predictions onto the image region around ``boxes``.

        The sampling grid is built from the first box only (``boxes[0]``);
        ``predict_mask`` calls this with one instance per chunk.

        Returns:
            tuple: the resampled masks (channel 0) and a ``(y_slice, x_slice)``
            pair locating them in the full image.
        """
        device = masks.device
        x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to(dtype=torch.int32)
        x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32)
        y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32)
        # Explicit indexing replaces torch.split(boxes, 1, dim=1).
        x0 = boxes[0][0].reshape(1, 1)
        y0 = boxes[0][1].reshape(1, 1)
        x1 = boxes[0][2].reshape(1, 1)
        y1 = boxes[0][3].reshape(1, 1)
        N = masks.shape[0]

        # Pixel centres, mapped to [-1, 1] relative to the box.
        img_y = torch.arange(y0_int, y1_int, device=device).to(torch.float32) + 0.5
        img_x = torch.arange(x0_int, x1_int, device=device).to(torch.float32) + 0.5
        img_y = (img_y - y0) / (y1 - y0) * 2 - 1
        img_x = (img_x - x0) / (x1 - x0) * 2 - 1
        # Degenerate boxes produce inf after the division; zero them out.
        if torch.isinf(img_x).any():
            inds = torch.where(torch.isinf(img_x))
            img_x[inds] = 0
        if torch.isinf(img_y).any():
            inds = torch.where(torch.isinf(img_y))
            img_y[inds] = 0

        gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1))
        gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1))
        grid = torch.stack([gx, gy], dim=3)
        # mmcv's bilinear_grid_sample is used in place of F.grid_sample.
        img_masks = bilinear_grid_sample(masks.to(dtype=torch.float32), grid, align_corners=False)
        spatial_inds = (
            slice(y0_int.to(dtype=torch.int64), y1_int.to(dtype=torch.int64)),
            slice(x0_int.to(dtype=torch.int64), x1_int.to(dtype=torch.int64)),
        )
        return img_masks[:, 0], spatial_inds

    def mask_roi_extractor(self, feats, rois):
        """Extract RoI features for the mask head, level by level."""
        extractor = self.model.roi_head.mask_roi_extractor
        out_size = extractor.roi_layers[0].output_size
        num_levels = len(feats)
        roi_feats = feats[0].new_zeros(rois.size(0), extractor.out_channels, *out_size)
        target_lvls = extractor.map_roi_levels(rois, num_levels)
        for i in range(num_levels):
            mask = target_lvls == i
            inds = mask.nonzero(as_tuple=False).squeeze(1)
            rois_ = rois[inds]
            roi_feats_t = extractor.roi_layers[i](feats[i], rois_)
            roi_feats[inds] = roi_feats_t
        return roi_feats

    def predict_mask(self, x, img_meta, results_list):
        """Predict masks for the detections and paste them into the image.

        Args:
            x: Feature maps produced by the neck; only the first four levels
                are passed to the mask RoI extractor.
            img_meta: Dict providing ``scale_factor`` and ``ori_shape``.
            results_list: ``(boxes, scores, labels)`` from ``predict_bbox``.

        Returns:
            tuple: ``(bboxes, scores, labels, im_mask)`` where ``im_mask`` is
            a boolean tensor with one ``img_h x img_w`` mask per detection.
            ``bboxes`` is divided by the scale factor in place.
        """
        bboxes = results_list[0]
        scores = results_list[1]
        labels = results_list[2]
        rois = bbox2roi([bboxes])
        # The local extractor replaces self.model.roi_head._mask_forward.
        mask_feats = self.mask_roi_extractor(x[:4], rois)
        mask_preds = self.model.roi_head.mask_head(mask_feats)
        scale_factor = bboxes.new_tensor(img_meta['scale_factor']).repeat((1, 2))
        img_h, img_w = img_meta['ori_shape'][:2]
        mask_preds = mask_preds.sigmoid()
        bboxes /= scale_factor

        # One chunk per instance, so _do_paste_mask sees a single box.
        num_chunks = len(mask_preds)
        chunks = torch.chunk(torch.arange(num_chunks), num_chunks)
        threshold = test_cfg['mask_thr_binary']
        im_mask = torch.zeros(num_chunks, img_h, img_w, dtype=torch.bool)
        # Keep only the mask channel of the predicted class.
        mask_preds = mask_preds[range(num_chunks), labels][:, None]
        for inds in chunks:
            masks_chunk, spatial_inds = self._do_paste_mask(mask_preds[inds], bboxes[inds], img_h, img_w)
            masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool)
            im_mask[(inds, ) + spatial_inds] = masks_chunk
        return bboxes, scores, labels, im_mask

    def forward(self, x):
        """Run the full pipeline: backbone, neck, RPN, bbox head, mask head.

        Each mmdet ``predict_*`` call is replaced by the local method of the
        same name.  Returns ``(bboxes, scores, labels, im_mask)``.
        """
        x = self.model.backbone(x)
        x = self.model.neck(x)
        outs = self.model.rpn_head(x)
        rpn_results_list = self.predict_by_feat(*outs)
        results_list = self.predict_bbox(x, rpn_results_list)
        results_list = self.predict_mask(x, img_meta, results_list)
        return results_list


model = MaskRCNN().eval()
# Dummy input that fixes the traced resolution at 800x800.
dummy_input = torch.zeros(1, 3, 800, 800, device='cpu')
torch.onnx.export(model, dummy_input, "./mmdetection/maskrcnn.onnx", opset_version=11)

# Optional: simplify the exported graph with onnx-simplifier.
# import onnx
# from onnxsim import simplify
# onnx_model = onnx.load("maskrcnn.onnx")  # load onnx model
# model_simp, check = simplify(onnx_model)
# assert check, "Simplified ONNX model could not be validated"
# onnx.save(model_simp, "maskrcnn_sim.onnx")

导出的模型结构如下:(原文此处为模型结构截图,图片未随文本保留)
但是手动导出的模型存在一些问题,如无法输入动态尺寸、推理速度较慢等,故下面推理代码中使用的模型均为通过mmdeploy导出的模型:

python推理代码:

import cv2
import numpy as np
import onnxruntime
import json
import random
import torch
import torch.nn.functional as F
from pathlib import Path
from copy import deepcopy

resize_shape = (1333, 800)
confidence_threshold = 0.5
mask_threshold = 0.5


def filter_box(outputs, scale):
    """Keep detections above ``confidence_threshold`` and rescale their boxes.

    Args:
        outputs: Model outputs; index 0 holds boxes with the score in column
            4, index 1 the class ids, index 2 the mask predictions.
        scale: ``(scale_y, scale_x)`` factors back to the original image.

    Returns:
        Array whose rows are the kept box rows followed by the class id.

    Side effect: ``outputs[2]`` is replaced by the kept masks with an extra
    axis inserted at position 1.
    """
    # Drop detections whose confidence is not above confidence_threshold.
    flag = outputs[0][..., 4] > confidence_threshold
    boxes = outputs[0][flag]
    boxes[..., [0, 2]] *= scale[1]
    boxes[..., [1, 3]] *= scale[0]
    class_ids = outputs[1][flag].reshape(-1, 1)
    outputs[2] = np.expand_dims(outputs[2][flag], axis=1)
    return np.concatenate((boxes, class_ids), axis=1)


def resize_keep_ratio(image, img_scale):
    """Resize ``image`` so it fits ``img_scale`` while keeping aspect ratio.

    The long edge is limited by ``max(img_scale)`` and the short edge by
    ``min(img_scale)``.
    """
    h, w = image.shape[0], image.shape[1]
    max_long_edge = max(img_scale)
    max_short_edge = min(img_scale)
    scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w))
    # Adding 0.5 before truncation rounds to the nearest integer.
    scale_w = int(w * float(scale_factor) + 0.5)
    scale_h = int(h * float(scale_factor) + 0.5)
    return cv2.resize(image, (scale_w, scale_h))


def draw_boxes(image, box_data):
    """Draw boxes and ``class<id> <score>`` labels on ``image`` in place."""
    boxes = box_data[..., :4].astype(np.int32)
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 1)
        cv2.putText(image, 'class{0} {1:.6f}'.format(cl, score), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)


def draw_binary_masks(img, binary_masks, alphas=0.5):
    """Blend every mask onto ``img`` in a random colour and save result.jpg."""
    binary_masks = binary_masks.astype('uint8') * 255
    alphas = [alphas] * binary_masks.shape[0]
    for binary_mask, alpha in zip(binary_masks, alphas):
        binary_mask_complement = cv2.bitwise_not(binary_mask)
        rgb = np.zeros_like(img)
        # random.randint includes its upper bound, so the limit must be 255
        # for the value to fit into a uint8 channel.
        rgb[...] = [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]
        rgb = cv2.bitwise_and(rgb, rgb, mask=binary_mask)
        img_complement = cv2.bitwise_and(img, img, mask=binary_mask_complement)
        rgb = rgb + img_complement
        img = cv2.addWeighted(img, 1 - alpha, rgb, alpha, 0)
    cv2.imwrite("result.jpg", img)


if __name__ == "__main__":
    onnx_session = onnxruntime.InferenceSession('end2end.onnx', providers=['CPUExecutionProvider'])
    input_name = [node.name for node in onnx_session.get_inputs()]
    output_name = [node.name for node in onnx_session.get_outputs()]

    image = cv2.imread("bus.jpg")
    image_resize = resize_keep_ratio(image, resize_shape)
    scale = (image.shape[0] / image_resize.shape[0], image.shape[1] / image_resize.shape[1])
    # Pad width and height up to the next multiple of 32.
    pad_shape = (np.ceil(image_resize.shape[1] / 32) * 32, np.ceil(image_resize.shape[0] / 32) * 32)
    pad_x, pad_y = int(pad_shape[0] - image_resize.shape[1]), int(pad_shape[1] - image_resize.shape[0])

    # BGR -> RGB and HWC -> CHW.
    blob = image_resize[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)
    # Per-channel normalisation (channel order is RGB after the flip above).
    blob[0, :] = (blob[0, :] - 123.675) / 58.395
    blob[1, :] = (blob[1, :] - 116.28) / 57.12
    blob[2, :] = (blob[2, :] - 103.53) / 57.375
    # Pad bottom/right with zeros *after* normalisation so the padded area is
    # exactly 0, consistent with the TensorRT script in this article.
    blob = np.pad(blob, ((0, 0), (0, pad_y), (0, pad_x)), mode='constant', constant_values=0)
    blob = np.expand_dims(blob, axis=0)

    inputs = {name: blob for name in input_name}
    outputs = onnx_session.run(None, inputs)
    boxes = filter_box(outputs, scale)

    # Paste each low-resolution mask into its box at original image size.
    masks = torch.from_numpy(outputs[2])
    x0, y0, x1, y1 = torch.split(torch.from_numpy(boxes[..., :4]), 1, dim=1)
    # Pixel centres, mapped to [-1, 1] relative to every box.
    img_y = torch.arange(0, image.shape[0]).to(torch.float32) + 0.5
    img_x = torch.arange(0, image.shape[1]).to(torch.float32) + 0.5
    img_y = (img_y - y0) / (y1 - y0) * 2 - 1
    img_x = (img_x - x0) / (x1 - x0) * 2 - 1
    gx = img_x[:, None, :].expand(masks.shape[0], img_y.size(1), img_x.size(1))
    gy = img_y[:, :, None].expand(masks.shape[0], img_y.size(1), img_x.size(1))
    grid = torch.stack([gx, gy], dim=3).to(torch.float32)
    img_masks = F.grid_sample(masks.to(torch.float32), grid, align_corners=False)
    masks_chunk = img_masks[:, 0]
    masks_chunk = (masks_chunk >= mask_threshold).to(dtype=torch.bool)
    mask_results = masks_chunk.detach().cpu().numpy()

    draw_boxes(image, boxes)
    draw_binary_masks(image, mask_results)

tensorrt推理

使用mmdeploy导出engine模型:

from mmdeploy.apis import torch2onnx
from mmdeploy.backend.tensorrt.onnx2tensorrt import onnx2tensorrt
from mmdeploy.backend.sdk.export_info import export2SDK
import os

# Export a Mask R-CNN checkpoint to ONNX, build a TensorRT engine from it
# and dump the SDK pipeline description.

# Image passed to torch2onnx as the sample input for the export.
img = 'bus.jpg'
work_dir = './work_dir/trt/maskrcnn'
save_file = './end2end.onnx'
deploy_cfg = './configs/mmdet/instance-seg/instance-seg_tensorrt_static-768x1344.py'
# NOTE(review): this file name looks truncated compared with the
# 'mask-rcnn_r50_fpn_ms-poly-3x_coco.py' config used for the ONNX export and
# with the checkpoint name below -- confirm the path exists.
model_cfg = '../mmdetection-3.3.0/configs/mask_rcnn/mask-rcnn_r50_fpn_ms.py'
model_checkpoint = 'checkpoints/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth'
device = 'cuda'

# 1. convert model to onnx
torch2onnx(img, work_dir, save_file, deploy_cfg, model_cfg, model_checkpoint, device)

# 2. convert IR to tensorrt
onnx_model = os.path.join(work_dir, save_file)
save_file = 'end2end.engine'
model_id = 0
onnx2tensorrt(work_dir, save_file, model_id, deploy_cfg, onnx_model, device)

# 3. extract pipeline info for sdk use (dump-info)
export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device)

手动编写tensorrt推理脚本:

import cv2
import ctypes
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import random
import torch
import torch.nn.functional as F
from pathlib import Path
import pycocotools.mask as mask_util

resize_shape = (1333, 800)
confidence_threshold = 0.5
mask_threshold = 0.5


def filter_box(outputs, scale):
    """Keep detections above ``confidence_threshold`` and rescale their boxes.

    Args:
        outputs: Model outputs; index 0 holds boxes with the score in column
            4, index 1 the class ids, index 2 the mask predictions.
        scale: ``(scale_y, scale_x)`` factors back to the original image.

    Returns:
        Array whose rows are the kept box rows followed by the class id.

    Side effect: ``outputs[2]`` is replaced by the kept masks with an extra
    axis inserted at position 1.
    """
    # Drop detections whose confidence is not above confidence_threshold.
    flag = outputs[0][..., 4] > confidence_threshold
    boxes = outputs[0][flag]
    boxes[..., [0, 2]] *= scale[1]
    boxes[..., [1, 3]] *= scale[0]
    class_ids = outputs[1][flag].reshape(-1, 1)
    outputs[2] = np.expand_dims(outputs[2][flag], axis=1)
    return np.concatenate((boxes, class_ids), axis=1)


def resize_keep_ratio(image, size):
    """Resize ``image`` to fit inside ``size`` (width, height), keeping ratio."""
    height, width = image.shape[0], image.shape[1]
    width_new, height_new = size
    if width / height >= width_new / height_new:
        # Image is relatively wider than the target: width is the limit.
        return cv2.resize(image, (width_new, round(height * width_new / width)))
    return cv2.resize(image, (round(width * height_new / height), height_new))


def draw_boxes(image, box_data):
    """Draw boxes and ``class<id> <score>`` labels on ``image`` in place."""
    boxes = box_data[..., :4].astype(np.int32)
    scores = box_data[..., 4]
    classes = box_data[..., 5].astype(np.int32)
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 1)
        cv2.putText(image, 'class{0} {1:.6f}'.format(cl, score), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)


def draw_binary_masks(img, binary_masks, alphas=0.2):
    """Blend every mask onto ``img`` in a random colour and save result.jpg."""
    binary_masks = binary_masks.astype('uint8') * 255
    alphas = [alphas] * binary_masks.shape[0]
    for binary_mask, alpha in zip(binary_masks, alphas):
        binary_mask_complement = cv2.bitwise_not(binary_mask)
        rgb = np.zeros_like(img)
        # random.randint includes its upper bound, so the limit must be 255
        # for the value to fit into a uint8 channel.
        rgb[...] = [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]
        rgb = cv2.bitwise_and(rgb, rgb, mask=binary_mask)
        img_complement = cv2.bitwise_and(img, img, mask=binary_mask_complement)
        rgb = rgb + img_complement
        img = cv2.addWeighted(img, 1 - alpha, rgb, alpha, 0)
    cv2.imwrite("result.jpg", img)


if __name__ == "__main__":
    # Load mmdeploy's custom TensorRT ops before deserialising the engine.
    ctypes.cdll.LoadLibrary('mmdeploy_tensorrt_ops.dll')
    logger = trt.Logger(trt.Logger.WARNING)
    with open("end2end.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    # Static engine input shape; also used to size the host/device buffers.
    input_shape = (1, 3, 768, 1344)
    h_input = cuda.pagelocked_empty(trt.volume(input_shape), dtype=np.float32)
    h_output0 = cuda.pagelocked_empty(trt.volume((1, 100, 5)), dtype=np.float32)
    h_output1 = cuda.pagelocked_empty(trt.volume((1, 100)), dtype=np.int32)
    h_output2 = cuda.pagelocked_empty(trt.volume((1, 100, 28, 28)), dtype=np.float32)
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output0 = cuda.mem_alloc(h_output0.nbytes)
    d_output1 = cuda.mem_alloc(h_output1.nbytes)
    d_output2 = cuda.mem_alloc(h_output2.nbytes)
    stream = cuda.Stream()

    image = cv2.imread('bus.jpg')
    image_resize = resize_keep_ratio(image, resize_shape)
    scale = (image.shape[0] / image_resize.shape[0], image.shape[1] / image_resize.shape[1])
    # Pad width and height up to the next multiple of 32.
    pad_shape = (np.ceil(image_resize.shape[1] / 32) * 32, np.ceil(image_resize.shape[0] / 32) * 32)
    pad_x, pad_y = int(pad_shape[0] - image_resize.shape[1]), int(pad_shape[1] - image_resize.shape[0])

    # BGR -> RGB and HWC -> CHW.
    blob = image_resize[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)
    # Per-channel normalisation (channel order is RGB after the flip above).
    blob[0, :] = (blob[0, :] - 123.675) / 58.395
    blob[1, :] = (blob[1, :] - 116.28) / 57.12
    blob[2, :] = (blob[2, :] - 103.53) / 57.375
    # Zero-pad bottom/right after normalisation.
    blob = F.pad(torch.from_numpy(blob), (0, pad_x, 0, pad_y, 0, 0), value=0).numpy()
    blob = np.expand_dims(blob, axis=0)
    # The engine is bound to a fixed input shape below; a tensor of any other
    # shape would be misread by the engine (or overrun the device buffer), so
    # fail early with a clear message instead.
    if blob.shape != input_shape:
        raise ValueError(
            'preprocessed input has shape {0}, but the engine expects {1}'.format(blob.shape, input_shape))
    # Fill the page-locked buffer in place rather than rebinding h_input to a
    # pageable array.
    np.copyto(h_input, blob.ravel())

    with engine.create_execution_context() as context:
        context.set_input_shape("input", input_shape)
        cuda.memcpy_htod_async(d_input, h_input, stream)
        context.execute_async_v2(bindings=[int(d_input), int(d_output0), int(d_output1), int(d_output2)], stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(h_output0, d_output0, stream)
        cuda.memcpy_dtoh_async(h_output1, d_output1, stream)
        cuda.memcpy_dtoh_async(h_output2, d_output2, stream)
        stream.synchronize()

    h_output = [
        h_output0.reshape(1, 100, 5),
        h_output1.reshape(1, 100),
        h_output2.reshape(1, 100, 28, 28),
    ]
    boxes = filter_box(h_output, scale)

    # Paste each low-resolution mask into its box at original image size.
    masks = torch.from_numpy(h_output[2])
    x0, y0, x1, y1 = torch.split(torch.from_numpy(boxes[..., :4]), 1, dim=1)
    # Pixel centres, mapped to [-1, 1] relative to every box.
    img_y = torch.arange(0, image.shape[0]).to(torch.float32) + 0.5
    img_x = torch.arange(0, image.shape[1]).to(torch.float32) + 0.5
    img_y = (img_y - y0) / (y1 - y0) * 2 - 1
    img_x = (img_x - x0) / (x1 - x0) * 2 - 1
    gx = img_x[:, None, :].expand(masks.shape[0], img_y.size(1), img_x.size(1))
    gy = img_y[:, :, None].expand(masks.shape[0], img_y.size(1), img_x.size(1))
    grid = torch.stack([gx, gy], dim=3).to(torch.float32)
    img_masks = F.grid_sample(masks.to(torch.float32), grid, align_corners=False)
    masks_chunk = img_masks[:, 0]
    masks_chunk = (masks_chunk >= mask_threshold).to(dtype=torch.bool)
    mask_results = masks_chunk.detach().cpu().numpy()

    draw_boxes(image, boxes)
    draw_binary_masks(image, mask_results)

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.rhkb.cn/news/475987.html

如若内容造成侵权/违法违规/事实不符,请联系长河编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

【大语言模型】ACL2024论文-19 SportsMetrics: 融合文本和数值数据以理解大型语言模型中的信息融合

【大语言模型】ACL2024论文-19 SportsMetrics: 融合文本和数值数据以理解大型语言模型中的信息融合 https://arxiv.org/pdf/2402.10979 目录 文章目录 【大语言模型】ACL2024论文-19 SportsMetrics: 融合文本和数值数据以理解大型语言模型中的信息融合目录摘要研究背景问题与挑…

39页PDF | 毕马威_数据资产运营白皮书(限免下载)

一、前言 《毕马威数据资产运营白皮书》探讨了数据作为新型生产要素在企业数智化转型中的重要性,提出了数据资产运营的“三要素”(组织与意识、流程与规范、平台与工具)和“四重奏”(数据资产盘点、评估、治理、共享)…

【UE5】使用基元数据对材质传参,从而避免新建材质实例

在项目中,经常会遇到这样的需求:多个模型(例如 100 个)使用相同的材质,但每个模型需要不同的参数设置,比如不同的颜色或随机种子等。 在这种情况下,创建 100 个实例材质不是最佳选择。正确的做…

[STBC]

空时分组编码STBC(Space Time Block Coding): //一个数据流通过多个天线发射发送,硬件编码器 STBC概念是从MIMO技术衍生出来的,目的是在多天线系统中提高数据传输的可靠性和传输距离。在rx(接收天线)和tx…

241120学习日志——[CSDIY] [InternStudio] 大模型训练营 [09]

CSDIY:这是一个非科班学生的努力之路,从今天开始这个系列会长期更新,(最好做到日更),我会慢慢把自己目前对CS的努力逐一上传,帮助那些和我一样有着梦想的玩家取得胜利!!…

PCB 间接雷击模拟

雷击是一种危险的静电放电事件,其中两个带电区域会瞬间释放高达 1 千兆焦耳的能量。雷击就像一个短暂而巨大的电流脉冲,会对建筑物和电子设备造成严重损坏。雷击可分为直接和间接两类,其中间接影响是由于感应能量耦合到靠近雷击位置的物体。间…

IDEA2019搭建Springboot项目基于java1.8 解决Spring Initializr无法创建jdk1.8项目 注释乱码

后端界面搭建 将 https://start.spring.io/ 替换为 https://start.aliyun.com/ 报错 打开设置 修改如下在这里插入代码片 按此方法无果 翻阅资料后得知 IDEA2019无法按照网上教程修改此问题因此更新最新idea2024或利用插件Alibaba Cloud Toolkit 换用IDEA2024创建项目 下一步…

单向C to DP视频传输解决方案 | LDR6500

LDR6500D如何通过Type-C接口实现手机到DP接口的单向视频传输 在当今数字化浪潮中,投屏技术作为连接设备、共享视觉内容的桥梁,其重要性日益凸显。PD(Power Delivery)芯片,特别是集成了Type-C接口与DisplayPort…

Leetcode 第 143 场双周赛题解

Leetcode 第 143 场双周赛题解 Leetcode 第 143 场双周赛题解题目1:3345. 最小可整除数位乘积 I思路代码复杂度分析 题目2:3346. 执行操作后元素的最高频率 I思路代码复杂度分析 题目3:3347. 执行操作后元素的最高频率 II题目4:33…

Spark 之 Aggregate

Aggregate 参考链接: https://github.com/PZXWHU/SparkSQL-Kernel-Profiling 完整的聚合查询的关键字包括 group by、 cube、 grouping sets 和 rollup 4 种 。 分组语句 group by 后面可以是一个或多个分组表达式( groupingExpressions )…

【IDEA】解决总是自动导入全部类(.*)问题

文章目录 问题描述解决方法 我是一名立志把细节说清楚的博主,欢迎【关注】🎉 ~ 原创不易, 如果有帮助 ,记得【点赞】【收藏】 哦~ ❥(^_-)~ 如有错误、疑惑,欢迎【评论】指正探讨,我会尽可能第一时间回复…

如何快速将Excel数据导入到SQL Server数据库

工作中,我们经常需要将Excel数据导入到数据库,但是对于数据库小白来说,这可能并非易事;对于数据库专家来说,这又可能非常繁琐。 这篇文章将介绍如何帮助您快速地将Excel数据导入到sql server数据库。 准备工作 这里…

在centos7中安装SqlDeveloper的Oracle可视化工具

1.下载安装包 (1)在SqlDeveloper官网下载(Oracle SQL Developer Release 19.2 - Get Started)对应版本的安装包即可(安装包和安装命令如下): (2)执行完上述命令后…

【动手学深度学习Pytorch】4. 神经网络基础

模型构造 回顾一下感知机。 nn.Sequential():定义了一种特殊的module。 torch.rand():用于生成具有均匀分布的随机数,这些随机数的范围在[0, 1)之间。它接受一个形状参数(shape),返回一个指定形状的张量…

Spring Boot + Vue 基于 RSA 的用户身份认证加密机制实现

Spring Boot + Vue 基于 RSA 的用户身份认证加密机制实现 什么是RSA?安全需求介绍前后端交互流程前端使用 RSA 加密密码安装 jsencrypt库实现敏感信息加密 服务器端生成RSA的公私钥文件Windows环境 生成rsa的公私钥文件Linux环境 生成rsa的公私钥文件 后端代码实现返…

一键部署 200+ 开源软件的 Websoft9 面板,Github 2k+ 星星

Websoft9面板是一款基于Web的PaaS/Linux面板,可用于在自己的服务器上一键部署200多种热门开源应用,在Github上获得了2k+星星。 特点与优势 丰富的开源软件集成:涵盖数据库、Web服务器、企业建站、电商系统、教育系统、中间件、大数据工具等多…

NLP论文速读(MPO)|通过混合偏好优化提高多模态大型语言模型的推理能力

论文速读|Dynamic Rewarding with Prompt Optimization Enables Tuning-free Self-Alignment of Language Models 论文信息: 简介: 本文探讨的背景是多模态大型语言模型(MLLMs)在多模态推理能力上的局限性,尤其是在链式…

动态规划子数组系列一>等差数列划分

题目: 解析: 代码: public int numberOfArithmeticSlices(int[] nums) {int n = nums.length;int[] dp = new int[n];int ret = 0;for(int i = 2; i < n; i++){dp[i] = nums[i] - nums[i-1] == nums[i-1] - nums[i-2] ? dp[i-1]+1 : 0;ret += dp[i…

用 React18 构建Tic-Tac-Toe(井字棋)游戏

下面是一个完整的 Tic-Tac-Toe(井字棋)游戏的实现,用 React 构建。包括核心逻辑和组件分离,支持两人对战。 1. 初始化 React 项目: npx create-react-app tic-tac-toe cd tic-tac-toe 2. 文件结构 src/ ├── App.js…

前端—Cursor编辑器

在当今快速发展的软件开发领域,效率和质量是衡量一个工具是否优秀的两个关键指标。今天,我要向大家推荐一款革命性的代码编辑器——Cursor,它集成了强大的AI功能,旨在提高开发者的编程效率。以下是Cursor编辑器的详细介绍和推荐理…