这里使用的源码版本是 mmclassification-0.25.0
训练数据的标签文件格式如下:每行用最后一个空格分成两部分,空格前面是图像文件的绝对路径,后面是标签名;同一张图像的多个标签之间用英文逗号“,”分开(逗号前后不要加空格),例如:/data/images/0001.jpg labelA,labelB,labelC。因为特殊要求,这里我的每张图像都记录了三个标签(具体看自己的需求),我的训练标签类别总数是17个。
训练参数配置文件如下:用ResNet作为特征提取主干;多标签分类要使用MultiLabelLinearClsHead作为分类头;数据集类型使用CustomDataset,并且需要修改该类的定义文件(后面有详细内容)。
# Training config for multi-label classification with mmclassification 0.25.0:
# a ResNet backbone, a MultiLabelLinearClsHead, and data read via CustomDataset.

# checkpoint saving
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=100,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]

# optimizer and learning-rate schedule
optimizer = dict(lr=0.1, momentum=0.9, type='SGD', weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
runner = dict(max_epochs=100, type='EpochBasedRunner')
lr_config = dict(policy='step', step=[30, 60, 90])

# model: num_classes is the number of distinct label names (17 here)
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='ResNet',
        depth=18,
        num_stages=4,
        out_indices=(3, ),
        style='pytorch'),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='MultiLabelLinearClsHead',
        num_classes=17,
        in_channels=512,
    ))

# dataset settings
dataset_type = 'CustomDataset'  # 'MultiLabelDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='RandomResizedCrop', size=224),
    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='ToTensor', keys=['gt_label']),
    dict(type='Collect', keys=['img', 'gt_label'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', size=(256, -1)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='Collect', keys=['img'])
]
# Replace 'rootpath' with the real dataset root before training.
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_prefix='rootpath/images',
        ann_file='rootpath/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_prefix='rootpath/images',
        ann_file='rootpath/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_prefix='rootpath/images',
        ann_file='rootpath/test.txt',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='accuracy')
其他需要修改的地方:
1、修改加载数据的格式,将./mmclassification-0.25.0/mmcls/datasets/custom.py的CustomDataset里面的load_annotations函数替换成下面的函数:
### Modified to load multi-label classification annotations ###
def load_annotations(self):
    """Load image paths and multi-hot ``gt_label`` vectors.

    When ``self.ann_file`` is a string, every non-empty line is split on its
    last space into ``<image path>`` and ``<labels>``. ``<labels>`` holds one
    or more comma-separated entries. Each entry is resolved as a class name
    from ``self.CLASSES`` first; if it is not a class name it is interpreted
    as an integer class index. When ``self.ann_file`` is ``None`` the samples
    come from ``self._find_samples()``.

    Returns:
        list[dict]: One dict per sample with the keys ``img_prefix``,
        ``img_info`` (``{'filename': ...}``) and ``gt_label`` (a float
        ``np.ndarray`` of length ``len(self.CLASSES)`` with ones at the
        positions of the sample's labels).

    Raises:
        TypeError: If ``ann_file`` is neither a ``str`` nor ``None``.
        ValueError: If a line has no ``<path> <labels>`` structure, or a
            label is neither a known class name nor a valid class index.
    """
    if self.ann_file is None:
        samples = self._find_samples()
    elif isinstance(self.ann_file, str):
        lines = mmcv.list_from_file(
            self.ann_file, file_client_args=self.file_client_args)
        samples = []
        for line_no, line in enumerate(lines, start=1):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            parts = line.rsplit(' ', 1)
            if len(parts) != 2:
                raise ValueError(
                    f'Line {line_no} of {self.ann_file} is not of the form '
                    f'"<image path> <labels>": {line!r}')
            samples.append(parts)
    else:
        raise TypeError('ann_file must be a str or None')

    # Name -> index lookup built once; setdefault keeps the first occurrence,
    # which is what ``CLASSES.index(name)`` would return.
    class_to_idx = {}
    for idx, name in enumerate(self.CLASSES):
        class_to_idx.setdefault(name, idx)
    num_classes = len(self.CLASSES)

    data_infos = []
    for filename, gt_label in samples:
        if isinstance(gt_label, str):
            tokens = [token.strip() for token in gt_label.split(',')]
        else:
            # presumably _find_samples() yields an integer class index here;
            # verify against the dataset implementation.
            tokens = [gt_label]

        temp_label = np.zeros(num_classes)
        for token in tokens:
            if token in class_to_idx:
                idx = class_to_idx[token]
            else:
                try:
                    idx = int(token)
                except (TypeError, ValueError):
                    raise ValueError(
                        f'Unknown label {token!r} for image {filename!r}: '
                        'it is neither a class name nor a class index'
                    ) from None
                if not 0 <= idx < num_classes:
                    raise ValueError(
                        f'Label index {idx} for image {filename!r} is out '
                        f'of range for {num_classes} classes')
            temp_label[idx] = 1

        info = {'img_prefix': self.data_prefix}
        info['img_info'] = {'filename': filename.strip()}
        info['gt_label'] = temp_label
        data_infos.append(info)
    return data_infos
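记得在CustomDataset的初始化函数__init__里,把类别列表(CLASSES)修改成自己要训练的类别。上面的load_annotations函数是通过self.CLASSES.index(label)把标签名转换成下标的,因此类别名必须与标签文件中的标签名完全一致。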
2、修改评估数据的函数:将./mmclassification-0.25.0/mmcls/models/losses/accuracy.py里面的accuracy_torch函数替换成如下函数。我这里只是增加了一些度量函数,方便查看多标签的各项指标,并没有更新其他地方,训练时仍然会验证原来的指标。函数里面调用的Metric类需要自己添加到accuracy.py中(或在其中导入),其实现可以参考这篇文章:https://blog.csdn.net/u013250861/article/details/122727704
def _log_multilabel_metrics(pred, target, score_thr=0.5):
    """Log multi-label metrics for thresholded predictions.

    Args:
        pred (torch.Tensor): Prediction scores, one row per sample.
        target (torch.Tensor): Multi-hot ground-truth labels, same layout.
        score_thr (float): Scores strictly greater than this value count as
            a positive prediction. Defaults to 0.5.
    """
    # Imported lazily so the extra dependencies are only needed when
    # multi-label targets are actually evaluated.
    from mmcls.utils import get_root_logger
    from sklearn.metrics import classification_report

    logger = get_root_logger()

    # Binarise the scores: 1 where the score exceeds the threshold, else 0.
    # Move to CPU first because ``.numpy()`` fails on CUDA / grad tensors.
    pred_np = (pred > score_thr).float().detach().cpu().numpy()
    target_np = target.detach().cpu().numpy()

    # The classification report summarises precision, recall and F1 score.
    # Pass ``target_names=[...]`` here to show class names in the report.
    class_report = classification_report(target_np, pred_np)
    logger.info("\nClassification Report:\n{}".format(class_report))

    # NOTE(review): ``Metric`` is not defined or imported in this block; it
    # must be made available in this module before this function is called.
    my_metric = Metric(pred_np, target_np)
    ham = my_metric.hamming_distance()
    avg_precision, _ = my_metric.avgPrecision()
    avg_recall, _, _ = my_metric.avgRecall()
    ranking_loss = my_metric.get_ranking_loss()
    accuracy_multiclass = my_metric.accuracy_multiclass()
    logger.info("\nHam:{}\tAvgPrecision:{}\tAvgRecall:{}\tRanking_loss:{}\tAccuracy_Multilabel:{}".format(ham, avg_precision, avg_recall, ranking_loss, accuracy_multiclass))


def accuracy_torch(pred, target, topk=(1, ), thrs=0.):
    """Compute top-k accuracy, logging extra metrics for multi-label targets.

    When ``target`` has more than one dimension it is treated as a multi-hot
    label matrix: multi-label metrics are logged, and the target is reduced
    with ``argmax(dim=1)`` so the top-k computation below can run. A 1-D
    ``target`` of class indices is used as is.

    Args:
        pred (torch.Tensor): Prediction scores, one row per sample.
        target (torch.Tensor): Class indices (1-D) or multi-hot labels (2-D).
        topk (tuple[int]): Values of k to evaluate. Defaults to ``(1, )``.
        thrs (Number | tuple[Number]): Score threshold(s); a prediction only
            counts as correct if its score is larger than the threshold.
            Defaults to 0.

    Returns:
        list: One entry per k in ``topk``. Each entry is a tensor when
        ``thrs`` is a single number, otherwise a list with one tensor per
        threshold. Values are percentages.

    Raises:
        TypeError: If ``thrs`` is neither a number nor a tuple.
    """
    if isinstance(thrs, Number):
        thrs = (thrs, )
        res_single = True
    elif isinstance(thrs, tuple):
        res_single = False
    else:
        raise TypeError(
            f'thrs should be a number or tuple, but got {type(thrs)}.')

    res = []
    maxk = max(topk)
    num = pred.size(0)
    pred = pred.float()

    if target.dim() > 1:
        # Multi-label case: report the extra metrics, then collapse the
        # multi-hot target to a single index for the top-k check below.
        _log_multilabel_metrics(pred, target)
        target = target.argmax(dim=1)

    pred_score, pred_label = pred.topk(maxk, dim=1)
    pred_label = pred_label.t()
    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
    for k in topk:
        res_thr = []
        for thr in thrs:
            # Only prediction values larger than thr are counted as correct
            _correct = correct & (pred_score.t() > thr)
            correct_k = _correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res_thr.append((correct_k.mul_(100. / num)))
        if res_single:
            res.append(res_thr[0])
        else:
            res.append(res_thr)
    return res
3、修改推理部分:将./mmclassification-0.25.0/mmcls/apis/inference.py里面的inference_model函数修改如下。修改之后,多标签推理时会输出所有得分大于阈值(0.5)的标签。
def inference_model(model, img, score_thr=0.5):
    """Run multi-label inference on one image with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename or loaded image.
        score_thr (float): Only classes whose score is strictly greater than
            this value are reported. Defaults to 0.5.

    Returns:
        result (dict): The classification results with the keys
            `pred_label`, `pred_score` and `pred_class`. Each value is a
            list with one entry per class that passed the threshold; the
            lists are empty when no class passed.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device

    # build the data pipeline: make sure the first step matches the input
    # kind (file name -> needs LoadImageFromFile, loaded array -> must not)
    if isinstance(img, str):
        if cfg.data.test.pipeline[0]['type'] != 'LoadImageFromFile':
            cfg.data.test.pipeline.insert(0, dict(type='LoadImageFromFile'))
        data = dict(img_info=dict(filename=img), img_prefix=None)
    else:
        if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
            cfg.data.test.pipeline.pop(0)
        data = dict(img=img)
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        scores = model(return_loss=False, **data)

    # Multi-label output: instead of keeping only the arg-max class, report
    # every class of the first (only) sample that passes the threshold.
    result = {'pred_label': [], 'pred_score': [], 'pred_class': []}
    for label, score in enumerate(scores[0]):
        if score > score_thr:
            result['pred_label'].append(int(label))
            result['pred_score'].append(float(score))
            result['pred_class'].append(model.CLASSES[int(label)])
    return result
通过以上修改,可以成功运行和评估我的多标签分类训练了。
由于我没有找到mmcls官方的多标签训练教程,因此做了上述修改。如果有其他更方便有效的多标签多分类方法或者项目,欢迎在该文章下面留言,非常感谢。
参考文章
https://blog.csdn.net/litt1e/article/details/125316552
https://blog.csdn.net/u013250861/article/details/122727704