在对生物医学图像进行处理时,不像自然图像有预先固定的数据集格式(比如COCO的json格式、VOC格式等),生物图像的样式比较多且不标准,有图片格式、npz格式等等。网上转换教程很多,但是其中不乏转换错误的代码,特此记录,以下代码保证可以正常运行。
1. coco格式
1.1 Yolo格式转COCO格式
参考博客:https://blog.csdn.net/ericdiii/article/details/137775872
下方代码来自上方博客(如有侵权立即删除),结合自己数据集需要修改的地方:
(1)图片后缀:默认是tif,根据自己是png,进行替换
(2)图片,标签,存储ann的路径
import json
import glob
import os
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from PIL import Image, ImageDraw, ImageFont
import numpy as np


def calculate_polygon_area(polygon):
    """Return the area of a polygon given as an (N, 2) array of vertices.

    Uses the shoelace formula; vertex order (CW/CCW) does not matter
    because of the absolute value.
    """
    x = polygon[:, 0]
    y = polygon[:, 1]
    return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))


def calculate_bounding_box(polygon):
    """Return the axis-aligned bounding box [x_min, y_min, width, height].

    Values are converted to plain Python floats: the JSON writer below uses
    ``default=int`` as a fallback for non-serializable objects, and leaving
    these as ``np.float64`` would silently truncate the coordinates to ints.
    """
    x_min = float(np.min(polygon[:, 0]))
    y_min = float(np.min(polygon[:, 1]))
    x_max = float(np.max(polygon[:, 0]))
    y_max = float(np.max(polygon[:, 1]))
    return [x_min, y_min, x_max - x_min, y_max - y_min]


def text_to_json_segmentation(in_labels, in_images, out_json, img_ext='tif'):
    """Convert instance segmentation dataset from text files generated by the function
    'json_to_text_segmentation' (for YOLO) to a JSON file (for MMdet).
    This can be applied for Level 0/1/2 (must modify the last code)

    :param in_labels: input folder containing the label text files
    :param in_images: input folder containing the image files (just for getting the image size)
    :param out_json: output JSON file
    :param img_ext: image file extension without the dot (e.g. 'tif' or 'png')
    """
    # Initialize the output JSON structure
    data = dict()
    data['annotations'] = []
    data['images'] = []

    num_annotations = 1  # annotation ids start from 1

    txt_files = glob.glob(in_labels + '/*.txt')
    for k in range(len(txt_files)):
        # Swap only the extension: a plain str.replace('txt', ...) would also
        # corrupt file names that happen to contain 'txt' in their stem.
        stem = os.path.splitext(os.path.basename(txt_files[k]))[0]
        img_name = stem + '.' + img_ext

        # Read the image just to get its width and height
        img = Image.open(in_images + '/' + img_name)
        image_width, image_height = img.size

        # Create annotation items for every line of the label file
        with open(txt_files[k]) as f:
            for line in f:
                line = [float(x) for x in line.strip().split()]
                class_id = int(line[0]) + 1  # category ids start from 1
                polygon = np.array(line[1:]).reshape(-1, 2)
                # De-normalize YOLO coordinates to absolute pixel positions
                polygon[:, 0] = polygon[:, 0] * image_width
                polygon[:, 1] = polygon[:, 1] * image_height

                ann_item = dict()
                ann_item['segmentation'] = [polygon.flatten().tolist()]
                # float() keeps the area from being truncated by default=int below
                ann_item['area'] = float(calculate_polygon_area(polygon))
                ann_item['iscrowd'] = 0
                ann_item['image_id'] = k + 1  # image ids start from 1
                ann_item['bbox'] = calculate_bounding_box(polygon)
                ann_item['category_id'] = class_id
                ann_item['id'] = num_annotations
                data['annotations'].append(ann_item)
                num_annotations += 1

        # Create the image item matching the annotations above
        img_item = dict()
        img_item['id'] = k + 1  # index starts from 1
        img_item['file_name'] = img_name
        img_item['height'] = image_height
        img_item['width'] = image_width
        data['images'].append(img_item)
        print(os.path.basename(txt_files[k]) + ' done')

    data['categories'] = [{'supercategory': 'point', 'id': 1, 'name': 'point'}]

    # Write the dictionary to a JSON file; default=int only fires for
    # leftover non-serializable objects (e.g. numpy integers)
    print('Writing the data to a JSON file')
    with open(out_json, 'w') as f:
        # json.dump(data, f, cls=NpEncoder)
        # f.write(json.dumps(data, cls=NpEncoder, indent=4))
        f.write(json.dumps(data, default=int, indent=4))


if __name__ == '__main__':
    # Convert the segmentation text files to JSON
    text_to_json_segmentation(in_labels='/home/liuhuan/pointseg/datasetsNormal/receptor/labels/valid',
                              in_images='/home/liuhuan/pointseg/datasetsNormal/receptor/images/valid',
                              out_json='/home/liuhuan/pointseg/datasetsNormal/receptor/annotations/instances_val2017.json')
如何对json标注文件进行检查:
生成的json文件不一定是对的,例如轮廓点数过少(少于4个坐标值)这种情况,需要检查。如果有错误,脚本会报错并打印出错标注的序号。结合自己数据集需要修改的是:
(1)json文件路径
import json
import numpy as np
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is only a progress bar; degrade to a plain pass-through if missing
    def tqdm(iterable):
        return iterable


def frPyObjects(i, pyobj):
    """Check that the segmentation of annotation *i* is a type pycocotools can encode.

    Prints one diagnostic line per annotation and raises for entries that
    would make pycocotools' ``frPyObjects`` fail (e.g. a polygon list with
    fewer than 4 coordinate values).

    :param i: index of the annotation (used only for the printed report)
    :param pyobj: the 'segmentation' field of a COCO annotation
    :raises Exception: if the segmentation type/length is not supported
    """
    # NOTE(review): the original had several duplicated, unreachable elif
    # branches (including `type(pyobj) == list and type(pyobj) == dict`,
    # which is always False); they are collapsed here without changing
    # which inputs pass or fail.
    if type(pyobj) == np.ndarray:
        print("{}, {}, {}".format(i, type(pyobj), len(pyobj[0])))
    elif type(pyobj) == list and len(pyobj[0]) >= 4:
        # polygon given as flattened x,y pairs with at least 2 points
        print("{}, {}, {}".format(i, type(pyobj), len(pyobj[0])))
    elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj:
        # RLE dict; the original indexed pyobj[0] here, which raised KeyError
        print("{}, {}, {}".format(i, type(pyobj), len(pyobj['counts'])))
    else:
        print("{}, {}, {}, ERROR".format(i, type(pyobj), len(pyobj[0])))
        raise Exception('input type is not supported.')


def check(pathJson):
    """Run frPyObjects over every annotation of a COCO JSON file.

    :param pathJson: path to the annotation JSON file
    """
    with open(pathJson, "r") as jsonfile:
        jsonObj = json.load(jsonfile)
    for i, instance in tqdm(enumerate(jsonObj["annotations"])):
        frPyObjects(i, instance["segmentation"])


if __name__ == "__main__":
    pathTrainJson = "/home/liuhuan/pointseg/datasetsNormal/receptor/annotations/instances_train2017.json"
    pathValJson = "/home/liuhuan/pointseg/datasetsNormal/receptor/annotations/instances_val2017.json"
    pathTestJson = "/home/liuhuan/pointseg/datasetsNormal/receptor/annotations/instances_test2017.json"
    check(pathTrainJson)
    check(pathValJson)
    check(pathTestJson)
#json_object["annotations"][1510]["segmentation"] = [[230.83333333333331, 773.8888888888889, 231.83333333333331, 773.8888888888889, 237.22222222222223, 770.5555555555555]]# 将修改后的json文件重新写回到coco_instancestrain.json/coco_instancesval.json中即可
# val_json = open(JSON_LOC, "w")
# json.dump(json_object, val_json)
# val_json.close()
如何对json标注文件结合图片显示,进一步检查正确性
会根据图像和标注对某张图片显示,结合自己数据集需要修改的地方:
(1)图像文件所在文件夹
(2)json标注文件路径
(3)要查看图像文件的名称
import os.path
import numpy as np
import cv2
import json
try:
    from tqdm import tqdm
except ImportError:
    # tqdm is only a progress bar; degrade to a plain pass-through if missing
    def tqdm(iterable):
        return iterable


def get_bbox_segs(pathJson, img_name):
    """Return the bboxes and segmentations of all annotations of one image.

    :param pathJson: path to a COCO-style annotation JSON file
    :param img_name: value of the 'file_name' field identifying the image
    :return: tuple (bboxes, segs), each a list in annotation order
    :raises ValueError: if img_name is not present in the JSON 'images' list
    """
    with open(pathJson, "r") as jsonfile:
        jsonObj = json.load(jsonfile)

    # Find the image id matching img_name. The original used an unbounded
    # while loop that crashed with an opaque IndexError for unknown names.
    img_id = None
    for image in jsonObj["images"]:
        if image["file_name"] == img_name:
            img_id = image["id"]
            break
    if img_id is None:
        raise ValueError("image '{}' not found in {}".format(img_name, pathJson))

    # Collect every annotation attached to that image id
    bboxes = []
    segs = []
    for i, instance in tqdm(enumerate(jsonObj["annotations"])):
        if instance["image_id"] == img_id:
            bboxes.append(instance["bbox"])
            segs.append(instance["segmentation"])
    return bboxes, segs


if __name__ == "__main__":
    pathImg = "/home/liuhuan/pointseg/datasetsNormal/receptor/images/train"
    pathJson = "/home/liuhuan/pointseg/datasetsNormal/receptor/annotations/instances_train2017.json"
    img_name = "0.tif"
    bboxes, segs = get_bbox_segs(pathJson, img_name)
    img = cv2.imread(os.path.join(pathImg, img_name))
    # show bboxes
    '''
    for bbox in bboxes:
        x, y, w, h = bbox
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    '''
    # show masks: rasterize every polygon, then alpha-blend onto the image
    masks = []
    for seg in segs:
        mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
        polygon = np.array(seg).reshape((-1, 2)).astype(np.int32)
        cv2.fillPoly(mask, [polygon], 1)
        masks.append(mask)
    for mask in masks:
        mask_colored = cv2.cvtColor(mask * 255, cv2.COLOR_GRAY2BGR)
        img = cv2.addWeighted(img, 1, mask_colored, 0.5, 0)
    cv2.imshow('Image', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()