Paper: CenterFace: Joint Face Detection and Alignment Using Face as Point
GitHub: https://github.com/Star-Clouds/CenterFace
The paper builds on CenterNet and proposes an anchor-free face detection framework that performs face detection and landmark alignment simultaneously. Both its accuracy and speed surpass mainstream frameworks such as MTCNN and FaceBoxes.
Main contributions:
- Proposes an anchor-free face detection design that casts face detection as a keypoint-estimation problem; unlike earlier detectors, the model's output feature map is downsampled by a factor of only 4 (see the decoding sketch after this list).
- Uses a multi-task learning strategy to learn face detection and landmark localization jointly.
- Adopts an FPN structure in the network.
- Extensive experiments show strong results in both speed and accuracy.
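To make the point representation concrete (this mirrors the decode() function later in this post; R = 4 is the output stride): a heatmap peak at grid cell $(c_y, c_x)$, together with the predicted sub-grid offsets $(o_y, o_x)$ and log-sizes $(s_h, s_w)$, is decoded into a box as

$$
w = R\,e^{s_w},\qquad h = R\,e^{s_h},\qquad
x_1 = R\,(c_x + o_x + 0.5) - \tfrac{w}{2},\qquad
y_1 = R\,(c_y + o_y + 0.5) - \tfrac{h}{2}.
$$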
Drawbacks of cascaded detectors:
- Inference speed depends on the number of faces in the image; as the face count grows, inference slows down substantially.
- Each stage is trained separately, which makes training cumbersome; because training is not end-to-end, overall accuracy is limited.
Network structure:
The overall network is built on MobileNetV2. MobileNetV2 downsamples 5 times (stride 32), and 3 upsampling layers are appended after its last stage, so the final output is downsampled only twice relative to the input, i.e. its spatial size is 1/4 of the original image.
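Below is a minimal PyTorch sketch of that topology. It is my own illustration, not the paper's exact network: plain transposed convolutions stand in for the FPN-style lateral connections, and the 24-channel head width is illustrative.

import torch
import torch.nn as nn
from torchvision.models import mobilenet_v2

class CenterFaceSketch(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = mobilenet_v2().features  # 5 downsamplings: stride 32
        layers = []
        in_ch = 1280  # MobileNetV2 final feature width
        for _ in range(3):  # 3 upsampling stages: stride 32 -> 16 -> 8 -> 4
            layers += [nn.ConvTranspose2d(in_ch, 24, kernel_size=2, stride=2),
                       nn.ReLU(inplace=True)]
            in_ch = 24
        self.up = nn.Sequential(*layers)
        self.heads = nn.ModuleDict({
            'heatmap': nn.Conv2d(24, 1, 1),   # face-center heatmap (sigmoid at use)
            'scale':   nn.Conv2d(24, 2, 1),   # log box height/width
            'offset':  nn.Conv2d(24, 2, 1),   # sub-grid center offset
            'lms':     nn.Conv2d(24, 10, 1),  # 5 landmarks, (x, y) each
        })

    def forward(self, x):
        feat = self.up(self.backbone(x))
        return {name: head(feat) for name, head in self.heads.items()}

outs = CenterFaceSketch()(torch.zeros(1, 3, 480, 640))
print({k: tuple(v.shape) for k, v in outs.items()})  # every map is 120 x 160, i.e. input / 4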
Loss functions:
Face classification loss, with α = 2 and β = 4.
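This is the pixel-wise focal loss that CenterFace inherits from CenterNet; written out, with N the number of faces, $\hat{Y}_{xy}$ the predicted heatmap score at cell $(x, y)$, and $Y_{xy}$ the Gaussian-splatted ground truth:

$$
L_c = -\frac{1}{N}\sum_{xy}
\begin{cases}
(1-\hat{Y}_{xy})^{\alpha}\,\log\hat{Y}_{xy}, & Y_{xy}=1\\[2pt]
(1-Y_{xy})^{\beta}\,\hat{Y}_{xy}^{\alpha}\,\log(1-\hat{Y}_{xy}), & \text{otherwise.}
\end{cases}
$$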
Center-point offset loss: regresses the quantization error introduced by mapping each face center onto the stride-4 output grid.
Box width/height loss: the size head predicts log-scaled width and height; decode() below applies exp and multiplies by the stride.
Landmark loss: the five landmarks are regressed relative to the box corner, normalized by the box width/height.
Total loss: a weighted sum of the four terms above.
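As a sketch of how the terms combine (the weight symbols are mine; CenterNet's defaults are $\lambda_{off} = 1$ and $\lambda_{size} = 0.1$, and CenterFace adds a landmark term):

$$
L = L_c + \lambda_{off}\,L_{off} + \lambda_{size}\,L_{size} + \lambda_{lm}\,L_{lm}.
$$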
Experimental results (the tables and curves are in the paper):
Inference speed.
FDDB accuracy.
WIDER FACE accuracy.
ONNX inference:
The ONNX model format makes it convenient to inspect and edit ops programmatically: adding and removing nodes, changing input/output dimensions, and so on. Inference with ONNX Runtime is also slightly faster than inference through PyTorch. The downside is that the exported graph is frozen, so this model does not support dynamic input sizes.
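Node-level surgery uses the same protobuf API. A small sketch (the tensor and node names here are hypothetical):

import onnx
from onnx import helper

model = onnx.load("out.onnx")
graph = model.graph

# Append a new op; "some_tensor" is a hypothetical tensor name in the graph.
relu = helper.make_node("Relu", inputs=["some_tensor"],
                        outputs=["some_tensor_relu"], name="extra_relu")
graph.node.append(relu)

# graph.node is a repeated protobuf field, so nodes can be removed the same way.
graph.node.remove(relu)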
First, use change_onnx.py to modify the input dimensions of the ONNX model provided by the author:
import onnx

model = onnx.load("../models/onnx/centerface.onnx")

# The model is represented as a protobuf structure and it can be accessed
# using the standard python-for-protobuf methods.
# Iterate through the inputs of the graph and pin the dynamic dimensions.
for input in model.graph.input:
    print(input.name, end=": ")
    tensor_type = input.type.tensor_type  # type of the input tensor
    # check if it has a shape:
    if tensor_type.HasField("shape"):
        # iterate through the dimensions of the shape:
        for num, d in enumerate(tensor_type.shape.dim):
            # a dimension may have a definite (integer) value,
            # a symbolic identifier, or neither:
            if d.HasField("dim_value"):
                if num == 0:
                    d.dim_value = 1    # batch size
                if num == 2:
                    d.dim_value = 480  # height
                if num == 3:
                    d.dim_value = 640  # width
                print(d.dim_value, end=", ")  # known dimension
            elif d.HasField("dim_param"):
                print(d.dim_param, end=", ")  # symbolic dimension
            else:
                print("?", end=", ")          # unknown dimension with no name
    else:
        print("unknown rank", end="")
    print()
    break

onnx.checker.check_model(model)
onnx.save(model, 'out.onnx')

"""
# Alternative: mark the dimensions as symbolic instead of fixing them.
model = onnx.load('models/centerface.onnx')
model.graph.input[0].type.tensor_type.shape.dim[0].dim_param = '?'
model.graph.input[0].type.tensor_type.shape.dim[1].dim_param = '3'
model.graph.input[0].type.tensor_type.shape.dim[2].dim_param = '?'
model.graph.input[0].type.tensor_type.shape.dim[3].dim_param = '?'
onnx.save(model, 'dynamic_model.onnx')
"""
The model is saved as out.onnx; compare its input dimensions with those of the original centerface.onnx.
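A quick way to do the comparison (a small sketch using the same onnx API and the paths from above):

import onnx

for path in ("../models/onnx/centerface.onnx", "out.onnx"):
    m = onnx.load(path)
    dims = [d.dim_value if d.HasField("dim_value") else d.dim_param
            for d in m.graph.input[0].type.tensor_type.shape.dim]
    print(path, dims)  # out.onnx should report [1, 3, 480, 640]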
Inference code, centerface.py:
import datetime

import cv2
import numpy as np
import onnxruntime


class CenterFace(object):
    def __init__(self, landmarks=True):
        self.landmarks = landmarks
        self.session = onnxruntime.InferenceSession("out.onnx")
        self.inputs = self.session.get_inputs()[0].name
        # output tensor names of the exported graph: heatmap, scale, offset, landmarks
        self.outputs = ["537", "538", "539", "540"]
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height, width, threshold=0.5):
        # The graph input was fixed to 480x640, so resize to that instead of
        # the stride-32 rounding done by transform().
        # self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
        self.img_h_new, self.img_w_new = 480, 640
        self.scale_h, self.scale_w = 480 / height, 640 / width
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        begin = datetime.datetime.now()
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.img_w_new, self.img_h_new))
        # HWC -> NCHW, add the batch dimension
        input_image = np.expand_dims(image.transpose(2, 0, 1), 0).astype(np.float32)
        heatmap, scale, offset, lms = self.session.run(None, {self.inputs: input_image})
        end = datetime.datetime.now()
        print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        # round the input size up to a multiple of 32
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms,
                                    (self.img_h_new, self.img_w_new), threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None,
                               (self.img_h_new, self.img_w_new), threshold=threshold)
        if len(dets) > 0:
            # map boxes (and landmarks) back to the original image scale
            dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
            if self.landmarks:
                lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)
        if self.landmarks:
            return dets, lms
        else:
            return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        c0, c1 = np.where(heatmap > threshold)  # peak cells above threshold
        boxes, lms = [], []
        if len(c0) > 0:
            for i in range(len(c0)):
                # the size head stores log(size / stride); the stride is 4
                s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
                o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
                s = heatmap[c0[i], c1[i]]
                x1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2)
                y1 = max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
                x1, y1 = min(x1, size[1]), min(y1, size[0])
                boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
                if self.landmarks:
                    lm = []
                    for j in range(5):
                        # landmarks are predicted relative to the box corner,
                        # normalized by the box width/height
                        lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
                        lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
                    lms.append(lm)
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)
                lms = lms[keep, :]
        if self.landmarks:
            return boxes, lms
        else:
            return boxes

    def nms(self, boxes, scores, nms_thresh):
        x1, y1 = boxes[:, 0], boxes[:, 1]
        x2, y2 = boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]  # highest score first
        num_detections = boxes.shape[0]
        suppressed = np.zeros((num_detections,), dtype=bool)  # np.bool is deprecated
        keep = []
        for _i in range(num_detections):
            i = order[_i]
            if suppressed[i]:
                continue
            keep.append(i)
            ix1, iy1, ix2, iy2, iarea = x1[i], y1[i], x2[i], y2[i], areas[i]
            for _j in range(_i + 1, num_detections):
                j = order[_j]
                if suppressed[j]:
                    continue
                xx1, yy1 = max(ix1, x1[j]), max(iy1, y1[j])
                xx2, yy2 = min(ix2, x2[j]), min(iy2, y2[j])
                w, h = max(0, xx2 - xx1 + 1), max(0, yy2 - yy1 + 1)
                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= nms_thresh:
                    suppressed[j] = True
        return keep
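The hard-coded output names "537" through "540" are tensor names from this particular export; a re-exported model may number them differently. They can be listed with ONNX Runtime:

import onnxruntime

session = onnxruntime.InferenceSession("out.onnx")
print([i.name for i in session.get_inputs()])   # model input name(s)
print([o.name for o in session.get_outputs()])  # e.g. ['537', '538', '539', '540']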
Inference code, demo.py:
import os

import cv2
import scipy.io as sio

from centerface import CenterFace


def camera():
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    h, w = frame.shape[:2]
    centerface = CenterFace()
    while True:
        ret, frame = cap.read()
        dets, lms = centerface(frame, h, w, threshold=0.35)
        for det in dets:
            boxes, score = det[:4], det[4]
            cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])),
                          (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
        cv2.imshow('out', frame)
        # press Q on the keyboard to stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()


def test_image():
    # frame = cv2.imread('000388.jpg')
    frame = cv2.imread('test.jpg')
    h, w = frame.shape[:2]
    landmarks = True
    centerface = CenterFace(landmarks=landmarks)
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        dets = centerface(frame, h, w, threshold=0.35)
    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])),
                      (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    # cv2.imshow('out', frame)
    cv2.imwrite('out.jpg', frame)
    # cv2.waitKey(0)


def test_image_tensorrt():
    # Requires the repo's TensorRT-enabled CenterFace; the ONNX-only class
    # above does not take a `backend` argument.
    frame = cv2.imread('000388.jpg')
    h, w = 480, 640  # must be 480 x 640
    landmarks = True
    centerface = CenterFace(landmarks=landmarks, backend="tensorrt")
    if landmarks:
        dets, lms = centerface(frame, h, w, threshold=0.35)
    else:
        dets = centerface(frame, h, w, threshold=0.35)
    for det in dets:
        boxes, score = det[:4], det[4]
        cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])),
                      (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
    if landmarks:
        for lm in lms:
            for i in range(0, 5):
                cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
    cv2.imshow('out', frame)
    cv2.waitKey(0)


def test_widerface():
    Path = 'widerface/WIDER_val/images/'
    wider_face_mat = sio.loadmat('widerface/wider_face_split/wider_face_val.mat')
    event_list = wider_face_mat['event_list']
    file_list = wider_face_mat['file_list']
    save_path = 'save_out/'
    for index, event in enumerate(event_list):
        file_list_item = file_list[index][0]
        im_dir = event[0][0]
        if not os.path.exists(save_path + im_dir):
            os.makedirs(save_path + im_dir)
        landmarks = True
        centerface = CenterFace(landmarks=landmarks)
        for num, file in enumerate(file_list_item):
            im_name = file[0][0]
            zip_name = '%s/%s.jpg' % (im_dir, im_name)
            print(os.path.join(Path, zip_name))
            img = cv2.imread(os.path.join(Path, zip_name))
            h, w = img.shape[:2]
            if landmarks:
                dets, lms = centerface(img, h, w, threshold=0.05)
            else:
                dets = centerface(img, h, w, threshold=0.05)
            # write detections in the WIDER FACE submission format
            f = open(save_path + im_dir + '/' + im_name + '.txt', 'w')
            f.write('{:s}\n'.format('%s/%s.jpg' % (im_dir, im_name)))
            f.write('{:d}\n'.format(len(dets)))
            for b in dets:
                x1, y1, x2, y2, s = b
                f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(
                    x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s))
            f.close()
            print('event:%d num:%d' % (index + 1, num + 1))


if __name__ == '__main__':
    # camera()
    test_image()
    # test_widerface()
Final result (written to out.jpg).
A small face-detection shootout:
These are my own tests; the reported time covers network inference plus post-processing. Face++ was measured through its cloud service API, so the comparison with it is not entirely fair.
| Method | MTCNN | ultra-face | Retina-Face-mobilenetv1 | Retina-Face-resnet50 | Centerface-mobileNetv2 | Face++ |
| --- | --- | --- | --- | --- | --- | --- |
| Model size | 2.9M | 1M | 1.8M | 105M | 7.2M | - |
| CPU 640×480, Intel Xeon Silver 4216 | 600ms | 200ms | 140ms | 2000ms | 130ms | 10ms |
| GPU 640×480, 2080 Ti | 110ms | 18ms | 38ms | 50ms | 8ms | - |
| CPU 1280×720, Intel Xeon Silver 4216 | 1000ms | 500ms | 350ms | 3500ms | 300ms | 10ms |
| GPU 1280×720, 2080 Ti | 200ms | 40ms | 100ms | 120ms | 25ms | - |
| CPU 1920×1080, Intel Xeon Silver 4216 | 1600ms | - | 800ms | 8000ms | 750ms | 10ms |
| GPU 1920×1080, 2080 Ti | 330ms | - | 200ms | 250ms | 50ms | - |
| Accuracy | boxes slightly better, 5-point landmarks good | misses boxes | boxes poor, landmarks poor | many box false positives, landmarks mediocre | boxes good, landmarks very poor | boxes good, landmarks good |