Testing the landmark mean error of the O-Net from our trained MTCNN on the training images


To check how the O-Net of our trained MTCNN performs on the training set, we wrote a test script that measures the mean error of the landmark predictions on the training images.

The landmark label format is as follows:

48/landmark/0.jpg -1 -1 -1 -1 -1 0.224199 0.505338 0.334520 0.327402 0.583630 0.364769 0.336299 0.596085 0.674377 0.635231
48/landmark/1.jpg -1 -1 -1 -1 -1 0.494662 0.775801 0.665480 0.416370 0.672598 0.336299 0.364769 0.596085 0.635231 0.674377
48/landmark/2.jpg -1 -1 -1 -1 -1 0.283217 0.559441 0.391608 0.384615 0.636364 0.435315 0.407343 0.662587 0.739510 0.701049
48/landmark/3.jpg -1 -1 -1 -1 -1 0.440559 0.716783 0.608392 0.363636 0.615385 0.407343 0.435315 0.662587 0.701049 0.739510
48/landmark/4.jpg -1 -1 -1 -1 -1 0.153846 0.457692 0.273077 0.265385 0.542308 0.505769 0.475000 0.755769 0.840385 0.798077
48/landmark/5.jpg -1 -1 -1 -1 -1 0.542308 0.846154 0.726923 0.457692 0.734615 0.475000 0.505769 0.755769 0.798077 0.840385
48/landmark/6.jpg -1 -1 -1 -1 -1 0.110092 0.472477 0.252294 0.243119 0.573394 0.392202 0.355505 0.690367 0.791284 0.740826
48/landmark/7.jpg -1 -1 -1 -1 -1 0.527523 0.889908 0.747706 0.426606 0.756881 0.355505 0.392202 0.690367 0.740826 0.791284


Column 1: the image name (path);

Column 2: whether the sample is a face: 1 for face, 0 for non-face, -1 for ignore;

Columns 3-6: the bounding-box labels, i.e., the offset of the face relative to the ground-truth face box;

Columns 7-16: the face landmark labels, i.e., the landmark coordinates normalized to the crop region (columns 7-11 are the five x coordinates, columns 12-16 are the five y coordinates), as shown in the parsing sketch below.
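
To make the column layout concrete, here is a minimal parsing sketch. The helper name parse_landmark_line is my own and is not part of the test script; it simply splits one label line and scales the normalized landmarks back to pixel coordinates of the 48*48 crop:

# Minimal sketch (illustrative only): parse one label line of the format above.
def parse_landmark_line(line, crop_size=48):
    fields = line.strip().split(' ')
    image_name = fields[0]                              # column 1: image path
    face_label = int(fields[1])                         # column 2: 1 face / 0 non-face / -1 ignore
    bbox_offsets = [float(v) for v in fields[2:6]]      # columns 3-6: bounding-box labels
    xs = [float(v) for v in fields[6:11]]               # columns 7-11: normalized x coordinates
    ys = [float(v) for v in fields[11:16]]              # columns 12-16: normalized y coordinates
    # Scale the normalized landmarks back to pixel coordinates inside the crop
    points = [(x * crop_size, y * crop_size) for x, y in zip(xs, ys)]
    return image_name, face_label, bbox_offsets, points

# Example with the first line shown above:
# name, label, bbox, pts = parse_landmark_line(
#     "48/landmark/0.jpg -1 -1 -1 -1 -1 0.224199 0.505338 0.334520 0.327402 "
#     "0.583630 0.364769 0.336299 0.596085 0.674377 0.635231")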


The training samples are 48*48 face crops, shown in the figure below:

[Figure: examples of the 48*48 landmark training samples]


Each 48*48 training sample is fed into the O-Net to predict the landmark positions. The test code is as follows:


#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append('D:\\Anaconda2\\libs')
import _init_paths
import caffe
import cv2
import math
import numpy as np
# from python_wrapper import *
import os


# Compute the per-landmark error between pts_gt and pts_pre,
# normalized by the ground-truth inter-ocular distance
def computer_meanerror(pts_gt, pts_pre):
    mean_error = []
    d_outer = math.sqrt((pts_gt[2] - pts_gt[0]) * (pts_gt[2] - pts_gt[0]) +
                        (pts_gt[3] - pts_gt[1]) * (pts_gt[3] - pts_gt[1]))
    for j in range(5):
        error = math.sqrt((pts_gt[2 * j] - pts_pre[2 * j]) * (pts_gt[2 * j] - pts_pre[2 * j]) +
                          (pts_gt[2 * j + 1] - pts_pre[2 * j + 1]) * (pts_gt[2 * j + 1] - pts_pre[2 * j + 1]))
        error = error / d_outer
        mean_error.append(error)
    return mean_error


# Draw the five landmarks on an image
def drawlandmark(im, points):
    for i in range(points.shape[0]):
        for j in range(5):
            cv2.circle(im, (int(points[i][j]), int(points[i][j + 5])), 2, (255, 0, 0), -1)
    return im


# Run the O-Net forward pass and return the predicted landmark positions
def detect_face(img, ONet):
    # Stage the image as num*height*width*channel; it is converted to the
    # blob layout (num*channel*height*width) below
    tempimg = np.zeros((1, 48, 48, 3))
    tempimg[0, :, :, :] = img
    tempimg = (tempimg - 127.5) * 0.0078125   # [0, 255] -> [-1, 1], normalize
    # OpenCV loads images as height*width*channel; swap the axes to channel*height*width
    tempimg = np.swapaxes(tempimg, 1, 3)
    tempimg = np.swapaxes(tempimg, 2, 3)
    ONet.blobs['data'].reshape(1, 3, 48, 48)
    ONet.blobs['data'].data[...] = tempimg
    out = ONet.forward()
    score = out['prob1'][:, 1]    # face classification score
    points = out['conv6-3']       # predicted landmarks, normalized: first five x, then five y
    point = []
    for i in range(5):
        point.append(points[0][i] * 48)
        point.append(points[0][i + 5] * 48)
    point = np.array(point)
    return point


# Run the evaluation only when this file is executed directly
if __name__ == '__main__':
    infile_list = open('C:/mtcnn/48/landmark_48_1000.txt', 'r')
    img_dir = "C:/mtcnn/"
    caffe_model_path = "E:/mtcnn_DuinoDu/model"
    threshold = [0.6, 0.7, 0.7]
    caffe.set_mode_gpu()
    # ONet = caffe.Net(caffe_model_path+"/det3.prototxt", caffe_model_path+"/48net_v5_7.caffemodel", caffe.TEST)
    ONet = caffe.Net(caffe_model_path + "/det3.prototxt",
                     "C:/mtcnn/train_V11_64/models_48_31" + "/_iter_140000.caffemodel", caffe.TEST)
    landmark_num = 0
    landmark_pos = 0
    mean_error = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    for name_list in infile_list.readlines():
        landmark_num = landmark_num + 1
        print landmark_num
        align_gt = []      # ground-truth landmark coordinates
        name_list = name_list.strip().split(' ')
        image_name = img_dir + name_list[0]
        img = cv2.imread(image_name)
        for i in range(5):
            align_gt.append(float(name_list[i + 6]))
            align_gt.append(float(name_list[i + 11]))
        align_gt = np.array(align_gt)
        # The labels are normalized to the 48*48 crop; scale them back to pixel coordinates
        for j in range(10):
            align_gt[j] = align_gt[j] * 48
        # Forward pass to fetch the predicted key points
        align_pre = detect_face(img, ONet)   # predicted landmark coordinates
        # for i in range(5):
        #     cv2.circle(img, (int(align_gt[2 * i]), int(align_gt[2 * i + 1])), 2, (255, 0, 0), -1)
        cv2.circle(img, (int(align_pre[0]), int(align_pre[1])), 2, (255, 0, 0), -1)
        cv2.circle(img, (int(align_pre[2]), int(align_pre[3])), 2, (255, 0, 0), -1)
        cv2.circle(img, (int(align_pre[4]), int(align_pre[5])), 2, (255, 0, 0), -1)
        cv2.circle(img, (int(align_pre[6]), int(align_pre[7])), 2, (255, 0, 0), -1)
        cv2.circle(img, (int(align_pre[8]), int(align_pre[9])), 2, (255, 0, 0), -1)
        # cv2.imshow("image", img)
        # cv2.waitKey(1000)
        # Accumulate the landmark mean error
        error = computer_meanerror(align_gt, align_pre)
        error = np.array(error)
        mean_error = mean_error + error
        landmark_pos = landmark_pos + 1
    mean_error = mean_error / float(landmark_pos)
    print "left eye mean error:", mean_error[0]
    print "right eye mean error:", mean_error[1]
    print "nose mean error:", mean_error[2]
    print "left mouth mean error:", mean_error[3]
    print "right mouth mean error:", mean_error[4]
    print "eye and mouth mean error:", (mean_error[0] + mean_error[1] + mean_error[3] + mean_error[4]) / 4.0
    infile_list.close()
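
For reference, the reported error for each landmark is the Euclidean distance between the ground-truth and predicted point divided by the ground-truth inter-ocular distance (the distance between the two eye landmarks), which is exactly what computer_meanerror computes. A small self-contained check with made-up coordinates (purely illustrative, not real data):

import math

# Toy check of the normalization used by computer_meanerror above:
# ground truth has the eyes 24 px apart; the prediction is off by 2.4 px on the left eye only.
pts_gt  = [12.0, 20.0, 36.0, 20.0, 24.0, 30.0, 16.0, 40.0, 32.0, 40.0]
pts_pre = [12.0, 22.4, 36.0, 20.0, 24.0, 30.0, 16.0, 40.0, 32.0, 40.0]
d_outer = math.sqrt((pts_gt[2] - pts_gt[0]) ** 2 + (pts_gt[3] - pts_gt[1]) ** 2)   # inter-ocular distance = 24
left_eye_error = math.sqrt((pts_gt[0] - pts_pre[0]) ** 2 + (pts_gt[1] - pts_pre[1]) ** 2) / d_outer
print(left_eye_error)   # 2.4 / 24 = 0.1, i.e. 10% of the inter-ocular distance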

 

As a side note: pycaffe test code written on Linux can be ported directly to Windows; you only need to replace the platform-dependent libraries, and the code itself needs no changes.



