用作者提供的net1->net2生成MTCNN的训练样本(positive,negative,part,landmark)
来源:互联网 发布:淘宝上卖符咒 编辑:程序博客网 时间:2024/06/05 10:40
本代码基于作者提供的python版本代码修改,参考:
https://github.com/DuinoDu/mtcnn/blob/master/demo.py (作者提供)
https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py
1,生成positive,negative,part三种样本,用作者的net1->net2生成bbox, 根据预测的bbox和ground truth计算IOU:
positive: IOU >= 0.65;
negative: IOU < 0.3;
part: 0.4 <= IOU < 0.65(0.3 <= IOU < 0.4 的预测框既不算negative也不算part,直接丢弃)
代码如下:
#!/usr/bin/env python# -*- coding: utf-8 -*-import _init_pathsimport caffeimport cv2import numpy as np#from python_wrapper import *import osdef bbreg(boundingbox, reg): reg = reg.T # calibrate bouding boxes if reg.shape[1] == 1: print "reshape of reg" pass # reshape of reg w = boundingbox[:,2] - boundingbox[:,0] + 1 h = boundingbox[:,3] - boundingbox[:,1] + 1 bb0 = boundingbox[:,0] + reg[:,0]*w bb1 = boundingbox[:,1] + reg[:,1]*h bb2 = boundingbox[:,2] + reg[:,2]*w bb3 = boundingbox[:,3] + reg[:,3]*h boundingbox[:,0:4] = np.array([bb0, bb1, bb2, bb3]).T #print "bb", boundingbox return boundingboxdef pad(boxesA, w, h): boxes = boxesA.copy() # shit, value parameter!!! tmph = boxes[:,3] - boxes[:,1] + 1 tmpw = boxes[:,2] - boxes[:,0] + 1 numbox = boxes.shape[0] dx = np.ones(numbox) dy = np.ones(numbox) edx = tmpw edy = tmph x = boxes[:,0:1][:,0] y = boxes[:,1:2][:,0] ex = boxes[:,2:3][:,0] ey = boxes[:,3:4][:,0] tmp = np.where(ex > w)[0] if tmp.shape[0] != 0: edx[tmp] = -ex[tmp] + w-1 + tmpw[tmp] ex[tmp] = w-1 tmp = np.where(ey > h)[0] if tmp.shape[0] != 0: edy[tmp] = -ey[tmp] + h-1 + tmph[tmp] ey[tmp] = h-1 tmp = np.where(x < 1)[0] if tmp.shape[0] != 0: dx[tmp] = 2 - x[tmp] x[tmp] = np.ones_like(x[tmp]) tmp = np.where(y < 1)[0] if tmp.shape[0] != 0: dy[tmp] = 2 - y[tmp] y[tmp] = np.ones_like(y[tmp]) # for python index from 0, while matlab from 1 dy = np.maximum(0, dy-1) dx = np.maximum(0, dx-1) y = np.maximum(0, y-1) x = np.maximum(0, x-1) edy = np.maximum(0, edy-1) edx = np.maximum(0, edx-1) ey = np.maximum(0, ey-1) ex = np.maximum(0, ex-1) return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]def IoU(box, boxes): """Compute IoU between detect box and gt boxes Parameters: ---------- box: numpy array , shape (5, ): x1, y1, x2, y2, score input box boxes: numpy array, shape (n, 4): x1, y1, x2, y2 input ground truth boxes Returns: ------- ovr: numpy.array, shape (n, ) IoU """ box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1) area = (boxes[:, 2] - boxes[:, 0] + 1) * 
(boxes[:, 3] - boxes[:, 1] + 1) xx1 = np.maximum(box[0], boxes[:, 0]) yy1 = np.maximum(box[1], boxes[:, 1]) xx2 = np.minimum(box[2], boxes[:, 2]) yy2 = np.minimum(box[3], boxes[:, 3]) # compute the width and height of the bounding box w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) inter = w * h ovr = inter / (box_area + area - inter) return ovrdef rerec(bboxA): # convert bboxA to square w = bboxA[:,2] - bboxA[:,0] h = bboxA[:,3] - bboxA[:,1] l = np.maximum(w,h).T bboxA[:,0] = bboxA[:,0] + w*0.5 - l*0.5 bboxA[:,1] = bboxA[:,1] + h*0.5 - l*0.5 bboxA[:,2:4] = bboxA[:,0:2] + np.repeat([l], 2, axis = 0).T return bboxAdef nms(boxes, threshold, type): """nms :boxes: [:,0:5] :threshold: 0.5 like :type: 'Min' or others :returns: TODO """ if boxes.shape[0] == 0: return np.array([]) x1 = boxes[:,0] y1 = boxes[:,1] x2 = boxes[:,2] y2 = boxes[:,3] s = boxes[:,4] area = np.multiply(x2-x1+1, y2-y1+1) I = np.array(s.argsort()) # read s using I pick = []; while len(I) > 0: xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]]) yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]]) xx2 = np.minimum(x2[I[-1]], x2[I[0:-1]]) yy2 = np.minimum(y2[I[-1]], y2[I[0:-1]]) w = np.maximum(0.0, xx2 - xx1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1) inter = w * h if type == 'Min': o = inter / np.minimum(area[I[-1]], area[I[0:-1]]) else: o = inter / (area[I[-1]] + area[I[0:-1]] - inter) pick.append(I[-1]) I = I[np.where( o <= threshold)[0]] return pickdef generateBoundingBox(map, reg, scale, t): stride = 2 cellsize = 12 map = map.T dx1 = reg[0,:,:].T dy1 = reg[1,:,:].T dx2 = reg[2,:,:].T dy2 = reg[3,:,:].T (x, y) = np.where(map >= t) yy = y xx = x score = map[x,y] reg = np.array([dx1[x,y], dy1[x,y], dx2[x,y], dy2[x,y]]) if reg.shape[0] == 0: pass boundingbox = np.array([yy, xx]).T bb1 = np.fix((stride * (boundingbox) + 1) / scale).T # matlab index from 1, so with "boundingbox-1" bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) / scale).T # while python don't have to score = np.array([score]) 
boundingbox_out = np.concatenate((bb1, bb2, score, reg), axis=0) return boundingbox_out.Tdef drawBoxes(im, boxes): x1 = boxes[:,0] y1 = boxes[:,1] x2 = boxes[:,2] y2 = boxes[:,3] for i in range(x1.shape[0]): cv2.rectangle(im, (int(x1[i]), int(y1[i])), (int(x2[i]), int(y2[i])), (0,255,0), 1) return imdef drawlandmark(im, points): for i in range(points.shape[0]): for j in range(5): cv2.circle(im, (int(points[i][j]), int(points[i][j+5])), 2, (255,0,0)) return imfrom time import time_tstart_stack = []def tic(): _tstart_stack.append(time())def toc(fmt="Elapsed: %s s"): print fmt % (time()-_tstart_stack.pop())def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor): img2 = img.copy() factor_count = 0 total_boxes = np.zeros((0,9), np.float) points = [] h = img.shape[0] w = img.shape[1] minl = min(h, w) img = img.astype(float) m = 12.0/minsize minl = minl*m # create scale pyramid scales = [] while minl >= 12: scales.append(m * pow(factor, factor_count)) minl *= factor factor_count += 1 # first stage for scale in scales: hs = int(np.ceil(h*scale)) ws = int(np.ceil(w*scale)) if fastresize: im_data = (img-127.5)*0.0078125 # [0,255] -> [-1,1] im_data = cv2.resize(im_data, (ws,hs)) # default is bilinear else: im_data = cv2.resize(img, (ws,hs)) # default is bilinear im_data = (im_data-127.5)*0.0078125 # [0,255] -> [-1,1] #im_data = imResample(img, hs, ws); print "scale:", scale im_data = np.swapaxes(im_data, 0, 2) im_data = np.array([im_data], dtype = np.float) PNet.blobs['data'].reshape(1, 3, ws, hs) PNet.blobs['data'].data[...] 
= im_data out = PNet.forward() boxes = generateBoundingBox(out['prob1'][0,1,:,:], out['conv4-2'][0], scale, threshold[0]) if boxes.shape[0] != 0: pick = nms(boxes, 0.5, 'Union') if len(pick) > 0 : boxes = boxes[pick, :] if boxes.shape[0] != 0: total_boxes = np.concatenate((total_boxes, boxes), axis=0) #np.save('total_boxes_101.npy', total_boxes) ##### # 1 # ##### # print "[1]:",total_boxes.shape[0] #print total_boxes #return total_boxes, [] numbox = total_boxes.shape[0] if numbox > 0: # nms pick = nms(total_boxes, 0.7, 'Union') total_boxes = total_boxes[pick, :] # print "[2]:",total_boxes.shape[0] # revise and convert to square regh = total_boxes[:,3] - total_boxes[:,1] regw = total_boxes[:,2] - total_boxes[:,0] t1 = total_boxes[:,0] + total_boxes[:,5]*regw t2 = total_boxes[:,1] + total_boxes[:,6]*regh t3 = total_boxes[:,2] + total_boxes[:,7]*regw t4 = total_boxes[:,3] + total_boxes[:,8]*regh t5 = total_boxes[:,4] total_boxes = np.array([t1,t2,t3,t4,t5]).T total_boxes = rerec(total_boxes) # convert box to square # print "[4]:",total_boxes.shape[0] total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]) # print "[4.5]:",total_boxes.shape[0] #print total_boxes [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h) numbox = total_boxes.shape[0] if numbox > 0: # second stage # construct input for RNet tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox) for k in range(numbox): tmp = np.zeros((int(tmph[k]) +1, int(tmpw[k]) + 1,3)) tmp[int(dy[k]):int(edy[k])+1, int(dx[k]):int(edx[k])+1] = img[int(y[k]):int(ey[k])+1, int(x[k]):int(ex[k])+1] #print "y,ey,x,ex", y[k], ey[k], x[k], ex[k] #print "tmp", tmp.shape tempimg[k,:,:,:] = cv2.resize(tmp, (24, 24)) #print tempimg.shape #print tempimg[0,0,0,:] tempimg = (tempimg-127.5)*0.0078125 # done in imResample function wrapped by python # RNet tempimg = np.swapaxes(tempimg, 1, 3) #print tempimg[0,:,0,0] RNet.blobs['data'].reshape(numbox, 3, 24, 24) RNet.blobs['data'].data[...] 
= tempimg out = RNet.forward() score = out['prob1'][:,1] #print 'score', score pass_t = np.where(score>threshold[1])[0] #print 'pass_t', pass_t score = np.array([score[pass_t]]).T total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis = 1) # print "[5]:",total_boxes.shape[0] #print total_boxes #print "1.5:",total_boxes.shape mv = out['conv5-2'][pass_t, :].T #print "mv", mv if total_boxes.shape[0] > 0: pick = nms(total_boxes, 0.7, 'Union') # print 'pick', pick if len(pick) > 0: total_boxes = total_boxes[pick, :] # print "[6]:", total_boxes.shape[0] total_boxes = bbreg(total_boxes, mv[:, pick]) # print "[7]:", total_boxes.shape[0] total_boxes = rerec(total_boxes) # print "[8]:", total_boxes.shape[0] return total_boxesdef main(): img_dir = "/home/xiao/code/mtcnn-caffe/prepare_data/WIDER_train/images/" imglistfile = "wider_face_train.txt" with open(imglistfile, 'r') as f: annotations = f.readlines() num = len(annotations) print "%d pics in total" % num neg_save_dir = "/media/xiao/软件/mtcnn/train/48/negative/" pos_save_dir = "/media/xiao/软件/mtcnn/train/48/positive/" part_save_dir = "/media/xiao/软件/mtcnn/train/48/part/" image_size = 48 f1 = open('/media/xiao/软件/mtcnn/train/48/pos_48.txt', 'w') f2 = open('/media/xiao/软件/mtcnn/train/48/neg_48.txt', 'w') f3 = open('/media/xiao/软件/mtcnn/train/48/part_48.txt', 'w') p_idx = 0 # positive n_idx = 0 # negative d_idx = 0 # dont care image_idx = 0 minsize = 20 caffe_model_path = "./model" threshold = [0.6, 0.7, 0.7] factor = 0.709 caffe.set_mode_gpu() PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST) RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST) for annotation in annotations: # imgpath = imgpath.split('\n')[0] annotation = annotation.strip().split(' ') bbox = map(float, annotation[1:]) gts = np.array(bbox, dtype=np.float32).reshape(-1, 4) img_path = img_dir + annotation[0] + '.jpg' #print "######\n", img_path 
print image_idx image_idx += 1 img = cv2.imread(img_path) img_matlab = img.copy() tmp = img_matlab[:,:,2].copy() img_matlab[:,:,2] = img_matlab[:,:,0] img_matlab[:,:,0] = tmp boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor) #img = drawBoxes(img, boundingboxes) #cv2.imshow('img', img) #cv2.waitKey(1000) # generate positive,negative,part samples for box in boundingboxes: x_left, y_top, x_right, y_bottom, _ = box crop_w = x_right - x_left + 1 crop_h = y_bottom - y_top + 1 # ignore box that is too small or beyond image border if crop_w < image_size / 2 or crop_h < image_size / 2: continue if x_left < 0 or y_top < 0: continue # compute intersection over union(IoU) between current box and all gt boxes Iou = IoU(box, gts) cropped_im = img[int(y_top):int(y_bottom + 1) , int(x_left):int(x_right + 1) ] resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR) #try: # resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR) #except Exception as e: # print " 1 " # print e # save negative images and write label, 负样本 if np.max(Iou) < 0.3: # Iou with all gts must below 0.3 save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) f2.write("%s/negative/%s.jpg" % (image_size, n_idx) + ' 0') f2.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n") cv2.imwrite(save_file, resized_im) n_idx += 1 else: # find gt_box with the highest iou idx = np.argmax(Iou) assigned_gt = gts[idx] x1, y1, x2, y2 = assigned_gt # compute bbox reg label,offset_x1,offset_y1相对于左上角; offset_x2,offset_y2相对于右上角 offset_x1 = (x1 - x_left) / float(crop_w) offset_y1 = (y1 - y_top) / float(crop_h) # offset_x2 = (x2 - x_left) / float(crop_w) # offset_y2 = (y2 - y_top) / float(crop_h) offset_x2 = (x2 - x_right) / float(crop_w) offset_y2 = (y2 - y_bottom )/ float(crop_h) # save positive and part-face images and write labels, 正样本 if np.max(Iou) >= 0.65: save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx) 
f1.write("%s/positive/%s.jpg" % (image_size, p_idx) + ' 1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2)) f1.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n") cv2.imwrite(save_file, resized_im) p_idx += 1# part 样本 elif np.max(Iou) >= 0.4: save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx) f3.write("%s/part/%s.jpg" % (image_size, d_idx) + ' -1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2)) f3.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n") cv2.imwrite(save_file, resized_im) d_idx += 1 f.close() f1.close() f2.close() f3.close()if __name__ == "__main__": main()
2,生成landmark样本:用作者的net1->net2生成bbox,只有当5个landmark全部落在预测的bbox内时,才把该bbox对应的crop作为landmark训练样本:
代码如下:
#!/usr/bin/env python# -*- coding: utf-8 -*-import _init_pathsimport caffeimport cv2import numpy as np#from python_wrapper import *import osdef bbreg(boundingbox, reg): reg = reg.T # calibrate bouding boxes if reg.shape[1] == 1: print "reshape of reg" pass # reshape of reg w = boundingbox[:,2] - boundingbox[:,0] + 1 h = boundingbox[:,3] - boundingbox[:,1] + 1 bb0 = boundingbox[:,0] + reg[:,0]*w bb1 = boundingbox[:,1] + reg[:,1]*h bb2 = boundingbox[:,2] + reg[:,2]*w bb3 = boundingbox[:,3] + reg[:,3]*h boundingbox[:,0:4] = np.array([bb0, bb1, bb2, bb3]).T #print "bb", boundingbox return boundingboxdef pad(boxesA, w, h): boxes = boxesA.copy() # shit, value parameter!!! tmph = boxes[:,3] - boxes[:,1] + 1 tmpw = boxes[:,2] - boxes[:,0] + 1 numbox = boxes.shape[0] dx = np.ones(numbox) dy = np.ones(numbox) edx = tmpw edy = tmph x = boxes[:,0:1][:,0] y = boxes[:,1:2][:,0] ex = boxes[:,2:3][:,0] ey = boxes[:,3:4][:,0] tmp = np.where(ex > w)[0] if tmp.shape[0] != 0: edx[tmp] = -ex[tmp] + w-1 + tmpw[tmp] ex[tmp] = w-1 tmp = np.where(ey > h)[0] if tmp.shape[0] != 0: edy[tmp] = -ey[tmp] + h-1 + tmph[tmp] ey[tmp] = h-1 tmp = np.where(x < 1)[0] if tmp.shape[0] != 0: dx[tmp] = 2 - x[tmp] x[tmp] = np.ones_like(x[tmp]) tmp = np.where(y < 1)[0] if tmp.shape[0] != 0: dy[tmp] = 2 - y[tmp] y[tmp] = np.ones_like(y[tmp]) # for python index from 0, while matlab from 1 dy = np.maximum(0, dy-1) dx = np.maximum(0, dx-1) y = np.maximum(0, y-1) x = np.maximum(0, x-1) edy = np.maximum(0, edy-1) edx = np.maximum(0, edx-1) ey = np.maximum(0, ey-1) ex = np.maximum(0, ex-1) return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]def IoU(box, boxes): """Compute IoU between detect box and gt boxes Parameters: ---------- box: numpy array , shape (5, ): x1, y1, x2, y2, score input box boxes: numpy array, shape (n, 4): x1, y1, x2, y2 input ground truth boxes Returns: ------- ovr: numpy.array, shape (n, ) IoU """ box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1) area = (boxes[:, 2] - boxes[:, 0] + 1) * 
(boxes[:, 3] - boxes[:, 1] + 1) xx1 = np.maximum(box[0], boxes[:, 0]) yy1 = np.maximum(box[1], boxes[:, 1]) xx2 = np.minimum(box[2], boxes[:, 2]) yy2 = np.minimum(box[3], boxes[:, 3]) # compute the width and height of the bounding box w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) inter = w * h ovr = inter / (box_area + area - inter) return ovrdef rerec(bboxA): # convert bboxA to square w = bboxA[:,2] - bboxA[:,0] h = bboxA[:,3] - bboxA[:,1] l = np.maximum(w,h).T bboxA[:,0] = bboxA[:,0] + w*0.5 - l*0.5 bboxA[:,1] = bboxA[:,1] + h*0.5 - l*0.5 bboxA[:,2:4] = bboxA[:,0:2] + np.repeat([l], 2, axis = 0).T return bboxAdef nms(boxes, threshold, type): """nms :boxes: [:,0:5] :threshold: 0.5 like :type: 'Min' or others :returns: TODO """ if boxes.shape[0] == 0: return np.array([]) x1 = boxes[:,0] y1 = boxes[:,1] x2 = boxes[:,2] y2 = boxes[:,3] s = boxes[:,4] area = np.multiply(x2-x1+1, y2-y1+1) I = np.array(s.argsort()) # read s using I pick = []; while len(I) > 0: xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]]) yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]]) xx2 = np.minimum(x2[I[-1]], x2[I[0:-1]]) yy2 = np.minimum(y2[I[-1]], y2[I[0:-1]]) w = np.maximum(0.0, xx2 - xx1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1) inter = w * h if type == 'Min': o = inter / np.minimum(area[I[-1]], area[I[0:-1]]) else: o = inter / (area[I[-1]] + area[I[0:-1]] - inter) pick.append(I[-1]) I = I[np.where( o <= threshold)[0]] return pickdef generateBoundingBox(map, reg, scale, t): stride = 2 cellsize = 12 map = map.T dx1 = reg[0,:,:].T dy1 = reg[1,:,:].T dx2 = reg[2,:,:].T dy2 = reg[3,:,:].T (x, y) = np.where(map >= t) yy = y xx = x score = map[x,y] reg = np.array([dx1[x,y], dy1[x,y], dx2[x,y], dy2[x,y]]) if reg.shape[0] == 0: pass boundingbox = np.array([yy, xx]).T bb1 = np.fix((stride * (boundingbox) + 1) / scale).T # matlab index from 1, so with "boundingbox-1" bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) / scale).T # while python don't have to score = np.array([score]) 
boundingbox_out = np.concatenate((bb1, bb2, score, reg), axis=0) return boundingbox_out.Tdef drawBoxes(im, boxes): x1 = boxes[:,0] y1 = boxes[:,1] x2 = boxes[:,2] y2 = boxes[:,3] for i in range(x1.shape[0]): cv2.rectangle(im, (int(x1[i]), int(y1[i])), (int(x2[i]), int(y2[i])), (0,255,0), 1) return imdef drawBoxes_align(im, boxe): x1 = boxe[0] y1 = boxe[1] x2 = boxe[2] y2 = boxe[3] cv2.rectangle(im, (int(x1), int(y1)), (int(x2), int(y2)), (0,255,0), 1) return imdef drawlandmark(im, points): for i in range(points.shape[0]): for j in range(5): cv2.circle(im, (int(points[i][j]), int(points[i][j+5])), 2, (255,0,0)) return imdef drawlandmark_align(im, point): for j in range(5): cv2.circle(im, (int(point[j*2]), int(point[j*2+1])), 2, (255,0,0)) return imfrom time import time_tstart_stack = []def tic(): _tstart_stack.append(time())def toc(fmt="Elapsed: %s s"): print fmt % (time()-_tstart_stack.pop())def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor): img2 = img.copy() factor_count = 0 total_boxes = np.zeros((0,9), np.float) points = [] h = img.shape[0] w = img.shape[1] minl = min(h, w) img = img.astype(float) m = 12.0/minsize minl = minl*m # create scale pyramid scales = [] while minl >= 12: scales.append(m * pow(factor, factor_count)) minl *= factor factor_count += 1 # first stage for scale in scales: hs = int(np.ceil(h*scale)) ws = int(np.ceil(w*scale)) if fastresize: im_data = (img-127.5)*0.0078125 # [0,255] -> [-1,1] im_data = cv2.resize(im_data, (ws,hs)) # default is bilinear else: im_data = cv2.resize(img, (ws,hs)) # default is bilinear im_data = (im_data-127.5)*0.0078125 # [0,255] -> [-1,1] #im_data = imResample(img, hs, ws); print "scale:", scale im_data = np.swapaxes(im_data, 0, 2) im_data = np.array([im_data], dtype = np.float) PNet.blobs['data'].reshape(1, 3, ws, hs) PNet.blobs['data'].data[...] 
= im_data out = PNet.forward() boxes = generateBoundingBox(out['prob1'][0,1,:,:], out['conv4-2'][0], scale, threshold[0]) if boxes.shape[0] != 0: pick = nms(boxes, 0.5, 'Union') if len(pick) > 0 : boxes = boxes[pick, :] if boxes.shape[0] != 0: total_boxes = np.concatenate((total_boxes, boxes), axis=0) #np.save('total_boxes_101.npy', total_boxes) ##### # 1 # ##### # print "[1]:",total_boxes.shape[0] #print total_boxes #return total_boxes, [] numbox = total_boxes.shape[0] if numbox > 0: # nms pick = nms(total_boxes, 0.7, 'Union') total_boxes = total_boxes[pick, :] # print "[2]:",total_boxes.shape[0] # revise and convert to square regh = total_boxes[:,3] - total_boxes[:,1] regw = total_boxes[:,2] - total_boxes[:,0] t1 = total_boxes[:,0] + total_boxes[:,5]*regw t2 = total_boxes[:,1] + total_boxes[:,6]*regh t3 = total_boxes[:,2] + total_boxes[:,7]*regw t4 = total_boxes[:,3] + total_boxes[:,8]*regh t5 = total_boxes[:,4] total_boxes = np.array([t1,t2,t3,t4,t5]).T total_boxes = rerec(total_boxes) # convert box to square # print "[4]:",total_boxes.shape[0] total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]) # print "[4.5]:",total_boxes.shape[0] #print total_boxes [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h) numbox = total_boxes.shape[0] if numbox > 0: # second stage # construct input for RNet tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox) for k in range(numbox): tmp = np.zeros((int(tmph[k]) +1, int(tmpw[k]) + 1,3)) tmp[int(dy[k]):int(edy[k])+1, int(dx[k]):int(edx[k])+1] = img[int(y[k]):int(ey[k])+1, int(x[k]):int(ex[k])+1] #print "y,ey,x,ex", y[k], ey[k], x[k], ex[k] #print "tmp", tmp.shape tempimg[k,:,:,:] = cv2.resize(tmp, (24, 24)) #print tempimg.shape #print tempimg[0,0,0,:] tempimg = (tempimg-127.5)*0.0078125 # done in imResample function wrapped by python # RNet tempimg = np.swapaxes(tempimg, 1, 3) #print tempimg[0,:,0,0] RNet.blobs['data'].reshape(numbox, 3, 24, 24) RNet.blobs['data'].data[...] 
= tempimg out = RNet.forward() score = out['prob1'][:,1] #print 'score', score pass_t = np.where(score>threshold[1])[0] #print 'pass_t', pass_t score = np.array([score[pass_t]]).T total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis = 1) # print "[5]:",total_boxes.shape[0] #print total_boxes #print "1.5:",total_boxes.shape mv = out['conv5-2'][pass_t, :].T #print "mv", mv if total_boxes.shape[0] > 0: pick = nms(total_boxes, 0.7, 'Union') # print 'pick', pick if len(pick) > 0: total_boxes = total_boxes[pick, :] # print "[6]:", total_boxes.shape[0] total_boxes = bbreg(total_boxes, mv[:, pick]) # print "[7]:", total_boxes.shape[0] total_boxes = rerec(total_boxes) # print "[8]:", total_boxes.shape[0] return total_boxesdef main(): img_dir = "/media/xiao/学习/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/" anno_file = "celebA_bbox_landmark.txt" with open(anno_file, 'r') as f: annotations = f.readlines() num = len(annotations) print "%d pics in total" % num# 图片大小为48*48 image_size = 48 # landmark_save_dir = "48/landmark/" landmark_save_dir = "/media/xiao/软件/mtcnn/train/48/landmark/" # save_dir = "./" + str(image_size) f1 = open('/media/xiao/软件/mtcnn/train/48/landmark_48.txt', 'w') l_idx = 0 # landmark image_idx = 0 minsize = 40 caffe_model_path = "./model" threshold = [0.6, 0.7, 0.7] factor = 0.709 caffe.set_mode_gpu() PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST) RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST) for annotation in annotations: # imgpath = imgpath.split('\n')[0] annotation = annotation.strip().split(' ') im_path = annotation[0] # bbox = map(float, annotation[1:-10]) pts = map(float, annotation[-10:]) # boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4) im_path = img_dir + im_path backupPts = pts[:] #print "######\n", img_path print image_idx image_idx += 1 img = cv2.imread(im_path) img_matlab = img.copy() tmp = 
img_matlab[:,:,2].copy() img_matlab[:,:,2] = img_matlab[:,:,0] img_matlab[:,:,0] = tmp# 用作者提供的net1->net2生成人脸框 boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor) #img = drawBoxes(img, boundingboxes) #cv2.imshow('img', img) #cv2.waitKey(1000) # generate landmark samples for box in boundingboxes: x_left, y_top, x_right, y_bottom, _ = box # 得到人脸bbox坐标 crop_w = x_right - x_left + 1 crop_h = y_bottom - y_top + 1 # ignore box that is too small or beyond image border if crop_w < image_size / 2 or crop_h < image_size / 2: continue if x_left < 0 or y_top < 0: continue # 当五个landmark都在bbox中时,这样的样本作为我们的landmark训练样本 if pts[0] < x_left or pts[0] > x_right: continue if pts[2] < x_left or pts[2] > x_right: continue if pts[4] < x_left or pts[4] > x_right: continue if pts[6] < x_left or pts[6] > x_right: continue if pts[8] < x_left or pts[8] > x_right: continue if pts[1] < y_top or pts[1] > y_bottom: continue if pts[3] < y_top or pts[3] > y_bottom: continue if pts[5] < y_top or pts[5] > y_bottom: continue if pts[7] < y_top or pts[7] > y_bottom: continue if pts[9] < y_top or pts[9] > y_bottom: continue # show image #img1 = drawBoxes_align(img, box) #img1 = drawlandmark_align(img1, pts) #cv2.imshow('img', img1) #cv2.waitKey(1000) # our method, x0,y0,x1,y1,x2,y2,x3,y3,x4,y4 #for k in range(len(pts) / 2): # pts[k * 2] = (pts[k * 2] - x_left) / float(crop_w); # pts[k * 2 + 1] = (pts[k * 2 + 1] - y_top) / float(crop_h); #author method, x0,x1,x2,x3,x4,y0,y1,y2,y3,y4 ptsss = pts[:]# landmark标注为其相对于crop区域左上角的归一化值 for k in range(len(ptsss) / 2): pts[k] = (ptsss[k * 2] - x_left) / float(crop_w); pts[5+k] = (ptsss[k * 2 + 1] - y_top) / float(crop_h);# 将crop区域进行resize cropped_im = img[int(y_top):int(y_bottom + 1), int(x_left):int(x_right + 1)] resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR) # box_ = box.reshape(1, -1) # 写图片名,class坐标,bbox坐标。 save_file = os.path.join(landmark_save_dir, "%s.jpg" % l_idx) 
f1.write(str(image_size) + "/landmark/%s.jpg" % l_idx + ' -1 -1 -1 -1 -1')# 写landmark坐标并保存图片 for k in range(len(pts)): f1.write(" %f" % pts[k]) f1.write("\n") cv2.imwrite(save_file, resized_im) l_idx += 1 f.close() f1.close()if __name__ == "__main__": main()
阅读全文
1 0
- 用作者提供的net1->net2生成MTCNN的训练样本(positive,negative,part,landmark)
- MTCNN中生成positive,negative,part样本python代码解读
- 人脸检测——mtcnn思想,生成negative、positive、part样本。
- MTCNN中将自己训练的o-net接在作者提供的net1,net2后面python代码解读
- MTCNN中用celebA样本生成landmark训练样本python代码解读
- 用我们训练的MTCNN中o-net测试训练图片的landmark的mean error
- 300W数据集测试MTCNN的landmark效果代码
- 300W数据集测试MTCNN的landmark效果代码
- 关于【true positive,false positive,true negative,false negative】的几个解释
- 训练样本的过程
- OpenCV的样本训练
- 快速生成训练样本与标签样本之间的索引文件
- 训练样本集的制作
- 【OpenCV】OpenCV的样本训练
- mobileNet训练自己的样本
- 人脸检测之MTCNN训练自己的数据
- ASP.NET1.0升级ASP.NET2.0的问题小结
- ASP.NET1.0升级ASP.NET2.0的问题小结
- Deep Learning笔记(一) Windows | Linux配置Caffe环境
- CentOS系统启动流程(二)
- JPA Override 源码中createdDate,createdBy,lastModifiedBy,lastModifiedDate(过于麻烦,见下一篇已更新方法)
- Collection集合框架整理
- 谍报分析
- 用作者提供的net1->net2生成MTCNN的训练样本(positive,negative,part,landmark)
- 常见排序算法
- xpath基础知识定位
- SpringMVC信息转换---HttpMessageConverter(一)
- 优雅的点
- CoreSeek(全文检索引擎 Sphinx 中文版)安装使用指南(CentOS6.5)
- NSMutableArray 的修饰符
- 触发form表单的两种提交方式,submit和button的用法
- 日期间的间隔天数