NMS not working as expected in a TensorFlow implementation of MTCNN


I trained PNet, RNet, and ONet separately on the WIDER FACE dataset. Training itself went fine, but when testing on FDDB I found that NMS is not working very well (detection screenshots omitted here).

The results were very poor at first, so I then ran NMS twice, but the output is still bad.
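To see at which stage the overlapping boxes appear (straight out of PNet, after calibration, or after RNet/ONet), it helps to dump the candidate boxes to an image after each detect_* call. Below is a minimal helper sketch; it is not part of the detector code above, the function name dump_boxes is my own, and the example boxes and canvas are made up.

import cv2
import numpy as np

def dump_boxes(im, boxes, path, color=(0, 255, 0)):
    """Draw [x1, y1, x2, y2, score] boxes on a copy of im and write it to path."""
    vis = im.copy()
    for x1, y1, x2, y2, score in boxes:
        cv2.rectangle(vis, (int(x1), int(y1)), (int(x2), int(y2)), color, 1)
        cv2.putText(vis, "%.2f" % score, (int(x1), int(y1) - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
    cv2.imwrite(path, vis)

# Made-up boxes on a blank canvas; in practice pass the FDDB image and the
# `boxes` / `boxes_c` arrays returned by detect_pnet / detect_rnet / detect_onet.
canvas = np.zeros((120, 160, 3), dtype=np.uint8)
fake_boxes = np.array([[10, 10, 60, 60, 0.9],
                       [15, 12, 65, 58, 0.8]])
dump_boxes(canvas, fake_boxes, "pnet_candidates.jpg")

Comparing the dumps before and after each py_nms call shows whether the stray boxes survive the suppression itself or are introduced by a later stage.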
The detection code is as follows:
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_c: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    # factor
    net_size = 12
    minl = np.amin([h, w])
    total_boxes = np.empty((0, 9))
    current_scale = float(net_size) / self.min_face_size    # find initial scale
    minl = minl * current_scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape
    print(current_height, current_width, '---------------imresize.shape')
    if self.slide_window:
        # sliding window
        temp_rectangles = list()
        # list of rectangles [x11, y11, x12, y12, confidence] (corresponding to original image)
        rectangles = list()
        all_cropped_ims = list()
        all_cropped_ims02 = np.ndarray([12, 12, 3])
        while min(current_height, current_width) > net_size:
            if (current_height - net_size) % self.stride == 0:
                current_y_list = range(0, current_height - net_size + 1, self.stride)
            else:
                current_y_list = list(range(0, current_height - net_size + 1, self.stride)) + [current_height - net_size]
            if (current_width - net_size) % self.stride == 0:
                current_x_list = range(0, current_width - net_size + 1, self.stride)
            else:
                current_x_list = list(range(0, current_width - net_size + 1, self.stride)) + [current_width - net_size]
            for current_y in current_y_list:
                for current_x in current_x_list:
                    cropped_im = im_resized[current_y:current_y + net_size, current_x:current_x + net_size, :]
                    current_rectangle = [int(w * float(current_x) / current_width),
                                         int(h * float(current_y) / current_height),
                                         int(w * float(current_x) / current_width) + int(w * float(net_size) / current_width),
                                         int(h * float(current_y) / current_height) + int(w * float(net_size) / current_width),
                                         0.0]
                    temp_rectangles.append(current_rectangle)
                    all_cropped_ims.append(cropped_im)
            current_scale *= self.scale_factor
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape
        '''
        # helper for setting PNet batch size
        num_boxes = len(all_cropped_ims)
        batch_size = self.pnet_detector.batch_size
        ratio = float(num_boxes) / batch_size
        if ratio > 3 or ratio < 0.3:
            print("You may need to reset PNet batch size if this info appears frequently, "
                  "face candidates:%d, current batch_size:%d" % (num_boxes, batch_size))
        '''
        print(len(all_cropped_ims), '---------------------------len-----------ims-----')
        all_cropped_ims = np.reshape(all_cropped_ims, (len(all_cropped_ims), 12, 12, 3))
        cls_scores, reg = self.pnet_detector.predict(all_cropped_ims)
        # cls_scores = cls_scores[0][:, 1].flatten()
        cls_scores = cls_scores.flatten()
        keep_inds = np.where(cls_scores > self.thresh[0])[0]
        if len(keep_inds) > 0:
            boxes = np.vstack([temp_rectangles[ind] for ind in keep_inds])
            boxes[:, 4] = cls_scores[keep_inds]
            reg = reg[keep_inds].reshape(-1, 4)
        else:
            return None, None
        keep = py_nms(boxes, 0.5, 'Union')
        boxes = boxes[keep]
        boxes_c = self.calibrate_box(boxes, reg[keep])
    else:
        # fcn
        all_boxes = list()
        while min(current_height, current_width) > net_size:
            cls_map, reg = self.pnet_detector.predict(im_resized)
            boxes = self.generate_bbox(cls_map[0, :, :], reg, current_scale, self.thresh[0])
            current_scale *= self.scale_factor
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape
            if boxes.size == 0:
                continue
            keep = py_nms(boxes[:, :5], 0.5, 'Union')
            boxes = boxes[keep]
            keep = py_nms(boxes[:, :5], 0.7, 'Union')
            boxes = boxes[keep]
            all_boxes.append(boxes)
        if len(all_boxes) == 0:
            return None, None
        all_boxes = np.vstack(all_boxes)
        # merge the detections from the first stage
        keep = py_nms(all_boxes[:, 0:5], 0.7, 'Union')
        all_boxes = all_boxes[keep]
        keep = py_nms(all_boxes[:, 0:5], 0.7, 'Union')
        all_boxes = all_boxes[keep]
        boxes = all_boxes[:, :5]
        bbw = all_boxes[:, 2] - all_boxes[:, 0] + 1
        bbh = all_boxes[:, 3] - all_boxes[:, 1] + 1
        # refine the boxes
        boxes_c = np.vstack([all_boxes[:, 0] + all_boxes[:, 5] * bbw,
                             all_boxes[:, 1] + all_boxes[:, 6] * bbh,
                             all_boxes[:, 2] + all_boxes[:, 7] * bbw,
                             all_boxes[:, 3] + all_boxes[:, 8] * bbh,
                             all_boxes[:, 4]])
        boxes_c = boxes_c.T
    return boxes, boxes_c

def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_c: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    dets = self.convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]
    '''
    # helper for setting RNet batch size
    batch_size = self.rnet_detector.batch_size
    ratio = float(num_boxes) / batch_size
    if ratio > 3 or ratio < 0.3:
        print("You may need to reset RNet batch size if this info appears frequently, "
              "face candidates:%d, current batch_size:%d" % (num_boxes, batch_size))
    '''
    cropped_ims = np.zeros((num_boxes, 24, 24, 3), dtype=np.float32)
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        cropped_ims[i, :, :, :] = cv2.resize(tmp, (24, 24)) / 127.5
    cls_scores, reg = self.rnet_detector.predict(cropped_ims)
    keep_inds = np.where(cls_scores > self.thresh[1])[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None
    keep = py_nms(boxes, 0.5, "Union")
    boxes = boxes[keep]
    keep = py_nms(boxes, 0.7, "Minimum")
    boxes = boxes[keep]
    boxes_c = self.calibrate_box(boxes, reg[keep])
    return boxes, boxes_c

def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_c: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    dets = self.convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]
    '''
    # helper for setting ONet batch size
    batch_size = self.onet_detector.batch_size
    ratio = float(num_boxes) / batch_size
    if ratio > 3 or ratio < 0.3:
        print("You may need to reset ONet batch size if this info appears frequently, "
              "face candidates:%d, current batch_size:%d" % (num_boxes, batch_size))
    '''
    cropped_ims = np.zeros((num_boxes, 48, 48, 3), dtype=np.float32)
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        cropped_ims[i, :, :, :] = cv2.resize(tmp, (48, 48)) / 127.5
    cls_scores, reg = self.onet_detector.predict(cropped_ims)
    keep_inds = np.where(cls_scores > self.thresh[2])[0]
    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        boxes[:, 4] = cls_scores[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None
    boxes_c = self.calibrate_box(boxes, reg)
    keep = py_nms(boxes_c, 0.5, "Union")
    boxes_c = boxes_c[keep]
    keep = py_nms(boxes_c, 0.7, "Minimum")
    boxes_c = boxes_c[keep]
    return boxes, boxes_c
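calibrate_box is called above but its body is not included in the post. Judging from the inline refinement in the fcn branch of detect_pnet, it most likely scales the regression offsets by the box width/height and adds them to the corners. The sketch below is written under that assumption and is not the post's actual implementation.

import numpy as np

def calibrate_box(boxes, reg):
    """Sketch (assumption): apply bounding-box regression offsets to
    [x1, y1, x2, y2, score] boxes, mirroring the inline refinement in the
    fcn branch of detect_pnet."""
    boxes_c = boxes.copy()
    w = boxes[:, 2] - boxes[:, 0] + 1
    h = boxes[:, 3] - boxes[:, 1] + 1
    boxes_c[:, 0] = boxes[:, 0] + reg[:, 0] * w
    boxes_c[:, 1] = boxes[:, 1] + reg[:, 1] * h
    boxes_c[:, 2] = boxes[:, 2] + reg[:, 2] * w
    boxes_c[:, 3] = boxes[:, 3] + reg[:, 3] * h
    return boxes_c

The calibrated boxes_c from each stage are what the next stage crops and what the later py_nms calls operate on, so this step is worth checking alongside the NMS thresholds.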
Here is the NMS implementation:
import numpy as np

def py_nms(dets, thresh, mode="Union"):
    '''
    Greedily select boxes in order of decreasing confidence, keeping a box only
    if its overlap with every already-kept box is <= thresh; boxes whose overlap
    exceeds thresh are removed.

    Parameters:
        dets:   [[x1, y1, x2, y2, score], ...]
        thresh: boxes with overlap <= thresh are retained
        mode:   "Union" uses IoU, "Minimum" uses intersection over the smaller area
    Returns:
        indexes of the boxes that are kept
    '''
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # sort scores from high to low and take the indexes
    order = scores.argsort()[::-1]
    # keep holds the indexes of the boxes that survive
    keep = []
    while order.size > 0:
        # order[0] is the highest-scoring box still in play; it has not been
        # suppressed by anything, so it is always kept
        i = order[0]
        keep.append(i)
        # intersection of box i with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if mode == "Union":
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
        elif mode == "Minimum":
            ovr = inter / np.minimum(areas[i], areas[order[1:]])
        # inds: boxes whose overlap with box i is <= thresh; all other boxes
        # are suppressed by box i in this round
        inds = np.where(ovr <= thresh)[0]
        # box i itself sits at position 0 of order, so shift the surviving
        # indexes by 1 before the next iteration
        order = order[inds + 1]
    return keep
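One property of this greedy scheme worth keeping in mind when "running NMS twice": after one pass with threshold t in "Union" mode, every pair of surviving boxes already has IoU <= t, so a second pass in the same mode with an equal or higher threshold keeps exactly the same set. Only the "Minimum" pass measures overlap differently and can remove additional boxes. The quick check below demonstrates this with made-up box values.

import numpy as np

# Made-up boxes [x1, y1, x2, y2, score]: three heavily overlapping, one separate.
dets = np.array([[ 10,  10,  60,  60, 0.95],
                 [ 12,  12,  62,  62, 0.90],
                 [ 14,  14,  64,  64, 0.85],
                 [100, 100, 150, 150, 0.80]])

keep1 = py_nms(dets, 0.5, "Union")          # first pass, as in detect_pnet
keep2 = py_nms(dets[keep1], 0.7, "Union")   # second pass with a higher threshold
print(keep1)                                # [0, 3]
print(keep2)                                # [0, 1] -> everything from pass 1 survives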


