faster rcnn源码解读(六)之minibatch
来源:互联网 发布:centos net snmp 安装 编辑:程序博客网 时间:2024/05/21 14:40
faster rcnn用python版本的https://github.com/rbgirshick/py-faster-rcnn
minibatch源码:https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/roi_data_layer/minibatch.py
源码:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Compute minibatch blobs for training a Fast R-CNN network."""

import numpy as np
import numpy.random as npr
import cv2
from fast_rcnn.config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob


def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it.

    Args:
        roidb: sequence of per-image dicts. This function reads the keys
            'gt_classes' and 'boxes' directly; the helpers it calls also read
            'image', 'flipped', 'max_classes', 'max_overlaps' and
            'bbox_targets'.
        num_classes: number of classes; sets the width (4 * num_classes) of
            the expanded bbox-regression blobs.

    Returns:
        dict of blobs keyed by name. Always contains 'data'. With
        cfg.TRAIN.HAS_RPN it also contains 'gt_boxes' and 'im_info';
        otherwise 'rois', 'labels' and, if cfg.TRAIN.BBOX_REG is set,
        'bbox_targets', 'bbox_inside_weights' and 'bbox_outside_weights'.
        (The accompanying article notes these keys line up with the data
        layer's name_to_top mapping -- not visible here.)

    Raises:
        AssertionError: if len(roidb) does not divide cfg.TRAIN.BATCH_SIZE,
            or if HAS_RPN is set and the batch holds more than one image.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch: an array of
    # random indices into cfg.TRAIN.SCALES, one entry per roidb element.
    random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                                    size=num_images)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)
    # Floor division keeps this an int even under true division (the assert
    # above guarantees the quotient is exact).
    rois_per_image = cfg.TRAIN.BATCH_SIZE // num_images
    # np.round returns a float; cast so it can be used as a sample size and
    # as a slice bound in _sample_rois.
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Get the input image blob, formatted for caffe
    im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

    blobs = {'data': im_blob}

    if cfg.TRAIN.HAS_RPN:
        assert len(im_scales) == 1, "Single batch only"
        assert len(roidb) == 1, "Single batch only"
        # gt boxes: (x1, y1, x2, y2, cls)
        # Boxes are multiplied by the image scale so they match the resized
        # image; entries with gt class 0 are dropped.
        gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
        gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
        blobs['gt_boxes'] = gt_boxes
        # One row: [im_blob.shape[2], im_blob.shape[3], scale]
        # (presumably height, width for an N x C x H x W blob -- confirm
        # against im_list_to_blob).
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)
    else:  # not using RPN
        # Now, build the region of interest and label blobs
        rois_blob = np.zeros((0, 5), dtype=np.float32)
        labels_blob = np.zeros((0), dtype=np.float32)
        bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
        bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
        # all_overlaps = []
        for im_i in range(num_images):
            labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
                = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
                               num_classes)

            # Add to RoIs blob: each row is (batch index, roi coordinates)
            rois = _project_im_rois(im_rois, im_scales[im_i])
            batch_ind = im_i * np.ones((rois.shape[0], 1))
            rois_blob_this_image = np.hstack((batch_ind, rois))
            rois_blob = np.vstack((rois_blob, rois_blob_this_image))

            # Add to labels, bbox targets, and bbox loss blobs
            labels_blob = np.hstack((labels_blob, labels))
            bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets))
            bbox_inside_blob = np.vstack((bbox_inside_blob,
                                          bbox_inside_weights))
            # all_overlaps = np.hstack((all_overlaps, overlaps))

        # For debug visualizations
        # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps)

        blobs['rois'] = rois_blob
        blobs['labels'] = labels_blob

        if cfg.TRAIN.BBOX_REG:
            blobs['bbox_targets'] = bbox_targets_blob
            blobs['bbox_inside_weights'] = bbox_inside_blob
            # Outside weights are 1.0 wherever the inside weight is positive.
            blobs['bbox_outside_weights'] = \
                np.array(bbox_inside_blob > 0).astype(np.float32)

    return blobs


def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        roidb: a single image's roidb entry (dict); reads 'max_classes',
            'max_overlaps', 'boxes' and 'bbox_targets'.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs wanted for this image.
        num_classes: number of classes, forwarded to
            _get_bbox_regression_labels.

    Returns:
        Tuple (labels, overlaps, rois, bbox_targets, bbox_inside_weights)
        for the sampled RoIs, foreground first, then background.
    """
    # label = class RoI has max overlap with
    labels = roidb['max_classes']
    overlaps = roidb['max_overlaps']
    rois = roidb['boxes']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs. Cast to int: the count is used as a sample size and
    # as a slice bound below.
    fg_rois_per_this_image = int(np.minimum(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(np.minimum(bg_rois_per_this_image,
                                            bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0 (everything after the
    # foreground samples in keep_inds is background)
    labels[fg_rois_per_this_image:] = 0
    overlaps = overlaps[keep_inds]
    rois = rois[keep_inds]

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        roidb['bbox_targets'][keep_inds, :], num_classes)

    return labels, overlaps, rois, bbox_targets, bbox_inside_weights


def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.

    Args:
        roidb: sequence of roidb entries; reads 'image' (passed to
            cv2.imread) and 'flipped' from each.
        scale_inds: per-image indices into cfg.TRAIN.SCALES.

    Returns:
        Tuple (blob, im_scales): the result of im_list_to_blob over the
        processed images, and the list of per-image scale factors.
    """
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            # Reverse the second axis (horizontal flip).
            im = im[:, ::-1, :]
        # cfg.TRAIN.SCALES: list of target sizes the images are rescaled to.
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # prep_im_for_blob is imported from utils/blob.py. Per the
        # accompanying article it subtracts cfg.PIXEL_MEANS from the raw
        # image and then rescales it, returning the image and its scale
        # factor -- confirm against utils/blob.py.
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        # im_scales: scale factor applied to each image
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images. Per the accompanying article,
    # im_list_to_blob packs the list of images into a
    # (batch elem, channel, height, width) array whose height and width are
    # the maxima over the images in this batch -- confirm against
    # utils/blob.py.
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales


def _project_im_rois(im_rois, im_scale_factor):
    """Project image RoIs into the rescaled training image."""
    rois = im_rois * im_scale_factor
    return rois


def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Bounding-box regression targets are stored in a compact form in the
    roidb.

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets). The loss weights
    are similarly expanded.

    Args:
        bbox_target_data: 2-D array whose column 0 is the class and whose
            remaining columns are the regression targets.
        num_classes: number of classes K.

    Returns:
        bbox_target_data (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]
    for ind in inds:
        # The class is read out of the target array and may be a float;
        # cast to int so it can be used to build slice bounds.
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
    return bbox_targets, bbox_inside_weights


def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
    """Visualize a mini-batch for debugging.

    Shows each RoI in rois_blob as a red rectangle over its source image and
    prints the RoI's label and overlap.
    """
    import matplotlib.pyplot as plt
    for i in range(rois_blob.shape[0]):
        rois = rois_blob[i, :]
        # Column 0 of the rois blob is the batch index, stored in a float
        # array; cast to int before using it as an index.
        im_ind = int(rois[0])
        roi = rois[1:]
        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
        # Add back the pixel means and reverse the channel order for display.
        im += cfg.PIXEL_MEANS
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        cls = labels_blob[i]
        plt.imshow(im)
        # Single-argument print call: same output on Python 2 and Python 3.
        print('class:  {}  overlap:  {}'.format(cls, overlaps[i]))
        plt.gca().add_patch(
            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
                          roi[3] - roi[1], fill=False,
                          edgecolor='r', linewidth=3)
            )
        plt.show()
solver.step(1)-》reshape-》forward-》_get_next_minibatch-》_get_next_minibatch_inds-》(前面在layer.py里,现在进入minibatch.py组建真正的blob)get_minibatch
4.1 cfg.TRAIN.SCALES: 图片被缩放的target_size列表
4.2 random_scale_inds:列表的随机索引组成的numpy,大小是roidb的长度
4.3 cfg.PIXEL_MEANS: 原始图片会集体减去该值达到mean
4.4 im_scales: 每张图片的缩放率
缩放率的求法(对每张图片): im_scale = target_size / min(width, height);
if round(im_scale * max(width, height)) > cfg.TRAIN.MAX_SIZE:
    im_scale = cfg.TRAIN.MAX_SIZE / max(width, height)
4.5 prep_im_for_blob: 在utils/blob.py中;用于将图片减去均值(cfg.PIXEL_MEANS)后再缩放。
4.6 im_list_to_blob(ims): 将以list形式存放的图片数据处理成(batch elem, channel, height, width)的im_blob形式,height,width用的是此次计算所有图片的最大值
4.7 blob是一个字典:data,一个batch的处理过的所有图片数据,即上面的im_blob;
im_info, [im_blob.shape[2], im_blob.shape[3], im_scales[0]]
gt_boxes, 是前四列是box的值,第五列是box的类别。
box=原box*im_scales
blob与name_to_top对应,方便把blob数据放进top
!!!minibatch.py中34行的代码(assert len(im_scales) == 1, "Single batch only")表明,在使用RPN(即cfg.TRAIN.HAS_RPN为True)时,batchsize即cfg.TRAIN.IMS_PER_BATCH只能是1
- faster rcnn源码解读(六)之minibatch
- faster rcnn源码解读(六)之minibatch
- Faster RCNN minibatch.py解读
- faster rcnn 源码解读
- faster rcnn 源码解读
- Faster RCNN minibatch.py
- faster rcnn源码解读总结
- faster rcnn源码解读总结
- faster rcnn 源码解读1
- faster rcnn源码解读2
- faster rcnn源码解读(五)之layer(网络里的input-data)
- faster rcnn源码解读(五)之layer(网络里的input-data)
- faster rcnn源码解读(三)train_faster_rcnn_alt_opt.py
- faster rcnn源码解读(三)train_faster_rcnn_alt_opt.py
- py-faster-rcnn源码解读系列
- py-faster-rcnn源码解读系列
- 针对Faster RCNN具体细节以及源码的解读之RoIPooling层
- 针对Faster RCNN具体细节以及源码的解读之SmoothL1Loss层
- 一种莫名的图像修复算法
- 让MySql支持表情符号(MySQL中4字节utf8字符保存方法)
- 51nod 1183 编辑距离 DP
- UVA10050Hartals
- 字符串编辑距离(相似度)
- faster rcnn源码解读(六)之minibatch
- 素数距离
- jvm垃圾收集器配置-2
- 会话管理之Cookie
- SpringMVC+AJAX处理浏览器无法通过put delete方式请求问题
- Matrix Calculus
- java入门第一季_1.6_数组
- tcp之select多路复用
- xml文档声明