wider face data 在 faster rcnn 上的实践记录(caffe)
来源:互联网 发布:数据录入用什么软件 编辑:程序博客网 时间:2024/06/01 11:01
按照github上playerkk的工程进行实践,地址为:https://github.com/playerkk/face-py-faster-rcnn/blob/master/README.md
1.搭建faster rcnn
这个没什么好说的,参考rbg大神的官网即可,附上地址:https://github.com/rbgirshick/py-faster-rcnn
需要注意的是对于显卡是1080的,因为faster rcnn是基于老版本的cudnn,所以编译的时候会出现许多关于cudnn的报错问题。解决方案推荐的是用caffe中相关的文件替换掉faster rcnn中的相关文件。可参考地址:http://blog.csdn.net/u010733679/article/details/52221404。
2.克隆工程及下载预训练参数文件
git clone --recursive https://github.com/playerkk/face-py-faster-rcnn.git
克隆完成后,在工程的根目录下执行:
cd face-py-faster-rcnn
./data/scripts/fetch_faster_rcnn_models.sh
执行后会在data目录下出现 faster_rcnn_models.tgz。
3.下载wider face data数据
网站下载地址为:http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/
下载三个数据文件到一个目录中,我选择的是ubuntu的home目录下:
如图所示进行分布。
图中的最后一个文本文件从该网址下载:https://people.cs.umass.edu/~hzjiang//files/wider_face_train_annot.txt 。该标注文件采用的是FDDB的格式。
在如图所示的data目录下建立symlinks:
会在工程的data目录下出现链接,如上图所示。
4.下载预训练的Imagenet模型
在工程目录下执行上图所示命令。
接下来就是开始执行训练的过程:
在工程的根目录下执行命令:
++++++++++++++++++++++++2017.0224++++++++++++++++++++更新+++++++++++++++++++++++++++
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
更新工程后我的train_net.py文件内容为:
#!/usr/bin/env python

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Train a Fast R-CNN network on a region of interest database."""

# import matplotlib
# matplotlib.use('Agg')

import _init_paths
from fast_rcnn.train import get_training_roidb, train_net
from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from datasets.factory import get_imdb
import datasets.imdb
import caffe
import argparse
import pprint
import numpy as np
import sys


def parse_args():
    """
    Parse input arguments.

    Prints the help text and exits with status 1 when the script is
    invoked without any command-line argument.
    """
    parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
    parser.add_argument('--gpu', dest='gpu_id',
                        help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--solver', dest='solver',
                        help='solver prototxt',
                        default=None, type=str)
    parser.add_argument('--iters', dest='max_iters',
                        help='number of iterations to train',
                        default=40000, type=int)
    parser.add_argument('--weights', dest='pretrained_model',
                        help='initialize with pretrained model weights',
                        default=None, type=str)
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file',
                        default=None, type=str)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to train on',
                        default='voc_2007_trainval', type=str)
    parser.add_argument('--rand', dest='randomize',
                        help='randomize (do not use a fixed seed)',
                        action='store_true')
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args


def combined_roidb(imdb_names):
    """
    Build the training roidb for one or more datasets.

    `imdb_names` is a single dataset name or several names joined by '+'.
    Returns the tuple (imdb, roidb); when several datasets are given their
    roidbs are concatenated into the first one and a generic imdb named
    after the whole `imdb_names` string is returned.
    """
    def get_roidb(imdb_name):
        imdb = get_imdb(imdb_name)
        print('Loaded dataset `{:s}` for training'.format(imdb.name))
        imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
        print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
        roidb = get_training_roidb(imdb)
        return roidb

    roidbs = [get_roidb(s) for s in imdb_names.split('+')]
    roidb = roidbs[0]
    if len(roidbs) > 1:
        for r in roidbs[1:]:
            roidb.extend(r)
        imdb = datasets.imdb.imdb(imdb_names)
    else:
        imdb = get_imdb(imdb_names)
    return imdb, roidb


if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    # Config file first, then --set overrides on top of it.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.GPU_ID = args.gpu_id

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    # set up caffe
    caffe.set_mode_gpu()
    caffe.set_device(args.gpu_id)

    imdb, roidb = combined_roidb(args.imdb_name)
    print('{:d} roidb entries'.format(len(roidb)))

    output_dir = get_output_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))

    train_net(args.solver, roidb, output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)

# face.py:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

# import datasets.face
# import os
# import datasets.imdb as imdb
# import xml.dom.minidom as minidom
# import numpy as np
# import scipy.sparse
# import scipy.io as sio
# import utils.cython_bbox
# import cPickle
# import subprocess

import os
from datasets.imdb import imdb
import datasets.ds_utils as ds_utils
import xml.etree.ElementTree as ET
import numpy as np
import scipy.sparse
import scipy.io as sio
import utils.cython_bbox
import cPickle
import subprocess
import uuid
from voc_eval import voc_eval
from fast_rcnn.config import cfg
import cv2
import PIL
# `import PIL` alone does not load the Image submodule used below.
import PIL.Image


class face(imdb):
    """
    Image database for the WIDER face data, with a single 'face' class.

    The annotation file `<name>_face_<image_set>_annot.txt` is read from
    `devkit_path` when the object is constructed, and the resulting
    ground-truth roidb is kept in memory.
    """

    def __init__(self, image_set, split, devkit_path):
        """
        image_set   -- name of the subset, e.g. 'train' or 'test'
        split       -- split number; only stored, not read elsewhere in
                       this class
        devkit_path -- directory holding the annotation file and the
                       WIDER_<image_set>/images tree
        """
        imdb.__init__(self, 'wider')
        self._image_set = image_set         # {'train', 'test'}
        self._split = split                 # {1, 2, ..., 10}
        self._devkit_path = devkit_path     # /data2/hzjiang/Data/CS2
        # self._data_path = os.path.join(self._devkit_path, 'data')
        self._data_path = self._devkit_path
        self._classes = ('__background__',  # always index 0
                         'face')
        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
        self._image_ext = ['.png']
        self._image_index, self._gt_roidb = self._load_image_set_index()
        # Default to roidb handler
        self._roidb_handler = self.selective_search_roidb

        # Specific config options
        self.config = {'cleanup': True,
                       'use_salt': True,
                       'top_k': 2000}

        assert os.path.exists(self._devkit_path), \
            'Devkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
            'Path does not exist: {}'.format(self._data_path)

    def image_path_at(self, i):
        """
        Return the absolute path to image i in the image sequence.
        """
        return self.image_path_from_index(self._image_index[i])

    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.
        """
        # The index already carries its file extension, so `ext` is not
        # appended; the loop is kept as in the original listing.
        for ext in self._image_ext:
            image_path = os.path.join(self._data_path, index)
            if os.path.exists(image_path):
                break
        assert os.path.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path

    def _load_image_set_index(self):
        """
        Load the indexes listed in this dataset's image set file.

        Each record in the annotation file is: one line with the image
        name, one line with the number of boxes, then that many lines
        whose first four fields are read as x, y, width, height.

        Boxes are clamped to the image bounds; boxes whose width or height
        is not greater than 5 are dropped, and images left without any box
        are skipped entirely.

        Returns (image_index, gt_roidb): the list of image names and the
        matching list of ground-truth roidb dicts.
        """
        # # Example path to image set file:
        # # self._data_path + /ImageSets/val.txt
        # # read from file
        # image_set_file = 'split%d/%s_%d_annot.txt' % (self._fold, self._image_set, self._fold)
        # # image_set_file = os.path.join(self._devkit_path, image_set_file)
        # image_set_file = os.path.join('/home/hzjiang/Code/py-faster-rcnn/CS3-splits', image_set_file)
        image_set_file = self._name + '_face_' + self._image_set + '_annot.txt'
        image_set_file = os.path.join(self._devkit_path, image_set_file)

        # image_set_file = 'cs3_rand_train_annot.txt'
        # image_set_file = 'wider_dets_annot_from_cs3_model.txt'
        # image_set_file = 'wider_manual_annot.txt'

        assert os.path.exists(image_set_file), \
            'Path does not exist: {}'.format(image_set_file)

        image_index = []
        gt_roidb = []

        with open(image_set_file) as f:
            # print len(f.lines())
            lines = f.readlines()

            idx = 0
            while idx < len(lines):
                image_name = lines[idx].split('\n')[0]
                image_name = os.path.join('WIDER_%s/images' % self._image_set,
                                          image_name)
                # print image_name
                image_ext = os.path.splitext(image_name)[1].lower()
                # print image_ext
                assert(image_ext == '.png' or image_ext == '.jpg' or
                       image_ext == '.jpeg')

                # The image is opened only to read its size for clamping.
                image = PIL.Image.open(os.path.join(self._data_path, image_name))
                imw = image.size[0]
                imh = image.size[1]

                idx += 1
                num_boxes = int(lines[idx])
                # print num_boxes

                boxes = np.zeros((num_boxes, 4), dtype=np.uint16)
                gt_classes = np.zeros((num_boxes), dtype=np.int32)
                overlaps = np.zeros((num_boxes, self.num_classes),
                                    dtype=np.float32)

                for i in xrange(num_boxes):
                    idx += 1
                    coor = [float(v) for v in lines[idx].split()]

                    # (x, y, w, h) -> (x1, y1, x2, y2), clamped to the image
                    x1 = min(max(coor[0], 0), imw - 1)
                    y1 = min(max(coor[1], 0), imh - 1)
                    x2 = min(max(x1 + coor[2] - 1, 0), imw - 1)
                    y2 = min(max(y1 + coor[3] - 1, 0), imh - 1)

                    # NOTE(review): `boxes` is uint16, so a -1 written here
                    # cannot be stored as-is -- confirm the intended
                    # handling of NaN coordinates.
                    if np.isnan(x1):
                        x1 = -1
                    if np.isnan(y1):
                        y1 = -1
                    if np.isnan(x2):
                        x2 = -1
                    if np.isnan(y2):
                        y2 = -1

                    cls = self._class_to_ind['face']
                    boxes[i, :] = [x1, y1, x2, y2]
                    gt_classes[i] = cls
                    overlaps[i, cls] = 1.0

                widths = boxes[:, 2] - boxes[:, 0] + 1
                heights = boxes[:, 3] - boxes[:, 1] + 1
                keep_idx = np.where(np.bitwise_and(widths > 5, heights > 5))

                if len(keep_idx[0]) <= 0:
                    # No usable box: skip this image, move to next record.
                    idx += 1
                    continue

                boxes = boxes[keep_idx]
                gt_classes = gt_classes[keep_idx[0]]
                overlaps = overlaps[keep_idx[0], :]

                if not (boxes[:, 2] >= boxes[:, 0]).all():
                    print(boxes)
                    print(image_name)

                # print boxes
                assert (boxes[:, 2] >= boxes[:, 0]).all()
                assert (boxes[:, 3] >= boxes[:, 1]).all()

                overlaps = scipy.sparse.csr_matrix(overlaps)
                gt_roidb.append({'boxes': boxes,
                                 'gt_classes': gt_classes,
                                 'gt_overlaps': overlaps,
                                 'flipped': False,
                                 'image_name': image_name})
                image_index.append(image_name)

                idx += 1

            assert(idx == len(lines))

        return image_index, gt_roidb

    def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        with open(cache_file, 'wb') as fid:
            cPickle.dump(self._gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))

        return self._gt_roidb

    def selective_search_roidb(self):
        """
        Return the database of selective search regions of interest.
        Ground-truth ROIs are also included.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(self.cache_path,
                                  self.name + '_selective_search_roidb.pkl')

        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print('{} ss roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        if self._image_set != 'test':
            gt_roidb = self.gt_roidb()
            ss_roidb = self._load_selective_search_roidb(gt_roidb)
            # The module name `datasets` is not bound in this file; use the
            # imported `imdb` class to reach merge_roidbs.
            roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)
        else:
            roidb = self._load_selective_search_roidb(None)
            print(len(roidb))
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote ss roidb to {}'.format(cache_file))

        return roidb

    def _load_selective_search_roidb(self, gt_roidb):
        """
        Build a roidb from the 'all_boxes' entry of `<name>.mat` located
        in the devkit directory.
        """
        filename = os.path.abspath(os.path.join(self._devkit_path,
                                                self.name + '.mat'))
        assert os.path.exists(filename), \
            'Selective search data not found at: {}'.format(filename)
        raw_data = sio.loadmat(filename)['all_boxes'].ravel()

        box_list = []
        for i in xrange(raw_data.shape[0]):
            # Swap column pairs and shift by -1 (presumably 1-based
            # (y1, x1, y2, x2) from MATLAB -- TODO confirm).
            boxes = raw_data[i][:, (1, 0, 3, 2)] - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            box_list.append(boxes)

        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def selective_search_IJCV_roidb(self):
        """
        Return the database of selective search regions of interest.
        Ground-truth ROIs are also included.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = os.path.join(
            self.cache_path,
            '{:s}_selective_search_IJCV_top_{:d}_roidb.pkl'.
            format(self.name, self.config['top_k']))

        if os.path.exists(cache_file):
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print('{} ss roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        gt_roidb = self.gt_roidb()
        ss_roidb = self._load_selective_search_IJCV_roidb(gt_roidb)
        # See selective_search_roidb: `datasets` is not bound in this file.
        roidb = imdb.merge_roidbs(gt_roidb, ss_roidb)
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote ss roidb to {}'.format(cache_file))

        return roidb

    def _load_selective_search_IJCV_roidb(self, gt_roidb):
        """
        Build a roidb from per-image `.mat` files, keeping at most
        config['top_k'] boxes per image.
        """
        IJCV_path = os.path.abspath(os.path.join(self.cache_path, '..',
                                                 'selective_search_IJCV_data',
                                                 self.name))
        assert os.path.exists(IJCV_path), \
            'Selective search IJCV data not found at: {}'.format(IJCV_path)

        top_k = self.config['top_k']
        box_list = []
        for i in xrange(self.num_images):
            filename = os.path.join(IJCV_path, self.image_index[i] + '.mat')
            raw_data = sio.loadmat(filename)
            box_list.append((raw_data['boxes'][:top_k, :] - 1).astype(np.uint16))

        return self.create_roidb_from_box_list(box_list, gt_roidb)

    def _load_face_annotation(self, index):
        """
        Load image and bounding boxes info from txt files of face.
        """
        # NOTE: despite the summary above, the annotation is read from
        # Annotations/<index>.mat under the data path.
        filename = os.path.join(self._data_path, 'Annotations', index + '.mat')

        data = sio.loadmat(filename)

        num_objs = data['gt'].shape[0]

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix in xrange(num_objs):
            # Make pixel indexes 0-based
            coor = data['gt'][ix, :]
            x1 = float(coor[0]) - 1
            y1 = float(coor[1]) - 1
            x2 = float(coor[2]) - 1
            y2 = float(coor[3]) - 1
            cls = self._class_to_ind['face']
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)

        if not (boxes[:, 2] >= boxes[:, 0]).all():
            print(boxes)
            print(filename)

        assert (boxes[:, 2] >= boxes[:, 0]).all()

        return {'boxes': boxes,
                'gt_classes': gt_classes,
                'gt_overlaps': overlaps,
                'flipped': False}

    def _write_inria_results_file(self, all_boxes):
        """
        Write one detection results file per non-background class and
        return the competition id used in the file names.
        """
        use_salt = self.config['use_salt']
        comp_id = 'comp4'
        if use_salt:
            # Salt the id with the process id.
            comp_id += '-{}'.format(os.getpid())

        # VOCdevkit/results/comp4-44503_det_test_aeroplane.txt
        path = os.path.join(self._devkit_path, 'results', self.name, comp_id + '_')
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            print('Writing {} results file'.format(cls))
            filename = path + 'det_' + self._image_set + '_' + cls + '.txt'
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(self.image_index):
                    dets = all_boxes[cls_ind][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in xrange(dets.shape[0]):
                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                format(index, dets[k, -1],
                                       dets[k, 0] + 1, dets[k, 1] + 1,
                                       dets[k, 2] + 1, dets[k, 3] + 1))
        return comp_id

    def _do_matlab_eval(self, comp_id, output_dir='output'):
        """
        Run the MATLAB `voc_eval` wrapper on the written results files.
        """
        rm_results = self.config['cleanup']

        path = os.path.join(os.path.dirname(__file__),
                            'VOCdevkit-matlab-wrapper')
        cmd = 'cd {} && '.format(path)
        # `datasets` is not bound in this file; the MATLAB executable name
        # is taken from the imported config instead.
        cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB)
        cmd += '-r "dbstop if error; '
        cmd += 'setenv(\'LC_ALL\',\'C\'); voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\',{:d}); quit;"' \
               .format(self._devkit_path, comp_id,
                       self._image_set, output_dir, int(rm_results))
        print('Running:\n{}'.format(cmd))
        status = subprocess.call(cmd, shell=True)

    def evaluate_detections(self, all_boxes, output_dir):
        """
        Write the detection results files, then run the MATLAB evaluation.
        """
        comp_id = self._write_inria_results_file(all_boxes)
        self._do_matlab_eval(comp_id, output_dir)

    def competition_mode(self, on):
        """
        Turn the 'use_salt' and 'cleanup' config flags off (on=True) or
        back on (on=False).
        """
        if on:
            self.config['use_salt'] = False
            self.config['cleanup'] = False
        else:
            self.config['use_salt'] = True
            self.config['cleanup'] = True


if __name__ == '__main__':
    # NOTE(review): leftover debug hook -- `datasets.inria` is not defined
    # or imported in this file; confirm the intended class and arguments
    # before relying on it.
    d = datasets.inria('train', '')
    res = d.roidb
    from IPython import embed; embed()
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
之后在命令行中输入:voole@zhx:~/face-py-faster-rcnn$ ./experiments/scripts/faster_rcnn_end2end.sh 0 VGG16 wider
输出:
问题已经得到解决了:原因是我安装的easydict的版本是1.4,即使我配置了faster_rcnn_end2end.yml,easydict也并没有将参数传给train_net.py,所以才会出现上述问题,解决方案就是升级你的easydict。
- wider face data 在 faster rcnn 上的实践记录(caffe)
- wider face data 在 faster rcnn 上的实践记录(caffe)
- faster-rcnn的记录
- 仿照VOC2007制作自己的数据集,并在Caffe上训练Faster-RCNN
- 在Faster RCNN上运行自己的数据集
- faster rcnn在自己的数据集上训练
- faster rcnn+gtx1080+caffe
- faster rcnn caffe安装
- caffe+py-faster-rcnn
- Face Faster RCNN安装步骤和遇到的问题
- py-faster-rcnn:在windows上配置
- Ubuntu上编译Caffe和拓展应用(faster-rcnn, pvanet)的错误及解决方案
- ubuntu14.04在cpu上配置caffe以及py-faster-rcnn环境(可在odroid上实现)
- Ubuntu16.04+caffe的安装和Py-faster-rcnn在CPU电脑的安装-2
- Faster Rcnn 源码记录
- Caffe-faster-rcnn demo测试
- Face Detection with the Faster R-CNN (基于Faster RCNN的人脸检测)
- matlab faster-rcnn 的caffe的external(gpu/cpu)
- eclipse中中文注释乱码解决方法
- sql-基础(一)
- IDEA使用小心得
- [IO系统]13 通用块层-向通用块层提交IO
- STM32 ADC结合DMA数据采样与软件滤波处理
- wider face data 在 faster rcnn 上的实践记录(caffe)
- 股票实时数据接口提取地址
- LTS 轻量级分布式任务调度框架(Light Task Schedule)
- android手机获取短信、联系人,多媒体播放器
- 表情输入框的插件使用
- JavaScript数组整理(二)
- 浮点型数据在内存中存储的表示
- 面包屑导航----递归生成
- 删除xcode git 仓库