caffe python layer
Most of Caffe's layers are written in C++, and thanks to C++'s efficiency, networks train quickly. But sometimes you need to write your own input layer to handle a nonstandard data source — for example, when you want to sample patches from images on the fly rather than converting everything to LMDB. In that case, consider writing the layer directly in Python. An input layer doesn't need GPU acceleration, so it is also fairly easy to write.
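Note: before any of this works, Caffe itself must be compiled with Python layer support. In the stock Makefile.config that means uncommenting the following line before building:

WITH_PYTHON_LAYER := 1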
How to use a Python layer
First, look at an example from the web (from http://chrischoy.github.io/research/caffe-python-layer/):
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}
Here type is always Python; top and bottom work exactly as in ordinary layers. module is the name of your Python module (usually the filename), and layer is the name of the class defined in that module.
How to write a Python layer
Here we take the code released with the paper Fully Convolutional Networks for Semantic Segmentation as an example of how to write a Python layer.
import caffe

import numpy as np
from PIL import Image

import random


class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")
        """
        # config
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. segv11alid is the set of segval11 that does not intersect with
        SBDD. Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")
        """
        # config
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label
Each class is a layer, and the class name is what goes into the layer parameter. Both of these are data input layers; since they must produce a data blob and a label blob, they have two tops and no bottom.
The classes inherit directly from caffe.Layer and must override the setup(), reshape(), forward(), and backward() methods; any other methods can be defined freely, with no restrictions.
setup() does the one-time work when the layer is created, such as initializing the data the layer needs.
reshape() fetches the data and shapes it into a four-dimensional blob. It is called every time data is fetched.
forward() is the forward pass of the network; here it simply passes the fetched data onward, since there is no other computation.
backward() is the backward pass; a data layer has nothing to back-propagate, so it simply does pass. A minimal skeleton tying the four methods together is sketched below.
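The IdentityLayer below is a made-up pass-through example (not from the FCN code) that only illustrates the caffe.Layer interface:

import caffe

class IdentityLayer(caffe.Layer):
    """A minimal pass-through layer: copies bottom[0] to top[0]."""

    def setup(self, bottom, top):
        # one-time initialization; validate the layer configuration
        if len(bottom) != 1 or len(top) != 1:
            raise Exception("Need exactly one bottom and one top.")

    def reshape(self, bottom, top):
        # called before every forward; make the top match the bottom
        top[0].reshape(*bottom[0].data.shape)

    def forward(self, bottom, top):
        top[0].data[...] = bottom[0].data

    def backward(self, top, propagate_down, bottom):
        # pass the gradient straight through
        if propagate_down[0]:
            bottom[0].diff[...] = top[0].diff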
PS
Here I gather some related material for reference.
1. The official Caffe site now has some pycaffe material, but since Caffe is updated often and the material could be removed at any time, I excerpt it here.
File: pyloss.py
import caffe
import numpy as np


class EuclideanLossLayer(caffe.Layer):
    """
    Compute the Euclidean Loss in the same manner as the C++
    EuclideanLossLayer to demonstrate the class interface for developing
    layers in Python.
    """

    def setup(self, bottom, top):
        # check input pair
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        # check input dimensions match
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        # difference is shape of inputs
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        # gradient of the loss: d(loss)/d(bottom[0]) =  diff / num
        #                       d(loss)/d(bottom[1]) = -diff / num
        for i in range(2):
            if not propagate_down[i]:
                continue
            if i == 0:
                sign = 1
            else:
                sign = -1
            bottom[i].diff[...] = sign * self.diff / bottom[i].num
The following shows how to use this layer:
linreg.prototxt
name: 'LinearRegressionExample'
# define a simple network for linear regression on dummy data
# that computes the loss by a PythonLayer.
layer {
  type: 'DummyData'
  name: 'x'
  top: 'x'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
layer {
  type: 'DummyData'
  name: 'y'
  top: 'y'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
# include InnerProduct layers for parameters
# so the net will need backward
layer {
  type: 'InnerProduct'
  name: 'ipx'
  top: 'ipx'
  bottom: 'x'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'InnerProduct'
  name: 'ipy'
  top: 'ipy'
  bottom: 'y'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer.
  # since PythonLayer inherits directly from Layer, this isn't automatically
  # known to Caffe
  loss_weight: 1
}
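To try the net from Python, something like the following should work, assuming pyloss.py is on $PYTHONPATH (a sketch, not from the original post):

import caffe

# load the net in TRAIN phase so the loss layer participates in backward
net = caffe.Net('linreg.prototxt', caffe.TRAIN)
net.forward()                  # runs the Python EuclideanLossLayer
print(net.blobs['loss'].data)  # the scalar loss value
net.backward()                 # gradients flow through the Python layer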
pascal_multilabel_datalayers.py
# imports
import json
import time
import pickle
import scipy.misc
import skimage.io
import caffe

import numpy as np
import os.path as osp

from xml.dom import minidom
from random import shuffle
from threading import Thread
from PIL import Image

from tools import SimpleTransformer


class PascalMultilabelDataLayerSync(caffe.Layer):

    """
    This is a simple syncronous datalayer for training a multilabel model on
    PASCAL.
    """

    def setup(self, bottom, top):

        self.top_names = ['data', 'label']

        # === Read input parameters ===

        # params is a python dictionary with layer parameters.
        params = eval(self.param_str)

        # Check the paramameters for validity.
        check_params(params)

        # store input as class variables
        self.batch_size = params['batch_size']

        # Create a batch loader to load the images.
        self.batch_loader = BatchLoader(params, None)

        # === reshape tops ===
        # since we use a fixed input image size, we can shape the data layer
        # once. Else, we'd have to do it in the reshape call.
        top[0].reshape(
            self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])
        # Note the 20 channels (because PASCAL has 20 classes.)
        top[1].reshape(self.batch_size, 20)

        print_info("PascalMultilabelDataLayerSync", params)

    def forward(self, bottom, top):
        """
        Load data.
        """
        for itt in range(self.batch_size):
            # Use the batch loader to load the next image.
            im, multilabel = self.batch_loader.load_next_image()

            # Add directly to the caffe data layer
            top[0].data[itt, ...] = im
            top[1].data[itt, ...] = multilabel

    def reshape(self, bottom, top):
        """
        There is no need to reshape the data, since the input is of fixed size
        (rows and columns)
        """
        pass

    def backward(self, top, propagate_down, bottom):
        """
        These layers does not back propagate
        """
        pass


class BatchLoader(object):

    """
    This class abstracts away the loading of images.
    Images can either be loaded singly, or in a batch. The latter is used for
    the asyncronous data layer to preload batches while other processing is
    performed.
    """

    def __init__(self, params, result):
        self.result = result
        self.batch_size = params['batch_size']
        self.pascal_root = params['pascal_root']
        self.im_shape = params['im_shape']
        # get list of image indexes.
        list_file = params['split'] + '.txt'
        self.indexlist = [line.rstrip('\n') for line in open(
            osp.join(self.pascal_root, 'ImageSets/Main', list_file))]
        self._cur = 0  # current image
        # this class does some simple data-manipulations
        self.transformer = SimpleTransformer()

        print "BatchLoader initialized with {} images".format(
            len(self.indexlist))

    def load_next_image(self):
        """
        Load the next image in a batch.
        """
        # Did we finish an epoch?
        if self._cur == len(self.indexlist):
            self._cur = 0
            shuffle(self.indexlist)

        # Load an image
        index = self.indexlist[self._cur]  # Get the image index
        image_file_name = index + '.jpg'
        im = np.asarray(Image.open(
            osp.join(self.pascal_root, 'JPEGImages', image_file_name)))
        im = scipy.misc.imresize(im, self.im_shape)  # resize

        # do a simple horizontal flip as data augmentation
        flip = np.random.choice(2)*2-1
        im = im[:, ::flip, :]

        # Load and prepare ground truth
        multilabel = np.zeros(20).astype(np.float32)
        anns = load_pascal_annotation(index, self.pascal_root)
        for label in anns['gt_classes']:
            # in the multilabel problem we don't care how MANY instances
            # there are of each class. Only if they are present.
            # The "-1" is b/c we are not interested in the background
            # class.
            multilabel[label - 1] = 1

        self._cur += 1
        return self.transformer.preprocess(im), multilabel


def load_pascal_annotation(index, pascal_root):
    """
    This code is borrowed from Ross Girshick's FAST-RCNN code
    (https://github.com/rbgirshick/fast-rcnn).
    It parses the PASCAL .xml metadata files.
    See publication for further details: (http://arxiv.org/abs/1504.08083).

    Thanks Ross!
    """
    classes = ('__background__',  # always index 0
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor')
    class_to_ind = dict(zip(classes, xrange(21)))

    filename = osp.join(pascal_root, 'Annotations', index + '.xml')
    # print 'Loading: {}'.format(filename)

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, 21), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        cls = class_to_ind[
            str(get_data_from_tag(obj, "name")).lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'index': index}


def check_params(params):
    """
    A utility function to check the parameters for the data layers.
    """
    assert 'split' in params.keys(
    ), 'Params must include split (train, val, or test).'

    required = ['batch_size', 'pascal_root', 'im_shape']
    for r in required:
        assert r in params.keys(), 'Params must include {}'.format(r)


def print_info(name, params):
    """
    Output some info regarding the class
    """
    print "{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
        name,
        params['split'],
        params['batch_size'],
        params['im_shape'])
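As a usage sketch (the paths and sizes here are placeholder assumptions), this data layer can be instantiated from Python's net specification, passing the parameter dictionary through param_str:

from caffe import layers as L

# placeholder parameters; point pascal_root at a real VOC devkit
data_params = dict(batch_size=128, im_shape=[227, 227],
                   split='train', pascal_root='/path/to/VOCdevkit/VOC2012')

# two tops (data, label); setup() recovers the dict via eval(self.param_str)
data, label = L.Python(module='pascal_multilabel_datalayers',
                       layer='PascalMultilabelDataLayerSync',
                       ntop=2, param_str=str(data_params))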
caffenet.py
from __future__ import print_function

from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2

# helper function for common structures

def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)

def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)

def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

def caffenet(lmdb, batch_size=256, include_acc=False):
    data, label = L.Data(source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
        transform_param=dict(crop_size=227, mean_value=[104, 117, 123], mirror=True))

    # the net itself
    conv1, relu1 = conv_relu(data, 11, 96, stride=4)
    pool1 = max_pool(relu1, 3, stride=2)
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
    pool2 = max_pool(relu2, 3, stride=2)
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
    conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
    conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
    pool5 = max_pool(relu5, 3, stride=2)
    fc6, relu6 = fc_relu(pool5, 4096)
    drop6 = L.Dropout(relu6, in_place=True)
    fc7, relu7 = fc_relu(drop6, 4096)
    drop7 = L.Dropout(relu7, in_place=True)
    fc8 = L.InnerProduct(drop7, num_output=1000)
    loss = L.SoftmaxWithLoss(fc8, label)

    if include_acc:
        acc = L.Accuracy(fc8, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)

def make_net():
    with open('train.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-train-lmdb'), file=f)

    with open('test.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-val-lmdb', batch_size=50, include_acc=True), file=f)

if __name__ == '__main__':
    make_net()
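After make_net() writes the prototxt files, training can be driven from Python too; a minimal sketch, assuming you have written a matching solver.prototxt that points at train.prototxt:

import caffe

caffe.set_mode_cpu()                         # or caffe.set_mode_gpu()
solver = caffe.SGDSolver('solver.prototxt')  # assumed solver file
solver.step(100)                             # run 100 training iterations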
tools.py
import numpy as np


class SimpleTransformer:

    """
    SimpleTransformer is a simple class for preprocessing and deprocessing
    images for caffe.
    """

    def __init__(self, mean=[128, 128, 128]):
        self.mean = np.array(mean, dtype=np.float32)
        self.scale = 1.0

    def set_mean(self, mean):
        """
        Set the mean to subtract for centering the data.
        """
        self.mean = mean

    def set_scale(self, scale):
        """
        Set the data scaling.
        """
        self.scale = scale

    def preprocess(self, im):
        """
        preprocess() emulate the pre-processing occuring in the vgg16 caffe
        prototxt.
        """
        im = np.float32(im)
        im = im[:, :, ::-1]  # change to BGR
        im -= self.mean
        im *= self.scale
        im = im.transpose((2, 0, 1))

        return im

    def deprocess(self, im):
        """
        inverse of preprocess()
        """
        im = im.transpose((1, 2, 0))
        im /= self.scale
        im += self.mean
        im = im[:, :, ::-1]  # change to RGB

        return np.uint8(im)


class CaffeSolver:

    """
    Caffesolver is a class for creating a solver.prototxt file. It sets
    default values and can export a solver parameter file.
    Note that all parameters are stored as strings. Strings variables are
    stored as strings in strings.
    """

    def __init__(self, testnet_prototxt_path="testnet.prototxt",
                 trainnet_prototxt_path="trainnet.prototxt", debug=False):

        self.sp = {}

        # critical:
        self.sp['base_lr'] = '0.001'
        self.sp['momentum'] = '0.9'

        # speed:
        self.sp['test_iter'] = '100'
        self.sp['test_interval'] = '250'

        # looks:
        self.sp['display'] = '25'
        self.sp['snapshot'] = '2500'
        self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!

        # learning rate policy
        self.sp['lr_policy'] = '"fixed"'

        # important, but rare:
        self.sp['gamma'] = '0.1'
        self.sp['weight_decay'] = '0.0005'
        self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
        self.sp['test_net'] = '"' + testnet_prototxt_path + '"'

        # pretty much never change these.
        self.sp['max_iter'] = '100000'
        self.sp['test_initialization'] = 'false'
        self.sp['average_loss'] = '25'  # this has to do with the display.
        self.sp['iter_size'] = '1'  # this is for accumulating gradients

        if (debug):
            self.sp['max_iter'] = '12'
            self.sp['test_iter'] = '1'
            self.sp['test_interval'] = '4'
            self.sp['display'] = '1'

    def add_from_file(self, filepath):
        """
        Reads a caffe solver prototxt file and updates the Caffesolver
        instance parameters.
        """
        with open(filepath, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                splitLine = line.split(':')
                self.sp[splitLine[0].strip()] = splitLine[1].strip()

    def write(self, filepath):
        """
        Export solver parameters to INPUT "filepath". Sorted alphabetically.
        """
        f = open(filepath, 'w')
        for key, value in sorted(self.sp.items()):
            if not(type(value) is str):
                raise TypeError('All solver parameters must be strings')
            f.write('%s: %s\n' % (key, value))
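CaffeSolver can generate exactly the kind of solver file used above; a quick usage sketch (filenames are placeholders):

from tools import CaffeSolver

# CaffeSolver itself adds the quoting that prototxt string values need
solver = CaffeSolver(trainnet_prototxt_path='train.prototxt',
                     testnet_prototxt_path='test.prototxt')
solver.sp['base_lr'] = '0.01'   # every value is stored as a string
solver.write('solver.prototxt')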