caffe python layer


Most of Caffe's layers are written in C++, and thanks to C++'s efficiency a network can be trained quickly. Sometimes, however, we need to write our own input layer to handle a particular kind of data, for example when you want to sample patches from images and don't want to pack everything into an LMDB. In that case it is worth writing the layer directly in Python. An input layer also needs no GPU acceleration, so it is fairly easy to write.

How to use a Python layer

Let's first look at an example from the web (from http://chrischoy.github.io/research/caffe-python-layer/):

layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}

Here type is always Python; top and bottom work exactly as in ordinary layers. module is the name of your Python module, usually the file name, and layer is the name of the class defined in that module.
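
The same definition can also be emitted programmatically with pycaffe's NetSpec. A minimal sketch (the DummyData bottoms are placeholders standing in for 'ipx' and 'ipy'; passing loss_weight as a keyword assumes a reasonably recent net_spec):

import caffe
from caffe import layers as L

n = caffe.NetSpec()
# two dummy bottoms standing in for 'ipx' and 'ipy' above
n.ipx, n.ipy = L.DummyData(shape=[dict(dim=[10, 10])], ntop=2)
# module/layer name the Python class; the module must be importable from $PYTHONPATH
n.loss = L.Python(n.ipx, n.ipy,
                  module='pyloss', layer='EuclideanLossLayer',
                  loss_weight=1)
print(n.to_proto())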

How to write a Python layer

Here we take the code released with the paper Fully Convolutional Networks for Semantic Segmentation as the example and explain how a Python layer is written.

import caffe

import numpy as np
from PIL import Image

import random


class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")
        """
        # config
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")
        """
        # config
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label

Each class is a layer, and the class name is what goes into the layer parameter. Both classes are data input layers: since they have to produce a data blob and a label blob, they have two tops and no bottom.
A Python layer class inherits directly from caffe.Layer and must override the setup(), reshape(), forward(), and backward() methods; any other helper methods can be defined freely.
setup() does the work needed when the layer is created, such as initializing whatever data the layer requires.
reshape() fetches the data and shapes it into a four-dimensional blob; it is called before every forward pass.
forward() is the forward pass of the network; here it simply passes the fetched data on, since no other computation is needed.
backward() is the backward pass; a data layer has no gradient to propagate, so it is simply pass.
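
Assuming the two classes above are saved as voc_layers.py (the filename used in the FCN release), the layer reads its configuration from param_str, which setup() passes through eval(), so the net definition has to hand it a Python dict literal. A hedged sketch of the wiring, with placeholder paths:

import caffe
from caffe import layers as L

n = caffe.NetSpec()
# param_str is eval()'d inside setup(); the dict keys match the docstring above
pydata_params = dict(voc_dir='/path/to/PASCAL/VOC2011',
                     mean=(104.00698793, 116.66876762, 122.67891434),
                     split='val', seed=1337)
n.data, n.label = L.Python(module='voc_layers', layer='VOCSegDataLayer',
                           ntop=2, param_str=str(pydata_params))
print(n.to_proto())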

PS

Here I collect some related material for reference.
1. The caffe website now has a bit of pycaffe documentation, but since caffe is updated frequently and the pages may disappear at some point, I copy it here.
File: pyloss.py

import caffe
import numpy as np


class EuclideanLossLayer(caffe.Layer):
    """
    Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
    to demonstrate the class interface for developing layers in Python.
    """

    def setup(self, bottom, top):
        # check input pair
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        # check input dimensions match
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        # difference is shape of inputs
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        for i in range(2):
            if not propagate_down[i]:
                continue
            if i == 0:
                sign = 1
            else:
                sign = -1
            bottom[i].diff[...] = sign * self.diff / bottom[i].num

The following shows how to use this layer:
linreg.prototxt

name: 'LinearRegressionExample'
# define a simple network for linear regression on dummy data
# that computes the loss by a PythonLayer.
layer {
  type: 'DummyData'
  name: 'x'
  top: 'x'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
layer {
  type: 'DummyData'
  name: 'y'
  top: 'y'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
# include InnerProduct layers for parameters
# so the net will need backward
layer {
  type: 'InnerProduct'
  name: 'ipx'
  top: 'ipx'
  bottom: 'x'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'InnerProduct'
  name: 'ipy'
  top: 'ipy'
  bottom: 'y'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer.
  # since PythonLayer inherits directly from Layer, this isn't automatically
  # known to Caffe
  loss_weight: 1
}
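
A quick way to exercise this net from Python (a sketch; it assumes caffe was built with WITH_PYTHON_LAYER := 1 and that pyloss.py is on $PYTHONPATH):

import caffe

caffe.set_mode_cpu()
net = caffe.Net('linreg.prototxt', caffe.TEST)
out = net.forward()           # DummyData -> InnerProduct -> Python loss
print(float(out['loss']))     # scalar loss computed by EuclideanLossLayer
net.backward()                # exercises EuclideanLossLayer.backward()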

pascal_multilabel_datalayers.py

# imports
import json
import time
import pickle
import scipy.misc
import scipy.sparse  # used by load_pascal_annotation below
import skimage.io
import caffe

import numpy as np
import os.path as osp

from xml.dom import minidom
from random import shuffle
from threading import Thread
from PIL import Image

from tools import SimpleTransformer


class PascalMultilabelDataLayerSync(caffe.Layer):

    """
    This is a simple synchronous datalayer for training a multilabel model on
    PASCAL.
    """

    def setup(self, bottom, top):

        self.top_names = ['data', 'label']

        # === Read input parameters ===

        # params is a python dictionary with layer parameters.
        params = eval(self.param_str)

        # Check the parameters for validity.
        check_params(params)

        # store input as class variables
        self.batch_size = params['batch_size']

        # Create a batch loader to load the images.
        self.batch_loader = BatchLoader(params, None)

        # === reshape tops ===
        # since we use a fixed input image size, we can shape the data layer
        # once. Else, we'd have to do it in the reshape call.
        top[0].reshape(
            self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])
        # Note the 20 channels (because PASCAL has 20 classes.)
        top[1].reshape(self.batch_size, 20)

        print_info("PascalMultilabelDataLayerSync", params)

    def forward(self, bottom, top):
        """
        Load data.
        """
        for itt in range(self.batch_size):
            # Use the batch loader to load the next image.
            im, multilabel = self.batch_loader.load_next_image()

            # Add directly to the caffe data layer
            top[0].data[itt, ...] = im
            top[1].data[itt, ...] = multilabel

    def reshape(self, bottom, top):
        """
        There is no need to reshape the data, since the input is of fixed size
        (rows and columns)
        """
        pass

    def backward(self, top, propagate_down, bottom):
        """
        This layer does not back propagate
        """
        pass


class BatchLoader(object):

    """
    This class abstracts away the loading of images.
    Images can either be loaded singly, or in a batch. The latter is used for
    the asynchronous data layer to preload batches while other processing is
    performed.
    """

    def __init__(self, params, result):
        self.result = result
        self.batch_size = params['batch_size']
        self.pascal_root = params['pascal_root']
        self.im_shape = params['im_shape']
        # get list of image indexes.
        list_file = params['split'] + '.txt'
        self.indexlist = [line.rstrip('\n') for line in open(
            osp.join(self.pascal_root, 'ImageSets/Main', list_file))]
        self._cur = 0  # current image
        # this class does some simple data-manipulations
        self.transformer = SimpleTransformer()

        print "BatchLoader initialized with {} images".format(
            len(self.indexlist))

    def load_next_image(self):
        """
        Load the next image in a batch.
        """
        # Did we finish an epoch?
        if self._cur == len(self.indexlist):
            self._cur = 0
            shuffle(self.indexlist)

        # Load an image
        index = self.indexlist[self._cur]  # Get the image index
        image_file_name = index + '.jpg'
        im = np.asarray(Image.open(
            osp.join(self.pascal_root, 'JPEGImages', image_file_name)))
        im = scipy.misc.imresize(im, self.im_shape)  # resize

        # do a simple horizontal flip as data augmentation
        flip = np.random.choice(2)*2-1
        im = im[:, ::flip, :]

        # Load and prepare ground truth
        multilabel = np.zeros(20).astype(np.float32)
        anns = load_pascal_annotation(index, self.pascal_root)
        for label in anns['gt_classes']:
            # in the multilabel problem we don't care how MANY instances
            # there are of each class. Only if they are present.
            # The "-1" is b/c we are not interested in the background
            # class.
            multilabel[label - 1] = 1

        self._cur += 1
        return self.transformer.preprocess(im), multilabel


def load_pascal_annotation(index, pascal_root):
    """
    This code is borrowed from Ross Girshick's FAST-RCNN code
    (https://github.com/rbgirshick/fast-rcnn).
    It parses the PASCAL .xml metadata files.
    See publication for further details: (http://arxiv.org/abs/1504.08083).

    Thanks Ross!
    """
    classes = ('__background__',  # always index 0
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor')
    class_to_ind = dict(zip(classes, xrange(21)))

    filename = osp.join(pascal_root, 'Annotations', index + '.xml')
    # print 'Loading: {}'.format(filename)

    def get_data_from_tag(node, tag):
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, 21), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        cls = class_to_ind[
            str(get_data_from_tag(obj, "name")).lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'index': index}


def check_params(params):
    """
    A utility function to check the parameters for the data layers.
    """
    assert 'split' in params.keys(
    ), 'Params must include split (train, val, or test).'

    required = ['batch_size', 'pascal_root', 'im_shape']
    for r in required:
        assert r in params.keys(), 'Params must include {}'.format(r)


def print_info(name, params):
    """
    Output some info regarding the class
    """
    print "{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
        name,
        params['split'],
        params['batch_size'],
        params['im_shape'])
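
The BatchLoader class can also be exercised on its own as a quick sanity check (a sketch; the PASCAL root below is a placeholder and the module is assumed to be saved as pascal_multilabel_datalayers.py):

from pascal_multilabel_datalayers import BatchLoader, check_params

params = dict(batch_size=128, im_shape=[227, 227],
              split='train', pascal_root='/path/to/VOCdevkit/VOC2007')
check_params(params)
bl = BatchLoader(params, None)
im, multilabel = bl.load_next_image()
print(im.shape)          # (3, 227, 227): channel x height x width after preprocess()
print(multilabel.shape)  # (20,): one indicator per PASCAL class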

caffenet.py

from __future__ import print_function

from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2

# helper function for common structures

def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                                num_output=nout, pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)

def fc_relu(bottom, nout):
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)

def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

def caffenet(lmdb, batch_size=256, include_acc=False):
    data, label = L.Data(source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
        transform_param=dict(crop_size=227, mean_value=[104, 117, 123], mirror=True))

    # the net itself
    conv1, relu1 = conv_relu(data, 11, 96, stride=4)
    pool1 = max_pool(relu1, 3, stride=2)
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
    pool2 = max_pool(relu2, 3, stride=2)
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
    conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
    conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
    pool5 = max_pool(relu5, 3, stride=2)
    fc6, relu6 = fc_relu(pool5, 4096)
    drop6 = L.Dropout(relu6, in_place=True)
    fc7, relu7 = fc_relu(drop6, 4096)
    drop7 = L.Dropout(relu7, in_place=True)
    fc8 = L.InnerProduct(drop7, num_output=1000)
    loss = L.SoftmaxWithLoss(fc8, label)

    if include_acc:
        acc = L.Accuracy(fc8, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)

def make_net():
    with open('train.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-train-lmdb'), file=f)

    with open('test.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-val-lmdb', batch_size=50, include_acc=True), file=f)

if __name__ == '__main__':
    make_net()
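
make_net() only writes the prototxt files; training is then driven by a solver. A sketch (it assumes a solver.prototxt whose train_net/test_net point at the files written above):

import caffe

caffe.set_mode_gpu()
solver = caffe.get_solver('solver.prototxt')
solver.step(1000)                                # run 1000 SGD iterations
solver.net.save('caffenet_iter_1000.caffemodel')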

tools.py

import numpy as np


class SimpleTransformer:

    """
    SimpleTransformer is a simple class for preprocessing and deprocessing
    images for caffe.
    """

    def __init__(self, mean=[128, 128, 128]):
        self.mean = np.array(mean, dtype=np.float32)
        self.scale = 1.0

    def set_mean(self, mean):
        """
        Set the mean to subtract for centering the data.
        """
        self.mean = mean

    def set_scale(self, scale):
        """
        Set the data scaling.
        """
        self.scale = scale

    def preprocess(self, im):
        """
        preprocess() emulates the pre-processing occurring in the vgg16 caffe
        prototxt.
        """
        im = np.float32(im)
        im = im[:, :, ::-1]  # change to BGR
        im -= self.mean
        im *= self.scale
        im = im.transpose((2, 0, 1))

        return im

    def deprocess(self, im):
        """
        inverse of preprocess()
        """
        im = im.transpose(1, 2, 0)
        im /= self.scale
        im += self.mean
        im = im[:, :, ::-1]  # change to RGB

        return np.uint8(im)


class CaffeSolver:

    """
    CaffeSolver is a class for creating a solver.prototxt file. It sets default
    values and can export a solver parameter file.
    Note that all parameters are stored as strings.
    """

    def __init__(self, testnet_prototxt_path="testnet.prototxt",
                 trainnet_prototxt_path="trainnet.prototxt", debug=False):

        self.sp = {}

        # critical:
        self.sp['base_lr'] = '0.001'
        self.sp['momentum'] = '0.9'

        # speed:
        self.sp['test_iter'] = '100'
        self.sp['test_interval'] = '250'

        # looks:
        self.sp['display'] = '25'
        self.sp['snapshot'] = '2500'
        self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!

        # learning rate policy
        self.sp['lr_policy'] = '"fixed"'

        # important, but rare:
        self.sp['gamma'] = '0.1'
        self.sp['weight_decay'] = '0.0005'
        self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
        self.sp['test_net'] = '"' + testnet_prototxt_path + '"'

        # pretty much never change these.
        self.sp['max_iter'] = '100000'
        self.sp['test_initialization'] = 'false'
        self.sp['average_loss'] = '25'  # this has to do with the display.
        self.sp['iter_size'] = '1'  # this is for accumulating gradients

        if (debug):
            self.sp['max_iter'] = '12'
            self.sp['test_iter'] = '1'
            self.sp['test_interval'] = '4'
            self.sp['display'] = '1'

    def add_from_file(self, filepath):
        """
        Reads a caffe solver prototxt file and updates the CaffeSolver
        instance parameters.
        """
        with open(filepath, 'r') as f:
            for line in f:
                if line[0] == '#':
                    continue
                splitLine = line.split(':')
                self.sp[splitLine[0].strip()] = splitLine[1].strip()

    def write(self, filepath):
        """
        Export solver parameters to INPUT "filepath". Sorted alphabetically.
        """
        f = open(filepath, 'w')
        for key, value in sorted(self.sp.items()):
            if not(type(value) is str):
                raise TypeError('All solver parameters must be strings')
            f.write('%s: %s\n' % (key, value))
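
The CaffeSolver helper above can generate exactly the kind of solver file assumed in the caffenet.py example. A short sketch (the prototxt paths are placeholders):

import caffe
from tools import CaffeSolver

solver_cfg = CaffeSolver(trainnet_prototxt_path='trainnet.prototxt',
                         testnet_prototxt_path='testnet.prototxt')
solver_cfg.sp['base_lr'] = '0.01'   # all parameters are stored as strings
solver_cfg.write('solver.prototxt')
solver = caffe.get_solver('solver.prototxt')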