这里的代码位于 $caffe-root/examples 下,文件名称为00-classification.ipynb,可以在自己的电脑下用jupyter跑一下,加深记忆。


# Set up the Python environment: numpy for numerical routines and
# matplotlib for plotting.
import numpy as np
import matplotlib.pyplot as plt

# Display plots inside the notebook. This is the plain-Python spelling of the
# `%matplotlib inline` magic and therefore needs an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'inline')

# Set display defaults.
plt.rcParams['figure.figsize'] = (10, 10)        # large images
plt.rcParams['image.interpolation'] = 'nearest'  # interpolation mode
plt.rcParams['image.cmap'] = 'gray'              # grayscale output


# The caffe module needs to be on the Python path; we add it here explicitly.
import sys

caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)
sys.path.insert(0, caffe_root + 'python')

import caffe
# If you get "No module named _caffe", either you have not built pycaffe or you have the wrong path.

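如果 import caffe 出现错误(例如 "No module named _caffe"),说明 pycaffe 尚未编译或路径设置有误,需要重新编译 pycaffe。
因为此 notebook 位于 $caffe_root/examples 下,所以必须先把 $caffe_root/python 加入 Python 的模块搜索路径(即上面的 sys.path.insert),才能成功 import caffe。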


import os

# Use the pre-trained CaffeNet weights if they are already on disk;
# otherwise fetch them with the download script shipped with Caffe.
if os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
    print('CaffeNet found.')
else:
    print('Downloading pre-trained CaffeNet model...')
    # notebook shorthand:
    #   !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet
    get_ipython().system('../scripts/download_model_binary.py ../models/bvlc_reference_caffenet')


# Run on the CPU and load the network definition together with its weights.
caffe.set_mode_cpu()

model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)



# Data layer used in the TRAIN phase.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
# mean pixel / channel-wise mean instead of mean image
#  transform_param {
#    crop_size: 227
#    mean_value: 104
#    mean_value: 117
#    mean_value: 123
#    mirror: true
#  }
  data_param {
    source: "examples/imagenet/ilsvrc12_train_lmdb"
    batch_size: 256
    backend: LMDB
  }
}
# Data layer used in the TEST phase.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
# mean pixel / channel-wise mean instead of mean image
#  transform_param {
#    crop_size: 227
#    mean_value: 104
#    mean_value: 117
#    mean_value: 123
#    mirror: false
#  }
  data_param {
    source: "examples/imagenet/ilsvrc12_val_lmdb"
    batch_size: 50
    backend: LMDB
  }
}


# Input layer: the "data" blob is declared with shape 10 x 3 x 227 x 227.
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }
}



  • 默认的caffenet数据的图像的三色通道读入顺序是BGR
  • 输入的数据范围在[0, 255]之间,之后再减去均值(均值由均值文件计算得到)
  • 读入的图片数据,通道数在第三个位置,需要变换到第一个位置,即由[227 227 3]变为[3 227 227]
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print('mean-subtracted values: %s' % (list(zip('BGR', mu)),))
# mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2, 0, 1))     # move image channels to outermost dimension
transformer.set_mean('data', mu)                 # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)           # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR

注意:通道顺序需要由 RGB 转换为 BGR(RGB → BGR)。



# set the size of the input (we can skip this if we're happy
# with the default; we can also change it later, e.g., for different batch sizes)
net.blobs['data'].reshape(50,        # batch size
                          3,         # 3-channel (BGR) images
                          227, 227)  # image size is 227x227


# Load the example image, run it through the preprocessing pipeline and
# show the untouched original.
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)


# copy the image data into the memory allocated for the net
net.blobs['data'].data[...] = transformed_image

### perform classification
output = net.forward()

# probability vector of the first image in the batch (1000 classes in total)
output_prob = output['prob'][0]

# position of the largest entry in the probability vector
print('predicted class is: %s' % output_prob.argmax())
# predicted class is: 281

对于caffe Python 其他API的详细介绍可以参考:

# load ImageNet labels
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
    # notebook shorthand: !../data/ilsvrc12/get_ilsvrc_aux.sh
    get_ipython().system('../data/ilsvrc12/get_ilsvrc_aux.sh')

labels = np.loadtxt(labels_file, str, delimiter='\t')

print('output label: %s' % labels[output_prob.argmax()])
# output label: n02123045 tabby, tabby cat
# "Tabby cat" is correct! But let's also look at other top (but less confident predictions).


# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5]  # reverse sort and take five largest items

print('probabilities and labels:')
# list(...) so the pairs are also displayed when zip returns an iterator
list(zip(output_prob[top_inds], labels[top_inds]))

# time a forward pass on the CPU (notebook shorthand: %timeit net.forward())
get_ipython().run_line_magic('timeit', 'net.forward()')
# 1 loop, best of 3: 1.42 s per loop

# switch to GPU mode
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward()  # run once before timing to set up memory
get_ipython().run_line_magic('timeit', 'net.forward()')
# 10 loops, best of 3: 70.2 ms per loop
# the forward pass is clearly faster on the GPU


A net is not just a black box; let’s take a look at some of the parameters and intermediate activations.
First we’ll see how to read out the structure of the net in terms of activation and parameter shapes.
For each layer, let’s look at the activation shapes, which typically have the form (batch_size, channel_dim, height, width).
The activations are exposed as an OrderedDict, net.blobs.

# for each layer, show the output shape
for layer_name, blob in net.blobs.items():
    print(layer_name + '\t' + str(blob.data.shape))

# for each parameterized layer, show the shapes of param[0] and param[1]
for layer_name, param in net.params.items():
    print(layer_name + '\t' + str(param[0].data.shape) + ' ' + str(param[1].data.shape))

def vis_square(data):
    """Display a stack of images as one roughly square grid.

    Takes an array of shape (n, height, width) or (n, height, width, 3) and
    shows each (height, width) slice in a grid of about sqrt(n) by sqrt(n)
    cells using ``plt.imshow``.

    Args:
        data: numpy array holding the n images to show.
    """
    # normalize data for display; a constant input would otherwise divide
    # by zero and fill the picture with NaNs
    lowest = data.min()
    span = data.max() - lowest
    if span > 0:
        data = (data - lowest) / span
    else:
        data = np.zeros_like(data, dtype=float)

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),
                (0, 1), (0, 1))                 # add some space between filters
               + ((0, 0),) * (data.ndim - 3))   # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # tile the filters into an image
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data)
    plt.axis('off')


# the parameters are a list of [weights, biases]
filters = net.params['conv1'][0].data
# move axis 1 to the end before handing the filters to vis_square
vis_square(filters.transpose(0, 2, 3, 1))


对前36个feature map 的可视化

# conv1 output of the first image in the batch, first 36 feature maps only
feat = net.blobs['conv1'].data[0, :36]
vis_square(feat)


对pooling层的feature map的可视化

# pool5 output of the first image in the batch
feat = net.blobs['pool5'].data[0]
vis_square(feat)



# fc6 output of the first image: raw values on top, histogram of the
# positive values below
feat = net.blobs['fc6'].data[0]
plt.subplot(2, 1, 1)
plt.plot(feat.flat)
plt.subplot(2, 1, 2)
_ = plt.hist(feat.flat[feat.flat > 0], bins=100)



# final probability output for the first image in the batch
feat = net.blobs['prob'].data[0]
plt.figure(figsize=(15, 3))
plt.plot(feat.flat)



# download an image
my_image_url = "..."  # paste your URL here
# for example:
# my_image_url = ""
# notebook shorthand: !wget -O image.jpg $my_image_url
# The URL is quoted so that characters such as '&' or '?' reach wget intact
# instead of being interpreted by the shell.
get_ipython().system('wget -O image.jpg "$my_image_url"')

# transform it and copy it into the net
image = caffe.io.load_image('image.jpg')
net.blobs['data'].data[...] = transformer.preprocess('data', image)

# perform classification
net.forward()

# obtain the output probabilities
output_prob = net.blobs['prob'].data[0]

# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5]

plt.imshow(image)

print('probabilities and labels:')
# list(...) so the pairs are also displayed when zip returns an iterator
list(zip(output_prob[top_inds], labels[top_inds]))
