caffe学习笔记5-classification_demo.m学习

来源:互联网 发布:lurker软件怎么安装 编辑:程序博客网 时间:2024/06/05 18:15
function [scores, maxlabel] = classification_demo(im, use_gpu)
% [scores, maxlabel] = classification_demo(im, use_gpu)
%
% Image classification demo using BVLC CaffeNet.
% IMPORTANT: before running, download the pretrained BVLC CaffeNet weights
% from the Model Zoo (http://caffe.berkeleyvision.org/model_zoo.html).
%
% ****************************************************************************
% For detailed documentation and usage on Caffe's Matlab interface, please
% refer to Caffe Interface Tutorial at
% http://caffe.berkeleyvision.org/tutorial/interfaces.html#matlab
% ****************************************************************************
%
% input
%   im       color image as uint8 HxWx3 (optional; the bundled cat image is
%            used when omitted)
%   use_gpu  1 to use the GPU, 0 to use the CPU (optional; CPU when omitted)
%
% output
%   scores   1000-dimensional ILSVRC score vector (column vector, averaged
%            over the 10 oversampled crops)
%   maxlabel index of the highest score, i.e. the predicted class label
%
% You may need to do the following before you start matlab:
%  $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda-5.5/lib64
%  $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
% Or the equivalent based on where things are installed on your system
%
% Usage:
%  im = imread('../../examples/images/cat.jpg');
%  scores = classification_demo(im, 1);
%  [score, class] = max(scores);
%
% Five things to be aware of:
%   caffe uses row-major order
%   matlab uses column-major order
%   caffe uses BGR color channel order
%   matlab uses RGB color channel order
%   images need to have the data mean subtracted (i.e. be mean-centered)
%
% Data coming in from matlab needs to be in the order
%   [width, height, channels, images]
% where width is the fastest dimension.
% Here is the rough matlab for putting image data into the correct
% format in W x H x C with BGR channels:
%   % permute channels from RGB (matlab) to BGR (caffe)
%   im_data = im(:, :, [3, 2, 1]);
%   % flip width and height to make width the fastest dimension
%   % (this bridges matlab's column-major and caffe's row-major layouts)
%   im_data = permute(im_data, [2, 1, 3]);
%   % convert from uint8 to single (single-precision float)
%   im_data = single(im_data);
%   % reshape to a fixed size (e.g., 227x227) with bilinear interpolation
%   im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear');
%   % subtract mean_data (already in W x H x C with BGR channels)
%   im_data = im_data - mean_data;
% If you have multiple images, cat them with cat(4, ...)

% Add caffe/matlab to your Matlab search PATH to use matcaffe. The +caffe
% package folder holds the matcaffe .m interface used to drive Caffe
% networks from Matlab.
if exist('../+caffe', 'dir')
  addpath('..');
else
  error('Please run this demo from caffe/matlab/demo');
end

% Set caffe mode (GPU or CPU)
if exist('use_gpu', 'var') && use_gpu
  caffe.set_mode_gpu();
  gpu_id = 0;  % we will use the first gpu in this demo
  caffe.set_device(gpu_id);
else
  caffe.set_mode_cpu();
end

% Initialize the network using BVLC CaffeNet for image classification.
% Weights (parameter) file needs to be downloaded from Model Zoo.
model_dir = '../../models/bvlc_reference_caffenet/';  % model + weights folder
% Network description; note this is deploy.prototxt, which has no data layers
net_model = [model_dir 'deploy.prototxt'];
% Pretrained weights; must already have been downloaded to this location
net_weights = [model_dir 'bvlc_reference_caffenet.caffemodel'];
phase = 'test'; % run with phase test (so that dropout isn't applied)
if ~exist(net_weights, 'file')
  error('Please download CaffeNet from Model Zoo before you run this demo');
end

% Make sure caffe is reset when this function exits, whether it returns
% normally or an error is thrown below (bad image, failed forward pass, ...).
% Without this, a failure would leave the loaded net alive in the session.
caffe_cleanup = onCleanup(@() caffe.reset_all());  %#ok<NASGU>

% Initialize a network
net = caffe.Net(net_model, net_weights, phase);

if nargin < 1
  % No image supplied: for demo purposes we will use the cat image
  fprintf('using caffe/examples/images/cat.jpg as input image\n');
  im = imread('../../examples/images/cat.jpg');
end

% Prepare oversampled input.
% prepare_image returns data in caffe layout: Width x Height x Channel x Num
tic;  % start timing the preprocessing
input_data = {prepare_image(im)};
toc;  % report preprocessing time

% Do forward pass to get scores.
% The net forward function. It takes in a cell array of N-D arrays
% (where N == 4 here) containing data of input blob(s) and outputs a cell
% array containing data from output blob(s).
tic;
scores = net.forward(input_data);
toc;

% scores are now Channels x Num, where Channels == 1000 (one row per class,
% one column per crop). According to the original notes this is the same
% data as net.blobs('prob').get_data(), 'prob' being the last layer.
scores = scores{1};

% Take average scores over the 10 crops. mean(A, 2) averages along each row
% and yields a column vector with one averaged score per class.
scores = mean(scores, 2);

% Index of the largest averaged score; the value itself is discarded (~).
[~, maxlabel] = max(scores);

% caffe.reset_all() is invoked automatically via caffe_cleanup on exit.

% ------------------------------------------------------------------------
function crops_data = prepare_image(im)
% ------------------------------------------------------------------------
% Convert an image as returned by Matlab's imread into a 10-crop batch in
% caffe's data format.
%
% input
%   im          image as H x W x 3 (RGB) or H x W / H x W x 1 (grayscale)
%
% output
%   crops_data  single array, CROPPED_DIM x CROPPED_DIM x 3 x 10, laid out as
%               W x H x C x N with BGR channels and the mean subtracted:
%                 1..4  corner crops
%                 5     center crop
%                 6..10 mirrored versions of crops 1..5 (first dimension
%                       reversed)
%
% caffe/matlab/+caffe/imagenet/ilsvrc_2012_mean.mat contains mean_data that
% is already in W x H x C with BGR channels. The path is relative, so the
% current folder must be caffe/matlab/demo.
%
% Note from the original write-up: caffe ships helpers that cover most of
% the steps below, e.g.
%   im_data = caffe.io.load_image('./examples/images/cat.jpg');
%   mean_data = caffe.io.read_mean('./data/ilsvrc12/imagenet_mean.binaryproto');
%   im_data = imresize(im_data, [width, height]);
%   im_data = im_data - mean_data;
d = load('../+caffe/imagenet/ilsvrc_2012_mean.mat');
mean_data = d.mean_data;
IMAGE_DIM = 256;
CROPPED_DIM = 227;

% Grayscale input has a single plane, so the RGB->BGR indexing below would
% fail with an out-of-range index. Replicate it to three identical channels.
if size(im, 3) == 1
  im = repmat(im, [1 1 3]);
end

% Convert to caffe's data format: W x H x C with BGR channels
im_data = im(:, :, [3, 2, 1]);          % permute channels from RGB to BGR
im_data = permute(im_data, [2, 1, 3]);  % flip width and height
im_data = single(im_data);              % convert to single precision
% resize to IMAGE_DIM x IMAGE_DIM (256 x 256) using bilinear interpolation
im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear');
im_data = im_data - mean_data;          % subtract mean (W x H x C, BGR)

% oversample (4 corners, center, and their x-axis flips)
crops_data = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single');
span = 0:CROPPED_DIM-1;                  % offsets covered by one crop
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1; % first / last valid crop origin

% Four corner crops go into slots 1..4
n = 1;
for i = indices
  for j = indices
    crops_data(:, :, :, n) = im_data(i + span, j + span, :);
    n = n + 1;
  end
end

% Center crop goes into slot 5
center = floor(indices(2) / 2) + 1;
crops_data(:, :, :, 5) = im_data(center + span, center + span, :);

% Slots 6..10 are the mirrored copies of slots 1..5
crops_data(:, :, :, 6:10) = crops_data(end:-1:1, :, :, 1:5);



0 0