caffe学习笔记5-classification_demo.m学习

来源：互联网 | 发布：lurker软件怎么安装 | 编辑：程序博客网 | 时间：2024/06/05 18:15

function [scores, maxlabel] = classification_demo(im, use_gpu)
% [scores, maxlabel] = classification_demo(im, use_gpu)
%
% Image classification demo using BVLC CaffeNet.
%
% IMPORTANT: before you run this demo, you should download the trained
% BVLC CaffeNet weights from Model Zoo
% (http://caffe.berkeleyvision.org/model_zoo.html)
%
% ****************************************************************************
% For detailed documentation and usage on Caffe's Matlab interface, please
% refer to Caffe Interface Tutorial at
% http://caffe.berkeleyvision.org/tutorial/interfaces.html#matlab
% ****************************************************************************
%
% input
%   im       color image as uint8 HxWx3 (optional; the bundled cat image is
%            used when omitted)
%   use_gpu  1 to use the GPU, 0 to use the CPU (optional; CPU when omitted)
%
% output
%   scores   1000-dimensional ILSVRC score vector (column vector, averaged
%            over the 10 crops produced by prepare_image)
%   maxlabel the label (row index into scores) of the highest score
%
% You may need to do the following before you start matlab:
%  $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda-5.5/lib64
%  $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
% Or the equivalent based on where things are installed on your system
%
% Usage:
%  im = imread('../../examples/images/cat.jpg');
%  scores = classification_demo(im, 1);
%  [score, class] = max(scores);
%
% Five things to be aware of:
%   caffe uses row-major order
%   matlab uses column-major order
%   caffe uses BGR color channel order
%   matlab uses RGB color channel order
%   images need to have the data mean subtracted (i.e. be mean-centered)
%
% Data coming in from matlab needs to be in the order
%   [width, height, channels, images]
% where width is the fastest dimension.
% Here is the rough matlab for putting image data into the correct
% format in W x H x C with BGR channels:
%   % permute channels from RGB (matlab) to BGR (caffe)
%   im_data = im(:, :, [3, 2, 1]);
%   % flip width and height to make width the fastest dimension
%   % (bridges matlab's column-major and caffe's row-major layouts)
%   im_data = permute(im_data, [2, 1, 3]);
%   % convert from uint8 to single (single-precision floating point)
%   im_data = single(im_data);
%   % reshape to a fixed size (e.g., 227x227) with bilinear interpolation
%   im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear');
%   % subtract mean_data (already in W x H x C with BGR channels)
%   im_data = im_data - mean_data;
%
% If you have multiple images, cat them with cat(4, ...)

% Add caffe/matlab to the Matlab search path to use matcaffe. The +caffe
% folder holds the matcaffe .m interface used to drive caffe from matlab.
if exist('../+caffe', 'dir')
  addpath('..');
else
  error('Please run this demo from caffe/matlab/demo');
end

% Set caffe mode (GPU or CPU)
if exist('use_gpu', 'var') && use_gpu
  caffe.set_mode_gpu();
  gpu_id = 0;  % we will use the first gpu in this demo
  caffe.set_device(gpu_id);
else
  caffe.set_mode_cpu();
end

% Initialize the network using BVLC CaffeNet for image classification.
% Weights (parameter) file needs to be downloaded from Model Zoo.
model_dir = '../../models/bvlc_reference_caffenet/';  % model definition and weights live here
net_model = [model_dir 'deploy.prototxt'];  % deploy.prototxt: network description without data layers
net_weights = [model_dir 'bvlc_reference_caffenet.caffemodel'];  % must be downloaded beforehand
phase = 'test';  % run with phase test (so that dropout isn't applied)
if ~exist(net_weights, 'file')
  error('Please download CaffeNet from Model Zoo before you run this demo');
end

% Initialize a network
net = caffe.Net(net_model, net_weights, phase);

% Reset caffe when this function exits. Using onCleanup (instead of a plain
% call on the last line) guarantees the reset also happens when anything
% below raises an error, so the loaded net is not left allocated.
reset_guard = onCleanup(@() caffe.reset_all());  %#ok<NASGU>

if nargin < 1
  % For demo purposes we will use the cat image
  fprintf('using caffe/examples/images/cat.jpg as input image\n');
  im = imread('../../examples/images/cat.jpg');
end

% Prepare oversampled input.
% input_data is Width x Height x Channel x Num (prepare_image swaps the
% first two dimensions of the matlab image and emits 3 channels, 10 crops).
tic;  % start timing the preprocessing
input_data = {prepare_image(im)};
toc;  % print elapsed preprocessing time

% Do forward pass to get scores.
% scores are now Channels x Num, where Channels == 1000 (number of classes)
tic;
% The net forward function. It takes in a cell array of N-D arrays
% (where N == 4 here) containing data of input blob(s) and outputs a cell
% array containing data from output blob(s)
scores = net.forward(input_data);  % each column holds the scores of one crop
toc;

% Unwrap the first (only) output blob. Per the original author's note this
% matches reading the last layer via net.blobs('prob').get_data() after the
% forward pass.
scores = scores{1};
% mean(A, 2) averages along each row, yielding a column vector: here the
% per-class scores averaged over the 10 crops.
scores = mean(scores, 2);

% Keep only the index of the maximum (the class label); ~ discards the value.
[~, maxlabel] = max(scores);

% ------------------------------------------------------------------------
function crops_data = prepare_image(im)
% ------------------------------------------------------------------------
% Build the 10-crop oversampled network input from a single image.
%
% input
%   im          image as returned by Matlab's imread: HxWx3 RGB, or HxW
%               grayscale (a single channel is replicated to 3 channels)
%
% output
%   crops_data  CROPPED_DIM x CROPPED_DIM x 3 x 10 single array in caffe's
%               W x H x C x N layout with BGR channels:
%                 1-4   the four corner crops
%                 5     the center crop
%                 6-10  crops 1-5 flipped along the first (width) dimension
%
% Reads the mean image from ../+caffe/imagenet/ilsvrc_2012_mean.mat, so it
% must be run from caffe/matlab/demo.
% ------------------------------------------------------------------------
% caffe/matlab/+caffe/imagenet/ilsvrc_2012_mean.mat contains mean_data that
% is already in W x H x C with BGR channels
d = load('../+caffe/imagenet/ilsvrc_2012_mean.mat');
mean_data = d.mean_data;
IMAGE_DIM = 256;
CROPPED_DIM = 227;

% Alternative noted by the original author (not used here): caffe's own io
% helpers can replace the manual conversion below, roughly
%   im_data = caffe.io.load_image('./examples/images/cat.jpg');
%   mean_data = caffe.io.read_mean('./data/ilsvrc12/imagenet_mean.binaryproto');
%   im_data = imresize(im_data, [width, height]);  % resize using Matlab's imresize
%   im_data = im_data - mean_data;  % subtract mean_data (already in W x H x C, BGR)

% A grayscale image has no third dimension to reorder, so indexing channels
% [3, 2, 1] would raise an index error; replicate the single channel first.
if size(im, 3) == 1
  im = repmat(im, [1 1 3]);
end

% Convert an image returned by Matlab's imread to im_data in caffe's data
% format: W x H x C with BGR channels
im_data = im(:, :, [3, 2, 1]);  % permute channels from RGB to BGR
im_data = permute(im_data, [2, 1, 3]);  % flip width and height
im_data = single(im_data);  % convert from uint8 to single precision
im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear');  % resize to 256x256
im_data = im_data - mean_data;  % subtract mean_data (already in W x H x C, BGR)

% oversample (4 corners, center, and their x-axis flips)
crops_data = zeros(CROPPED_DIM, CROPPED_DIM, 3, 10, 'single');
indices = [0 IMAGE_DIM-CROPPED_DIM] + 1;  % 1-based start offsets of the corner crops
n = 1;
for i = indices
  for j = indices
    % corner crop n, and its flipped copy stored five slots later
    crops_data(:, :, :, n) = im_data(i:i+CROPPED_DIM-1, j:j+CROPPED_DIM-1, :);
    crops_data(:, :, :, n+5) = crops_data(end:-1:1, :, :, n);
    n = n + 1;
  end
end

% center crop (slot 5) and its flipped copy (slot 10)
center = floor(indices(2) / 2) + 1;
crops_data(:,:,:,5) = ...
  im_data(center:center+CROPPED_DIM-1,center:center+CROPPED_DIM-1,:);
crops_data(:,:,:,10) = crops_data(end:-1:1, :, :, 5);

0 0