vlfeat--caltech101.m个人注释

来源：互联网 | 发布：图片大小修改软件 | 编辑：程序博客网 | 时间：2024/05/22 05:19

function phow_caltech101()
% PHOW_CALTECH101 Image classification in the Caltech-101 dataset
%   This program demonstrates how to use VLFeat to construct an image
%   classifier on the Caltech-101 data. The classifier uses PHOW
%   features (dense SIFT), spatial histograms of visual words, and a
%   Chi2 SVM. To speedup computation it uses VLFeat fast dense SIFT,
%   kd-trees, and homogeneous kernel map. The program also
%   demonstrates VLFeat PEGASOS SVM solver, although for this small
%   dataset other solvers such as LIBLINEAR can be more efficient.
%
%   By default 15 training images are used, which should result in
%   about 64% performance (a good performance considering that only a
%   single feature type is being used).
%
%   Call PHOW_CALTECH101 to train and test a classifier on a small
%   subset of the Caltech-101 data. Note that the program
%   automatically downloads a copy of the Caltech-101 data from the
%   Internet if it cannot find a local copy.
%
%   Edit the PHOW_CALTECH101 file to change the program configuration.
%
%   To run on the entire dataset change CONF.TINYPROBLEM to FALSE.
%
%   The Caltech-101 data is saved into CONF.CALDIR, which defaults to
%   'data/caltech-101'. Change this path to the desired location, for
%   instance to point to an existing copy of the Caltech-101 data.
%
%   The program can also be used to train a model on custom data by
%   pointing CONF.CALDIR to it. Just create a subdirectory for each
%   class and put the training images there. Make sure to adjust
%   CONF.NUMTRAIN accordingly.
%
%   Intermediate files are stored in the directory CONF.DATADIR. All
%   such files begin with the prefix CONF.PREFIX, which can be changed
%   to test different parameter settings without overriding previous
%   results.
%
%   The program saves the trained model in
%   <CONF.DATADIR>/<CONF.PREFIX>-model.mat. This model can be used to
%   test novel images independently of the Caltech data.
%
%     load('data/baseline-model.mat') ; # change to the model path
%     label = model.classify(model, im) ;
%

% Author: Andrea Vedaldi

% This file is part of the VLFeat library and is made available under
% the terms of the BSD license (see the COPYING file).

% --------------------------------------------------------------------
%                                                Default configuration
% --------------------------------------------------------------------

conf.calDir = 'data/caltech-101' ;   % where the Caltech-101 images live
conf.dataDir = 'data/' ;             % where intermediate files are written
conf.autoDownloadData = true ;       % download the dataset if it is missing
conf.numTrain = 15 ;                 % training images per class
conf.numTest = 15 ;                  % test images per class
conf.numClasses = 102 ;
conf.numWords = 600 ;                % size of the visual vocabulary
conf.numSpatialX = [2 4] ;
conf.numSpatialY = [2 4] ;
conf.quantizer = 'kdtree' ;
conf.svm.C = 10 ;

conf.svm.solver = 'sdca' ;
%conf.svm.solver = 'sgd' ;
%conf.svm.solver = 'liblinear' ;

conf.svm.biasMultiplier = 1 ;
conf.phowOpts = {'Step', 3} ;
conf.clobber = false ;               % true forces recomputation of cached files
conf.tinyProblem = true ;
conf.prefix = 'baseline' ;
conf.randSeed = 1 ;

% The tiny problem overrides the defaults with a much smaller setup.
if conf.tinyProblem
  conf.prefix = 'tiny' ;
  conf.numClasses = 5 ;
  conf.numSpatialX = 2 ;
  conf.numSpatialY = 2 ;
  conf.numWords = 300 ;
  conf.phowOpts = {'Verbose', 2, 'Sizes', 7, 'Step', 5} ;
end

% Paths of the intermediate files: <prefix>-vocab.mat, <prefix>-hists.mat,
% <prefix>-model.mat and <prefix>-result (extension appended when saving).
conf.vocabPath = fullfile(conf.dataDir, [conf.prefix '-vocab.mat']) ;
conf.histPath = fullfile(conf.dataDir, [conf.prefix '-hists.mat']) ;
conf.modelPath = fullfile(conf.dataDir, [conf.prefix '-model.mat']) ;
conf.resultPath = fullfile(conf.dataDir, [conf.prefix '-result']) ;

% Seed every random generator used below with the same fixed value so
% that repeated runs make the same random selections.
randn('state',conf.randSeed) ;
rand('state',conf.randSeed) ;
vl_twister('state',conf.randSeed) ;  % VLFeat's own random number generator

% --------------------------------------------------------------------
%                                            Download Caltech-101 data
% --------------------------------------------------------------------

if ~exist(conf.calDir, 'dir') || ...
   (~exist(fullfile(conf.calDir, 'airplanes'),'dir') && ...
    ~exist(fullfile(conf.calDir, '101_ObjectCategories', 'airplanes')))
  if ~conf.autoDownloadData
    error(...
      ['Caltech-101 data not found. ' ...
       'Set conf.autoDownloadData=true to download the required data.']) ;
  end
  vl_xmkdir(conf.calDir) ;
  % The two halves of the URL must be joined with '...': inside
  % brackets a bare newline starts a new row, and rows of different
  % length cannot be concatenated vertically.
  % NOTE(review): confirm that this URL is still reachable.
  calUrl = ['http://www.vision.caltech.edu/Image_Datasets/' ...
            'Caltech101/101_ObjectCategories.tar.gz'] ;
  fprintf('Downloading Caltech-101 data to ''%s''. This will take a while.', conf.calDir) ;
  untar(calUrl, conf.calDir) ;       % download and unpack into conf.calDir
end

% The archive unpacks into a '101_ObjectCategories' subdirectory; descend
% into it when the class folders are not directly under conf.calDir.
if ~exist(fullfile(conf.calDir, 'airplanes'),'dir')
  conf.calDir = fullfile(conf.calDir, '101_ObjectCategories') ;
end

% --------------------------------------------------------------------
%                                                           Setup data
% --------------------------------------------------------------------

classes = dir(conf.calDir) ;
classes = classes([classes.isdir]) ;           % keep directories only
% Drop the '.' and '..' entries by name rather than by position, since
% DIR does not guarantee that they are the first two entries.
classes = classes(~ismember({classes.name}, {'.', '..'})) ;
classes = {classes(1:conf.numClasses).name} ;  % first numClasses class names

images = {} ;
imageClass = {} ;
for ci = 1:length(classes)
  % JPEG files of this class
  ims = dir(fullfile(conf.calDir, classes{ci}, '*.jpg'))' ;
  % keep at most numTrain+numTest of them
  ims = vl_colsubset(ims, conf.numTrain + conf.numTest) ;
  % store paths relative to conf.calDir: <class>/<file>
  ims = cellfun(@(x)fullfile(classes{ci},x),{ims.name},'UniformOutput',false) ;
  images = {images{:}, ims{:}} ;
  % one class label (ci) per selected image
  imageClass{end+1} = ci * ones(1,length(ims)) ;
end

% Within every run of numTrain+numTest consecutive images, the first
% numTrain positions are training images and the rest are test images.
selTrain = find(mod(0:length(images)-1, conf.numTrain+conf.numTest) < conf.numTrain) ;
selTest = setdiff(1:length(images), selTrain) ;
imageClass = cat(2, imageClass{:}) ;           % flatten to one row of labels

% Model structure; vocab, w and b are filled in by the stages below.
model.classes = classes ;
model.phowOpts = conf.phowOpts ;
model.numSpatialX = conf.numSpatialX ;
model.numSpatialY = conf.numSpatialY ;
model.quantizer = conf.quantizer ;
model.vocab = [] ;
model.w = [] ;
model.b = [] ;
model.classify = @classify ;                   % handle to the classifier function

% --------------------------------------------------------------------
%                                                     Train vocabulary
% --------------------------------------------------------------------

% Recompute only if there is no cached vocabulary or clobber is set.
if ~exist(conf.vocabPath) || conf.clobber

  % Get some PHOW descriptors to train the dictionary
  selTrainFeats = vl_colsubset(selTrain, 30) ; % at most 30 training images
  descrs = {} ;
  %for ii = 1:length(selTrainFeats)
  parfor ii = 1:length(selTrainFeats)
    im = imread(fullfile(conf.calDir, images{selTrainFeats(ii)})) ;
    im = standarizeImage(im) ;
    % Only the descriptors are kept; the frames (first output) are dropped.
    [drop, descrs{ii}] = vl_phow(im, model.phowOpts{:}) ;
  end

  % Per the VL_PHOW documentation, each column of the descriptor matrix
  % is one 128-dimensional descriptor, and the frames are a 4 x M matrix:
  % rows 1:2 are the (x,y) centre, row 3 the contrast, row 4 the bin size.

  % Keep at most 10e4 (= 100000) descriptors.
  descrs = vl_colsubset(cat(2, descrs{:}), 10e4) ;
  descrs = single(descrs) ;

  % Quantize the descriptors to get the visual words: k-means with
  % conf.numWords centres and at most 50 iterations.
  vocab = vl_kmeans(descrs, conf.numWords, 'verbose', 'algorithm', 'elkan', 'MaxNumIterations', 50) ;
  save(conf.vocabPath, 'vocab') ;
else
  load(conf.vocabPath) ;                       % brings 'vocab' into scope
end

model.vocab = vocab ;

if strcmp(model.quantizer, 'kdtree')
  model.kdtree = vl_kdtreebuild(vocab) ;       % kd-tree over the visual words
end

% --------------------------------------------------------------------
%                                           Compute spatial histograms
% --------------------------------------------------------------------

if ~exist(conf.histPath) || conf.clobber
  hists = {} ;
  parfor ii = 1:length(images)
  % for ii = 1:length(images)
    fprintf('Processing %s (%.2f %%)\n', images{ii}, 100 * ii / length(images)) ;
    im = imread(fullfile(conf.calDir, images{ii})) ;
    hists{ii} = getImageDescriptor(model, im);
  end

  hists = cat(2, hists{:}) ;                   % one column per image
  save(conf.histPath, 'hists') ;
else
  load(conf.histPath) ;                        % brings 'hists' into scope
end

% --------------------------------------------------------------------
%                                                  Compute feature map
% --------------------------------------------------------------------

% Homogeneous kernel map of order 1 for the chi2 kernel; the result is
% what the linear SVM below is trained and evaluated on.
psix = vl_homkermap(hists, 1, 'kchi2', 'gamma', .5) ;

% --------------------------------------------------------------------
%                                                            Train SVM
% --------------------------------------------------------------------

if ~exist(conf.modelPath) || conf.clobber
  switch conf.svm.solver
    case {'sgd', 'sdca'}
      lambda = 1 / (conf.svm.C *  length(selTrain)) ;
      w = [] ;
      % One binary (one-vs-rest) classifier per class.
      parfor ci = 1:length(classes)
        perm = randperm(length(selTrain)) ;
        fprintf('Training model for class %s\n', classes{ci}) ;
        % +1 for images of class ci, -1 for all others
        y = 2 * (imageClass(selTrain) == ci) - 1 ;
        [w(:,ci) b(ci) info] = vl_svmtrain(psix(:, selTrain(perm)), y(perm), lambda, ...
          'Solver', conf.svm.solver, ...
          'MaxNumIterations', 50/lambda, ...
          'BiasMultiplier', conf.svm.biasMultiplier, ...
          'Epsilon', 1e-3);
      end

    case 'liblinear'
      svm = train(imageClass(selTrain)', ...
                  sparse(double(psix(:,selTrain))),  ...
                  sprintf(' -s 3 -B %f -c %f', ...
                          conf.svm.biasMultiplier, conf.svm.C), ...
                  'col') ;
      % The last column of svm.w holds the bias term.
      w = svm.w(:,1:end-1)' ;
      b =  svm.w(:,end)' ;

    otherwise
      % Without this branch an unknown solver would only fail further
      % down, with an undefined-variable error on 'b'.
      error('phow_caltech101:unknownSolver', ...
            'Unknown SVM solver ''%s''.', conf.svm.solver) ;
  end

  model.b = conf.svm.biasMultiplier * b ;
  model.w = w ;

  save(conf.modelPath, 'model') ;
else
  load(conf.modelPath) ;                       % brings 'model' into scope
end

% --------------------------------------------------------------------
%                                                Test SVM and evaluate
% --------------------------------------------------------------------

% Estimate the class of the test images: one row of scores per class,
% one column per image.
scores = model.w' * psix + model.b' * ones(1,size(psix,2)) ;
% imageEstClass is the row (class) index of the top score in each column.
[drop, imageEstClass] = max(scores, [], 1) ;

% Compute the confusion matrix (rows: true class, columns: estimate)
idx = sub2ind([length(classes), length(classes)], ...
              imageClass(selTest), imageEstClass(selTest)) ;
confus = zeros(length(classes)) ;
confus = vl_binsum(confus, ones(size(idx)), idx) ;

% Plots
figure(1) ; clf;
subplot(1,2,1) ;
imagesc(scores(:,[selTrain selTest])) ; title('Scores') ;
set(gca, 'ytick', 1:length(classes), 'yticklabel', classes) ;
subplot(1,2,2) ;
imagesc(confus) ;
title(sprintf('Confusion matrix (%.2f %% accuracy)', ...
              100 * mean(diag(confus)/conf.numTest) )) ;
print('-depsc2', [conf.resultPath '.ps']) ;
save([conf.resultPath '.mat'], 'confus', 'conf') ;

% -------------------------------------------------------------------------

function im = standarizeImage(im)
% STANDARIZEIMAGE Convert an image to single precision and cap its height
%   IM = STANDARIZEIMAGE(IM) converts IM with IM2SINGLE. If the result
%   has more than 480 rows it is resized to exactly 480 rows; the NaN
%   in the target size leaves the number of columns to IMRESIZE.
%   Images with 480 rows or fewer are not resized.
maxRows = 480 ;
im = im2single(im) ;
tooTall = size(im,1) > maxRows ;
if tooTall
  im = imresize(im, [maxRows NaN]) ;
end

% -------------------------------------------------------------------------

function hist = getImageDescriptor(model, im)
% GETIMAGEDESCRIPTOR Spatial histogram of visual words for one image
%   HIST = GETIMAGEDESCRIPTOR(MODEL, IM) standardizes IM, extracts PHOW
%   features with MODEL.PHOWOPTS, assigns each descriptor to a visual
%   word of MODEL.VOCAB, and builds one histogram per spatial level
%   MODEL.NUMSPATIALX(i) x MODEL.NUMSPATIALY(i). Each level histogram
%   is normalized to sum to one; the levels are stacked into a single
%   column which is normalized again.
%
%   MODEL.QUANTIZER must be 'vq' or 'kdtree' (the latter also needs
%   MODEL.KDTREE); any other value raises an error.
im = standarizeImage(im) ;
width = size(im,2) ;
height = size(im,1) ;
numWords = size(model.vocab, 2) ;   % one visual word per vocab column

% get PHOW features
[frames, descrs] = vl_phow(im, model.phowOpts{:}) ;

% quantize local descriptors into visual words
switch model.quantizer
  case 'vq'
    % word with the smallest distance to each descriptor
    [drop, binsa] = min(vl_alldist(model.vocab, single(descrs)), [], 1) ;
  case 'kdtree'
    % kd-tree lookup of each descriptor in the vocabulary, with the
    % 'MaxComparisons' option of VL_KDTREEQUERY set to 50
    binsa = double(vl_kdtreequery(model.kdtree, model.vocab, ...
                                  single(descrs), ...
                                  'MaxComparisons', 50)) ;
  otherwise
    % Without this branch an unsupported quantizer would only fail
    % below, with an undefined-variable error on 'binsa'.
    error('getImageDescriptor:unknownQuantizer', ...
          'Unknown quantizer ''%s''.', model.quantizer) ;
end

numLevels = length(model.numSpatialX) ;
hists = cell(1, numLevels) ;
for i = 1:numLevels
  % spatial bin of each feature: the image extent is cut into equal
  % intervals and the frame centre (x in row 1, y in row 2) is located
  binsx = vl_binsearch(linspace(1,width,model.numSpatialX(i)+1), frames(1,:)) ;
  binsy = vl_binsearch(linspace(1,height,model.numSpatialY(i)+1), frames(2,:)) ;

  % combined quantization: (row bin, column bin, word) -> linear index
  bins = sub2ind([model.numSpatialY(i), model.numSpatialX(i), numWords], ...
                 binsy,binsx,binsa) ;
  levelHist = zeros(model.numSpatialY(i) * model.numSpatialX(i) * numWords, 1) ;
  levelHist = vl_binsum(levelHist, ones(size(bins)), bins) ;
  hists{i} = single(levelHist / sum(levelHist)) ;
end
hist = cat(1,hists{:}) ;
hist = hist / sum(hist) ;

% -------------------------------------------------------------------------

function [className, score] = classify(model, im)
% CLASSIFY Predict the class of a single image
%   [CLASSNAME, SCORE] = CLASSIFY(MODEL, IM) computes the descriptor of
%   IM with GETIMAGEDESCRIPTOR, applies the same homogeneous kernel map
%   used in training, and scores it against MODEL.W and MODEL.B.
%   CLASSNAME is the entry of MODEL.CLASSES with the highest score and
%   SCORE is that score.
descriptor = getImageDescriptor(model, im) ;
featureMap = vl_homkermap(descriptor, 1, 'kchi2', 'gamma', .5) ;
classScores = model.w' * featureMap + model.b' ;
[score, winner] = max(classScores) ;
className = model.classes{winner} ;

0 0