% vlfeat -- caltech101.m, personal annotations
%
% Source: Internet.  Published: image-resizing software.  Editor: Program Blog Network.  Time: 2024/05/22 05:19
function phow_caltech101()
% PHOW_CALTECH101 Image classification in the Caltech-101 dataset
%   This program demonstrates how to use VLFeat to construct an image
%   classifier on the Caltech-101 data. The classifier uses PHOW
%   features (dense SIFT), spatial histograms of visual words, and a
%   Chi2 SVM. To speedup computation it uses VLFeat fast dense SIFT,
%   kd-trees, and homogeneous kernel map. The program also
%   demonstrates VLFeat PEGASOS SVM solver, although for this small
%   dataset other solvers such as LIBLINEAR can be more efficient.
%
%   By default 15 training images are used, which should result in
%   about 64% performance (a good performance considering that only a
%   single feature type is being used).
%
%   Call PHOW_CALTECH101 to train and test a classifier on a small
%   subset of the Caltech-101 data. Note that the program
%   automatically downloads a copy of the Caltech-101 data from the
%   Internet if it cannot find a local copy.
%
%   Edit the PHOW_CALTECH101 file to change the program configuration.
%
%   To run on the entire dataset change CONF.TINYPROBLEM to FALSE.
%
%   The Caltech-101 data is saved into CONF.CALDIR, which defaults to
%   'data/caltech-101'. Change this path to the desired location, for
%   instance to point to an existing copy of the Caltech-101 data.
%
%   The program can also be used to train a model on custom data by
%   pointing CONF.CALDIR to it. Just create a subdirectory for each
%   class and put the training images there. Make sure to adjust
%   CONF.NUMTRAIN accordingly.
%
%   Intermediate files are stored in the directory CONF.DATADIR. All
%   such files begin with the prefix CONF.PREFIX, which can be changed
%   to test different parameter settings without overriding previous
%   results.
%
%   The program saves the trained model in
%   <CONF.DATADIR>/<CONF.PREFIX>-model.mat. This model can be used to
%   test novel images independently of the Caltech data.
%
%     load('data/baseline-model.mat') ; # change to the model path
%     label = model.classify(model, im) ;
%

% Author: Andrea Vedaldi

% Copyright (C) 2011-2013 Andrea Vedaldi
% All rights reserved.
%
% This file is part of the VLFeat library and is made available under
% the terms of the BSD license (see the COPYING file).

% ---- Default configuration ----
conf.calDir = 'data/caltech-101' ;  % directory holding the Caltech-101 images
conf.dataDir = 'data/' ;            % directory for intermediate files
conf.autoDownloadData = true ;      % download the dataset when it is missing
conf.numTrain = 15 ;                % training images per class
conf.numTest = 15 ;                 % test images per class
conf.numClasses = 102 ;             % number of class folders to use
conf.numWords = 600 ;               % number of k-means centres (visual words)
conf.numSpatialX = [2 4] ;          % spatial histogram grid sizes along x
conf.numSpatialY = [2 4] ;          % spatial histogram grid sizes along y
conf.quantizer = 'kdtree' ;         % 'kdtree' or 'vq' (see getImageDescriptor)
conf.svm.C = 10 ;

conf.svm.solver = 'sdca' ;
%conf.svm.solver = 'sgd' ;
%conf.svm.solver = 'liblinear' ;

conf.svm.biasMultiplier = 1 ;
conf.phowOpts = {'Step', 3} ;
conf.clobber = false ;              % true forces cached results to be recomputed
conf.tinyProblem = true ;
conf.prefix = 'baseline' ;
conf.randSeed = 1 ;

% The tiny problem overrides the defaults with a smaller, faster setup.
if conf.tinyProblem
  conf.prefix = 'tiny' ;
  conf.numClasses = 5 ;
  conf.numSpatialX = 2 ;
  conf.numSpatialY = 2 ;
  conf.numWords = 300 ;
  conf.phowOpts = {'Verbose', 2, 'Sizes', 7, 'Step', 5} ;
end

% Paths of the intermediate files: <prefix>-vocab.mat, <prefix>-hists.mat,
% <prefix>-model.mat and <prefix>-result(.ps/.mat), all under conf.dataDir.
conf.vocabPath = fullfile(conf.dataDir, [conf.prefix '-vocab.mat']) ;
conf.histPath = fullfile(conf.dataDir, [conf.prefix '-hists.mat']) ;
conf.modelPath = fullfile(conf.dataDir, [conf.prefix '-model.mat']) ;
conf.resultPath = fullfile(conf.dataDir, [conf.prefix '-result']) ;

% Seed every random number generator with the same fixed value so that
% repeated runs draw the same random subsets.
randn('state',conf.randSeed) ;
rand('state',conf.randSeed) ;
vl_twister('state',conf.randSeed) ;

% --------------------------------------------------------------------
%                                            Download Caltech-101 data
% --------------------------------------------------------------------

% The data counts as present when an 'airplanes' folder exists either
% directly in conf.calDir or in its '101_ObjectCategories' subfolder.
if ~exist(conf.calDir, 'dir') || ...
   (~exist(fullfile(conf.calDir, 'airplanes'),'dir') && ...
    ~exist(fullfile(conf.calDir, '101_ObjectCategories', 'airplanes')))
  if ~conf.autoDownloadData
    error(...
      ['Caltech-101 data not found. ' ...
       'Set conf.autoDownloadData=true to download the required data.']) ;
  end
  vl_xmkdir(conf.calDir) ;
  % The '...' continuation is required: a bare line break inside [ ]
  % starts a new row, and the two pieces have different lengths, so the
  % concatenation would fail instead of producing one URL string.
  calUrl = ['http://www.vision.caltech.edu/Image_Datasets/' ...
    'Caltech101/101_ObjectCategories.tar.gz'] ;
  fprintf('Downloading Caltech-101 data to ''%s''. This will take a while.', conf.calDir) ;
  untar(calUrl, conf.calDir) ;  % fetch the archive and unpack it into conf.calDir
end

% Descend into the archive's top-level folder when the classes are not
% directly inside conf.calDir.
if ~exist(fullfile(conf.calDir, 'airplanes'),'dir')
  conf.calDir = fullfile(conf.calDir, '101_ObjectCategories') ;
end
% --------------------------------------------------------------------
%                                                           Setup data
% --------------------------------------------------------------------
classes = dir(conf.calDir) ;
classes = classes([classes.isdir]) ;  % keep directory entries only
% Remove the '.' and '..' entries by name rather than assuming that they
% are the first two entries of the listing.
classes = classes(~ismember({classes.name}, {'.', '..'})) ;
if numel(classes) < conf.numClasses
  error('Found %d class directories in ''%s'', but conf.numClasses is %d.', ...
        numel(classes), conf.calDir, conf.numClasses) ;
end
classes = {classes(1:conf.numClasses).name} ;

images = {} ;      % image paths relative to conf.calDir
imageClass = {} ;  % one vector of class indices per class, joined below
for ci = 1:length(classes)
  ims = dir(fullfile(conf.calDir, classes{ci}, '*.jpg'))' ;
  % Random subset of at most numTrain + numTest images of this class.
  ims = vl_colsubset(ims, conf.numTrain + conf.numTest) ;
  % Prefix each file name with its class folder.
  ims = cellfun(@(x)fullfile(classes{ci},x),{ims.name},'UniformOutput',false) ;
  images = {images{:}, ims{:}} ;
  imageClass{end+1} = ci * ones(1,length(ims)) ;
end
% Within each run of numTrain + numTest consecutive images the first
% numTrain are training images and the rest are test images.
% NOTE(review): this assumes every class contributed exactly
% numTrain + numTest images; verify for custom data.
selTrain = find(mod(0:length(images)-1, conf.numTrain+conf.numTest) < conf.numTrain) ;
selTest = setdiff(1:length(images), selTrain) ;
imageClass = cat(2, imageClass{:}) ;  % flatten to one row of class indices

% Model structure saved to disk and consumed by classify().
model.classes = classes ;
model.phowOpts = conf.phowOpts ;
model.numSpatialX = conf.numSpatialX ;
model.numSpatialY = conf.numSpatialY ;
model.quantizer = conf.quantizer ;
model.vocab = [] ;
model.w = [] ;
model.b = [] ;
model.classify = @classify ;  % function handle, so callers can use model.classify(model, im)

% --------------------------------------------------------------------
%                                                     Train vocabulary
% --------------------------------------------------------------------

% Recompute only when no cached vocabulary exists or clobber is set.
if ~exist(conf.vocabPath) || conf.clobber

  % Get some PHOW descriptors to train the dictionary
  selTrainFeats = vl_colsubset(selTrain, 30) ;  % random subset of 30 training images
  descrs = {} ;
  %for ii = 1:length(selTrainFeats)
  parfor ii = 1:length(selTrainFeats)
    im = imread(fullfile(conf.calDir, images{selTrainFeats(ii)})) ;
    im = standarizeImage(im) ;
    % Only the descriptors are kept; the frames output (drop) is unused.
    % Per the VLFeat vl_phow documentation the descriptors are 128 x M
    % and the frames are 4 x M (x, y, contrast, bin size).
    [drop, descrs{ii}] = vl_phow(im, model.phowOpts{:}) ;
  end

  % Random subset of at most 10e4 (= 100,000) descriptor columns.
  descrs = vl_colsubset(cat(2, descrs{:}), 10e4) ;
  descrs = single(descrs) ;

  % Quantize the descriptors to get the visual words: k-means with
  % conf.numWords centres, Elkan's algorithm, at most 50 iterations.
  vocab = vl_kmeans(descrs, conf.numWords, 'verbose', 'algorithm', 'elkan', 'MaxNumIterations', 50) ;
  save(conf.vocabPath, 'vocab') ;
else
  load(conf.vocabPath) ;  % brings the cached 'vocab' into the workspace
end

model.vocab = vocab ;

if strcmp(model.quantizer, 'kdtree')
  model.kdtree = vl_kdtreebuild(vocab) ;  % kd-tree over the visual words
end

% --------------------------------------------------------------------
%                                           Compute spatial histograms
% --------------------------------------------------------------------

if ~exist(conf.histPath) || conf.clobber
  hists = {} ;
  parfor ii = 1:length(images)
  % for ii = 1:length(images)
    fprintf('Processing %s (%.2f %%)\n', images{ii}, 100 * ii / length(images)) ;
    im = imread(fullfile(conf.calDir, images{ii})) ;
    hists{ii} = getImageDescriptor(model, im);
  end

  hists = cat(2, hists{:}) ;  % one descriptor column per image
  save(conf.histPath, 'hists') ;
else
  load(conf.histPath) ;  % brings the cached 'hists' into the workspace
end

% --------------------------------------------------------------------
%                                                  Compute feature map
% --------------------------------------------------------------------

% Homogeneous kernel map of order N = 1 for the chi2 kernel. Per the
% VLFeat documentation each input dimension expands to 2N+1 = 3
% dimensions, which lets a linear SVM approximate a chi2 SVM.
psix = vl_homkermap(hists, 1, 'kchi2', 'gamma', .5) ;

% --------------------------------------------------------------------
%                                                            Train SVM
% --------------------------------------------------------------------

if ~exist(conf.modelPath) || conf.clobber
  switch conf.svm.solver
    case {'sgd', 'sdca'}
      lambda = 1 / (conf.svm.C *  length(selTrain)) ;
      w = [] ;
      % One-vs-rest: one binary SVM per class.
      parfor ci = 1:length(classes)
        perm = randperm(length(selTrain)) ;  % shuffle the training order
        fprintf('Training model for class %s\n', classes{ci}) ;
        y = 2 * (imageClass(selTrain) == ci) - 1 ;  % +1 for class ci, -1 otherwise
        [w(:,ci) b(ci) info] = vl_svmtrain(psix(:, selTrain(perm)), y(perm), lambda, ...
          'Solver', conf.svm.solver, ...
          'MaxNumIterations', 50/lambda, ...
          'BiasMultiplier', conf.svm.biasMultiplier, ...
          'Epsilon', 1e-3);
      end

    case 'liblinear'
      svm = train(imageClass(selTrain)', ...
                  sparse(double(psix(:,selTrain))),  ...
                  sprintf(' -s 3 -B %f -c %f', ...
                          conf.svm.biasMultiplier, conf.svm.C), ...
                  'col') ;
      % The last column of svm.w is the bias term, the rest are weights.
      w = svm.w(:,1:end-1)' ;
      b =  svm.w(:,end)' ;
  end

  model.b = conf.svm.biasMultiplier * b ;
  model.w = w ;

  save(conf.modelPath, 'model') ;
else
  load(conf.modelPath) ;  % brings the cached 'model' into the workspace
end

% --------------------------------------------------------------------
%                                                Test SVM and evaluate
% --------------------------------------------------------------------

% Estimate the class of the test images
% scores has one row per class and one column per image.
scores = model.w' * psix + model.b' * ones(1,size(psix,2)) ;
% The row index of each column's maximum is the predicted class; the
% maximum value itself (drop) is not used.
[drop, imageEstClass] = max(scores, [], 1) ;

% Compute the confusion matrix (rows: true class, columns: predicted class)
idx = sub2ind([length(classes), length(classes)], ...
              imageClass(selTest), imageEstClass(selTest)) ;
confus = zeros(length(classes)) ;
confus = vl_binsum(confus, ones(size(idx)), idx) ;

% Plots
figure(1) ; clf;
subplot(1,2,1) ;
imagesc(scores(:,[selTrain selTest])) ; title('Scores') ;
set(gca, 'ytick', 1:length(classes), 'yticklabel', classes) ;
subplot(1,2,2) ;
imagesc(confus) ;
title(sprintf('Confusion matrix (%.2f %% accuracy)', ...
              100 * mean(diag(confus)/conf.numTest) )) ;
print('-depsc2', [conf.resultPath '.ps']) ;
save([conf.resultPath '.mat'], 'confus', 'conf') ;

% -------------------------------------------------------------------------
function im = standarizeImage(im)
% STANDARIZEIMAGE Convert an image to single precision and cap its height
%   IM = STANDARIZEIMAGE(IM) converts IM with IM2SINGLE and, when it has
%   more than 480 rows, resizes it to exactly 480 rows. The NaN in the
%   target size lets IMRESIZE choose the number of columns.

maxRows = 480 ;

im = im2single(im) ;
if size(im,1) > maxRows
  im = imresize(im, [maxRows NaN]) ;
end

% -------------------------------------------------------------------------
function hist = getImageDescriptor(model, im)
% GETIMAGEDESCRIPTOR Spatial histogram of visual words for one image
%   HIST = GETIMAGEDESCRIPTOR(MODEL, IM) standardizes IM, extracts PHOW
%   features with MODEL.PHOWOPTS, assigns each descriptor to a visual
%   word of MODEL.VOCAB using MODEL.QUANTIZER ('vq' or 'kdtree'), and
%   builds one normalized histogram per spatial grid given by
%   MODEL.NUMSPATIALX / MODEL.NUMSPATIALY. The histograms are stacked
%   into a single column that is normalized again to sum to one.

im = standarizeImage(im) ;
imHeight = size(im,1) ;
imWidth = size(im,2) ;
vocabSize = size(model.vocab, 2) ;  % one vocabulary column per visual word

% Dense PHOW features: feature frames and their descriptors.
[frames, descrs] = vl_phow(im, model.phowOpts{:}) ;
descrs = single(descrs) ;

% Visual word index of every descriptor.
if strcmp(model.quantizer, 'vq')
  % Exhaustive search: nearest vocabulary column by pairwise distance.
  [drop, binsa] = min(vl_alldist(model.vocab, descrs), [], 1) ;
elseif strcmp(model.quantizer, 'kdtree')
  % kd-tree query with the search capped at 50 comparisons
  % (presumably an approximate nearest neighbour; see vl_kdtreequery).
  binsa = double(vl_kdtreequery(model.kdtree, model.vocab, ...
                                descrs, ...
                                'MaxComparisons', 50)) ;
end

for level = 1:length(model.numSpatialX)
  cellsX = model.numSpatialX(level) ;
  cellsY = model.numSpatialY(level) ;

  % Spatial cell of each feature: frames(1,:) is searched against edges
  % spanning the image width, frames(2,:) against edges spanning its height.
  cellX = vl_binsearch(linspace(1,imWidth,cellsX+1), frames(1,:)) ;
  cellY = vl_binsearch(linspace(1,imHeight,cellsY+1), frames(2,:)) ;

  % Joint (row cell, column cell, visual word) bin as a linear index.
  jointBin = sub2ind([cellsY, cellsX, vocabSize], cellY, cellX, binsa) ;

  % Count features per joint bin, then normalize this level to unit sum.
  counts = zeros(cellsY * cellsX * vocabSize, 1) ;
  counts = vl_binsum(counts, ones(size(jointBin)), jointBin) ;
  levelHists{level} = single(counts / sum(counts)) ;
end

% Stack all levels and renormalize the concatenation.
hist = cat(1,levelHists{:}) ;
hist = hist / sum(hist) ;

% -------------------------------------------------------------------------
function [className, score] = classify(model, im)
% CLASSIFY Predict the class of a single image with a trained model
%   [CLASSNAME, SCORE] = CLASSIFY(MODEL, IM) computes the spatial
%   histogram descriptor of IM, applies the same chi2 homogeneous kernel
%   map used for training, and scores it against every class with the
%   linear classifiers MODEL.W and MODEL.B. CLASSNAME is the entry of
%   MODEL.CLASSES with the highest score and SCORE is that score.

descriptor = getImageDescriptor(model, im) ;
featureMap = vl_homkermap(descriptor, 1, 'kchi2', 'gamma', .5) ;

% One score per class: linear response plus bias.
classScores = model.w' * featureMap + model.b' ;

[score, winner] = max(classScores) ;
className = model.classes{winner} ;

% 0 0
% Original / Followers / Clicks (apparently web-page footer residue)