Hinton's RBM Code Annotated (Part 2): backpropclassify.m


Source code: http://www.cs.toronto.edu/~hinton/MatlabForSciencePaper.html

This is the code for the fine-tuning stage of the RBM-pretrained network; the cost function is the cross-entropy error.
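
Before walking through the file, here is a minimal standalone sketch of the objective the script minimizes: a 784-500-500-2000 stack of logistic layers with a 10-way softmax on top, scored by cross-entropy. The variable names and random initialization below are illustrative only; the real script loads pretrained weights instead.

X = rand(5, 784);                       % 5 cases, 784 pixels each
T = eye(10); T = T(randi(10,5,1), :);   % 5 random one-hot targets
W1 = 0.1*randn(785, 500);  W2 = 0.1*randn(501, 500);
W3 = 0.1*randn(501, 2000); Wc = 0.1*randn(2001, 10);
h1 = 1./(1+exp(-[X  ones(5,1)]*W1));    % logistic layers, bias folded in
h2 = 1./(1+exp(-[h1 ones(5,1)]*W2));
h3 = 1./(1+exp(-[h2 ones(5,1)]*W3));
s  = exp([h3 ones(5,1)]*Wc);
p  = s ./ repmat(sum(s,2),1,10);        % softmax over the 10 classes
cost = -sum(sum(T .* log(p)))           % the cross-entropy error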

% Version 1.000

%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.


% This program fine-tunes the pretrained network for classification with
% backpropagation. Weights of the network are going to be saved in
% mnistclassify_weights.mat and training and test errors in
% mnistclassify_error.mat.
% You can also set maxepoch; the default value is 200 as in our paper.


maxepoch=200;
fprintf(1,'\nTraining discriminative model on MNIST by minimizing cross entropy error. \n');        % minimize the cross-entropy error
fprintf(1,'60 batches of 1000 cases each. \n');


load mnistvhclassify
load mnisthpclassify
load mnisthp2classify


makebatches;                                                     % split the data into minibatches
[numcases numdims numbatches]=size(batchdata);
N=numcases;                                                      % number of cases per minibatch


%%%% PREINITIALIZE WEIGHTS OF THE DISCRIMINATIVE MODEL%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


w1=[vishid; hidrecbiases];                                       % layer-1 weights with the hidden biases appended as the last row; size (numdims+1) x numhid
w2=[hidpen; penrecbiases];
w3=[hidpen2; penrecbiases2];
w_class = 0.1*randn(size(w3,2)+1,10);                            % randomly initialize the top layer, (size(w3,2)+1) rows by 10 columns; unlike the logistic layers below it, its output goes through exp (a softmax), and in backprop.m the corresponding layer is linear
 


%%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
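% Why stacking the biases under the weights works: appending a column of
% ones to the data folds the bias addition into the matrix product,
% data*W + repmat(b,N,1) == [data ones(N,1)]*[W; b].  A quick check
% (illustrative only, not part of the original script):
%   A = rand(3,5); W = rand(5,4); b = rand(1,4);
%   max(max(abs(A*W + repmat(b,3,1) - [A ones(3,1)]*[W; b])))  % on the order of eps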


l1=size(w1,1)-1;                                         % number of visible (input) units: 28*28 = 784
l2=size(w2,1)-1;                                         % units in hidden layer 1: 500
l3=size(w3,1)-1;                                         % units in hidden layer 2: 500
l4=size(w_class,1)-1;                                    % units in hidden layer 3: 2000
l5=10;                                                   % number of label units
test_err=[];
train_err=[];




for epoch = 1:maxepoch                 % maxepoch (200) passes of backprop parameter updates


%%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err=0; 
err_cr=0;
counter=0;
[numcases numdims numbatches]=size(batchdata);
N=numcases;
 for batch = 1:numbatches
  data = [batchdata(:,:,batch)];
  target = [batchtargets(:,:,batch)];
  data = [data ones(N,1)];           % append a column of ones for the bias: b + data*w1 becomes [data 1]*[w1; b]
  w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs  ones(N,1)];    % probability that each hidden unit turns on (logistic)
  w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
  w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs  ones(N,1)];
  targetout = exp(w3probs*w_class);         % final output, N rows by 10 columns; a softmax here, unlike the logistic layers above
  targetout = targetout./repmat(sum(targetout,2),1,10);


  [I J]=max(targetout,[],2);       % max value and column index (predicted class) of each output row
  [I1 J1]=max(target,[],2);        % max value and column index (true class) of each target row
  counter=counter+length(find(J==J1)); % count the correctly classified cases
  err_cr = err_cr- sum(sum( target(:,1:end).*log(targetout))) ;% accumulate the cross-entropy cost
 end
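% Note: exp(w3probs*w_class) above can overflow for large logits.  A
% numerically safer softmax (not in the original code) subtracts each row's
% maximum first; the probabilities are unchanged since
% exp(a-m)/sum(exp(a-m)) == exp(a)/sum(exp(a)):
%   z = w3probs*w_class;
%   z = z - repmat(max(z,[],2),1,10);
%   targetout = exp(z)./repmat(sum(exp(z),2),1,10);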
 train_err(epoch)=(numcases*numbatches-counter);% total number of misclassified training cases
 train_crerr(epoch)=err_cr/numbatches; % average cross-entropy cost per batch


%%%%%%%%%%%%%% END OF COMPUTING TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%% COMPUTE TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%same as the training-error computation above, applied to the test set

err=0;
err_cr=0;
counter=0;
[testnumcases testnumdims testnumbatches]=size(testbatchdata);
N=testnumcases;
for batch = 1:testnumbatches
  data = [testbatchdata(:,:,batch)];
  target = [testbatchtargets(:,:,batch)];
  data = [data ones(N,1)];           
  w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs  ones(N,1)];
  w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
  w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs  ones(N,1)];
  targetout = exp(w3probs*w_class);
  targetout = targetout./repmat(sum(targetout,2),1,10);


  [I J]=max(targetout,[],2);
  [I1 J1]=max(target,[],2);
  counter=counter+length(find(J==J1));
  err_cr = err_cr- sum(sum( target(:,1:end).*log(targetout))) ;
end
 test_err(epoch)=(testnumcases*testnumbatches-counter);
 test_crerr(epoch)=err_cr/testnumbatches;
 fprintf(1,'Before epoch %d Train # misclassified: %d (from %d). Test # misclassified: %d (from %d) \t \t \n',...
            epoch,train_err(epoch),numcases*numbatches,test_err(epoch),testnumcases*testnumbatches);


%%%%%%%%%%%%%% END OF COMPUTING TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


 tt=0;
 for batch = 1:numbatches/10               % fine-tuning: combine 10 of the original 100-case minibatches into one larger batch
 fprintf(1,'epoch %d batch %d\r',epoch,batch);


%%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%stack 10 minibatches into one combined batch

 tt=tt+1; 
 data=[];
 targets=[]; 
 for kk=1:10
  data=[data 
        batchdata(:,:,(tt-1)*10+kk)]; 
  targets=[targets
        batchtargets(:,:,(tt-1)*10+kk)];
 end 
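% The index (tt-1)*10+kk walks through the original minibatches in blocks
% of ten, e.g. for tt=2 (illustrative check):
%   kk = 1:10;  (tt-1)*10 + kk    % -> 11 12 13 14 15 16 17 18 19 20
% so each combined batch stacks 10 consecutive minibatches row-wise.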


%%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  max_iter=3;                   % at most three line searches


  if epoch<6              % First update top-level weights holding other weights fixed: for the first 5 epochs only the top layer is trained
    N = size(data,1);
    XX = [data ones(N,1)];     
    w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs  ones(N,1)];
    w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
    w3probs = 1./(1 + exp(-w2probs*w3)); %w3probs = [w3probs  ones(N,1)];


    VV = [w_class(:)']';% flatten w_class into a single column (the top-layer weights), the variable vector for the conjugate-gradient routine
    Dim = [l4; l5];     % unit counts of the last two layers (see l4 and l5 above)
    [X, fX] = minimize(VV,'CG_CLASSIFY_INIT',max_iter,Dim,w3probs,targets);
    % backprop via conjugate gradient: X is the updated weight vector after at most 3 line searches, fX the function values along the way
    w_class = reshape(X,l4+1,l5); % reshape X back to its original matrix size


  else                             % update all the weights
    VV = [w1(:)' w2(:)' w3(:)' w_class(:)']'; % flatten all the parameters into one column, the variable vector for conjugate gradient
    Dim = [l1; l2; l3; l4; l5];
    [X, fX] = minimize(VV,'CG_CLASSIFY',max_iter,Dim,data,targets);
    % CG_CLASSIFY evaluates the cost function (the cross-entropy discussed above) and its gradient

    % unpack the updated vector X back into the individual weight matrices
    w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
    xxx = (l1+1)*l2;
    w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
    xxx = xxx+(l2+1)*l3;
    w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
    xxx = xxx+(l3+1)*l4;
    w_class = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);


  end
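% minimize.m (Carl Rasmussen's conjugate-gradient minimizer, distributed
% with this code) only sees a single parameter vector, so the weight
% matrices are flattened into VV and CG_CLASSIFY reshapes them back using
% Dim.  The unpacking above exactly inverts the packing, e.g. (illustrative):
%   VV  = [w1(:)' w2(:)' w3(:)' w_class(:)']';
%   w1r = reshape(VV(1:(l1+1)*l2), l1+1, l2);
%   isequal(w1r, w1)   % 1: flatten then reshape recovers w1 (column-major)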
%%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%


 end


 save mnistclassify_weights w1 w2 w3 w_class
 save mnistclassify_error test_err test_crerr train_err train_crerr;


end
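
After training, the saved weights can be reloaded to classify new data. Below is a minimal sketch (it assumes mnistclassify_weights.mat produced by the run above and testbatchdata from makebatches); since exp is monotone, the softmax normalization is not needed just to pick the argmax class:

load mnistclassify_weights                        % w1 w2 w3 w_class
data = testbatchdata(:,:,1);                      % first test minibatch
N = size(data,1); data = [data ones(N,1)];
w1probs = 1./(1 + exp(-data*w1));    w1probs = [w1probs ones(N,1)];
w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
scores = w3probs*w_class;                         % unnormalized class scores
[dummy, predicted] = max(scores,[],2);            % predicted class index per case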


Original post: http://blog.csdn.net/u014537068/article/details/44925437

