Notes on the k-step Contrastive Divergence Algorithm (CD-k) in RBMs


The CD-k pseudocode figure discussed below is taken from the article 受限玻尔兹曼机(RBM)学习笔记(六)对比散度算法 ("RBM Learning Notes (6): the Contrastive Divergence Algorithm"). That article explains RBMs in great detail, but as a beginner I am still unsure whether I have understood some of it correctly; if anything here is wrong, corrections are welcome.

Everything below is my understanding, framed in terms of how one would actually write the program:

First, the outer loop runs over all training samples. The line of pseudocode between the outer and inner for loops records the original data before the k sampling steps begin; it is needed later for the visible-layer part of the gradient. At this point, with the parameters w, a, b already randomly initialized, it is also best to compute the conditional probabilities P(h=1|v) with the sigmoid function, since they will be needed later when computing the Δ updates.
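For reference, the sigmoid computations here are the standard RBM conditionals; I am assuming the usual notation in which a denotes the visible biases and b the hidden biases, since the figure only names w, a, b:

$$P(h_j = 1 \mid v) = \sigma\!\Big(b_j + \sum_i v_i w_{ij}\Big), \qquad P(v_i = 1 \mid h) = \sigma\!\Big(a_i + \sum_j h_j w_{ij}\Big), \qquad \sigma(x) = \frac{1}{1 + e^{-x}}$$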

The first for loop inside the second level is the heart of the method: the k-step contrastive divergence itself. Before calling sample_h_given_v, negdata has to be computed from the current parameters; training then continues with the two lines of code inside the loop. The first line applies the sigmoid to negdata to compute the hidden-layer probabilities, from which the activated hidden units can be sampled. The second line of pseudocode then uses those activated hidden units to recompute negdata, from which the hidden probabilities are computed again. The chain keeps alternating negdata -> hidden states -> negdata -> hidden states -> ... -> negdata, and the number of round trips is exactly the K of the contrastive divergence algorithm. Once the loop has finished, i.e. after k iterations, the statistics needed for the parameters w, a, b are computed from the final sample.
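The pseudocode's sample_h_given_v (and, by symmetry, a sample_v_given_h, which I am assuming the figure also uses) can be sketched in MATLAB as follows; the function bodies are my own minimal illustration, with W the numdims x numhid weight matrix, a the visible biases, and b the hidden biases:

    % Sketch of the two Gibbs-sampling helpers named in the pseudocode.
    function [h, hprobs] = sample_h_given_v(v, W, b)
      hprobs = 1./(1 + exp(-v*W - repmat(b, size(v,1), 1)));   % P(h=1|v)
      h = hprobs > rand(size(hprobs));                         % Bernoulli sample
    end

    function [v, vprobs] = sample_v_given_h(h, W, a)
      vprobs = 1./(1 + exp(-h*W' - repmat(a, size(h,1), 1)));  % P(v=1|h)
      v = vprobs > rand(size(vprobs));                         % Bernoulli sample
    end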

Subtracting the negative-phase statistics from the positive-phase statistics of the two steps above gives the Δ updates for w, a, and b.
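Written out, the resulting CD-k updates take the standard form, where ⟨·⟩₀ averages over the training data, ⟨·⟩ₖ averages over the k-th reconstruction, and ε is the learning rate:

$$\Delta w_{ij} = \varepsilon\big(\langle v_i h_j\rangle_0 - \langle v_i h_j\rangle_k\big), \qquad \Delta a_i = \varepsilon\big(\langle v_i\rangle_0 - \langle v_i\rangle_k\big), \qquad \Delta b_j = \varepsilon\big(\langle h_j\rangle_0 - \langle h_j\rangle_k\big)$$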

Hinton's rbm code follows. It does not show the process above very explicitly, because it implements the k = 1 case:

rbm.m

% Version 1.000
%
% Code provided by Geoff Hinton and Ruslan Salakhutdinov
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.

% This program trains Restricted Boltzmann Machine in which
% visible, binary, stochastic pixels are connected to
% hidden, binary, stochastic feature detectors using symmetrically
% weighted connections. Learning is done with 1-step Contrastive Divergence.
% The program assumes that the following variables are set externally:
% maxepoch  -- maximum number of epochs
% numhid    -- number of hidden units
% batchdata -- the data that is divided into batches (numcases numdims numbatches)
% restart   -- set to 1 if learning starts from beginning

epsilonw      = 0.1;   % Learning rate for weights
epsilonvb     = 0.1;   % Learning rate for biases of visible units
epsilonhb     = 0.1;   % Learning rate for biases of hidden units
weightcost  = 0.0002;
initialmomentum  = 0.5;  % initial momentum
finalmomentum    = 0.9;  % final momentum

[numcases numdims numbatches]=size(batchdata);

if restart ==1,
  restart=0;
  epoch=1;

% Initializing symmetric weights and biases.
  vishid     = 0.1*randn(numdims, numhid);  % visible-to-hidden weights
  hidbiases  = zeros(1,numhid);             % hidden biases
  visbiases  = zeros(1,numdims);            % visible biases

  poshidprobs = zeros(numcases,numhid);
  neghidprobs = zeros(numcases,numhid);
  posprods    = zeros(numdims,numhid);
  negprods    = zeros(numdims,numhid);
  vishidinc  = zeros(numdims,numhid);
  hidbiasinc = zeros(1,numhid);
  visbiasinc = zeros(1,numdims);
  batchposhidprobs=zeros(numcases,numhid,numbatches);
end

for epoch = epoch:maxepoch,
 fprintf(1,'epoch %d\r',epoch);
 errsum=0;
 for batch = 1:numbatches,
 fprintf(1,'epoch %d batch %d\r',epoch,batch);

%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The up pass: from the visible layer to the hidden layer.
  data = batchdata(:,:,batch);
  poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1)));
                % sigmoid gives the conditional probability P(h=1|v)
  batchposhidprobs(:,:,batch)=poshidprobs;  % hidden probabilities for this batch
  posprods    = data' * poshidprobs;        % positive statistics for the weights
  poshidact   = sum(poshidprobs);           % positive statistics for hidden biases
  posvisact = sum(data);                    % positive statistics for visible biases
%%%%%%%%% END OF POSITIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  poshidstates = poshidprobs > rand(numcases,numhid);  % hidden states: 0 or 1

%%%%%%%%% START NEGATIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The down pass: from the hidden layer back to the visible layer.
  negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
                % the reconstruction
  neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));
  negprods  = negdata'*neghidprobs;  % negative statistics for the weights
  neghidact = sum(neghidprobs);      % negative statistics for hidden biases
  negvisact = sum(negdata);          % negative statistics for visible biases
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  err= sum(sum( (data-negdata).^2 ));
  errsum = err + errsum;

   if epoch>5,
     momentum=finalmomentum;
   else
     momentum=initialmomentum;
   end;

%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    vishidinc = momentum*vishidinc + ...
                epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);
    visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);
    hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);

    vishid = vishid + vishidinc;
    visbiases = visbiases + visbiasinc;
    hidbiases = hidbiases + hidbiasinc;
%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   end
  fprintf(1, 'epoch %4i error %6.1f  \n', epoch, errsum);
end;
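Since rbm.m hard-codes k = 1, here is a minimal sketch of how its negative phase could be generalized to CD-k. This is my own illustration, not Hinton's code; k is assumed to be set externally, and every other name is reused from rbm.m above:

    % Sketch: a k-step negative phase in place of rbm.m's single down-up pass.
    hidstates = poshidstates;          % start the chain from the positive-phase sample
    for step = 1:k
      negdata = 1./(1 + exp(-hidstates*vishid' - repmat(visbiases,numcases,1)));
      neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));
      if step < k                      % resample the hidden units between steps
        hidstates = neghidprobs > rand(numcases,numhid);
      end
    end
    negprods  = negdata'*neghidprobs;  % negative statistics from the k-th sample
    neghidact = sum(neghidprobs);
    negvisact = sum(negdata);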

Here, in addition, is a k = 10 case:

gaussianfbm.m

% Version 0.100 (Unsupported, unreleased)
%
% Code provided by Graham Taylor and Geoff Hinton
%
% For more information, see:
%    http://www.cs.toronto.edu/~gwtaylor/publications/icml2009
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, expressed or
% implied.  As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application.  All use of these programs is entirely at the user's own risk.
%
% Train a factored, conditional RBM which has label units that modulate
% each pair of interactions
% CRBM has gaussian visible and binary stochastic hidden units
% Standard dev on Gaussian units is fixed to 1 (i.e. the conditional
% Gaussians are standard normal)
% Feature-factor weights are shared
%
% The program assumes that the following variables are set externally:
% nt        -- order of the model
% numepochs -- maximum number of epochs
% numhid    -- number of hidden units
% numfeat   -- number of real-valued features between labels and factors
% numfac    -- number of factors
% batchdata -- a matrix of data (numcases,numdims); here frames x joint angles of interest
% minibatch -- a cell array of dimension batchsize, indexing the valid
%              frames in batchdata
% restart   -- set to 1 if learning starts from beginning

%batchdata is a big matrix of all the frames
%we index it with "minibatch", a cell array of mini-batch indices
numbatches = length(minibatch); % random frame indices; 29823-42*12=29319 valid frames, 294 mini-batches here
numdims = size(batchdata,2);    % visible dimension (58 joint angles of interest here)
numlabels = size(labeldata,2);  % 10 labels here

%Setting learning rates
%Corresponding to the "undirected" observation model
epsilonvisfac=single(1e-2);    % visible (output) layer to factors, 0.01
%only one set of featfac parameters
%shared between undirected, A & B models
epsilonfeatfac=single(1e-2);   % the three feature-to-factor weights share one matrix, hence one rate, 0.01
epsilonhidfac=single(1e-2);    % hidden layer to factors, 0.01
%Corresponding to the "directed" Autoregressive submodel A
%(the bottom, blue factor m in the figure: v_{<t} -> m and v_t -> m)
epsilonpastfacA=single(1e-3);  % past frames to factor m, 0.001
epsilonvisfacA=single(1e-3);   % current frame to factor m, 0.001
%Corresponding to the "directed" past->hidden submodel B
%(the green factor in the figure, connecting past and hidden)
epsilonpastfacB=single(1e-2);  % past frames to factor B, 0.01
epsilonhidfacB=single(1e-2);   % factor B to hidden, 0.01
epsilonlabelfeat=single(1e-3); % label layer to feature layer, 0.001
epsilonvisbias=single(1e-2);   % visible biases, 0.01
epsilonhidbias=single(1e-2);   % hidden biases, 0.01
%epsilonvishid=1e-3;  %gated biases

%currently we use the same weight decay for all weights
%but no weight decay for biases
wdecay = single(0.0002);
mom = single(0.9);       %momentum used only after 5 epochs of training

if restart==1,
  restart=0;
  epoch=1;

  %weights
  visfac = single(0.01*randn(numdims,numfac));  % visible-to-factor weights, 58x200 (58 joints after the second preprocessing step)
  featfac = single(0.01*randn(numfeat,numfac)); % feature-to-factor weights, 100x200 (numfeat=100 predefined)
  hidfac = single(0.01*randn(numhid,numfac));   % hidden-to-factor weights, 600x200 (numhid=600 predefined)

  %Note the new parameterization of pastfac:
  %First numdims rows correspond to time t-nt
  %Last numdims rows correspond to time t-1
  pastfacA = single(0.01*randn(nt*numdims,numfac)); % (12*58)x200 = 696x200, submodel A (blue): past to factors
  visfacA = single(0.01*randn(numdims,numfac));     % 58x200, submodel A (blue): visible to factors

  pastfacB = single(0.01*randn(nt*numdims,numfac)); % 696x200, submodel B (green): past to factors
  hidfacB = single(0.01*randn(numhid,numfac));      % 600x200, submodel B (green): hidden to factors

  %matrix where rows are per-label features
  labelfeat = single(0.01*randn(numlabels,numfeat)); % 10x100 label-feature matrix

  %biases
  visbiases = zeros(1,numdims,'single');  % visible biases, 1x58
  hidbiases = zeros(1,numhid,'single');   % hidden biases, 1x600
  %vishid = 0.01*randn(numdims,numhid);

  clear posdataprod pospastprod poshidprod posvishidprod posvisact poshidact
  clear negdataprod negpastprod neghidprod negvishidprod negvisact neghidact

  %keep previous updates around for momentum
  visfacinc = zeros(size(visfac),'single');   % visible-to-factor update
  featfacinc = zeros(size(featfac),'single'); % feature-to-factor update
  hidfacinc = zeros(size(hidfac),'single');   % hidden-to-factor update

  pastfacAinc = zeros(size(pastfacA),'single'); % past-to-factor-A (blue, m) update
  visfacAinc = zeros(size(visfacA),'single');   % visible-to-factor-A update

  pastfacBinc = zeros(size(pastfacB),'single'); % past-to-factor-B (green) update
  hidfacBinc = zeros(size(hidfacB),'single');   % hidden-to-factor-B update

  labelfeatinc = zeros(size(labelfeat),'single'); % label-to-feature update

  visbiasinc = zeros(size(visbiases),'single');   % visible-bias update
  hidbiasinc = zeros(size(hidbiases),'single');   % hidden-bias update
  %vishidinc = zeros(size(vishid));
end

%Main loop
for epoch = epoch:maxepoch,  % training epochs, 1 to 200 here
  errsum=0; %keep a running total of the difference between data and recon
  for batch = 1:numbatches,  % 294 mini-batches of 100 frames each (the last has 19)

%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    numcases = length(minibatch{batch});  % 100, except 19 for the last batch
    mb = minibatch{batch}; %caches the indices (the randomly chosen frames of this batch)

    past = zeros(numcases,nt*numdims,'single'); %initialization, 100x696
    data = single(batchdata(mb,:)); % the frames indexed by mb, typically 100x58

    %use pastindex to index the appropriate frames in batchdata
    %(for each frame in the minibatch) depending on the delay
    %past = reshape(batchdata(pastindex,:),numcases,nt*numdims);
    %past = batchdata(mb-1,:); %one step in the past

    %Easiest way to build past is by a loop
    %Past looks like [ [data time t-nt] ... [data time t-1] ]
    for hh=nt:-1:1 %note reverse order; gathers the nt=12 frames preceding each of the 100 frames, 100x(12*58)
      past(:,numdims*(nt-hh)+1:numdims*(nt-hh+1)) = batchdata(mb-hh,:) + randn(numcases,numdims);
    end

    %get the features from the one-hot labels
    labels = labeldata(mb,:);    % 100x10: the style labels of the frames in mb
    features = labels*labelfeat; % each row of labels has a single 1, so this picks the matching row of labelfeat; 100x100

    %DEBUG
    %past = double(rand(size(past))>0.5);

    %calculate inputs to factors (will be used many times)
    yvis = data*visfac;       %summing over numdims:    (100x58)*(58x200)
    yfeat = features*featfac; %summing over numfeat:    (100x100)*(100x200)

    ypastA = past*pastfacA;     %summing over nt*numdims: (100x696)*(696x200)
    yfeatA = features*featfac;  %summing over numfeat:    (100x100)*(100x200)
    yvisA = data*visfacA;       %summing over numdims:    (100x58)*(58x200)

    ypastB = past*pastfacB;     %summing over nt*numdims: (100x696)*(696x200)
    yfeatB = features*featfac;  %summing over numfeat:    (100x100)*(100x200)

    yvisfeat = yvis.*yfeat;      %used twice, so cache
    ypastfeatB = ypastB.*yfeatB; %used twice, so cache

    %pass 3-way term + gated biases + hidbiases through sigmoid
    %in the figure the hidden layer is driven by factors f and n, i.e. by
    %(yvis, yfeat, hidfac) and (ypastB, yfeatB, hidfacB); the sigmoid's
    %argument is the sum of both factor inputs plus the hidden biases
    poshidprobs = 1./(1 + exp(-yvisfeat*hidfac'  ...
      -ypastfeatB*hidfacB' - repmat(hidbiases,numcases,1)));
      %-data*vishid - repmat(hidbiases,numcases,1)));

    %Activate the hidden units
    hidstates = single(poshidprobs > rand(numcases,numhid)); % all 0s and 1s

    yhid = hidstates*hidfac;      % binary states keep the weights of activated units, zero out the rest
    yhid_ = poshidprobs*hidfac;   %smoothed version: probabilities times hidden-to-factor weights
    yhidB_ = poshidprobs*hidfacB; %smoothed version, for factor n (i.e. B)

    %these are used multiple times, so cache
    yvishid_ = yvis.*yhid_;
    yvispastA = yvisA.*ypastA;
    ypasthidB_ = ypastB.*yhidB_;
    yfeatpastA = yfeatA.*ypastA;

    %Calculate statistics needed for gradient update
    %Gradients are taken w.r.t neg energy
    %Note that terms that are common to positive and negative stats
    %are left out
    posvisprod = data'*(yfeat.*yhid_); %smoothed
    posfeatprod = features'*(yvishid_); %smoothed
    poshidprod = poshidprobs'*(yvisfeat); %smoothed

    posvisAprod = data'*(yfeatpastA);
    posfeatAprod = features'*(yvispastA);
    pospastAprod =  past'*(yvisA.*yfeatA);

    pospastBprod = past'*(yfeatB.*yhidB_); %smoothed
    posfeatBprod =  features'*(ypasthidB_); %smoothed
    poshidBprod =  poshidprobs'*(ypastfeatB);

    %Now the gradients for the label/feature matrix
    %First find the grad terms w.r.t. the features
    %Then backpropagate (it's linear, so simply matrix multiply)
    %There are three terms, since the features gate the undirected & two
    %sets of directed connections
%     posfeatgrad = (yvishid_)*featfac' + ...
%       (yvispastA)*featfac' + ...
%       (ypasthidB_)*featfac';
    posfeatgrad = (yvishid_ + yvispastA + ypasthidB_)*featfac';

    %posvishidprod = data'*poshidprobs;
    posvisact = sum(data,1);
    poshidact = sum(poshidprobs,1);  %smoothed
%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%% START NEGATIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %the k-step contrastive divergence loop (cdsteps = 10 here)
    for cdn = 1:cdsteps

      %Activate the visible units
      %Collect 3-way terms + vis biases + gated biases
      %note use of stochastic hidstates
      %Mean-field version (do not add Gaussian noise)
      negdata = (yfeat.*yhid)*visfac' + ...
        (yfeatpastA)*visfacA' + ...
        repmat(visbiases,numcases,1);
      %the visible layer reconstructed through factors f and m, 100x58

      yvis = negdata*visfac;
      yvisfeat = yvis.*yfeat; %used twice, so cache

      %pass 3-way term + gated biases + hidbiases through sigmoid
      neghidprobs = 1./(1 + exp(-yvisfeat*hidfac'  ...
        -ypastfeatB*hidfacB' - repmat(hidbiases,numcases,1)));

      if cdn == 1
        %Calculate reconstruction error
        err= sum(sum( (data(:,:,1)-negdata).^2 ));
        errsum = err + errsum;
      end

      if cdn == cdsteps
        yhidB_ = neghidprobs*hidfacB; %smoothed version
        yhid_ = neghidprobs*hidfac; %smoothed version
        yvishid_ = yvis.*yhid_;
        yvisA = negdata*visfacA;    %summing over numdims
        yvispastA = yvisA.*ypastA;
        ypasthidB_ = ypastB.*yhidB_;

        %last cd step -- Calculate statistics needed for gradient update
        %Gradients are taken w.r.t neg energy
        %Note that terms that are common to positive and negative stats
        %are left out
        %the three connections of factor f; each uses the other two's inputs
        negvisprod = negdata'*(yfeat.*yhid_); %smoothed
        negfeatprod = features'*(yvishid_); %smoothed
        neghidprod = neghidprobs'*(yvisfeat); %smoothed
        %the three connections of factor m
        negvisAprod = negdata'*(yfeatpastA);
        negfeatAprod = features'*(yvispastA);
        negpastAprod =  past'*(yvisA.*yfeatA);
        %the three connections of factor n
        negpastBprod = past'*(yfeatB.*yhidB_); %smoothed
        negfeatBprod =  features'*(ypasthidB_); %smoothed
        neghidBprod =  neghidprobs'*(ypastfeatB);

        %Now the gradients for the label/feature matrix
        %First find the grad terms w.r.t. the features
        %Then backpropagate (it's linear, so simply matrix multiply)
        %There are three terms, since the features gate the undirected & two
        %sets of directed connections
%         negfeatgrad = (yvishid_)*featfac' + ...
%           (yvispastA)*featfac' + ...
%           (ypasthidB_)*featfac';
        negfeatgrad = (yvishid_ + yvispastA + ypasthidB_)*featfac';

        %negvishidprod = data'*neghidprobs;
        negvisact = sum(negdata,1);
        neghidact = sum(neghidprobs,1);  %smoothed
      else
        %Stochastically sample the hidden units
        hidstates = single(neghidprobs > rand(numcases,numhid));
        yhid = hidstates*hidfac;
      end
    end
%%%%%%%%% END NEGATIVE PHASE  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    if epoch > 5 %use momentum
      momentum=mom;
    else %no momentum
      momentum=0;
    end

%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    visfacinc = momentum*visfacinc + ...
      epsilonvisfac*( (posvisprod - negvisprod)/numcases - wdecay*visfac);
    featfacinc = momentum*featfacinc + ...
      epsilonfeatfac*((posfeatprod + posfeatAprod + posfeatBprod ...
      - negfeatprod - negfeatAprod - negfeatBprod)/numcases - wdecay*featfac);
%     featfacinc = momentum*featfacinc + ...
%       epsilonfeatfac*( (posfeatprod - negfeatprod)/numcases - wdecay*featfac);
    hidfacinc = momentum*hidfacinc + ...
      epsilonhidfac*( (poshidprod - neghidprod)/numcases - wdecay*hidfac);
      %the leading epsilon* is the learning rate; wdecay is the weight-decay rate
    visfacAinc = momentum*visfacAinc + ...
      epsilonvisfacA*( (posvisAprod - negvisAprod)/numcases - wdecay*visfacA);
%     featfacAinc = momentum*featfacAinc + ...
%       epsilonfeatfacA*( (posfeatAprod - negfeatAprod)/numcases - wdecay*featfacA);
    pastfacAinc = momentum*pastfacAinc + ...
      epsilonpastfacA*( (pospastAprod - negpastAprod)/numcases - wdecay*pastfacA);
    hidfacBinc = momentum*hidfacBinc + ...
      epsilonhidfacB*( (poshidBprod - neghidBprod)/numcases - wdecay*hidfacB);
%     featfacBinc = momentum*featfacBinc + ...
%       epsilonfeatfacB*( (posfeatBprod - negfeatBprod)/numcases - wdecay*featfacB);
    pastfacBinc = momentum*pastfacBinc + ...
      epsilonpastfacB*( (pospastBprod - negpastBprod)/numcases - wdecay*pastfacB);
    labelfeatinc = momentum*labelfeatinc + ...
      epsilonlabelfeat*( labels'*(posfeatgrad - negfeatgrad)/numcases - wdecay*labelfeat);
    %the two biases
    visbiasinc = momentum*visbiasinc + ...
      (epsilonvisbias/numcases)*(posvisact - negvisact);
    hidbiasinc = momentum*hidbiasinc + ...
      (epsilonhidbias/numcases)*(poshidact - neghidact);

    visfac = visfac + visfacinc;
    featfac = featfac + featfacinc;
    hidfac = hidfac + hidfacinc;
    visfacA = visfacA + visfacAinc;
    pastfacA = pastfacA + pastfacAinc;
    %no featfacA/featfacB updates, because the feat-fac weights are shared
    hidfacB = hidfacB + hidfacBinc;
    pastfacB = pastfacB + pastfacBinc;
    labelfeat = labelfeat + labelfeatinc;
    %sfigure(34); imagesc(labelfeat); colormap gray; axis off
    %drawnow;
    visbiases = visbiases + visbiasinc;
    hidbiases = hidbiases + hidbiasinc;
%%%%%%%%%%%%%%%% END OF UPDATES  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  end

  %every 10 epochs, show output
  if mod(epoch,10) ==0
    fprintf(1, 'epoch %4i error %6.1f  \n', epoch, errsum);

    if 0%mod(epoch,100)==0
      %show hiddens
      plotindex = 101:500; %frames of batchdata that we will plot
      nc = length(plotindex);
      data = batchdata(plotindex,:);

      past = zeros(nc,nt*numdims); %initialization from the nt=12 preceding frames
      for hh=nt:-1:1 %note reverse order
        %past(:,numdims*(hh-1)+1:numdims*hh) = initdata(hh:end-(nt-hh+1),:);
        past(:,numdims*(nt-hh)+1:numdims*(nt-hh+1)) = batchdata(plotindex-hh,:); %take the frames preceding 101-500 from batchdata
      end

      labels = labeldata(plotindex,:);
      features = labels*labelfeat;

      yvis = data*visfac; %summing over numdims
      yfeat = features*featfac; %summing over numfeat
      ypastB = past*pastfacB;   %summing over nt*numdims
      yfeatB = features*featfac; %summing over numfeat
      yvisfeat = yvis.*yfeat; %used twice, so cache
      ypastfeatB = ypastB.*yfeatB; %used twice, so cache
      %pass 3-way term + gated biases + hidbiases through sigmoid
      poshidprobs = 1./(1 + exp(-yvisfeat*hidfac'  ...
        -ypastfeatB*hidfacB' - repmat(hidbiases,nc,1)));
      sfigure(32); imagesc(poshidprobs'); colormap gray; axis off;

      yhid_ = poshidprobs*hidfac; %smoothed version
      ypastA = past*pastfacA;     %summing over nt*numdims
      yfeatA = features*featfac;  %summing over numfeat
      yfeatpastA = yfeatA.*ypastA;

      %look at mean-field reconstruction
      negdata = (yfeat.*yhid_)*visfac' + ...
        (yfeatpastA)*visfacA' + ...
        repmat(visbiases,nc,1);

      sfigure(33);clf
      subplot(2,1,1); plot(data(:,7)); hold on; plot(negdata(:,7),'r');
      subplot(2,1,2); plot(data(:,18)); hold on; plot(negdata(:,18),'r');

      %sfigure(34); imagesc(labelfeat); colormap gray; axis off

      %Hinton plots of parameters
      %Likely do not want to plot all dims, all hiddens, all factors
      maxdims = 30; maxhid = 100; maxfac = 50;
      maxpast = 2; %how many time steps in past to plot (pastfac)

      %undirected model
      sfigure(35);
      subplot(3,1,1); hinton(visfac(1:maxdims,1:maxfac));
      subplot(3,1,2); hinton(featfac(:,1:maxfac));
      subplot(3,1,3); hinton(hidfac(1:maxhid,1:maxfac));
      set(gcf,'Name','undirected')

      %autoregressive model
      sfigure(36);
      %for past, we only want to plot maxdims & maxpast
      %i don't know how to do this without a loop
      pastrows = [];
      for kk=maxpast:-1:1 %note reverse
        %select maxdims rows corresponding to time step kk
        pastrows = [pastrows; pastfacA(end-kk*numdims+1:...
          end-kk*numdims+maxdims, 1:maxfac)];
      end

      subplot(3,1,1); hinton(pastrows);
      %subplot(3,1,2); hinton(featfacA(:,1:maxfac));
      subplot(3,1,3); hinton(visfacA(1:maxdims,1:maxfac));
      set(gcf,'Name','autoregressive')

      %directed vis -> hid model
      sfigure(37);
      %see comment above
      pastrows = [];
      for kk=maxpast:-1:1 %note reverse
        %select maxdims rows corresponding to time step kk
        pastrows = [pastrows; pastfacB(end-kk*numdims+1:...
          end-kk*numdims+maxdims, 1:maxfac)];
      end

      subplot(3,1,1); hinton(pastrows);
      %subplot(3,1,2); hinton(featfacB(:,1:maxfac));
      subplot(3,1,3); hinton(hidfacB(1:maxhid,1:maxfac));
      set(gcf,'Name','directed')

      %labelfeat and biases
      sfigure(34);
      subplot(3,1,1); hinton(visbiases(1:maxdims));
      subplot(3,1,2); hinton(hidbiases(1:maxhid));
      subplot(3,1,3); hinton(labelfeat);
      set(gcf,'Name','labelfeat and biases')

%       %Could see a plot of the weights every 10 epochs
%       sfigure(33);
%       subplot(2,3,1); hinton(visfac);
%       subplot(2,3,2); hinton(pastfac);
%       subplot(2,3,3); hinton(hidfac);
%       subplot(2,3,4); hinton(vishid);
%       subplot(2,3,5); hinton(visbiases);
%       subplot(2,3,6); hinton(hidbiases);
%       drawnow;
%       sfigure(34);
%       subplot(3,1,1); imagesc(data'); colormap gray; axis off
%       subplot(3,1,2); imagesc(poshidprobs',[0 1]); colormap gray; axis off
%       subplot(3,1,3); imagesc(negdata',[0 1]); colormap gray; axis off
%       drawnow;
      %figure(3); weightreport
      %drawnow;
    end
  end

  %Checkpoint models
  %what is finally saved: the visible-factor, feature-factor and hidden-factor
  %weights; the past-factor-A (m) and visible-factor-A weights; the
  %past-factor-B and hidden-factor-B weights; the label-feature weights;
  %the visible and hidden biases; plus cdsteps, numhid, numfac, epoch and nt
  if mod(epoch,snapshotevery) ==0  % a snapshot every snapshotevery (here 100) epochs
    snapshot_file = [snapshot_path '_ep' num2str(epoch) '.mat'];
    save(snapshot_file, 'visfac','featfac','hidfac', ...
      'pastfacA','visfacA', ...
      'pastfacB','hidfacB', ...
      'labelfeat','visbiases','hidbiases', ...
      'cdsteps', 'numhid','numfac','epoch', 'nt');
  end
  drawnow; %update any plots (flushes pending graphics to the screen; typically
           %used inside loops for animation; see
           %http://cn.mathworks.com/help/matlab/ref/drawnow.html)
end
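Both listings expect their configuration variables to be set in the workspace before the script runs. A hypothetical driver for the second listing might look like the sketch below; the numbers simply restate the dimensions annotated in the comments above (nt = 12, numhid = 600, numfeat = 100, numfac = 200, cdsteps = 10, 200 epochs), while the snapshot names and the assumption that the script can be invoked as gaussianfbm are mine:

    % Hypothetical driver sketch (illustrative values only).
    nt        = 12;              % model order: how many past frames condition the current one
    numhid    = 600;             % hidden units
    numfeat   = 100;             % label features
    numfac    = 200;             % factors
    maxepoch  = 200;             % training epochs
    cdsteps   = 10;              % the k of CD-k
    snapshotevery = 100;         % checkpoint interval in epochs
    snapshot_path = 'gfbm_snapshot';

    % batchdata: numframes x numdims matrix of preprocessed frames
    % labeldata: numframes x numlabels one-hot style labels
    % minibatch: cell array of index vectors; every index must exceed nt so
    %            that each frame's nt predecessors are also valid frames
    restart = 1;                 % initialize weights from scratch
    gaussianfbm;                 % run the training script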

