DeepLearnToolbox_CNN notes


Preliminaries:

 1."Notes on Convolutional Neural Networks"

2."Gradient-based learning applied to document recognition"

Contents

  • ex1 Train a 6c-2s-12c-2s Convolutional neural network
function test_example_CNN
load mnist_uint8;
train_x = double(reshape(train_x', 28, 28, 60000)) / 255;  % 60000x784, reshaped to 28x28x60000
test_x  = double(reshape(test_x', 28, 28, 10000)) / 255;   % 28x28x10000
train_y = double(train_y');  % each column is one sample's label
test_y  = double(test_y');
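Each 784-element row of the uint8 matrices is one flattened digit; transposing and reshaping stacks them as 28x28 images along the third dimension, and dividing by 255 maps pixels into [0,1]. A quick sanity-check sketch (assuming mnist_uint8.mat from DeepLearnToolbox is on the path; depending on how the rows were flattened the digit may display transposed, which is harmless for training):

load mnist_uint8;
train_x = double(reshape(train_x', 28, 28, 60000)) / 255;
size(train_x)                                      % ans = 28 28 60000
figure; imagesc(train_x(:, :, 1)); colormap gray;  % show the first digit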

ex1 Train a 6c-2s-12c-2s Convolutional neural network

% Will run 1 epoch in about 200 seconds and get around 11% error.
% With 100 epochs you'll get around 1.2% error.
% Network structure: 5x5-6c-2s-12c-2s
rand('state',0)
cnn.layers = {
    struct('type', 'i') % input layer
    struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) % convolution layer
    struct('type', 's', 'scale', 2) % subsampling (pooling) layer
    struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) % convolution layer
    struct('type', 's', 'scale', 2) % subsampling layer
};
cnn = cnnsetup(cnn, train_x, train_y);

opts.alpha = 1;       % learning rate
opts.batchsize = 50;  % train on 50 samples per batch
opts.numepochs = 1;

cnn = cnntrain(cnn, train_x, train_y, opts);

[er, bad] = cnntest(cnn, test_x, test_y);
er  % error rate: 0.1113

% plot mean squared error
figure; plot(cnn.rL);
assert(er<0.12, 'Too big error');
epoch 1/1
Elapsed time is 111.068904 seconds.

er =

    0.1113
function net = cnnsetup(net, x, y)
    inputmaps = 1;
    mapsize = size(squeeze(x(:, :, 1)));  % squeeze drops singleton dimensions
    for l = 1 : numel(net.layers)   %  layer
        if strcmp(net.layers{l}.type, 's')
            mapsize = mapsize / net.layers{l}.scale;
            assert(all(floor(mapsize)==mapsize), ['Layer ' num2str(l) ' size must be integer. Actual: ' num2str(mapsize)]);
            for j = 1 : inputmaps
                net.layers{l}.b{j} = 0;
            end
        end
        if strcmp(net.layers{l}.type, 'c')
            mapsize = mapsize - net.layers{l}.kernelsize + 1;  % (28-5+1)
            fan_out = net.layers{l}.outputmaps * net.layers{l}.kernelsize ^ 2;  % number of weights to the next layer = #featureMaps * kernelsize^2
            for j = 1 : net.layers{l}.outputmaps  %  output map
                fan_in = inputmaps * net.layers{l}.kernelsize ^ 2;  % number of weights connecting back to the previous layer
                for i = 1 : inputmaps  %  input map
                    net.layers{l}.k{i}{j} = (rand(net.layers{l}.kernelsize) - 0.5) * 2 * sqrt(6 / (fan_in + fan_out));
                end
                net.layers{l}.b{j} = 0;
            end
            inputmaps = net.layers{l}.outputmaps;
        end
    end
    % 'onum' is the number of labels, which is why it is calculated as size(y, 1). If you have 20 labels, the output of the network will be 20 neurons.
    % 'fvnum' is the number of output neurons at the last layer, the layer just before the output layer.
    % 'ffb' is the biases of the output neurons.
    % 'ffW' is the weights between the last layer and the output neurons. Note that the last layer is fully connected to the output layer, which is why the size of the weights is (onum * fvnum).
    fvnum = prod(mapsize) * inputmaps;  % prod multiplies the elements of mapsize
    onum = size(y, 1);
    net.ffb = zeros(onum, 1);
    net.ffW = (rand(onum, fvnum) - 0.5) * 2 * sqrt(6 / (onum + fvnum));
end
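Two things worth tracing by hand here. First, the weight draw (rand - 0.5) * 2 * sqrt(6/(fan_in + fan_out)) is uniform on ±sqrt(6/(fan_in + fan_out)), i.e. the "normalized initialization" of Glorot and Bengio. Second, mapsize shrinks deterministically through the 5x5-6c-2s-12c-2s network, which fixes the size of ffW. A standalone sketch of that arithmetic (plain MATLAB, no toolbox calls):

mapsize = [28 28];
mapsize = mapsize - 5 + 1;    % 5x5 convolution: 28 -> 24
mapsize = mapsize / 2;        % 2x2 mean pooling: 24 -> 12
mapsize = mapsize - 5 + 1;    % 5x5 convolution: 12 -> 8
mapsize = mapsize / 2;        % 2x2 mean pooling:  8 -> 4
fvnum = prod(mapsize) * 12    % 4*4*12 = 192 neurons in the feature vector
% so net.ffW is 10x192 for the 10 MNIST classes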
function net = cnntrain(net, x, y, opts)
    m = size(x, 3);  % number of training samples
    numbatches = m / opts.batchsize;
    if rem(numbatches, 1) ~= 0
        error('numbatches not integer');
    end
    net.rL = [];
    for i = 1 : opts.numepochs
        disp(['epoch ' num2str(i) '/' num2str(opts.numepochs)]);
        tic;
        kk = randperm(m);
        for l = 1 : numbatches  % 60000/50 = 1200 batches
            batch_x = x(:, :, kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize));  % 50 randomly chosen samples (no repeats within an epoch)
            batch_y = y(:,    kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize));  % the labels for those 50 samples
            net = cnnff(net, batch_x);
            net = cnnbp(net, batch_y);
            net = cnnapplygrads(net, opts);
            if isempty(net.rL)
                net.rL(1) = net.L;  % first batch: seed the curve with the loss value
            end
            net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;  % why plot the error curve this way? it amounts to an exponentially weighted running average over the batch losses
        end
        toc;
    end
end
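The rL update answers the question in the comment: rather than plotting the raw per-batch loss, cnntrain plots an exponential moving average, rL(t) = 0.99*rL(t-1) + 0.01*L(t), which smooths out the noise from individual 50-sample batches. A minimal sketch of the smoothing in isolation (the synthetic losses here are made up for illustration):

losses = 0.5 * exp(-(1:1200)/400) + 0.05 * rand(1, 1200);  % fake noisy, decaying batch losses
rL = losses(1);                         % seeded with the first batch loss, as cnntrain does
for t = 1:numel(losses)
    rL(end + 1) = 0.99 * rL(end) + 0.01 * losses(t);  % exponentially weighted moving average
end
figure; plot(losses); hold on; plot(rL, 'LineWidth', 2);
legend('raw batch loss', 'smoothed rL');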
function net = cnnff(net, x)
    n = numel(net.layers);
    net.layers{1}.a{1} = x;
    inputmaps = 1;

    for l = 2 : n   %  for each layer
        if strcmp(net.layers{l}.type, 'c')
            %  !!below can probably be handled by insane matrix operations
            for j = 1 : net.layers{l}.outputmaps   %  for each output map
                %  create temp output map
                z = zeros(size(net.layers{l - 1}.a{1}) - [net.layers{l}.kernelsize - 1 net.layers{l}.kernelsize - 1 0]);  % 28 28 50 - 4 4 0
                for i = 1 : inputmaps   %  for each input map
                    %  convolve with corresponding kernel and add to temp output map
                    z = z + convn(net.layers{l - 1}.a{i}, net.layers{l}.k{i}{j}, 'valid');
                end
                %  add bias, pass through nonlinearity
                net.layers{l}.a{j} = sigm(z + net.layers{l}.b{j});
            end
            %  set number of input maps to this layer's number of output maps
            inputmaps = net.layers{l}.outputmaps;
        elseif strcmp(net.layers{l}.type, 's')
            %  downsample
            for j = 1 : inputmaps
                z = convn(net.layers{l - 1}.a{j}, ones(net.layers{l}.scale) / (net.layers{l}.scale ^ 2), 'valid');  % mean pooling  %  !! replace with variable
                net.layers{l}.a{j} = z(1 : net.layers{l}.scale : end, 1 : net.layers{l}.scale : end, :);  % pooling windows do not overlap
            end
        end
    end

    %  concatenate all end layer feature maps into vector
    net.fv = [];
    for j = 1 : numel(net.layers{n}.a)  % the last pooling layer has 12 feature maps
        sa = size(net.layers{n}.a{j});  % each feature map is 4x4x50
        net.fv = [net.fv; reshape(net.layers{n}.a{j}, sa(1) * sa(2), sa(3))];
    end
    %  feedforward into output perceptrons
    net.o = sigm(net.ffW * net.fv + repmat(net.ffb, 1, size(net.fv, 2)));
end
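The 's' branch implements non-overlapping mean pooling in two steps: convn with the uniform ones(scale)/scale^2 kernel computes the average of every scale x scale window, and the strided indexing z(1:scale:end, 1:scale:end, :) then keeps only the windows that start on the pooling grid. A small check that this equals blockwise averaging (4x4 example, scale = 2):

% Check: convn + strided indexing == non-overlapping 2x2 mean pooling.
scale = 2;
a = magic(4);                                  % any 4x4 test map
z = convn(a, ones(scale) / scale^2, 'valid');  % all overlapping 2x2 window means
pooled = z(1:scale:end, 1:scale:end);          % keep only the non-overlapping windows
blockwise = [mean(mean(a(1:2,1:2))), mean(mean(a(1:2,3:4))); ...
             mean(mean(a(3:4,1:2))), mean(mean(a(3:4,3:4)))];
assert(max(abs(pooled(:) - blockwise(:))) < 1e-12);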

Contents

  • backprop deltas
  • calc gradients
function net = cnnbp(net, y)
    n = numel(net.layers);
    %  error
    net.e = net.o - y;
    %  loss function
    net.L = 1/2 * sum(net.e(:) .^ 2) / size(net.e, 2);

backprop deltas

    net.od = net.e .* (net.o .* (1 - net.o));   %  output delta: the outermost delta, delta(L)
    net.fvd = (net.ffW' * net.od);              %  feature vector delta: the delta of the layer below, delta(l)
    if strcmp(net.layers{n}.type, 'c')          %  only conv layers have a sigmoid nonlinearity
        net.fvd = net.fvd .* (net.fv .* (1 - net.fv));
    end

    %  reshape feature vector deltas into output map style
    sa = size(net.layers{n}.a{1});
    fvnum = sa(1) * sa(2);
    for j = 1 : numel(net.layers{n}.a)
        net.layers{n}.d{j} = reshape(net.fvd(((j - 1) * fvnum + 1) : j * fvnum, :), sa(1), sa(2), sa(3));  % delta
    end

    for l = (n - 1) : -1 : 1  % propagate the error backwards, starting from the second-to-last layer
        if strcmp(net.layers{l}.type, 'c')
            for j = 1 : numel(net.layers{l}.a)
                net.layers{l}.d{j} = net.layers{l}.a{j} .* (1 - net.layers{l}.a{j}) .* (expand(net.layers{l + 1}.d{j}, [net.layers{l + 1}.scale net.layers{l + 1}.scale 1]) / net.layers{l + 1}.scale ^ 2);  % f'(u) .* up(delta); see the upsampling sketch after this function
            end
        elseif strcmp(net.layers{l}.type, 's')
            for i = 1 : numel(net.layers{l}.a)
                z = zeros(size(net.layers{l}.a{1}));  % each map here is 12x12, over 50 samples
                for j = 1 : numel(net.layers{l + 1}.a)
                    z = z + convn(net.layers{l + 1}.d{j}, rot180(net.layers{l + 1}.k{i}{j}), 'full');  % sum over j accumulates the delta for input map i (note: the pooling output was not passed through a sigmoid)
                end
                net.layers{l}.d{i} = z;
            end
        end
    end

calc gradients

    for l = 2 : n  % start at layer 2 (c); the input layer (i) has no parameters
        if strcmp(net.layers{l}.type, 'c')  % gradients for the convolution layers
            for j = 1 : numel(net.layers{l}.a)
                for i = 1 : numel(net.layers{l - 1}.a)
                    net.layers{l}.dk{i}{j} = convn(flipall(net.layers{l - 1}.a{i}), net.layers{l}.d{j}, 'valid') / size(net.layers{l}.d{j}, 3);
                end
                net.layers{l}.db{j} = sum(net.layers{l}.d{j}(:)) / size(net.layers{l}.d{j}, 3);
            end
        end
    end
    %  the non-convolutional (fully connected output) layer
    net.dffW = net.od * (net.fv)' / size(net.od, 2);
    net.dffb = mean(net.od, 2);

    function X = rot180(X)
        X = flipdim(flipdim(X, 1), 2);
    end
end
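In the conv-layer branch of the backprop pass above, the delta is obtained by upsampling the following pooling layer's delta: expand (a DeepLearnToolbox utility that replicates array elements blockwise) copies each element over its scale x scale pooling window, and the division by scale^2 reflects that mean pooling gave each input an equal 1/scale^2 share of its output. A minimal sketch of what this does to one 2-D slice, using kron in place of expand (for a 2-D matrix the two replications agree):

d = [1 2; 3 4];               % a 2x2 delta map from the pooling layer
up = kron(d, ones(2)) / 2^2   % same as expand(d, [2 2]) / scale^2 on one slice
% up =
%     0.2500    0.2500    0.5000    0.5000
%     0.2500    0.2500    0.5000    0.5000
%     0.7500    0.7500    1.0000    1.0000
%     0.7500    0.7500    1.0000    1.0000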
function net = cnnapplygrads(net, opts)
    for l = 2 : numel(net.layers)
        if strcmp(net.layers{l}.type, 'c')  % convolution layers are updated separately from the output layer
            for j = 1 : numel(net.layers{l}.a)
                for ii = 1 : numel(net.layers{l - 1}.a)
                    net.layers{l}.k{ii}{j} = net.layers{l}.k{ii}{j} - opts.alpha * net.layers{l}.dk{ii}{j};
                end
                net.layers{l}.b{j} = net.layers{l}.b{j} - opts.alpha * net.layers{l}.db{j};
            end
        end
    end
    net.ffW = net.ffW - opts.alpha * net.dffW;
    net.ffb = net.ffb - opts.alpha * net.dffb;
end
function [er, bad] = cnntest(net, x, y)
    %  feedforward
    net = cnnff(net, x);
    [~, h] = max(net.o);  % index of the largest element in each column
    [~, a] = max(y);
    bad = find(h ~= a);
    er = numel(bad) / size(y, 2);
end
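cnntest leans on the fact that max over a matrix works per column: each column of net.o and of y is one sample, so the second output of max gives the predicted and true class indices, and er is simply the fraction of mismatched columns. A tiny standalone illustration (the scores here are made up):

o = [0.1 0.8; 0.7 0.1; 0.2 0.1];   % fake network outputs: 3 classes x 2 samples
y = [0 1; 1 0; 0 0];               % one-hot labels, same layout
[~, h] = max(o);                   % predicted class per column: h = [2 1]
[~, a] = max(y);                   % true class per column:      a = [2 1]
er = numel(find(h ~= a)) / size(y, 2)   % 0: both samples classified correctly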

