DeepLearnToolbox_CNN notes
来源:互联网 发布:网络大专多久可以报名 编辑:程序博客网 时间:2024/06/06 01:01
Preliminaries:
1."Notes on Convolutional Neural Networks"
2."Gradient-based learning applied to document recognition"
Contents
- ex1 Train a 6c-2s-12c-2s Convolutional neural network
function test_example_CNN
% TEST_EXAMPLE_CNN  Train a small CNN on MNIST for one epoch and check its error.
%
% Loads mnist_uint8, reshapes the images to 28 x 28 x N, trains a
% 6c-2s-12c-2s network with cnnsetup/cnntrain, evaluates it with cnntest,
% plots the smoothed training loss (cnn.rL) and asserts that the test error
% rate is below 0.12.

load mnist_uint8;

% Each row of train_x / test_x is one flattened 28*28 image (N x 784).
% Transpose and reshape to 28 x 28 x N, then divide by 255.
% The sample count is inferred with [] instead of being hard-coded to
% 60000 / 10000, so the script also works on a subset of the data.
train_x = double(reshape(train_x', 28, 28, [])) / 255;
test_x = double(reshape(test_x', 28, 28, [])) / 255;
train_y = double(train_y');   % each column represents one sample
test_y = double(test_y');

%% ex1 Train a 6c-2s-12c-2s Convolutional neural network
% Will run 1 epoch in about 200 seconds and get around 11% error.
% With 100 epochs you'll get around 1.2% error.
% Network structure: 5*5 kernels, 6c-2s-12c-2s.
rand('state',0)
cnn.layers = {
    struct('type', 'i')                                     % input layer
    struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5)   % convolution layer
    struct('type', 's', 'scale', 2)                         % subsampling layer (pooling)
    struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5)  % convolution layer
    struct('type', 's', 'scale', 2)                         % subsampling layer
};
cnn = cnnsetup(cnn, train_x, train_y);

opts.alpha = 1;        % learning rate
opts.batchsize = 50;   % number of samples per training batch
opts.numepochs = 1;

cnn = cnntrain(cnn, train_x, train_y, opts);

[er, bad] = cnntest(cnn, test_x, test_y);
er   % error rate (0.1113 in the recorded run); no semicolon so it is displayed

% plot mean squared error
figure; plot(cnn.rL);

assert(er<0.12, 'Too big error');
% Recorded output of test_example_CNN:
%   epoch 1/1
%   Elapsed time is 111.068904 seconds.
%   er = 0.1113

function net = cnnsetup(net, x, y)
% CNNSETUP  Initialise kernels, biases and output weights of a CNN.
%
%   net : struct whose net.layers is a cell array of layer structs with
%         type 'i' (input), 'c' (convolution: outputmaps, kernelsize) or
%         's' (subsampling: scale).
%   x   : input data; only x(:, :, 1) is used, to obtain the map size.
%   y   : labels; only size(y, 1) is used (number of output neurons).
%
% Returns net with per-layer kernels k{i}{j} and biases b{j} filled in, plus
% the output-layer parameters net.ffW and net.ffb.
    inputmaps = 1;
    mapsize = size(squeeze(x(:, :, 1)));   % squeeze removes singleton dimensions

    for l = 1 : numel(net.layers)   % layer
        if strcmp(net.layers{l}.type, 's')
            mapsize = mapsize / net.layers{l}.scale;
            assert(all(floor(mapsize)==mapsize), ['Layer ' num2str(l) ' size must be integer. Actual: ' num2str(mapsize)]);
            for j = 1 : inputmaps
                net.layers{l}.b{j} = 0;
            end
        end
        if strcmp(net.layers{l}.type, 'c')
            ksize = net.layers{l}.kernelsize;
            mapsize = mapsize - ksize + 1;   % e.g. 28 - 5 + 1
            % fan_out: #outputmaps * kernelsize^2 (weights towards the next layer)
            fan_out = net.layers{l}.outputmaps * ksize ^ 2;
            for j = 1 : net.layers{l}.outputmaps   % output map
                % fan_in: number of weights connected to the previous layer
                fan_in = inputmaps * ksize ^ 2;
                for i = 1 : inputmaps   % input map
                    % uniform in [-1, 1] scaled by sqrt(6 / (fan_in + fan_out))
                    net.layers{l}.k{i}{j} = (rand(ksize) - 0.5) * 2 * sqrt(6 / (fan_in + fan_out));
                end
                net.layers{l}.b{j} = 0;
            end
            inputmaps = net.layers{l}.outputmaps;
        end
    end

    % 'onum' is the number of labels, that's why it is calculated using size(y, 1).
    %   If you have 20 labels so the output of the network will be 20 neurons.
    % 'fvnum' is the number of output neurons at the last layer, the layer just
    %   before the output layer.
    % 'ffb' is the biases of the output neurons.
    % 'ffW' is the weights between the last layer and the output neurons.
    % Note that the last layer is fully connected to the output layer, that's
    % why the size of the weights is (onum * fvnum).
    fvnum = prod(mapsize) * inputmaps;   % prod: product of the elements
    onum = size(y, 1);

    net.ffb = zeros(onum, 1);
    net.ffW = (rand(onum, fvnum) - 0.5) * 2 * sqrt(6 / (onum + fvnum));
end

function net = cnntrain(net, x, y, opts)
% CNNTRAIN  Mini-batch training loop.
%
%   x    : inputs, samples indexed along dimension 3.
%   y    : targets, one column per sample.
%   opts : struct with fields batchsize and numepochs (alpha is consumed by
%          cnnapplygrads).
%
% Raises an error when the sample count is not a multiple of opts.batchsize.
% net.rL collects a smoothed history of the batch loss net.L.
    m = size(x, 3);   % number of samples
    numbatches = m / opts.batchsize;
    if rem(numbatches, 1) ~= 0
        error('numbatches not integer');
    end
    net.rL = [];
    for i = 1 : opts.numepochs
        disp(['epoch ' num2str(i) '/' num2str(opts.numepochs)]);
        tic;
        kk = randperm(m);   % fresh random order each epoch
        for l = 1 : numbatches   % 1200 batches for 60000 samples with batchsize 50
            % pick the next batchsize samples (random, without repetition)
            pick = kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize);
            batch_x = x(:, :, pick);
            batch_y = y(:, pick);   % matching labels

            net = cnnff(net, batch_x);
            net = cnnbp(net, batch_y);
            net = cnnapplygrads(net, opts);
            if isempty(net.rL)
                net.rL(1) = net.L;   % value of the loss function
            end
            % exponentially weighted running average of the batch loss,
            % appended once per batch (this is the curve that gets plotted)
            net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;
        end
        toc;
    end
end

function net = cnnff(net, x)
% CNNFF  Forward pass: fills net.layers{l}.a, net.fv and net.o.
%
%   x : input maps, samples along dimension 3. A single sample (a plain
%       2-D matrix) is accepted as well: dimensions are queried explicitly
%       because size() omits a trailing singleton third dimension.
    n = numel(net.layers);
    net.layers{1}.a{1} = x;
    inputmaps = 1;

    for l = 2 : n   % for each layer
        if strcmp(net.layers{l}.type, 'c')
            % !!below can probably be handled by insane matrix operations
            ksize = net.layers{l}.kernelsize;
            prev = net.layers{l - 1}.a{1};
            % explicit 3-element size, valid even for a batch of one
            insize = [size(prev, 1), size(prev, 2), size(prev, 3)];
            for j = 1 : net.layers{l}.outputmaps   % for each output map
                % create temp output map, e.g. [28 28 50] - [4 4 0]
                z = zeros(insize - [ksize - 1, ksize - 1, 0]);
                for i = 1 : inputmaps   % for each input map
                    % convolve with corresponding kernel and add to temp output map
                    z = z + convn(net.layers{l - 1}.a{i}, net.layers{l}.k{i}{j}, 'valid');
                end
                % add bias, pass through nonlinearity
                net.layers{l}.a{j} = sigm(z + net.layers{l}.b{j});
            end
            % set number of input maps to this layers number of outputmaps
            inputmaps = net.layers{l}.outputmaps;
        elseif strcmp(net.layers{l}.type, 's')
            % downsample (mean pooling)
            scale = net.layers{l}.scale;
            for j = 1 : inputmaps
                z = convn(net.layers{l - 1}.a{j}, ones(scale) / (scale ^ 2), 'valid');   % !! replace with variable
                % keep every scale-th entry: pooling windows do not overlap
                net.layers{l}.a{j} = z(1 : scale : end, 1 : scale : end, :);
            end
        end
    end

    % concatenate all end layer feature maps into vector
    net.fv = [];
    for j = 1 : numel(net.layers{n}.a)   % 12 feature maps in the example network
        top = net.layers{n}.a{j};        % 4*4*50 in the example network
        net.fv = [net.fv; reshape(top, size(top, 1) * size(top, 2), size(top, 3))];
    end
    % feedforward into output perceptrons
    net.o = sigm(net.ffW * net.fv + repmat(net.ffb, 1, size(net.fv, 2)));
end

% Contents
- backprop deltas
- calc gradients
function net = cnnbp(net, y)
% CNNBP  Backward pass: loss, per-layer deltas and parameter gradients.
%
%   net : network after a forward pass (uses net.o, net.fv, net.ffW and
%         net.layers{l}.a).
%   y   : targets, one column per sample.
%
% Sets net.e, net.L, net.od, net.fvd, net.layers{l}.d, the kernel/bias
% gradients net.layers{l}.dk / .db of every 'c' layer, and net.dffW / net.dffb.
    n = numel(net.layers);

    % error
    net.e = net.o - y;
    % loss function
    net.L = 1/2* sum(net.e(:) .^ 2) / size(net.e, 2);

    %% backprop deltas
    net.od = net.e .* (net.o .* (1 - net.o));   % output delta, delta(L)
    net.fvd = (net.ffW' * net.od);              % feature vector delta, delta(l) of the layer below
    if strcmp(net.layers{n}.type, 'c')          % only conv layers has sigm function
        net.fvd = net.fvd .* (net.fv .* (1 - net.fv));
    end

    % reshape feature vector deltas into output map style
    top = net.layers{n}.a{1};
    % query the three dimensions explicitly: size(top) alone has only two
    % entries when the batch holds a single sample
    sa = [size(top, 1), size(top, 2), size(top, 3)];
    fvnum = sa(1) * sa(2);
    for j = 1 : numel(net.layers{n}.a)
        net.layers{n}.d{j} = reshape(net.fvd(((j - 1) * fvnum + 1) : j * fvnum, :), sa(1), sa(2), sa(3));   % delta
    end

    for l = (n - 1) : -1 : 1   % propagate the error backwards, starting from the second-to-last layer
        if strcmp(net.layers{l}.type, 'c')
            % NOTE(review): expand is defined elsewhere; whether it accepts a
            % 2-D delta (batch of one) with a 3-element factor is not visible here.
            for j = 1 : numel(net.layers{l}.a)
                % f'(u) .* up(delta)
                net.layers{l}.d{j} = net.layers{l}.a{j} .* (1 - net.layers{l}.a{j}) .* (expand(net.layers{l + 1}.d{j}, [net.layers{l + 1}.scale net.layers{l + 1}.scale 1]) / net.layers{l + 1}.scale ^ 2);
            end
        elseif strcmp(net.layers{l}.type, 's')
            for i = 1 : numel(net.layers{l}.a)
                z = zeros(size(net.layers{l}.a{1}));   % 12*12 maps, 50 samples in the example network
                for j = 1 : numel(net.layers{l + 1}.a)
                    % accumulate the delta reaching map i from every output map j
                    % (no sigmoid derivative: the pooling layer has no nonlinearity)
                    z = z + convn(net.layers{l + 1}.d{j}, rot180(net.layers{l + 1}.k{i}{j}), 'full');
                end
                net.layers{l}.d{i} = z;
            end
        end
    end

    %% calc gradients
    for l = 2 : n   % start from the second layer; the input layer ('i') has no parameters
        if strcmp(net.layers{l}.type, 'c')   % gradients of a convolution layer
            for j = 1 : numel(net.layers{l}.a)
                for i = 1 : numel(net.layers{l - 1}.a)
                    net.layers{l}.dk{i}{j} = convn(flipall(net.layers{l - 1}.a{i}), net.layers{l}.d{j}, 'valid') / size(net.layers{l}.d{j}, 3);
                end
                net.layers{l}.db{j} = sum(net.layers{l}.d{j}(:)) / size(net.layers{l}.d{j}, 3);
            end
        end
    end
    % output (non-convolution) layer
    net.dffW = net.od * (net.fv)' / size(net.od, 2);
    net.dffb = mean(net.od, 2);

    function X = rot180(X)
        % flip along dimension 1, then dimension 2
        X = flipdim(flipdim(X, 1), 2);
    end
end
function net = cnnapplygrads(net, opts)
% CNNAPPLYGRADS  One gradient-descent step with step size opts.alpha.
%
% Subtracts opts.alpha times the stored gradients (dk, db, dffW, dffb) from
% the kernels and biases of every 'c' layer and from the output-layer
% weights net.ffW and biases net.ffb.
    step = opts.alpha;
    for layerIdx = 2 : numel(net.layers)
        % only convolution layers carry trainable kernels/biases here
        if ~strcmp(net.layers{layerIdx}.type, 'c')
            continue;
        end
        numOut = numel(net.layers{layerIdx}.a);
        numIn = numel(net.layers{layerIdx - 1}.a);
        for outIdx = 1 : numOut
            for inIdx = 1 : numIn
                kernelGrad = net.layers{layerIdx}.dk{inIdx}{outIdx};
                net.layers{layerIdx}.k{inIdx}{outIdx} = net.layers{layerIdx}.k{inIdx}{outIdx} - step * kernelGrad;
            end
            biasGrad = net.layers{layerIdx}.db{outIdx};
            net.layers{layerIdx}.b{outIdx} = net.layers{layerIdx}.b{outIdx} - step * biasGrad;
        end
    end

    % output layer is updated separately from the convolution layers
    net.ffW = net.ffW - step * net.dffW;
    net.ffb = net.ffb - step * net.dffb;
end

function [er, bad] = cnntest(net, x, y)
% CNNTEST  Classification error of the network on (x, y).
%
%   er  : fraction of samples whose predicted class differs from the target.
%   bad : indices of those samples.
    % feedforward
    net = cnnff(net, x);
    % max works column-wise: row index of the largest entry per sample
    [~, predicted] = max(net.o);
    [~, expected] = max(y);
    bad = find(predicted ~= expected);

    er = numel(bad) / size(y, 2);
end
0 0
- DeepLearnToolbox_CNN notes
- Notes
- notes
- Notes
- notes
- notes
- notes
- notes
- Notes
- notes
- Notes
- notes
- Notes
- notes
- Notes
- Notes
- Notes
- notes
- I/O完成端口简单例子
- (Relax DP1.4)UVA 10648 Chocolate Box(求将n个巧克力放在m个盒子中的概率)
- solr自动补全
- 又是指针
- 排序引用法
- DeepLearnToolbox_CNN notes
- C语言平面几何5-两点确定一条直线
- ASCII Converter
- 利用引用排序
- 多网卡绑定Bonding生产实战
- http://www.cnblogs.com/ma6174/ 小马哥的博客
- OTA
- 处理rm -rf 无法删除文件
- HTTP协议的状态码