网络结构图:
打开路径\tests\test_example_cnn.m
function test_example_CNN
% TEST_EXAMPLE_CNN  Train a small CNN on MNIST for one epoch and check its error.
%
%   Builds an input -> conv(6, 5x5) -> subsample(2) -> conv(12, 5x5) ->
%   subsample(2) network, trains it with cnntrain, evaluates it with
%   cnntest, plots the smoothed training loss (cnn.rL) and asserts that the
%   test error is below 0.12.
%
%   The toolbox folders are located relative to this file instead of through
%   a machine-specific absolute path. This assumes the file is stored in
%   <toolbox root>\tests, next to the data, CNN and util folders.
%   `clear` was dropped: a function starts with an empty workspace, so it
%   had nothing to clear.

    clc;

    % <toolbox root> is the parent of the folder that contains this file.
    toolboxRoot = fileparts(fileparts(mfilename('fullpath')));
    addpath(fullfile(toolboxRoot, 'data'));
    addpath(fullfile(toolboxRoot, 'CNN'));
    addpath(fullfile(toolboxRoot, 'util'));

    % Load the training / test data into a struct so that it is explicit
    % which variables come from the MAT-file.
    mnist = load('mnist_uint8');

    % Each row of the stored matrices is turned into one 28x28 slice and
    % scaled from [0, 255] to [0, 1]; labels are transposed so that each
    % column belongs to one sample.
    train_x = double(reshape(mnist.train_x', 28, 28, 60000)) / 255;
    test_x  = double(reshape(mnist.test_x', 28, 28, 10000)) / 255;
    train_y = double(mnist.train_y');
    test_y  = double(mnist.test_y');

    % Legacy seeding is kept on purpose: switching generators would change
    % the random initial weights and therefore the error that is asserted on.
    rand('state', 0)

    % Number of feature maps, kernel size and pooling scale for every layer.
    cnn.layers = {
        struct('type', 'i')                                     % input layer
        struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5)   % convolution layer: 6 feature maps, 5x5 kernels
        struct('type', 's', 'scale', 2)                         % subsampling layer: 2x2 neighbourhoods
        struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5)  % convolution layer: 12 feature maps, 5x5 kernels
        struct('type', 's', 'scale', 2)                         % subsampling layer: 2x2 neighbourhoods
    };

    opts.alpha = 1;        % learning rate
    opts.batchsize = 50;   % samples per mini-batch
    opts.numepochs = 1;    % passes over the training set

    cnn = cnnsetup(cnn, train_x, train_y);
    cnn = cnntrain(cnn, train_x, train_y, opts);

    [er, bad] = cnntest(cnn, test_x, test_y);

    % Smoothed loss recorded after every mini-batch.
    figure; plot(cnn.rL);
    assert(er<0.12, 'Too big error');
end
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
\CNN\cnnsetup.m
function net = cnnsetup(net, x, y)
% CNNSETUP  Initialise the weights and biases of every layer of the network.
%
%   net = cnnsetup(net, x, y)
%
%   net - struct whose field `layers` is a cell array of layer structs
%         ('i' input, 'c' convolution, 's' subsampling).
%   x   - input data; only the first slice x(:, :, 1) is used, to obtain
%         the spatial size of one input map.
%   y   - targets; size(y, 1) is used as the number of output units.
%
%   Adds k (kernels) and b (biases) to convolution layers, b to subsampling
%   layers, and the final fully connected weights net.ffW / biases net.ffb.

    assert(~isOctave() || compare_versions(OCTAVE_VERSION, '3.8.0', '>='), ['Octave 3.8.0 or greater is required for CNNs as there is a bug in convolution in previous versions. See http://savannah.gnu.org/bugs/?39314. Your version is ' myOctaveVersion]);

    inputmaps = 1;                           % number of maps feeding the current layer
    mapsize = size(squeeze(x(:, :, 1)));     % spatial size of one map

    for l = 1 : numel(net.layers)
        layer = net.layers{l};

        if strcmp(layer.type, 's')
            % Subsampling shrinks the map by `scale`; the result has to
            % stay an integer size.
            mapsize = mapsize / layer.scale;
            assert(all(floor(mapsize)==mapsize), ['Layer ' num2str(l) ' size must be integer. Actual: ' num2str(mapsize)]);
            for mapIdx = 1 : inputmaps
                layer.b{mapIdx} = 0;
            end
        end

        if strcmp(layer.type, 'c')
            % A 'valid' convolution removes kernelsize - 1 rows and columns.
            ksz = layer.kernelsize;
            mapsize = mapsize - ksz + 1;

            % Uniform initialisation in [-bound, bound] with
            % bound = sqrt(6 / (fan_in + fan_out)).
            fan_out = layer.outputmaps * ksz ^ 2;
            fan_in = inputmaps * ksz ^ 2;
            bound = sqrt(6 / (fan_in + fan_out));

            % Kernels are drawn output map by output map, input map by
            % input map; the order determines which random numbers each
            % kernel receives.
            for outIdx = 1 : layer.outputmaps
                for inIdx = 1 : inputmaps
                    layer.k{inIdx}{outIdx} = (rand(ksz) - 0.5) * 2 * bound;
                end
                layer.b{outIdx} = 0;
            end
            inputmaps = layer.outputmaps;
        end

        net.layers{l} = layer;
    end

    % The maps of the last layer are flattened into one feature vector that
    % feeds a fully connected output layer.
    fvnum = prod(mapsize) * inputmaps;   % length of the feature vector
    onum = size(y, 1);                   % number of output units

    net.ffb = zeros(onum, 1);
    net.ffW = (rand(onum, fvnum) - 0.5) * 2 * sqrt(6 / (onum + fvnum));
end
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
\CNN\cnntrain.m
function net = cnntrain(net, x, y, opts)
% CNNTRAIN  Train the network with mini-batch gradient descent.
%
%   net = cnntrain(net, x, y, opts)
%
%   x    - samples stacked along the third dimension.
%   y    - targets, one column per sample.
%   opts - struct with fields batchsize and numepochs (opts is also passed
%          on to cnnapplygrads).
%
%   Each epoch visits the samples in a fresh random order. For every
%   mini-batch the forward pass (cnnff), the backward pass (cnnbp) and the
%   parameter update (cnnapplygrads) are run. net.rL collects an
%   exponentially smoothed history of the batch loss net.L for plotting.
%
%   Raises an error when the number of samples is not a multiple of
%   opts.batchsize.

    sampleCount = size(x, 3);
    numbatches = sampleCount / opts.batchsize;   % mini-batches per epoch
    if rem(numbatches, 1) ~= 0
        error('numbatches not integer');
    end

    net.rL = [];
    for epoch = 1 : opts.numepochs
        disp(['epoch ' num2str(epoch) '/' num2str(opts.numepochs)]);
        tic;

        % Random permutation of all sample indices; consecutive chunks of
        % it form the mini-batches, so no sample repeats within an epoch.
        order = randperm(sampleCount);

        for batchIdx = 1 : numbatches
            lastPos = batchIdx * opts.batchsize;
            pick = order(lastPos - opts.batchsize + 1 : lastPos);

            net = cnnff(net, x(:, :, pick));   % forward pass
            net = cnnbp(net, y(:, pick));      % loss and gradients
            net = cnnapplygrads(net, opts);    % parameter update

            % The first batch seeds the history with its raw loss; after
            % that every batch appends 0.99 * previous + 0.01 * current.
            if isempty(net.rL)
                net.rL(1) = net.L;
            end
            net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;
        end

        toc;
    end
end
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
\CNN\cnnff.m
forward propagation 前向传播,计算各层的输出
function net = cnnff(net, x)
% CNNFF  Forward pass of the network.
%
%   net = cnnff(net, x)
%
%   x - input maps, samples stacked along the third dimension. A single
%       sample may be passed as a plain 2-D matrix.
%
%   Stores the activations of every layer in net.layers{l}.a{j}, the
%   flattened last-layer feature vector in net.fv (one column per sample)
%   and the network output in net.o.
%
%   Sizes are read per dimension with size(A, d) rather than from the
%   vector size(A): MATLAB drops a trailing singleton dimension, so for a
%   single sample size(A) has only two elements and the original
%   size(A) - [k-1 k-1 0] and sa(3) expressions raised an error.
%
%   NOTE(review): sigm is defined outside this file; cnnbp differentiates
%   it as a .* (1 - a), so it is presumably the logistic sigmoid.

    n = numel(net.layers);
    net.layers{1}.a{1} = x;
    inputmaps = 1;   % number of maps produced by the previous layer

    for l = 2 : n
        if strcmp(net.layers{l}.type, 'c')
            % A 'valid' convolution removes kernelsize - 1 rows and columns.
            ksz = net.layers{l}.kernelsize;
            prevMap = net.layers{l - 1}.a{1};
            outSize = [size(prevMap, 1) - ksz + 1, size(prevMap, 2) - ksz + 1, size(prevMap, 3)];

            for j = 1 : net.layers{l}.outputmaps
                % Sum the convolutions of all input maps with the kernels
                % that belong to output map j.
                z = zeros(outSize);
                for i = 1 : inputmaps
                    z = z + convn(net.layers{l - 1}.a{i}, net.layers{l}.k{i}{j}, 'valid');
                end
                % Add the bias and apply the non-linearity.
                net.layers{l}.a{j} = sigm(z + net.layers{l}.b{j});
            end
            inputmaps = net.layers{l}.outputmaps;

        elseif strcmp(net.layers{l}.type, 's')
            % Mean pooling: average over scale x scale windows with a box
            % filter, then keep every scale-th row and column.
            scale = net.layers{l}.scale;
            boxFilter = ones(scale) / (scale ^ 2);
            for j = 1 : inputmaps
                z = convn(net.layers{l - 1}.a{j}, boxFilter, 'valid');
                net.layers{l}.a{j} = z(1 : scale : end, 1 : scale : end, :);
            end
        end
    end

    % Stack all maps of the last layer into one feature vector per sample.
    net.fv = [];
    for j = 1 : numel(net.layers{n}.a)
        lastMap = net.layers{n}.a{j};
        net.fv = [net.fv; reshape(lastMap, size(lastMap, 1) * size(lastMap, 2), size(lastMap, 3))];
    end

    % Fully connected output layer.
    net.o = sigm(net.ffW * net.fv + repmat(net.ffb, 1, size(net.fv, 2)));
end
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
\CNN\cnnbp.m
function net = cnnbp(net, y)
% CNNBP  Backward pass: compute the loss, the layer deltas and the gradients.
%
%   net = cnnbp(net, y)
%
%   Expects the activations written by cnnff (net.layers{l}.a, net.fv,
%   net.o). y holds the targets, one column per sample.
%
%   Writes
%     net.e, net.L          output error and loss (half the summed squared
%                           error divided by the number of columns of net.e)
%     net.od, net.fvd       deltas of the output layer and the feature vector
%     net.layers{l}.d       deltas of every layer
%     net.layers{l}.dk, db  kernel and bias gradients of convolution layers
%     net.dffW, net.dffb    gradients of the fully connected output layer
%
%   Changes compared with the original listing:
%     * the size of the last-layer maps is read per dimension, so a batch
%       with a single sample (2-D maps) no longer fails on sa(3);
%     * the replication vector passed to expand is trimmed to the number
%       of dimensions of the delta (unchanged for 3-D batches);
%     * rot180 uses rot90(X, 2) instead of the legacy flipdim.
%
%   NOTE(review): a convolution layer reads net.layers{l + 1}.scale, i.e.
%   it assumes that it is directly followed by a subsampling layer.

    n = numel(net.layers);

    % Error and loss.
    net.e = net.o - y;
    net.L = 1/2 * sum(net.e(:) .^ 2) / size(net.e, 2);

    % Output delta: error times the derivative o .* (1 - o).
    net.od = net.e .* (net.o .* (1 - net.o));
    % Feature vector delta.
    net.fvd = (net.ffW' * net.od);
    if strcmp(net.layers{n}.type, 'c')
        % Only a convolution layer applied a non-linearity to its output.
        net.fvd = net.fvd .* (net.fv .* (1 - net.fv));
    end

    % Cut the feature vector delta back into maps shaped like the last layer.
    lastMap = net.layers{n}.a{1};
    sa = [size(lastMap, 1), size(lastMap, 2), size(lastMap, 3)];
    fvnum = sa(1) * sa(2);
    for j = 1 : numel(net.layers{n}.a)
        net.layers{n}.d{j} = reshape(net.fvd(((j - 1) * fvnum + 1) : j * fvnum, :), sa(1), sa(2), sa(3));
    end

    % Propagate the deltas from the last layer towards the input.
    for l = (n - 1) : -1 : 1
        if strcmp(net.layers{l}.type, 'c')
            for j = 1 : numel(net.layers{l}.a)
                scale = net.layers{l + 1}.scale;
                dNext = net.layers{l + 1}.d{j};
                % One replication factor per dimension of dNext.
                % NOTE(review): expand is defined outside this file and is
                % assumed to want a factor vector as long as ndims(dNext).
                reps = [scale scale 1];
                reps = reps(1 : ndims(dNext));
                % Upsample the delta of the following layer, divide by the
                % window area and multiply by the derivative a .* (1 - a).
                net.layers{l}.d{j} = net.layers{l}.a{j} .* (1 - net.layers{l}.a{j}) .* (expand(dNext, reps) / scale ^ 2);
            end
        elseif strcmp(net.layers{l}.type, 's')
            for i = 1 : numel(net.layers{l}.a)
                % Sum the 'full' convolutions of every delta of the next
                % layer with the matching kernel rotated by 180 degrees.
                z = zeros(size(net.layers{l}.a{1}));
                for j = 1 : numel(net.layers{l + 1}.a)
                    z = z + convn(net.layers{l + 1}.d{j}, rot180(net.layers{l + 1}.k{i}{j}), 'full');
                end
                net.layers{l}.d{i} = z;
            end
        end
    end

    % Gradients of the convolution layers, divided by the batch size.
    for l = 2 : n
        if strcmp(net.layers{l}.type, 'c')
            for j = 1 : numel(net.layers{l}.a)
                batchSize = size(net.layers{l}.d{j}, 3);
                for i = 1 : numel(net.layers{l - 1}.a)
                    net.layers{l}.dk{i}{j} = convn(flipall(net.layers{l - 1}.a{i}), net.layers{l}.d{j}, 'valid') / batchSize;
                end
                net.layers{l}.db{j} = sum(net.layers{l}.d{j}(:)) / batchSize;
            end
        end
    end

    % Gradients of the fully connected output layer.
    net.dffW = net.od * (net.fv)' / size(net.od, 2);
    net.dffb = mean(net.od, 2);   % mean over the samples (second dimension)

    function X = rot180(X)
        % Rotate a 2-D kernel by 180 degrees (reverse rows and columns).
        % The kernels are created as 2-D matrices by cnnsetup.
        X = rot90(X, 2);
    end
end
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
在这里插入一张图片便于大家理解:
\CNN\cnnapplygrads.m
该函数完成权重修改,更新模型的功能
1更新特征抽取层的权重 weight+bias
2 更新末尾单层感知机的权重 weight+bias
function net = cnnapplygrads(net, opts)
% CNNAPPLYGRADS  Apply one gradient-descent step to all trainable parameters.
%
%   net = cnnapplygrads(net, opts)
%
%   Subtracts opts.alpha times the stored gradient from
%     * every kernel k{i}{j} and bias b{j} of the convolution layers
%       (gradients dk{i}{j} and db{j}), and
%     * the weights net.ffW and biases net.ffb of the final fully connected
%       layer (gradients net.dffW and net.dffb).
%   Layers of any other type are left untouched.

    step = opts.alpha;

    for layerIdx = 2 : numel(net.layers)
        if ~strcmp(net.layers{layerIdx}.type, 'c')
            continue;
        end

        % The number of stored activation maps gives the number of output
        % maps of this layer and of input maps from the previous layer.
        outCount = numel(net.layers{layerIdx}.a);
        inCount = numel(net.layers{layerIdx - 1}.a);

        for outIdx = 1 : outCount
            for inIdx = 1 : inCount
                kernelGrad = net.layers{layerIdx}.dk{inIdx}{outIdx};
                net.layers{layerIdx}.k{inIdx}{outIdx} = net.layers{layerIdx}.k{inIdx}{outIdx} - step * kernelGrad;
            end
            biasGrad = net.layers{layerIdx}.db{outIdx};
            net.layers{layerIdx}.b{outIdx} = net.layers{layerIdx}.b{outIdx} - step * biasGrad;
        end
    end

    net.ffW = net.ffW - step * net.dffW;
    net.ffb = net.ffb - step * net.dffb;
end
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
\CNN\cnntest.m
验证测试样本的准确率
function [er, bad] = cnntest(net, x, y)
% CNNTEST  Measure the classification error of the current model.
%
%   [er, bad] = cnntest(net, x, y)
%
%   Runs the forward pass on x and compares, column by column, the
%   position of the largest network output with the position of the
%   largest entry of y.
%
%   er  - number of mismatching columns divided by size(y, 2)
%   bad - indices of the mismatching columns

    net = cnnff(net, x);

    [~, predicted] = max(net.o);   % position of the largest output per column
    [~, expected] = max(y);        % position of the largest target per column

    mismatch = predicted ~= expected;
    bad = find(mismatch);
    er = numel(bad) / size(y, 2);
end