ConvnetJS源代码分析第三篇
来源:互联网 发布:乌鲁木齐网站排名seo 编辑:程序博客网 时间:2024/06/07 11:55
在前面的两篇文章中介绍了其使用的基本数据结构Vol和神经网络组件Layer。最后两个就是convnet_net.js和convnet_trainers.js。
2.6 convnet_net.js
在第二篇中曾经初步介绍过这个文件的内容:它主要接受由Layer定义组成的array,并据此构建出一个完整的神经网络。以下是文件convnet_net.js的全部代码,其中中文注释为笔者添加,英文注释为原作者所写。
(function(global) {
  "use strict";

  // Resolve the shared assert helper at call time (like the layer constructors
  // below) so this module does not depend on script load order.
  var assert = function(condition, message) {
    return global.assert(condition, message);
  };

  // Maps a layer `type` / serialized `layer_type` string to the name of the
  // constructor exposed on the library namespace object.
  var LAYER_CLASS_NAMES = {
    fc: 'FullyConnLayer',
    lrn: 'LocalResponseNormalizationLayer',
    dropout: 'DropoutLayer',
    input: 'InputLayer',
    softmax: 'SoftmaxLayer',
    regression: 'RegressionLayer',
    conv: 'ConvLayer',
    pool: 'PoolLayer',
    relu: 'ReluLayer',
    sigmoid: 'SigmoidLayer',
    tanh: 'TanhLayer',
    maxout: 'MaxoutLayer',
    svm: 'SVMLayer'
  };

  // Returns the constructor registered for `type`, or null when the type is
  // not one of the known layer types. Own-property check keeps inherited keys
  // such as 'constructor' from being mistaken for layer types.
  var lookupLayerClass = function(type) {
    if(!Object.prototype.hasOwnProperty.call(LAYER_CLASS_NAMES, type)) {
      return null;
    }
    return global[LAYER_CLASS_NAMES[type]];
  };

  // Expands the user-facing layer definitions into the full list of layers:
  //  - inserts an fc layer in front of every softmax / svm / regression layer,
  //  - fills in a default bias_pref on fc / conv layers (0.1 when followed by
  //    relu, otherwise 0.0),
  //  - appends an explicit activation layer for `def.activation`,
  //  - appends a dropout layer for `def.drop_prob`.
  // Note: bias_pref is written onto the caller's definition objects.
  var desugar = function(defs) {
    var new_defs = [];
    for(var i=0;i<defs.length;i++) {
      var def = defs[i];

      if(def.type==='softmax' || def.type==='svm') {
        // add an fc layer here, there is no reason the user should
        // have to worry about this and we almost always want to
        new_defs.push({type:'fc', num_neurons: def.num_classes});
      }

      if(def.type==='regression') {
        // same convenience fc layer in front of a regression loss
        new_defs.push({type:'fc', num_neurons: def.num_neurons});
      }

      if((def.type==='fc' || def.type==='conv') && typeof(def.bias_pref) === 'undefined') {
        def.bias_pref = 0.0;
        if(typeof def.activation !== 'undefined' && def.activation === 'relu') {
          // relus like a bit of positive bias to get gradients early,
          // otherwise it's technically possible that a relu unit will never
          // turn on (by chance), never get any gradient and never contribute
          // any computation. Dead relu.
          def.bias_pref = 0.1;
        }
      }

      new_defs.push(def);

      if(typeof def.activation !== 'undefined') {
        if(def.activation==='relu') {
          new_defs.push({type:'relu'});
        } else if(def.activation==='sigmoid') {
          new_defs.push({type:'sigmoid'});
        } else if(def.activation==='tanh') {
          new_defs.push({type:'tanh'});
        } else if(def.activation==='maxout') {
          // create maxout activation, and pass along group size, if provided.
          // `typeof` is required here: comparing the value itself against the
          // string 'undefined' is always true, so the default never applied.
          var gs = typeof def.group_size !== 'undefined' ? def.group_size : 2;
          new_defs.push({type:'maxout', group_size:gs});
        } else {
          console.log('ERROR unsupported activation ' + def.activation);
        }
      }

      if(typeof def.drop_prob !== 'undefined' && def.type !== 'dropout') {
        new_defs.push({type:'dropout', drop_prob: def.drop_prob});
      }
    }
    return new_defs;
  };

  // Net manages a set of layers
  // For now constraints: Simple linear order of layers, first layer input last layer a cost layer
  var Net = function(options) {
    this.layers = []; // layer instances, in forward order
  };

  Net.prototype = {

    // Takes a list of layer definitions and creates the network layer objects,
    // storing them in this.layers. Each layer after the first receives the
    // previous layer's out_sx / out_sy / out_depth as its in_* sizes.
    makeLayers: function(defs) {
      // few checks
      assert(defs.length >= 2, 'Error! At least one input layer and one loss layer are required.');
      assert(defs[0].type === 'input', 'Error! First layer must be the input layer, to declare size of inputs');

      // desugar layer_defs for adding activation, dropout layers etc
      defs = desugar(defs);

      // create the layers
      this.layers = [];
      for(var i=0;i<defs.length;i++) {
        var def = defs[i];
        if(i>0) {
          var prev = this.layers[i-1];
          def.in_sx = prev.out_sx;
          def.in_sy = prev.out_sy;
          def.in_depth = prev.out_depth;
        }

        var LayerClass = lookupLayerClass(def.type);
        if(LayerClass === null) {
          console.log('ERROR: UNRECOGNIZED LAYER TYPE: ' + def.type);
        } else {
          this.layers.push(new LayerClass(def));
        }
      }
    },

    // Forward prop the network: feeds V through every layer in order, each
    // layer's output becoming the next layer's input, and returns the last
    // layer's output. The trainer class passes is_training = true, but when
    // this function is called from outside (not from the trainer), it
    // defaults to prediction mode.
    forward: function(V, is_training) {
      if(typeof(is_training) === 'undefined') is_training = false;
      var act = this.layers[0].forward(V, is_training);
      for(var i=1;i<this.layers.length;i++) {
        act = this.layers[i].forward(act, is_training);
      }
      return act;
    },

    // Runs a prediction-mode forward pass on V and returns whatever the last
    // layer's backward(y) returns (the last layer is assumed to be the loss
    // layer).
    getCostLoss: function(V, y) {
      this.forward(V, false);
      var N = this.layers.length;
      var loss = this.layers[N-1].backward(y);
      return loss;
    },

    // backprop: compute gradients wrt all parameters. Calls backward(y) on the
    // last layer, then backward() on every earlier layer from back to front,
    // and returns the value produced by the last layer.
    backward: function(y) {
      var N = this.layers.length;
      var loss = this.layers[N-1].backward(y); // last layer assumed to be loss layer
      for(var i=N-2;i>=0;i--) { // first layer assumed input
        this.layers[i].backward();
      }
      return loss;
    },

    // Accumulates parameters and gradients for the entire network: the
    // concatenation of every layer's getParamsAndGrads() list, in layer order.
    getParamsAndGrads: function() {
      var response = [];
      for(var i=0;i<this.layers.length;i++) {
        var layer_response = this.layers[i].getParamsAndGrads();
        for(var j=0;j<layer_response.length;j++) {
          response.push(layer_response[j]);
        }
      }
      return response;
    },

    // Convenience function for returning the argmax prediction, assuming the
    // last layer of the net is a softmax. Reads the last layer's out_act.w, so
    // forward() must have been run first. Ties resolve to the lowest index.
    getPrediction: function() {
      var S = this.layers[this.layers.length-1];
      assert(S.layer_type === 'softmax', 'getPrediction function assumes softmax as last layer of the net!');

      var p = S.out_act.w;
      var maxv = p[0];
      var maxi = 0;
      for(var i=1;i<p.length;i++) {
        if(p[i] > maxv) { maxv = p[i]; maxi = i; }
      }
      return maxi; // return index of the class with highest class probability
    },

    // Returns {layers: [...]} holding each layer's own toJSON() result.
    toJSON: function() {
      var json = {};
      json.layers = [];
      for(var i=0;i<this.layers.length;i++) {
        json.layers.push(this.layers[i].toJSON());
      }
      return json;
    },

    // Rebuilds this.layers from an object produced by toJSON(): instantiates
    // each layer from its layer_type and lets the layer restore itself via its
    // own fromJSON(). Throws an Error on an unknown layer_type.
    fromJSON: function(json) {
      this.layers = [];
      for(var i=0;i<json.layers.length;i++) {
        var Lj = json.layers[i];
        var t = Lj.layer_type;
        var LayerClass = lookupLayerClass(t);
        if(LayerClass === null) {
          throw new Error('fromJSON: unrecognized layer type: ' + t);
        }
        var L = new LayerClass();
        L.fromJSON(Lj);
        this.layers.push(L);
      }
    }
  };

  global.Net = Net;
})(convnetjs);
2.7convnet_trainers.js
最后一个文件是类Trainer的定义文件,在文件中仅仅有一个方法train()。其输入是之前构建的神经网络,以及一些训练时指定的参数。
以下为convnet_trainers.js文件的全部代码,中文为我自己添加的注释。从下面的分析可以知道,作者这里的训练方式是BP算法。这与常见的深度神经网络训练方式有所不同:后者通常会先对网络进行无监督预训练,用得到的权值来初始化网络,然后再使用BP算法进行微调。
作者使用的默认训练方式是Stochastic Gradient Descent(SGD),即随机梯度下降。其更新公式由参数的梯度、动量项以及L1、L2正则项共同组成。
(function(global) {
  "use strict";
  var Vol = global.Vol; // convenience

  // Trainer updates the parameters of `net` from examples passed to train().
  // Recognized options (with defaults): learning_rate (0.01), l1_decay (0.0),
  // l2_decay (0.0), batch_size (1), method ('sgd'), momentum (0.9), ro (0.95),
  // eps (1e-8), beta1 (0.9), beta2 (0.999).
  var Trainer = function(net, options) {
    this.net = net;

    var opt = options || {};
    var pick = function(key, fallback) {
      return typeof opt[key] !== 'undefined' ? opt[key] : fallback;
    };

    this.learning_rate = pick('learning_rate', 0.01);
    this.l1_decay = pick('l1_decay', 0.0);
    this.l2_decay = pick('l2_decay', 0.0);
    this.batch_size = pick('batch_size', 1);
    this.method = pick('method', 'sgd'); // sgd/adam/adagrad/adadelta/windowgrad/nesterov

    this.momentum = pick('momentum', 0.9);
    this.ro = pick('ro', 0.95); // used in adadelta
    this.eps = pick('eps', 1e-8); // used in adam or adadelta
    this.beta1 = pick('beta1', 0.9); // used in adam
    this.beta2 = pick('beta2', 0.999); // used in adam

    this.k = 0; // iteration counter (number of train() calls so far)
    this.gsum = []; // last iteration gradients (used for momentum calculations)
    this.xsum = []; // used in adam or adadelta

    // check if regression is expected
    var lastLayer = this.net.layers[this.net.layers.length - 1];
    this.regression = lastLayer.layer_type === "regression";
  };

  Trainer.prototype = {

    // Runs one training example through the net (forward with
    // is_training = true, then backward(y)) and, on every batch_size-th call,
    // applies a parameter update using the configured method and zeroes the
    // gradients.
    // Returns {fwd_time, bwd_time, l2_decay_loss, l1_decay_loss, cost_loss,
    // softmax_loss, loss}; times are in milliseconds, and the decay losses are
    // 0 on calls that do not perform an update.
    train: function(x, y) {

      var start = new Date().getTime();
      this.net.forward(x, true); // also set the flag that lets the net know we're just training
      var fwd_time = new Date().getTime() - start;

      start = new Date().getTime();
      var cost_loss = this.net.backward(y);
      var l2_decay_loss = 0.0;
      var l1_decay_loss = 0.0;
      var bwd_time = new Date().getTime() - start;

      if(this.regression && y.constructor !== Array) {
        console.log("Warning: a regression net requires an array as training output vector.");
      }

      this.k++;
      if(this.k % this.batch_size === 0) {

        var pglist = this.net.getParamsAndGrads();
        var i, j, dx;

        // initialize lists for accumulators. Will only be done once on first iteration
        if(this.gsum.length === 0 && (this.method !== 'sgd' || this.momentum > 0.0)) {
          // only vanilla sgd doesnt need either lists
          // momentum needs gsum
          // adagrad needs gsum
          // adam and adadelta needs gsum and xsum
          for(i=0;i<pglist.length;i++) {
            this.gsum.push(global.zeros(pglist[i].params.length));
            if(this.method === 'adam' || this.method === 'adadelta') {
              this.xsum.push(global.zeros(pglist[i].params.length));
            } else {
              this.xsum.push([]); // conserve memory
            }
          }
        }

        // perform an update for all sets of weights
        for(i=0;i<pglist.length;i++) {
          var pg = pglist[i]; // param, gradient, other options in future (custom learning rate etc)
          var p = pg.params;
          var g = pg.grads;

          // per-parameter-set multipliers on the global decay strengths
          var l2_decay_mul = typeof pg.l2_decay_mul !== 'undefined' ? pg.l2_decay_mul : 1.0;
          var l1_decay_mul = typeof pg.l1_decay_mul !== 'undefined' ? pg.l1_decay_mul : 1.0;
          var l2_decay = this.l2_decay * l2_decay_mul;
          var l1_decay = this.l1_decay * l1_decay_mul;

          // undefined for vanilla sgd, which never reads them
          var gsumi = this.gsum[i];
          var xsumi = this.xsum[i];

          var plen = p.length;
          for(j=0;j<plen;j++) {
            l2_decay_loss += l2_decay*p[j]*p[j]/2; // accumulate weight decay loss
            l1_decay_loss += l1_decay*Math.abs(p[j]);
            var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
            var l2grad = l2_decay * (p[j]);

            var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient

            if(this.method === 'adam') {
              // adam update
              gsumi[j] = gsumi[j] * this.beta1 + (1 - this.beta1) * gij; // update biased first moment estimate
              xsumi[j] = xsumi[j] * this.beta2 + (1 - this.beta2) * gij * gij; // update biased second moment estimate
              // Bias correction DIVIDES by (1 - beta^t): the moment estimates
              // start at zero and are therefore too small early on.
              // NOTE(review): this.k counts train() calls, not updates, so with
              // batch_size > 1 the correction fades faster than one step per update.
              var biasCorr1 = gsumi[j] / (1 - Math.pow(this.beta1, this.k)); // correct bias first moment estimate
              var biasCorr2 = xsumi[j] / (1 - Math.pow(this.beta2, this.k)); // correct bias second moment estimate
              dx = - this.learning_rate * biasCorr1 / (Math.sqrt(biasCorr2) + this.eps);
              p[j] += dx;
            } else if(this.method === 'adagrad') {
              // adagrad update
              gsumi[j] = gsumi[j] + gij * gij;
              dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij;
              p[j] += dx;
            } else if(this.method === 'windowgrad') {
              // this is adagrad but with a moving window weighted average
              // so the gradient is not accumulated over the entire history of the run.
              // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me!
              gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
              dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning
              p[j] += dx;
            } else if(this.method === 'adadelta') {
              gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
              dx = - Math.sqrt((xsumi[j] + this.eps)/(gsumi[j] + this.eps)) * gij;
              xsumi[j] = this.ro * xsumi[j] + (1 - this.ro) * dx * dx; // yes, xsum lags behind gsum by 1.
              p[j] += dx;
            } else if(this.method === 'nesterov') {
              dx = gsumi[j];
              gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij;
              dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j];
              p[j] += dx;
            } else {
              // assume SGD
              if(this.momentum > 0.0) {
                // momentum update
                dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step
                gsumi[j] = dx; // back this up for next iteration of momentum
                p[j] += dx; // apply corrected gradient
              } else {
                // vanilla sgd
                p[j] += - this.learning_rate * gij;
              }
            }
            g[j] = 0.0; // zero out gradient so that we can begin accumulating anew
          }
        }
      }

      // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss
      // in future, TODO: have to completely redo the way loss is done around the network as currently
      // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer
      // and it should all be computed correctly and automatically.
      return {
        fwd_time: fwd_time,
        bwd_time: bwd_time,
        l2_decay_loss: l2_decay_loss,
        l1_decay_loss: l1_decay_loss,
        cost_loss: cost_loss,
        softmax_loss: cost_loss,
        loss: cost_loss + l1_decay_loss + l2_decay_loss
      };
    }
  };

  global.Trainer = Trainer;
  global.SGDTrainer = Trainer; // backwards compatibility
})(convnetjs);
截止到目前为止,作者使用的基本API功能已经全部分析完了。在src中还有一个js文件convnet_magicnet.js。其中定义了magicnet.作者对其的描述是:
The MagicNet class performs fully-automatic prediction on your data. You don't have to worry about anything except providing your data and letting it train for a while. Internally, the MagicNet tries out many different types of networks, performs n-fold cross-validations of network hyper-parameters across folds of your data, and creates a final classifier ensemble by model averaging the best architectures.
从中可以看出,这是对以上基本API的一次二次封装,用户使用时不需要担心内部的模型选择。convnet_magicnet.js将在后续解析ConvnetJS应用的文章中进行详细说明。
- ConvnetJS源代码分析第三篇
- ConvnetJS源代码分析第二篇(补充)
- ConvNetJS源代码解析第二篇
- ConvNetJS源代码解析第一篇
- ConvNetJs
- gcc源代码分析,expand_call()函数第三部分
- AsyncTask 第三篇源代码篇
- AsyncTask 第三篇源代码篇
- PopStar(消灭星星)游戏源代码下载、分析及跨平台移植---第三篇(分数)
- 《LINUX3.0内核源代码分析》第三章:内核同步(1)
- Linux内核源代码分析-第三章 内核体系结构概述-1
- Linux内核源代码分析-第三章 内核体系结构概述-2
- Linux内核源代码分析-第三章 内核体系结构概述-3
- 《LINUX3.0内核源代码分析》第三章:内核同步(1)
- 源代码分析
- Linux内核源代码情景分析---第三章 中断、异常和系统调用
- 第三课 熟悉内核源代码
- idea关联第三方源代码
- The connection to adb is down, and a severe error has occured
- java写文本文件三种方式效率比较
- ajax 新闻实例和数组
- [算法学习]给定一个整型数组,找出两个整数为指定整数的和(3)
- 关于VC、MFC和ACCESS的一些使用问题
- ConvnetJS源代码分析第三篇
- tslib在arm上的安装使用
- 数据库表空间利用率及得到表空间创建相关
- TCP的哪些事(上)
- Win7安装Oracle10g详解
- [JavaScript] 3.JS 语句
- 素数的判定
- 软件推荐 - Source Insight
- iOS中下载大型文件的原理解析二