Theano Tutorial Series (Basics to Advanced), Part 2: An MLP Example


The previous post walked through the basics of Theano using a logistic regression implementation as the example. This post uses an MLP to demonstrate Theano's more advanced features and the general way a model is organized. See the comments in the code below for the details.

# encoding: utf-8

#############################################
# multi-layer perceptron
#############################################
import theano
import numpy
import theano.tensor as T
from logistic_sgd import load_data


##########################################################
# Special layers of a model are usually defined as separate classes,
# which keeps the code modular and easy to reuse.
##########################################################
class HiddenLayer(object):
    # In each layer's __init__, the most important arguments are input, n_in,
    # n_out and the initial parameters. Layers whose initialization depends on
    # random values also take an rng, so that randomness stays consistent
    # globally.
    # After initialization, a layer usually exposes these members:
    # self.input, self.out, self.params (a list).
    def __init__(self,
                 rng,  # don't forget this
                 input,
                 n_in,
                 n_out,
                 W=None,
                 b=None,
                 activation=None):
        self.input = input
        if W is None:
            # The rationale behind this particular initialization scheme is not
            # entirely clear to me. In another implementation, when the
            # activation is sigmoid, W is set to 4 * the value below. Note that
            # if W is initialized to all zeros here, training barely moves; a
            # non-zero initialization is required. Leaving this as an open
            # question.
            W_value = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=W_value, name='W', borrow=True)  # don't forget to set borrow=True
        self.W = W
        if b is None:
            b_value = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_value, name='b', borrow=True)  # don't forget to set borrow=True
        self.b = b
        out = T.dot(input, self.W) + self.b
        # The following also works:
        # if activation is not None:
        #     out = activation(out)
        # self.out = out
        # A more fluent and concise way:
        self.out = (
            out if activation is None
            else activation(out)
        )
        self.params = [self.W, self.b]


class LogisticRegressionLayer(object):
    # As above, __init__ takes input, n_in, n_out and the parameters, and after
    # initialization the members include self.input, self.out and self.params.
    # Logistic regression is usually the output layer: softmax produces a
    # predicted probability distribution over the classes, and at prediction
    # time the class with the highest probability is taken as the final result.
    # This layer therefore has two extra pieces of functionality:
    # (1) computing the log loss, used by the cost function;
    # (2) computing prediction accuracy, by turning the predicted distribution
    #     into a predicted class and comparing it with the true labels.
    def __init__(self,
                 input,
                 n_in,
                 n_out,
                 W=None,
                 b=None):
        self.input = input
        # Initializing W with zeros here has no impact on the results.
        if W is None:
            W_value = numpy.zeros((n_in, n_out), dtype=theano.config.floatX)
            # Using borrow=True in the constructor of a shared variable is a
            # safe operation: it means the same memory is shared rather than
            # deep-copied when the object is used. This deserves more reading;
            # http://www.cnblogs.com/shouhuxianjian/p/4590222.html explains it
            # in detail, marking it here for later.
            W = theano.shared(value=W_value, name='W', borrow=True)
        self.W = W
        if b is None:
            b_value = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_value, name='b', borrow=True)
        self.b = b
        out = T.dot(input, self.W) + self.b
        # T.nnet.softmax, an important function
        self.out = T.nnet.softmax(out)
        # T.argmax, an important function: it returns the index of the largest
        # value; axis=1 means the argmax is taken along each row
        self.y_pred = T.argmax(self.out, axis=1)
        self.params = [self.W, self.b]

    # The log loss, given the true labels y.
    def log_loss(self, y):
        """
        negative log likelihood

        T.arange(y.shape[0]) is the integer vector [0, 1, ..., len(y)-1];
        y is also a vector, e.g. [3, 5, 6, ..., 9], holding each example's
        true class. T.log(self.out)[T.arange(y.shape[0]), y] picks, for each
        row i, the entry T.log(self.out)[i, y[i]], so the expression inside
        mean() is a vector of per-example log probabilities.
        """
        return -T.mean(T.log(self.out)[T.arange(y.shape[0]), y])

    def errors(self, y):
        return T.mean(T.neq(self.y_pred, y))


class MLP(object):
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_hidden,
                 n_out):
        # hidden layer
        self.hiddenLayer = HiddenLayer(rng,
                                       input,
                                       n_in,
                                       n_hidden,
                                       activation=T.tanh)
        # logistic regression output
        self.logisticRegressionLayer = LogisticRegressionLayer(
            self.hiddenLayer.out,
            n_hidden,
            n_out)
        # L1 regularization
        self.l1 = abs(self.hiddenLayer.W).sum() + abs(self.logisticRegressionLayer.W).sum()
        # L2 regularization
        self.l2 = (self.hiddenLayer.W ** 2).sum() + (self.logisticRegressionLayer.W ** 2).sum()
        # log loss func
        self.log_loss = self.logisticRegressionLayer.log_loss
        # errors func
        self.errors = self.logisticRegressionLayer.errors
        # params
        self.params = self.hiddenLayer.params + self.logisticRegressionLayer.params


# The model class. Its __init__ arguments usually include the model's
# hyper-parameters, such as the layer sizes and the regularization coefficients
# of the cost function, and it implements the BP algorithm.
class UserTestMLP(object):
    def __init__(self,
                 dataset='mnist.pkl.gz',
                 n_in=28 * 28,
                 n_hidden=500,
                 n_out=10,
                 learning_rate=0.01,
                 l1_coeff=0.00,
                 l2_coeff=0.0001,
                 epochs=1000,
                 batch_size=20):
        datasets = load_data(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        # note .get_value(borrow=True)
        self.train_batch_num = train_set_x.get_value(borrow=True).shape[0] // batch_size
        self.valid_batch_num = valid_set_x.get_value(borrow=True).shape[0] // batch_size
        self.test_batch_num = test_set_x.get_value(borrow=True).shape[0] // batch_size

        index = T.lscalar()
        x = T.matrix('x')
        y = T.ivector('y')
        rng = numpy.random.RandomState(1234)

        # construct the MLP
        mlp = MLP(rng,
                  input=x,
                  n_in=n_in,
                  n_hidden=n_hidden,
                  n_out=n_out)

        # BP algorithm
        cost = (mlp.log_loss(y) + l1_coeff * mlp.l1 + l2_coeff * mlp.l2)
        ## grad
        gparams = [T.grad(cost, param) for param in mlp.params]
        ## update wrt grad
        updates = [(param, param - learning_rate * gparam)
                   for (param, gparam) in zip(mlp.params, gparams)]

        # inputs takes the index of a mini-batch, and givens specifies how the
        # actual data is fetched. This improves efficiency for small datasets:
        # the whole dataset is loaded onto the GPU once at the start, so there
        # is no need to ship data from the CPU to the GPU on every call, which
        # reduces the transfer overhead.
        self.train_func = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size],
            }
        )


if __name__ == '__main__':
    dataset = 'mnist.pkl.gz'
    userTestMLP = UserTestMLP(dataset=dataset)
    train_func = userTestMLP.train_func
    # training process
    epoch = 0
    epochs = 100
    while epoch < epochs:
        cost = 0
        for minibatch_index in xrange(userTestMLP.train_batch_num):
            cost += train_func(minibatch_index)
        print 'epoch:', epoch, '    error:', cost / userTestMLP.train_batch_num
        epoch = epoch + 1
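A few standalone toy sketches below (my own, not part of the original tutorial code) illustrate some of the points made in the comments above.

First, borrow=True. As far as I understand from the Theano docs, for a CPU-backed shared variable borrow=True lets the shared variable reuse the caller's numpy buffer instead of copying it (on the GPU the data is transferred to device memory anyway, so the effect mainly matters on the CPU). A small sketch:

import numpy
import theano

data = numpy.zeros(3, dtype=theano.config.floatX)
s_copy = theano.shared(data.copy(), borrow=True)   # owns its own buffer
s_view = theano.shared(data, borrow=True)          # may reuse `data` directly

data[0] = 42.0
print s_copy.get_value()   # unaffected: [0. 0. 0.]
print s_view.get_value()   # on CPU this typically reflects the change: [42. 0. 0.]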
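Second, the indexing trick in log_loss is easier to see with plain numpy. The numbers below are made up; out plays the role of the softmax output and y the true labels:

import numpy

out = numpy.array([[0.7, 0.2, 0.1],    # predicted distribution for example 0
                   [0.1, 0.1, 0.8]])   # predicted distribution for example 1
y = numpy.array([0, 2])                # true classes

# advanced indexing selects out[0, 0] and out[1, 2]
picked = out[numpy.arange(y.shape[0]), y]    # -> [0.7, 0.8]
nll = -numpy.mean(numpy.log(picked))         # negative log likelihood
print picked, nll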
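Third, the T.grad / updates pattern used for the BP step can be tried on a one-parameter toy objective (a sketch of mine, deliberately using float64 to keep dtypes simple): Theano differentiates the cost symbolically, and theano.function applies the listed updates to the shared variable on every call.

import numpy
import theano
import theano.tensor as T

w = theano.shared(numpy.asarray(0.0), name='w')   # float64 scalar parameter
cost = (w - 3.0) ** 2                             # toy objective, minimum at w = 3
gw = T.grad(cost, w)                              # symbolic gradient d cost / d w
step = theano.function(inputs=[],
                       outputs=cost,
                       updates=[(w, w - 0.1 * gw)])   # one SGD step per call

for _ in xrange(50):
    step()
print w.get_value()   # close to 3.0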
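Finally, a minimal sketch of the index / givens pattern used in train_func, with a small made-up dataset: the data lives in a shared variable, the compiled function only receives a mini-batch index, and givens substitutes the corresponding slice for x.

import numpy
import theano
import theano.tensor as T

batch_size = 2
data = theano.shared(numpy.arange(12, dtype=theano.config.floatX).reshape(6, 2),
                     borrow=True)

index = T.lscalar('index')
x = T.matrix('x')
row_sum = x.sum(axis=1)   # stand-in for the real computation on a mini-batch

f = theano.function(
    inputs=[index],
    outputs=row_sum,
    givens={x: data[index * batch_size: (index + 1) * batch_size]}
)

print f(0)   # sums over rows 0..1
print f(2)   # sums over rows 4..5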