Theano Tutorial Series (Basics to Advanced), Part 2: An MLP Example


The previous post walked through the basics of Theano using a logistic regression implementation as the example. This post uses an MLP to demonstrate Theano's more advanced features and the general way a model is organized. See the comments in the code below for the details.

# encoding: utf-8

#############################################
# multi-layer perceptron
#############################################
import theano
import numpy
import theano.tensor as T
from logistic_sgd import load_data


##########################################################
# Special layers of a model are usually defined as separate classes,
# which keeps the code modular and easy to reuse.
##########################################################
class HiddenLayer(object):
    # In each layer's __init__, the most important arguments are input, n_in,
    # n_out and the initial parameters. Layers whose initialization depends on
    # random values also take an rng, so that randomness stays consistent
    # globally.
    # After initialization, a layer usually exposes these members:
    # self.input, self.out, self.params (a list).
    def __init__(self,
                 rng,  # don't forget this
                 input,
                 n_in,
                 n_out,
                 W=None,
                 b=None,
                 activation=None):
        self.input = input
        if W is None:
            # The rationale behind this particular initialization scheme is not
            # entirely clear to me. In another implementation, when the
            # activation is sigmoid, W is set to 4 * the value below. Note that
            # if W is initialized to all zeros here, training barely moves; a
            # non-zero initialization is required. Leaving this as an open
            # question.
            W_value = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=W_value, name='W', borrow=True)  # don't forget to set borrow=True
        self.W = W
        if b is None:
            b_value = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_value, name='b', borrow=True)  # don't forget to set borrow=True
        self.b = b
        out = T.dot(input, self.W) + self.b
        # The following also works:
        # if activation is not None:
        #     out = activation(out)
        # self.out = out
        # A more fluent and concise way:
        self.out = (
            out if activation is None
            else activation(out)
        )
        self.params = [self.W, self.b]


class LogisticRegressionLayer(object):
    # As above, __init__ takes input, n_in, n_out and the parameters, and after
    # initialization the members include self.input, self.out and self.params.
    # Logistic regression is usually the output layer: softmax produces a
    # predicted probability distribution over the classes, and at prediction
    # time the class with the highest probability is taken as the final result.
    # This layer therefore has two extra pieces of functionality:
    # (1) computing the log loss, used by the cost function;
    # (2) computing prediction accuracy, by turning the predicted distribution
    #     into a predicted class and comparing it with the true labels.
    def __init__(self,
                 input,
                 n_in,
                 n_out,
                 W=None,
                 b=None):
        self.input = input
        # Initializing W with zeros here has no impact on the results.
        if W is None:
            W_value = numpy.zeros((n_in, n_out), dtype=theano.config.floatX)
            # Using borrow=True in the constructor of a shared variable is a
            # safe operation: it means the same memory is shared rather than
            # deep-copied when the object is used. This deserves more reading;
            # http://www.cnblogs.com/shouhuxianjian/p/4590222.html explains it
            # in detail, marking it here for later.
            W = theano.shared(value=W_value, name='W', borrow=True)
        self.W = W
        if b is None:
            b_value = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_value, name='b', borrow=True)
        self.b = b
        out = T.dot(input, self.W) + self.b
        # T.nnet.softmax, an important function
        self.out = T.nnet.softmax(out)
        # T.argmax, an important function: it returns the index of the largest
        # value; axis=1 means the argmax is taken along each row
        self.y_pred = T.argmax(self.out, axis=1)
        self.params = [self.W, self.b]

    # The log loss, given the true labels y.
    def log_loss(self, y):
        """
        negative log likelihood

        T.arange(y.shape[0]) is the integer vector [0, 1, ..., len(y)-1];
        y is also a vector, e.g. [3, 5, 6, ..., 9], holding each example's
        true class. T.log(self.out)[T.arange(y.shape[0]), y] picks, for each
        row i, the entry T.log(self.out)[i, y[i]], so the expression inside
        mean() is a vector of per-example log probabilities.
        """
        return -T.mean(T.log(self.out)[T.arange(y.shape[0]), y])

    def errors(self, y):
        return T.mean(T.neq(self.y_pred, y))


class MLP(object):
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_hidden,
                 n_out):
        # hidden layer
        self.hiddenLayer = HiddenLayer(rng,
                                       input,
                                       n_in,
                                       n_hidden,
                                       activation=T.tanh)
        # logistic regression output
        self.logisticRegressionLayer = LogisticRegressionLayer(
            self.hiddenLayer.out,
            n_hidden,
            n_out)
        # L1 regularization
        self.l1 = abs(self.hiddenLayer.W).sum() + abs(self.logisticRegressionLayer.W).sum()
        # L2 regularization
        self.l2 = (self.hiddenLayer.W ** 2).sum() + (self.logisticRegressionLayer.W ** 2).sum()
        # log loss func
        self.log_loss = self.logisticRegressionLayer.log_loss
        # errors func
        self.errors = self.logisticRegressionLayer.errors
        # params
        self.params = self.hiddenLayer.params + self.logisticRegressionLayer.params


# The model class. Its __init__ arguments usually include the model's
# hyper-parameters, such as the layer sizes and the regularization coefficients
# of the cost function, and it implements the BP algorithm.
class UserTestMLP(object):
    def __init__(self,
                 dataset='mnist.pkl.gz',
                 n_in=28 * 28,
                 n_hidden=500,
                 n_out=10,
                 learning_rate=0.01,
                 l1_coeff=0.00,
                 l2_coeff=0.0001,
                 epochs=1000,
                 batch_size=20):
        datasets = load_data(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        # note .get_value(borrow=True)
        self.train_batch_num = train_set_x.get_value(borrow=True).shape[0] // batch_size
        self.valid_batch_num = valid_set_x.get_value(borrow=True).shape[0] // batch_size
        self.test_batch_num = test_set_x.get_value(borrow=True).shape[0] // batch_size

        index = T.lscalar()
        x = T.matrix('x')
        y = T.ivector('y')
        rng = numpy.random.RandomState(1234)

        # construct the MLP
        mlp = MLP(rng,
                  input=x,
                  n_in=n_in,
                  n_hidden=n_hidden,
                  n_out=n_out)

        # BP algorithm
        cost = (mlp.log_loss(y) + l1_coeff * mlp.l1 + l2_coeff * mlp.l2)
        ## grad
        gparams = [T.grad(cost, param) for param in mlp.params]
        ## update wrt grad
        updates = [(param, param - learning_rate * gparam)
                   for (param, gparam) in zip(mlp.params, gparams)]

        # inputs takes the index of a mini-batch, and givens specifies how the
        # actual data is fetched. This improves efficiency for small datasets:
        # the whole dataset is loaded onto the GPU once at the start, so there
        # is no need to ship data from the CPU to the GPU on every call, which
        # reduces the transfer overhead.
        self.train_func = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size],
            }
        )


if __name__ == '__main__':
    dataset = 'mnist.pkl.gz'
    userTestMLP = UserTestMLP(dataset=dataset)
    train_func = userTestMLP.train_func
    # training process
    epoch = 0
    epochs = 100
    while epoch < epochs:
        cost = 0
        for minibatch_index in xrange(userTestMLP.train_batch_num):
            cost += train_func(minibatch_index)
        print 'epoch:', epoch, '    error:', cost / userTestMLP.train_batch_num
        epoch = epoch + 1
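A few standalone toy sketches below (my own, not part of the original tutorial code) illustrate some of the points made in the comments above.

First, borrow=True. As far as I understand from the Theano docs, for a CPU-backed shared variable borrow=True lets the shared variable reuse the caller's numpy buffer instead of copying it (on the GPU the data is transferred to device memory anyway, so the effect mainly matters on the CPU). A small sketch:

import numpy
import theano

data = numpy.zeros(3, dtype=theano.config.floatX)
s_copy = theano.shared(data.copy(), borrow=True)   # owns its own buffer
s_view = theano.shared(data, borrow=True)          # may reuse `data` directly

data[0] = 42.0
print s_copy.get_value()   # unaffected: [0. 0. 0.]
print s_view.get_value()   # on CPU this typically reflects the change: [42. 0. 0.]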
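Second, the indexing trick in log_loss is easier to see with plain numpy. The numbers below are made up; out plays the role of the softmax output and y the true labels:

import numpy

out = numpy.array([[0.7, 0.2, 0.1],    # predicted distribution for example 0
                   [0.1, 0.1, 0.8]])   # predicted distribution for example 1
y = numpy.array([0, 2])                # true classes

# advanced indexing selects out[0, 0] and out[1, 2]
picked = out[numpy.arange(y.shape[0]), y]    # -> [0.7, 0.8]
nll = -numpy.mean(numpy.log(picked))         # negative log likelihood
print picked, nll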
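Third, the T.grad / updates pattern used for the BP step can be tried on a one-parameter toy objective (a sketch of mine, deliberately using float64 to keep dtypes simple): Theano differentiates the cost symbolically, and theano.function applies the listed updates to the shared variable on every call.

import numpy
import theano
import theano.tensor as T

w = theano.shared(numpy.asarray(0.0), name='w')   # float64 scalar parameter
cost = (w - 3.0) ** 2                             # toy objective, minimum at w = 3
gw = T.grad(cost, w)                              # symbolic gradient d cost / d w
step = theano.function(inputs=[],
                       outputs=cost,
                       updates=[(w, w - 0.1 * gw)])   # one SGD step per call

for _ in xrange(50):
    step()
print w.get_value()   # close to 3.0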
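Finally, a minimal sketch of the index / givens pattern used in train_func, with a small made-up dataset: the data lives in a shared variable, the compiled function only receives a mini-batch index, and givens substitutes the corresponding slice for x.

import numpy
import theano
import theano.tensor as T

batch_size = 2
data = theano.shared(numpy.arange(12, dtype=theano.config.floatX).reshape(6, 2),
                     borrow=True)

index = T.lscalar('index')
x = T.matrix('x')
row_sum = x.sum(axis=1)   # stand-in for the real computation on a mini-batch

f = theano.function(
    inputs=[index],
    outputs=row_sum,
    givens={x: data[index * batch_size: (index + 1) * batch_size]}
)

print f(0)   # sums over rows 0..1
print f(2)   # sums over rows 4..5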