An MLP in Theano


Contributing one more implementation here. It is written somewhat differently from the tutorial, since I am still learning as I go. This version reaches a 1.68% error rate, which takes over two million iterations, close to 1000 epochs.
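For context, the network below is the standard tutorial architecture: a 784-500-10 MLP with a tanh hidden layer and a softmax output, trained by minibatch SGD on the mean negative log-likelihood plus an L2 weight penalty. Here is a minimal numpy sketch of the quantity the cost graph computes; the function and variable names are illustrative, not taken from the code below:

import numpy as np

def mlp_cost_sketch(x, y, w1, b1, w2, b2, l2_decay=0.0001):
    # hidden layer: tanh nonlinearity
    h = np.tanh(x.dot(w1) + b1)
    # output layer: numerically stable softmax
    z = h.dot(w2) + b2
    p = np.exp(z - z.max(axis=1, keepdims=True))
    p /= p.sum(axis=1, keepdims=True)
    # mean negative log-likelihood of the correct classes
    nll = -np.mean(np.log(p[np.arange(len(y)), y]))
    # L2 penalty on both weight matrices (the l1 term is disabled below)
    return nll + l2_decay * ((w1 ** 2).sum() + (w2 ** 2).sum())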


import theano, numpy, theano.tensor as T, gzip, cPickle, time


class HiddenLayer():
    def __init__(self, n_in, n_out, x, w=None, b=None):
        # Glorot-style uniform initialization, suited to tanh units
        if w is None:
            t = numpy.sqrt(6.0 / (n_in + n_out))
            w = theano.shared(numpy.asarray(
                numpy.random.uniform(-t, t, [n_in, n_out]), theano.config.floatX))
        self.w = w
        if b is None:
            b = theano.shared(numpy.asarray(numpy.zeros(n_out), theano.config.floatX))
        self.b = b
        self.out = T.tanh(T.dot(x, self.w) + self.b)
        self.params = [self.w, self.b]

    def get_l1(self):
        return T.sum(T.abs_(self.w))

    def get_l2(self):
        return T.sum(self.w ** 2)


class MLP():
    def __init__(self, n_in=784, n_out=10, n_hidden=500):
        self.n_in = n_in
        self.n_out = n_out
        self.n_hidden = n_hidden
        # softmax (logistic regression) output layer, zero-initialized
        self.w = theano.shared(numpy.asarray(numpy.zeros([n_hidden, n_out]), theano.config.floatX))
        self.b = theano.shared(numpy.asarray(numpy.zeros(n_out), theano.config.floatX))
        self.params = [self.w, self.b]

    def p_y_given_x(self, x):
        return T.nnet.softmax(T.dot(x, self.w) + self.b)

    def pred(self, x):
        return T.argmax(self.p_y_given_x(x), 1)

    def cost(self, x, y):
        # mean negative log-likelihood of the correct class
        p_y_given_x = self.p_y_given_x(x)
        return -T.mean(T.log(p_y_given_x[T.arange(y.shape[0]), y]))

    def error(self, x, y):
        pred = self.pred(x)
        return T.mean(T.neq(pred, y))

    def get_l1(self):
        return T.sum(T.abs_(self.w))

    def get_l2(self):
        return T.sum(self.w ** 2)

    def train(self):
        def load_data():
            f = gzip.open('mnist.pkl.gz')
            trainxy, validatexy, testxy = cPickle.load(f)
            f.close()

            def share_data(xy):
                x, y = xy
                x = theano.shared(numpy.asarray(x, theano.config.floatX))
                y = theano.shared(numpy.asarray(y, theano.config.floatX))
                return [x, T.cast(y, 'int32')]

            trainx, trainy = share_data(trainxy)
            validatex, validatey = share_data(validatexy)
            testx, testy = share_data(testxy)
            return [(trainx, trainy), (validatex, validatey), (testx, testy)]

        [(trainx, trainy), (validatex, validatey), (testx, testy)] = load_data()
        print 'Load the data successfully...'
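        # Why the datasets are wrapped in theano.shared above: minibatches
        # are then sliced symbolically through the `givens` mappings below,
        # so the data is copied to the GPU once instead of on every call.
        # Labels are stored as floatX (only float shared variables live on
        # the GPU in Theano) and T.cast gives back a symbolic int32 view.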
        batch_size = 20
        train_batch = trainx.get_value().shape[0] / batch_size
        validate_batch = validatex.get_value().shape[0] / batch_size
        test_batch = testx.get_value().shape[0] / batch_size

        x = T.matrix('x', theano.config.floatX)
        y = T.ivector('y')
        n_in = self.n_in
        n_hidden = self.n_hidden
        n_out = self.n_out
        l1_decay = 0.0
        l2_decay = 0.0001

        # build the graph: tanh hidden layer feeding a fresh softmax MLP
        hl = HiddenLayer(n_in, n_hidden, x)
        mlp = MLP(n_hidden, n_out)
        cost = mlp.cost(hl.out, y) + l1_decay * (mlp.get_l1() + hl.get_l1()) \
            + l2_decay * (mlp.get_l2() + hl.get_l2())
        error = mlp.error(hl.out, y)

        # plain minibatch SGD with a fixed learning rate of 0.01
        params = hl.params + mlp.params
        grad_params = [T.grad(cost, param) for param in params]
        updates = [(param, param - 0.01 * grad) for param, grad in zip(params, grad_params)]

        index = T.lscalar()
        trainModel = theano.function([index], cost, updates=updates,
                                     givens={x: trainx[index * batch_size:(index + 1) * batch_size],
                                             y: trainy[index * batch_size:(index + 1) * batch_size]})
        validateModel = theano.function([index], error,
                                        givens={x: validatex[index * batch_size:(index + 1) * batch_size],
                                                y: validatey[index * batch_size:(index + 1) * batch_size]})
        testModel = theano.function([index], error,
                                    givens={x: testx[index * batch_size:(index + 1) * batch_size],
                                            y: testy[index * batch_size:(index + 1) * batch_size]})

        # early stopping: run at least `patience` minibatches; whenever the
        # validation error improves by more than 0.5% relative, extend the
        # patience to twice the current iteration count
        patience = 5000
        frequency = min(patience / 2, train_batch)
        increase = 2
        best_validate_error = numpy.Inf
        best_test_error = 0
        epochs = 1000
        epoch = 1
        ite = 0
        stopping = False

        while (epoch < epochs) and (not stopping):
            for i in xrange(train_batch):
                ite += 1
                this_cost = trainModel(i)
                if ite % frequency == 0:
                    this_validate_error = numpy.mean([validateModel(j) for j in xrange(validate_batch)])
                    print 'ite:%d/%d, cost:%f, validate error:%f' % (ite, epoch, this_cost, this_validate_error)
                    if this_validate_error < best_validate_error:
                        if this_validate_error < 0.995 * best_validate_error:
                            patience = max(patience, ite * increase)
                        best_validate_error = this_validate_error
                        best_test_error = numpy.mean([testModel(j) for j in xrange(test_batch)])
                        print 'ite:%d/%d, cost:%f, validate error:%f, test error:%f' \
                            % (ite, epoch, this_cost, this_validate_error, best_test_error)
                    if patience <= ite:
                        stopping = True
                        break
            epoch += 1

        print 'best validate error:%f, best test error:%f' % (best_validate_error, best_test_error)


if __name__ == '__main__':
    # the outer MLP instance mainly carries the layer sizes;
    # train() builds the actual computation graph internally
    mlp = MLP()
    start_time = time.clock()
    mlp.train()
    end_time = time.clock()
    print 'total running time: %d mins' % ((end_time - start_time) / 60.0)
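The listing above is Python 2 (print statements, cPickle, xrange, time.clock). If you run it under Python 3, at minimum the data loading changes; here is a sketch of an equivalent loader, assuming the same mnist.pkl.gz file from the Theano deep learning tutorial:

import gzip, pickle

def load_mnist_py3(path='mnist.pkl.gz'):
    # mnist.pkl.gz was pickled under Python 2, so Python 3's pickle
    # needs encoding='latin1' to decode the numpy arrays correctly
    with gzip.open(path, 'rb') as f:
        trainxy, validatexy, testxy = pickle.load(f, encoding='latin1')
    return trainxy, validatexy, testxy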

