Layer Parameter Dimensions in Neural Networks (14) --- "Deep Learning"


We have already covered gradient descent, backpropagation, and loss functions for neural networks. Now, following Michael Nielsen, we implement our own network construction, gradient descent, and backpropagation.

1) Reading the MNIST dataset:

import cPickle
import gzip

# Third-party libraries
import numpy as np


def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    # The pickle actually stores three tuples (training, validation,
    # test); unpack all three and simply drop the validation set,
    # since only two sets are used here.
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, test_data)


def load_data_wrapper():
    """Return a tuple containing ``(training_data, test_data)``.
    Based on ``load_data``, but the format is more convenient for use
    in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``test_data`` is a list containing 10,000 2-tuples ``(x, y)``.
    Here ``x`` is a 784-dimensional numpy.ndarray containing the
    input image, and ``y`` is the corresponding classification,
    i.e., the digit value (an integer) corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the test data.  These formats turn out to
    be the most convenient for use in our neural network code."""
    tr_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    #validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    #validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    return (training_data, test_data)


def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
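As a quick sanity check (assuming mnist.pkl.gz really sits at ../data/ as in load_data above), you can inspect the format the wrapper returns:

# Quick check of the format produced by mnist_loader.
import mnist_loader

training_data, test_data = mnist_loader.load_data_wrapper()
x, y = training_data[0]
print(x.shape)               # (784, 1) -- one image as a column vector
print(y.shape)               # (10, 1)  -- one-hot label as a column vector
print(len(training_data))    # 50000
print(len(test_data))        # 10000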

2) Implementing the neural network and gradient descent:

#-*-coding=utf-8-*-
import numpy as np
import random
import mnist_loader

# Parameter dimensions for the example sizes=[3,4,3]:
# w = [(4, 3), (3, 4)]
# b = [(4, 1), (3, 1)]
# a = [(4, 1), (3, 1)]
# z = [(4, 1), (3, 1)]


def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))


def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))


class Network(object):

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        # weights[l] has shape (sizes[l+1], sizes[l]); biases[l] has shape (sizes[l+1], 1)
        self.weights = [np.random.rand(j, i) for i, j in zip(sizes[:-1], sizes[1:])]
        self.biases = [np.random.rand(j, 1) for j in sizes[1:]]

    def feedforward(self, a):
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in xrange(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k+mini_batch_size]
                            for k in xrange(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch:{0},acc:{1}/{2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("finish")

    def update_mini_batch(self, mini_batch, eta):
        # accumulate the gradients over the mini-batch, then take one step
        deriv_w_mb = [np.zeros(w.shape) for w in self.weights]
        deriv_b_mb = [np.zeros(b.shape) for b in self.biases]
        for x, y in mini_batch:
            deriv_w, deriv_b = self.backprop(x, y)
            deriv_w_mb = [w+dw for w, dw in zip(deriv_w_mb, deriv_w)]
            deriv_b_mb = [b+db for b, db in zip(deriv_b_mb, deriv_b)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, deriv_w_mb)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, deriv_b_mb)]

    def backprop(self, x, y):
        deriv_b = [np.zeros(b.shape) for b in self.biases]
        deriv_w = [np.zeros(w.shape) for w in self.weights]
        # forward pass: store every weighted input z and every activation a
        a = x
        a_arr = [x]
        z_arr = []
        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, a)+b
            z_arr.append(z)
            a = sigmoid(z)
            a_arr.append(a)
        # output-layer error
        delta = (a_arr[-1]-y)*sigmoid_prime(z_arr[-1])
        deriv_b[-1] = delta
        deriv_w[-1] = np.dot(delta, np.transpose(a_arr[-2]))
        # propagate the error back through the hidden layers
        for l in xrange(2, self.num_layers):
            z = z_arr[-l]
            delta = np.dot(np.transpose(self.weights[-l+1]), delta)*sigmoid_prime(z)
            deriv_b[-l] = delta
            deriv_w[-l] = np.dot(delta, np.transpose(a_arr[-l-1]))
        return (deriv_w, deriv_b)

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)


if __name__ == "__main__":
    sizes = [784, 30, 10]
    training_data, test_data = mnist_loader.load_data_wrapper()
    print(training_data[0][0].shape, training_data[0][1].shape)
    net = Network(sizes)
    net.SGD(training_data, 10, 20000, 0.2, test_data)
    '''
    # a tiny hand-made example; every vector is in (len(n1_node), 1) column format
    sizes=[3,4,5,6,7,3]
    training_data=[([[1],[2],[3]],[[1],[0],[0]]),
        ([[4],[5],[6]],[[0],[1],[0]]),
        ([[7],[8],[9]],[[0],[0],[1]])]
    test_data=[([[10],[11],[12]],0),
               ([[13],[14],[15]],1),
               ([[16],[17],[18]],2)]
    net=Network(sizes)
    net.SGD(training_data,5,1,0.002,test_data)
    '''
    '''
    # checking the shapes returned by backprop on a single example
    x=[1,2,3]
    x=np.expand_dims(x,1)
    y=np.random.rand(3)
    y=np.expand_dims(y,1)
    z=[(x,y)]
    net1=Network(sizes)
    (w,b)=net1.backprop(z[0][0],z[0][1])
    print(w[0].shape,w[1].shape)
    print(w[0],w[1])
    print(b[0].shape,b[1].shape)
    print(b[0],b[1])
    '''
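Since the point of this post is the layer parameter dimensions, a small sketch that only instantiates the network and prints every weight and bias shape makes the bookkeeping explicit (it reuses the Network class above and does no training):

# Print the parameter dimensions of a [784, 30, 10] network.
net = Network([784, 30, 10])
for l, (w, b) in enumerate(zip(net.weights, net.biases), start=1):
    print("layer {0} -> {1}: w{2}, b{3}".format(l, l + 1, w.shape, b.shape))
# layer 1 -> 2: w(30, 784), b(30, 1)
# layer 2 -> 3: w(10, 30), b(10, 1)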

For example, with sizes=[3,4,3] we build a 3-layer network: the input to the first layer has shape (3,1), the first weight matrix has shape (4,3), the input to the second layer has shape (4,1), the second weight matrix has shape (3,4), and the output of the third layer has shape (3,1). This is how our implementation is laid out.

ps: Note that we did not make the first layer's input a flat (3,) array. Keeping it as a (3,1) column vector is simply a convenience: every step of the partial-derivative computation can then be written as plain matrix products. It does not mean a (3,) input is impossible; that also works, it is just more awkward to compute with. The short sketch below walks through these shapes.
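A minimal sketch of the shapes just described, reusing the Network class and sigmoid from section 2 (the input values 1, 2, 3 are made up for illustration):

import numpy as np

net = Network([3, 4, 3])
print(net.weights[0].shape)    # (4, 3) -- weights between layer 1 and layer 2
print(net.weights[1].shape)    # (3, 4) -- weights between layer 2 and layer 3

a0 = np.array([[1.0], [2.0], [3.0]])                         # first-layer input, (3, 1)
a1 = sigmoid(np.dot(net.weights[0], a0) + net.biases[0])     # (4, 3)x(3, 1)+(4, 1) -> (4, 1)
a2 = sigmoid(np.dot(net.weights[1], a1) + net.biases[1])     # (3, 4)x(4, 1)+(3, 1) -> (3, 1)
print(a1.shape)    # (4, 1) -- input to the second weight matrix
print(a2.shape)    # (3, 1) -- output of the third layer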

ps: what np.dot does with two ndarrays of shape (3,) and (4,):
np.dot cannot multiply arrays with these shapes, so the matrix products that produce the weight-derivative matrix for w (and the bias derivative for b in the same matrix form) cannot be built. So pay attention to the dimensions of the data fed into the network and the dimensions of the weights! The short check below shows the failure and the column-vector form that works.
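A minimal check of this point (the shapes (4,) and (3,) stand in for an error term and a previous-layer activation): np.dot refuses the flat arrays, while the column-vector form gives exactly the outer product that backprop() stores in deriv_w.

import numpy as np

delta = np.random.rand(4)     # output error as a flat (4,) array
a_prev = np.random.rand(3)    # previous-layer activation as a flat (3,) array

try:
    np.dot(delta, a_prev)     # inner dimensions 4 and 3 do not match
except ValueError as e:
    print("np.dot failed: {0}".format(e))

# Kept as column vectors, the same product is exactly the (4, 3)
# weight-gradient matrix used in backprop():
delta_col = delta.reshape(4, 1)
a_col = a_prev.reshape(3, 1)
print(np.dot(delta_col, np.transpose(a_col)).shape)    # (4, 3)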
