Tensorflow07-mnist


The MNIST handwritten digit recognition problem is the classic introductory example for TensorFlow.


With an artificial neural network, the MNIST problem takes an image as input and produces a 10-dimensional vector as output; the position of the 1 in that one-hot vector indicates which digit the image shows.
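
For example, the digit 3 maps to a vector with a 1 in position 3. A minimal illustration of that one-hot encoding using sklearn's LabelBinarizer (the same helper used in the code further down; this snippet is only an illustration):

from sklearn.preprocessing import LabelBinarizer

# Fit on all ten classes, then encode the label 3 as a one-hot row
encoder = LabelBinarizer().fit(list(range(10)))
print(encoder.transform([3]))  # [[0 0 0 1 0 0 0 0 0 0]]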

The training setup for the network:

The first layer takes the 64-dimensional training samples as input (each 8x8 image flattened to 64 values), multiplies by a 64x50 weight matrix and applies a tanh activation, producing a 50-dimensional output. During this step 40% of the units are dropped out.

The second layer takes the 50-dimensional output of the first layer as input, multiplies by a 50x10 weight matrix and applies a softmax activation, producing a 10-dimensional output. 40% of the units are dropped out here as well. The output of this layer is the final prediction.

The loss is the cross-entropy between the predictions and the true labels: take the log of the predicted probabilities, weight it by the one-hot ground truth, sum over the 10 classes, negate, and average over the batch.
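
Written out, this is the average cross-entropy between the one-hot labels y and the predicted probabilities ŷ (the same quantity as the sentence above, just in standard notation):

loss = -\frac{1}{N}\sum_{i=1}^{N}\sum_{k=0}^{9} y_{ik}\,\log \hat{y}_{ik}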

Training optimizes the parameters with gradient descent.


The following code requires sklearn, scipy and numpy:

Most of the Python libraries needed can be found here:
http://www.lfd.uci.edu/~gohlke/pythonlibs
It is best to run the installations from PowerShell:
pip install sklearn
pip install scipy (this may fail; if so, download the whl file, change into the folder it was saved to, and run pip install scipy-0.19.0...whl)
If the following error appears:
from numpy._distributor_init import NUMPY_MKL  # requires numpy+mkl
ImportError: cannot import name 'NUMPY_MKL'

numpy is probably already installed, but the precompiled scipy build requires numpy+mkl; download the numpy+mkl whl file from the URL above and install it manually:
pip install num....whl
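
A quick sanity check after installation (not part of the original instructions, just a way to confirm the three packages import cleanly):

python -c "import numpy, scipy, sklearn; print(numpy.__version__, scipy.__version__, sklearn.__version__)"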


import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Load the data
digits = load_digits()
X = digits.data    # 8x8 digit images (0~9), flattened to 64 features
y = digits.target  # labels 0~9
y = LabelBinarizer().fit_transform(y)  # one-hot vectors of length 10
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

def add_layer(inputs, in_size, out_size, name_layer, activation_function=None):  # None means linear
    layer_name = 'layer%s' % name_layer
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))  # in_size x out_size matrix
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)  # bias vector; better not initialized to 0
    Wx_plus_b = tf.matmul(inputs, Weights) + biases  # matrix product plus bias is the raw prediction
    # Drop out part of Wx_plus_b
    Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    # Histogram summary for the layer outputs
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs

keep_prob = tf.placeholder(tf.float32)  # probability of keeping a unit (not being dropped out)
xs = tf.placeholder(tf.float32, [None, 64])  # 8x8 images
ys = tf.placeholder(tf.float32, [None, 10])

# Add the layers
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)  # too many hidden units may cause errors; 50 is used here

# Difference between prediction and ground truth -> loss (cross-entropy)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
# Scalar summary for the loss
tf.summary.scalar('loss', cross_entropy)
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)

sess = tf.Session()
# Merge all summaries
merge = tf.summary.merge_all()
# Summaries are written to the logs/train and logs/test folders
train_writer = tf.summary.FileWriter("logs/train", sess.graph)
test_writer = tf.summary.FileWriter("logs/test", sess.graph)
sess.run(tf.initialize_all_variables())

for i in range(500):
    # Keep 60% of the units, i.e. drop out 40%
    sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.6})
    if i % 50 == 0:
        # Every 50 steps, record the training and test summaries
        train_result = sess.run(merge, feed_dict={xs: X_train, ys: y_train, keep_prob: 1})  # do not drop anything while evaluating
        test_result = sess.run(merge, feed_dict={xs: X_test, ys: y_test, keep_prob: 1})
        # Write them to the writers
        train_writer.add_summary(train_result, i)
        test_writer.add_summary(test_result, i)

You can run with keep_prob set to 1 first and then change it to 0.6; comparing the two runs in TensorBoard shows that the results on the test set are better with 0.6.
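
To see that comparison, point TensorBoard at the parent log directory (assuming TensorBoard was installed together with TensorFlow) and open the URL it prints; the logs/train and logs/test runs then appear as two curves under the 'loss' scalar:

tensorboard --logdir=logs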

Alternatively: the approach from the official TensorFlow tutorial

input_data.py

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gzip
import os

import tensorflow.python.platform

import numpy
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'


def maybe_download(filename, work_directory):
  """Download the data from Yann's website, unless it's already here."""
  if not os.path.exists(work_directory):
    os.mkdir(work_directory)
  filepath = os.path.join(work_directory, filename)
  if not os.path.exists(filepath):
    filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  return filepath


def _read32(bytestream):
  dt = numpy.dtype(numpy.uint32).newbyteorder('>')
  return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]


def extract_images(filename):
  """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    magic = _read32(bytestream)
    if magic != 2051:
      raise ValueError(
          'Invalid magic number %d in MNIST image file: %s' %
          (magic, filename))
    num_images = _read32(bytestream)
    rows = _read32(bytestream)
    cols = _read32(bytestream)
    buf = bytestream.read(rows * cols * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8)
    data = data.reshape(num_images, rows, cols, 1)
    return data


def dense_to_one_hot(labels_dense, num_classes=10):
  """Convert class labels from scalars to one-hot vectors."""
  num_labels = labels_dense.shape[0]
  index_offset = numpy.arange(num_labels) * num_classes
  labels_one_hot = numpy.zeros((num_labels, num_classes))
  labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
  return labels_one_hot


def extract_labels(filename, one_hot=False):
  """Extract the labels into a 1D uint8 numpy array [index]."""
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    magic = _read32(bytestream)
    if magic != 2049:
      raise ValueError(
          'Invalid magic number %d in MNIST label file: %s' %
          (magic, filename))
    num_items = _read32(bytestream)
    buf = bytestream.read(num_items)
    labels = numpy.frombuffer(buf, dtype=numpy.uint8)
    if one_hot:
      return dense_to_one_hot(labels)
    return labels


class DataSet(object):

  def __init__(self, images, labels, fake_data=False, one_hot=False,
               dtype=tf.float32):
    """Construct a DataSet.

    one_hot arg is used only if fake_data is true.  `dtype` can be either
    `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
    `[0, 1]`.
    """
    dtype = tf.as_dtype(dtype).base_dtype
    if dtype not in (tf.uint8, tf.float32):
      raise TypeError('Invalid image dtype %r, expected uint8 or float32' %
                      dtype)
    if fake_data:
      self._num_examples = 10000
      self.one_hot = one_hot
    else:
      assert images.shape[0] == labels.shape[0], (
          'images.shape: %s labels.shape: %s' % (images.shape,
                                                 labels.shape))
      self._num_examples = images.shape[0]

      # Convert shape from [num examples, rows, columns, depth]
      # to [num examples, rows*columns] (assuming depth == 1)
      assert images.shape[3] == 1
      images = images.reshape(images.shape[0],
                              images.shape[1] * images.shape[2])
      if dtype == tf.float32:
        # Convert from [0, 255] -> [0.0, 1.0].
        images = images.astype(numpy.float32)
        images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
    self._epochs_completed = 0
    self._index_in_epoch = 0

  @property
  def images(self):
    return self._images

  @property
  def labels(self):
    return self._labels

  @property
  def num_examples(self):
    return self._num_examples

  @property
  def epochs_completed(self):
    return self._epochs_completed

  def next_batch(self, batch_size, fake_data=False):
    """Return the next `batch_size` examples from this data set."""
    if fake_data:
      fake_image = [1] * 784
      if self.one_hot:
        fake_label = [1] + [0] * 9
      else:
        fake_label = 0
      return [fake_image for _ in xrange(batch_size)], [
          fake_label for _ in xrange(batch_size)]
    start = self._index_in_epoch
    self._index_in_epoch += batch_size
    if self._index_in_epoch > self._num_examples:
      # Finished epoch
      self._epochs_completed += 1
      # Shuffle the data
      perm = numpy.arange(self._num_examples)
      numpy.random.shuffle(perm)
      self._images = self._images[perm]
      self._labels = self._labels[perm]
      # Start next epoch
      start = 0
      self._index_in_epoch = batch_size
      assert batch_size <= self._num_examples
    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end]


def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32):
  class DataSets(object):
    pass
  data_sets = DataSets()

  if fake_data:
    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)
    data_sets.train = fake()
    data_sets.validation = fake()
    data_sets.test = fake()
    return data_sets

  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
  VALIDATION_SIZE = 5000

  local_file = maybe_download(TRAIN_IMAGES, train_dir)
  train_images = extract_images(local_file)

  local_file = maybe_download(TRAIN_LABELS, train_dir)
  train_labels = extract_labels(local_file, one_hot=one_hot)

  local_file = maybe_download(TEST_IMAGES, train_dir)
  test_images = extract_images(local_file)

  local_file = maybe_download(TEST_LABELS, train_dir)
  test_labels = extract_labels(local_file, one_hot=one_hot)

  validation_images = train_images[:VALIDATION_SIZE]
  validation_labels = train_labels[:VALIDATION_SIZE]
  train_images = train_images[VALIDATION_SIZE:]
  train_labels = train_labels[VALIDATION_SIZE:]

  data_sets.train = DataSet(train_images, train_labels, dtype=dtype)
  data_sets.validation = DataSet(validation_images, validation_labels,
                                 dtype=dtype)
  data_sets.test = DataSet(test_images, test_labels, dtype=dtype)

  return data_sets
Import input_data to load the data:

import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
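
For reference, read_data_sets splits VALIDATION_SIZE = 5000 examples off the 60000 MNIST training images, so with one_hot=True the returned object should look roughly like this (a quick check, assuming the standard MNIST files):

print(mnist.train.images.shape)       # (55000, 784)
print(mnist.train.labels.shape)       # (55000, 10)
print(mnist.validation.num_examples)  # 5000
print(mnist.test.num_examples)        # 10000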

Simple model:

import tensorflow as tf
import input_data

## Load the data. mnist stores the training, validation and test sets as numpy arrays
## and provides a function for fetching minibatches during iteration.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
## Data downloaded

## Set up the model
# x is a placeholder for the 784-dimensional input images; None means any batch size
x = tf.placeholder("float", [None, 784])
# Weights: 784-dimensional image * W = 10-dimensional evidence, one entry per digit 0~9
W = tf.Variable(tf.zeros([784, 10]))
# Bias: 10-dimensional vector, one entry per digit 0~9
b = tf.Variable(tf.zeros([10]))
# x * W + b is fed into softmax, giving a predicted probability distribution over the digits 0~9
y = tf.nn.softmax(tf.matmul(x, W) + b)
# Placeholder for the true distribution
y_ = tf.placeholder("float", [None, 10])
# Cross-entropy: measures how inefficient the predicted distribution is
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
# Minimize the cross-entropy with gradient descent at a learning rate of 0.01 (other optimizers also work)
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
## Model set up

## Train the model
# Initialize the variables
init = tf.initialize_all_variables()
# Launch the model in a session: TensorFlow computes in a C++ backend, and the session is the connection to it
sess = tf.Session()
sess.run(init)
# Train for 1000 iterations of stochastic gradient descent
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)  # draw 100 random training examples
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})  # feed the batch into the placeholders and run train_step
## Training done

## Evaluate the model
# tf.argmax gives the index of the largest entry along one dimension of a tensor, i.e. the class label;
# tf.equal checks whether prediction and ground truth match, giving a vector of booleans
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Cast to 0/1 and take the mean
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Accuracy of the learned model on the test set
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
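
As a small follow-up (not in the original article), the trained softmax model can classify a single test image by feeding just that image through the same graph:

# Classify the first test image with the trained parameters
img = mnist.test.images[0].reshape(1, 784)
pred = sess.run(tf.argmax(y, 1), feed_dict={x: img})
print("predicted digit:", pred[0])
print("true digit:", mnist.test.labels[0].argmax())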

Using InteractiveSession:

import input_data
import tensorflow as tf

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

## Set up the model
# Placeholders for the input images and the true label distribution
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", [None, 10])
# Weights and biases, initialized to zero
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
## Model set up

## Train the model
# InteractiveSession lets the graph be built interactively, reducing the overhead of switching out of Python
sess = tf.InteractiveSession()
# Initialize the variables through the session
sess.run(tf.initialize_all_variables())
# Class prediction
y = tf.nn.softmax(tf.matmul(x, W) + b)
# Loss function: sum of the cross-entropy over every image
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
# Gradient descent with step size 0.01; running the returned train_step applies one gradient update to the parameters
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# Training loop
for i in range(1000):
    # Each iteration loads 50 training examples and runs train_step once,
    # feeding the data into the x and y_ placeholders via feed_dict
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
## Training done

## Evaluate the model
# Boolean vector marking where the predicted label matches the true label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Cast to 0/1 and report the mean as the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
## Evaluation done
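
The difference from the previous script is mostly ergonomic: InteractiveSession installs itself as the default session, so Tensor.eval() and Operation.run() work without passing the session explicitly. The final line above is therefore equivalent to:

print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))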

Multilayer convolutional network:

import input_data
import tensorflow as tf

# Initialize weights with a small amount of noise to break symmetry and avoid zero gradients
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# With ReLU neurons, initialize biases with a small positive value to avoid neurons whose output stays at 0
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# Convolution with stride 1 and zero padding
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Max pooling over 2x2 blocks
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Load the data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

## Set up the model
# Placeholders for the input images and the true label distribution
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", [None, 10])
## Model set up

## First layer: a convolution followed by max pooling
# The convolution computes 32 features for each 5x5 patch; the weight tensor is [5, 5, 1, 32]:
# patch size, number of input channels, number of output channels
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Reshape x into a 4D tensor; dimensions 2 and 3 are the image width and height,
# the last dimension is the number of color channels
x_image = tf.reshape(x, [-1, 28, 28, 1])
# Convolve x_image with the weight tensor, add the bias and apply ReLU
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# Max pooling
h_pool1 = max_pool_2x2(h_conv1)
## End of the first layer

## Second layer: stack a similar layer on top
# 64 features for each 5x5 patch
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
## End of the second layer

## Densely connected layer: the image size has been reduced to 7x7
# Add a fully connected layer with 1024 neurons to process the whole image
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Reshape the pooling output into a batch of vectors, multiply by the weight matrix, add the bias and apply ReLU
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
## End of the densely connected layer

## Dropout: reduce overfitting
# Placeholder for the probability that a neuron's output is kept during dropout
keep_prob = tf.placeholder("float")
# Dropout is enabled during training and disabled during testing;
# tf.nn.dropout masks neuron outputs and also automatically rescales the remaining values
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
## End of dropout

## Output layer: softmax
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
## End of the output layer

## Train the model
sess = tf.InteractiveSession()
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
# Use the ADAM optimizer for the gradient descent
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
sess.run(tf.initialize_all_variables())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
## Training done

## Evaluate the model
print("test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
## Evaluation done
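
To make the "reduced to 7x7" remark above concrete: with SAME padding, each 2x2 max pool halves the spatial resolution, which can be checked from the static shapes of the tensors defined above (TF 1.x prints the unknown batch dimension as "?"):

print(x_image.get_shape())       # (?, 28, 28, 1)
print(h_pool1.get_shape())       # (?, 14, 14, 32)
print(h_pool2.get_shape())       # (?, 7, 7, 64)
print(h_pool2_flat.get_shape())  # (?, 3136), i.e. 7*7*64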


