利用tensorflow搭建CIFAR10 分类网络

来源：互联网发布：javascript 移除元素编辑：程序博客网时间：2024/05/22 10:51

一、准备数据：
cifar10 的数据集共有 6 万幅 32 * 32 大小的图片，分为 10 类，每类 6000 张，其中 5 万张用于训练， 1 万张用于测试。数据集被分成了5 个训练的 batches 和 1 个测试的 batch。每个 batch 里的图片都是随机排列的。官网上提供了三个版本的下载链接，分别是 Python 版本的，Matlab 版本的和二进制文件版本的。其中，Python 版本的数据格式，官网上给了读取数据的代码，Matlab 版本的数据和 Python 版本的数据格式差不多。二进制版本的数据，有 5 个训练用的 batches，data_batch_1.bin ~ data_batch_5.bin 和一个测试用的 test_batch.bin。
考虑到 TensorFlow 可以读取固定长度格式的数据（用 tf.FixedLengthRecordReader ），我们下载二进制格式的数据。新建文件夹/home/your_name/TensorFlow/cifar10/data，从cifar10 官网上下载二进制格式的文件压缩包，解压到此文件夹，得到 cifar-10- batches-bin 文件夹，里面有 8 个文件，6 个 .bin文件，一个 readme，一个 .txt 说明了类别。

from __future__ import absolute_import        # 绝对导入from __future__ import division                # 精确除法，/是精确除，//是取整除from __future__ import print_function        # 打印函数import osimport tensorflow as tf# 建立一个 cifar10_data 的类， 输入文件名队列，输出 labels 和imagesclass cifar10_data(object):    def __init__(self, filename_queue):        # 类初始化        # 根据上一篇文章介绍的文件格式，定义初始化参数        self.height = 32        self.width = 32        self.depth = 3        # label 一个字节        self.label_bytes = 1        # 图像 32*32*3 = 3072 字节        self.image_bytes = self.height * self.width * self.depth        # 读取的固定字节长度为 3072 + 1 = 3073         self.record_bytes = self.label_bytes + self.image_bytes        self.label, self.image = self.read_cifar10(filename_queue)    def read_cifar10(self, filename_queue):        # 读取固定长度文件        reader = tf.FixedLengthRecordReader(record_bytes = self.record_bytes)        key, value = reader.read(filename_queue)        record_bytes = tf.decode_raw(value, tf.uint8)        # tf.slice(record_bytes, 起始位置， 长度)        label = tf.cast(tf.slice(record_bytes, [0], [self.label_bytes]), tf.int32)        # 从 label 起，切片 self.image_bytes = 3072 长度为图像        image_raw = tf.slice(record_bytes, [self.label_bytes], [self.image_bytes])        # 图片转化成 3*32*32        image_raw = tf.reshape(image_raw, [self.depth, self.height, self.width])        # 图片转化成 32*32*3        image = tf.transpose(image_raw, (1,2,0))                image = tf.cast(image, tf.float32)        return label, imagedef inputs(data_dir, batch_size, train = True, name = 'input'):    # 建议加上 tf.name_scope, 可以画出漂亮的流程图。    with tf.name_scope(name):        if train:             # 要读取的文件的名字            filenames = [os.path.join(data_dir,'data_batch_%d.bin' % ii)                         for ii in range(1,6)]            # 不存在该文件的时候报错            for f in filenames:                if not tf.gfile.Exists(f):                    raise ValueError('Failed to find file: ' + f)            #用文件名生成文件名队列            filename_queue = tf.train.string_input_producer(filenames)            # 送入 cifar10_data 类中            read_input = cifar10_data(filename_queue)            images = read_input.image            # 图像白化操作，由于网络结构简单，不加这句正确率很低。            # images = tf.image.per_image_whitening(images)            labels = read_input.label            # 生成 batch 队列，16 线程操作，容量 20192，min_after_dequeue 是            # 离队操作后，队列中剩余的最少的元素，确保队列中一直有 min_after_dequeue            # 以上元素，建议设置 capacity = min_after_dequeue + batch_size * 3            num_preprocess_threads = 16            image, label = tf.train.shuffle_batch(                                    [images,labels], batch_size = batch_size,                                     num_threads = num_preprocess_threads,                                     min_after_dequeue = 20000, capacity = 20192)            return image, tf.reshape(label, [batch_size])        else:            filenames = [os.path.join(data_dir,'test_batch.bin')]            for f in filenames:                if not tf.gfile.Exists(f):                    raise ValueError('Failed to find file: ' + f)            filename_queue = tf.train.string_input_producer(filenames)            read_input = cifar10_data(filename_queue)            images = read_input.image            images = tf.image.per_image_whitening(images)            labels = read_input.label            num_preprocess_threads = 16            image, label = tf.train.shuffle_batch(                                    [images,labels], batch_size = batch_size,                                     num_threads = num_preprocess_threads,                                     min_after_dequeue = 20000, capacity = 20192)            return image, tf.reshape(label, [batch_size])

二、思路
考虑如下的网络结构进行 cifar10 的分类：每次输入一个batch的 64 幅图像，转化成 64*32*32*3 的四维张量，经过步长为 1，卷积核大小为 5*5 ，Feature maps 为64的卷积操作，变为 64*32*32*64 的四维张量，然后经过一个步长为 2 的 max_pool 的池化层，变成 64*16*16*64 大小的四维张量，再经过一次类似的卷积池化操作，变为 64*8*8*64 大小的4维张量，再经过两个全连接层，映射到 64*192 的二维张量，然后经过一个 sortmax 层，变为 64*10 的张量，最后和标签 label 做一个交叉熵的损失函数。

第一步：定义权重和偏置

def variable_on_cpu(name, shape, initializer = tf.constant_initializer(0.1)):    with tf.device('/cpu:0'):        dtype = tf.float32        var = tf.get_variable(name, shape, initializer = initializer,                               dtype = dtype)    return var # 用 get_variable 在 CPU 上定义变量def variables(name, shape, stddev):     dtype = tf.float32    var = variable_on_cpu(name, shape,                           tf.truncated_normal_initializer(stddev = stddev,                                                           dtype = dtype))    return var

第二步、定义网络结构

# 定义网络结构def inference(images):    ''' 每次输入一个batch的 64 幅图像， 转化成 64*32*32*3 的四维张量，经过步长为 1，卷积核大小为 5*5 ，    Feature maps 为64的卷积操作，变为 64*32*32*64 的四维张量，然后经过一个步长为 2 的 max_pool 的池化层，    变成 64*16*16*64 大小的四维张量，再经过一次类似的卷积池化操作，    变为 64*8*8*64 大小的4维张量，再经过两个全连接层，映射到 64*192 的二维张量，然后经过一个 sortmax 层，    变为 64*10 的张量，最后和标签 label 做一个交叉熵的损失函数'''    # 第一卷积层    with tf.variable_scope('conv1') as scope:        # 用 5*5 的卷积核，64 个 Feature maps        weights = variables('weights', [5,5,3,64], 5e-2)        # 卷积，步长为 1*1        conv = tf.nn.conv2d(images, weights, [1,1,1,1], padding = 'SAME')        biases = variable_on_cpu('biases', [64])        # 加上偏置        bias = tf.nn.bias_add(conv, biases)        # 通过 ReLu 激活函数        conv1 = tf.nn.relu(bias, name = scope.name)        # 柱状图总结 conv1        tf.summary.histogram(scope.name + '/activations', conv1)      with tf.variable_scope('pooling1_lrn') as scope:        # 最大池化，3*3 的卷积核，2*2 的卷积        pool1 = tf.nn.max_pool(conv1, ksize = [1,3,3,1], strides = [1,2,2,1],                               padding = 'SAME', name='pool1')        # 局部响应归一化        norm1 = tf.nn.lrn(pool1, 4, bias = 1.0, alpha = 0.001/9.0,                           beta = 0.75, name = 'norm1')    # 第二卷积层    with tf.variable_scope('conv2') as scope:        weights = variables('weights', [5,5,64,64], 5e-2)        conv = tf.nn.conv2d(norm1, weights, [1,1,1,1], padding = 'SAME')        biases = variable_on_cpu('biases', [64])        bias = tf.nn.bias_add(conv, biases)        conv2 = tf.nn.relu(bias, name = scope.name)        tf.summary.histogram(scope.name + '/activations', conv2)    with tf.variable_scope('pooling2_lrn') as scope:        norm2 = tf.nn.lrn(conv2, 4, bias = 1.0, alpha = 0.001/9.0,                           beta = 0.75, name = 'norm1')                pool2 = tf.nn.max_pool(norm2, ksize = [1,3,3,1], strides = [1,2,2,1],                               padding = 'SAME', name='pool1')    with tf.variable_scope('local3') as scope:        # 第一层全连接        reshape = tf.reshape(pool2, [BATCH_SIZE,-1])        dim = reshape.get_shape()[1].value        weights = variables('weights', shape=[dim,384], stddev=0.004)        biases = variable_on_cpu('biases', [384])        # ReLu 激活函数        local3 = tf.nn.relu(tf.matmul(reshape, weights)+biases,                             name = scope.name)        # 柱状图总结 local3        tf.summary.histogram(scope.name + '/activations', local3)    with tf.variable_scope('local4') as scope:        # 第二层全连接        weights = variables('weights', shape=[384,192], stddev=0.004)        biases = variable_on_cpu('biases', [192])        local4 = tf.nn.relu(tf.matmul(local3, weights)+biases,                             name = scope.name)        tf.summary.histogram(scope.name + '/activations', local4)    with tf.variable_scope('softmax_linear') as scope:        # softmax 层，实际上不是严格的 softmax ，真正的 softmax 在损失层        weights = variables('weights', [192, 10], stddev=1/192.0)        biases = variable_on_cpu('biases', [10])        softmax_linear = tf.add(tf.matmul(local4, weights), biases,                                 name = scope.name)        tf.summary.histogram(scope.name + '/activations', softmax_linear)    return softmax_lineardef losses(logits, labels):    with tf.variable_scope('loss') as scope:        labels = tf.cast(labels, tf.int64)        # 交叉熵损失，至于为什么是这个函数，后面会说明。        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(            labels=labels, logits=logits, name='cross_entropy_per_example')        loss = tf.reduce_mean(cross_entropy, name = 'loss')        tf.add_to_collection('losses', loss)    return tf.add_n(tf.get_collection('losses'), name='total_loss')

第三步、开始训练

BATCH_SIZE = 64      #设置batch的大小     每次输入一个batch的 64 幅图像LEARNING_RATE = 0.1    #学习率MAX_STEP = 50000    #循环次数

def train():    # global_step    global_step = tf.Variable(0, name = 'global_step', trainable=False)    # cifar10 数据文件夹    data_dir = 'F:/python3.5/Machine Learning/tensorflow/CNN/cifar10/data/cifar-10-batches-bin/'    # 训练时的日志logs文件，没有这个目录要先建一个    train_dir = 'F:/python3.5/Machine Learning/tensorflow/CNN/cifar10/data/cifar-10-batches-bin/'    # 加载 images，labels    images, labels = cifar10_input.inputs(data_dir, BATCH_SIZE)    # 求 loss    loss = losses(inference(images), labels)    # 设置优化算法，这里用 SGD 随机梯度下降法，恒定学习率    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)    # global_step 用来设置初始化    train_op = optimizer.minimize(loss, global_step = global_step)    # 保存操作    saver = tf.train.Saver(tf.all_variables())    # 汇总操作    summary_op = tf.summary.merge_all()    # 初始化方式是初始化所有变量    init = tf.initialize_all_variables()    os.environ['CUDA_VISIBLE_DEVICES'] = str(0)    config = tf.ConfigProto()    # 占用 GPU 的 20% 资源    config.gpu_options.per_process_gpu_memory_fraction = 0.2    # 设置会话模式，用 InteractiveSession 可交互的会话，逼格高    sess = tf.InteractiveSession(config=config)    # 运行初始化    sess.run(init)    # 设置多线程协调器    coord = tf.train.Coordinator()           # 开始 Queue Runners (队列运行器)    threads = tf.train.start_queue_runners(sess = sess, coord = coord)    # 把汇总写进 train_dir，注意此处还没有运行    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)    # 开始训练过程    try:                for step in xrange(MAX_STEP):            if coord.should_stop():                break            start_time = time.time()            # 在会话中运行 loss            _, loss_value = sess.run([train_op, loss])            duration = time.time() - start_time            # 确认收敛            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'                            if step % 30 == 0:                # 本小节代码设置一些花哨的打印格式，可以不用管                num_examples_per_step = BATCH_SIZE                examples_per_sec = num_examples_per_step / duration                sec_per_batch = float(duration)                                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '                              'sec/batch)')                print (format_str % (datetime.now(), step, loss_value,                                      examples_per_sec, sec_per_batch))            if step % 100 == 0:                # 运行汇总操作， 写入汇总                summary_str = sess.run(summary_op)                summary_writer.add_summary(summary_str, step)                            if step % 1000 == 0 or (step + 1) == MAX_STEP:                # 保存当前的模型和权重到 train_dir，global_step 为当前的迭代次数                checkpoint_path = os.path.join(train_dir, 'model.ckpt')                saver.save(sess, checkpoint_path, global_step=step)    except ShortInputException as e:        coord.request_stop(e)    finally:        coord.request_stop()        coord.join(threads)    sess.close()

第四步，评估

def evaluate():    data_dir = '/Machine Learning/tensorflow/卷积神经网络/练习/data/cifar-10-batches-bin/'    train_dir = '/Machine Learning/tensorflow/卷积神经网络/练习/cifar10_train/'    images, labels = cifar10_input.inputs(data_dir, BATCH_SIZE, train = False)    logits = inference(images)     saver = tf.train.Saver(tf.all_variables())            os.environ['CUDA_VISIBLE_DEVICES'] = str(0)    config = tf.ConfigProto()    config.gpu_options.per_process_gpu_memory_fraction = 0.2    sess = tf.InteractiveSession(config=config)    coord = tf.train.Coordinator()           threads = tf.train.start_queue_runners(sess = sess, coord = coord)    # 加载模型参数    print("Reading checkpoints...")    ckpt = tf.train.get_checkpoint_state(train_dir)    if ckpt and ckpt.model_checkpoint_path:        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]                 saver.restore(sess, os.path.join(train_dir, ckpt_name))        print('Loading success, global_step is %s' % global_step)    try:             top_k_op = tf.nn.in_top_k(logits, labels, 1)        true_count = 0        step = 0        while step < 157:            if coord.should_stop():                break            predictions = sess.run(top_k_op)            true_count += np.sum(predictions)            step += 1        precision = true_count / 10000        print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))    except tf.errors.OutOfRangeError:        coord.request_stop()    finally:        coord.request_stop()        coord.join(threads)    sess.close()

结束：具体代码网站

0 0