第六课 Tensorflow Cifar10 CNN 卷积神经网络

来源:互联网 发布:c网络语言是什么意思 编辑:程序博客网 时间:2024/06/06 02:22

对于train来说,基本就是几个步骤:

  1. 读取数据
  2. 构建训练网络
  3. 训练
# coding:utf-8
"""Training interface."""
from abc import ABCMeta
from abc import abstractmethod

# Build the abstract base by calling the metaclass directly. A class-body
# ``__metaclass__ = ABCMeta`` attribute is only honoured by Python 2; on
# Python 3 it is ignored and @abstractmethod is never enforced. Calling the
# metaclass works identically on both versions.
_AbstractBase = ABCMeta('_AbstractBase', (object,), {})


class ITrain(_AbstractBase):
    """Abstract interface that every trainer must implement.

    Instantiating this class, or a subclass that does not override
    ``train``, raises ``TypeError``.
    """

    @abstractmethod
    def train(self):
        """
        Run training.

        :return: train op
        """
        pass
# coding:utf-8
"""CIFAR-10 training."""
from train import ITrain
import tensorflow as tf
from cifar10_data_input import CIFAR10DataInput
from cifar10_inference import CIFAR10Inference
import time
import datetime


class CIFAR10Train(ITrain):
    """Builds the CIFAR-10 input pipeline, model, loss and train op, then
    runs the training loop inside a ``MonitoredTrainingSession``.
    """

    INPUT_PATH = 'input/cifar10_bin_data/*.bin'
    TRAIN_PATH = 'output/train'
    BATCH_SIZE = 128
    NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
    LEARNING_RATE_INITILAIZE = 0.1
    LEARING_RATE_DECAY_FACTOR = 0.1
    NUM_EPOCHS_PER_DECAY = 350.0
    # Global step at which StopAtStepHook requests a stop.
    MAX_STEPS = 1000
    # Number of session runs between two log lines printed by the logger hook.
    LOG_FREQUENCY = 100

    def train(self):
        """
        Build the graph and run training until a hook requests a stop.

        Checkpoints are written under ``TRAIN_PATH``. Nothing is returned;
        the loop ends when ``mon_sess.should_stop()`` becomes true.
        """
        input_paths = tf.train.match_filenames_once(CIFAR10Train.INPUT_PATH)

        cifar10_input = CIFAR10DataInput(input_file_paths=input_paths,
                                         batch_size=CIFAR10Train.BATCH_SIZE,
                                         example_per_epoch_num=CIFAR10Train.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
        image_batch, label_batch = cifar10_input.read_data()

        tf.summary.image('images', image_batch)

        cifar10_inference = CIFAR10Inference(image_channel=3,
                                             batch_size=CIFAR10Train.BATCH_SIZE,
                                             label_class_num=10)
        logits = cifar10_inference.inference(images=image_batch)
        loss = cifar10_inference.loss(logits, label_batch)
        train_op = self._train_op(loss)

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints the loss and throughput every ``LOG_FREQUENCY`` runs."""

            def __init__(self):
                super(_LoggerHook, self).__init__()
                self._log_frequency = CIFAR10Train.LOG_FREQUENCY
                self._step = -1
                self._start_time = time.time()

            def begin(self):
                # Reset the local run counter and the timer.
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch `loss` in this run; its value
                # is handed back to after_run via run_values.results.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % self._log_frequency != 0:
                    return

                current_time = time.time()
                duration = current_time - self._start_time
                self._start_time = current_time

                loss_value = run_values.results
                examples_per_sec = self._log_frequency * CIFAR10Train.BATCH_SIZE / duration
                sec_per_batch = float(duration / self._log_frequency)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))

        hooks = [
            # Requests a stop once the global step reaches MAX_STEPS.
            tf.train.StopAtStepHook(last_step=CIFAR10Train.MAX_STEPS),
            # Monitors the loss and reacts when it becomes NaN.
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        with tf.train.MonitoredTrainingSession(checkpoint_dir=CIFAR10Train.TRAIN_PATH,
                                               hooks=hooks,
                                               config=tf.ConfigProto(log_device_placement=False)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)

    def _train_op(self, loss):
        """
        Build the op that performs one training step.

        :param loss: total loss tensor to minimise
        :return: a no-op named 'train' that depends on the gradient update
                 and on the moving-average update of the trainable variables
        """
        # Global step counter: how many training steps have been run in total.
        global_step = tf.contrib.framework.get_or_create_global_step()

        num_batchs_per_epoch = CIFAR10Train.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / CIFAR10Train.BATCH_SIZE
        decay_steps = int(num_batchs_per_epoch * CIFAR10Train.NUM_EPOCHS_PER_DECAY)

        # Learning rate that decays exponentially (in a staircase) with the
        # global step.
        learning_rate = tf.train.exponential_decay(CIFAR10Train.LEARNING_RATE_INITILAIZE,
                                                   global_step,
                                                   decay_steps=decay_steps,
                                                   decay_rate=CIFAR10Train.LEARING_RATE_DECAY_FACTOR,
                                                   staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Moving averages of the losses (also adds the loss summaries).
        loss_averages_op = self._add_loss_summaryies(total_loss=loss)

        # Force the loss averages to be computed before the gradients.
        with tf.control_dependencies([loss_averages_op]):
            opt = tf.train.GradientDescentOptimizer(learning_rate)
            grads = opt.compute_gradients(loss)

        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Histograms for every trainable variable and for its gradient.
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

        # Moving averages of the trainable variables; the averaged values can
        # later be used in place of the raw variables to smooth out jitter.
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)

        # The averages must be updated AFTER the gradient step. Without this
        # dependency the two ops are unordered and the average may be taken
        # from the pre-update values.
        with tf.control_dependencies([apply_gradient_op]):
            variables_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([variables_averages_op]):
            train_op = tf.no_op(name='train')

        return train_op

    def _add_loss_summaryies(self, total_loss):
        """
        Track moving averages of the losses and add scalar summaries.

        :param total_loss: total loss of the current step
        :return: op that updates the moving averages of every tensor in the
                 'losses' collection and of ``total_loss``
        """
        loss_averages = tf.train.ExponentialMovingAverage(decay=0.9, name='avg')
        tracked_losses = tf.get_collection('losses') + [total_loss]

        # apply() maintains one moving average per tensor in the list; it
        # does not average the tensors with each other.
        loss_averages_op = loss_averages.apply(tracked_losses)

        for l in tracked_losses:
            # The raw value ...
            tf.summary.scalar(l.op.name + ' (raw)', l)
            # ... and its moving average.
            tf.summary.scalar(l.op.name, loss_averages.average(l))

        return loss_averages_op
# coding:utf-8
"""Data input interface: reads the data and transforms it into the standard
format consumed by the model.
"""
import tensorflow as tf
from abc import ABCMeta
from abc import abstractmethod

# Build the abstract base by calling the metaclass directly. A class-body
# ``__metaclass__`` attribute is ignored by Python 3, which would leave the
# abstract methods unenforced; this form works on Python 2 and 3.
_AbstractBase = ABCMeta('_AbstractBase', (object,), {})


class IDataInput(_AbstractBase):
    """
    Data input base class.

    Subclasses implement ``_read_data_from_queue`` and ``_preprocess_data``.
    """

    def __init__(self,
                 input_file_paths,
                 batch_size,
                 example_per_epoch_num,
                 parallel_thread_num=16):
        """
        Initialise.

        :param input_file_paths: list of input file paths
        :param batch_size: batch size
        :param example_per_epoch_num: number of examples per epoch, usually
                                      the total number of examples
        :param parallel_thread_num: number of threads used for batching
        """
        self._input_file_paths = input_file_paths
        self._batch_size = batch_size
        self._parallel_thread_num = parallel_thread_num
        self._example_per_echo_num = example_per_epoch_num

    def read_data(self, shuffle=False):
        """
        Read data and assemble it into batches.

        :param shuffle: whether to shuffle examples when batching; defaults
                        to False
        :return: (data_batch, label_batch)
        """
        # Queue of input file names.
        file_path_queue = tf.train.string_input_producer(self._input_file_paths)

        record = self._read_data_from_queue(file_path_queue)
        stander_data, label = self._preprocess_data(record)

        data_batch, label_batch = self._generate_train_batch(stander_data, label, shuffle=shuffle)
        return data_batch, label_batch

    @abstractmethod
    def _read_data_from_queue(self, file_path_queue):
        """
        Read one record from the file-name queue in the required format.

        :param file_path_queue: queue of input file names
        :return: the record that is passed on to ``_preprocess_data``
        """
        pass

    def test_read_data_from_queue(self):
        """Create the file-name queue and return one raw record read from it."""
        file_path_queue = tf.train.string_input_producer(self._input_file_paths)
        return self._read_data_from_queue(file_path_queue)

    @abstractmethod
    def _preprocess_data(self, record):
        """
        Preprocess a record, e.g. distort an image or add noise.

        :param record: record returned by ``_read_data_from_queue``
        :return: (data, label), as unpacked by ``read_data``
        """
        pass

    def _generate_train_batch(self, train_data, label, shuffle=True):
        """
        Build batches of data through a queue.

        :param train_data: a single training example
        :param label: label of the example
        :param shuffle: whether to shuffle examples when building batches
        :return: (data_batch, label_batch) where label_batch is reshaped to
                 ``[batch_size]``
        """
        # Queue sizes must be whole numbers of examples, so truncate the
        # fractional product to int instead of passing floats to the queue.
        min_after_dequeue = int(self._example_per_echo_num * 0.4)
        capacity = min_after_dequeue + 3 * self._batch_size

        if shuffle:
            data_batch, label_batch = tf.train.shuffle_batch([train_data, label],
                                                             batch_size=self._batch_size,
                                                             num_threads=self._parallel_thread_num,
                                                             capacity=capacity,
                                                             min_after_dequeue=min_after_dequeue)
        else:
            data_batch, label_batch = tf.train.batch([train_data, label],
                                                     batch_size=self._batch_size,
                                                     num_threads=self._parallel_thread_num,
                                                     capacity=capacity)

        return data_batch, tf.reshape(label_batch, [self._batch_size])
# coding:utf-8
"""CIFAR-10 input reader."""
import tensorflow as tf
from data_input import IDataInput


class CIFAR10Record(object):
    """
    A record read from a CIFAR-10 file; attributes are attached by the reader.
    """
    pass


class CIFAR10DataInput(IDataInput):
    """
    CIFAR-10 data input.

    Data set layout:
    data_batch_1.bin - data_batch_5.bin: five files used as the training set.
    In each record the first byte is the label and the following 3072 bytes
    are the image: 1024 bytes of R, then 1024 of G, then 1024 of B, i.e. a
    32 * 32 RGB picture. Each file holds 10000 images, 50000 in total.

    test_batch.bin: one file used as the test set, with the same layout as
    the training files, which makes evaluation convenient.

    batches.meta.txt: describes the meaning of each label.
    """

    def __init__(self,
                 input_file_paths,
                 batch_size,
                 example_per_epoch_num,
                 parallel_thread_num=16,
                 label_bytes=1,
                 image_height=32,
                 image_width=32,
                 target_image_height=24,
                 target_image_width=24,
                 channel=3):
        """
        Initialise.

        :param input_file_paths: list of input file paths
        :param batch_size: batch size
        :param example_per_epoch_num: number of examples per epoch
        :param parallel_thread_num: number of threads used for batching
        :param label_bytes: number of bytes occupied by the label
        :param image_height: height of the image as stored in the file
        :param image_width: width of the image as stored in the file
        :param target_image_height: height of the produced (cropped) image
        :param target_image_width: width of the produced (cropped) image
        :param channel: number of image channels
        """
        super(CIFAR10DataInput, self).__init__(input_file_paths=input_file_paths,
                                               batch_size=batch_size,
                                               example_per_epoch_num=example_per_epoch_num,
                                               parallel_thread_num=parallel_thread_num)
        self._label_bytes = label_bytes
        self._image_height = image_height
        self._image_width = image_width
        self._target_image_height = target_image_height
        self._target_image_width = target_image_width
        self.channel = channel

    def _read_data_from_queue(self, file_path_queue):
        """
        Read one fixed-length record and split it into label and image.

        :param file_path_queue: queue of input file names
        :return: CIFAR10Record with ``key``, ``label`` (int32) and
                 ``uint8image`` laid out as (height, width, channel)
        """
        record = CIFAR10Record()
        # Use the configured channel count, so that the reshape below agrees
        # with the record length computed from the same value.
        record.channel = self.channel
        record.height = self._image_height
        record.width = self._image_width

        image_byte_count = self._image_height * self._image_width * self.channel
        record_bytes = self._label_bytes + image_byte_count

        # Read one record from the queue; `value` holds the raw bytes.
        reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
        record.key, value = reader.read(file_path_queue, name='image_reader')

        # Decode the raw string into a uint8 vector of label + image bytes.
        image_label_bytes = tf.decode_raw(value, tf.uint8)

        # Leading bytes are the label.
        record.label = tf.cast(tf.slice(image_label_bytes, [0], [self._label_bytes]), tf.int32)

        # The remaining bytes are the image, stored channel by channel, so
        # reshape to (channel, height, width) first ...
        raw_image = tf.slice(image_label_bytes, [0 + self._label_bytes], [image_byte_count])
        planar_image = tf.reshape(raw_image, [record.channel, record.height, record.width])

        # ... then move the channel axis last: (c, h, w) -> (h, w, c).
        record.uint8image = tf.transpose(planar_image, [1, 2, 0])
        return record

    def _preprocess_data(self, record):
        """
        Apply random distortions to the image and standardise it.

        :param record: record returned by ``_read_data_from_queue``
        :return: (float_image, label)
        """
        reshaped_image = tf.cast(record.uint8image, tf.float32)

        height = self._target_image_height
        width = self._target_image_width

        # Random crop to the target size, keeping all configured channels.
        distorted_image = tf.random_crop(reshaped_image, [height, width, self.channel])

        # Random horizontal flip.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # Random brightness.
        distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)

        # Random contrast.
        distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)

        # Per-image standardisation.
        float_image = tf.image.per_image_standardization(distorted_image)

        return float_image, record.label
# coding:utf-8
"""Forward-model (inference) interface."""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf

# Without an ABCMeta metaclass the @abstractmethod decorators below have no
# effect. Calling the metaclass directly yields an abstract base that works
# on both Python 2 and Python 3.
_AbstractBase = ABCMeta('_AbstractBase', (object,), {})


class IInference(_AbstractBase):
    """
    Base class for building a forward model.

    Subclasses implement ``inference`` and ``loss``.
    """

    def __init__(self):
        # Name of the graph collection in which loss terms are accumulated.
        self._loss_name = 'losses'

    @abstractmethod
    def inference(self, data):
        """
        Build the forward model.

        :param data: input data
        :return: tensorflow op
        """
        pass

    @abstractmethod
    def loss(self, inference, label):
        """
        Compute the loss.

        :param inference: forward output produced by ``inference``
        :param label: label
        :return:
        """
        pass

    def bias(self, name, shape, initializer=tf.constant_initializer(0.0)):
        """
        Create a bias variable.

        :param name: name of the bias
        :param shape: shape of the bias
        :param initializer: initializer, constant 0.0 by default
        :return: bias variable
        """
        return tf.get_variable(name=name,
                               shape=shape,
                               initializer=initializer)

    def viariable_with_weight_decay(self, name, shape, stddev, l2_decay):
        """
        Create a variable initialised from a truncated normal distribution,
        optionally registering an L2 penalty for it.

        :param name: name
        :param shape: shape
        :param stddev: standard deviation of the initializer
        :param l2_decay: coefficient of the L2 loss; if None no L2 loss is
                         added
        :return: the created variable
        """
        var = tf.get_variable(name=name,
                              shape=shape,
                              initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32),
                              dtype=tf.float32)

        if l2_decay is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(var), l2_decay, name='weight_loss')

            # The L2 terms are summed into the total loss later, so store
            # each one in the loss collection for now.
            tf.add_to_collection(self._loss_name, weight_decay)

        return var
# coding:utf-8
"""CIFAR-10 inference."""
from inference import IInference
import tensorflow as tf
import re


class CIFAR10Inference(IInference):
    """
    CIFAR-10 inference implemented as a CNN:
    conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 -> local3 -> local4
    -> linear softmax layer.
    """

    def __init__(self, image_channel, batch_size, label_class_num):
        """
        Initialise.

        :param image_channel: number of channels of the input images
        :param batch_size: batch size, used to flatten the last pooling output
        :param label_class_num: number of label classes (output width)
        """
        super(CIFAR10Inference, self).__init__()

        # Convolution kernel shape is 5 * 5 * channel.
        self._kernel_width = 5
        self._kernel_height = 5
        self._image_channel = image_channel
        self._batch_size = batch_size
        self._label_class_num = label_class_num

    @staticmethod
    def activation_summary(x):
        """
        Add a histogram and a sparsity summary for an activation.

        A leading 'tower_<N>/' prefix is stripped from the op name before it
        is used as the summary name.

        :param x: activation tensor
        :return: None
        """
        tower_name = 'tower'
        tensor_name = re.sub('%s_[0-9]*/' % tower_name, '', x.op.name)
        tf.summary.histogram(tensor_name + '/activations', x)
        tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))

    def inference(self, images):
        """
        Build the forward network.

        :param images: batch of input images
                       (assumes [batch, height, width, image_channel] - TODO confirm)
        :return: unscaled logits with ``label_class_num`` columns
        """
        # conv1
        conv1_kernel_num = 64  # number of kernels in the first conv layer

        with tf.variable_scope('conv1') as scope:
            kernel = self.viariable_with_weight_decay(
                name='weight',
                shape=[self._kernel_height, self._kernel_width, self._image_channel, conv1_kernel_num],
                stddev=5e-2,
                l2_decay=0.0)

            conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
            bias = self.bias('bias', [conv1_kernel_num])
            pre_activation = tf.nn.bias_add(conv, bias=bias)

            conv1 = tf.nn.relu(pre_activation, name=scope.name)
            CIFAR10Inference.activation_summary(conv1)

        # pool1: max pooling
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')

        # norm1: local response normalisation of pool1
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1,
                          alpha=0.001/9.0, beta=0.75,
                          name='norm1')

        # conv2
        conv2_kernel_num = 64  # number of kernels in the second conv layer

        with tf.variable_scope('conv2') as scope:
            kernel = self.viariable_with_weight_decay(
                name='weight',
                shape=[self._kernel_height, self._kernel_width, conv1_kernel_num, conv2_kernel_num],
                stddev=5e-2,
                l2_decay=0.0)

            conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
            bias = self.bias('bias', [conv2_kernel_num], tf.constant_initializer(0.1))
            pre_activation = tf.nn.bias_add(conv, bias=bias)

            conv2 = tf.nn.relu(pre_activation, name=scope.name)
            CIFAR10Inference.activation_summary(conv2)

        # norm2: local response normalisation of conv2
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1,
                          alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')

        # pool2: max pooling. Named 'pool2' so that it does not collide with
        # the first pooling op's name.
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool2')

        # local3: fully connected layer on top of the conv stack
        with tf.variable_scope('local3') as scope:
            # Flatten every example of pool2 into one row.
            reshape = tf.reshape(pool2,
                                 shape=[self._batch_size, -1])
            # Length of the flattened vector.
            dim = reshape.get_shape()[1].value

            weights = self.viariable_with_weight_decay('weights',
                                                       shape=[dim, 384],
                                                       stddev=0.04,
                                                       l2_decay=0.004)
            bias = self.bias('bias',
                             shape=[384],
                             initializer=tf.constant_initializer(0.1))

            local3 = tf.nn.relu(tf.matmul(reshape, weights) + bias, name=scope.name)
            CIFAR10Inference.activation_summary(local3)

        # local4: fully connected layer
        with tf.variable_scope('local4') as scope:
            weights = self.viariable_with_weight_decay('weights',
                                                       shape=[384, 192],
                                                       stddev=0.04,
                                                       l2_decay=0.004)
            bias = self.bias('bias',
                             shape=[192],
                             initializer=tf.constant_initializer(0.1))

            local4 = tf.nn.relu(tf.matmul(local3, weights) + bias, name=scope.name)
            CIFAR10Inference.activation_summary(local4)

        # Final linear layer; softmax itself is applied inside the loss.
        with tf.variable_scope('softmax') as scope:
            weights = self.viariable_with_weight_decay('weights',
                                                       shape=[192, self._label_class_num],
                                                       stddev=0.04,
                                                       l2_decay=0.004)
            bias = self.bias('bias',
                             shape=[self._label_class_num],
                             initializer=tf.constant_initializer(0.0))

            softmax_linear = tf.add(tf.matmul(local4, weights), bias, name=scope.name)
            CIFAR10Inference.activation_summary(softmax_linear)

        return softmax_linear

    def loss(self, logits, label):
        """
        Compute the total loss.

        The mean cross entropy is added to the loss collection, and the sum
        of everything in that collection (including the L2 terms registered
        while creating the variables) is returned.

        :param logits: logits returned by ``inference``
        :param label: integer class labels, cast to int64 here
        :return: total loss tensor named 'total_loss'
        """
        label = tf.cast(label, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label,
            logits=logits,
            name='corss_entropy_per_exampel'
        )

        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')
        tf.add_to_collection(self._loss_name, cross_entropy_mean)

        # Sum the cross entropy with the accumulated L2 losses.
        return tf.add_n(tf.get_collection(self._loss_name), name='total_loss')
原创粉丝点击