本文复现AI challenger的官方baseline模型,数据通过tfrecord和队列来供给。
官方对该模型的描述是:“This simple model consists of three convolutional layers, three max pool layers and two fully connected layers. Local response normalization and dropout are also used. Details of network structure is in …”(原文此处指向的网络结构文件链接已缺失)。本文只是近似复现,因为数据预处理、batch_size 等部分与官方实现不完全一样。

注意:由于手头没有合适的GPU,这份代码的运行结果没有给出。相比于上一篇《AI challenger 场景分类 train test softmax》,代码只做了少量更改。

# -*- coding: utf-8 -*-
"""Reproduce the official AI Challenger scene-classification baseline model.

Created on Wed Sep 20 16:05:02 2017
@author: wayne

CHANGES
- Reproduce the official scene-classification baseline model. Note that the
  image resize step is NOT exactly the same as in the official code!

TODO
- NEXT (train_flag = True): report the validation accuracy periodically
  during training, i.e. load both train and val when train_flag = True.
  Report the accuracy on the whole training set when training finishes?
- NEXT: fine-tune ImageNet-pretrained Inception-ResNet v2, SENet, etc.
- NEXT: hyper-parameter tuning and data augmentation, model complexity,
  use a log file, use input args, modularize, etc.

REFERENCES
- Official baseline
- Simple saving and restoring of variables
"""
import json
import time

import tensorflow as tf

import network


def _read_single_example(tfrecords_file, num_epochs, with_image_id=False):
    """Build the ops that read and decode one example from a TFRecord file.

    Args:
        tfrecords_file: path of the TFRecord file to read.
        num_epochs: number of passes over the file before the filename queue
            raises ``tf.errors.OutOfRangeError``.
        with_image_id: when True, also parse the ``image_id`` string feature
            (the record file must then contain it).

    Returns:
        ``(image, label, image_id)`` where ``image`` is the decoded uint8
        tensor reshaped to ``[h, w, c]``, ``label`` is an int32 scalar and
        ``image_id`` is a string scalar, or ``None`` if it was not requested.
    """
    filename_queue = tf.train.string_input_producer(
        [tfrecords_file], num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'h': tf.FixedLenFeature([], tf.int64),
        'w': tf.FixedLenFeature([], tf.int64),
        'c': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
    }
    if with_image_id:
        feature_spec['image_id'] = tf.FixedLenFeature([], tf.string)
    img_features = tf.parse_single_example(
        serialized_example, features=feature_spec)

    h = tf.cast(img_features['h'], tf.int32)
    w = tf.cast(img_features['w'], tf.int32)
    c = tf.cast(img_features['c'], tf.int32)
    image = tf.decode_raw(img_features['image'], tf.uint8)
    image = tf.reshape(image, [h, w, c])
    label = tf.cast(img_features['label'], tf.int32)
    image_id = img_features['image_id'] if with_image_id else None
    return image, label, image_id


def _resize_and_standardize(image):
    """Resize to ``image_size`` x ``image_size`` and standardize per image."""
    image = tf.image.resize_images(image, (image_size, image_size))
    # Shift/scale every image to zero mean and (roughly) unit deviation.
    image = tf.image.per_image_standardization(image)
    # The reshape pins the static shape, which the batching ops require.
    return tf.reshape(image, [image_size, image_size, image_channel])


def read_and_decode(tfrecords_file, batch_size, num_epochs):
    """Return shuffled ``(image_batch, label_batch)`` tensors for training.

    Args:
        tfrecords_file: path of the training TFRecord file.
        batch_size: number of examples per batch.
        num_epochs: number of passes over the file.

    Returns:
        ``image_batch`` of shape
        ``[batch_size, image_size, image_size, image_channel]`` and
        ``label_batch`` of shape ``[batch_size]``.
    """
    image, label, _ = _read_single_example(tfrecords_file, num_epochs)

    # Data augmentation would go here, before the resize, e.g.:
    #   distorted_image = tf.random_crop(images, [530, 530, img_channel])
    #   distorted_image = tf.image.random_flip_left_right(distorted_image)
    #   distorted_image = tf.image.random_brightness(distorted_image,
    #                                                max_delta=63)
    #   distorted_image = tf.image.random_contrast(distorted_image,
    #                                              lower=0.2, upper=1.8)
    image = _resize_and_standardize(image)

    # Shuffle here.
    image_batch, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,  # note: multiple threads may change the image order
        capacity=2048,
        min_after_dequeue=64,  # 256
    )
    return image_batch, label_batch


def read_and_decode_test(tfrecords_file, batch_size, num_epochs):
    """Return un-shuffled, un-augmented batches for validation/testing.

    Unlike ``read_and_decode`` this also yields the ``image_id`` of every
    example, so the TFRecord file must contain an ``image_id`` feature.

    Args:
        tfrecords_file: path of the validation/test TFRecord file.
        batch_size: number of examples per batch.
        num_epochs: number of passes over the file.

    Returns:
        ``(image_batch, label_batch, image_id_batch)``.
    """
    image, label, image_id = _read_single_example(
        tfrecords_file, num_epochs, with_image_id=True)

    # No data augmentation at evaluation time.
    image = _resize_and_standardize(image)

    image_batch, label_batch, image_id_batch = tf.train.batch(
        [image, label, image_id],
        batch_size=batch_size,
        num_threads=64,  # note: multiple threads may change the image order
        capacity=2048,
    )
    return image_batch, label_batch, image_id_batch


def batch_to_list_of_dicts(indices2, image_id_batch2):
    """Convert one evaluated batch into submission records.

    Args:
        indices2: 2-D array with the predicted top-k label ids of each image.
        image_id_batch2: sequence of image ids as ``bytes``.

    Returns:
        A list like ``[{"image_id": "a0563e....jpg", "label_id": [5, 18, 32]}]``
        with one dict per image in the batch.
    """
    # Iterate over the batch itself rather than over a global batch_size.
    return [
        {'image_id': image_id.decode(), 'label_id': top_k.tolist()}
        for image_id, top_k in zip(image_id_batch2, indices2)
    ]


def _run_training(sess, coord, saver, optimizer, loss, accuracy, keep_prob):
    """Train until the input queue is exhausted, checkpointing on the way."""
    step = 0
    start_time = time.time()
    try:
        while not coord.should_stop():
            sess.run(optimizer, feed_dict={keep_prob: 0.5})
            step += 1
            if step % 100 == 0:
                # Evaluate loss and accuracy in ONE run so that both numbers
                # describe the same minibatch (dropout disabled).
                l, acc = sess.run([loss, accuracy], feed_dict={keep_prob: 1})
                duration = time.time() - start_time
                print("Minibatch loss at step %d: %.6f (%.3f sec)"
                      % (step, l, duration))
                print("Minibatch accuracy: %.6f" % acc)
            # TODO: validation accuracy every N steps.
            if step % 2000 == 0:
                saver.save(sess, checkfile, global_step=step)
                print('writing checkpoint at step %s' % step)
    except tf.errors.OutOfRangeError:
        print('Done training for %d epochs, %d steps.' % (num_epochs, step))
        # TODO: final training accuracy / final validation accuracy.
        saver.save(sess, "save_path/final_model.ckpt")
    finally:
        coord.request_stop()


def _run_testing(sess, coord, saver, accuracy_batch, accuracy_update,
                 image_id_batch, indices, keep_prob):
    """Evaluate the final checkpoint and write ``submit.json``."""
    # Restore the values saved at the end of training into the variables.
    saver.restore(sess, "save_path/final_model.ckpt")
    results = []
    # Stays NaN if the queue is exhausted before the first batch is evaluated.
    val_test_acc2_update = float('nan')
    step = 0
    try:
        while not coord.should_stop():
            (val_test_acc2_batch, val_test_acc2_update,
             image_id_batch2, indices2) = sess.run(
                 [accuracy_batch, accuracy_update, image_id_batch, indices],
                 feed_dict={keep_prob: 1})
            step += 1
            results += batch_to_list_of_dicts(indices2, image_id_batch2)
            if step % 10 == 0:
                print('Useless minibatch testing accuracy at step %d: %.6f'
                      % (step, val_test_acc2_batch))
    except tf.errors.OutOfRangeError:
        print('Done testing in, %d steps.' % (step))
        # The last value of the update op is the streaming mean over all
        # evaluated batches.
        print('FInal Testing accuracy: %.6f' % (val_test_acc2_update))
        # Write the JSON submission, e.g.
        # [{"image_id": "a0563e....jpg", "label_id": [5, 18, 32]}]
        print(len(results))
        print(results[0:20])
        with open('submit.json', 'w') as f:
            json.dump(results, f)
    finally:
        coord.request_stop()


def read_tfrecord2(tfrecord_file, batch_size, train_flag):
    """Build the graph and run either training or evaluation.

    Reads the module-level settings ``num_epochs``, ``checkfile``,
    ``image_size`` and ``image_channel``.

    Args:
        tfrecord_file: TFRecord file with the training data (when
            ``train_flag`` is true) or the validation/test data.
        batch_size: number of examples per batch.
        train_flag: True to train and save checkpoints; False to restore
            ``save_path/final_model.ckpt``, evaluate top-3 accuracy and
            write ``submit.json``.
    """
    # The test set carries image_id, otherwise train and test could share one
    # input function. read_and_decode is also where training-time data
    # augmentation goes, so validation and test both use the second reader.
    if train_flag:
        train_batch, train_label_batch = read_and_decode(
            tfrecord_file, batch_size, num_epochs)
        optimizer, loss, logits, keep_prob = network.inference(
            train_batch, train_label_batch)
        # Minibatch (non-streaming) top-3 accuracy.
        accuracy = tf.reduce_mean(tf.cast(
            tf.nn.in_top_k(predictions=logits, targets=train_label_batch, k=3),
            tf.float32))
    else:
        val_test_batch, val_test_label_batch, image_id_batch = \
            read_and_decode_test(tfrecord_file, batch_size, num_epochs)
        # The optimizer op is unused here, but building the same graph as in
        # training keeps the variable set identical to the checkpoint's.
        _val_test_optimizer, _val_test_loss, val_test_logits, \
            val_test_keep_prob = network.inference(
                val_test_batch, val_test_label_batch)
        in_top3 = tf.cast(
            tf.nn.in_top_k(predictions=val_test_logits,
                           targets=val_test_label_batch, k=3),
            tf.float32)
        # Per-minibatch accuracy (not very useful on its own).
        val_test_accuracy_batch = tf.reduce_mean(in_top3)
        # Streaming accuracy accumulated over all batches.
        _val_test_accuracy, val_test_accuracy_update = tf.metrics.mean(in_top3)
        _, indices = tf.nn.top_k(val_test_logits, 3)

    saver = tf.train.Saver()
    # Optional GPU settings:
    #   gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    #   config = tf.ConfigProto()
    #   config.gpu_options.allow_growth = True
    with tf.Session() as sess:
        # Local variables back the epoch counter of the filename queue and
        # the streaming metric, so they must be initialized as well.
        sess.run(tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer()))
        print("Initialized")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        if train_flag:
            _run_training(sess, coord, saver, optimizer, loss, accuracy,
                          keep_prob)
        else:
            _run_testing(sess, coord, saver, val_test_accuracy_batch,
                         val_test_accuracy_update, image_id_batch, indices,
                         val_test_keep_prob)
        coord.join(threads)


train_flag = True
image_size = 128
num_labels = 80
image_channel = 3
checkfile = 'save_path/model.ckpt'
# max_step = 65000

if train_flag:
    tfrecord_file = '../ai_challenger_scene_train_20170904/train.tfrecord'
    # Validate while training (not used yet):
    # tfrecord_file_val = '../ai_challenger_scene_train_20170904/val.tfrecord'
    batch_size = 32  # 128 also works on my machine
    num_epochs = 38
    print('max step num is %.1f' % (num_epochs*53879.0/batch_size))
    read_tfrecord2(tfrecord_file, batch_size, train_flag)
else:
    tfrecord_file = '../ai_challenger_scene_train_20170904/val.tfrecord'  # test
    # The streaming metric has to accumulate over whole batches: if the data
    # set size is not divisible by batch_size, the final partial batch is
    # never used!
    batch_size = 16
    num_epochs = 1
    read_tfrecord2(tfrecord_file, batch_size, train_flag)
#    with open('submit.json', 'r') as file1:
#        submit_data = json.load(file1)
#    with open('scene_validation_annotations_20170908.json', 'r') as file2:
#        ref_data1 = json.load(file2)
#    with open('ref.json', 'r') as file2:
#        ref_data2 = json.load(file2)
#    with open('submit0.json', 'r') as file3:
#        submit0_data = json.load(file3)
# 53879  7120  (presumably the train / validation set sizes)

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Copyright 2017
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Description:
Network structure of a simple CNN network like Alexnet.

CHANGES:
- Comments left by Yan Wang according to the Python API r1.3.
- one_hot_labels was renamed to original_labels.
- Open question: where should batch normalization be added?
"""
import tensorflow as tf

LEARNINGRATE = 1e-3


def weight_variable(shape, stddev=0.1):
    """Create a weight variable drawn from a truncated normal distribution.

    Values more than 2 standard deviations from the mean are dropped and
    re-picked.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))


def bias_variable(shape, bais=0.1):
    """Create a bias variable filled with the constant ``bais``."""
    return tf.Variable(tf.constant(bais, shape=shape))


def conv2d(x, w):
    """Stride-1, SAME-padded 2-D convolution.

    ``x`` is the input tensor of shape
    [batch, in_height, in_width, in_channels]; ``w`` is the filter/kernel
    tensor of shape [filter_height, filter_width, in_channels, out_channels].
    With the default NHWC format the strides must satisfy
    strides[0] = strides[3] = 1.
    """
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """Max pooling with a 2x2 window and stride 2 (SAME padding)."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=[1, 2, 2, 1],
                          padding='SAME')


def max_pool_3x3(x):
    """Max pooling with a 3x3 window and stride 2 (SAME padding)."""
    window = [1, 3, 3, 1]
    return tf.nn.max_pool(x, ksize=window, strides=[1, 2, 2, 1],
                          padding='SAME')


def avg_pool_3x3(x):
    """Average pooling with a 3x3 window and stride 2 (SAME padding)."""
    window = [1, 3, 3, 1]
    return tf.nn.avg_pool(x, ksize=window, strides=[1, 2, 2, 1],
                          padding='SAME')


def inference(features, original_labels):
    """Build the CNN together with its loss and training op.

    The net has three 5x5 conv layers with 64 filters each (every one
    followed by a 3x3/stride-2 max pool, the first two with local response
    normalization), a 128-unit fully connected layer with dropout and an
    80-way output layer.

    Args:
        features: image batch of shape [batch, height, width, 3]. The
            flatten step hard-codes a 16x16x64 feature map, which assumes
            128x128 inputs -- TODO confirm against the caller.
        original_labels: integer class ids (not one-hot), one per image.

    Returns:
        ``(train_step, cross_entropy, y_conv, keep_prob)``: the Adam training
        op, the mean cross-entropy loss, the logits and the dropout
        keep-probability placeholder that must be fed at run time.
    """
    # NOTE: variables are created in the same order as before so that the
    # default TensorFlow variable names stay unchanged.

    # conv1 -> pool1 -> norm1
    # Kernel shape is [filter_height, filter_width, in_channels, out_channels].
    conv1_kernel = weight_variable([5, 5, 3, 64], stddev=1e-4)
    conv1_bias = bias_variable([64])
    conv1_out = tf.nn.relu(conv2d(features, conv1_kernel) + conv1_bias)
    pool1_out = max_pool_3x3(conv1_out)
    # tf.nn.lrn is tf.nn.local_response_normalization.
    norm1 = tf.nn.lrn(pool1_out, depth_radius=4, bias=1.0,
                      alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # conv2 -> norm2 -> pool2
    conv2_kernel = weight_variable([5, 5, 64, 64], stddev=1e-2)
    conv2_bias = bias_variable([64])
    conv2_out = tf.nn.relu(conv2d(norm1, conv2_kernel) + conv2_bias)
    norm2 = tf.nn.lrn(conv2_out, depth_radius=4, bias=1.0,
                      alpha=0.001 / 9.0, beta=0.75, name='norm2')
    pool2_out = max_pool_3x3(norm2)

    # conv3 -> pool3
    conv3_kernel = weight_variable([5, 5, 64, 64], stddev=1e-2)
    conv3_bias = bias_variable([64])
    conv3_out = tf.nn.relu(conv2d(pool2_out, conv3_kernel) + conv3_bias)
    pool3_out = max_pool_3x3(conv3_out)

    # fc1: three stride-2 poolings shrink the spatial size by a factor of 8.
    flat_dim = 16 * 16 * 64
    fc1_weights = weight_variable([flat_dim, 128])
    fc1_bias = bias_variable([128])
    flattened = tf.reshape(pool3_out, [-1, flat_dim])
    fc1_out = tf.nn.relu(tf.matmul(flattened, fc1_weights) + fc1_bias)

    # Dropout; keep_prob is fed by the caller.
    keep_prob = tf.placeholder("float")
    fc1_dropped = tf.nn.dropout(fc1_out, keep_prob)

    # fc2: 128 hidden units -> 80 class logits.
    fc2_weights = weight_variable([128, 80])
    fc2_bias = bias_variable([80])
    y_conv = tf.matmul(fc1_dropped, fc2_weights) + fc2_bias

    # Loss: the sparse variant takes integer labels directly.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=original_labels, logits=y_conv)
    cross_entropy = tf.reduce_mean(per_example_loss)

    train_step = tf.train.AdamOptimizer(LEARNINGRATE).minimize(cross_entropy)
    return train_step, cross_entropy, y_conv, keep_prob
