TensorFlow fine-tuning AlexNet


This post covers building the AlexNet model in TensorFlow, testing it, fine-tuning it on your own data, and batch-testing the trained model.

Main reference: the article "Finetuning AlexNet with TensorFlow"; the article "TensorFlow微调AlexNet" is a Chinese translation of it.

TensorFlow is much faster than Caffe...


Building the AlexNet network

An output-size calculation was added after each convolution and pooling layer, to make it easier to handle input sizes other than the default 227×227 during fine-tuning.

Unlike Caffe, which requires an explicit pad value, TensorFlow distinguishes two padding modes with 'VALID' and 'SAME' (a quick numeric check follows the list):

  • VALID
    No zero-padding; output size out_h = ceil((in_h - filter_h + 1) / strides_h)
  • SAME
    Borders are zero-padded as needed; output size out_h = ceil(in_h / strides_h)
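As a sanity check of the VALID formula, this small sketch traces the standard 227×227 AlexNet input through conv1 and pool1 (the helper name valid_out is my own, for illustration only):

import math

def valid_out(in_size, filter_size, stride):
    # VALID padding: out = ceil((in - filter + 1) / stride)
    return int(math.ceil(float(in_size - filter_size + 1) / stride))

h = valid_out(227, 11, 4)  # conv1, 11x11 kernel, stride 4 -> 55
h = valid_out(h, 3, 2)     # pool1, 3x3 kernel, stride 2   -> 27
print(h)                   # 27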
# (assumes: import math; import tensorflow as tf)
def create(self):
    # 1st Layer: Conv (w ReLu) -> Pool -> Lrn
    conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
    pool1 = max_pool(conv1, 3, 3, 2, 2, padding='VALID', name='pool1')
    norm1 = lrn(pool1, 2, 2e-05, 0.75, name='norm1')
    #    calc output size
    out_h = int(math.ceil(float(self.IN_SIZE[0] - 11 + 1) / 4))
    out_w = int(math.ceil(float(self.IN_SIZE[1] - 11 + 1) / 4))
    out_h = int(math.ceil(float(out_h - 3 + 1) / 2))
    out_w = int(math.ceil(float(out_w - 3 + 1) / 2))

    # 2nd Layer: Conv (w ReLu) -> Pool -> Lrn with 2 groups
    conv2 = conv(norm1, 5, 5, 256, 1, 1, groups=2, name='conv2')
    pool2 = max_pool(conv2, 3, 3, 2, 2, padding='VALID', name='pool2')
    norm2 = lrn(pool2, 2, 2e-05, 0.75, name='norm2')
    #    calc output size
    out_h = int(math.ceil(float(out_h - 3 + 1) / 2))
    out_w = int(math.ceil(float(out_w - 3 + 1) / 2))

    # 3rd Layer: Conv (w ReLu)
    conv3 = conv(norm2, 3, 3, 384, 1, 1, name='conv3')

    # 4th Layer: Conv (w ReLu) split into two groups
    conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

    # 5th Layer: Conv (w ReLu) -> Pool, split into two groups
    conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')
    #    calc output size
    out_h = int(math.ceil(float(out_h - 3 + 1) / 2))
    out_w = int(math.ceil(float(out_w - 3 + 1) / 2))

    # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
    flattened = tf.reshape(pool5, [-1, out_h * out_w * 256])
    fc6 = fc(flattened, out_h * out_w * 256, 4096, name='fc6')
    dropout6 = dropout(fc6, self.KEEP_PROB)

    # 7th Layer: FC (w ReLu) -> Dropout
    fc7 = fc(dropout6, 4096, 4096, name='fc7')
    dropout7 = dropout(fc7, self.KEEP_PROB)

    # 8th Layer: FC and return unscaled activations
    # (for tf.nn.softmax_cross_entropy_with_logits)
    self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')

Helper functions

def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x,
         name, padding='SAME', groups=1):
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])

    # Create lambda function for the convolution
    convolve = lambda i, k: tf.nn.conv2d(i, k,
                                         strides=[1, stride_y, stride_x, 1],
                                         padding=padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer
        weights = tf.get_variable('weights',
                                  shape=[filter_height, filter_width,
                                         input_channels // groups, num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

        if groups == 1:
            conv = convolve(x, weights)
        # In the case of multiple groups, split inputs & weights
        else:
            # Split input and weights and convolve them separately
            input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
            weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights)
            output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]
            # Concat the convolved outputs together again
            conv = tf.concat(axis=3, values=output_groups)

        # Add biases
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
        # Apply relu function
        relu = tf.nn.relu(bias, name=scope.name)
        return relu

def fc(x, num_in, num_out, name, relu=True):
    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases
        weights = tf.get_variable('weights', shape=[num_in, num_out], trainable=True)
        biases = tf.get_variable('biases', [num_out], trainable=True)
        # Matrix multiply weights and inputs and add bias
        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
        if relu:
            # Apply ReLu non-linearity
            return tf.nn.relu(act)
        else:
            return act

def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)

def lrn(x, radius, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)

def dropout(x, keep_prob):
    return tf.nn.dropout(x, keep_prob)
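To see that the grouped convolution behaves as intended, here is a quick shape check I would run (TF 1.x graph mode assumed; the placeholder shape mimics norm1's output and the scope name is my own):

import tensorflow as tf

x = tf.placeholder(tf.float32, [1, 27, 27, 96])
y = conv(x, 5, 5, 256, 1, 1, groups=2, name='conv2_check')
# Each of the 2 groups convolves 48 input channels to 128 output maps,
# and the results are concatenated back into 256 channels.
print(y.get_shape())  # (1, 27, 27, 256), since padding defaults to 'SAME'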

Loading the initial weights (.npy)

The caffe-tensorflow project provides a tool for converting Caffe models to TensorFlow models.
This article downloads the converted .npy model directly from here.
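If you would rather run the conversion yourself, caffe-tensorflow's convert.py is invoked roughly as below; the flag names are from my reading of that project and may have changed, so check its README:

./convert.py deploy.prototxt --caffemodel bvlc_alexnet.caffemodel --data-output-path bvlc_alexnet.npy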

def load_initial_weights(self, session):
    # Load the weights into memory
    weights_dict = np.load(self.WEIGHTS_PATH, encoding='bytes').item()

    # Loop over all layer names stored in the weights dict
    for op_name in weights_dict:
        # Check if the layer is one of the layers that should be reinitialized
        if op_name not in self.SKIP_LAYER:
            with tf.variable_scope(op_name, reuse=True):
                # Loop over the list of weights/biases and assign them to
                # their corresponding tf variable
                for data in weights_dict[op_name]:
                    # Biases
                    if len(data.shape) == 1:
                        var = tf.get_variable('biases', trainable=False)
                        session.run(var.assign(data))
                    # Weights
                    else:
                        var = tf.get_variable('weights', trainable=False)
                        session.run(var.assign(data))
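For reference, the .npy file holds a pickled dict mapping each layer name to a [weights, biases] list; a minimal sketch to inspect it (assuming the bvlc_alexnet.npy downloaded above):

import numpy as np

weights_dict = np.load('bvlc_alexnet.npy', encoding='bytes').item()
for op_name, params in weights_dict.items():
    # Prints each layer's parameter shapes, e.g. conv1 [(11, 11, 3, 96), (96,)]
    print(op_name, [p.shape for p in params])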

Testing the AlexNet network

class alexnet_test(object):
    def __init__(self):
        self.PRE_MODEL = 'bvlc_alexnet.npy'

    def test_imagenet(self, imgs_):
        num_classes = 1000
        skip_layer = []
        imgs = []

        # mean of the ImageNet dataset in BGR
        imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)

        # plot images
        fig = plt.figure(figsize=(15, 6))
        for i, img_ in enumerate(imgs_):
            img = cv2.imread(img_)
            imgs.append(img)
            fig.add_subplot(1, 3, i + 1)
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.axis('off')

        # placeholders for input and dropout rate
        x = tf.placeholder(tf.float32, [1, 227, 227, 3])
        keep_prob = tf.placeholder(tf.float32)

        # create model with default config (== no skip_layer and 1000 units in the last layer)
        model = alexnet(x, keep_prob, num_classes, skip_layer, weights_path=self.PRE_MODEL)

        # define activation of last layer as score
        score = model.fc8

        # create op to calculate softmax
        softmax = tf.nn.softmax(score)

        with tf.Session() as sess:
            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            # Load the pretrained weights into the model
            model.load_initial_weights(sess)
            # Create figure handle
            fig2 = plt.figure(figsize=(15, 6))

            # Loop over all images
            for i, image in enumerate(imgs):
                # Convert image to float32 and resize to (227x227)
                img = cv2.resize(image.astype(np.float32), (227, 227))
                # Subtract the ImageNet mean
                img -= imagenet_mean
                # Reshape as needed to feed into the model
                img = img.reshape((1, 227, 227, 3))
                # Run the session and calculate the class probabilities
                probs = sess.run(softmax, feed_dict={x: img, keep_prob: 1})
                # Get the name of the class with the highest probability
                # (class_names: the 1000 ImageNet labels, e.g. from the
                # caffe_classes.py that ships with the reference code)
                class_name = class_names[np.argmax(probs)]
                # Plot image with class name and probability in the title
                fig2.add_subplot(1, 3, i + 1)
                plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                plt.title("Class: " + class_name + ", probability: %.4f" % probs[0, np.argmax(probs)])
                plt.axis('off')
        plt.show()
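The method lays the results out in a 1×3 grid, so it expects three image paths; a minimal usage sketch (the file names are placeholders of my own):

test = alexnet_test()
test.test_imagenet(['cat.jpg', 'dog.jpg', 'plane.jpg'])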

Fine-tuning

def fine_tuning(self, train_list, test_list, mean, snapshot, filewriter_path):
    # Learning params
    learning_rate = 0.001
    num_epochs = 80000
    batch_size = 50

    # Network params
    in_img_size = (332, 675)  # (height, width)
    dropout_rate = 0.5
    num_classes = 6
    train_layers = ['fc6', 'fc7', 'fc8']

    # How often we want to write the tf.summary data to disk
    display_step = 40

    x = tf.placeholder(tf.float32, [batch_size, in_img_size[0], in_img_size[1], 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    keep_prob = tf.placeholder(tf.float32)

    # Initialize model
    model = alexnet(x, keep_prob, num_classes, train_layers, in_size=in_img_size)

    # Link variable to model output
    score = model.fc8

    # List of trainable variables of the layers we want to train
    var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]

    # Op for calculating the loss
    with tf.name_scope("cross_ent"):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))

    # Train op
    with tf.name_scope("train"):
        # Get gradients of all trainable variables
        gradients = tf.gradients(loss, var_list)
        gradients = list(zip(gradients, var_list))
        # Create optimizer and apply gradient descent to the trainable variables
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(grads_and_vars=gradients)

    # Add gradients to summary
    for gradient, var in gradients:
        tf.summary.histogram(var.name + '/gradient', gradient)

    # Add the variables we train to the summary
    for var in var_list:
        tf.summary.histogram(var.name, var)

    # Add the loss to summary
    tf.summary.scalar('cross_entropy', loss)

    # Evaluation op: accuracy of the model
    with tf.name_scope("accuracy"):
        correct_pred = tf.equal(tf.argmax(score, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Add the accuracy to the summary
    tf.summary.scalar('accuracy', accuracy)

    # Merge all summaries together
    merged_summary = tf.summary.merge_all()

    # Initialize the FileWriter
    writer = tf.summary.FileWriter(filewriter_path)

    # Initialize a saver to store model checkpoints
    saver = tf.train.Saver()

    # Initialize the data generators separately for the training and validation set
    train_generator = ImageDataGenerator(train_list, horizontal_flip=True, shuffle=False,
                                         mean=mean, scale_size=in_img_size, nb_classes=num_classes)
    val_generator = ImageDataGenerator(test_list, shuffle=False,
                                       mean=mean, scale_size=in_img_size, nb_classes=num_classes)

    # Get the number of training/validation steps per epoch
    train_batches_per_epoch = np.floor(train_generator.data_size / batch_size).astype(np.int16)
    val_batches_per_epoch = np.floor(val_generator.data_size / batch_size).astype(np.int16)

    # Start TensorFlow session
    with tf.Session() as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        # Add the model graph to TensorBoard
        writer.add_graph(sess.graph)
        # Load the pretrained weights into the non-trainable layers
        model.load_initial_weights(sess)

        print("{} Start training...".format(datetime.now()))
        print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))

        # Loop over number of epochs
        for epoch in range(num_epochs):
            print("{} Epoch number: {}/{}".format(datetime.now(), epoch + 1, num_epochs))
            step = 1
            while step < train_batches_per_epoch:
                # Get a batch of images and labels
                batch_xs, batch_ys = train_generator.next_batch(batch_size)
                # And run the training op
                sess.run(train_op, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout_rate})
                # Generate a summary with the current batch of data and write it to file
                if step % display_step == 0:
                    s = sess.run(merged_summary, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
                    writer.add_summary(s, epoch * train_batches_per_epoch + step)
                step += 1

            # Validate the model on the entire validation set
            print("{} Start validation".format(datetime.now()))
            test_acc = 0.
            test_count = 0
            for _ in range(val_batches_per_epoch):
                batch_tx, batch_ty = val_generator.next_batch(batch_size)
                acc = sess.run(accuracy, feed_dict={x: batch_tx, y: batch_ty, keep_prob: 1.})
                test_acc += acc
                test_count += 1
            test_acc /= test_count
            print("{} Validation Accuracy = {:.4f}".format(datetime.now(), test_acc))

            # Reset the file pointers of the image data generators
            val_generator.reset_pointer()
            train_generator.reset_pointer()

            # Save a checkpoint of the model every display_step epochs
            if epoch % display_step == 0:
                print("{} Saving checkpoint of model...".format(datetime.now()))
                checkpoint_name = os.path.join(snapshot, 'model_epoch' + str(epoch) + '.ckpt')
                save_path = saver.save(sess, checkpoint_name)
                print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))
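The format of train_list and test_list is not shown here; the ImageDataGenerator from the referenced fine-tuning code reads a plain text file with one image path and one integer class label per line, for example (paths made up):

/data/train/class0_001.jpg 0
/data/train/class3_017.jpg 3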

In another terminal, run tensorboard --logdir train_log, then open 127.0.0.1:6006 in a browser to watch the training details.


Batch prediction

def predict_batch(self, val_list, mean, weight_file, result_file):
    in_img_size = (332, 675)  # (height, width)
    dropout_rate = 0.5
    num_classes = 6
    train_layers = []

    x = tf.placeholder(tf.float32, [1, in_img_size[0], in_img_size[1], 3])
    model = alexnet(x, 1., num_classes, train_layers, in_size=in_img_size, weights_path=weight_file)
    score = model.fc8
    softmax = tf.nn.softmax(score)

    val_generator = ImageDataGenerator(val_list, horizontal_flip=False, shuffle=False,
                                       mean=mean, scale_size=in_img_size, nb_classes=num_classes)

    # Confusion-matrix-like table; the extra row accumulates per-class column sums
    precision = np.zeros((num_classes + 1, num_classes), dtype=np.float32)
    total_precision = 0.

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.Saver().restore(sess, weight_file)

        self._start_end_time[0] = time.clock()
        for index in range(val_generator.data_size):
            print('handling %d / %d ...\r' % (index + 1, val_generator.data_size), end='')
            img_ = val_generator.images[index]
            label = val_generator.labels[index]
            img = cv2.imread(img_)
            img = cv2.resize(img, (val_generator.scale_size[1], val_generator.scale_size[0]))
            img = img.reshape(1, val_generator.scale_size[0], val_generator.scale_size[1], 3)
            img = img.astype(np.float32)

            probs = sess.run(softmax, feed_dict={x: img})
            guess = np.argmax(probs)
            if guess == label:
                precision[guess][guess] += 1
                total_precision += 1
            else:
                precision[guess][int(val_generator.labels[index])] += 1
        self._start_end_time[1] = time.clock()

        # Column sums, then normalize each column to get per-class rates
        for i in range(num_classes):
            for j in range(num_classes):
                precision[num_classes][i] += precision[j][i]
        for i in range(num_classes):
            for j in range(num_classes):
                precision[i][j] /= precision[num_classes][j]

        total_precision /= val_generator.data_size
        elapsed = (self._start_end_time[1] - self._start_end_time[0]) / val_generator.data_size

        # Print the per-class table and write it to the result file
        file = open(result_file, 'w')
        file.write('model: ' + weight_file + '\n')
        print('\n#####################################################################')
        file.writelines(['################################################################\n'])
        text_ = ''
        for i in range(num_classes):
            print('        %d' % i, end='')
            text_ += '        %d' % i
        print('\n')
        file.write(text_ + '\n')
        for i in range(num_classes):
            print('  %d' % i, end='')
            file.write('  ' + str(i))
            for j in range(num_classes):
                str_preci = '    %.2f' % precision[i][j]
                print('  %.2f  ' % precision[i][j], end='')
                file.write(str_preci)
            print('\n')
            file.write('\n')
        print('\ntotal precision: %.2f' % total_precision)
        print('average speed: %.4f s / image' % elapsed)
        str_preci = 'total precision: %.2f' % total_precision
        file.writelines(['\n' + str_preci + '\n'])
        str_elapsed = 'average speed: %.4f s / image' % elapsed
        file.write(str_elapsed + '\n')
        file.close()
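A minimal call sketch, assuming predict_batch is a method of the same test class; the paths below are placeholders of my own:

test = alexnet_test()
mean = np.array([104., 117., 124.], dtype=np.float32)  # BGR channel mean used during training
test.predict_batch('val_list.txt', mean, 'snapshot/model_epoch40.ckpt', 'result.txt')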

Code: https://github.com/yayo13/tensorflow_finetunning_alexnet