TensorFlow Study Diary 22


1. MNIST MLP

Explanation: train a fully connected network (two 512-unit ReLU layers with dropout) on flattened MNIST digits.

'''Trains a simple deep NN on the MNIST dataset.'''
from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

batch_size = 128
num_classes = 10
epochs = 20

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
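Once training finishes, the same model object can be used for inference. A minimal sketch, assuming the variables from the script above are still in scope:

import numpy as np

# Predict class probabilities for the first five flattened test digits.
probs = model.predict(x_test[:5])                       # shape (5, 10)
print('predicted:', np.argmax(probs, axis=1))           # most likely digit per image
print('ground truth:', np.argmax(y_test[:5], axis=1))   # undo the one-hot encoding

Note that np.argmax simply inverts the to_categorical one-hot encoding applied earlier.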


2. MNIST CNN

Explanation: train a small convnet (two convolution layers, max pooling, dropout) on MNIST, handling both channels_first and channels_last data formats.

'''Trains a simple convnet on the MNIST dataset.'''
from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
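Because training takes a while, it is often worth persisting the result. A hedged sketch using the standard Keras save/load round trip (the file name mnist_cnn.h5 is our choice, not part of the original script):

from keras.models import load_model

model.save('mnist_cnn.h5')             # architecture + weights + optimizer state
restored = load_model('mnist_cnn.h5')  # ready for further training or inference

score = restored.evaluate(x_test, y_test, verbose=0)
print('Restored test accuracy:', score[1])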


3. CIFAR10 CNN

Explanation: train a deeper CNN on CIFAR10, optionally with real-time data augmentation via ImageDataGenerator, then save the trained model to disk.

'''Train a simple deep CNN on the CIFAR10 small images dataset.'''
from __future__ import print_function

import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import numpy as np
import os

batch_size = 32
num_classes = 10
epochs = 200
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_cifar10_trained_model.h5'

# The data, shuffled and split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train,
                                     batch_size=batch_size),
                        steps_per_epoch=int(np.ceil(x_train.shape[0] / float(batch_size))),
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4)

# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
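The script defines num_predictions = 20 but never uses it, presumably intending a quick qualitative check. A sketch of such a check, reloading the saved model (the label list follows the fixed CIFAR10 class order; everything else reuses variables from the script above):

from keras.models import load_model
import numpy as np

labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck']

restored = load_model(model_path)  # model_path was set by the script above
probs = restored.predict(x_test[:num_predictions])
for i, p in enumerate(probs):
    print('image %d: predicted %s, actual %s'
          % (i, labels[np.argmax(p)], labels[np.argmax(y_test[i])]))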


4. CIFAR10 ResNet

Explanation: build a ResNet-style network for CIFAR10 with the Keras functional API, training with checkpointing and learning-rate reduction on plateau.

"""Trains a ResNet on the CIFAR10 dataset."""from __future__ import print_functionimport kerasfrom keras.layers import Dense, Conv2D, BatchNormalization, Activationfrom keras.layers import MaxPooling2D, AveragePooling2D, Input, Flattenfrom keras.optimizers import Adamfrom keras.callbacks import ModelCheckpoint, ReduceLROnPlateaufrom keras.preprocessing.image import ImageDataGeneratorfrom keras.regularizers import l2from keras import backend as Kfrom keras.models import Modelfrom keras.datasets import cifar10import numpy as npimport os# Training params.batch_size = 32epochs = 100data_augmentation = True# Network architecture params.num_classes = 10num_filters = 64num_blocks = 4num_sub_blocks = 2use_max_pool = False# Load the CIFAR10 data.(x_train, y_train), (x_test, y_test) = cifar10.load_data()# Input image dimensions.# We assume data format "channels_last".img_rows = x_train.shape[1]img_cols = x_train.shape[2]channels = x_train.shape[3]if K.image_data_format() == 'channels_first':    img_rows = x_train.shape[2]    img_cols = x_train.shape[3]    channels = x_train.shape[1]    x_train = x_train.reshape(x_train.shape[0], channels, img_rows, img_cols)    x_test = x_test.reshape(x_test.shape[0], channels, img_rows, img_cols)    input_shape = (channels, img_rows, img_cols)else:    img_rows = x_train.shape[1]    img_cols = x_train.shape[2]    channels = x_train.shape[3]    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels)    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels)    input_shape = (img_rows, img_cols, channels)# Normalize data.x_train = x_train.astype('float32') / 255x_test = x_test.astype('float32') / 255print('x_train shape:', x_train.shape)print(x_train.shape[0], 'train samples')print(x_test.shape[0], 'test samples')print('y_train shape:', y_train.shape)# Convert class vectors to binary class matrices.y_train = keras.utils.to_categorical(y_train, num_classes)y_test = keras.utils.to_categorical(y_test, num_classes)# Start model definition.inputs = Input(shape=input_shape)x = Conv2D(num_filters,           kernel_size=7,           padding='same',           strides=2,           kernel_initializer='he_normal',           kernel_regularizer=l2(1e-4))(inputs)x = BatchNormalization()(x)x = Activation('relu')(x)# Orig paper uses max pool after 1st conv.# Reaches up 87% acc if use_max_pool = True.# Cifar10 images are already too small at 32x32 to be maxpooled. 
So, we skip.if use_max_pool:    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)    num_blocks = 3# Instantiate convolutional base (stack of blocks).for i in range(num_blocks):    for j in range(num_sub_blocks):        strides = 1        is_first_layer_but_not_first_block = j == 0 and i > 0        if is_first_layer_but_not_first_block:            strides = 2        y = Conv2D(num_filters,                   kernel_size=3,                   padding='same',                   strides=strides,                   kernel_initializer='he_normal',                   kernel_regularizer=l2(1e-4))(x)        y = BatchNormalization()(y)        y = Activation('relu')(y)        y = Conv2D(num_filters,                   kernel_size=3,                   padding='same',                   kernel_initializer='he_normal',                   kernel_regularizer=l2(1e-4))(y)        y = BatchNormalization()(y)        if is_first_layer_but_not_first_block:            x = Conv2D(num_filters,                       kernel_size=1,                       padding='same',                       strides=2,                       kernel_initializer='he_normal',                       kernel_regularizer=l2(1e-4))(x)        x = keras.layers.add([x, y])        x = Activation('relu')(x)    num_filters = 2 * num_filters# Add classifier on top.x = AveragePooling2D()(x)y = Flatten()(x)outputs = Dense(num_classes,                activation='softmax',                kernel_initializer='he_normal')(y)# Instantiate and compile model.model = Model(inputs=inputs, outputs=outputs)model.compile(loss='categorical_crossentropy',              optimizer=Adam(),              metrics=['accuracy'])model.summary()# Prepare model model saving directory.save_dir = os.path.join(os.getcwd(), 'saved_models')model_name = 'cifar10_resnet_model.h5'if not os.path.isdir(save_dir):    os.makedirs(save_dir)filepath = os.path.join(save_dir, model_name)# Prepare callbacks for model saving and for learning rate decaying.checkpoint = ModelCheckpoint(filepath=filepath,                             verbose=1,                             save_best_only=True)lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),                               cooldown=0,                               patience=5,                               min_lr=0.5e-6)callbacks = [checkpoint, lr_reducer]# Run training, with or without data augmentation.if not data_augmentation:    print('Not using data augmentation.')    model.fit(x_train, y_train,              batch_size=batch_size,              epochs=epochs,              validation_data=(x_test, y_test),              shuffle=True,              callbacks=callbacks)else:    print('Using real-time data augmentation.')    # This will do preprocessing and realtime data augmentation:    datagen = ImageDataGenerator(        featurewise_center=False,  # set input mean to 0 over the dataset        samplewise_center=False,  # set each sample mean to 0        featurewise_std_normalization=False,  # divide inputs by std of the dataset        samplewise_std_normalization=False,  # divide each input by its std        zca_whitening=False,  # apply ZCA whitening        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)        horizontal_flip=True,  # randomly flip images        vertical_flip=False)  # randomly flip images    # Compute quantities 
required for featurewise normalization    # (std, mean, and principal components if ZCA whitening is applied).    datagen.fit(x_train)    # Fit the model on the batches generated by datagen.flow().    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),                        steps_per_epoch=int(np.ceil(x_train.shape[0] / float(batch_size))),                        validation_data=(x_test, y_test),                        epochs=epochs, verbose=1, workers=4,                        callbacks=callbacks)# Score trained model.scores = model.evaluate(x_test, y_test, verbose=1)print('Test loss:', scores[0])print('Test accuracy:', scores[1])
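The nested loop that builds the convolutional base is easier to follow when its body is pulled out into a function. This is a refactoring sketch, not part of the original script (the residual_block name is ours); it shows where the identity shortcut must be replaced by a strided 1x1 projection so that the element-wise add has matching shapes:

def residual_block(x, num_filters, downsample=False):
    # The first conv halves the spatial size when downsampling.
    strides = 2 if downsample else 1
    y = Conv2D(num_filters, kernel_size=3, padding='same', strides=strides,
               kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(x)
    y = BatchNormalization()(y)
    y = Activation('relu')(y)
    y = Conv2D(num_filters, kernel_size=3, padding='same',
               kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(y)
    y = BatchNormalization()(y)
    if downsample:
        # Projection shortcut: a 1x1 strided conv matches the main path's shape.
        x = Conv2D(num_filters, kernel_size=1, padding='same', strides=2,
                   kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(x)
    x = keras.layers.add([x, y])  # the residual connection
    return Activation('relu')(x)

In the script above, downsample corresponds to is_first_layer_but_not_first_block: the first sub-block of every block after the first halves the resolution while num_filters doubles.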


5. CONV LSTM

Explanation: stack ConvLSTM2D layers to predict the next frame of artificially generated movies of moving squares.

"""This script demonstrates the use of a convolutional LSTM network.This network is used to predict the next frame of an artificiallygenerated movie which contains moving squares."""from keras.models import Sequentialfrom keras.layers.convolutional import Conv3Dfrom keras.layers.convolutional_recurrent import ConvLSTM2Dfrom keras.layers.normalization import BatchNormalizationimport numpy as npimport pylab as plt# We create a layer which take as input movies of shape# (n_frames, width, height, channels) and returns a movie# of identical shape.seq = Sequential()seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),                   input_shape=(None, 40, 40, 1),                   padding='same', return_sequences=True))seq.add(BatchNormalization())seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),                   padding='same', return_sequences=True))seq.add(BatchNormalization())seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),                   padding='same', return_sequences=True))seq.add(BatchNormalization())seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),                   padding='same', return_sequences=True))seq.add(BatchNormalization())seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3),               activation='sigmoid',               padding='same', data_format='channels_last'))seq.compile(loss='binary_crossentropy', optimizer='adadelta')# Artificial data generation:# Generate movies with 3 to 7 moving squares inside.# The squares are of shape 1x1 or 2x2 pixels,# which move linearly over time.# For convenience we first create movies with bigger width and height (80x80)# and at the end we select a 40x40 window.def generate_movies(n_samples=1200, n_frames=15):    row = 80    col = 80    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),                              dtype=np.float)    for i in range(n_samples):        # Add 3 to 7 moving squares        n = np.random.randint(3, 8)        for j in range(n):            # Initial position            xstart = np.random.randint(20, 60)            ystart = np.random.randint(20, 60)            # Direction of motion            directionx = np.random.randint(0, 3) - 1            directiony = np.random.randint(0, 3) - 1            # Size of the square            w = np.random.randint(2, 4)            for t in range(n_frames):                x_shift = xstart + directionx * t                y_shift = ystart + directiony * t                noisy_movies[i, t, x_shift - w: x_shift + w,                y_shift - w: y_shift + w, 0] += 1                # Make it more robust by adding noise.                # The idea is that if during inference,                # the value of the pixel is not exactly one,                # we need to train the network to be robust and still                # consider it as a pixel belonging to a square.                
if np.random.randint(0, 2):                    noise_f = (-1) ** np.random.randint(0, 2)                    noisy_movies[i, t,                    x_shift - w - 1: x_shift + w + 1,                    y_shift - w - 1: y_shift + w + 1,                    0] += noise_f * 0.1                # Shift the ground truth by 1                x_shift = xstart + directionx * (t + 1)                y_shift = ystart + directiony * (t + 1)                shifted_movies[i, t, x_shift - w: x_shift + w,                y_shift - w: y_shift + w, 0] += 1    # Cut to a 40x40 window    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]    noisy_movies[noisy_movies >= 1] = 1    shifted_movies[shifted_movies >= 1] = 1    return noisy_movies, shifted_movies# Train the networknoisy_movies, shifted_movies = generate_movies(n_samples=1200)seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,        epochs=300, validation_split=0.05)# Testing the network on one movie# feed it with the first 7 positions and then# predict the new positionswhich = 1004track = noisy_movies[which][:7, ::, ::, ::]for j in range(16):    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])    new = new_pos[::, -1, ::, ::, ::]    track = np.concatenate((track, new), axis=0)# And then compare the predictions# to the ground truthtrack2 = noisy_movies[which][::, ::, ::, ::]for i in range(15):    fig = plt.figure(figsize=(10, 5))    ax = fig.add_subplot(121)    if i >= 7:        ax.text(1, 3, 'Predictions !', fontsize=20, color='w')    else:        ax.text(1, 3, 'Initial trajectory', fontsize=20)    toplot = track[i, ::, ::, 0]    plt.imshow(toplot)    ax = fig.add_subplot(122)    plt.text(1, 3, 'Ground truth', fontsize=20)    toplot = track2[i, ::, ::, 0]    if i >= 2:        toplot = shifted_movies[which][i - 1, ::, ::, 0]    plt.imshow(toplot)    plt.savefig('%i_animate.png' % (i + 1))
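A quick way to sanity-check the data flow: every recurrent layer keeps return_sequences=True and the final Conv3D is padded 'same' along time, so the network maps a movie to a movie of identical shape. A minimal sketch, assuming the seq model and noisy_movies array from above:

batch = noisy_movies[:2]   # (2, 15, 40, 40, 1): samples, time, rows, cols, channels
pred = seq.predict(batch)  # same 5D shape: one predicted frame per input frame
print(batch.shape, '->', pred.shape)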


6. Image OCR

Explanation: combine a convolutional stack, bidirectional GRUs, and a CTC loss to perform OCR on generated text images.

# -*- coding: utf-8 -*-
'''This example uses a convolutional stack followed by a recurrent stack
and a CTC logloss function to perform optical character recognition
of generated text images.
'''
import os
import itertools
import codecs
import re
import datetime
import cairocffi as cairo
import editdistance
import numpy as np
from scipy import ndimage
import pylab
from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import GRU
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
from keras.preprocessing import image
import keras.callbacks

OUTPUT_DIR = 'image_ocr'

# character classes and matching regex filter
regex = r'^[a-z ]+$'
alphabet = u'abcdefghijklmnopqrstuvwxyz '

np.random.seed(55)


# this creates larger "blotches" of noise which look
# more realistic than just adding gaussian noise
# assumes greyscale with pixels ranging from 0 to 1
def speckle(img):
    severity = np.random.uniform(0, 0.6)
    blur = ndimage.gaussian_filter(np.random.randn(*img.shape) * severity, 1)
    img_speck = (img + blur)
    img_speck[img_speck > 1] = 1
    img_speck[img_speck <= 0] = 0
    return img_speck


# paints the string in a random location within the bounding box
# also uses a random font, a slight random rotation,
# and a random amount of speckle noise
def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False):
    surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h)
    with cairo.Context(surface) as context:
        context.set_source_rgb(1, 1, 1)  # White
        context.paint()
        # this font list works in CentOS 7
        if multi_fonts:
            fonts = ['Century Schoolbook', 'Courier', 'STIX', 'URW Chancery L', 'FreeMono']
            context.select_font_face(np.random.choice(fonts), cairo.FONT_SLANT_NORMAL,
                                     np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL]))
        else:
            context.select_font_face('Courier', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD)
        context.set_font_size(25)
        box = context.text_extents(text)
        border_w_h = (4, 4)
        if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]):
            raise IOError('Could not fit string into image. Max char count is too large for given image width.')

        # teach the RNN translational invariance by
        # fitting text box randomly on canvas, with some room to rotate
        max_shift_x = w - box[2] - border_w_h[0]
        max_shift_y = h - box[3] - border_w_h[1]
        top_left_x = np.random.randint(0, int(max_shift_x))
        if ud:
            top_left_y = np.random.randint(0, int(max_shift_y))
        else:
            top_left_y = h // 2
        context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
        context.set_source_rgb(0, 0, 0)
        context.show_text(text)

    buf = surface.get_data()
    a = np.frombuffer(buf, np.uint8)
    a.shape = (h, w, 4)
    a = a[:, :, 0]  # grab single channel
    a = a.astype(np.float32) / 255
    a = np.expand_dims(a, 0)
    if rotate:
        a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
    a = speckle(a)

    return a


def shuffle_mats_or_lists(matrix_list, stop_ind=None):
    ret = []
    assert all([len(i) == len(matrix_list[0]) for i in matrix_list])
    len_val = len(matrix_list[0])
    if stop_ind is None:
        stop_ind = len_val
    assert stop_ind <= len_val

    a = list(range(stop_ind))
    np.random.shuffle(a)
    a += list(range(stop_ind, len_val))
    for mat in matrix_list:
        if isinstance(mat, np.ndarray):
            ret.append(mat[a])
        elif isinstance(mat, list):
            ret.append([mat[i] for i in a])
        else:
            raise TypeError('`shuffle_mats_or_lists` only supports '
                            'numpy.array and list objects.')
    return ret


# Translation of characters to unique integer values
def text_to_labels(text):
    ret = []
    for char in text:
        ret.append(alphabet.find(char))
    return ret


# Reverse translation of numerical classes back to characters
def labels_to_text(labels):
    ret = []
    for c in labels:
        if c == len(alphabet):  # CTC Blank
            ret.append("")
        else:
            ret.append(alphabet[c])
    return "".join(ret)


# only a-z and space; probably not too difficult
# to expand to uppercase and symbols
def is_valid_str(in_str):
    search = re.compile(regex, re.UNICODE).search
    return bool(search(in_str))


# Uses generator functions to supply train/test with
# data. Image renderings and text are created on the fly
# each time with random perturbations
class TextImageGenerator(keras.callbacks.Callback):

    def __init__(self, monogram_file, bigram_file, minibatch_size,
                 img_w, img_h, downsample_factor, val_split,
                 absolute_max_string_len=16):

        self.minibatch_size = minibatch_size
        self.img_w = img_w
        self.img_h = img_h
        self.monogram_file = monogram_file
        self.bigram_file = bigram_file
        self.downsample_factor = downsample_factor
        self.val_split = val_split
        self.blank_label = self.get_output_size() - 1
        self.absolute_max_string_len = absolute_max_string_len

    def get_output_size(self):
        return len(alphabet) + 1

    # num_words can be independent of the epoch size due to the use of generators
    # as max_string_len grows, num_words can grow
    def build_word_list(self, num_words, max_string_len=None, mono_fraction=0.5):
        assert max_string_len <= self.absolute_max_string_len
        assert num_words % self.minibatch_size == 0
        assert (self.val_split * num_words) % self.minibatch_size == 0
        self.num_words = num_words
        self.string_list = [''] * self.num_words
        tmp_string_list = []
        self.max_string_len = max_string_len
        self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
        self.X_text = []
        self.Y_len = [0] * self.num_words

        # monogram file is sorted by frequency in english speech
        with codecs.open(self.monogram_file, mode='rt', encoding='utf-8') as f:
            for line in f:
                if len(tmp_string_list) == int(self.num_words * mono_fraction):
                    break
                word = line.rstrip()
                if max_string_len == -1 or max_string_len is None or len(word) <= max_string_len:
                    tmp_string_list.append(word)

        # bigram file contains common word pairings in english speech
        with codecs.open(self.bigram_file, mode='rt', encoding='utf-8') as f:
            lines = f.readlines()
            for line in lines:
                if len(tmp_string_list) == self.num_words:
                    break
                columns = line.lower().split()
                word = columns[0] + ' ' + columns[1]
                if is_valid_str(word) and \
                        (max_string_len == -1 or max_string_len is None or len(word) <= max_string_len):
                    tmp_string_list.append(word)
        if len(tmp_string_list) != self.num_words:
            raise IOError('Could not pull enough words from supplied monogram and bigram files.')
        # interlace to mix up the easy and hard words
        self.string_list[::2] = tmp_string_list[:self.num_words // 2]
        self.string_list[1::2] = tmp_string_list[self.num_words // 2:]

        for i, word in enumerate(self.string_list):
            self.Y_len[i] = len(word)
            self.Y_data[i, 0:len(word)] = text_to_labels(word)
            self.X_text.append(word)
        self.Y_len = np.expand_dims(np.array(self.Y_len), 1)

        self.cur_val_index = self.val_split
        self.cur_train_index = 0

    # each time an image is requested from train/val/test, a new random
    # painting of the text is performed
    def get_batch(self, index, size, train):
        # width and height are backwards from typical Keras convention
        # because width is the time dimension when it gets fed into the RNN
        if K.image_data_format() == 'channels_first':
            X_data = np.ones([size, 1, self.img_w, self.img_h])
        else:
            X_data = np.ones([size, self.img_w, self.img_h, 1])

        labels = np.ones([size, self.absolute_max_string_len])
        input_length = np.zeros([size, 1])
        label_length = np.zeros([size, 1])
        source_str = []
        for i in range(size):
            # Mix in some blank inputs.  This seems to be important for
            # achieving translational invariance
            if train and i > size - 4:
                if K.image_data_format() == 'channels_first':
                    X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[0, :, :].T
                else:
                    X_data[i, 0:self.img_w, :, 0] = self.paint_func('')[0, :, :].T
                labels[i, 0] = self.blank_label
                input_length[i] = self.img_w // self.downsample_factor - 2
                label_length[i] = 1
                source_str.append('')
            else:
                if K.image_data_format() == 'channels_first':
                    X_data[i, 0, 0:self.img_w, :] = self.paint_func(self.X_text[index + i])[0, :, :].T
                else:
                    X_data[i, 0:self.img_w, :, 0] = self.paint_func(self.X_text[index + i])[0, :, :].T
                labels[i, :] = self.Y_data[index + i]
                input_length[i] = self.img_w // self.downsample_factor - 2
                label_length[i] = self.Y_len[index + i]
                source_str.append(self.X_text[index + i])
        inputs = {'the_input': X_data,
                  'the_labels': labels,
                  'input_length': input_length,
                  'label_length': label_length,
                  'source_str': source_str  # used for visualization only
                  }
        outputs = {'ctc': np.zeros([size])}  # dummy data for dummy loss function
        return (inputs, outputs)

    def next_train(self):
        while 1:
            ret = self.get_batch(self.cur_train_index, self.minibatch_size, train=True)
            self.cur_train_index += self.minibatch_size
            if self.cur_train_index >= self.val_split:
                self.cur_train_index = self.cur_train_index % 32
                (self.X_text, self.Y_data, self.Y_len) = shuffle_mats_or_lists(
                    [self.X_text, self.Y_data, self.Y_len], self.val_split)
            yield ret

    def next_val(self):
        while 1:
            ret = self.get_batch(self.cur_val_index, self.minibatch_size, train=False)
            self.cur_val_index += self.minibatch_size
            if self.cur_val_index >= self.num_words:
                self.cur_val_index = self.val_split + self.cur_val_index % 32
            yield ret

    def on_train_begin(self, logs={}):
        self.build_word_list(16000, 4, 1)
        self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                  rotate=False, ud=False, multi_fonts=False)

    def on_epoch_begin(self, epoch, logs={}):
        # rebind the paint function to implement curriculum learning
        if 3 <= epoch < 6:
            self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                      rotate=False, ud=True, multi_fonts=False)
        elif 6 <= epoch < 9:
            self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                      rotate=False, ud=True, multi_fonts=True)
        elif epoch >= 9:
            self.paint_func = lambda text: paint_text(text, self.img_w, self.img_h,
                                                      rotate=True, ud=True, multi_fonts=True)
        if epoch >= 21 and self.max_string_len < 12:
            self.build_word_list(32000, 12, 0.5)


# the actual loss calc occurs here despite it not being
# an internal Keras loss function
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


# For a real OCR application, this should be beam search with a dictionary
# and language model.  For this example, best path is sufficient.
def decode_batch(test_func, word_batch):
    out = test_func([word_batch])[0]
    ret = []
    for j in range(out.shape[0]):
        out_best = list(np.argmax(out[j, 2:], 1))
        out_best = [k for k, g in itertools.groupby(out_best)]
        outstr = labels_to_text(out_best)
        ret.append(outstr)
    return ret


class VizCallback(keras.callbacks.Callback):

    def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
        self.test_func = test_func
        self.output_dir = os.path.join(OUTPUT_DIR, run_name)
        self.text_img_gen = text_img_gen
        self.num_display_words = num_display_words
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

    def show_edit_distance(self, num):
        num_left = num
        mean_norm_ed = 0.0
        mean_ed = 0.0
        while num_left > 0:
            word_batch = next(self.text_img_gen)[0]
            num_proc = min(word_batch['the_input'].shape[0], num_left)
            decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc])
            for j in range(num_proc):
                edit_dist = editdistance.eval(decoded_res[j], word_batch['source_str'][j])
                mean_ed += float(edit_dist)
                mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
            num_left -= num_proc
        mean_norm_ed = mean_norm_ed / num
        mean_ed = mean_ed / num
        print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
              % (num, mean_ed, mean_norm_ed))

    def on_epoch_end(self, epoch, logs={}):
        self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
        self.show_edit_distance(256)
        word_batch = next(self.text_img_gen)[0]
        res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words])
        if word_batch['the_input'][0].shape[0] < 256:
            cols = 2
        else:
            cols = 1
        for i in range(self.num_display_words):
            pylab.subplot(self.num_display_words // cols, cols, i + 1)
            if K.image_data_format() == 'channels_first':
                the_input = word_batch['the_input'][i, 0, :, :]
            else:
                the_input = word_batch['the_input'][i, :, :, 0]
            pylab.imshow(the_input.T, cmap='Greys_r')
            pylab.xlabel('Truth = \'%s\'\nDecoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
        fig = pylab.gcf()
        fig.set_size_inches(10, 13)
        pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
        pylab.close()


def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(get_file('wordlists.tgz',
                                    origin='http://www.mythic-ai.com/datasets/wordlists.tgz', untar=True))
    img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
                                 bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
                                 minibatch_size=minibatch_size,
                                 img_w=img_w,
                                 img_h=img_h,
                                 downsample_factor=(pool_size ** 2),
                                 val_split=words_per_epoch - val_words
                                 )
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[viz_cb, img_gen],
                        initial_epoch=start_epoch)


if __name__ == '__main__':
    run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
    train(run_name, 0, 20, 128)
    # increase to wider images and start at epoch 20. The learned weights are reloaded
    train(run_name, 20, 25, 512)
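decode_batch above implements best-path (greedy argmax) decoding by hand. The Keras backend also exposes a CTC decoder that collapses repeats and removes blanks itself; here is a hedged alternative sketch (the decode_batch_ctc name is ours, reusing test_func and a word_batch from TextImageGenerator as in VizCallback):

def decode_batch_ctc(test_func, word_batch):
    out = test_func([word_batch])[0]  # softmax activations: (batch, time, classes)
    out = out[:, 2:, :]               # drop the first two RNN outputs, as above
    input_len = np.ones(out.shape[0]) * out.shape[1]
    decoded, _ = K.ctc_decode(out, input_len, greedy=True)
    label_mat = K.get_value(decoded[0])  # (batch, max_label_len), padded with -1
    return [labels_to_text([c for c in row if c >= 0]) for row in label_mat]

For a production system the original comment still applies: beam search with a dictionary and language model would beat both greedy variants.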


References:

[1] Keras Examples: https://github.com/fchollet/keras/tree/master/examples