TensorFlow Learning Diary 26

1. LSTM Text Generation

Analysis:

'''Example script to generate text from Nietzsche's writings.'''
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys

path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# train the model, output generated text after each iteration
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print()
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
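
The sample() helper above is the only non-standard piece of the training loop: it rescales the network's softmax output by a temperature before drawing one character index, so a low diversity value sticks to the most probable characters while a higher value flattens the distribution. A minimal standalone sketch of that behaviour, using a made-up three-character distribution rather than real model output:

import numpy as np

def sample_with_temperature(preds, temperature=1.0):
    # same logic as the sample() helper in the script above
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    preds = np.exp(preds) / np.sum(np.exp(preds))
    return np.argmax(np.random.multinomial(1, preds, 1))

# hypothetical distribution over three characters
probs = [0.7, 0.2, 0.1]
for t in [0.2, 0.5, 1.0, 1.2]:
    draws = [sample_with_temperature(probs, t) for _ in range(1000)]
    # low temperature -> almost every draw is index 0; high temperature -> flatter counts
    print(t, np.bincount(draws, minlength=3) / 1000.)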


2. Conv Filter Visualization

Analysis:

'''Visualization of the filters of VGG16, via gradient ascent in input space.'''
from __future__ import print_function

from scipy.misc import imsave
import numpy as np
import time
from keras.applications import vgg16
from keras import backend as K

# dimensions of the generated pictures for each filter.
img_width = 128
img_height = 128

# the name of the layer we want to visualize
# (see model definition at keras/applications/vgg16.py)
layer_name = 'block5_conv1'


# util function to convert a tensor into a valid image
def deprocess_image(x):
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# build the VGG16 network with ImageNet weights
model = vgg16.VGG16(weights='imagenet', include_top=False)
print('Model loaded.')

model.summary()

# this is the placeholder for the input images
input_img = model.input

# get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])


def normalize(x):
    # utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)


kept_filters = []
for filter_index in range(200):
    # we only scan through the first 200 filters,
    # but there are actually 512 of them
    print('Processing filter %d' % filter_index)
    start_time = time.time()

    # we build a loss function that maximizes the activation
    # of the nth filter of the layer considered
    layer_output = layer_dict[layer_name].output
    if K.image_data_format() == 'channels_first':
        loss = K.mean(layer_output[:, filter_index, :, :])
    else:
        loss = K.mean(layer_output[:, :, :, filter_index])

    # we compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_img)[0]

    # normalization trick: we normalize the gradient
    grads = normalize(grads)

    # this function returns the loss and grads given the input picture
    iterate = K.function([input_img], [loss, grads])

    # step size for gradient ascent
    step = 1.

    # we start from a gray image with some random noise
    if K.image_data_format() == 'channels_first':
        input_img_data = np.random.random((1, 3, img_width, img_height))
    else:
        input_img_data = np.random.random((1, img_width, img_height, 3))
    input_img_data = (input_img_data - 0.5) * 20 + 128

    # we run gradient ascent for 20 steps
    for i in range(20):
        loss_value, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step

        print('Current loss value:', loss_value)
        if loss_value <= 0.:
            # some filters get stuck to 0, we can skip them
            break

    # decode the resulting input image
    if loss_value > 0:
        img = deprocess_image(input_img_data[0])
        kept_filters.append((img, loss_value))
    end_time = time.time()
    print('Filter %d processed in %ds' % (filter_index, end_time - start_time))

# we will stitch the best 64 filters on a 8 x 8 grid.
n = 8

# the filters that have the highest loss are assumed to be better-looking.
# we will only keep the top 64 filters.
kept_filters.sort(key=lambda x: x[1], reverse=True)
kept_filters = kept_filters[:n * n]

# build a black picture with enough space for
# our 8 x 8 filters of size 128 x 128, with a 5px margin in between
margin = 5
width = n * img_width + (n - 1) * margin
height = n * img_height + (n - 1) * margin
stitched_filters = np.zeros((width, height, 3))

# fill the picture with our saved filters
for i in range(n):
    for j in range(n):
        img, loss = kept_filters[i * n + j]
        stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width,
                         (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img

# save the result to disk
imsave('stitched_filters_%dx%d.png' % (n, n), stitched_filters)
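
One caveat about the final line: scipy.misc.imsave (also used by the Deep Dream and Neural Doodle scripts below) was removed from newer SciPy releases, so on a current environment the save step fails. A hedged replacement, assuming the third-party imageio package is installed:

# drop-in replacement for the last line, assuming `pip install imageio`
import imageio

# stitched_filters is the (width, height, 3) array built above;
# cast to uint8 so the writer does not have to guess the value range
imageio.imwrite('stitched_filters_%dx%d.png' % (n, n),
                stitched_filters.astype('uint8'))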


3. Deep Dream

Analysis:

'''Deep Dreaming in Keras.

Run the script with:
```
python deep_dream.py path_to_your_base_image.jpg prefix_for_results
```
e.g.:
```
python deep_dream.py img/mypic.jpg results/dream
```
'''
from __future__ import print_function

from keras.preprocessing.image import load_img, img_to_array
import numpy as np
import scipy
import argparse

from keras.applications import inception_v3
from keras import backend as K

parser = argparse.ArgumentParser(description='Deep Dreams with Keras.')
parser.add_argument('base_image_path', metavar='base', type=str,
                    help='Path to the image to transform.')
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                    help='Prefix for the saved results.')

args = parser.parse_args()
base_image_path = args.base_image_path
result_prefix = args.result_prefix

# These are the names of the layers
# for which we try to maximize activation,
# as well as their weight in the final loss
# we try to maximize.
# You can tweak these settings to obtain new visual effects.
settings = {
    'features': {
        'mixed2': 0.2,
        'mixed3': 0.5,
        'mixed4': 2.,
        'mixed5': 1.5,
    },
}


def preprocess_image(image_path):
    # Util function to open, resize and format pictures
    # into appropriate tensors.
    img = load_img(image_path)
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = inception_v3.preprocess_input(img)
    return img


def deprocess_image(x):
    # Util function to convert a tensor into a valid image.
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, x.shape[2], x.shape[3]))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((x.shape[1], x.shape[2], 3))
    x /= 2.
    x += 0.5
    x *= 255.
    x = np.clip(x, 0, 255).astype('uint8')
    return x

K.set_learning_phase(0)

# Build the InceptionV3 network with our placeholder.
# The model will be loaded with pre-trained ImageNet weights.
model = inception_v3.InceptionV3(weights='imagenet',
                                 include_top=False)
dream = model.input
print('Model loaded.')

# Get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers])

# Define the loss.
loss = K.variable(0.)
for layer_name in settings['features']:
    # Add the L2 norm of the features of a layer to the loss.
    assert layer_name in layer_dict.keys(), 'Layer ' + layer_name + ' not found in model.'
    coeff = settings['features'][layer_name]
    x = layer_dict[layer_name].output
    # We avoid border artifacts by only involving non-border pixels in the loss.
    scaling = K.prod(K.cast(K.shape(x), 'float32'))
    if K.image_data_format() == 'channels_first':
        loss += coeff * K.sum(K.square(x[:, :, 2: -2, 2: -2])) / scaling
    else:
        loss += coeff * K.sum(K.square(x[:, 2: -2, 2: -2, :])) / scaling

# Compute the gradients of the dream wrt the loss.
grads = K.gradients(loss, dream)[0]
# Normalize gradients.
grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

# Set up function to retrieve the value
# of the loss and gradients given an input image.
outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)


def eval_loss_and_grads(x):
    outs = fetch_loss_and_grads([x])
    loss_value = outs[0]
    grad_values = outs[1]
    return loss_value, grad_values


def resize_img(img, size):
    img = np.copy(img)
    if K.image_data_format() == 'channels_first':
        factors = (1, 1,
                   float(size[0]) / img.shape[2],
                   float(size[1]) / img.shape[3])
    else:
        factors = (1,
                   float(size[0]) / img.shape[1],
                   float(size[1]) / img.shape[2],
                   1)
    return scipy.ndimage.zoom(img, factors, order=1)


def gradient_ascent(x, iterations, step, max_loss=None):
    for i in range(iterations):
        loss_value, grad_values = eval_loss_and_grads(x)
        if max_loss is not None and loss_value > max_loss:
            break
        print('..Loss value at', i, ':', loss_value)
        x += step * grad_values
    return x


def save_img(img, fname):
    pil_img = deprocess_image(np.copy(img))
    scipy.misc.imsave(fname, pil_img)


"""Process:

- Load the original image.
- Define a number of processing scales (i.e. image shapes),
    from smallest to largest.
- Resize the original image to the smallest scale.
- For every scale, starting with the smallest (i.e. current one):
    - Run gradient ascent
    - Upscale image to the next scale
    - Reinject the detail that was lost at upscaling time
- Stop when we are back to the original size.

To obtain the detail lost during upscaling, we simply
take the original image, shrink it down, upscale it,
and compare the result to the (resized) original image.
"""


# Playing with these hyperparameters will also allow you to achieve new effects
step = 0.01  # Gradient ascent step size
num_octave = 3  # Number of scales at which to run gradient ascent
octave_scale = 1.4  # Size ratio between scales
iterations = 20  # Number of ascent steps per scale
max_loss = 10.

img = preprocess_image(base_image_path)
if K.image_data_format() == 'channels_first':
    original_shape = img.shape[2:]
else:
    original_shape = img.shape[1:3]
successive_shapes = [original_shape]
for i in range(1, num_octave):
    shape = tuple([int(dim / (octave_scale ** i)) for dim in original_shape])
    successive_shapes.append(shape)
successive_shapes = successive_shapes[::-1]
original_img = np.copy(img)
shrunk_original_img = resize_img(img, successive_shapes[0])

for shape in successive_shapes:
    print('Processing image shape', shape)
    img = resize_img(img, shape)
    img = gradient_ascent(img,
                          iterations=iterations,
                          step=step,
                          max_loss=max_loss)
    upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape)
    same_size_original = resize_img(original_img, shape)
    lost_detail = same_size_original - upscaled_shrunk_original_img

    img += lost_detail
    shrunk_original_img = resize_img(original_img, shape)

save_img(img, fname=result_prefix + '.png')
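
The multi-scale ("octave") loop is the heart of the script: it builds a list of image shapes by repeatedly dividing the original size by octave_scale, reverses the list, then runs gradient ascent from the smallest shape up to the original, re-injecting the detail lost at each upscale. A standalone sketch of just the shape computation, using a hypothetical 400 x 600 input:

# standalone illustration of how successive_shapes is computed above
num_octave = 3
octave_scale = 1.4
original_shape = (400, 600)  # hypothetical (rows, cols)

successive_shapes = [original_shape]
for i in range(1, num_octave):
    shape = tuple(int(dim / (octave_scale ** i)) for dim in original_shape)
    successive_shapes.append(shape)
successive_shapes = successive_shapes[::-1]

# smallest first: [(204, 306), (285, 428), (400, 600)]
print(successive_shapes)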


4. Neural Doodle

Analysis:

'''Neural doodle with Keras'''
from __future__ import print_function
import time

import argparse
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
from scipy.misc import imread, imsave

from keras import backend as K
from keras.layers import Input, AveragePooling2D
from keras.models import Model
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import vgg19

# Command line arguments
parser = argparse.ArgumentParser(description='Keras neural doodle example')
parser.add_argument('--nlabels', type=int,
                    help='number of semantic labels'
                         ' (regions in different colors)'
                         ' in style_mask/target_mask')
parser.add_argument('--style-image', type=str,
                    help='path to image to learn style from')
parser.add_argument('--style-mask', type=str,
                    help='path to semantic mask of style image')
parser.add_argument('--target-mask', type=str,
                    help='path to semantic mask of target image')
parser.add_argument('--content-image', type=str, default=None,
                    help='path to optional content image')
parser.add_argument('--target-image-prefix', type=str,
                    help='path prefix for generated results')
args = parser.parse_args()

style_img_path = args.style_image
style_mask_path = args.style_mask
target_mask_path = args.target_mask
content_img_path = args.content_image
target_img_prefix = args.target_image_prefix
use_content_img = content_img_path is not None

num_labels = args.nlabels
num_colors = 3  # RGB
# determine image sizes based on target_mask
ref_img = imread(target_mask_path)
img_nrows, img_ncols = ref_img.shape[:2]

total_variation_weight = 50.
style_weight = 1.
content_weight = 0.1 if use_content_img else 0

content_feature_layers = ['block5_conv2']
# To get better generation qualities, use more conv layers for style features
style_feature_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                        'block4_conv1', 'block5_conv1']


# helper functions for reading/processing images
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg19.preprocess_input(img)
    return img


def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x


def kmeans(xs, k):
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _, labels, _ = k_means(xs.astype('float64'), k)
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(xs, k, missing='raise')
    return labels


def load_mask_labels():
    '''Load both target and style masks.

    A mask image (nr x nc) with m labels/colors will be loaded
    as a 4D boolean tensor:
    (1, m, nr, nc) for 'channels_first' or (1, nr, nc, m) for 'channels_last'.
    '''
    target_mask_img = load_img(target_mask_path,
                               target_size=(img_nrows, img_ncols))
    target_mask_img = img_to_array(target_mask_img)
    style_mask_img = load_img(style_mask_path,
                              target_size=(img_nrows, img_ncols))
    style_mask_img = img_to_array(style_mask_img)
    if K.image_data_format() == 'channels_first':
        mask_vecs = np.vstack([style_mask_img.reshape((3, -1)).T,
                               target_mask_img.reshape((3, -1)).T])
    else:
        mask_vecs = np.vstack([style_mask_img.reshape((-1, 3)),
                               target_mask_img.reshape((-1, 3))])

    labels = kmeans(mask_vecs, num_labels)
    style_mask_label = labels[:img_nrows * img_ncols].reshape((img_nrows, img_ncols))
    target_mask_label = labels[img_nrows * img_ncols:].reshape((img_nrows, img_ncols))

    stack_axis = 0 if K.image_data_format() == 'channels_first' else -1
    style_mask = np.stack([style_mask_label == r for r in range(num_labels)],
                          axis=stack_axis)
    target_mask = np.stack([target_mask_label == r for r in range(num_labels)],
                           axis=stack_axis)

    return (np.expand_dims(style_mask, axis=0),
            np.expand_dims(target_mask, axis=0))

# Create tensor variables for images
if K.image_data_format() == 'channels_first':
    shape = (1, num_colors, img_nrows, img_ncols)
else:
    shape = (1, img_nrows, img_ncols, num_colors)

style_image = K.variable(preprocess_image(style_img_path))
target_image = K.placeholder(shape=shape)
if use_content_img:
    content_image = K.variable(preprocess_image(content_img_path))
else:
    content_image = K.zeros(shape=shape)

images = K.concatenate([style_image, target_image, content_image], axis=0)

# Create tensor variables for masks
raw_style_mask, raw_target_mask = load_mask_labels()
style_mask = K.variable(raw_style_mask.astype('float32'))
target_mask = K.variable(raw_target_mask.astype('float32'))
masks = K.concatenate([style_mask, target_mask], axis=0)

# index constants for images and tasks variables
STYLE, TARGET, CONTENT = 0, 1, 2

# Build image model, mask model and use layer outputs as features
# image model as VGG19
image_model = vgg19.VGG19(include_top=False, input_tensor=images)

# mask model as a series of pooling
mask_input = Input(tensor=masks, shape=(None, None, None), name='mask_input')
x = mask_input
for layer in image_model.layers[1:]:
    name = 'mask_%s' % layer.name
    if 'conv' in layer.name:
        x = AveragePooling2D((3, 3), padding='same', strides=(1, 1), name=name)(x)
    elif 'pool' in layer.name:
        x = AveragePooling2D((2, 2), name=name)(x)
mask_model = Model(mask_input, x)

# Collect features from image_model and task_model
image_features = {}
mask_features = {}
for img_layer, mask_layer in zip(image_model.layers, mask_model.layers):
    if 'conv' in img_layer.name:
        assert 'mask_' + img_layer.name == mask_layer.name
        layer_name = img_layer.name
        img_feat, mask_feat = img_layer.output, mask_layer.output
        image_features[layer_name] = img_feat
        mask_features[layer_name] = mask_feat


# Define loss functions
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(x)
    gram = K.dot(features, K.transpose(features))
    return gram


def region_style_loss(style_image, target_image, style_mask, target_mask):
    '''Calculate style loss between style_image and target_image,
    for one common region specified by their (boolean) masks
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 2 == K.ndim(style_mask) == K.ndim(target_mask)
    if K.image_data_format() == 'channels_first':
        masked_style = style_image * style_mask
        masked_target = target_image * target_mask
        num_channels = K.shape(style_image)[0]
    else:
        masked_style = K.permute_dimensions(
            style_image, (2, 0, 1)) * style_mask
        masked_target = K.permute_dimensions(
            target_image, (2, 0, 1)) * target_mask
        num_channels = K.shape(style_image)[-1]
    num_channels = K.cast(num_channels, dtype='float32')
    s = gram_matrix(masked_style) / K.mean(style_mask) / num_channels
    c = gram_matrix(masked_target) / K.mean(target_mask) / num_channels
    return K.mean(K.square(s - c))


def style_loss(style_image, target_image, style_masks, target_masks):
    '''Calculate style loss between style_image and target_image,
    in all regions.
    '''
    assert 3 == K.ndim(style_image) == K.ndim(target_image)
    assert 3 == K.ndim(style_masks) == K.ndim(target_masks)
    loss = K.variable(0)
    for i in range(num_labels):
        if K.image_data_format() == 'channels_first':
            style_mask = style_masks[i, :, :]
            target_mask = target_masks[i, :, :]
        else:
            style_mask = style_masks[:, :, i]
            target_mask = target_masks[:, :, i]
        loss += region_style_loss(style_image,
                                  target_image, style_mask, target_mask)
    return loss


def content_loss(content_image, target_image):
    return K.sum(K.square(target_image - content_image))


def total_variation_loss(x):
    assert 4 == K.ndim(x)
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))

# Overall loss is the weighted sum of content_loss, style_loss and tv_loss
# Each individual loss uses features from image/mask models.
loss = K.variable(0)
for layer in content_feature_layers:
    content_feat = image_features[layer][CONTENT, :, :, :]
    target_feat = image_features[layer][TARGET, :, :, :]
    loss += content_weight * content_loss(content_feat, target_feat)

for layer in style_feature_layers:
    style_feat = image_features[layer][STYLE, :, :, :]
    target_feat = image_features[layer][TARGET, :, :, :]
    style_masks = mask_features[layer][STYLE, :, :, :]
    target_masks = mask_features[layer][TARGET, :, :, :]
    sl = style_loss(style_feat, target_feat, style_masks, target_masks)
    loss += (style_weight / len(style_feature_layers)) * sl

loss += total_variation_weight * total_variation_loss(target_image)
loss_grads = K.gradients(loss, target_image)

# Evaluator class for computing efficiency
outputs = [loss]
if isinstance(loss_grads, (list, tuple)):
    outputs += loss_grads
else:
    outputs.append(loss_grads)

f_outputs = K.function([target_image], outputs)


def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values


class Evaluator(object):

    def __init__(self):
        self.loss_value = None
        self.grads_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

evaluator = Evaluator()

# Generate images by iterative optimization
if K.image_data_format() == 'channels_first':
    x = np.random.uniform(0, 255, (1, 3, img_nrows, img_ncols)) - 128.
else:
    x = np.random.uniform(0, 255, (1, img_nrows, img_ncols, 3)) - 128.

for i in range(50):
    print('Start of iteration', i)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy())
    fname = target_img_prefix + '_at_iteration_%d.png' % i
    imsave(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))
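
Unlike the other scripts in this post, this one takes all of its inputs through named flags. A usage example assembled from the argparse definitions above (the script name and image paths are hypothetical, and --content-image may be omitted for a pure doodle):

python neural_doodle.py --nlabels 4 \
    --style-image style.png --style-mask style_mask.png \
    --target-mask target_mask.png \
    --target-image-prefix results/doodle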


5. Neural Style Transfer

Analysis:

'''Neural style transfer with Keras.

Run the script with:
```
python neural_style_transfer.py path_to_your_base_image.jpg path_to_your_reference.jpg prefix_for_results
```
e.g.:
```
python neural_style_transfer.py img/tuebingen.jpg img/starry_night.jpg results/my_result
```
'''
from __future__ import print_function
from keras.preprocessing.image import load_img, img_to_array
from scipy.misc import imsave
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time
import argparse

from keras.applications import vgg19
from keras import backend as K

parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
parser.add_argument('base_image_path', metavar='base', type=str,
                    help='Path to the image to transform.')
parser.add_argument('style_reference_image_path', metavar='ref', type=str,
                    help='Path to the style reference image.')
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                    help='Prefix for the saved results.')
parser.add_argument('--iter', type=int, default=10, required=False,
                    help='Number of iterations to run.')
parser.add_argument('--content_weight', type=float, default=0.025, required=False,
                    help='Content weight.')
parser.add_argument('--style_weight', type=float, default=1.0, required=False,
                    help='Style weight.')
parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
                    help='Total Variation weight.')

args = parser.parse_args()
base_image_path = args.base_image_path
style_reference_image_path = args.style_reference_image_path
result_prefix = args.result_prefix
iterations = args.iter

# these are the weights of the different loss components
total_variation_weight = args.tv_weight
style_weight = args.style_weight
content_weight = args.content_weight

# dimensions of the generated picture.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)


# util function to open, resize and format pictures into appropriate tensors
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg19.preprocess_input(img)
    return img


# util function to convert a tensor into a valid image
def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# get tensor representations of our images
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)

# build the VGG19 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet',
                    include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])

# compute the neural style loss
# first we need to define 4 util functions


# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram


# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))


# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image
def content_loss(base, combination):
    return K.sum(K.square(combination - base))


# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))

# combine these loss functions into a single scalar
loss = K.variable(0.)
layer_features = outputs_dict['block5_conv2']
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss += content_weight * content_loss(base_image_features,
                                      combination_features)

feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
loss += total_variation_weight * total_variation_loss(combination_image)

# get the gradients of the generated image wrt the loss
grads = K.gradients(loss, combination_image)

outputs = [loss]
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)

f_outputs = K.function([combination_image], outputs)


def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values


# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
class Evaluator(object):

    def __init__(self):
        self.loss_value = None
        self.grads_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

evaluator = Evaluator()

# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
x = preprocess_image(base_image_path)

for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    imsave(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))
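
The Gram matrix used by style_loss is simply the inner product between every pair of flattened feature maps: it throws away spatial arrangement and keeps only which channels co-activate, which is why matching Gram matrices transfers texture and colour statistics rather than content. A small numpy sketch of the same computation on a hypothetical channels_last feature tensor:

import numpy as np

# hypothetical feature map: a 4 x 5 spatial grid with 3 channels (channels_last)
feat = np.random.rand(4, 5, 3).astype('float32')

# equivalent of K.batch_flatten(K.permute_dimensions(x, (2, 0, 1))):
# one row per channel, spatial positions flattened into columns
features = feat.transpose(2, 0, 1).reshape(3, -1)

# Gram matrix: channel-by-channel co-activation, shape (3, 3)
gram = features.dot(features.T)
print(gram.shape)  # (3, 3)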


6. Variational AutoEncoder

Analysis:

'''This script demonstrates how to build a variational autoencoder with Keras.'''
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras.layers import Input, Dense, Lambda, Layer
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist

batch_size = 100
original_dim = 784
latent_dim = 2
intermediate_dim = 256
epochs = 50
epsilon_std = 1.0

x = Input(shape=(original_dim,))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)


def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.,
                              stddev=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon

# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)


# Custom loss layer
class CustomVariationalLayer(Layer):

    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)

    def vae_loss(self, x, x_decoded_mean):
        xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
        kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        x_decoded_mean = inputs[1]
        loss = self.vae_loss(x, x_decoded_mean)
        self.add_loss(loss, inputs=inputs)
        # We won't actually use the output.
        return x

y = CustomVariationalLayer()([x, x_decoded_mean])
vae = Model(x, y)
vae.compile(optimizer='rmsprop', loss=None)


# train the VAE on MNIST digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

vae.fit(x_train,
        shuffle=True,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, None))

# build a model to project inputs on the latent space
encoder = Model(x, z_mean)

# display a 2D plot of the digit classes in the latent space
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
plt.colorbar()
plt.show()

# build a digit generator that can sample from the learned distribution
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

# display a 2D manifold of the digits
n = 15  # figure with 15x15 digits
digit_size = 28
figure = np.zeros((digit_size * n, digit_size * n))
# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
# to produce values of the latent variables z, since the prior of the latent space is Gaussian
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        z_sample = np.array([[xi, yi]])
        x_decoded = generator.predict(z_sample)
        digit = x_decoded[0].reshape(digit_size, digit_size)
        figure[i * digit_size: (i + 1) * digit_size,
               j * digit_size: (j + 1) * digit_size] = digit

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
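
The sampling Lambda is the reparameterization trick: rather than sampling z directly from N(z_mean, exp(z_log_var)), it draws epsilon from a standard normal and computes z = z_mean + exp(z_log_var / 2) * epsilon, keeping the path from the encoder outputs to z differentiable so the KL term can be trained by backprop. A toy numpy sketch of the same transform (values are made up, not taken from the trained model):

import numpy as np

# toy encoder outputs for a batch of 2 samples in a 2-D latent space
z_mean = np.array([[0.0, 1.0], [2.0, -1.0]])
z_log_var = np.array([[0.0, 0.5], [-1.0, 0.2]])

# reparameterization trick: z = mu + sigma * eps, with sigma = exp(log_var / 2)
epsilon = np.random.normal(size=z_mean.shape)
z = z_mean + np.exp(z_log_var / 2) * epsilon

print(z.shape)  # (2, 2); z is differentiable w.r.t. z_mean and z_log_var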


7. Variational Autoencoder Deconv

Analysis:

'''This script demonstrates how to build a variational autoencoder
with Keras and deconvolution layers.
'''
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Layer
from keras.layers import Conv2D, Conv2DTranspose
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist

# input image dimensions
img_rows, img_cols, img_chns = 28, 28, 1
# number of convolutional filters to use
filters = 64
# convolution kernel size
num_conv = 3

batch_size = 100
if K.image_data_format() == 'channels_first':
    original_img_size = (img_chns, img_rows, img_cols)
else:
    original_img_size = (img_rows, img_cols, img_chns)
latent_dim = 2
intermediate_dim = 128
epsilon_std = 1.0
epochs = 5

x = Input(shape=original_img_size)
conv_1 = Conv2D(img_chns,
                kernel_size=(2, 2),
                padding='same', activation='relu')(x)
conv_2 = Conv2D(filters,
                kernel_size=(2, 2),
                padding='same', activation='relu',
                strides=(2, 2))(conv_1)
conv_3 = Conv2D(filters,
                kernel_size=num_conv,
                padding='same', activation='relu',
                strides=1)(conv_2)
conv_4 = Conv2D(filters,
                kernel_size=num_conv,
                padding='same', activation='relu',
                strides=1)(conv_3)
flat = Flatten()(conv_4)
hidden = Dense(intermediate_dim, activation='relu')(flat)

z_mean = Dense(latent_dim)(hidden)
z_log_var = Dense(latent_dim)(hidden)


def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var) * epsilon

# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_var])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# we instantiate these layers separately so as to reuse them later
decoder_hid = Dense(intermediate_dim, activation='relu')
decoder_upsample = Dense(filters * 14 * 14, activation='relu')

if K.image_data_format() == 'channels_first':
    output_shape = (batch_size, filters, 14, 14)
else:
    output_shape = (batch_size, 14, 14, filters)

decoder_reshape = Reshape(output_shape[1:])
decoder_deconv_1 = Conv2DTranspose(filters,
                                   kernel_size=num_conv,
                                   padding='same',
                                   strides=1,
                                   activation='relu')
decoder_deconv_2 = Conv2DTranspose(filters,
                                   kernel_size=num_conv,
                                   padding='same',
                                   strides=1,
                                   activation='relu')
if K.image_data_format() == 'channels_first':
    output_shape = (batch_size, filters, 29, 29)
else:
    output_shape = (batch_size, 29, 29, filters)
decoder_deconv_3_upsamp = Conv2DTranspose(filters,
                                          kernel_size=(3, 3),
                                          strides=(2, 2),
                                          padding='valid',
                                          activation='relu')
decoder_mean_squash = Conv2D(img_chns,
                             kernel_size=2,
                             padding='valid',
                             activation='sigmoid')

hid_decoded = decoder_hid(z)
up_decoded = decoder_upsample(hid_decoded)
reshape_decoded = decoder_reshape(up_decoded)
deconv_1_decoded = decoder_deconv_1(reshape_decoded)
deconv_2_decoded = decoder_deconv_2(deconv_1_decoded)
x_decoded_relu = decoder_deconv_3_upsamp(deconv_2_decoded)
x_decoded_mean_squash = decoder_mean_squash(x_decoded_relu)


# Custom loss layer
class CustomVariationalLayer(Layer):

    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)

    def vae_loss(self, x, x_decoded_mean_squash):
        x = K.flatten(x)
        x_decoded_mean_squash = K.flatten(x_decoded_mean_squash)
        xent_loss = img_rows * img_cols * metrics.binary_crossentropy(x, x_decoded_mean_squash)
        kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        x_decoded_mean_squash = inputs[1]
        loss = self.vae_loss(x, x_decoded_mean_squash)
        self.add_loss(loss, inputs=inputs)
        # We don't use this output.
        return x

y = CustomVariationalLayer()([x, x_decoded_mean_squash])
vae = Model(x, y)
vae.compile(optimizer='rmsprop', loss=None)
vae.summary()

# train the VAE on MNIST digits
(x_train, _), (x_test, y_test) = mnist.load_data()

x_train = x_train.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
x_test = x_test.astype('float32') / 255.
x_test = x_test.reshape((x_test.shape[0],) + original_img_size)

print('x_train.shape:', x_train.shape)

vae.fit(x_train,
        shuffle=True,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, None))

# build a model to project inputs on the latent space
encoder = Model(x, z_mean)

# display a 2D plot of the digit classes in the latent space
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
plt.colorbar()
plt.show()

# build a digit generator that can sample from the learned distribution
decoder_input = Input(shape=(latent_dim,))
_hid_decoded = decoder_hid(decoder_input)
_up_decoded = decoder_upsample(_hid_decoded)
_reshape_decoded = decoder_reshape(_up_decoded)
_deconv_1_decoded = decoder_deconv_1(_reshape_decoded)
_deconv_2_decoded = decoder_deconv_2(_deconv_1_decoded)
_x_decoded_relu = decoder_deconv_3_upsamp(_deconv_2_decoded)
_x_decoded_mean_squash = decoder_mean_squash(_x_decoded_relu)
generator = Model(decoder_input, _x_decoded_mean_squash)

# display a 2D manifold of the digits
n = 15  # figure with 15x15 digits
digit_size = 28
figure = np.zeros((digit_size * n, digit_size * n))
# linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
# to produce values of the latent variables z, since the prior of the latent space is Gaussian
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        z_sample = np.array([[xi, yi]])
        z_sample = np.tile(z_sample, batch_size).reshape(batch_size, 2)
        x_decoded = generator.predict(z_sample, batch_size=batch_size)
        digit = x_decoded[0].reshape(digit_size, digit_size)
        figure[i * digit_size: (i + 1) * digit_size,
               j * digit_size: (j + 1) * digit_size] = digit

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
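
A quick sanity check on the decoder shapes in the channels_last case: the Dense layer produces 14 * 14 * 64 units, the two 'same'/stride-1 transposed convolutions keep the 14 x 14 grid, the 'valid' stride-2 Conv2DTranspose with a 3 x 3 kernel maps 14 to (14 - 1) * 2 + 3 = 29, and the final 2 x 2 'valid' Conv2D brings it back to 28 x 28, the MNIST resolution. The arithmetic as a tiny sketch:

# decoder output-size bookkeeping for the channels_last path above
h = 14               # Dense(filters * 14 * 14) reshaped to (14, 14, 64);
                     # the two 'same', stride-1 Conv2DTranspose layers keep 14 x 14
h = (h - 1) * 2 + 3  # 'valid' Conv2DTranspose, kernel 3, stride 2 -> 29
h = h - 2 + 1        # 'valid' Conv2D, kernel 2 -> 28
print(h)             # 28, i.e. back to the 28 x 28 MNIST resolution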


References:

[1] Keras Examples: https://github.com/fchollet/keras/tree/master/examples
