TensorFlow Learning Diary 11


1. word2vec(skip-gram)[1][2][3]

Analysis:

import collections
import numpy as np
import tensorflow as tf
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Configuration
batch_size = 20
# Dimension of the embedding vector. Two is too small to get any
# meaningful embeddings, but we use 2 for easy visualization.
embedding_size = 2
num_sampled = 15  # Number of negative examples to sample.

# Sample sentences
sentences = ["the quick brown fox jumped over the lazy dog",
             "I love cats and dogs",
             "we all love cats and dogs",
             "cats and dogs are great",
             "sung likes cats",
             "she loves dogs",
             "cats can be very independent",
             "cats are great companions when they want to be",
             "cats are playful",
             "cats are natural hunters",
             "It's raining cats and dogs",
             "dogs and cats love sung"]

# Split sentences into words and count them
words = " ".join(sentences).split()
count = collections.Counter(words).most_common()
print("Word count", count[:5])

# Build dictionaries
rdic = [i[0] for i in count]  # reverse dic, idx -> word
dic = {w: i for i, w in enumerate(rdic)}  # dic, word -> id
voc_size = len(dic)

# Make indexed word data
data = [dic[word] for word in words]
print('Sample data', data[:10], [rdic[t] for t in data[:10]])

# Build training data with window size 1 for simplicity:
# ([the, brown], quick), ([quick, fox], brown), ([brown, jumped], fox), ...
cbow_pairs = []
for i in range(1, len(data) - 1):
    cbow_pairs.append([[data[i - 1], data[i + 1]], data[i]])
print('Context pairs', cbow_pairs[:10])

# Make skip-gram pairs:
# (quick, the), (quick, brown), (brown, quick), (brown, fox), ...
skip_gram_pairs = []
for c in cbow_pairs:
    skip_gram_pairs.append([c[1], c[0][0]])
    skip_gram_pairs.append([c[1], c[0][1]])
print('skip-gram pairs', skip_gram_pairs[:5])

def generate_batch(size):
    assert size < len(skip_gram_pairs)
    x_data = []
    y_data = []
    r = np.random.choice(range(len(skip_gram_pairs)), size, replace=False)
    for i in r:
        x_data.append(skip_gram_pairs[i][0])  # n dim
        y_data.append([skip_gram_pairs[i][1]])  # n, 1 dim
    return x_data, y_data

# generate_batch test
print('Batches (x, y)', generate_batch(3))

# Input data
train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
# Labels need shape [batch_size, 1] for nn.nce_loss
train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

# Ops and variables pinned to the CPU because of missing GPU implementation
with tf.device('/cpu:0'):
    # Look up embeddings for inputs.
    embeddings = tf.Variable(
        tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, train_inputs)  # lookup table

# Construct the variables for the NCE loss
nce_weights = tf.Variable(
    tf.random_uniform([voc_size, embedding_size], -1.0, 1.0))
nce_biases = tf.Variable(tf.zeros([voc_size]))

# Compute the average NCE loss for the batch. This does the magic:
#   tf.nn.nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, ...)
# It automatically draws negative samples when we evaluate the loss.
loss = tf.reduce_mean(
    tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed,
                   num_sampled, voc_size))

# Use the Adam optimizer
train_op = tf.train.AdamOptimizer(1e-1).minimize(loss)

# Launch the graph in a session
with tf.Session() as sess:
    # Initialize all variables
    tf.global_variables_initializer().run()
    for step in range(100):
        batch_inputs, batch_labels = generate_batch(batch_size)
        _, loss_val = sess.run([train_op, loss],
                               feed_dict={train_inputs: batch_inputs,
                                          train_labels: batch_labels})
        if step % 10 == 0:
            print("Loss at", step, loss_val)  # Report the loss
    # Final embeddings are ready for use; they should be normalized for practical use.
    trained_embeddings = embeddings.eval()

# Plot the word vectors if the embedding dimension is 2
if trained_embeddings.shape[1] == 2:
    labels = rdic[:10]  # Show top 10 words
    for i, label in enumerate(labels):
        x, y = trained_embeddings[i, :]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    plt.savefig("word2vec.png")
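The closing comment says the embeddings should be normalized for practical use (so that dot products become cosine similarities). A minimal sketch of that extra step, added here for completeness (keep_dims is the TF 1.x argument name):

# L2-normalize each embedding row.
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
normalized_embeddings = embeddings / norm
# Inside the session: trained_embeddings = normalized_embeddings.eval()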
(1) tf.nn.embedding_lookup(params, ids, partition_strategy="mod", name=None, validate_indices=True, max_norm=None): looks up the elements (rows) of params indexed by ids; see the toy sketch after these notes.

(2) tf.nn.nce_loss(nce_weights, nce_biases, train_labels, embed, num_sampled, voc_size): computes the noise-contrastive estimation loss for the batch; the full signature is covered in Section 2.
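As a toy illustration of note (1) (an added sketch, not part of the original script): looking up ids [1, 3] simply gathers rows 1 and 3 of the parameter matrix.

import numpy as np
import tensorflow as tf

params = tf.constant(np.arange(12, dtype=np.float32).reshape(4, 3))  # 4 ids, 3 dims
ids = tf.constant([1, 3])
looked_up = tf.nn.embedding_lookup(params, ids)

with tf.Session() as sess:
    print(sess.run(looked_up))  # rows 1 and 3: [[3. 4. 5.] [9. 10. 11.]]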


2. tf.nn.nce_loss

Analysis: nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, num_true=1, sampled_values=None, remove_accidental_hits=False, partition_strategy="mod", name="nce_loss"): computes and returns the noise-contrastive estimation training loss.
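A minimal shape-oriented sketch of calling nce_loss directly (the sizes here are made-up illustration values): weights is [num_classes, dim], biases is [num_classes], labels is [batch_size, num_true] of integer class ids, inputs is [batch_size, dim], and the op returns one loss value per example.

import tensorflow as tf

batch_size, dim, num_classes, num_sampled = 8, 16, 1000, 5
weights = tf.Variable(tf.random_uniform([num_classes, dim], -1.0, 1.0))
biases = tf.Variable(tf.zeros([num_classes]))
labels = tf.placeholder(tf.int64, shape=[batch_size, 1])     # true class ids
inputs = tf.placeholder(tf.float32, shape=[batch_size, dim])

per_example_loss = tf.nn.nce_loss(weights, biases, labels, inputs,
                                  num_sampled, num_classes)  # shape [batch_size]
loss = tf.reduce_mean(per_example_loss)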


3. Linear Regression
Analysis:

# -*- coding: utf-8 -*-
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

rng = np.random

# Parameters
learning_rate = 0.01
# training_epochs = 1000
training_epochs = 20
# display_step = 50
display_step = 5

# Training data
train_X = np.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
                      7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
train_Y = np.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
                      2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])
n_samples = train_X.shape[0]

# tf graph input
X = tf.placeholder("float")
Y = tf.placeholder("float")

# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")

# Construct a linear model
pred = tf.add(tf.multiply(X, W), b)

# Mean squared error
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:
    sess.run(init)
    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c),
                  "W=", sess.run(W), "b=", sess.run(b))
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

    # Graphic display
    plt.plot(train_X, train_Y, 'ro', label='Original data')
    plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
    plt.legend()
    plt.show()

(1) training_epochs: the number of complete passes the algorithm makes over the entire training set. Here the inner loop performs one update per sample, so 20 epochs over 17 samples means 340 gradient steps in total.

(2) MSE (Mean Squared Error): cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples). The mean-squared-error formula does not normally carry the 2 in the denominator; it is added so that the exponent 2 cancels when taking the derivative, and in any case a constant factor does not change what the loss measures or where its minimum lies. The worked gradients are shown below.
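Written out (with the model prediction W x_i + b), the 1/2 cancels the exponent when differentiating:

J(W, b) = \frac{1}{2n}\sum_{i=1}^{n}\left(W x_i + b - y_i\right)^2

\frac{\partial J}{\partial W} = \frac{1}{n}\sum_{i=1}^{n}\left(W x_i + b - y_i\right)x_i,
\qquad
\frac{\partial J}{\partial b} = \frac{1}{n}\sum_{i=1}^{n}\left(W x_i + b - y_i\right)

so the gradient expressions carry no stray factor of 2, and scaling the loss by a constant leaves the minimizer unchanged.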


4. Logistic Regression

Analysis:

# -*- coding: utf-8 -*-
from __future__ import print_function
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/", one_hot=True)

# Parameters
learning_rate = 0.01
# training_epochs = 25
training_epochs = 2
batch_size = 100
display_step = 1

# tf graph input
x = tf.placeholder(tf.float32, [None, 784])  # mnist data image of shape 28*28=784
y = tf.placeholder(tf.float32, [None, 10])  # 0-9 digits recognition => 10 classes

# Set model weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Construct model
pred = tf.nn.softmax(tf.matmul(x, W) + b)  # Softmax

# Minimize error using cross entropy
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                          y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # Calculate accuracy for 3000 examples
    print("Accuracy:", accuracy.eval({x: mnist.test.images[:3000], y: mnist.test.labels[:3000]}))
(1) Cross-entropy loss function: cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1)), where reduction_indices is the older name for the axis argument; a numerically safer variant is sketched after these notes.

(2) tf.reduce_mean: computes the mean of elements across dimensions of a tensor; here it reduces the per-example cross-entropy vector to a scalar batch loss.

(3) Initialization of the model weights W and b: the code initializes both to zero, but other values work as well, for example draws from a standard normal distribution (see the second sketch below).
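For note (1): the hand-rolled cross entropy can hit log(0) when a softmax output underflows to zero. A minimal sketch, assuming the same x, W, b, and y as the script above, of the numerically safer built-in that works on raw logits:

logits = tf.matmul(x, W) + b  # raw scores, before softmax
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

With this formulation pred = tf.nn.softmax(logits) is still used for prediction, but the loss is computed from the logits directly.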
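For note (3): a sketch of one alternative initialization (the stddev value is an arbitrary illustrative choice):

W = tf.Variable(tf.random_normal([784, 10], stddev=0.01), name="weight")
b = tf.Variable(tf.zeros([10]), name="bias")

For this single-layer softmax model, zero initialization is perfectly fine; random initialization matters once hidden layers are added, to break the symmetry between units.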


5. Nearest Neighbor

Analysis:

# -*- coding: utf-8 -*-
from __future__ import print_function
import numpy as np
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/", one_hot=True)

# In this example, we limit the mnist data
Xtr, Ytr = mnist.train.next_batch(5000)  # 5000 for training (nn candidates)
Xte, Yte = mnist.test.next_batch(200)  # 200 for testing

# tf graph input
xtr = tf.placeholder("float", [None, 784])
xte = tf.placeholder("float", [784])

# Nearest neighbor calculation using L1 distance
distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.negative(xte))), reduction_indices=1)
# Prediction: get the index of the minimum distance (nearest neighbor)
pred = tf.argmin(distance, 0)

accuracy = 0.

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start testing
with tf.Session() as sess:
    sess.run(init)
    # Loop over test data
    for i in range(len(Xte)):
        # Get nearest neighbor
        nn_index = sess.run(pred, feed_dict={xtr: Xtr, xte: Xte[i, :]})
        # Get nearest neighbor class label and compare it to its true label
        print("Test", i, "Prediction:", np.argmax(Ytr[nn_index]),
              "True Class:", np.argmax(Yte[i]))
        # Calculate accuracy
        if np.argmax(Ytr[nn_index]) == np.argmax(Yte[i]):
            accuracy += 1. / len(Xte)
    print("Done!")
    print("Accuracy:", accuracy)
(1) Manhattan distance corresponds to the L1 norm: ||x||_1 is the sum of the absolute values of the elements of x, so the distance between two vectors is the sum of their absolute coordinate differences.
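A tiny numeric check of the L1 distance the graph computes (an added illustration with made-up vectors):

import numpy as np

a = np.array([1.0, -2.0, 3.0])
b = np.array([4.0, 0.0, 1.0])
# Sum of absolute coordinate differences:
print(np.sum(np.abs(a - b)))  # |1-4| + |-2-0| + |3-1| = 7.0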


6. K-Means

Analysis:

# -*- coding: utf-8 -*-
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans

# Ignore all GPUs; this example does not benefit from them.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/", one_hot=True)
full_data_x = mnist.train.images

# Parameters
# num_steps = 50  # Total steps to train
num_steps = 20  # Total steps to train
batch_size = 1024  # The number of samples per batch
k = 25  # The number of clusters
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means parameters
kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                use_mini_batch=True)

# Build the KMeans graph
(all_scores, cluster_idx, scores, cluster_centers_initialized,
 init_op, train_op) = kmeans.training_graph()
cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializers
sess.run(init_vars, feed_dict={X: full_data_x})
sess.run(init_op, feed_dict={X: full_data_x})

# Training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

# Assign a label to each centroid.
# Count the labels of the training samples assigned to each centroid
# (the assignment of each sample is given by 'idx').
counts = np.zeros(shape=(k, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += mnist.train.labels[i]
# Assign the most frequent label to the centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Evaluation ops
# Lookup: centroid_id -> label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
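(1) Assigning labels to centroids: since k = 25 is larger than the 10 digit classes, each centroid is given the majority label among the training samples assigned to it; summing one-hot labels into counts and taking argmax per row implements that vote. A toy sketch of the voting step (made-up numbers, k = 2 and three classes for brevity):

import numpy as np

# Each row accumulates one-hot label counts for one centroid.
counts = np.array([[5, 1, 0],   # centroid 0: mostly class 0
                   [0, 2, 7]])  # centroid 1: mostly class 2
labels_map = [np.argmax(c) for c in counts]
print(labels_map)  # [0, 2]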

7. Random Forest

Analysis:

# -*- coding: utf-8 -*-
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest

# Ignore all GPUs; tf random forest does not benefit from them.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/", one_hot=False)

# Parameters
# num_steps = 500  # Total steps to train
num_steps = 100  # Total steps to train
batch_size = 1024  # The number of samples per batch
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels
num_trees = 10
max_nodes = 1000

# Input and target data
X = tf.placeholder(tf.float32, shape=[None, num_features])
# For random forest, labels must be integers (the class id)
Y = tf.placeholder(tf.int32, shape=[None])

# Random forest parameters
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

# Build the random forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)
# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure the accuracy
infer_op = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars)

# Training
for i in range(1, num_steps + 1):
    # Prepare data: get the next batch of MNIST images and labels
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    if i % 50 == 0 or i == 1:
        acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
        print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

# Test model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))

References:

[1] In-Depth Word2vec: The Skip-Gram Model (Architecture): https://www.leiphone.com/news/201706/PamWKpfRFEI42McI.html

[2] In-Depth Word2vec: The Skip-Gram Model (Training): https://www.leiphone.com/news/201706/eV8j3Nu8SMqGBnQB.html

[3] In-Depth Word2vec: The Skip-Gram Model (Implementation): https://www.leiphone.com/news/201706/QprrvzsrZCl4S2lw.html

[4] TensorFlow's NCE Loss Implementation and word2vec: http://www.jianshu.com/p/fab82fa53e16
