Comparison of Static and Dynamic RNN Functions


Static vs. dynamic RNN

TensorFlow provides two functions for building an RNN structure:

  • tf.nn.static_rnn
  • tf.nn.dynamic_rnn

The difference between the two functions is as follows [1]:

tf.nn.static_rnn creates an unrolled graph for a fixed RNN length. That means, if you call tf.nn.static_rnn with inputs having 200 time steps you are creating a static graph with 200 RNN steps. First, graph creation is slow. Second, you're unable to pass in longer sequences (> 200) than you've originally specified.

tf.nn.dynamic_rnn solves this. It uses a tf.while_loop to dynamically construct the graph when it is executed. That means graph creation is faster and you can feed batches of variable size.
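The variable-length point can be made concrete with dynamic_rnn's sequence_length argument. The snippet below is a minimal, self-contained sketch and is not part of the MNIST example that follows; the shapes, names, and toy data are made up purely for illustration. Steps past each sequence's true length produce zero outputs, and the same graph accepts batches whose time dimension differs from call to call.

import numpy as np
import tensorflow as tf

# Toy batch: 3 sequences padded to 5 steps, 8 features per step.
batch = np.zeros([3, 5, 8], dtype=np.float32)
seq_len = np.array([5, 3, 2], dtype=np.int32)   # true length of each sequence

inputs = tf.placeholder(tf.float32, [None, None, 8])   # time dimension left open
lengths = tf.placeholder(tf.int32, [None])

cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=16)
# dynamic_rnn unrolls with a while-loop at run time, so one graph handles
# any number of time steps; sequence_length masks the padded tail.
outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=lengths, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(outputs, feed_dict={inputs: batch, lengths: seq_len})
    print(out.shape)   # (3, 5, 16); rows past each true length are all zeros

The rest of the post compares the two functions on MNIST, treating each 28×28 image as a sequence of 28 rows of 28 pixels.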
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
file_dir = '/Users/wangruidong/Documents/MachineLearning/Dataset/MNIST/'
mnist = input_data.read_data_sets(file_dir, one_hot=True)
train_img = mnist.train.images
train_labels = mnist.train.labels
print(train_img.shape)
print(train_labels.shape)
print('Load data finish.')
Extracting /Users/wangruidong/Documents/MachineLearning/Dataset/MNIST/train-images-idx3-ubyte.gz
Extracting /Users/wangruidong/Documents/MachineLearning/Dataset/MNIST/train-labels-idx1-ubyte.gz
Extracting /Users/wangruidong/Documents/MachineLearning/Dataset/MNIST/t10k-images-idx3-ubyte.gz
Extracting /Users/wangruidong/Documents/MachineLearning/Dataset/MNIST/t10k-labels-idx1-ubyte.gz
(55000, 784)
(55000, 10)
Load data finish.
input_dim = 28
time_step = 28
output_dim = 10
batch_size = 15
n_hidden = 128     # LSTM output dimension
n_epoch = 20       # total number of training epochs
n_batches = 10
display_step = 2

weights = {
    'w_out': tf.Variable(tf.random_normal([n_hidden, output_dim], stddev=0.1)),
}
bias = {
    'b_out': tf.Variable(tf.random_normal([output_dim], stddev=0.1))
}

x = tf.placeholder(tf.float32, [None, time_step, input_dim])
y = tf.placeholder(tf.float32, [None, output_dim])

tf.nn.static_rnn

def static_rnn(x, weights, bias):
    print('==========> static_rnn')
    # initial x shape = [batch_size, time_step, features]
    print('Initial x shape = ', x.shape)
    x = tf.transpose(x, [1, 0, 2])      # [time_step, batch_size, features]
    x = tf.reshape(x, [-1, input_dim])  # [time_step*batch_size, features]
    # split_x is a Python list of length time_step;
    # each element has shape [batch_size, features]
    split_x = tf.split(x, time_step, axis=0)
    print('type(split_x) = ', type(split_x))        # list
    print('len(split_x) = ', len(split_x))          # time_step
    print('type(split_x[0]) = ', type(split_x[0]))  # tensor
    print('split_x[0].shape = ', split_x[0].shape)  # [batch_size, features]
    with tf.variable_scope('static_rnn') as scope:
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0, reuse=tf.AUTO_REUSE)
        lstm_o, lstm_s = tf.nn.static_rnn(lstm_cell, split_x, dtype=tf.float32)
    # lstm_o has the same list structure as split_x
    print('type(lstm_o) = ', type(lstm_o))        # list
    print('len(lstm_o) = ', len(lstm_o))          # time_step
    print('type(lstm_o[0]) = ', type(lstm_o[0]))  # tensor
    print('lstm_o[0].shape = ', lstm_o[0].shape)  # [batch_size, n_hidden]
    out = tf.add(tf.matmul(lstm_o[-1], weights['w_out']), bias['b_out'])
    return out

# ==========> loss and accuracy
pre = static_rnn(x, weights, bias)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pre))
optm = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
_equal = tf.equal(tf.argmax(pre, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(_equal, tf.float32))

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(n_epoch):
    avg_loss, avg_acc = 0.0, 0.0
    for i in range(n_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape(batch_size, time_step, input_dim)
        feed = {x: batch_x, y: batch_y}
        sess.run(optm, feed_dict=feed)
        avg_loss += sess.run(cost, feed_dict=feed)
        avg_acc += sess.run(accuracy, feed_dict=feed)
    avg_loss /= n_batches
    avg_acc /= n_batches
    if epoch % display_step == 0 and epoch != 0:
        print('Epoch %d/%d \t Train acc = %.4f loss = %.4f' % (epoch, n_epoch, avg_acc, avg_loss))
print('Finish')
==========> static_rnn
Initial x shape =  (?, 28, 28)
type(split_x) =  <class 'list'>
len(split_x) =  28
type(split_x[0]) =  <class 'tensorflow.python.framework.ops.Tensor'>
split_x[0].shape =  (?, 28)
type(lstm_o) =  <class 'list'>
len(lstm_o) =  28
type(lstm_o[0]) =  <class 'tensorflow.python.framework.ops.Tensor'>
lstm_o[0].shape =  (?, 128)
Epoch 2/20   Train acc = 0.0533 loss = 2.3052
Epoch 4/20   Train acc = 0.0467 loss = 2.2965
Epoch 6/20   Train acc = 0.0667 loss = 2.3039
Epoch 8/20   Train acc = 0.1333 loss = 2.2921
Epoch 10/20      Train acc = 0.1400 loss = 2.2758
Epoch 12/20      Train acc = 0.0733 loss = 2.2959
Epoch 14/20      Train acc = 0.0933 loss = 2.2871
Epoch 16/20      Train acc = 0.1467 loss = 2.2784
Epoch 18/20      Train acc = 0.0867 loss = 2.2875
Finish

tf.nn.dynamic_rnn

def dynamic_rnn(x, weights, bias):
    print('==========> dynamic_rnn')
    # initial x shape = [batch_size, time_step, features]
    print('Initial x shape = ', x.shape)
    with tf.variable_scope('dynamic_rnn') as scope:
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_hidden, forget_bias=1.0, reuse=tf.AUTO_REUSE)
        lstm_o, lstm_s = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32, time_major=False)
    # unlike static_rnn, lstm_o is a single tensor, not a list
    print('type(lstm_o) = ', type(lstm_o))                    # tensor
    print('lstm_o.shape = ', lstm_o.shape)                    # [batch_size, time_step, n_hidden]
    print('lstm_o[:,-1,:] shape = ', lstm_o[:, -1, :].shape)  # [batch_size, n_hidden]
    out = tf.add(tf.matmul(lstm_o[:, -1, :], weights['w_out']), bias['b_out'])
    return out

# ==========> loss and accuracy
pre = dynamic_rnn(x, weights, bias)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pre))
optm = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
_equal = tf.equal(tf.argmax(pre, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(_equal, tf.float32))

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(n_epoch):
    avg_loss, avg_acc = 0.0, 0.0
    for i in range(n_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape(batch_size, time_step, input_dim)
        feed = {x: batch_x, y: batch_y}
        sess.run(optm, feed_dict=feed)
        avg_loss += sess.run(cost, feed_dict=feed)
        avg_acc += sess.run(accuracy, feed_dict=feed)
    avg_loss /= n_batches
    avg_acc /= n_batches
    if epoch % display_step == 0 and epoch != 0:
        print('Epoch %d/%d \t Train acc = %.4f loss = %.4f' % (epoch, n_epoch, avg_acc, avg_loss))
print('Finish')
==========> dynamic_rnn
Initial x shape =  (?, 28, 28)
type(lstm_o) =  <class 'tensorflow.python.framework.ops.Tensor'>
lstm_o.shape =  (?, 28, 128)
lstm_o[:,-1,:] shape =  (?, 128)
Epoch 2/20   Train acc = 0.1267 loss = 2.2809
Epoch 4/20   Train acc = 0.1933 loss = 2.2858
Epoch 6/20   Train acc = 0.1000 loss = 2.3058
Epoch 8/20   Train acc = 0.2267 loss = 2.2720
Epoch 10/20      Train acc = 0.2267 loss = 2.2750
Epoch 12/20      Train acc = 0.2733 loss = 2.2610
Epoch 14/20      Train acc = 0.2000 loss = 2.2719
Epoch 16/20      Train acc = 0.2333 loss = 2.2611
Epoch 18/20      Train acc = 0.2533 loss = 2.2503
Finish
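With both versions in place, the "graph creation is slow" point from the quoted answer is easy to check directly. The sketch below is not part of the original experiment; it only times graph construction for the two functions on a made-up 200-step input (the shapes and sizes are arbitrary). static_rnn adds one set of ops per time step, while dynamic_rnn emits a single while-loop regardless of sequence length, so the gap widens as the number of steps grows.

import time
import tensorflow as tf

def build_time(graph_fn, n_steps, n_features=8, n_hidden=16):
    # Build a fresh graph and return how long construction took (seconds).
    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, [None, n_steps, n_features])
    cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_hidden)
    start = time.time()
    graph_fn(cell, x, n_steps)
    return time.time() - start

def build_static(cell, x, n_steps):
    # static_rnn needs a Python list with one tensor per time step,
    # so the graph grows linearly with n_steps.
    split_x = tf.unstack(x, num=n_steps, axis=1)
    tf.nn.static_rnn(cell, split_x, dtype=tf.float32)

def build_dynamic(cell, x, n_steps):
    # dynamic_rnn builds one while-loop, independent of n_steps.
    tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

print('static  build time: %.3f s' % build_time(build_static, 200))
print('dynamic build time: %.3f s' % build_time(build_dynamic, 200))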

Reference

[1] https://stackoverflow.com/questions/39734146/whats-the-difference-between-tensorflow-dynamic-rnn-and-rnn
