TensorFlow: recurrent neural network char-level 0

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.models.rnn import seq2seq
import collections

# data preparation, after @karpathy's char-rnn
data = open('ThreeMusketeers.txt').read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print 'data has %d characters, %d unique.' % (data_size, vocab_size)
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# show the five most frequent characters
counter = collections.Counter(data)
counter = sorted(counter.items(), key=lambda x: -x[1])
for i in xrange(5):
    print counter[i]

corpus = [char_to_ix[c] for c in data]

batch_size = 50
seq_length = 200
num_batches = len(corpus) / (batch_size * seq_length)
print 'num_batches: ', num_batches

# targets are the inputs shifted by one character
corpus_reduced_0 = corpus[0 : num_batches*batch_size*seq_length]
corpus_reduced_1 = corpus[1 : num_batches*batch_size*seq_length + 1]
xdata = np.copy(corpus_reduced_0)
ydata = np.copy(corpus_reduced_1)
# ydata[:-1] = xdata[1:]
# ydata[-1] = xdata[0]

xbatches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
ybatches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
print 'number of xbatches:', len(xbatches)
print type(xbatches[0]), xbatches[0].shape

hidden_size = 128
num_layers = 2
max_grad_norm = 5.0

an_lstm = rnn_cell.BasicLSTMCell(hidden_size)
multi_lstm = rnn_cell.MultiRNNCell([an_lstm] * num_layers)

x = tf.placeholder(tf.int32, [batch_size, seq_length])
y = tf.placeholder(tf.int32, [batch_size, seq_length])
init_state = multi_lstm.zero_state(batch_size, tf.float32)

with tf.variable_scope('rnn'):
    softmax_w = tf.get_variable('softmax_w', [hidden_size, vocab_size])
    softmax_b = tf.get_variable('softmax_b', [vocab_size])
    with tf.device('/cpu:0'):
        embedding = tf.get_variable('embedding', [vocab_size, hidden_size])
        inputs = tf.nn.embedding_lookup(embedding, x)
        # split [batch_size, seq_length, hidden_size] into a list of
        # seq_length tensors, each with shape [batch_size, hidden_size]
        inputs = tf.split(1, seq_length, inputs)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

# greedy feedback: embed the argmax of the previous step's logits;
# only used when it is passed as loop_function to rnn_decoder (e.g. for sampling)
def loop(prev):
    prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
    prev_symbol = tf.stop_gradient(tf.arg_max(prev, 1))
    return tf.nn.embedding_lookup(embedding, prev_symbol)

outputs, last_state = seq2seq.rnn_decoder(inputs, init_state,
                                          multi_lstm,
                                          loop_function=None,
                                          scope='rnn')
# outputs is a list of 2-D tensors, each with shape [batch_size, hidden_size];
# len(outputs) is seq_length
# first, concatenate the hidden-layer outputs that belong to the same sequence
out_conca = tf.concat(1, outputs)  # [batch_size, hidden_size*seq_length]
# second, to apply the fully connected softmax layer, reshape out_conca
# so that its second dimension is hidden_size
# [batch_size*seq_length, hidden_size]
output = tf.reshape(out_conca, [-1, hidden_size])
# [batch_size*seq_length, vocab_size]
score = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
# [batch_size*seq_length, vocab_size]
probs = tf.nn.softmax(score)

loss = seq2seq.sequence_loss_by_example([score],
                                        [tf.reshape(y, [-1])],
                                        [tf.ones([batch_size*seq_length])],
                                        vocab_size)
cost = tf.reduce_sum(loss) / batch_size / seq_length

######################################################################
lr = 0.001
max_grad_norm = 5
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
# opt = tf.train.GradientDescentOptimizer(lr)
opt = tf.train.AdamOptimizer(lr)
optimizer = opt.apply_gradients(zip(grads, tvars))
######################################################################

init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

epoch = 20
# batch_size = 100  # unused here; the graph above was built with batch_size = 50
snapshot = 5
save_step = 1
saver = tf.train.Saver()
loss_cache = []
for ep in xrange(epoch):
    avg_loss = 0
    state = sess.run(init_state)
    for nb in xrange(num_batches):
        rand_idx = np.random.randint(num_batches)
        batch_x, batch_y = xbatches[rand_idx], ybatches[rand_idx]
        train_loss, state, _ = sess.run([cost, last_state, optimizer],
                                        feed_dict={x: batch_x, y: batch_y, init_state: state})
        avg_loss += train_loss / num_batches
    loss_cache.append(avg_loss)
    if ep % snapshot == 0:
        print 'Epoch: %d/%d, loss: %.4f' % (ep, epoch, avg_loss)
    if ep % save_step == 0:
        saver.save(sess, save_path='net_snapshot/rnn_char_net_tfmodel', global_step=ep)

plt.figure(1)
plt.plot(range(len(loss_cache)), loss_cache, 'b-', label='loss')
plt.legend(loc='upper right')
plt.show()
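The script computes probs and defines the loop feedback function, but it never actually generates text from the trained model. Below is a minimal sampling sketch I would append after the training code; it is not part of the original post. The single-step graph (sample_x, sample_init_state, sample_probs), the variable-reuse pattern, and the sample helper are my own assumptions about how the trained weights could be queried one character at a time.

# ---- sampling sketch (hypothetical, not in the original post) ----
# Rebuild the decoder for a single character (batch of 1, one time step),
# reusing the trained variables, then feed characters one at a time
# while carrying the LSTM state forward.
sample_x = tf.placeholder(tf.int32, [1, 1])
sample_init_state = multi_lstm.zero_state(1, tf.float32)
sample_inputs = [tf.squeeze(tf.nn.embedding_lookup(embedding, sample_x), [1])]
tf.get_variable_scope().reuse_variables()
sample_outputs, sample_last_state = seq2seq.rnn_decoder(sample_inputs,
                                                        sample_init_state,
                                                        multi_lstm,
                                                        scope='rnn')
sample_score = tf.nn.xw_plus_b(sample_outputs[0], softmax_w, softmax_b)
sample_probs = tf.nn.softmax(sample_score)

def sample(sess, seed_text, num_chars):
    # warm up the state on the seed text, then draw num_chars characters
    state = sess.run(sample_init_state)
    for ch in seed_text[:-1]:
        feed = {sample_x: [[char_to_ix[ch]]], sample_init_state: state}
        state = sess.run(sample_last_state, feed_dict=feed)
    text, ch = seed_text, seed_text[-1]
    for _ in xrange(num_chars):
        feed = {sample_x: [[char_to_ix[ch]]], sample_init_state: state}
        p, state = sess.run([sample_probs, sample_last_state], feed_dict=feed)
        p = p.ravel() / p.sum()  # guard against float round-off before sampling
        ch = ix_to_char[np.random.choice(vocab_size, p=p)]
        text += ch
    return text

print sample(sess, 'The ', 500)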