TensorFlow: recurrent neural network char-level 0
来源:互联网 发布:阿里云是基于openstack 编辑:程序博客网 时间:2024/05/27 16:42
import numpy as npimport tensorflow as tfimport matplotlib.pyplot as pltfrom tensorflow.models.rnn import rnn, rnn_cellfrom tensorflow.models.rnn import seq2seqimport collections# @karpathydata = open('ThreeMusketeers.txt').read()chars = list(set(data))data_size , vocab_size = len(data), len(chars)print 'data has %d characters, %d unique.' %(data_size, vocab_size)char_to_ix = {ch:i for i, ch in enumerate(chars)}ix_to_char = {i:ch for i, ch in enumerate(chars)}counter = collections.Counter(data)counter = sorted(counter.items(), key=lambda x:-x[1])for i in xrange(5): print counter[i]corpus = [char_to_ix[c] for c in data]batch_size = 50seq_length = 200num_batches = len(corpus) / (batch_size*seq_length)print 'num_batches: ', num_batchescorpus_reduced_0 = corpus[0 : num_batches*batch_size*seq_length]corpus_reduced_1 = corpus[1 : num_batches*batch_size*seq_length+1]xdata = np.copy(corpus_reduced_0)ydata = np.copy(corpus_reduced_1)# ydata[:-1] = xdata[1:]# ydata[-1] = xdata[0]xbatches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)ybatches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)print 'numbers of xbatches:', len(xbatches)print type(xbatches[0]), xbatches[0].shapehidden_size = 128num_layers = 2max_grad_norm = 5.0an_lstm = rnn_cell.BasicLSTMCell(hidden_size)multi_lstm = rnn_cell.MultiRNNCell([an_lstm] * num_layers)x = tf.placeholder(tf.int32, [batch_size, seq_length])y = tf.placeholder(tf.int32, [batch_size, seq_length])init_state = multi_lstm.zero_state(batch_size, tf.float32)with tf.variable_scope('rnn'): softmax_w = tf.get_variable('softmax_w', [hidden_size, vocab_size]) softmax_b = tf.get_variable('softmax_b', [vocab_size]) with tf.device('/cpu:0'): embedding = tf.get_variable('embedding', [vocab_size, hidden_size]) inputs = tf.nn.embedding_lookup(embedding, x) inputs = tf.split(1, seq_length, inputs) inputs = [tf.squeeze(input_, [1]) for input_ in inputs]def loop(prev): prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b) prev_symbol = 
tf.stop_gradient(tf.arg_max(prev, 1)) return tf.nn.embedding_lookup(embedding, prev_symbol)outputs, last_state = seq2seq.rnn_decoder(inputs, init_state, multi_lstm, loop_function=None, scope='rnn')# outputs is a list of 2D-Tensor with shape [batch_size , hidden_size]# the len(outputs)) is seq_length# first, hiddenlayer outputs belong to same sequence should be concatenated together out_conca = tf.concat(1, outputs) # [batch_size, hidden_size*seq_length]# second, to get the softmax prob and add the fc layer, the out_conca's second dim should# be reshaped to the size: hidden_size# [batch_size*seq_length, hidden_size]output = tf.reshape(out_conca, [-1, hidden_size])# [batch_size*seq_length, vocab_size]score = tf.nn.xw_plus_b(output, softmax_w, softmax_b)# [batch_size*seq_length, vocab_size]probs = tf.nn.softmax(score)loss = seq2seq.sequence_loss_by_example([score], [tf.reshape(y, [-1])], [tf.ones([batch_size*seq_length])], vocab_size)cost = tf.reduce_sum(loss)/ batch_size/seq_length######################################################################lr = 0.001max_grad_norm = 5tvars = tf.trainable_variables()grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)# opt = tf.train.GradientDescentOptimizer(lr)opt = tf.train.AdamOptimizer(lr)optimizer = opt.apply_gradients(zip(grads, tvars))######################################################################init = tf.initialize_all_variables()sess = tf.Session()sess.run(init)epoch = 20batch_size = 100snapshot = 5save_step = 1saver = tf.train.Saver()loss_cache = []for ep in xrange(epoch): avg_loss = 0 state = sess.run(init_state) for nb in xrange(num_batches): rand_idx = np.random.randint(num_batches) batch_x, batch_y = xbatches[rand_idx], ybatches[rand_idx] train_loss, state, _ = sess.run([cost, last_state, optimizer], feed_dict={x:batch_x, y:batch_y, init_state: state}) avg_loss += train_loss/num_batches loss_cache.append(avg_loss) if ep % snapshot ==0: print 'Epoch: %d/%d, loss: %.4f'%(ep, epoch, 
avg_loss) if ep % save_step == 0: saver.save(sess, save_path='net_snapshot/rnn_char_net_tfmodel', global_step=ep) plt.figure(1)plt.plot(range(len(loss_cache)), loss_cache, 'b-', label='loss')plt.legend(loc = 'upper right')plt.show()
0 1
- Tensorflow: recurrent neural network char-level 0
- Tensorflow: recurrent neural network char-level 1
- Tensorflow: recurrent neural network (mnist basic)
- Recurrent Neural Network (RNN)
- lecture10,Recurrent Neural Network
- CS231N-10-Recurrent Neural Network
- tensorflow 的 Recurrent Neural Networks
- TensorFlow Neural Network Lab
- Tensorflow API: Neural network
- tensorflow编程: Neural Network
- Recurrent Neural Network 学习之路
- 回归神经网络RNN(Recurrent Neural network)
- 机器学习: Python with Recurrent Neural Network
- 【论文笔记】Recurrent Neural Network Regularization
- 详解循环神经网络(Recurrent Neural Network)
- 详解循环神经网络(Recurrent Neural Network)
- 循环神经网络(Recurrent Neural Network)
- Tensorflow: Convolutional Neural Network Basic
- JavaScript之全面理解面向对象的JS
- eclipse自动补全的设置
- IOS 如何选择delegate、notification、KVO?
- 表连接例子
- MyBatis XML 映射配置文件
- Tensorflow: recurrent neural network char-level 0
- JZ2440 启动NFS网络文件系统_初试led驱动
- iTextSharp 绘制字体中特殊符号到PDF
- CUDA编程(一):背景与安装
- 图解Android View的scrollTo(),scrollBy(),getScrollX(), getScrollY()
- AsyncTask异步任务类使用
- TableViewController中的搜索框和mj刷新配合使用
- picasso-强大的Android图片下载缓存库
- 善用iOS App中Web View控件的好处