Simple Recurrent Unit for Sentence Classification: a TensorFlow Implementation


I came across an article online that explains the idea behind SRU together with an implementation, but it is based on PyTorch. The link is here:

https://mp.weixin.qq.com/s/2I9_cF3F5MO3l5rE1VY2ng
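
For reference, the recurrence that the code below implements is the following (my summary of what the SRUCell computes; in the code the three weight matrices are packed into a single matrix W_u):

\tilde{x}_t = W x_t
f_t = \sigma(W_f x_t + b_f)
r_t = \sigma(W_r x_t + b_r)
c_t = f_t \odot c_{t-1} + (1 - f_t) \odot \tilde{x}_t
h_t = r_t \odot \tanh(c_t) + (1 - r_t) \odot x_t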


Here is a TensorFlow version. First, the SRUCell implementation:

import math
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import RNNCell


class SRUCell(RNNCell):
    def __init__(self, num_units):
        self.num_units = num_units

    @property
    def state_size(self):
        return (self.num_units, self.num_units)

    @property
    def output_size(self):
        return self.num_units

    def __call__(self, x, state, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            c, _ = state
            # x: [batch_size, x_size]; the highway connection below requires x_size == num_units
            x_size = x.get_shape().as_list()[1]
            W_u = tf.get_variable('W_u', [x_size, 3 * self.num_units])
            xh = tf.matmul(x, W_u)         # [batch_size, 3 * num_units]
            tx, f, r = tf.split(xh, 3, 1)  # three tensors of [batch_size, num_units]
            b_f = tf.get_variable('b_f', [self.num_units])
            b_r = tf.get_variable('b_r', [self.num_units])
            f = tf.sigmoid(f + b_f)
            r = tf.sigmoid(r + b_r)
            new_c = f * c + (1 - f) * tx   # element-wise multiply
            new_h = r * tf.tanh(new_c) + (1 - r) * x
            return new_h, (new_c, new_h)
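
A quick way to sanity-check the cell is to drive it through tf.nn.dynamic_rnn like any other RNNCell. This is a minimal sketch under my own assumptions: the cell above is saved as SRU_tensorflow.py, the batch size, sequence length and num_units are arbitrary, and because of the highway connection the input size must equal num_units. Depending on the exact TensorFlow 1.x version, an RNNCell subclass may also need to call the parent constructor in __init__.

import numpy as np
import tensorflow as tf
from SRU_tensorflow import SRUCell  # the cell defined above

batch_size, seq_len, num_units = 4, 10, 128
inputs = tf.placeholder(tf.float32, [None, seq_len, num_units], name='inputs')

cell = SRUCell(num_units)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(outputs, {inputs: np.random.randn(batch_size, seq_len, num_units)})
    print(out.shape)  # expect (4, 10, 128)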


Two variants are provided, a unidirectional SRU and a bidirectional one (the original LSTM/GRU methods are kept for comparison); see the model class below:

import tensorflow as tf
from SRU_tensorflow import SRUCell


class TextRnn(object):
    def __init__(self, config):
        self.config = config
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.isbirnn = self.config.isbirnn
        # if self.isbirnn:
        #     self.birnn()
        # else:
        #     self.rnn()
        if self.isbirnn:
            self.bisru()
        else:
            self.sru()

    def input_embedding(self):
        """Word embedding.
        The explicit GPU placement is removed here: the model is served on CPU
        in production, and pinning the graph to a GPU raises an error there.
        """
        # with tf.device('/gpu:0'):
        embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.embedding_dim])
        _inputs = tf.nn.embedding_lookup(embedding, self.input_x)
        return _inputs

    def rnn(self):
        def lstm_cell():
            """LSTM cell"""
            return tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)

        def gru_cell():
            """GRU cell"""
            return tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)

        def dropout():
            """Wrap each RNN cell with a dropout layer."""
            if self.config.rnn == 'lstm':
                cell = lstm_cell()
            else:
                cell = gru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        embedding_inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            # stack num_layers RNN layers
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            # e.g. _outputs has shape [30, 100, 128]; last then has shape [30, 128]
            last = _outputs[:, -1, :]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1")
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes, name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def birnn(self):
        def lstm_cell():
            """Forward and backward LSTM cells"""
            cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            return cell_fw, cell_bw

        def gru_cell():
            """Forward and backward GRU cells"""
            cell_fw = tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)
            cell_bw = tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)
            return cell_fw, cell_bw

        embedding_inputs = self.input_embedding()
        with tf.name_scope("birnn"):
            if self.config.rnn == 'lstm':
                cell_fw, cell_bw = lstm_cell()
            else:
                cell_fw, cell_bw = gru_cell()  # fixed: the original assigned both cells to cell_fw
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedding_inputs,
                                                                     dtype=tf.float32)
            outputs = tf.concat(outputs, 2)  # concatenate the forward and backward outputs
            last = tf.transpose(outputs, [1, 0, 2])[-1]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1",
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                 bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda))
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes,
                                          kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def sru(self):
        def sru_cell():
            """SRU cell"""
            # note: the SRUCell highway connection assumes the input size equals
            # num_units, so embedding_dim should equal hidden_dim // 2 here
            return SRUCell(self.config.hidden_dim // 2)

        def dropout():
            """Wrap each SRU cell with a dropout layer."""
            cell = sru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        embedding_inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            # stack num_layers SRU layers
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            # e.g. _outputs has shape [30, 100, 128]; last then has shape [30, 128]
            last = _outputs[:, -1, :]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1")
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes, name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def bisru(self):
        def sru():
            """Forward and backward SRU cells"""
            cell_fw = SRUCell(self.config.hidden_dim // 2)
            cell_bw = SRUCell(self.config.hidden_dim // 2)
            return cell_fw, cell_bw

        embedding_inputs = self.input_embedding()
        with tf.name_scope("birnn"):
            cell_fw, cell_bw = sru()
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedding_inputs,
                                                                     dtype=tf.float32)  # fixed: the original passed cell_fw twice
            outputs = tf.concat(outputs, 2)  # concatenate the forward and backward outputs
            last = tf.transpose(outputs, [1, 0, 2])[-1]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1",
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                 bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda))
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes,
                                          kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
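
The model and the training script below both read settings from a config object. TRNNConfig is not shown in the original post, so here is a minimal sketch of the fields the code actually uses; the class and all values are placeholders of mine, not the author's settings.

class TRNNConfig(object):
    """Hypothetical stand-in for the configuration class used above."""
    seq_length = 100          # padded sentence length (input_x width)
    num_classes = 2           # number of labels (input_y width)
    vocab_size = 5000         # overwritten after the vocabulary is loaded
    embedding_dim = 64        # should equal hidden_dim // 2 for the SRU highway connection
    hidden_dim = 128          # dense layer size; each SRU direction uses hidden_dim // 2
    num_layers = 2            # stacked layers in the unidirectional model
    rnn = 'lstm'              # 'lstm' or 'gru' for the baseline rnn()/birnn() variants
    isbirnn = True            # choose bisru() instead of sru()
    l2_reg_lambda = 0.01      # L2 regularisation for the dense layers
    learning_rate = 1e-3
    batch_size = 64
    num_epochs = 10
    print_per_batch = 100     # matches the "Iter: 100, 200, ..." output below
    dropout_keep_prob = 0.8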



Running the SRU model:

from rnn_model import TextRnn
from configuration import TRNNConfig, Path, TCNNConfig
from data_utils_cut import preocess_file, batch_iter
import time
import tensorflow as tf
import os
from datetime import timedelta

# basepath = "/Users/shuubiasahi/Documents/python"
# noexperience
# business
# together
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify/tensorflow/sex/cnn.txt"
vocapath = basepath + "/credit-tftextclassify/tensorflow/sex/vocab.txt"
modelpath = basepath + "/credit-tftextclassify/tensorflow/sex/"
print(modelpath, "start training")


def run_epoch(rnn=True):
    # load data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words = preocess_file(data_path, vocapath)

    if rnn:
        print('Using RNN model...')
        config = TRNNConfig()
        config.vocab_size = len(words)
        print("vocab_size is:", config.vocab_size)
        model = TextRnn(config)

    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # configure TensorBoard
    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train)),
                             config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Build the feed dict for the model."""
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_):
        """Model evaluation.
        Running all the data at once would run out of memory, so evaluate
        in batches and aggregate the results.
        """
        batch_eval = batch_iter(list(zip(x_, y_)), 128, 1)
        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.keep_prob] = 1.0
            loss, acc = session.run([model.loss, model.acc], feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len
        return total_loss / cnt, total_acc / cnt

    # training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.keep_prob] = config.dropout_keep_prob

        if i % 5 == 0:  # write training summaries to TensorBoard every 5 batches
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)

        if i % print_per_batch == print_per_batch - 1:  # report training-set performance periodically
            loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val)  # validation not needed for now
            # elapsed time
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))
            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))

        # if i % 10 == 0 and i > 0:
        #     graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def, ["keep_prob", "input_x", "score/pred_y"])
        #     tf.train.write_graph(graph, ".", "/Users/shuubiasahi/Desktop/tensorflow/modelsavegraph/graph.db", as_text=False)

        if i % 500 == 0 and i > 0:
            graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def,
                                                                 ["keep_prob", "input_x", "score/pred_y"])
            if rnn:
                tf.train.write_graph(graph, ".", modelpath + "graphrnn.model", as_text=False)
                print("RNN model saved at step {0}".format(i))
            else:
                tf.train.write_graph(graph, ".", modelpath + "graph.model", as_text=False)
                print("CNN model saved at step {0}".format(i))

        session.run(model.optim, feed_dict=feed_dict)  # run the optimization step

    # finally, evaluate on the test set
    session.close()


if __name__ == '__main__':
    # run_epoch(rnn=True)
    run_epoch(rnn=True)
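
Since the script freezes the graph with convert_variables_to_constants, the saved graphrnn.model can later be loaded for CPU inference roughly as follows. This is a sketch: x_test stands for an already-padded batch of word-id sequences, and modelpath is the same directory as above; the tensor names come from the node list used when freezing.

import tensorflow as tf

with tf.gfile.GFile(modelpath + "graphrnn.model", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")
    input_x = graph.get_tensor_by_name("input_x:0")
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    pred_y = graph.get_tensor_by_name("score/pred_y:0")

with tf.Session(graph=graph) as sess:
    probs = sess.run(pred_y, feed_dict={input_x: x_test, keep_prob: 1.0})
    print(probs)  # class probabilities per sentence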

Results:

Iter: 100, Train Loss: 0.33, Train Acc: 84.38%, Time: 0:00:46
Iter: 200, Train Loss: 0.24, Train Acc: 91.41%, Time: 0:01:33
Iter: 300, Train Loss: 0.22, Train Acc: 90.62%, Time: 0:02:19
Iter: 400, Train Loss: 0.28, Train Acc: 92.19%, Time: 0:03:04
Iter: 500, Train Loss: 0.11, Train Acc: 95.31%, Time: 0:03:49
Converted 11 variables to const ops.
RNN model saved at step 500
Iter: 600, Train Loss: 0.12, Train Acc: 94.53%, Time: 0:04:35
Iter: 700, Train Loss: 0.086, Train Acc: 97.66%, Time: 0:05:21
Iter: 800, Train Loss: 0.21, Train Acc: 93.75%, Time: 0:06:07
Iter: 900, Train Loss: 0.12, Train Acc: 96.09%, Time: 0:06:52
Iter: 1000, Train Loss: 0.086, Train Acc: 97.66%, Time: 0:07:38
During training I did not observe anything like the many-fold speedup reported in the paper. I am not sure why, or whether I got something wrong in my implementation; any pointers would be appreciated.
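
One plausible reason (my guess, not verified against the author's setup): the speedup in the SRU paper comes from computing the big matrix multiplication for all time steps at once and fusing only the cheap element-wise recurrence into the sequential loop, whereas wrapping SRUCell in tf.nn.dynamic_rnn still performs one matmul per time step inside the loop, so it behaves like any other RNN cell speed-wise. Below is a sketch of the hoisted formulation; the function and variable names are mine, and the fused CUDA kernel used in the paper would still be needed for the full speedup.

import tensorflow as tf

def sru_layer(inputs, num_units, scope="sru_layer"):
    """SRU over a whole sequence; inputs has shape [batch, time, num_units]."""
    with tf.variable_scope(scope):
        W_u = tf.get_variable("W_u", [num_units, 3 * num_units])
        b_f = tf.get_variable("b_f", [num_units])
        b_r = tf.get_variable("b_r", [num_units])

        # One matmul for every time step at once; this is the part the paper
        # parallelises, instead of one matmul per step inside the RNN loop.
        batch = tf.shape(inputs)[0]
        flat = tf.reshape(inputs, [-1, num_units])
        xh = tf.reshape(tf.matmul(flat, W_u), [batch, -1, 3 * num_units])
        tx, f, r = tf.split(xh, 3, axis=2)
        f = tf.sigmoid(f + b_f)
        r = tf.sigmoid(r + b_r)

        # Only the element-wise state update remains sequential.
        def step(c_prev, elems):
            tx_t, f_t = elems
            return f_t * c_prev + (1.0 - f_t) * tx_t

        c = tf.scan(step,
                    (tf.transpose(tx, [1, 0, 2]), tf.transpose(f, [1, 0, 2])),
                    initializer=tf.zeros_like(inputs[:, 0, :]))
        c = tf.transpose(c, [1, 0, 2])              # back to [batch, time, num_units]
        return r * tf.tanh(c) + (1.0 - r) * inputs  # highway output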