Implementing an RNN Attention Network


This post adds an attention mechanism on top of a GRU/LSTM network; the full implementation follows.
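
Before going through the code: the attention used here is the usual soft (additive) attention over the bidirectional RNN outputs h_t. Each time step is scored against a learned context vector u, the scores are softmax-normalized, and the step outputs are combined as a weighted sum. With the parameters W, b and u that appear in the model code below, the computation is roughly:

    v_t     = tanh(W * h_t + b)
    alpha_t = exp(u' * v_t) / sum_k exp(u' * v_k)
    output  = sum_t alpha_t * h_t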


Utility functions (data_utils):

#!/usr/bin/python
# -*- coding: utf-8 -*-
from collections import Counter
import tensorflow.contrib.keras as kr
import numpy as np
import os
import codecs
import tensorflow as tf


def _read_file(filename):
    """Read the data file."""
    contents = []
    labels = []
    # use the codecs module: Python 2.x's open() cannot specify utf-8 encoding,
    # so this makes the code more robust
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            try:
                label, content = line.strip().split('\t')
                contents.append(content.strip().split(" "))
                labels.append(label)
            except Exception as e:
                pass
    return contents, labels


def _read_vocab(filename):
    """Read the vocabulary list."""
    words = list(map(lambda line: line.strip(), codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


def read_vocab_predict(filename):
    """Read the vocabulary (used at prediction time)."""
    words = list(map(lambda line: line.strip(), codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return word_to_id


def _read_category():
    """Return the classification categories and their id mapping."""
    categories = ["0", "1"]
    cat_to_id = dict(zip(categories, range(len(categories))))
    return categories, cat_to_id


def to_words(content, words):
    """Convert id-encoded content back to text."""
    return ''.join(words[x] for x in content)


def _file_to_ids(filename, word_to_id, max_len=300):
    """Convert a file to its id representation."""
    _, cat_to_id = _read_category()
    contents, labels = _read_file(filename)
    data_id = []
    label_id = []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
        label_id.append(cat_to_id[labels[i]])
    # pad the texts to a fixed length with keras pad_sequences
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_len)
    y_pad = kr.utils.to_categorical(label_id)
    return x_pad, y_pad


def preocess_file(data_path, vocapath, seq_length=300):
    """Return all the data at once."""
    words, word_to_id = _read_vocab(vocapath)
    x_train, y_train = _file_to_ids(data_path, word_to_id, seq_length)
    # x_test, y_test = _file_to_ids(os.path.join(data_path, 'cnews.test.txt'), word_to_id, seq_length)
    # x_val, y_val = _file_to_ids(os.path.join(data_path, 'cnews.val.txt'), word_to_id, seq_length)
    return x_train, y_train, words
# def preocess_file_test(data_path="/Users/shuubiasahi/Desktop/rnn.txt", seq_length=300):
#     """Return all the data at once (debug version)."""
#     words, word_to_id = _read_vocab("vocab_cnews.txt")
#     print("words length is:", len(words))
#     print("word_to_id length is:", len(word_to_id))
#     x_train, y_train = _file_to_ids_test(data_path, word_to_id, seq_length)
#     # x_test, y_test = _file_to_ids(os.path.join(data_path, 'cnews.test.txt'), word_to_id, seq_length)
#     # x_val, y_val = _file_to_ids(os.path.join(data_path, 'cnews.val.txt'), word_to_id, seq_length)
#     return x_train, y_train, words
#
# def _file_to_ids_test(filename, word_to_id, max_len=300):
#     """Convert a file to its id representation (debug version with prints)."""
#     _, cat_to_id = _read_category()
#     contents, labels = _read_file(filename)
#     data_id = []
#     label_id = []
#     for i in range(len(contents)):
#         data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
#         label_id.append(cat_to_id[labels[i]])
#     print("contents is:", contents)
#     print("data id is:", data_id)
#     print("labels is:", labels)
#     print("label id is:", label_id)
#     # pad the texts to a fixed length with keras pad_sequences
#     x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_len)
#     y_pad = kr.utils.to_categorical(label_id)
#     print("xpad is:", x_pad)
#     print("ypad is:", y_pad)
#     return x_pad, y_pad
def file_to_ids_single(content, word_to_id, maxlen=300):
    """Convert a single piece of text to a padded id sequence (character level)."""
    contents = []
    contents.append(list(content.lower()))
    data_id = []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
    # print("data_id is :", data_id)
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, maxlen)
    return x_pad


def batch_iter(data, batch_size=64, num_epochs=5):
    """Generate batches of data."""
    data = np.array(data)
    data_size = len(data)
    num_batchs_per_epochs = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # reshuffle the data at the start of every epoch
        indices = np.random.permutation(np.arange(data_size))
        shuffled_data = data[indices]
        for batch_num in range(num_batchs_per_epochs):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]


if __name__ == '__main__':
    # expected: data_id is [[266, 1548, 255]]
    words, word_to_id = _read_vocab("vocab_cnews.txt")
    print("len word_to_id:", len(word_to_id))
    result = file_to_ids_single("日你个香蕉芭乐", word_to_id=word_to_id)
    print(result[0][299])
    print(result)
    # build_vocab(Path.baseabusepath)
    # x_train, y_train, words = preocess_file()
    # print(x_train.shape, y_train.shape)
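
For reference, _read_file expects one sample per line: a label ("0" or "1"), a tab, then the text as space-separated tokens. Below is a minimal, hypothetical sketch of how the helpers above fit together; the file names are placeholders, not the paths used later in the training script.

# train.txt (hypothetical): each line is "<label>\t<space-separated tokens>", e.g.
#   1	这 是 一 条 正 样 本
#   0	这 是 一 条 负 样 本
x_train, y_train, words = preocess_file("train.txt", "vocab.txt", seq_length=300)
print(x_train.shape, y_train.shape)          # (num_samples, 300), (num_samples, 2)
for batch in batch_iter(list(zip(x_train, y_train)), batch_size=64, num_epochs=1):
    x_batch, y_batch = zip(*batch)           # tuples of padded id vectors / one-hot labels
    break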

Config (configuration):

class AttentionConfig(object):
    embedding_dim = 64       # word embedding dimension
    seq_len = 300            # sequence length (must match the padding length used in data_utils)
    num_classes = 2          # number of classes
    vocab_size = 9000        # vocabulary size (overwritten from the vocab file at training time)
    num_rnn_layers = 2       # number of RNN layers (not used by the model below)
    rnn_size = 128           # hidden units per RNN cell
    rnn = 'gru'              # 'lstm' or 'gru' (informational; the model switches on isgru)
    keep_prob = 0.6          # dropout keep probability
    learning_rate = 1e-3     # learning rate
    batch_size = 128         # training batch size
    num_epochs = 10          # total number of epochs
    print_per_batch = 100    # report metrics every this many batches
    l2_reg_lambda = 0.006    # L2 regularization strength
    attention_dim = 100      # size of the attention layer
    max_grad_norm = 5        # gradient clipping norm (not applied by the optimizer below)
    isgru = False            # True: GRU cells, False: LSTM cells


Model (attentionmodelrnn):

import tensorflow as tf


class RnnAttention:
    def __init__(self, config):
        # define input placeholders
        self.config = config
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_len], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name="input_y")
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.birnn()
        # self.mubirnn()

    def input_embedding(self):
        """Word embedding.
        The GPU device pinning has been removed: the model is served on CPU,
        and a graph pinned to a GPU would fail there.
        """
        # with tf.device('/gpu:0'):
        embeddings = tf.get_variable("embedding", [self.config.vocab_size, self.config.embedding_dim])
        inputs = tf.nn.embedding_lookup(embeddings, self.input_x)
        return inputs

    def birnn(self):
        inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            def gru():
                rnn_cell_fw = tf.contrib.rnn.GRUCell(num_units=self.config.rnn_size)
                rnn_cell_bw = tf.contrib.rnn.GRUCell(num_units=self.config.rnn_size)
                return rnn_cell_bw, rnn_cell_fw

            def lstm():
                rnn_cell_fw = tf.contrib.rnn.LSTMCell(num_units=self.config.rnn_size)
                rnn_cell_bw = tf.contrib.rnn.LSTMCell(num_units=self.config.rnn_size)
                return rnn_cell_bw, rnn_cell_fw

            if self.config.isgru:
                rnn_cell_bw, rnn_cell_fw = gru()
            else:
                rnn_cell_bw, rnn_cell_fw = lstm()
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=rnn_cell_fw,
                                                             cell_bw=rnn_cell_bw,
                                                             inputs=inputs, dtype=tf.float32)
            # concatenate forward and backward outputs: [batch, seq_len, 2 * rnn_size]
            rnn_outputs = tf.concat(rnn_outputs, 2)
        # an attention layer over the Bi-RNN outputs
        with tf.name_scope("attention"):
            sequence_length = rnn_outputs.shape[1].value  # length of the sequences from the RNN layer
            hidden_size = rnn_outputs.shape[2].value      # hidden size of the RNN layer (2 * rnn_size)
            W = tf.Variable(
                tf.truncated_normal([hidden_size, self.config.attention_dim], stddev=0.1),
                name="W")
            b = tf.Variable(tf.random_normal([self.config.attention_dim], stddev=0.1), name="b")
            u = tf.Variable(tf.random_normal([self.config.attention_dim], stddev=0.1), name="u")
            v = tf.tanh(tf.matmul(tf.reshape(rnn_outputs, [-1, hidden_size]), W) + tf.reshape(b, [1, -1]))
            vu = tf.matmul(v, tf.reshape(u, [-1, 1]))
            exps = tf.reshape(tf.exp(vu), [-1, sequence_length])
            alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
            # the Bi-RNN output is reduced to a single vector by the attention weights
            output = tf.reduce_sum(rnn_outputs * tf.reshape(alphas, [-1, sequence_length, 1]), 1)
            # add L2 penalties for the attention parameters to the regularization losses
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(W), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(b), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(u), tf.GraphKeys.REGULARIZATION_LOSSES)
            dropout_outputs = tf.nn.dropout(output, self.keep_prob, name="dropout")
        with tf.name_scope("score"):
            W = tf.Variable(
                tf.truncated_normal([dropout_outputs.shape[1].value, self.config.num_classes], stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.config.num_classes]), name="b")
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(W), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(b), tf.GraphKeys.REGULARIZATION_LOSSES)
            self.scores = tf.nn.xw_plus_b(dropout_outputs, W, b, name="scores")
            self.pred_y = tf.nn.softmax(self.scores, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # loss: cross entropy plus the regularization losses added above
        with tf.name_scope("loss"):
            tf.losses.softmax_cross_entropy(logits=self.scores, onehot_labels=self.input_y)
            self.cost = tf.losses.get_total_loss()

        # optimizer
        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.train_op = optimizer.minimize(self.cost)

        # accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
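
To make the shape bookkeeping in the attention block easier to follow, here is a small NumPy sketch of the same computation. The variable names mirror the TensorFlow code above; the sizes are illustrative, not taken from a real run.

import numpy as np

batch, seq_len, hidden = 4, 300, 256        # hidden = 2 * rnn_size after the fw/bw concat
attention_dim = 100
rnn_outputs = np.random.randn(batch, seq_len, hidden)
W = np.random.randn(hidden, attention_dim) * 0.1
b = np.random.randn(attention_dim) * 0.1
u = np.random.randn(attention_dim) * 0.1

v = np.tanh(rnn_outputs.reshape(-1, hidden) @ W + b)      # (batch*seq_len, attention_dim)
vu = v @ u                                                 # (batch*seq_len,)
exps = np.exp(vu).reshape(batch, seq_len)
alphas = exps / exps.sum(axis=1, keepdims=True)            # attention weights, rows sum to 1
output = (rnn_outputs * alphas[:, :, None]).sum(axis=1)    # (batch, hidden) sentence vector
print(output.shape, alphas.sum(axis=1))                    # (4, 256), weights per example sum to 1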


Training script:

from attentionmodelrnn import RnnAttention
from configuration import AttentionConfig
from data_utils import preocess_file, batch_iter
import time
import tensorflow as tf
import os
from datetime import timedelta

# basepath="/Users/shuubiasahi/Documents/python"
# noexperience
# business
# together
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify/tensorflow/noexperience/cnn.txt"
vocapath = basepath + "/credit-tftextclassify/tensorflow/noexperience/vocab.txt"
modelpath = basepath + "/credit-tftextclassify/tensorflow/noexperience/"
print(modelpath, "attention model training starts")
def run_epoch(rnn=False):
    # load data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words = preocess_file(data_path, vocapath)
    config = AttentionConfig()
    if config.isgru:
        print('Using attention gru model...')
    else:
        print('Using attention lstm model...')
    config.vocab_size = len(words)
    print("vocab_size is:", config.vocab_size)
    model = RnnAttention(config)
    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # configure tensorboard
    tf.summary.scalar("loss", model.cost)
    tf.summary.scalar("accuracy", model.acc)
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train)),
                             config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Build the feed dict for one batch."""
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch
        }
        return feed_dict, len(x_batch)
    def evaluate(x_, y_):
        """Evaluate the model.
        Running all the data at once would run out of memory, so evaluate in
        batches and aggregate the results.
        """
        batch_eval = batch_iter(list(zip(x_, y_)), 128, 1)
        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.keep_prob] = 1.0
            loss, acc = session.run([model.cost, model.acc],
                                    feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len
        return total_loss / cnt, total_acc / cnt
    # training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.keep_prob] = config.keep_prob
        if i % 5 == 0:  # write training summaries to tensorboard every 5 batches
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)
        if i % print_per_batch == print_per_batch - 1:  # report training metrics every print_per_batch batches
            loss_train, acc_train = session.run([model.cost, model.acc],
                                                feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val)  # no validation set for now
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))
            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))
        if i % 500 == 0 and i > 0:
            # freeze the graph and export it for serving
            graph = tf.graph_util.convert_variables_to_constants(
                session, session.graph_def, ["keep_prob", "input_x", "score/pred_y"])
            tf.train.write_graph(graph, ".", modelpath + "graphattention.model",
                                 as_text=False)
            print("attention model saved at step {0}".format(i))
        session.run(model.train_op, feed_dict=feed_dict)  # run the optimizer
    session.close()


if __name__ == '__main__':
    # run_epoch(rnn=True)
    run_epoch(rnn=False)
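
Since the training loop exports a frozen graph (graphattention.model) containing the nodes keep_prob, input_x and score/pred_y, inference can be done by importing that GraphDef. Below is a rough sketch under those assumptions; the vocabulary path, model path, and input text are placeholders.

import tensorflow as tf
from data_utils import read_vocab_predict, file_to_ids_single

word_to_id = read_vocab_predict("vocab.txt")                 # placeholder path
with tf.gfile.GFile("graphattention.model", "rb") as f:      # frozen graph exported above
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Session() as sess:
    tf.import_graph_def(graph_def, name="")
    input_x = sess.graph.get_tensor_by_name("input_x:0")
    keep_prob = sess.graph.get_tensor_by_name("keep_prob:0")
    pred_y = sess.graph.get_tensor_by_name("score/pred_y:0")
    x = file_to_ids_single("这是一条测试文本", word_to_id, maxlen=300)
    probs = sess.run(pred_y, feed_dict={input_x: x, keep_prob: 1.0})
    print(probs)    # class probabilities, shape (1, 2)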



Results:

2017-11-25 13:48:37.183112: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 3920 get requests, put_count=6089 evicted_count=2000 eviction_rate=0.328461 and unsatisfied allocation rate=0
2017-11-25 13:48:37.995920: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 5649 get requests, put_count=8853 evicted_count=3000 eviction_rate=0.338868 and unsatisfied allocation rate=0
2017-11-25 13:48:39.200365: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 2297 get requests, put_count=3569 evicted_count=1000 eviction_rate=0.280191 and unsatisfied allocation rate=0
2017-11-25 13:48:40.431985: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 2448 get requests, put_count=3810 evicted_count=1000 eviction_rate=0.262467 and unsatisfied allocation rate=0
Iter: 100, Train Loss: 0.24, Train Acc: 95.31%, Time: 0:00:42
Iter: 200, Train Loss: 0.14, Train Acc: 96.09%, Time: 0:01:23
Iter: 300, Train Loss: 0.14, Train Acc: 95.31%, Time: 0:02:03
Iter: 400, Train Loss: 0.1, Train Acc: 97.66%, Time: 0:02:44
Iter: 500, Train Loss: 0.27, Train Acc: 89.84%, Time: 0:03:25
Converted 10 variables to const ops.
attention model saved at step 500
Iter: 600, Train Loss: 0.16, Train Acc: 93.75%, Time: 0:04:06
Iter: 700, Train Loss: 0.15, Train Acc: 96.09%, Time: 0:04:46
Iter: 800, Train Loss: 0.14, Train Acc: 94.53%, Time: 0:05:26
Iter: 900, Train Loss: 0.1, Train Acc: 95.31%, Time: 0:06:06
Iter: 1000, Train Loss: 0.11, Train Acc: 93.75%, Time: 0:06:47
Converted 10 variables to const ops.
attention model saved at step 1000
Iter: 1100, Train Loss: 0.044, Train Acc: 99.22%, Time: 0:07:28
Iter: 1200, Train Loss: 0.23, Train Acc: 90.62%, Time: 0:08:09
Iter: 1300, Train Loss: 0.11, Train Acc: 96.88%, Time: 0:08:51
Iter: 1400, Train Loss: 0.077, Train Acc: 96.88%, Time: 0:09:31
Iter: 1500, Train Loss: 0.087, Train Acc: 96.09%, Time: 0:10:11
Converted 10 variables to const ops.
attention model saved at step 1500
Iter: 1600, Train Loss: 0.11, Train Acc: 96.88%, Time: 0:10:52
Iter: 1700, Train Loss: 0.099, Train Acc: 95.31%, Time: 0:11:32
Iter: 1800, Train Loss: 0.08, Train Acc: 96.09%, Time: 0:12:13
Iter: 1900, Train Loss: 0.1, Train Acc: 96.88%, Time: 0:12:53
Iter: 2000, Train Loss: 0.13, Train Acc: 94.53%, Time: 0:13:34
Converted 10 variables to const ops.