Implementing an RNN Attention Network
This post adds an attention mechanism on top of GRU/LSTM networks. Let's walk through the implementation:
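Before the code, here is the additive attention that the model below computes over the bidirectional RNN outputs. With h_t the concatenated forward/backward state at step t (dimension 2 * rnn_size) and W, b, u the learned attention parameters of size attention_dim (the symbols match the variables in the model's attention scope):

$$v_t = \tanh(h_t W + b), \qquad e_t = v_t^{\top} u, \qquad \alpha_t = \frac{\exp(e_t)}{\sum_{k=1}^{T}\exp(e_k)}, \qquad c = \sum_{t=1}^{T} \alpha_t\, h_t$$

The context vector c then goes through dropout and a fully connected softmax layer to produce the class probabilities.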
utils:
#!/usr/bin/python
# -*- coding: utf-8 -*-
from collections import Counter
import tensorflow.contrib.keras as kr
import numpy as np
import os
import codecs
import tensorflow as tf
def _read_file(filename):
    """Read the data file: one sample per line, label and text separated by a tab."""
    contents = []
    labels = []
    # Use the codecs module: the Python 2.x open() does not support specifying
    # utf-8 encoding, so this keeps the code robust across versions.
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            try:
                label, content = line.strip().split('\t')
                contents.append(content.strip().split(" "))
                labels.append(label)
            except Exception:
                # Skip malformed lines
                pass
    return contents, labels
def _read_vocab(filename):
    """Read the vocabulary list and build a word-to-id mapping."""
    words = list(map(lambda line: line.strip(),
                     codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id
def read_vocab_predict(filename):
    """Read the vocabulary and return only the word-to-id mapping (for prediction)."""
    words = list(map(lambda line: line.strip(),
                     codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return word_to_id
def _read_category():
    """Return the target categories and a category-to-id mapping."""
    categories = ["0", "1"]
    cat_to_id = dict(zip(categories, range(len(categories))))
    return categories, cat_to_id
def to_words(content, words):
    """Convert id-encoded content back to text."""
    return ''.join(words[x] for x in content)
def _file_to_ids(filename, word_to_id, max_len=300):
    """Convert a file to its id representation."""
    _, cat_to_id = _read_category()
    contents, labels = _read_file(filename)
    data_id = []
    label_id = []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
        label_id.append(cat_to_id[labels[i]])
    # Use keras' pad_sequences to pad the texts to a fixed length
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_len)
    y_pad = kr.utils.to_categorical(label_id)
    return x_pad, y_pad
def preocess_file(data_path, vocapath, seq_length=300):
    """Return all the data at once."""
    words, word_to_id = _read_vocab(vocapath)
    x_train, y_train = _file_to_ids(data_path, word_to_id, seq_length)
    # x_test, y_test = _file_to_ids(os.path.join(data_path, 'cnews.test.txt'), word_to_id, seq_length)
    # x_val, y_val = _file_to_ids(os.path.join(data_path, 'cnews.val.txt'), word_to_id, seq_length)
    return x_train, y_train, words
def file_to_ids_single(content, word_to_id, maxlen=300):
    """Convert a single raw text string to a padded id sequence (for online prediction)."""
    contents = [list(content.lower())]
    data_id = []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, maxlen)
    return x_pad
def batch_iter(data, batch_size=64, num_epochs=5):
    """Generate shuffled batches of data for the given number of epochs."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        indices = np.random.permutation(np.arange(data_size))
        shuffled_data = data[indices]
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]
if __name__ == '__main__':
    # Expected: data_id is [[266, 1548, 255]]
    words, word_to_id = _read_vocab("vocab_cnews.txt")
    print("len word_to_id:", len(word_to_id))
    result = file_to_ids_single("日你个香蕉芭乐", word_to_id=word_to_id)
    print(result[0][299])
    print(result)
    # build_vocab(Path.baseabusepath)
    # x_train, y_train, words = preocess_file()
    # print(x_train.shape, y_train.shape)
config:
class AttentionConfig(object):
    embedding_dim = 64        # word embedding dimension
    seq_len = 300             # sequence length
    num_classes = 2           # number of classes
    vocab_size = 9000         # vocabulary size
    num_rnn_layers = 2        # number of RNN layers
    rnn_size = 128            # hidden units per RNN layer
    rnn = 'gru'               # 'lstm' or 'gru' (informational; the model switches on isgru)
    keep_prob = 0.6           # dropout keep probability
    learning_rate = 1e-3      # learning rate
    batch_size = 128          # training batch size
    num_epochs = 10           # total number of epochs
    print_per_batch = 100     # report metrics every N batches
    l2_reg_lambda = 0.006     # L2 regularization strength
    attention_dim = 100       # attention layer dimension
    max_grad_norm = 5         # gradient clipping threshold
    isgru = False             # True: GRU cells, False: LSTM cells
model:
import tensorflow as tf


class RnnAttention:
    def __init__(self, config):
        # Define input placeholders
        self.config = config
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_len], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name="input_y")
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.birnn()

    def input_embedding(self):
        """Word embedding layer.
        The explicit GPU placement is removed: the model is served on CPU,
        and pinning the embedding to a GPU makes the model fail at serving time.
        """
        # with tf.device('/gpu:0'):
        embeddings = tf.get_variable("embedding", [self.config.vocab_size, self.config.embedding_dim])
        inputs = tf.nn.embedding_lookup(embeddings, self.input_x)
        return inputs

    def birnn(self):
        inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            def gru():
                rnn_cell_fw = tf.contrib.rnn.GRUCell(num_units=self.config.rnn_size)
                rnn_cell_bw = tf.contrib.rnn.GRUCell(num_units=self.config.rnn_size)
                return rnn_cell_bw, rnn_cell_fw

            def lstm():
                rnn_cell_fw = tf.contrib.rnn.LSTMCell(num_units=self.config.rnn_size)
                rnn_cell_bw = tf.contrib.rnn.LSTMCell(num_units=self.config.rnn_size)
                return rnn_cell_bw, rnn_cell_fw

            if self.config.isgru:
                rnn_cell_bw, rnn_cell_fw = gru()
            else:
                rnn_cell_bw, rnn_cell_fw = lstm()
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=rnn_cell_fw,
                                                             cell_bw=rnn_cell_bw,
                                                             inputs=inputs, dtype=tf.float32)
            # Concatenate forward and backward outputs: [batch, seq_len, 2 * rnn_size]
            rnn_outputs = tf.concat(rnn_outputs, 2)

        # Attention layer
        with tf.name_scope("attention"):
            sequence_length = rnn_outputs.shape[1].value  # length of the sequences from the RNN layer
            hidden_size = rnn_outputs.shape[2].value      # hidden size of the RNN layer (2 * rnn_size)
            W = tf.Variable(
                tf.truncated_normal([hidden_size, self.config.attention_dim], stddev=0.1),
                name="W")
            b = tf.Variable(tf.random_normal([self.config.attention_dim], stddev=0.1), name="b")
            u = tf.Variable(tf.random_normal([self.config.attention_dim], stddev=0.1), name="u")
            # v_t = tanh(h_t W + b), scored against the context vector u
            v = tf.tanh(tf.matmul(tf.reshape(rnn_outputs, [-1, hidden_size]), W) + tf.reshape(b, [1, -1]))
            vu = tf.matmul(v, tf.reshape(u, [-1, 1]))
            # Softmax over the time dimension
            exps = tf.reshape(tf.exp(vu), [-1, sequence_length])
            alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
            # The Bi-RNN outputs are reduced to a single vector with the attention weights
            output = tf.reduce_sum(rnn_outputs * tf.reshape(alphas, [-1, sequence_length, 1]), 1)
            # Add L2 regularization on the attention weights
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(W),
                               tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(b),
                               tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(u),
                               tf.GraphKeys.REGULARIZATION_LOSSES)

        dropout_outputs = tf.nn.dropout(output, self.keep_prob, name="dropout")

        with tf.name_scope("score"):
            W = tf.Variable(
                tf.truncated_normal([dropout_outputs.shape[1].value, self.config.num_classes], stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.config.num_classes]), name="b")
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(W), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(b), tf.GraphKeys.REGULARIZATION_LOSSES)
            self.scores = tf.nn.xw_plus_b(dropout_outputs, W, b, name="scores")
            self.pred_y = tf.nn.softmax(self.scores, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Loss: softmax cross-entropy plus the regularization losses added above
        with tf.name_scope("loss"):
            tf.losses.softmax_cross_entropy(logits=self.scores, onehot_labels=self.input_y)
            self.cost = tf.losses.get_total_loss()

        # Optimizer
        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.train_op = optimizer.minimize(self.cost)

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
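Note that the config defines max_grad_norm = 5, but the optimize block above calls minimize() directly and never clips gradients. If clipping is wanted, a minimal sketch of how that block inside birnn() could apply it (an assumption on my part; the original code does not do this):

        # Sketch only: gradient clipping using config.max_grad_norm
        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            grads_and_vars = optimizer.compute_gradients(self.cost)
            grads, tvars = zip(*grads_and_vars)
            clipped_grads, _ = tf.clip_by_global_norm(grads, self.config.max_grad_norm)
            self.train_op = optimizer.apply_gradients(zip(clipped_grads, tvars))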
run:
from attentionmodelrnn import RnnAttention
from configuration import AttentionConfig
from data_utils import preocess_file,batch_iter
import time
import tensorflow as tf
import os
from datetime import timedelta
#basepath="/Users/shuubiasahi/Documents/python"
#noexperience
#business
#together
basepath="/home/zhoumeixu"
data_path=basepath+"/credit-tftextclassify/tensorflow/noexperience/cnn.txt"
vocapath=basepath+"/credit-tftextclassify/tensorflow/noexperience/vocab.txt"
modelpath=basepath+"/credit-tftextclassify/tensorflow/noexperience/"
print(modelpath, "starting to train the attention model")
def run_epoch(rnn=False):
    # Load data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words = preocess_file(data_path, vocapath)
    config = AttentionConfig()
    if config.isgru:
        print('Using attention gru model...')
    else:
        print('Using attention lstm model...')
    config.vocab_size = len(words)
    print("vocab_size is:", config.vocab_size)
    model = RnnAttention(config)
    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # Configure TensorBoard
    tf.summary.scalar("loss", model.cost)
    tf.summary.scalar("accuracy", model.acc)
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # Generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train)),
                             config.batch_size, config.num_epochs)
    def feed_data(batch):
        """Build the feed_dict for a batch."""
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_):
        """
        Evaluate the model.
        Running all the data at once would OOM, so evaluate in batches and aggregate.
        """
        batch_eval = batch_iter(list(zip(x_, y_)), 128, 1)
        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.keep_prob] = 1.0
            # Note: the model exposes its loss as `cost`, not `loss`
            loss, acc = session.run([model.cost, model.acc], feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len
        return total_loss / cnt, total_acc / cnt
    # Training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.keep_prob] = config.keep_prob

        if i % 5 == 0:  # Write training summaries to TensorBoard every 5 batches
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)

        if i % print_per_batch == print_per_batch - 1:  # Report training metrics every print_per_batch batches
            loss_train, acc_train = session.run([model.cost, model.acc],
                                                feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val)  # validation set not needed for now

            # Elapsed time
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))

            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))

        if i % 500 == 0 and i > 0:
            # Freeze the graph and export it for serving
            graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def,
                                                                 ["keep_prob", "input_x", "score/pred_y"])
            tf.train.write_graph(graph, ".", modelpath + "graphattention.model",
                                 as_text=False)
            print("attention模型在第{0}步已经保存".format(i))

        session.run(model.train_op, feed_dict=feed_dict)  # Run one optimization step

    session.close()
if __name__ == '__main__':
    # run_epoch(rnn=True)
    run_epoch(rnn=False)
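The training loop exports a frozen graph (graphattention.model) containing the tensors input_x, keep_prob, and score/pred_y. A minimal sketch of how that file could be loaded for online prediction; the file paths, the example text, and the use of file_to_ids_single are assumptions based on the code above, not part of the original post:

import tensorflow as tf
from data_utils import read_vocab_predict, file_to_ids_single

# Sketch only: load the frozen graph exported during training and run one prediction.
graph_path = "/home/zhoumeixu/credit-tftextclassify/tensorflow/noexperience/graphattention.model"  # assumed path
vocab_path = "/home/zhoumeixu/credit-tftextclassify/tensorflow/noexperience/vocab.txt"

word_to_id = read_vocab_predict(vocab_path)
x = file_to_ids_single("some input text", word_to_id=word_to_id)

with tf.gfile.GFile(graph_path, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
    input_x = graph.get_tensor_by_name("input_x:0")
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    pred_y = graph.get_tensor_by_name("score/pred_y:0")
    with tf.Session(graph=graph) as sess:
        probs = sess.run(pred_y, feed_dict={input_x: x, keep_prob: 1.0})
        print(probs)  # class probabilities, shape [1, num_classes]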
Results:
2017-11-25 13:48:37.183112: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 3920 get requests, put_count=6089 evicted_count=2000 eviction_rate=0.328461 and unsatisfied allocation rate=0
2017-11-25 13:48:37.995920: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 5649 get requests, put_count=8853 evicted_count=3000 eviction_rate=0.338868 and unsatisfied allocation rate=0
2017-11-25 13:48:39.200365: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 2297 get requests, put_count=3569 evicted_count=1000 eviction_rate=0.280191 and unsatisfied allocation rate=0
2017-11-25 13:48:40.431985: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 2448 get requests, put_count=3810 evicted_count=1000 eviction_rate=0.262467 and unsatisfied allocation rate=0
Iter: 100, Train Loss: 0.24, Train Acc: 95.31%, Time: 0:00:42
Iter: 200, Train Loss: 0.14, Train Acc: 96.09%, Time: 0:01:23
Iter: 300, Train Loss: 0.14, Train Acc: 95.31%, Time: 0:02:03
Iter: 400, Train Loss: 0.1, Train Acc: 97.66%, Time: 0:02:44
Iter: 500, Train Loss: 0.27, Train Acc: 89.84%, Time: 0:03:25
Converted 10 variables to const ops.
attention模型在第500步已经保存
Iter: 600, Train Loss: 0.16, Train Acc: 93.75%, Time: 0:04:06
Iter: 700, Train Loss: 0.15, Train Acc: 96.09%, Time: 0:04:46
Iter: 800, Train Loss: 0.14, Train Acc: 94.53%, Time: 0:05:26
Iter: 900, Train Loss: 0.1, Train Acc: 95.31%, Time: 0:06:06
Iter: 1000, Train Loss: 0.11, Train Acc: 93.75%, Time: 0:06:47
Converted 10 variables to const ops.
attention模型在第1000步已经保存
Iter: 1100, Train Loss: 0.044, Train Acc: 99.22%, Time: 0:07:28
Iter: 1200, Train Loss: 0.23, Train Acc: 90.62%, Time: 0:08:09
Iter: 1300, Train Loss: 0.11, Train Acc: 96.88%, Time: 0:08:51
Iter: 1400, Train Loss: 0.077, Train Acc: 96.88%, Time: 0:09:31
Iter: 1500, Train Loss: 0.087, Train Acc: 96.09%, Time: 0:10:11
Converted 10 variables to const ops.
attention模型在第1500步已经保存
Iter: 1600, Train Loss: 0.11, Train Acc: 96.88%, Time: 0:10:52
Iter: 1700, Train Loss: 0.099, Train Acc: 95.31%, Time: 0:11:32
Iter: 1800, Train Loss: 0.08, Train Acc: 96.09%, Time: 0:12:13
Iter: 1900, Train Loss: 0.1, Train Acc: 96.88%, Time: 0:12:53
Iter: 2000, Train Loss: 0.13, Train Acc: 94.53%, Time: 0:13:34
Converted 10 variables to const ops.