Simple Recurrent Unit For Sentence Classification tensorflow实现
来源:互联网 发布:简明python教程豆瓣 编辑:程序博客网 时间:2024/06/13 03:51
网上看到一篇文章关于SRU的原理以及实现,不过是基于pytorch,地址如下:
https://mp.weixin.qq.com/s/2I9_cF3F5MO3l5rE1VY2ng
在这里用tensorflow来实现,先看下SRUCell实现:
import math
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import RNNCell
class SRUCell(RNNCell):
    """Simple Recurrent Unit (SRU) cell.

    The recurrence over the cell state ``c`` uses only element-wise
    operations (no matmul involves ``c``), which is what lets SRU
    batch the heavy matrix multiply across time steps.

    State is a ``(c, h)`` tuple; the cell emits ``new_h`` each step.
    """

    def __init__(self, num_units):
        # NOTE(review): the original omitted super().__init__(); in
        # TF >= 1.1 RNNCell is a Layer whose constructor sets up scope
        # bookkeeping, so call it to stay compatible.
        super(SRUCell, self).__init__()
        self.num_units = num_units

    @property
    def state_size(self):
        # (cell state c, hidden output h)
        return (self.num_units, self.num_units)

    @property
    def output_size(self):
        return self.num_units

    def __call__(self, x, state, scope=None):
        """Run one SRU step.

        Args:
            x: input of shape [batch, num_units]. The input width must
               equal num_units because of the highway term (1 - r) * x.
            state: (c, h) tuple from the previous step; h is unused.
            scope: optional variable scope name.

        Returns:
            (new_h, (new_c, new_h))
        """
        with tf.variable_scope(scope or type(self).__name__):
            c, _ = state
            x_size = x.get_shape().as_list()[1]  # must equal num_units
            # One fused projection yields the candidate (tx), forget
            # gate (f) and reset/highway gate (r) pre-activations.
            W_u = tf.get_variable('W_u', [x_size, 3 * self.num_units])
            xh = tf.matmul(x, W_u)  # [batch, 3 * num_units]
            tx, f, r = tf.split(xh, 3, 1)  # 3 x [batch, num_units]
            b_f = tf.get_variable('b_f', [self.num_units])
            b_r = tf.get_variable('b_r', [self.num_units])
            f = tf.sigmoid(f + b_f)
            r = tf.sigmoid(r + b_r)
            # Element-wise recurrence: the only use of c.
            new_c = f * c + (1 - f) * tx
            # Highway connection mixing the transformed state with x.
            new_h = r * tf.tanh(new_c) + (1 - r) * x
            return new_h, (new_c, new_h)
提供两种方式来实现,看下面:
import tensorflow as tf
from SRU_tensorflow import SRUCell
class TextRnn(object):
    """Sentence classifier built on RNN/SRU cells (TF1 graph mode).

    Exposes placeholders ``input_x``/``input_y``/``keep_prob`` and graph
    tensors ``logits``, ``pred_y``, ``loss``, ``optim``, ``acc``.

    The four builder variants (rnn / birnn / sru / bisru) differ only in
    how they produce the final time-step representation; the classifier
    head, loss, optimizer and accuracy are shared via ``_build_head``
    (the original duplicated that code four times).
    """

    def __init__(self, config):
        self.config = config
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.isbirnn = self.config.isbirnn
        # SRU variants are used instead of the plain rnn()/birnn() ones.
        if self.isbirnn:
            self.bisru()
        else:
            self.sru()

    def input_embedding(self):
        """Word-embedding lookup for input_x.

        GPU pinning was removed on purpose: serving runs on CPU and a
        hard-coded '/gpu:0' device makes the model fail there.
        """
        embedding = tf.get_variable('embedding',
                                    [self.config.vocab_size, self.config.embedding_dim])
        return tf.nn.embedding_lookup(embedding, self.input_x)

    def _l2(self):
        """L2 regularizer used by the bidirectional variants' FC layers."""
        return tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda)

    def _build_head(self, last, regularize=False):
        """Shared classifier head: FC -> dropout -> relu -> logits,
        plus loss, Adam optimizer and accuracy.

        Args:
            last: [batch, features] representation of each sequence.
            regularize: add L2 regularizers to the dense layers (the
                original only did this in the bidirectional variants).
        """
        with tf.name_scope("score"):
            if regularize:
                fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1",
                                     kernel_regularizer=self._l2(),
                                     bias_regularizer=self._l2())
            else:
                fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1")
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # Classifier layer.
            if regularize:
                self.logits = tf.layers.dense(fc, self.config.num_classes,
                                              kernel_regularizer=self._l2(),
                                              bias_regularizer=self._l2(),
                                              name="fc2")
            else:
                self.logits = tf.layers.dense(fc, self.config.num_classes, name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)
        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)
        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)
        with tf.name_scope("accuracy"):
            correct_pred = tf.equal(tf.argmax(self.input_y, 1),
                                    tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def rnn(self):
        """Unidirectional multi-layer LSTM/GRU classifier."""
        def lstm_cell():
            return tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)

        def gru_cell():
            return tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)

        def dropout():
            # Wrap every layer's cell in output dropout.
            cell = lstm_cell() if self.config.rnn == 'lstm' else gru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        embedding_inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs,
                                            dtype=tf.float32)
            # _outputs: [batch, time, hidden] -> keep the last time step.
            last = _outputs[:, -1, :]
        self._build_head(last, regularize=False)

    def birnn(self):
        """Bidirectional single-layer LSTM/GRU classifier."""
        def lstm_cells():
            cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            return cell_fw, cell_bw

        def gru_cells():
            cell_fw = tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)
            cell_bw = tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)
            return cell_fw, cell_bw

        embedding_inputs = self.input_embedding()
        with tf.name_scope("birnn"):
            if self.config.rnn == 'lstm':
                cell_fw, cell_bw = lstm_cells()
            else:
                # BUG FIX: original assigned "cell_fw, cell_fw = gru_cell()",
                # discarding the backward cell.
                cell_fw, cell_bw = gru_cells()
            # BUG FIX: original passed cell_fw twice, so no backward cell
            # ever ran and the network was not actually bidirectional.
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, embedding_inputs, dtype=tf.float32)
            # Concat fw/bw outputs, then take the last time step.
            outputs = tf.concat(outputs, 2)
            last = tf.transpose(outputs, [1, 0, 2])[-1]
        self._build_head(last, regularize=True)

    def sru(self):
        """Unidirectional multi-layer SRU classifier."""
        def sru_cell():
            # hidden_dim // 2 so the bidirectional variant's concat
            # matches hidden_dim. NOTE(review): SRUCell requires its
            # input width to equal num_units — confirm embedding_dim
            # equals hidden_dim // 2 in the config.
            return SRUCell(self.config.hidden_dim // 2)

        def dropout():
            return tf.contrib.rnn.DropoutWrapper(sru_cell(),
                                                 output_keep_prob=self.keep_prob)

        embedding_inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs,
                                            dtype=tf.float32)
            # _outputs: [batch, time, hidden] -> keep the last time step.
            last = _outputs[:, -1, :]
        self._build_head(last, regularize=False)

    def bisru(self):
        """Bidirectional single-layer SRU classifier."""
        def sru_cells():
            cell_fw = SRUCell(self.config.hidden_dim // 2)
            cell_bw = SRUCell(self.config.hidden_dim // 2)
            return cell_fw, cell_bw

        embedding_inputs = self.input_embedding()
        with tf.name_scope("birnn"):
            cell_fw, cell_bw = sru_cells()
            # BUG FIX: original passed cell_fw twice here as well.
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, embedding_inputs, dtype=tf.float32)
            outputs = tf.concat(outputs, 2)
            last = tf.transpose(outputs, [1, 0, 2])[-1]
        self._build_head(last, regularize=True)
run sru:
from rnn_model import TextRnn
from configuration import TRNNConfig,Path,TCNNConfig
from data_utils_cut import preocess_file,batch_iter
import time
import tensorflow as tf
import os
from datetime import timedelta
#basepath="/Users/shuubiasahi/Documents/python"
#noexperience
#business
#together
# Base directory of the deployment host (see commented alternatives above).
basepath="/home/zhoumeixu"
# Training corpus, vocabulary file, and output directory for frozen graphs.
data_path=basepath+"/credit-tftextclassify/tensorflow/sex/cnn.txt"
vocapath=basepath+"/credit-tftextclassify/tensorflow/sex/vocab.txt"
modelpath=basepath+"/credit-tftextclassify/tensorflow/sex/"
print(modelpath,"开始训练")
def run_epoch(rnn=True):
    """Train the SRU/RNN text classifier and periodically export a
    frozen inference graph.

    Args:
        rnn: must be True. Only the RNN/SRU path is implemented; the
            original fell through and crashed with a NameError on the
            undefined ``config``/``model`` when rnn was False, so we
            now fail fast with a clear error instead.
    """
    # 载入数据 (load data)
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words = preocess_file(data_path,
                                            vocapath)
    if not rnn:
        raise ValueError('only the RNN/SRU model is supported; call run_epoch(rnn=True)')
    print('Using RNN model...')
    config = TRNNConfig()
    config.vocab_size = len(words)
    print("vocab_size is:", config.vocab_size)
    model = TextRnn(config)
    tensorboard_dir = basepath + '/boardlog'

    def elapsed_since(t0):
        """Wall time since t0 as a rounded timedelta (deduplicated)."""
        return timedelta(seconds=int(round(time.time() - t0)))

    print('Time usage:', elapsed_since(start_time))
    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # 配置 tensorboard (TensorBoard setup)
    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # 生成批次数据 (build the batch iterator)
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train)),
                             config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Build the feed dict for one batch; returns (feed_dict, batch_len)."""
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch,
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_):
        """Evaluate in chunks of 128 (whole set at once OOMs) and
        return length-weighted (loss, accuracy). Currently unused —
        validation is disabled below."""
        batch_eval = batch_iter(list(zip(x_, y_)), 128, 1)
        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.keep_prob] = 1.0  # no dropout at eval time
            loss, acc = session.run([model.loss, model.acc],
                                    feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len
        return total_loss / cnt, total_acc / cnt

    # 训练与验证 (training loop)
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.keep_prob] = config.dropout_keep_prob
        if i % 5 == 0:  # write TensorBoard scalars every 5 steps
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)
        if i % print_per_batch == print_per_batch - 1:
            # Report train loss/acc every print_per_batch steps.
            loss_train, acc_train = session.run([model.loss, model.acc],
                                                feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val)  -- validation disabled
            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},'\
                + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, elapsed_since(start_time)))
        if i % 500 == 0 and i > 0:
            # Freeze variables into constants and export an inference
            # graph (only the rnn branch remains; rnn is always True here).
            graph = tf.graph_util.convert_variables_to_constants(
                session, session.graph_def,
                ["keep_prob", "input_x", "score/pred_y"])
            tf.train.write_graph(graph, ".", modelpath + "graphrnn.model",
                                 as_text=False)
            print("rnn模型在第{0}步已经保存".format(i))
        session.run(model.optim, feed_dict=feed_dict)  # optimizer step
    session.close()
# Script entry point: kick off training of the SRU classifier.
if __name__ == '__main__':
    run_epoch(rnn=True)
结果:
Iter: 100, Train Loss: 0.33, Train Acc: 84.38%, Time: 0:00:46
Iter: 200, Train Loss: 0.24, Train Acc: 91.41%, Time: 0:01:33
Iter: 300, Train Loss: 0.22, Train Acc: 90.62%, Time: 0:02:19
Iter: 400, Train Loss: 0.28, Train Acc: 92.19%, Time: 0:03:04
Iter: 500, Train Loss: 0.11, Train Acc: 95.31%, Time: 0:03:49
Converted 11 variables to const ops.
rnn模型在第500步已经保存
Iter: 600, Train Loss: 0.12, Train Acc: 94.53%, Time: 0:04:35
Iter: 700, Train Loss: 0.086, Train Acc: 97.66%, Time: 0:05:21
Iter: 800, Train Loss: 0.21, Train Acc: 93.75%, Time: 0:06:07
Iter: 900, Train Loss: 0.12, Train Acc: 96.09%, Time: 0:06:52
Iter: 1000, Train Loss: 0.086, Train Acc: 97.66%, Time: 0:07:38
训练过程中并没有发现在论文提到的快很多倍,不知道是什么原因,还是我写错了,求各位大神不吝赐教。
阅读全文
0 0
- Simple Recurrent Unit For Sentence Classification tensorflow实现
- 论文Convolutional Naural Networks for Sentence Classification--TensorFlow实现篇
- Convolutional Neural Networks for Sentence Classification
- Convolutional Neural Network For Sentence Classification
- Hierarchical Attention Network for Document Classification--tensorflow实现篇
- Convolutional Neural Networks for Sentence Classification笔记整理
- 论文笔记《Convolutional Neural Networks for Sentence Classification》
- Convolutional Neural Network For Sentence Classification<Yoon Kim>解析(一)
- Convolutional Neural Network For Sentence Classification<Yoon Kim>解析(二)
- Convolutional Neural Network For Sentence Classification<Yoon Kim>解析(三)
- 论文Convolutional Naural Networks for Sentence Classification--模型介绍篇
- 论文《Convolutional Neural Networks for Sentence Classification》总结
- 【论文笔记】Convolutional Neural Networks for Sentence Classification
- 读论文《Convolutional Neural Networks for Sentence Classification》
- Convolutional Neural Networks for Sentence Classification论文解读
- Recurrent Convolutional Neural Networks for Text Classification阅读笔记
- 论文《Recurrent Convolutional Neural Networks for Text Classification》总结
- 论文《Recurrent Convolutional Neural Networks for Text Classification》总结
- Bitmap oom
- 惊喜链小程序开发代码分析
- 区块链:共记网络流水账
- Ubuntu 安装libjpeg-turbo库
- SOSP 2013
- Simple Recurrent Unit For Sentence Classification tensorflow实现
- Mysql 的创建及使用
- html+js实现城市联动、日期联动示例
- 基于windows版本下不同服务器上redis 的主从复制配置
- iOS OS X 和 iOS 中的多线程技术-3 (NSThread)
- 前端css知识大全——解读浏览器默认样式
- ubuntu系统用scp向RK3399发送文件
- 文章标题
- Python入门笔记三