Simple Recurrent Unit for Sentence Classification: a TensorFlow Implementation


I came across an article online that explains the idea behind SRU together with an implementation, but it is based on PyTorch. The link is here:

https://mp.weixin.qq.com/s/2I9_cF3F5MO3l5rE1VY2ng
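
For reference, the recurrence that the code below implements is the following (my summary of what the SRUCell computes; in the code the three weight matrices are packed into a single matrix W_u):

\tilde{x}_t = W x_t
f_t = \sigma(W_f x_t + b_f)
r_t = \sigma(W_r x_t + b_r)
c_t = f_t \odot c_{t-1} + (1 - f_t) \odot \tilde{x}_t
h_t = r_t \odot \tanh(c_t) + (1 - r_t) \odot x_t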


Here is a TensorFlow version. First, the SRUCell implementation:

import math
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import RNNCell


class SRUCell(RNNCell):
    def __init__(self, num_units):
        self.num_units = num_units

    @property
    def state_size(self):
        return (self.num_units, self.num_units)

    @property
    def output_size(self):
        return self.num_units

    def __call__(self, x, state, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            c, _ = state
            # x: [batch_size, x_size]; the highway connection below requires x_size == num_units
            x_size = x.get_shape().as_list()[1]
            W_u = tf.get_variable('W_u', [x_size, 3 * self.num_units])
            xh = tf.matmul(x, W_u)         # [batch_size, 3 * num_units]
            tx, f, r = tf.split(xh, 3, 1)  # three tensors of [batch_size, num_units]
            b_f = tf.get_variable('b_f', [self.num_units])
            b_r = tf.get_variable('b_r', [self.num_units])
            f = tf.sigmoid(f + b_f)
            r = tf.sigmoid(r + b_r)
            new_c = f * c + (1 - f) * tx   # element-wise multiply
            new_h = r * tf.tanh(new_c) + (1 - r) * x
            return new_h, (new_c, new_h)
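
A quick way to sanity-check the cell is to drive it through tf.nn.dynamic_rnn like any other RNNCell. This is a minimal sketch under my own assumptions: the cell above is saved as SRU_tensorflow.py, the batch size, sequence length and num_units are arbitrary, and because of the highway connection the input size must equal num_units. Depending on the exact TensorFlow 1.x version, an RNNCell subclass may also need to call the parent constructor in __init__.

import numpy as np
import tensorflow as tf
from SRU_tensorflow import SRUCell  # the cell defined above

batch_size, seq_len, num_units = 4, 10, 128
inputs = tf.placeholder(tf.float32, [None, seq_len, num_units], name='inputs')

cell = SRUCell(num_units)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(outputs, {inputs: np.random.randn(batch_size, seq_len, num_units)})
    print(out.shape)  # expect (4, 10, 128)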


Two variants are provided, a unidirectional SRU and a bidirectional one (the original LSTM/GRU methods are kept for comparison); see the model class below:

import tensorflow as tf
from SRU_tensorflow import SRUCell


class TextRnn(object):
    def __init__(self, config):
        self.config = config
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.isbirnn = self.config.isbirnn
        # if self.isbirnn:
        #     self.birnn()
        # else:
        #     self.rnn()
        if self.isbirnn:
            self.bisru()
        else:
            self.sru()

    def input_embedding(self):
        """Word embedding.
        The explicit GPU placement is removed here: the model is served on CPU
        in production, and pinning the graph to a GPU raises an error there.
        """
        # with tf.device('/gpu:0'):
        embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.embedding_dim])
        _inputs = tf.nn.embedding_lookup(embedding, self.input_x)
        return _inputs

    def rnn(self):
        def lstm_cell():
            """LSTM cell"""
            return tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)

        def gru_cell():
            """GRU cell"""
            return tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)

        def dropout():
            """Wrap each RNN cell with a dropout layer."""
            if self.config.rnn == 'lstm':
                cell = lstm_cell()
            else:
                cell = gru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        embedding_inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            # stack num_layers RNN layers
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            # e.g. _outputs has shape [30, 100, 128]; last then has shape [30, 128]
            last = _outputs[:, -1, :]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1")
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes, name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def birnn(self):
        def lstm_cell():
            """Forward and backward LSTM cells"""
            cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)
            return cell_fw, cell_bw

        def gru_cell():
            """Forward and backward GRU cells"""
            cell_fw = tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)
            cell_bw = tf.nn.rnn_cell.GRUCell(self.config.hidden_dim)
            return cell_fw, cell_bw

        embedding_inputs = self.input_embedding()
        with tf.name_scope("birnn"):
            if self.config.rnn == 'lstm':
                cell_fw, cell_bw = lstm_cell()
            else:
                cell_fw, cell_bw = gru_cell()  # fixed: the original assigned both cells to cell_fw
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedding_inputs,
                                                                     dtype=tf.float32)
            outputs = tf.concat(outputs, 2)  # concatenate the forward and backward outputs
            last = tf.transpose(outputs, [1, 0, 2])[-1]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1",
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                 bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda))
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes,
                                          kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def sru(self):
        def sru_cell():
            """SRU cell"""
            # note: the SRUCell highway connection assumes the input size equals
            # num_units, so embedding_dim should equal hidden_dim // 2 here
            return SRUCell(self.config.hidden_dim // 2)

        def dropout():
            """Wrap each SRU cell with a dropout layer."""
            cell = sru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        embedding_inputs = self.input_embedding()
        with tf.name_scope("rnn"):
            # stack num_layers SRU layers
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            # e.g. _outputs has shape [30, 100, 128]; last then has shape [30, 128]
            last = _outputs[:, -1, :]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1")
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes, name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def bisru(self):
        def sru():
            """Forward and backward SRU cells"""
            cell_fw = SRUCell(self.config.hidden_dim // 2)
            cell_bw = SRUCell(self.config.hidden_dim // 2)
            return cell_fw, cell_bw

        embedding_inputs = self.input_embedding()
        with tf.name_scope("birnn"):
            cell_fw, cell_bw = sru()
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedding_inputs,
                                                                     dtype=tf.float32)  # fixed: the original passed cell_fw twice
            outputs = tf.concat(outputs, 2)  # concatenate the forward and backward outputs
            last = tf.transpose(outputs, [1, 0, 2])[-1]  # take the output of the last time step

        with tf.name_scope("score"):
            # fully connected layer followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name="fc1",
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                 bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda))
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)
            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes,
                                          kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          bias_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda),
                                          name="fc2")
            self.pred_y = tf.nn.softmax(self.logits, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        with tf.name_scope("optimize"):
            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.optim = optimizer.minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.pred_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
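
The model and the training script below both read settings from a config object. TRNNConfig is not shown in the original post, so here is a minimal sketch of the fields the code actually uses; the class and all values are placeholders of mine, not the author's settings.

class TRNNConfig(object):
    """Hypothetical stand-in for the configuration class used above."""
    seq_length = 100          # padded sentence length (input_x width)
    num_classes = 2           # number of labels (input_y width)
    vocab_size = 5000         # overwritten after the vocabulary is loaded
    embedding_dim = 64        # should equal hidden_dim // 2 for the SRU highway connection
    hidden_dim = 128          # dense layer size; each SRU direction uses hidden_dim // 2
    num_layers = 2            # stacked layers in the unidirectional model
    rnn = 'lstm'              # 'lstm' or 'gru' for the baseline rnn()/birnn() variants
    isbirnn = True            # choose bisru() instead of sru()
    l2_reg_lambda = 0.01      # L2 regularisation for the dense layers
    learning_rate = 1e-3
    batch_size = 64
    num_epochs = 10
    print_per_batch = 100     # matches the "Iter: 100, 200, ..." output below
    dropout_keep_prob = 0.8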



Running the SRU model:

from rnn_model import TextRnn
from configuration import TRNNConfig, Path, TCNNConfig
from data_utils_cut import preocess_file, batch_iter
import time
import tensorflow as tf
import os
from datetime import timedelta

# basepath = "/Users/shuubiasahi/Documents/python"
# noexperience
# business
# together
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify/tensorflow/sex/cnn.txt"
vocapath = basepath + "/credit-tftextclassify/tensorflow/sex/vocab.txt"
modelpath = basepath + "/credit-tftextclassify/tensorflow/sex/"
print(modelpath, "start training")


def run_epoch(rnn=True):
    # load data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words = preocess_file(data_path, vocapath)

    if rnn:
        print('Using RNN model...')
        config = TRNNConfig()
        config.vocab_size = len(words)
        print("vocab_size is:", config.vocab_size)
        model = TextRnn(config)

    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # configure TensorBoard
    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train)),
                             config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Build the feed dict for the model."""
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_):
        """Model evaluation.
        Running all the data at once would run out of memory, so evaluate
        in batches and aggregate the results.
        """
        batch_eval = batch_iter(list(zip(x_, y_)), 128, 1)
        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.keep_prob] = 1.0
            loss, acc = session.run([model.loss, model.acc], feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len
        return total_loss / cnt, total_acc / cnt

    # training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch
    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.keep_prob] = config.dropout_keep_prob

        if i % 5 == 0:  # write training summaries to TensorBoard every 5 batches
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)

        if i % print_per_batch == print_per_batch - 1:  # report training-set performance periodically
            loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val)  # validation not needed for now
            # elapsed time
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))
            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))

        # if i % 10 == 0 and i > 0:
        #     graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def, ["keep_prob", "input_x", "score/pred_y"])
        #     tf.train.write_graph(graph, ".", "/Users/shuubiasahi/Desktop/tensorflow/modelsavegraph/graph.db", as_text=False)

        if i % 500 == 0 and i > 0:
            graph = tf.graph_util.convert_variables_to_constants(session, session.graph_def,
                                                                 ["keep_prob", "input_x", "score/pred_y"])
            if rnn:
                tf.train.write_graph(graph, ".", modelpath + "graphrnn.model", as_text=False)
                print("RNN model saved at step {0}".format(i))
            else:
                tf.train.write_graph(graph, ".", modelpath + "graph.model", as_text=False)
                print("CNN model saved at step {0}".format(i))

        session.run(model.optim, feed_dict=feed_dict)  # run the optimization step

    # finally, evaluate on the test set
    session.close()


if __name__ == '__main__':
    # run_epoch(rnn=True)
    run_epoch(rnn=True)
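
Since the script freezes the graph with convert_variables_to_constants, the saved graphrnn.model can later be loaded for CPU inference roughly as follows. This is a sketch: x_test stands for an already-padded batch of word-id sequences, and modelpath is the same directory as above; the tensor names come from the node list used when freezing.

import tensorflow as tf

with tf.gfile.GFile(modelpath + "graphrnn.model", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")
    input_x = graph.get_tensor_by_name("input_x:0")
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    pred_y = graph.get_tensor_by_name("score/pred_y:0")

with tf.Session(graph=graph) as sess:
    probs = sess.run(pred_y, feed_dict={input_x: x_test, keep_prob: 1.0})
    print(probs)  # class probabilities per sentence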

Results:

Iter: 100, Train Loss: 0.33, Train Acc: 84.38%, Time: 0:00:46
Iter: 200, Train Loss: 0.24, Train Acc: 91.41%, Time: 0:01:33
Iter: 300, Train Loss: 0.22, Train Acc: 90.62%, Time: 0:02:19
Iter: 400, Train Loss: 0.28, Train Acc: 92.19%, Time: 0:03:04
Iter: 500, Train Loss: 0.11, Train Acc: 95.31%, Time: 0:03:49
Converted 11 variables to const ops.
RNN model saved at step 500
Iter: 600, Train Loss: 0.12, Train Acc: 94.53%, Time: 0:04:35
Iter: 700, Train Loss: 0.086, Train Acc: 97.66%, Time: 0:05:21
Iter: 800, Train Loss: 0.21, Train Acc: 93.75%, Time: 0:06:07
Iter: 900, Train Loss: 0.12, Train Acc: 96.09%, Time: 0:06:52
Iter: 1000, Train Loss: 0.086, Train Acc: 97.66%, Time: 0:07:38
During training I did not observe anything like the many-fold speedup reported in the paper. I am not sure why, or whether I got something wrong in my implementation; any pointers would be appreciated.
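
One plausible reason (my guess, not verified against the author's setup): the speedup in the SRU paper comes from computing the big matrix multiplication for all time steps at once and fusing only the cheap element-wise recurrence into the sequential loop, whereas wrapping SRUCell in tf.nn.dynamic_rnn still performs one matmul per time step inside the loop, so it behaves like any other RNN cell speed-wise. Below is a sketch of the hoisted formulation; the function and variable names are mine, and the fused CUDA kernel used in the paper would still be needed for the full speedup.

import tensorflow as tf

def sru_layer(inputs, num_units, scope="sru_layer"):
    """SRU over a whole sequence; inputs has shape [batch, time, num_units]."""
    with tf.variable_scope(scope):
        W_u = tf.get_variable("W_u", [num_units, 3 * num_units])
        b_f = tf.get_variable("b_f", [num_units])
        b_r = tf.get_variable("b_r", [num_units])

        # One matmul for every time step at once; this is the part the paper
        # parallelises, instead of one matmul per step inside the RNN loop.
        batch = tf.shape(inputs)[0]
        flat = tf.reshape(inputs, [-1, num_units])
        xh = tf.reshape(tf.matmul(flat, W_u), [batch, -1, 3 * num_units])
        tx, f, r = tf.split(xh, 3, axis=2)
        f = tf.sigmoid(f + b_f)
        r = tf.sigmoid(r + b_r)

        # Only the element-wise state update remains sequential.
        def step(c_prev, elems):
            tx_t, f_t = elems
            return f_t * c_prev + (1.0 - f_t) * tx_t

        c = tf.scan(step,
                    (tf.transpose(tx, [1, 0, 2]), tf.transpose(f, [1, 0, 2])),
                    initializer=tf.zeros_like(inputs[:, 0, :]))
        c = tf.transpose(c, [1, 0, 2])              # back to [batch, time, num_units]
        return r * tf.tanh(c) + (1.0 - r) * inputs  # highway output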