TensorFlow搭建RNN(1/7) 简单案例

来源：互联网发布：营销活动效果数据分析编辑：程序博客网时间：2024/06/05 05:54

相对于普通的神经网络和算法，
RNN最大的不同是：它有状态(State)

如何体现State的不同呢？
我们现在有这样一个需求：

我一次又一次地输入一些值，我希望在我每次输入的时候，程序会输出我倒数第3次之前输入的值（即延迟3步的“回声”）。
比如，我输入 1 2 3 4 5 6 7 8（一次输入一个），它输出 - - - 1 2 3 4 5；
然后我又输入 5 4 3 2 1 0，它的输出是 6 7 8 5 4 3。

引入需要的库

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

一 生成数据

函数generateData随机生成了一个长度为5万、由0和1组成的序列x，
然后把x分成5份，变成一个5行、10000列的二维数组，
y也是一样，不过y中的项全部向右位移了3位(最前面的3项补0)

total_series_length = 50000  # total number of samples generated per epoch
truncated_backprop_length = 15  # time steps per training window (used later)
batch_size = 5  # number of rows the long series is split into
echo_step = 3  # how many steps the target lags behind the input


def generateData():
    """Generate one random binary series and its delayed ("echoed") copy.

    Returns:
        A tuple ``(x, y)`` of integer arrays, both reshaped to
        ``batch_size`` rows (the column count is inferred).  ``x`` holds
        values drawn uniformly from {0, 1}.  ``y`` is ``x`` shifted right
        by ``echo_step`` positions along the flattened series, with the
        first ``echo_step`` entries set to 0.
    """
    # Draw total_series_length values from {0, 1}, each with probability 0.5.
    x = np.random.choice(2, total_series_length, p=[0.5, 0.5])

    # Shift the whole series right by echo_step; np.roll wraps the tail
    # around to the front ...
    y = np.roll(x, echo_step)
    # ... so the wrapped-around entries are overwritten with 0.
    y[0:echo_step] = 0

    # Fold the flat series into batch_size rows; -1 lets numpy compute the
    # number of columns.
    x = x.reshape((batch_size, -1))
    y = y.reshape((batch_size, -1))

    return (x, y)

二 构造输入序列

truncated_backprop_length = 15  # time steps per training window
batch_size = 5  # number of rows the data is split into

# Two placeholders, each a 2-D array of shape (5, 15).
# No values are fed yet, but the shape of the data is already fixed here.
batchX_placeholder = tf.placeholder(
    tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(
    tf.int32, [batch_size, truncated_backprop_length])

# Later the 5 x 10000 data from the generation step is sliced into pieces
# of 15 columns each (row count unchanged, 5 x 15) and fed to the
# placeholders.
#
# Unpack along the columns.  This is similar to a transpose (15 x 5), but
# note that inputs_series is a Python list of length 15 whose items are
# each a tensor of length 5.
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Why shuffle the data around like this?  Picture the 5 rows (now 5 x 15)
# as 5 parallel strands: unstack cuts straight across all of them, so each
# cut yields one time step holding one value from every row.

三 构造状态序列，吸收新的input

input 和原来的state 通过W和b融合之后形成了新的state

# Sizes must be defined before the weights below that depend on them.
state_size = 4
batch_size = 5

# Weights and bias used each time the state absorbs a new input.
# W has state_size + 1 rows because the input column is concatenated
# onto the state before the matrix multiplication.
W = tf.Variable(np.random.rand(state_size + 1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32)

# The state has shape (5, 4) and keeps that shape throughout.
init_state = tf.placeholder(tf.float32, [batch_size, state_size])

current_state = init_state
states_series = []

# Reminder: inputs_series is a list of length 15 whose items are tensors
# of length 5.  Map the input sequence to a state sequence using W and b.
for current_input in inputs_series:
    # Turn the 1-D (batch_size,) input into a 2-D column:
    # current_input: (5,) ==> (5, 1)
    current_input = tf.reshape(current_input, [batch_size, 1])

    # Join the current input and the current state along axis 1 (columns):
    #   current_input: (5, 1), current_state: (5, 4) ==> (5, 5)
    input_and_state_concatenated = tf.concat(
        [current_input, current_state], 1)

    # Matrix product (5, 5) x (5, 4) ==> (5, 4): the result has the same
    # shape as the previous state.  The input and the old state are merged
    # through W and b into the new state.
    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)

    # Record the new state and carry it forward to the next time step.
    states_series.append(next_state)
    current_state = next_state

# No concrete values have been fed yet; this only builds the mapping from
# the X placeholder to states_series.  The values depend on what is fed.

四 计算误差和训练

得到一个预测结果，然后跟labels比较计算cross_entropy作为loss
然后根据loss来优化W、b 和 W2、b2

# Number of output classes: each element of the series is either 0 or 1.
num_classes = 2

W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

# Multiply each state by W2 and add b2 (broadcast addition) to get a
# (5, 2) array per time step: the current prediction as logits.
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]

# tf.nn.sparse_softmax_cross_entropy_with_logits:
#   logits has shape [batch_size, num_classes], labels has shape
#   [batch_size].  Labels must be integers smaller than num_classes
#   (with 3 classes the largest label is 2).
#   The function applies softmax itself, so do not apply it beforehand.
losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    for logits, labels in zip(logits_series, labels_series)
]

# Average the loss over all time steps and all rows.
total_loss = tf.reduce_mean(losses)

# Use Adagrad as the optimizer.
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

# Softmax turns the logits into per-class probabilities.  These are only
# fetched for inspection/plotting; the loss above works on raw logits.
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

五 运行训练

# Train for 100 epochs.
num_epochs = 100
# Each epoch's data is cut into num_batches windows.
num_batches = total_series_length // batch_size // truncated_backprop_length

# NOTE: plot() (shown in the charting section) must be defined before this
# loop runs.

with tf.Session() as sess:
    # Initialise all variables.
    sess.run(tf.global_variables_initializer())

    # Interactive figure so the chart can be refreshed during training.
    plt.ion()
    plt.figure()
    plt.show()

    # Loss of every batch so far; must exist before the first append below.
    loss_list = []

    for epoch_idx in range(num_epochs):
        x, y = generateData()  # fresh random data for this epoch
        # The state starts from all zeros at the beginning of every epoch.
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch:", epoch_idx)

        # The data was folded into 5 rows x 10000 columns; now slice it
        # window by window.
        for batch_idx in range(num_batches):
            # Keep all 5 rows and take truncated_backprop_length columns.
            # The stride equals the window length (15), so consecutive
            # windows do not overlap.
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:, start_idx:end_idx]
            batchY = y[:, start_idx:end_idx]

            # Feed the slice to the session.  The final state of this
            # window is fetched and fed back as init_state of the next one.
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })

            loss_list.append(_total_loss)

            # Report and redraw every 100 batches.
            if batch_idx % 100 == 0:
                print("Step", batch_idx, "Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

# Leave interactive mode and keep the final figure open.
plt.ioff()
plt.show()

六 展示图表

def plot(loss_list, predictions_series, batchX, batchY):
    """Redraw the training chart: loss curve plus one panel per batch row.

    Args:
        loss_list: Sequence of loss values collected so far.
        predictions_series: Per-time-step predictions; converted with
            ``np.array`` and indexed as ``[step, row, class]``.
        batchX: 2-D array of inputs for the current window, indexed
            ``[row, step]``.
        batchY: 2-D array of targets for the current window, same layout
            as ``batchX``.
    """
    # Cell 1 of the 2 x 3 grid shows the loss curve.
    plt.subplot(2, 3, 1)
    plt.cla()  # clear the current axes
    plt.plot(loss_list)

    # The grid has 5 cells left for the rows; never index past the rows
    # that actually exist.
    num_series = min(5, batchX.shape[0])
    num_steps = batchX.shape[1]
    all_predictions = np.array(predictions_series)

    for batch_series_idx in range(num_series):
        # Predictions for this row across all time steps: (steps, classes).
        one_hot_output_series = all_predictions[:, batch_series_idx, :]
        # Collapse the two class probabilities into a 0/1 output: predict 1
        # when the probability of class 0 is below 0.5.
        single_output_series = (one_hot_output_series[:, 0] < 0.5).astype(int)

        # One axes per row, in cells 2..6.
        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        # x axis spans the time steps, y axis spans 0..2.
        plt.axis([0, num_steps, 0, 2])
        # x positions of the bars: one per time step.
        left_offset = range(num_steps)
        # Different bar heights keep input (blue), target (red) and
        # prediction (green) visible when drawn on top of each other.
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)

七 所有代码

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 2
echo_step = 3
batch_size = 5
num_batches = total_series_length // batch_size // truncated_backprop_length


def generateData():
    """Return ``(x, y)``: a random 0/1 series and its copy delayed by echo_step.

    Both arrays are reshaped to ``batch_size`` rows.  The first
    ``echo_step`` entries of the flattened ``y`` are set to 0.
    """
    x = np.random.choice(2, total_series_length, p=[0.5, 0.5])
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0  # discard the values np.roll wrapped to the front

    x = x.reshape((batch_size, -1))
    y = y.reshape((batch_size, -1))

    return (x, y)


batchX_placeholder = tf.placeholder(
    tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(
    tf.int32, [batch_size, truncated_backprop_length])

init_state = tf.placeholder(tf.float32, [batch_size, state_size])

W = tf.Variable(np.random.rand(state_size + 1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32)

W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

# Unpack columns: one list item per time step.
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)

# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    # Increasing number of columns
    input_and_state_concatenated = tf.concat([current_input, current_state], 1)

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)
    states_series.append(next_state)
    current_state = next_state

# Broadcasted addition
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    for logits, labels in zip(logits_series, labels_series)
]
total_loss = tf.reduce_mean(losses)

train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)


def plot(loss_list, predictions_series, batchX, batchY):
    """Redraw the loss curve and one input/target/prediction panel per row.

    Args:
        loss_list: Sequence of loss values collected so far.
        predictions_series: Per-time-step predictions, indexed
            ``[step, row, class]`` after conversion with ``np.array``.
        batchX: 2-D array of inputs for the current window (``[row, step]``).
        batchY: 2-D array of targets, same layout as ``batchX``.
    """
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    # The 2 x 3 grid leaves 5 cells for rows; do not index past real rows.
    num_series = min(5, batchX.shape[0])
    all_predictions = np.array(predictions_series)

    for batch_series_idx in range(num_series):
        one_hot_output_series = all_predictions[:, batch_series_idx, :]
        # Predict 1 when the probability of class 0 is below 0.5.
        single_output_series = (one_hot_output_series[:, 0] < 0.5).astype(int)

        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        left_offset = range(truncated_backprop_length)
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x, y = generateData()
        # The state starts from zeros at the beginning of every epoch.
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:, start_idx:end_idx]
            batchY = y[:, start_idx:end_idx]

            # The fetched final state is fed back in as the next window's
            # init_state.
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })

            loss_list.append(_total_loss)

            if batch_idx % 100 == 0:
                print("Step", batch_idx, "Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

plt.ioff()
plt.show()

这里写图片描述

最后，
图(2,3,1)代表了损失随着训练次数的变化，
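发现损失曲线会有尖峰，精确来说是每隔666个batch会有一次尖峰(num_batches = 50000//5//15 = 666)，

原因是每隔666个batch就进入新的一轮(epoch)：此时会重新产生一次新的数据，同时state被重置为全0，
新老数据之间没有关系，而且新数据每一行开头的几个输出依赖于state中并不存在的历史输入，所以每轮刚开始时预测会失败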

下一步

TensorFlow搭建RNN(2/7) 使用TensorFlow的RNN API

原文来自medium:
https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767

原文的翻译：
https://zhuanlan.zhihu.com/p/26646665

本文根据tensorflow 1.2的api修改了代码

阅读全文

0 0

TensorFlow搭建RNN(1/7) 简单案例

引入需要的库

一 生成数据

二 构造输入序列

三 构造状态序列， 吸收新的input

四 计算误差和训练

五 运行训练

六 展示图表

七 所有代码