代码-RNN --文本数据
来源:互联网 发布:淘宝无人机 编辑:程序博客网 时间:2024/06/05 17:19
"""Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)BSD License"""import numpy as npimport jieba# data I/Odata = open('/home/kwane/下载/tfword2vec-master/280.txt', 'rb').read() # should be simple plain text filedata = data.decode('gbk')data = list(jieba.cut(data,cut_all=False))chars = list(set(data))data_size, vocab_size = len(data), len(chars)print ('data has %d characters, %d unique.' % (data_size, vocab_size))char_to_ix = { ch:i for i,ch in enumerate(chars) }ix_to_char = { i:ch for i,ch in enumerate(chars) }# hyperparametershidden_size = 200 # size of hidden layer of neuronsseq_length = 25 # number of steps to unroll the RNN forlearning_rate = 1e-1# model parametersWxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hiddenWhh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hiddenWhy = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to outputbh = np.zeros((hidden_size, 1)) # hidden biasby = np.zeros((vocab_size, 1)) # output biasdef lossFun(inputs, targets, hprev): """ inputs,targets are both list of integers. hprev is Hx1 array of initial hidden state returns the loss, gradients on model parameters, and last hidden state """ xs, hs, ys, ps = {}, {}, {}, {} hs[-1] = np.copy(hprev) # hprev 中间层的值, 存作-1,为第一个做准备 loss = 0 # forward pass for t in range(len(inputs)): xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation xs[t][inputs[t]] = 1 # x[t] 是一个第t个输入单词的向量 # 双曲正切, 激活函数, 作用跟sigmoid类似 # h(t) = tanh(Wxh*X + Whh*h(t-1) + bh) 生成新的中间层 hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state tanh # y(t) = Why*h(t) + by ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars # softmax regularization # p(t) = softmax(y(t)) ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars, 对输出作softmax # loss += -log(value) 预期输出是1,因此这里的value值就是此次的代价函数,使用 -log(*) 使得离正确输出越远,代价函数就越高 loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss) 代价函数是交叉熵 # 将输入循环一遍以后,得到各个时间段的h, y 和 p # 得到此时累积的loss, 准备进行更新矩阵 # backward pass: compute gradients going backwards dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why) # 各矩阵的参数进行 dbh, dby = np.zeros_like(bh), np.zeros_like(by) dhnext = np.zeros_like(hs[0]) # 下一个时间段的潜在层,初始化为零向量 for t in reversed(range(len(inputs))): # 把时间作为维度,则梯度的计算应该沿着时间回溯 dy = np.copy(ps[t]) # 设dy为实际输出,而期望输出(单位向量)为y, 代价函数为交叉熵函数 dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here dWhy += np.dot(dy, hs[t].T) # dy * h(t).T h层值越大的项,如果错误,则惩罚越严重。反之,奖励越多(这边似乎没有考虑softmax的求导?) dby += dy # 这个没什么可说的,与dWhy一样,只不过h项=1, 所以直接等于dy dh = np.dot(Why.T, dy) + dhnext # backprop into h z_t = Why*H_t + b_y H_t = tanh(Whh*H_t-1 + Whx*X_t), 第一阶段求导 dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity 第二阶段求导,注意tanh的求导 dbh += dhraw # dbh表示传递 到h层的误差 dWxh += np.dot(dhraw, xs[t].T) # 对Wxh的修正,同Why dWhh += np.dot(dhraw, hs[t-1].T) # 对Whh的修正 dhnext = np.dot(Whh.T, dhraw) # h层的误差通过Whh不停地累积 for dparam in [dWxh, dWhh, dWhy, dbh, dby]: np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]def sample(h, seed_ix, n): """ sample a sequence of integers from the model h is memory state, seed_ix is seed letter for first time step """ x = np.zeros((vocab_size, 1)) x[seed_ix] = 1 ixes = [] for t in range(n): h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh) # 更新中间层 y = np.dot(Why, h) + by # 得到输出 p = np.exp(y) / np.sum(np.exp(y)) # softmax ix = np.random.choice(range(vocab_size), p=p.ravel()) # 根据softmax得到的结果,按概率产生下一个字符 x = np.zeros((vocab_size, 1)) # 产生下一轮的输入 x[ix] = 1 ixes.append(ix) return ixesn, p = 0, 0mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagradsmooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0while True: # prepare inputs (we're sweeping from left to right in steps seq_length long) if p+seq_length+1 >= len(data) or n == 0: # 如果 n=0 或者 p过大 hprev = np.zeros((hidden_size,1)) # reset RNN memory 中间层内容初始化,零初始化 p = 0 # go from start of data # p 重置 inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]] # 一批输入seq_length个字符 targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]] # targets是对应的inputs的期望输出。 # sample from the model now and then if n % 100 == 0: # 每循环100词, sample一次,显示结果 sample_ix = sample(hprev, inputs[0], 200) txt = ''.join(ix_to_char[ix] for ix in sample_ix) print ('----\n %s \n----' % (txt, )) # forward seq_length characters through the net and fetch gradient loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev) smooth_loss = smooth_loss * 0.999 + loss * 0.001 # 将原有的Loss与新loss结合起来 if n % 100 == 0: print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress # perform parameter update with Adagrad for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby]): mem += dparam * dparam # 梯度的累加 param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update 随着迭代次数增加,参数的变更量会越来越小 p += seq_length # move data pointer n += 1 # iteration counter, 循环次数
0 0
- 代码-RNN --文本数据
- [代码]基于RNN的文本生成算法
- 基于RNN的文本生成算法的代码运转
- 代码 RNN
- rnn学习,keras rnn实践 文本生成
- 用rnn做文本生成
- TensorFlow RNN 文本情感分析
- tensorflow-rnn代码解读
- MINI-RNN代码学习
- 深度学习RNN实现股票预测实战(附数据、代码)
- RNN(LSTM)处理文本资料总结
- 用 RNN 训练语言模型生成文本
- 基于RNN的文本分析及预测
- 用于文本分类的RNN-Attention网络
- 简单的Char RNN生成文本
- cnn、rnn相结合进行文本分类
- CRF as RNN 代码解读
- TensorFlow RNN 教程和代码
- oracle的建表语句
- Java内存模型和内存间相互操作
- Spring学习之缓存机制EhCache---Condition
- Java 基于链表的栈结构实现
- Hadoop安装包目录下几个比较重要的目录及其作用
- 代码-RNN --文本数据
- friend
- 数据类型和变量
- STM32 SPI DMA 的使用
- matlab中如何使用外部工具箱
- Spring学习之缓存机制EhCache---Key
- Redis学习总结_1_底层数据结构
- 2016 Al-Baath University Training Camp Contest-1 I. March Rain —— 二分
- uml类图的图示方法