Resumable training for a comment-classification model
For work reasons, I sometimes have to interrupt a TensorFlow training run. So that the next run can pick up from the previously saved model and continue training instead of starting from scratch, I put together the notes below.
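The core mechanism is TensorFlow's tf.train.Saver: save a checkpoint every few steps during training, and on startup restore the latest checkpoint if one exists. A minimal sketch of just that pattern (TensorFlow 1.x APIs; the counter variable and the ./model.ckpt path are illustrative, not part of the script below):

import os
import tensorflow as tf

# a trivial graph: a single counter stands in for the model's weights
step = tf.Variable(0, name='step')
inc_step = tf.assign_add(step, 1)

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # if a previous run wrote a checkpoint, resume from it instead of starting over
    if os.path.exists('checkpoint'):
        saver.restore(sess, tf.train.latest_checkpoint('.'))
    for _ in range(100):
        current_step = sess.run(inc_step)
        if current_step % 10 == 0:
            # overwrites ./model.ckpt and updates the 'checkpoint' index file
            saver.save(sess, './model.ckpt')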
The full training script:
import os
import random
import pickle
import numpy as np
import tensorflow as tf
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

with open('lexcion.pickle', 'rb') as f:
    lex = pickle.load(f)

def get_random_line(file, point):
    # seek to a random byte, discard the (probably partial) current line,
    # and return the next full line; may return '' near EOF, which the
    # caller's try/except absorbs
    file.seek(point)
    file.readline()
    return file.readline()

# randomly pick n records from the file
def get_n_random_line(file_name, n=150):
    lines = []
    file = open(file_name, encoding='latin-1')
    total_bytes = os.stat(file_name).st_size
    for i in range(n):
        random_point = random.randint(0, total_bytes)
        lines.append(get_random_line(file, random_point))
    file.close()
    return lines

def get_test_dataset(test_file):
    with open(test_file, encoding='latin-1') as f:
        test_x = []
        test_y = []
        lemmatizer = WordNetLemmatizer()
        for line in f:
            label = line.split(':%:%:%:')[0]
            tweet = line.split(':%:%:%:')[1]
            words = word_tokenize(tweet.lower())
            words = [lemmatizer.lemmatize(word) for word in words]
            features = np.zeros(len(lex))
            for word in words:
                if word in lex:
                    features[lex.index(word)] = 1
            test_x.append(list(features))
            test_y.append(eval(label))  # label is a one-hot list literal, e.g. [0, 1, 0]
    return test_x, test_y

test_x, test_y = get_test_dataset('tesing.csv')

##############################################################################

input_size = len(lex)
print(input_size)
num_classes = 3

X = tf.placeholder(tf.int32, [None, input_size])
Y = tf.placeholder(tf.float32, [None, num_classes])
dropout_keep_prob = tf.placeholder(tf.float32)

batch_size = 10

def neural_network():
    # embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        embedding_size = 128
        W = tf.Variable(tf.random_uniform([input_size, embedding_size], -1.0, 1.0))
        embedded_chars = tf.nn.embedding_lookup(W, X)
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
    # convolution + maxpool layer
    num_filters = 128
    filter_sizes = [3, 4, 5]
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]))
            conv = tf.nn.conv2d(embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID")
            h = tf.nn.relu(tf.nn.bias_add(conv, b))
            pooled = tf.nn.max_pool(h, ksize=[1, input_size - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1], padding='VALID')
            pooled_outputs.append(pooled)

    num_filters_total = num_filters * len(filter_sizes)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    # dropout
    with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)
    # output
    with tf.name_scope("output"):
        W = tf.get_variable("W", shape=[num_filters_total, num_classes],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]))
        output = tf.nn.xw_plus_b(h_drop, W, b)
    return output

def train_neural_network():
    output = neural_network()

    optimizer = tf.train.AdamOptimizer(1e-3)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=Y))
    grads_and_vars = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads_and_vars)

    # evaluation ops, defined once so the graph does not grow inside the loop
    predictions = tf.argmax(output, 1)
    correct_predictions = tf.equal(predictions, tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"))

    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # restore once, before training: if an earlier run saved a checkpoint,
        # pick up from it instead of starting over
        if os.path.exists('checkpoint'):
            saver.restore(sess, tf.train.latest_checkpoint('.'))
            print("-----------")

        lemmatizer = WordNetLemmatizer()
        i = 0
        while True:
            batch_x = []
            batch_y = []
            try:
                lines = get_n_random_line('training.csv', batch_size)
                for line in lines:
                    label = line.split(':%:%:%:')[0]
                    tweet = line.split(':%:%:%:')[1]
                    words = word_tokenize(tweet.lower())
                    words = [lemmatizer.lemmatize(word) for word in words]

                    features = np.zeros(len(lex))
                    for word in words:
                        if word in lex:
                            # a word may appear more than once in a sentence;
                            # += 1 would work too, the difference is small
                            features[lex.index(word)] = 1

                    batch_x.append(list(features))
                    batch_y.append(eval(label))

                _, loss_ = sess.run([train_op, loss],
                                    feed_dict={X: batch_x, Y: batch_y, dropout_keep_prob: 0.5})
                print(loss_)
            except Exception as e:
                print(e)

            if i % 10 == 0:
                accur = sess.run(accuracy,
                                 feed_dict={X: test_x[0:50], Y: test_y[0:50], dropout_keep_prob: 1.0})
                print('accuracy at step %s: %.3f' % (i, accur))
                saver.save(sess, './model.ckpt')  # save the model every 10 steps
            i += 1

train_neural_network()
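With this in place the script can be killed at any point; re-running it restores the most recent ./model.ckpt (recorded in the checkpoint index file) and training continues from where it left off rather than from random initialization. Note that the restore happens once, before the training loop: restoring on every iteration would reset the weights to the last checkpoint each step and throw away up to ten steps of progress between saves.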