tensorflow入门(3)运行一段代码
来源:互联网 发布:儿童编程培训班加盟 编辑:程序博客网 时间:2024/05/29 03:47
运行一段博客上的代码:
# encoding: utf-8
"""Prepare the speech corpus: collect wav files, load transcripts, build a vocabulary.

Data layout (from the original blog post):
    wav_path   -- directory tree of training .wav files
    label_file -- text file, one line per utterance: "<wav_id> <transcript>"
"""
import tensorflow as tf  # 0.12
import numpy as np
import os
from collections import Counter
import librosa  # https://github.com/librosa/librosa

wav_path = '/home/liuyuan/Desktop/data/wav/train'
label_file = '/home/liuyuan/Desktop/data/doc/trans/train.word.txt'


def get_wav_files(wav_path=wav_path):
    """Recursively collect *.wav / *.WAV files under ``wav_path``.

    Files smaller than 240000 bytes are skipped (presumably clips too short
    to be useful -- the threshold is the blog author's choice).
    Returns a list of file paths.
    """
    wav_files = []
    for (dirpath, dirnames, filenames) in os.walk(wav_path):
        for filename in filenames:
            if filename.endswith('.wav') or filename.endswith('.WAV'):
                filename_path = os.sep.join([dirpath, filename])
                # skip clips below the size threshold
                if os.stat(filename_path).st_size < 240000:
                    continue
                wav_files.append(filename_path)
    return wav_files


wav_files = get_wav_files()


def get_wav_lable(wav_files=wav_files, label_file=label_file):
    """Pair each wav file with its transcript.

    ``label_file`` has one "<id> <text>" line per utterance; a wav file is
    kept only when its basename (without extension) appears in the label
    file.  Returns ``(new_wav_files, labels)`` -- two parallel lists.
    """
    labels_dict = {}
    with open(label_file, 'r') as f:
        for label in f:
            label = label.strip('\n')
            label_id = label.split(' ', 1)[0]
            label_text = label.split(' ', 1)[1]
            labels_dict[label_id] = label_text
    labels = []
    new_wav_files = []
    for wav_file in wav_files:
        wav_id = os.path.basename(wav_file).split('.')[0]
        if wav_id in labels_dict:
            labels.append(labels_dict[wav_id])
            new_wav_files.append(wav_file)
    return new_wav_files, labels


wav_files, labels = get_wav_lable()
print("样本数:", len(wav_files))  # 8911
# print(wav_files[0], labels[0])
# wav/train/A11/A11_0.WAV -> 绿 是 阳春 烟 景 大块 文章 的 底色 四月 的 林 峦 更是 绿 得 鲜活 秀媚 诗意 盎然

# Vocabulary, character level (see exercises 1 and 7)
all_words = []
for label in labels:
    all_words += [word for word in label]
counter = Counter(all_words)
# most frequent characters get the smallest ids
count_pairs = sorted(counter.items(), key=lambda x: -x[1])
words, _ = zip(*count_pairs)
words_size = len(words)
print('词汇表大小:', words_size)
# char -> integer id; unknown characters map to len(words)
word_num_map = dict(zip(words, range(len(words))))
to_num = lambda word: word_num_map.get(word, len(words))
labels_vector = [list(map(to_num, label)) for label in labels]
# print(wavs_file[0], labels_vector[0])
# wav/train/A11/A11_0.WAV -> [479, 0, 7, 0, 138, 268, 0, 222, 0, 714, 0, 23, 261, 0, 28, 1191, 0, 1, 0, 442, 199, 0, 72, 38, 0, 1, 0, 463, 0, 1184, 0, 269, 7, 0, 479, 0, 70, 0, 816, 254, 0, 675, 1707, 0, 1255, 136, 0, 2020, 91]
print(words[479])  # 绿

# Longest transcript (in characters) and longest MFCC sequence -- used for
# zero-padding batches to a fixed length.
label_max_len = np.max([len(label) for label in labels_vector])
print('最长句子的字数:', label_max_len)
wav_max_len = 0  # 673
for wav in wav_files:
    wav, sr = librosa.load(wav, mono=True)
    # (n_mfcc, frames) -> (frames, n_mfcc); librosa's default n_mfcc is 20
    mfcc = np.transpose(librosa.feature.mfcc(wav, sr), [1, 0])
    if len(mfcc) > wav_max_len:
        wav_max_len = len(mfcc)
print("最长的语音:", wav_max_len)

batch_size = 16
n_batch = len(wav_files) // batch_size

# Cursor into (wav_files, labels_vector) for sequential batching.
pointer = 0


def get_next_batches(batch_size):
    """Return the next ``batch_size`` (mfcc, label-id) pairs, zero-padded.

    MFCC sequences are padded to ``wav_max_len`` with 20-dim zero frames;
    label sequences are padded to ``label_max_len`` with 0 (id of the most
    frequent character, which here is the word separator).
    """
    global pointer
    batches_wavs = []
    batches_labels = []
    for i in range(batch_size):
        wav, sr = librosa.load(wav_files[pointer], mono=True)
        mfcc = np.transpose(librosa.feature.mfcc(wav, sr), [1, 0])
        batches_wavs.append(mfcc.tolist())
        batches_labels.append(labels_vector[pointer])
        pointer += 1
    # zero-pad to the corpus-wide maximum lengths
    for mfcc in batches_wavs:
        while len(mfcc) < wav_max_len:
            mfcc.append([0] * 20)
    for label in batches_labels:
        while len(label) < label_max_len:
            label.append(0)
    return batches_wavs, batches_labels


X = tf.placeholder(dtype=tf.float32, shape=[batch_size, None, 20])
# Per-example length = number of frames whose 20 coefficients do not sum to
# exactly 0 (i.e. non-padding frames).
sequence_len = tf.reduce_sum(
    tf.cast(tf.not_equal(tf.reduce_sum(X, reduction_indices=2), 0.), tf.int32),
    reduction_indices=1)
Y = tf.placeholder(dtype=tf.int32, shape=[batch_size, None])

# conv1d_layer
conv1d_index = 0


def conv1d_layer(input_tensor, size, dim, activation, scale, bias):
    """1-D convolution with either a bias term or batch normalization.

    When ``bias`` is False a hand-rolled batch norm is applied: running
    mean/variance variables are kept, but the is-training switch is a
    non-trainable Variable fixed to False, so only the running statistics
    are ever used.  NOTE(review): that looks like it was meant to be a
    placeholder toggled at train time -- confirm against the original post.
    """
    global conv1d_index
    with tf.variable_scope('conv1d_' + str(conv1d_index)):
        W = tf.get_variable('W', (size, input_tensor.get_shape().as_list()[-1], dim),
                            dtype=tf.float32,
                            initializer=tf.random_uniform_initializer(minval=-scale, maxval=scale))
        if bias:
            b = tf.get_variable('b', [dim], dtype=tf.float32,
                                initializer=tf.constant_initializer(0))
        out = tf.nn.conv1d(input_tensor, W, stride=1, padding='SAME') + (b if bias else 0)
        if not bias:
            beta = tf.get_variable('beta', dim, dtype=tf.float32,
                                   initializer=tf.constant_initializer(0))
            gamma = tf.get_variable('gamma', dim, dtype=tf.float32,
                                    initializer=tf.constant_initializer(1))
            mean_running = tf.get_variable('mean', dim, dtype=tf.float32,
                                           initializer=tf.constant_initializer(0))
            variance_running = tf.get_variable('variance', dim, dtype=tf.float32,
                                               initializer=tf.constant_initializer(1))
            # moments over all axes except the last (channel) axis;
            # list(...) because tf.nn.moments does not accept a range object on py3
            mean, variance = tf.nn.moments(out, axes=list(range(len(out.get_shape()) - 1)))

            def update_running_stat():
                decay = 0.99
                update_op = [mean_running.assign(mean_running * decay + mean * (1 - decay)),
                             variance_running.assign(variance_running * decay + variance * (1 - decay))]
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            m, v = tf.cond(tf.Variable(False, trainable=False,
                                       collections=[tf.GraphKeys.LOCAL_VARIABLES]),
                           update_running_stat,
                           lambda: (mean_running, variance_running))
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, 1e-8)
    if activation == 'tanh':
        out = tf.nn.tanh(out)
    if activation == 'sigmoid':
        out = tf.nn.sigmoid(out)
    conv1d_index += 1
    return out


# aconv1d_layer
aconv1d_index = 0


def aconv1d_layer(input_tensor, size, rate, activation, scale, bias):
    """Dilated (atrous) 1-D convolution, implemented via atrous_conv2d on a
    tensor expanded to rank 4.  Channel count is preserved.  Same bias /
    batch-norm convention as ``conv1d_layer``.
    """
    global aconv1d_index
    with tf.variable_scope('aconv1d_' + str(aconv1d_index)):
        shape = input_tensor.get_shape().as_list()
        W = tf.get_variable('W', (1, size, shape[-1], shape[-1]), dtype=tf.float32,
                            initializer=tf.random_uniform_initializer(minval=-scale, maxval=scale))
        if bias:
            b = tf.get_variable('b', [shape[-1]], dtype=tf.float32,
                                initializer=tf.constant_initializer(0))
        out = tf.nn.atrous_conv2d(tf.expand_dims(input_tensor, dim=1), W,
                                  rate=rate, padding='SAME')
        out = tf.squeeze(out, [1])
        if not bias:
            beta = tf.get_variable('beta', shape[-1], dtype=tf.float32,
                                   initializer=tf.constant_initializer(0))
            gamma = tf.get_variable('gamma', shape[-1], dtype=tf.float32,
                                    initializer=tf.constant_initializer(1))
            mean_running = tf.get_variable('mean', shape[-1], dtype=tf.float32,
                                           initializer=tf.constant_initializer(0))
            variance_running = tf.get_variable('variance', shape[-1], dtype=tf.float32,
                                               initializer=tf.constant_initializer(1))
            # list(...) for py3 compatibility (same as conv1d_layer)
            mean, variance = tf.nn.moments(out, axes=list(range(len(out.get_shape()) - 1)))

            def update_running_stat():
                decay = 0.99
                update_op = [mean_running.assign(mean_running * decay + mean * (1 - decay)),
                             variance_running.assign(variance_running * decay + variance * (1 - decay))]
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            m, v = tf.cond(tf.Variable(False, trainable=False,
                                       collections=[tf.GraphKeys.LOCAL_VARIABLES]),
                           update_running_stat,
                           lambda: (mean_running, variance_running))
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, 1e-8)
    if activation == 'tanh':
        out = tf.nn.tanh(out)
    if activation == 'sigmoid':
        out = tf.nn.sigmoid(out)
    aconv1d_index += 1
    return out


# Define the network (WaveNet-style gated dilated convolutions over X)
def speech_to_text_network(n_dim=128, n_blocks=3):
    """Build the acoustic model graph and return per-frame logits.

    ``n_blocks`` stacks of dilated residual blocks (rates 1,2,4,8,16);
    skip outputs are summed and projected to ``words_size`` classes.
    """
    out = conv1d_layer(input_tensor=X, size=1, dim=n_dim,
                       activation='tanh', scale=0.14, bias=False)

    # residual block with gated activation; returns (residual, skip)
    def residual_block(input_sensor, size, rate):
        conv_filter = aconv1d_layer(input_sensor, size=size, rate=rate,
                                    activation='tanh', scale=0.03, bias=False)
        conv_gate = aconv1d_layer(input_sensor, size=size, rate=rate,
                                  activation='sigmoid', scale=0.03, bias=False)
        out = conv_filter * conv_gate
        out = conv1d_layer(out, size=1, dim=n_dim,
                           activation='tanh', scale=0.08, bias=False)
        return out + input_sensor, out

    skip = 0
    for _ in range(n_blocks):
        for r in [1, 2, 4, 8, 16]:
            out, s = residual_block(out, size=7, rate=r)
            skip += s
    logit = conv1d_layer(skip, size=1, dim=skip.get_shape().as_list()[-1],
                         activation='tanh', scale=0.08, bias=False)
    logit = conv1d_layer(logit, size=1, dim=words_size,
                         activation=None, scale=0.04, bias=True)
    return logit


class MaxPropOptimizer(tf.train.Optimizer):
    """MaxProp: RMSProp variant that tracks the running max of |grad|
    instead of the mean square (cf. AdaMax's infinity-norm update).
    """

    def __init__(self, learning_rate=0.001, beta2=0.999, use_locking=False, name="MaxProp"):
        super(MaxPropOptimizer, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._beta2 = beta2
        self._lr_t = None
        self._beta2_t = None

    def _prepare(self):
        self._lr_t = tf.convert_to_tensor(self._lr, name="learning_rate")
        self._beta2_t = tf.convert_to_tensor(self._beta2, name="beta2")

    def _create_slots(self, var_list):
        for v in var_list:
            self._zeros_slot(v, "m", self._name)

    def _apply_dense(self, grad, var):
        lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
        beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
        # wider epsilon for half precision
        if var.dtype.base_dtype == tf.float16:
            eps = 1e-7
        else:
            eps = 1e-8
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
        g_t = grad / m_t
        var_update = tf.assign_sub(var, lr_t * g_t)
        return tf.group(*[var_update, m_t])

    def _apply_sparse(self, grad, var):
        return self._apply_dense(grad, var)


def train_speech_to_text_network():
    """Train the network with CTC loss for 16 epochs, checkpointing every 5."""
    global pointer
    logit = speech_to_text_network()

    # CTC loss: build a SparseTensor of the non-zero label ids.
    # Label ids are shifted down by 1 so that 0 can serve as the CTC blank.
    indices = tf.where(tf.not_equal(tf.cast(Y, tf.float32), 0.))
    target = tf.SparseTensor(indices=indices,
                             values=tf.gather_nd(Y, indices) - 1,
                             dense_shape=tf.cast(tf.shape(Y), tf.int64))
    loss = tf.nn.ctc_loss(target, logit, sequence_len, time_major=False)
    # optimizer with exponentially decayed learning rate
    lr = tf.Variable(0.001, dtype=tf.float32, trainable=False)
    optimizer = MaxPropOptimizer(learning_rate=lr, beta2=0.99)
    var_list = [t for t in tf.trainable_variables()]
    gradient = optimizer.compute_gradients(loss, var_list=var_list)
    optimizer_op = optimizer.apply_gradients(gradient)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        for epoch in range(16):
            sess.run(tf.assign(lr, 0.001 * (0.97 ** epoch)))
            pointer = 0
            for batch in range(n_batch):
                batches_wavs, batches_labels = get_next_batches(batch_size)
                train_loss, _ = sess.run([loss, optimizer_op],
                                         feed_dict={X: batches_wavs, Y: batches_labels})
                print(epoch, batch, train_loss)
            if epoch % 5 == 0:
                saver.save(sess, 'speech.module', global_step=epoch)


# 训练
train_speech_to_text_network()


# 语音识别
# 把batch_size改为1
def speech_to_text(wav_file):
    """Restore the latest checkpoint and decode ``wav_file`` with CTC beam search.

    Returns the dense sequence of label ids (+1 undoes the blank shift used
    at training time).  NOTE(review): requires batch_size == 1 as the comment
    above says; the graph placeholders are built with the global batch_size.
    """
    wav, sr = librosa.load(wav_file, mono=True)
    mfcc = np.transpose(np.expand_dims(librosa.feature.mfcc(wav, sr), axis=0), [0, 2, 1])
    logit = speech_to_text_network()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('.'))
        # ctc_beam_search_decoder expects time-major input
        decoded = tf.transpose(logit, perm=[1, 0, 2])
        decoded, _ = tf.nn.ctc_beam_search_decoder(decoded, sequence_len, merge_repeated=False)
        predict = tf.sparse_to_dense(decoded[0].indices, decoded[0].shape, decoded[0].values) + 1
        # Fix: evaluate the dense prediction (the original ran `decoded` and
        # discarded `predict`) and return it to the caller.
        output = sess.run(predict, feed_dict={X: mfcc})
        # print(output)
        return output
总结方法:
$ sudo apt install python-pip
$ pip install librosa
1、问题:
no such module named librosa
解决方法:安装librosa,见librosa官网
在https://github.com/librosa/librosa 中下载librosa包,并运行setup.py文件。
Source
If you’ve downloaded the archive manually from the releases page, you can install using the setuptools script:
tar xzf librosa-VERSION.tar.gz
cd librosa-VERSION/
python setup.py install
2、问题:
from setuptools import setup, find_packages
解决方法:安装setuptools
sudo apt-get install python-setuptools
3、问题:
ImportError: No module named setuptools
解决方法:Linux环境下安装setuptools
sudo apt-get install python-setuptools
4、问题: dpkg was interrupted, you must manually run 'dpkg --configure -a' to correct the problem. E: _cache->open() failed, please report.
出现该错误其实有很多种可能,但是终其原因就是系统中的某个软件有问题
解决方法:CSDN博客文章
sudo dpkg --configure -a
sudo apt-get update
sudo apt-get upgrade
最后又在终端出现了这样的一个问题:
libc6-dev:依赖:libc6(=2.9-4ubuntu6.1) 但是2.9-Ubuntu6已经安装了 E:不能满足依赖关系。不妨试一下-f选项 于是我在终端输入sudo apt-get -f install
问题得到解决!
5、问题:
ImportError: No module named numpy
解决方法:
sudo apt-get install python-numpy
6、问题:
warning: no files found matching '*.pxd' under directory 'resampy'
resampy/interp.c:24:20: fatal error: Python.h: No such file or directory
compilation terminated.
error: Setup script exited with error: command 'x86_64-linux-gnu-gcc' failed with exit status 1
解决办法:不使用问题1的解决方法,改用pypi
pypi
The simplest way to install librosa is through the Python Package Index (PyPI). This will ensure that all required dependencies are fulfilled. This can be achieved by executing the following command:
pip install librosa
7、问题:没有安装pip
正确的解决方法:
sudo apt install python-pip
错误的解决方法:运行以下命令 (需要管理员权限): 详见博客。
$ sudo wget https://bootstrap.pypa.io/get-pip.py
$ python get-pip.py
$ pip -V  # 查看pip版本
pip install librosa
8、问题:
Collecting scikit-learn>=0.14.0 (from librosa) Downloading scikit_learn-0.18.1-cp27-cp27mu-manylinux1_x86_64.whl (11.6MB) 39% |████████████▋ | 4.6MB 22kB/s eta 0:05:10Exception:...ReadTimeoutError: HTTPSConnectionPool(host='pypi.python.org', port=443): Read timed out.
因为从librosa里下载速度太慢,所以超时报错,
解决方法:直接下载+设置超时报错时间
pip --default-timeout=100 install -U scikit-learn
但是搞了一圈,又回到了问题1:
no such module named librosa
问题的根本原因在于使用docker运行tensorflow程序时,无法导入其他的module,比如librosa。于是改变方法:
解决方法:使用pycharm同时导入tensorflow包和librosa包
文件:pycharm-professional-2017.1.1.tar.gz
激活服务器:http://pycharm.tyr.gift:8888
在解压后的bin文件内运行pycharm.sh文件,启动pycharm。
sudo ./pycharm.sh
在file-setting中导入tensorflow1.2即可。
运行结果如下:
- tensorflow入门(3)运行一段代码
- TensorFlow 从入门到精通(七):TensorFlow 运行原理
- TensorFlow入门(一)——了解和运行 TensorFlow
- 关于tensorflow入门代码
- tensorflow入门之tensorflow的运行结构
- 一段可自动点击运行的代码
- VC下测试一段代码运行时间
- asp.net2.0自动运行一段代码
- 如何计算一段代码的运行时间
- 封装测试一段代码的运行时间
- iOS 计算一段代码运行的时间
- C++获得一段代码的运行时间
- 一段代码:运行就堆栈溢出,呵呵。
- 一段代码(原)
- TensorFlow-MNIST入门篇代码
- tensorflow:mnis入门代码注释
- Tensorflow快速入门3–代码结构及模型例子
- 关于java web中自动运行一段代码的一些事儿(计时器、Date比较大小、自动运行代码)
- 19.struts2_ 自定义类型转换
- 超级表格如何单条共享数据
- 推荐系统
- 陈dy学姐教师招聘经验
- select、poll、epoll函数介绍
- tensorflow入门(3)运行一段代码
- kafka开发配置-----最新kafka版本(0.9.0以后的版本)配置
- python 同时读取多个文件
- STM32启动BOOT0 BOOT1设置方法 [
- 解释执行和编译执行的区别、基于栈和基于寄存器的指令集区别
- 文章标题
- CSR8670获取来电号码及开发流程
- apache2: Could not reliably determine the server’s fully qualified domain name 解决方法
- 合并文件夹一