ROS语音识别
来源:互联网 发布:网络定制蛋糕 编辑:程序博客网 时间:2024/05/20 08:24
一、语音识别包
1、安装
$ sudo apt-get install gstreamer0.10-pocketsphinx
$ sudo apt-get install ros-indigo-audio-common
//我安装的是indigo版本的
$ sudo apt-get install libasound2
$ sudo apt-get install gstreamer0.10-gconf
然后来安装ROS包:
sudo apt-get install ros-indigo-pocketsphinx
2. 安装完成后我们就可以运行测试了。
首先,插入你的麦克风设备,然后在系统设置里测试麦克风是否有语音输入。
然后,运行包中的测试程序:
$ roslaunch pocketsphinx robocup.launch
我们也可以直接看ROS最后发布的结果消息:
$ rostopic echo /recognizer/output
二、语音库
1、查看语音库
$ roscd rbx1_speech/config
$ more nav_commands.txt
这就是需要添加的文本,我们也可以修改其中的某些文本,改成自己需要的。
然后我们要把这个文件在线生成语音信息和库文件,这一步需要登陆网站http://www.speech.cs.cmu.edu/tools/lmtool-new.html,根据网站的提示上传文件,然后在线编译生成库文件。
上传nav_commands.txt文件
把下载的文件都解压放在rbx1_speech包的config文件夹下。我们可以给这些文件改个名字:
rename
-f
's/593/nav_commands/'
*
在rbx1_speech/launch文件夹下看看voice_nav_commands.launch这个文件:
<launch>
<node name=
"recognizer"
pkg=
"pocketsphinx"
type=
"recognizer.py"
output=
"screen"
>
<param name=
"lm"
value=
"$(find rbx1_speech)/config/nav_commands.lm"
/>
<param name=
"dict"
value=
"$(find rbx1_speech)/config/nav_commands.dic"
/>
</node>
</launch>
可以看到,这个launch文件在运行recognizer.py节点的时候使用了我们生成的语音识别库和文件参数,这样就可以实用我们自己的语音库来进行语音识别了。
通过之前的命令来测试一下效果如何吧:
$ roslaunch rbx1_speech voice_nav_commands.launch $ rostopic echo /recognizer/output
三、语音控制
1、机器人控制节点
在pocketsphinx包中本身有一个语音控制发布Twist消息的例程voice_cmd_vel.py,rbx1_speech包对其进行了一些简化修改,在nodes文件夹里可以查看voice_nav.py文件:
#!/usr/bin/env python
"""
voice_nav.py - Version 1.1 2013-12-20
Allows controlling a mobile base using simple speech commands.
Based on the voice_cmd_vel.py script by Michael Ferguson in
the pocketsphinx ROS package.
See http://www.ros.org/wiki/pocketsphinx
"""
import
rospy
from
geometry_msgs.msg
import
Twist
from
std_msgs.msg
import
String
from
math
import
copysign
class
VoiceNav:
def
__init__(
self
):
rospy.init_node(
'voice_nav'
)
rospy.on_shutdown(
self
.cleanup)
# Set a number of parameters affecting the robot's speed
self
.max_speed
=
rospy.get_param(
"~max_speed"
,
0.4
)
self
.max_angular_speed
=
rospy.get_param(
"~max_angular_speed"
,
1.5
)
self
.speed
=
rospy.get_param(
"~start_speed"
,
0.1
)
self
.angular_speed
=
rospy.get_param(
"~start_angular_speed"
,
0.5
)
self
.linear_increment
=
rospy.get_param(
"~linear_increment"
,
0.05
)
self
.angular_increment
=
rospy.get_param(
"~angular_increment"
,
0.4
)
# We don't have to run the script very fast
self
.rate
=
rospy.get_param(
"~rate"
,
5
)
r
=
rospy.Rate(
self
.rate)
# A flag to determine whether or not voice control is paused
self
.paused
=
False
# Initialize the Twist message we will publish.
self
.cmd_vel
=
Twist()
# Publish the Twist message to the cmd_vel topic
self
.cmd_vel_pub
=
rospy.Publisher(
'cmd_vel'
, Twist, queue_size
=
5
)
# Subscribe to the /recognizer/output topic to receive voice commands.
rospy.Subscriber(
'/recognizer/output'
, String,
self
.speech_callback)
# A mapping from keywords or phrases to commands
self
.keywords_to_command
=
{
'stop'
: [
'stop'
,
'halt'
,
'abort'
,
'kill'
,
'panic'
,
'off'
,
'freeze'
,
'shut down'
,
'turn off'
,
'help'
,
'help me'
],
'slower'
: [
'slow down'
,
'slower'
],
'faster'
: [
'speed up'
,
'faster'
],
'forward'
: [
'forward'
,
'ahead'
,
'straight'
],
'backward'
: [
'back'
,
'backward'
,
'back up'
],
'rotate left'
: [
'rotate left'
],
'rotate right'
: [
'rotate right'
],
'turn left'
: [
'turn left'
],
'turn right'
: [
'turn right'
],
'quarter'
: [
'quarter speed'
],
'half'
: [
'half speed'
],
'full'
: [
'full speed'
],
'pause'
: [
'pause speech'
],
'continue'
: [
'continue speech'
]}
rospy.loginfo(
"Ready to receive voice commands"
)
# We have to keep publishing the cmd_vel message if we want the robot to keep moving.
while
not
rospy.is_shutdown():
self
.cmd_vel_pub.publish(
self
.cmd_vel)
r.sleep()
def
get_command(
self
, data):
# Attempt to match the recognized word or phrase to the
# keywords_to_command dictionary and return the appropriate
# command
for
(command, keywords)
in
self
.keywords_to_command.iteritems():
for
word
in
keywords:
if
data.find(word) >
-
1
:
return
command
def
speech_callback(
self
, msg):
# Get the motion command from the recognized phrase
command
=
self
.get_command(msg.data)
# Log the command to the screen
rospy.loginfo(
"Command: "
+
str
(command))
# If the user has asked to pause/continue voice control,
# set the flag accordingly
if
command
=
=
'pause'
:
self
.paused
=
True
elif
command
=
=
'continue'
:
self
.paused
=
False
# If voice control is paused, simply return without
# performing any action
if
self
.paused:
return
# The list of if-then statements should be fairly
# self-explanatory
if
command
=
=
'forward'
:
self
.cmd_vel.linear.x
=
self
.speed
self
.cmd_vel.angular.z
=
0
elif
command
=
=
'rotate left'
:
self
.cmd_vel.linear.x
=
0
self
.cmd_vel.angular.z
=
self
.angular_speed
elif
command
=
=
'rotate right'
:
self
.cmd_vel.linear.x
=
0
self
.cmd_vel.angular.z
=
-
self
.angular_speed
elif
command
=
=
'turn left'
:
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.angular.z
+
=
self
.angular_increment
else
:
self
.cmd_vel.angular.z
=
self
.angular_speed
elif
command
=
=
'turn right'
:
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.angular.z
-
=
self
.angular_increment
else
:
self
.cmd_vel.angular.z
=
-
self
.angular_speed
elif
command
=
=
'backward'
:
self
.cmd_vel.linear.x
=
-
self
.speed
self
.cmd_vel.angular.z
=
0
elif
command
=
=
'stop'
:
# Stop the robot! Publish a Twist message consisting of all zeros.
self
.cmd_vel
=
Twist()
elif
command
=
=
'faster'
:
self
.speed
+
=
self
.linear_increment
self
.angular_speed
+
=
self
.angular_increment
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.linear.x
+
=
copysign(
self
.linear_increment,
self
.cmd_vel.linear.x)
if
self
.cmd_vel.angular.z !
=
0
:
self
.cmd_vel.angular.z
+
=
copysign(
self
.angular_increment,
self
.cmd_vel.angular.z)
elif
command
=
=
'slower'
:
self
.speed
-
=
self
.linear_increment
self
.angular_speed
-
=
self
.angular_increment
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.linear.x
-
=
copysign(
self
.linear_increment,
self
.cmd_vel.linear.x)
if
self
.cmd_vel.angular.z !
=
0
:
self
.cmd_vel.angular.z
-
=
copysign(
self
.angular_increment,
self
.cmd_vel.angular.z)
elif
command
in
[
'quarter'
,
'half'
,
'full'
]:
if
command
=
=
'quarter'
:
self
.speed
=
copysign(
self
.max_speed
/
4
,
self
.speed)
elif
command
=
=
'half'
:
self
.speed
=
copysign(
self
.max_speed
/
2
,
self
.speed)
elif
command
=
=
'full'
:
self
.speed
=
copysign(
self
.max_speed,
self
.speed)
if
self
.cmd_vel.linear.x !
=
0
:
self
.cmd_vel.linear.x
=
copysign(
self
.speed,
self
.cmd_vel.linear.x)
if
self
.cmd_vel.angular.z !
=
0
:
self
.cmd_vel.angular.z
=
copysign(
self
.angular_speed,
self
.cmd_vel.angular.z)
else
:
return
self
.cmd_vel.linear.x
=
min
(
self
.max_speed,
max
(
-
self
.max_speed,
self
.cmd_vel.linear.x))
self
.cmd_vel.angular.z
=
min
(
self
.max_angular_speed,
max
(
-
self
.max_angular_speed,
self
.cmd_vel.angular.z))
def
cleanup(
self
):
# When shutting down be sure to stop the robot!
twist
=
Twist()
self
.cmd_vel_pub.publish(twist)
rospy.sleep(
1
)
if
__name__
=
=
"__main__"
:
try
:
VoiceNav()
rospy.spin()
except
rospy.ROSInterruptException:
rospy.loginfo(
"Voice navigation terminated."
)
2、仿真测试
和前面一样,我们在rviz中进行仿真测试。
首先是运行一个机器人模型:
$ roslaunch rbx1_bringup fake_turtlebot.launch
然后打开rviz:
$ rosrun rviz rviz
-
d `rospack find rbx1_nav`
/
sim.rviz
最后就是机器人的控制节点了:
$ roslaunch rbx1_speech voice_nav_commands.launch 再打开语音识别的节点:$ roslaunch rbx1_speech turtlebot_voice_nav.launch 最后就是机器人的控制节点了:
OK,打开上面这一堆的节点之后,就可以开始了。可以使用的命令如下:
可能是英语太差吧,很难识别;
四、播放语音
运行下面的命令:
$ rosrun sound_play soundplay_node.py
$ rosrun sound_play say.py
"Greetings Humans. Take me to your leader."
有没有听见声音!ROS通过识别我们输入的文本,让机器人读了出来。发出这个声音的人叫做kal_diphone,如果不喜欢,我们也可以换一个人来读:
$ sudo apt
-
get install festvox
-
don
$ rosrun sound_play say.py
"Welcome to the future"
voice_don_diphone
哈哈,这回换了一个人吧,好吧,这不也是我们的重点。
在rbx1_speech/nodes文件夹中有一个让机器人说话的节点talkback.py:
#!/usr/bin/env python
"""
talkback.py - Version 1.1 2013-12-20
Use the sound_play client to say back what is heard by the pocketsphinx recognizer.
Created for the Pi Robot Project: http://www.pirobot.org
Copyright (c) 2012 Patrick Goebel. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.5
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details at:
http://www.gnu.org/licenses/gpl.htmlPoint
"""
import
rospy
from
std_msgs.msg
import
String
from
sound_play.libsoundplay
import
SoundClient
import
sys
class
TalkBack:
def
__init__(
self
, script_path):
rospy.init_node(
'talkback'
)
rospy.on_shutdown(
self
.cleanup)
# Set the default TTS voice to use
self
.voice
=
rospy.get_param(
"~voice"
,
"voice_don_diphone"
)
# Set the wave file path if used
self
.wavepath
=
rospy.get_param(
"~wavepath"
, script_path
+
"/../sounds"
)
# Create the sound client object
self
.soundhandle
=
SoundClient()
# Wait a moment to let the client connect to the
# sound_play server
rospy.sleep(
1
)
# Make sure any lingering sound_play processes are stopped.
self
.soundhandle.stopAll()
# Announce that we are ready for input
self
.soundhandle.playWave(
self
.wavepath
+
"/R2D2a.wav"
)
rospy.sleep(
1
)
self
.soundhandle.say(
"Ready"
,
self
.voice)
rospy.loginfo(
"Say one of the navigation commands..."
)
# Subscribe to the recognizer output and set the callback function
rospy.Subscriber(
'/recognizer/output'
, String,
self
.talkback)
def
talkback(
self
, msg):
# Print the recognized words on the screen
rospy.loginfo(msg.data)
# Speak the recognized words in the selected voice
self
.soundhandle.say(msg.data,
self
.voice)
# Uncomment to play one of the built-in sounds
#rospy.sleep(2)
#self.soundhandle.play(5)
# Uncomment to play a wave file
#rospy.sleep(2)
#self.soundhandle.playWave(self.wavepath + "/R2D2a.wav")
def
cleanup(
self
):
self
.soundhandle.stopAll()
rospy.loginfo(
"Shutting down talkback node..."
)
if
__name__
=
=
"__main__"
:
try
:
TalkBack(sys.path[
0
])
rospy.spin()
except
rospy.ROSInterruptException:
rospy.loginfo(
"Talkback node terminated."
)
我们来运行看一下效果:
$ roslaunch rbx1_speech talkback.launch
- ROS语音识别
- ROS 语音识别
- ROS学习--语音合成&语音识别
- ROS语音识别-----pocketsphinx安装和使用
- 【ROS总结】ROS下的百度语音识别应用
- ros语音识别:pocketsphinx_continuous工作正常而gst-pocketsphinx不能识别相同语音的问题。
- ros语音识别功能库安装过程总结
- ROS下使用科大讯飞SDK进行在线语音识别
- Ros语音
- ROS中PocketSphinx语音识别_安装arbotix simulator仿真环境_turtlebot的仿真语音控制
- 语音识别
- 语音识别
- 语音识别
- 语音识别
- 语音识别
- 语音识别
- 语音识别
- 语音识别
- 输出模拟
- 447. Number of Boomerangs | 点之间的距离相等的数量
- spring redis 相关配置总结
- vue学习(一) vue指令和过滤器
- 三层
- ROS语音识别
- 初认识Java调用SPSS统计软件
- IDEA安装和JDK的配置
- CodeForces - 274B Zero Tree — 树形DP
- 基于msm8916移植lcd流程--kernel
- Android平台美颜相机/Camera实时滤镜/视频编解码/影像后期/人脸技术探索——2.3 仿制Snow相机和FaceU的边框/小脸模式
- 1019. General Palindromic Number
- Android开发中熟识的方法或类
- oj1615: 12!配对