Unreal语音示例:讯飞在线语音听写UE4 Demo
来源:互联网 发布:尚学堂javascript视频 编辑:程序博客网 时间:2024/05/10 14:20
关键字: Unreal 蓝图 语音识别 C++
由于项目需求,使用语音进行游戏交互,踩了两周多的坑,终于搞出来了,记录一下踩坑经历
先贴上demo:https://github.com/303snowing/UnrealXunFeiSpeech
demo的引擎版本是4.17.1,使用Visual Studio 2017 15.3
准备工作
- 先到科大讯飞的语音平台注册、创建一个应用(这步必须,因为只有拥有appid才能下载对应的sdk)
- 下载在线语音听写sdk:http://www.xfyun.cn/services/voicedictation,解压
- 建立Unreal C++空白项目,不需要包含Start Content,示例的项目名称为
UnrealXunFeiSpeech
- 编译项目并启动项目实例
- 建立一个继承自Actor的C++类,可见性为Public
讯飞库在 Windows_voice_1166_59940824/libs 和 Windows_voice_1166_59940824/bin 目录下,头文件在 Windows_voice_1166_59940824/include 目录下
导入讯飞库
- 在项目根目录下建立一个XunFei文件夹
- 将Windows_voice_1166_59940824/libs和Windows_voice_1166_59940824/include目录拷贝到XunFei文件夹中
- 将Windows_voice_1166_59940824/bin下的
msc_x64.dll
文件拷贝到项目工程的Binaries/Win64目录下(该.dll文件在项目迁移或打包过程中都不可缺少)
配置讯飞库搜索路径
编辑Source/UnrealXunFeiSpeech/UnrealXunFeiSpeech.Build.cs文件,代码如下
// Fill out your copyright notice in the Description page of Project Settings.

using System.IO;
using UnrealBuildTool;

/// <summary>
/// Build rules for the UnrealXunFeiSpeech game module.
/// Besides the usual engine modules it links the iFlytek MSC import library
/// (msc_x64.lib) and exposes the iFlytek SDK headers, both of which live in
/// the "XunFei" folder at the project root.
/// </summary>
public class UnrealXunFeiSpeech : ModuleRules
{
    public UnrealXunFeiSpeech(ReadOnlyTargetRules Target) : base(Target)
    {
        PCHUsage = PCHUsageMode.UseExplicitOrSharedPCHs;

        // "Json" is required by SpeechActor.cpp to parse the recognition result.
        PublicDependencyModuleNames.AddRange(new string[] { "Core", "CoreUObject", "Engine", "InputCore", "Json" });

        PrivateDependencyModuleNames.AddRange(new string[] { });

        // Uncomment if you are using Slate UI
        // PrivateDependencyModuleNames.AddRange(new string[] { "Slate", "SlateCore" });

        // Uncomment if you are using online features
        // PrivateDependencyModuleNames.Add("OnlineSubsystem");

        // To include OnlineSubsystemSteam, add it to the plugins section in your uproject file with the Enabled attribute set to true

        // Header search paths of the module itself.
        PrivateIncludePaths.Add("UnrealXunFeiSpeech/Private");
        PublicIncludePaths.Add("UnrealXunFeiSpeech/Public");

        // Resolve the SDK folder from the module location instead of relying on
        // the build tool's current working directory:
        //   <Project>/Source/UnrealXunFeiSpeech/../../XunFei  ->  <Project>/XunFei
        string XunFeiDir = Path.GetFullPath(Path.Combine(ModuleDirectory, "..", "..", "XunFei"));

        // iFlytek import library.
        PublicLibraryPaths.Add(Path.Combine(XunFeiDir, "libs"));
        PublicAdditionalLibraries.Add("msc_x64.lib");

        // iFlytek SDK headers (qisr.h, msp_cmn.h, msp_errors.h, ...).
        PublicIncludePaths.Add(Path.Combine(XunFeiDir, "include"));
    }
}
编写代码
1. 创建FWinRec
类
FWinRec类对应讯飞官方例子的winrec.c文件,是封装的Windows录音功能,在Source/UnrealXunFeiSpeech/Public下创建WinRec.h
,在Source/UnrealXunFeiSpeech/Private下创建WinRec.cpp
,并在项目中添加到对应目录下
WinRec.h | WinRec.cpp
由于在其他文件中,均直接或者间接的包含WinRec.h,所以在WinRec.h中添加了自定义的Log标签。
// Fill out your copyright notice in the Description page of Project Settings./** @file* @brief a record interface in windows** it encapsluate the windows API waveInxxx;* Common steps:* create_recorder,* open_recorder,* start_record,* stop_record,* close_recorder,* destroy_recorder** @author 303snowing* @date 2017/09/09*/#pragma once//包含项目文件,可在该文件中使用Unreal的库#include "UnrealXunFeiSpeech.h"#include <stdlib.h>#include <windows.h>#include <mmsystem.h> #include <process.h>#include <errno.h>// 自定义定义静态Log 包含WinRec.h使用DEFINE_LOG_CATEGORY_STATIC(SnowingLog, Log, All);DEFINE_LOG_CATEGORY_STATIC(SnowingWarning, Warning, All);DEFINE_LOG_CATEGORY_STATIC(SnowingError, Error, All);/* error code */enum { RECORD_ERR_BASE = 0, RECORD_ERR_GENERAL, RECORD_ERR_MEMFAIL, RECORD_ERR_INVAL, RECORD_ERR_NOT_READY};/* recorder object. */struct recorder { void(*on_data_ind)(char *data, unsigned long len, void *user_para); void * user_cb_para; volatile int state; /* internal record state */ void * wavein_hdl; void * rec_thread_hdl; void * bufheader; unsigned int bufcount;};//每个类中声明自己的Log标签,方便调试DECLARE_LOG_CATEGORY_EXTERN(WinRec, Warning, All);class FWinRec{ //将回调函数声明为友元,以方便访问私有方法 friend static unsigned int __stdcall record_thread_proc(void * para); /* the recording callback thread procedure */public: FWinRec() = default; FWinRec(FString); ~FWinRec();private: void dbg_wave_header(WAVEHDR * buf); int create_callback_thread(void *thread_proc_para, HANDLE *thread_hdl_out); void close_callback_thread(HANDLE thread); int open_rec_device(int dev, WAVEFORMATEX *format, HANDLE thread, HWAVEIN *wave_hdl_out); int prepare_rec_buffer(HWAVEIN wi, WAVEHDR ** bufheader_out, unsigned int headercount, unsigned int bufsize); void free_rec_buffer(HWAVEIN wi, WAVEHDR *first_header, unsigned headercount); void close_rec_device(HWAVEIN wi); int start_record_internal(HWAVEIN wi, WAVEHDR *header, unsigned int bufcount); int stop_record_internal(HWAVEIN wi); void data_proc(struct recorder *rec, MSG *msg); int 
is_stopped_internal(struct recorder *rec); int open_recorder_internal(struct recorder * rec, unsigned int dev, WAVEFORMATEX * fmt); void close_recorder_internal(struct recorder *rec);public: /** * @fn * @brief Get the default input device ID * * @return returns WAVE_MAPPER in windows. */ int get_default_input_dev(); /** * @fn * @brief Get the total number of active input devices. * @return the number. 0 means no active device. */ unsigned int get_input_dev_num(); /** * @fn * @brief Create a recorder object. * @return int - Return 0 in success, otherwise return error code. * @param out_rec - [out] recorder object holder * @param on_data_ind - [in] callback. called when data coming. * @param user_cb_para - [in] user params for the callback. * @see */ int create_recorder(struct recorder ** out_rec, void(*on_data_ind)(char *data, unsigned long len, void *user_para), void* user_cb_para); /** * @fn * @brief Destroy recorder object. free memory. * @param rec - [in]recorder object */ void destroy_recorder(struct recorder *rec); /** * @fn * @brief open the device. * @return int - Return 0 in success, otherwise return error code. * @param rec - [in] recorder object * @param dev - [in] device id, from 0. * @param fmt - [in] record format. * @see * get_default_input_dev() */ int open_recorder(struct recorder * rec, unsigned int dev, WAVEFORMATEX * fmt); /** * @fn * @brief close the device. * @param rec - [in] recorder object */ void close_recorder(struct recorder *rec); /** * @fn * @brief start record. * @return int - Return 0 in success, otherwise return error code. * @param rec - [in] recorder object */ int start_record(struct recorder * rec); /** * @fn * @brief stop record. * @return int - Return 0 in success, otherwise return error code. * @param rec - [in] recorder object */ int stop_record(struct recorder * rec); /** * @fn * @brief test if the recording has been stopped. * @return int - 1: stopped. 0 : recording. 
* @param rec - [in] recorder object */ int is_record_stopped(struct recorder *rec);};//定义一个静态变量供C++代码使用,以访问全局变量static FWinRec * winrec = new FWinRec(FString("static winrec be created !"));
2.创建FSpeechRecoginzer
类
FSpeechRecoginzer类对应讯飞官方例子的speechrecoginzer.c文件,封装了语音在线听写功能,在Source/UnrealXunFeiSpeech/Public下创建SpeechRecoginzer.h(文件名需与代码中的 #include "SpeechRecoginzer.h" 保持一致)
,在Source/UnrealXunFeiSpeech/Private下创建SpeechRecoginzer.cpp
,并在项目中添加到对应目录下
SpeechRecoginzer.h | SpeechRecoginzer.cpp
基于录音接口和讯飞MSC接口封装一个MIC录音识别的模块
// Fill out your copyright notice in the Description page of Project Settings./*@file@brief 基于录音接口和讯飞MSC接口封装一个MIC录音识别的模块@author taozhang9@date 2016/05/27*/#pragma once#include <stdlib.h>#include <windows.h>#include "qisr.h"#include "msp_cmn.h"#include "msp_errors.h"#include "WinRec.h"#include "SpeechActor.h"enum sr_audsrc{ SR_MIC, /* write data from mic */ SR_USER /* write data from user by calling API */};#define DEFAULT_INPUT_DEVID (-1)#define E_SR_NOACTIVEDEVICE 1#define E_SR_NOMEM 2#define E_SR_INVAL 3#define E_SR_RECORDFAIL 4#define E_SR_ALREADY 5struct speech_rec_notifier { void(*on_result)(const char *result, char is_last); void(*on_speech_begin)(); void(*on_speech_end)(int reason); /* 0 if VAD. others, error : see E_SR_xxx and msp_errors.h */};#define END_REASON_VAD_DETECT 0 /* detected speech done */struct speech_rec { enum sr_audsrc aud_src; /* from mic or manual stream write */ struct speech_rec_notifier notif; const char * session_id; int ep_stat; int rec_stat; int audio_status; struct recorder *recorder; volatile int state; char * session_begin_params;};DECLARE_LOG_CATEGORY_EXTERN(SpeechRecoginzer, Warning, All);//声明代理//DECLARE_DELEGATE_RetVal(FString, OnGetResult)class FSpeechRecoginzer{ friend static void iat_cb(char *data, unsigned long len, void *user_para);public: FSpeechRecoginzer() = default; FSpeechRecoginzer(FString); virtual ~FSpeechRecoginzer(); //OnGetResult GettedResult;private: void end_sr_on_error(struct speech_rec *sr, int errcode); void end_sr_on_vad(struct speech_rec *sr); char * skip_space(char *s); int update_format_from_sessionparam(const char * session_para, WAVEFORMATEX *wavefmt); void wait_for_rec_stop(struct recorder *rec, unsigned int timeout_ms);public: /* must init before start . devid = -1, then the default device will be used. 
devid will be ignored if the aud_src is not SR_MIC */ int sr_init(struct speech_rec * sr, const char * session_begin_params, enum sr_audsrc aud_src, int devid, struct speech_rec_notifier * notifier); int sr_start_listening(struct speech_rec *sr); int sr_stop_listening(struct speech_rec *sr); /* only used for the manual write way. */ int sr_write_audio_data(struct speech_rec *sr, char *data, unsigned int len); /* must call uninit after you don't use it */ void sr_uninit(struct speech_rec * sr);};//定义一个静态变量供C++代码使用,以访问全局变量static FSpeechRecoginzer * speechrecoginzer = new FSpeechRecoginzer("static soeech recoginzer be created !");
3. 创建FXunFeiSpeech
类
FXunFeiSpeech类中封装了语音识别的执行方法,包含整体流程控制与事件控制,在Source/UnrealXunFeiSpeech/Public下创建XunFeiSpeech.h
,在Source/UnrealXunFeiSpeech/Private下创建XunFeiSpeech.cpp
,并在项目中添加到对应目录下
XunFeiSpeech.h | XunFeiSpeech.cpp
语音听写(iFly Auto Transform)技术能够实时地将语音转换成对应的文字。
#pragma once#include <conio.h>#include "msp_cmn.h"#include "msp_errors.h"#include "SpeechRecoginzer.h"/** 语音听写(iFly Auto Transform)技术能够实时地将语音转换成对应的文字。*/#define FRAME_LEN 640#define BUFFER_SIZE 4096// 识别状态类型enum { EVT_START = 0, EVT_STOP, EVT_QUIT, EVT_TOTAL};DECLARE_LOG_CATEGORY_EXTERN(XunFeiSpeech, Warning, All);class FXunFeiSpeech{public: //struct speech_rec iat;public: FXunFeiSpeech(); FXunFeiSpeech(FString); //事件触发,控制录音的开始、结束、与程序的退出 void SetStart(); void SetStop(); void SetQuit(); //整个流程控制 void speech_mic(const char* session_beging_params); //将识别结果返回 const char* get_result() const;};//定义静态实例方便其他C++代码使用static FXunFeiSpeech * xunfeispeech = new FXunFeiSpeech(FString("static xunfeispeech be created !"));
4. 创建FSpeechTask
类
FSpeechTask类继承FNonAbandonableTask
,用来将语音识别作为独立线程启动,避免在语音录入和识别时阻塞游戏主线程,在Source/UnrealXunFeiSpeech/Public下创建SpeechTask.h
,在Source/UnrealXunFeiSpeech/Private下创建SpeechTask.cpp
,并在项目中添加到对应目录下
SpeechTask.h | SpeechTask.cpp
#pragma once#include "XunFeiSpeech.h"#include "AsyncWork.h"class FSpeechTask : public FNonAbandonableTask{ friend class FAutoDeleteAsyncTask<FSpeechTask>; FSpeechTask() { UE_LOG(SnowingWarning, Warning, TEXT("Speech Task be Create !")); } void DoWork(); FORCEINLINE TStatId GetStatId() const { RETURN_QUICK_DECLARE_CYCLE_STAT(FSpeechTask, STATGROUP_ThreadPoolAsyncTasks); }};
编写ASpeechActor类
ASpeechActor
类为蓝图暴露操作方法,包含语音初始化、打开录音、停止录音和退出录音释放资源操作
- SpeechActor.h
// Fill out your copyright notice in the Description page of Project Settings.#pragma once//包含SpeechTask,在初始化的时候,启动语音功能#include "SpeechTask.h"#include "CoreMinimal.h"#include "GameFramework/Actor.h"#include "SpeechActor.generated.h"UCLASS()class UNREALXUNFEISPEECH_API ASpeechActor : public AActor{ GENERATED_BODY()private: //存放语音识别结果 FString Result;public: // Sets default values for this actor's properties ASpeechActor();protected: // Called when the game starts or when spawned virtual void BeginPlay() override;public: // Called every frame virtual void Tick(float DeltaTime) override; UFUNCTION(BlueprintCallable, Category = "XunFei", meta = (DisplayName = "SpeechInit", Keywords = "Speech Recognition Initialization")) void SpeechInit(); UFUNCTION(BlueprintCallable, Category = "XunFei", meta = (DisplayName = "SpeechOpen", Keywords = "Speech Recognition Open")) void SpeechOpen(); UFUNCTION(BlueprintCallable, Category = "XunFei", meta = (DisplayName = "SpeechStop", Keywords = "Speech Recognition Stop")) void SpeechStop(); UFUNCTION(BlueprintCallable, Category = "XunFei", meta = (DisplayName = "SpeechQuit", Keywords = "Speech Recognition Quit")) void SpeechQuit(); UFUNCTION(BlueprintCallable, Category = "XunFei", meta = (DisplayName = "SpeechResult", Keywords = "Speech Recognition GetResult")) FString SpeechResult();};
- SpeechActor.cpp
// Fill out your copyright notice in the Description page of Project Settings.#pragma once#include "SpeechActor.h"//引入Unreal的Json库,用来解析识别结果(!!!注意在Build.cs文件的Module中加载Json)/*PublicDependencyModuleNames.AddRange(new string[] { "Core", "CoreUObject", "Engine", "InputCore", "Json" });*/#include "Serialization/JsonReader.h"#include "Dom/JsonObject.h"#include "Serialization/JsonSerializer.h"// Sets default valuesASpeechActor::ASpeechActor() : Result{}{ // Set this actor to call Tick() every frame. You can turn this off to improve performance if you don't need it. PrimaryActorTick.bCanEverTick = false;}// Called when the game starts or when spawnedvoid ASpeechActor::BeginPlay(){ Super::BeginPlay();}// Called every framevoid ASpeechActor::Tick(float DeltaTime){ Super::Tick(DeltaTime);}void ASpeechActor::SpeechInit(){ //创建一个SpeechTask任务实例 FAutoDeleteAsyncTask<FSpeechTask>* SpeechTask = new FAutoDeleteAsyncTask<FSpeechTask>(); if (SpeechTask) { //异步启动SpeechTask实例 会去单开线程异步执行SpeechTask中的DoWork方法 SpeechTask->StartBackgroundTask(); } else { UE_LOG(SnowingError, Error, TEXT("XunFei task object could not be create !")); return; } UE_LOG(SnowingWarning, Warning, TEXT("XunFei Task Stopped !")); return;}void ASpeechActor::SpeechOpen(){ xunfeispeech->SetStart(); return;}void ASpeechActor::SpeechStop(){ xunfeispeech->SetStop(); return;}void ASpeechActor::SpeechQuit(){ xunfeispeech->SetQuit(); Sleep(300);//延迟等待资源释放完成 return;}FString ASpeechActor::SpeechResult(){ Result = FString(UTF8_TO_TCHAR(xunfeispeech->get_result())); //去掉讯飞生成结果中的标点符号json串 FString LajiString("{\"sn\":2,\"ls\":true,\"bg\":0,\"ed\":0,\"ws\":[{\"bg\":0,\"cw\":[{\"sc\":0.00,\"w\":\"\"}]}]}"); int32 LajiIndex = Result.Find(*LajiString); if (LajiIndex != -1) { Result.RemoveFromEnd(LajiString); } TSharedPtr<FJsonObject> JsonObject; TSharedRef< TJsonReader<TCHAR> > Reader = TJsonReaderFactory<TCHAR>::Create(Result); //解析并拼接结果 返回给调用者(蓝图) if (FJsonSerializer::Deserialize(Reader, JsonObject)) { Result.Reset(); TArray< 
TSharedPtr<FJsonValue> > TempArray = JsonObject->GetArrayField("ws"); for (auto rs : TempArray) { Result.Append((rs->AsObject()->GetArrayField("cw"))[0]->AsObject()->GetStringField("w")); } } UE_LOG(SnowingError, Error, TEXT("%s"), *Result); return Result;}
编写蓝图脚本示例
这里就比较随意啦,在蓝图中构建一个SpeechActor即可使用其中的方法
注意:
1. 语音初始化在关卡运行时只需要执行一次,避免重复初始化
2. 在关卡结束的时候,或者SpeechActor实例被销毁之前,需要执行SpeechQuit方法,释放语音资源,否则在本次游戏实例中无法再次初始化
3. 在SpeechResult调用之前务必延迟至少0.3秒,等待语音识别结果完整返回
4. 请务必使用Custom Event的形式调用SpeechResult,如果在游戏主线程中直接使用函数调用,会造成游戏卡帧
补充
如果出现识别准确率不够,或者对自己的词语不太友好,可以在官方平台的应用管理>语音听写>个性化听写
页面上传热词文件,可以优化识别率。
- Unreal语音示例:讯飞在线语音听写UE4 Demo
- 在线语音听写
- 使用讯飞实现语音听写与语音合成功能
- Android 科大讯飞 在线和离线语音听写
- iOS语音听写、语音合成
- 使用讯飞语音的语音听写
- 讯飞语音听写和语音合成
- 讯飞语音开发之语音语音听写
- 离线语音听写
- 科大讯飞的语音听写
- 语音听写第二版
- 语音听写(识别)
- 基于讯飞开放平台的安卓语音开发——语音听写(语音→文本)
- 基于讯飞开放平台的安卓语音开发——语音听写(语音→文本)
- H5调用讯飞语音接口实现在线语音听写测评
- 讯飞在线语音合成
- 科大讯飞(语音合成和语音听写)
- Hololens入门之语音识别(语音听写)
- 深度学习笔记: 稀疏自编码器
- 用@Resource注解来装配属性
- 2的幂次方表示
- 爬取需要登录的网站数据
- 用@Autowired 注解来装配属性、自动装配
- Unreal语音示例:讯飞在线语音听写UE4 Demo
- [USACO08NOV]玩具Toys [洛谷2917] [bzoj1229]
- 5. 数据结构进阶五动态查询
- effective java(5) 之避免创建不必要的对象
- BZOJ 1040: [ZJOI2008]骑士 环套树dp
- 实验一 、Linux操作系统的安装
- 6. 数据结构进阶六之二叉树排序树
- 归并排序
- PAT 甲级 1024. Palindromic Number (25)