U3D C# 中文语音识别功能 之 Hololens篇----百度语音识别REST API
来源:互联网 发布:淘宝怎么用照片找同款 编辑:程序博客网 时间:2024/06/07 05:53
下面介绍如何在U3D 开发适用于 Hololens 的 UWP 平台的 语音识别功能。
首先,阅读这篇文章需要对Hololens有一定的了解。如果你了解的话,应该也知道Hololens本身其实也是支持语音识别的,而且效率和速度都不错。但是,它并不能识别中文。这是笔者在墙外面查了不少资料得到的结果。当然笔者能力有限,如果你发现它有方法支持中文语音识别的话,请告诉我!大恩不言谢。
如果您碰到什么其他问题的话,欢迎来 我自己的一个 讨论群559666429
来,大家一起找答案,共同进步
说实话,文中讲解的不是很详细。但是认真看的话,其实也不难。试着去学习,不会的先百度。实在不行的,请给我留言。我有空会给您解答。
首先你先得了解一下百度的语音识别REST的相关API以及使用方式,很简单的。(由于这不是文章主体,所以不多加描述)
相关链接:百度语音识别服务 —— 语音识别 REST API 开发笔记 ,
API请求方式基本说明
如果你看会了,那么应该知道,我们首先需要拿到 `token` 令牌。这个通过U3D的
`WWW` 类直接拿到就OK了,也很简单。最主要的是要上传给百度云的音频文件,所以对音频文件的处理,也是这个项目的重要地方。所以我们将创建两个类,一个用于网络处理
SpeechRecognition
.另一个用于录取以及处理音频操作RecordingWav
。能读这篇文章的大概都会面对同一个坑吧。那就是 UWP 不支持好多文件读写的类,比如
FileStream
,Directory
,File
…等等类。官方文档有所陈述,以及推荐的代替类库和这些类库的使用方法。https://docs.unity3d.com/Manual/windowsstore-missingtypes.html
.所以我们也就用这些类来进行对录音文件的读写操作。具体怎么做。等会代码中陈述。
1. 那么先展示出来录制以及处理音频操作的类:RecordingWav
:
using UnityEngine;
using System;
using UnityEngine.UI;
using System.Collections;
using System.IO;
#if NETFX_CORE // compiled for UWP
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif

/// <summary>
/// Records audio from the default microphone, converts the captured samples to
/// 16-bit PCM bytes, buffers them in <see cref="memoryStream"/> and then asks
/// <see cref="speechRecognition"/> to upload them.
/// </summary>
[RequireComponent(typeof(AudioSource))]
public class RecordingWav : MonoBehaviour
{
    // Turns red while recording and white when recording stops.
    public Image imageButton;

    // Animator that plays the button state animations.
    public Animator recordingButton;

    string filePath = null;

    // Whole seconds recorded, computed as microphone position / SamplingRate.
    int audioLength_time;

    private AudioSource m_audioSource;
    private AudioClip m_audioClip;

    public const int SamplingRate = 8000;
    private const int HEADER_SIZE = 44;

    public SpeechRecognition speechRecognition;

    // Whether a recording is in progress (not written by this class).
    [HideInInspector]
    public bool isRecording = false;

    // Reset to null whenever a new recording is converted.
    [HideInInspector]
    public Byte[] speech_Byte;

    // Holds the converted PCM bytes until SpeechRecognition consumes them.
    [HideInInspector]
    public MemoryStream memoryStream;

    public MicrophoneManager microphoneManager;

    // Use this for initialization
    void Start()
    {
        m_audioSource = GetComponent<AudioSource>();
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");

        // SpeechRecognition looks this component up with GetComponent, so when the
        // reference was not wired in the inspector try the same GameObject.
        if (speechRecognition == null)
        {
            speechRecognition = GetComponent<SpeechRecognition>();
        }
    }

    /// <summary>
    /// Starts a new recording when <paramref name="isRecording"/> is true; otherwise
    /// stops the current one and, if it lasted more than one whole second, converts
    /// it and plays it back.
    /// </summary>
    /// <param name="isRecording">True to start recording, false to stop.</param>
    public void StartRecording(bool isRecording)
    {
        if (isRecording)
        {
            Microphone.End(null);
            m_audioClip = Microphone.Start(null, false, 60, SamplingRate);
            imageButton.color = Color.red;
            return;
        }

        imageButton.color = Color.white;
        audioLength_time = 0;

        // The position must be read before Microphone.End.
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
        {
            audioLength_time = lastPos / SamplingRate;
        }
        else
        {
            Debug.Log("error : 录音时间太短");
        }

        Microphone.End(null);

        if (audioLength_time <= 1.0f)
        {
            return;
        }

        SaveWav(filePath, m_audioClip);
        PlayAudioClip();
    }

    /// <summary>
    /// Plays the last recorded clip through the attached AudioSource when the clip
    /// exists and is longer than five seconds.
    /// </summary>
    public void PlayAudioClip()
    {
        // Null check must come first; the original dereferenced the clip before testing it.
        if (m_audioClip == null || m_audioClip.length <= 5)
        {
            return;
        }

        if (m_audioSource.isPlaying)
        {
            m_audioSource.Stop();
        }

        m_audioSource.clip = m_audioClip;
        m_audioSource.Play();
    }

    /// <summary>
    /// Recreates an empty file at <paramref name="filename"/> and converts
    /// <paramref name="clip"/> into the in-memory PCM buffer.
    /// </summary>
    /// <param name="filename">Path of the file to recreate.</param>
    /// <param name="clip">Clip holding the recorded samples.</param>
    /// <returns>True on success, false if any step threw.</returns>
    bool SaveWav(string filename, AudioClip clip)
    {
        try
        {
            File.Delete(filename);
        }
        catch (Exception ex)
        {
            Debug.Log(ex);
        }

        try
        {
            FileInfo info = new FileInfo(filename);
            if (!info.Exists)
            {
                // Create() returns an open FileStream; dispose it at once so the
                // handle does not leak and block the next File.Delete.
                using (info.Create())
                {
                }
            }

            ConvertAndWrite(clip);
            return true;
        }
        catch (Exception ex)
        {
            Debug.Log("error : " + ex);
            return false;
        }
    }

    /// <summary>
    /// Converts the recorded float samples to 16-bit little-endian PCM, stores them
    /// in <see cref="memoryStream"/> and schedules the upload.
    /// </summary>
    /// <param name="clip">Clip holding the recorded samples.</param>
    void ConvertAndWrite(AudioClip clip)
    {
        // One extra second is added so the tail of the recording is not lost.
        // NOTE(review): the factor 2 looks like it conflates bytes with samples — confirm.
        int requestedLength = (audioLength_time + 1) * SamplingRate * 2;

        // Never ask for more samples than the clip actually holds.
        int availableLength = clip.samples * clip.channels;
        int actual_Length = Math.Min(requestedLength, availableLength);

        float[] samples = new float[actual_Length];
        clip.GetData(samples, 0);

        // Each float sample becomes one Int16, i.e. two bytes.
        Byte[] bytesData = new Byte[samples.Length * 2];
        const int rescaleFactor = 32767; // scales [-1, 1] floats to the Int16 range

        for (int i = 0; i < samples.Length; i++)
        {
            short value = (short)(samples[i] * rescaleFactor);

            // Low byte first, written in place to avoid a per-sample allocation.
            bytesData[i * 2] = (byte)(value & 0xFF);
            bytesData[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }

        speech_Byte = null;

        // Buffer the converted bytes in memory for SpeechRecognition to read.
        memoryStream = new MemoryStream(bytesData, false);
        StartCoroutine(WriteFileStream());
    }

    /// <summary>
    /// Waits one second and then triggers the upload of the buffered audio.
    /// </summary>
    IEnumerator WriteFileStream()
    {
        yield return new WaitForSeconds(1);

        if (speechRecognition == null)
        {
            Debug.Log("error : " + new InvalidOperationException("speechRecognition is not assigned"));
            yield break;
        }

        speechRecognition.UploadAudio();
    }

    /// <summary>Plays the "Pressed" button animation.</summary>
    public void UIHighlighted()
    {
        recordingButton.Play("Pressed");
    }

    bool isPressed = false;

    /// <summary>
    /// Toggles the pressed state and starts or stops recording, delegating to
    /// <see cref="microphoneManager"/> when one is assigned.
    /// </summary>
    public void UIPressed()
    {
        isPressed = !isPressed;
        recordingButton.Play("Highlighted");

        if (!microphoneManager)
        {
            StartRecording(isPressed);
            return;
        }

        if (isPressed)
        {
            microphoneManager.StartRecording();
            imageButton.color = Color.red;
        }
        else
        {
            microphoneManager.StopRecording();
            imageButton.color = Color.white;
        }
    }

    /// <summary>Plays the "Normal" button animation.</summary>
    public void UINormal()
    {
        recordingButton.Play("Normal");
    }
}
2. 那么再展示出来网络处理类:SpeechRecognition
:
using UnityEngine;
using System.Collections;
using System.Text;
using System;
using UnityEngine.Networking;
using UnityEngine.UI;
using System.IO;
#if NETFX_CORE // compiled for UWP
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif

/// <summary>JSON body posted to the recognition endpoint.</summary>
[Serializable]
public class UploadData
{
    public string format;
    public int rate;
    public int channel;
    public string cuid;
    public string lan;
    public string token;
    public string speech;
    public int len;
}

/// <summary>JSON response of the token endpoint.</summary>
[Serializable]
public class BaiDuTokenData
{
    public string access_token;
    public string session_key;
    public string scope;
    public string refresh_token;
    public string session_secret;
    public int expires_in;
}

/// <summary>JSON response of the recognition endpoint.</summary>
[Serializable]
public class AcceptanceIdentification
{
    public string err_no;
    public string err_msg;
    public string sn;
    public string[] result;
    public string corpus_no;
}

/// <summary>
/// Fetches an access token, uploads the audio buffered by <see cref="RecordingWav"/>
/// as base64 JSON, and shows the first recognition result.
/// </summary>
public class SpeechRecognition : MonoBehaviour
{
    // Parent of the Text elements that are matched against the recognised text.
    public GameObject text_Group_layout;
    private Text[] textgroup;

    // Displays the recognition result or an error.
    public Text show_text;

    // Component that records and converts the audio.
    private RecordingWav recordingWav;

    string filePath = null;
    string format = "wav";

    // Kept in sync with the recorder instead of repeating the literal 8000.
    int rate = RecordingWav.SamplingRate;
    int channel = 1;
    string cuid = "xxxxx"; // fill in your own account identifier here
    string lan = "zh";
    string token;
    string speech;
    int len;

    // Fill in your own client id and secret here.
    string client_id = ".........";
    string client_secret = "..............";
    string url_token = "https://openapi.baidu.com/oauth/2.0/token";
    // NOTE(review): plain HTTP carries the access token unencrypted — confirm whether an HTTPS endpoint can be used.
    string url_api = "http://vop.baidu.com/server_api";
    string post_string;

    // Use this for initialization
    void Start()
    {
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");
        recordingWav = GetComponent<RecordingWav>();

        if (text_Group_layout == null)
        {
            return;
        }

        textgroup = text_Group_layout.gameObject.GetComponentsInChildren<Text>();
    }

    /// <summary>Starts the coroutine that uploads the buffered audio.</summary>
    public void UploadAudio()
    {
        StartCoroutine(GetIdentifyWords());
    }

    /// <summary>
    /// Colours each Text under <see cref="text_Group_layout"/> green when its text
    /// occurs in <paramref name="needTest"/>, red otherwise.
    /// </summary>
    /// <param name="needTest">The recognised sentence.</param>
    void MatchTheWords(string needTest)
    {
        if (text_Group_layout == null || textgroup == null)
        {
            return;
        }

        Debug.Log(needTest);

        foreach (Text candidate in textgroup)
        {
            candidate.color = needTest.Contains(candidate.text) ? Color.green : Color.red;
        }
    }

    /// <summary>
    /// Reads the buffered audio, requests a token, posts the audio and displays the
    /// result. Stops early with a message in <see cref="show_text"/> when the audio,
    /// the token, or either network call is unavailable.
    /// </summary>
    IEnumerator GetIdentifyWords()
    {
        MemoryStream audioStream = recordingWav != null ? recordingWav.memoryStream : null;
        if (audioStream == null)
        {
            show_text.text = "参数信息不够";
            yield break;
        }

        // The payload is binary, so copy the bytes directly instead of wrapping the
        // stream in a text reader; the stream is then released exactly once.
        byte[] buffer = audioStream.ToArray();
        audioStream.Dispose();
        recordingWav.memoryStream = null;

        len = buffer.Length;
        speech = Convert.ToBase64String(buffer);

        #region GetToken
        WWWForm form1 = new WWWForm();
        form1.AddField("grant_type", "client_credentials");
        form1.AddField("client_id", client_id);
        form1.AddField("client_secret", client_secret);

        using (WWW w1 = new WWW(url_token, form1))
        {
            yield return w1;

            if (!string.IsNullOrEmpty(w1.error))
            {
                show_text.text = "error :" + w1.error;
                yield break;
            }

            BaiDuTokenData getToken = new BaiDuTokenData();
            try
            {
                JsonUtility.FromJsonOverwrite(w1.text, getToken);
            }
            catch (Exception ex)
            {
                // Malformed token JSON previously killed the coroutine silently.
                show_text.text = "error :" + ex;
                yield break;
            }

            token = getToken.access_token;
        }
        #endregion

        if (string.IsNullOrEmpty(token) || string.IsNullOrEmpty(speech))
        {
            // The original showed this message but still sent the request.
            show_text.text = "参数信息不够";
            yield break;
        }

        yield return null;

        UploadData uploadData = new UploadData
        {
            format = format,
            rate = rate,
            channel = channel,
            cuid = cuid,
            token = token,
            speech = speech,
            len = len,
            lan = lan
        };

        Byte[] post_byte = Encoding.UTF8.GetBytes(JsonUtility.ToJson(uploadData));

        using (UnityWebRequest request = new UnityWebRequest(url_api, "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(post_byte);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");

            yield return request.Send();

            if (!string.IsNullOrEmpty(request.error))
            {
                show_text.text = "error :" + request.error;
                yield break;
            }

            string responseText = request.downloadHandler.text;
            try
            {
                AcceptanceIdentification acceptData = new AcceptanceIdentification();
                JsonUtility.FromJsonOverwrite(responseText, acceptData);

                if (acceptData.result == null || acceptData.result.Length == 0)
                {
                    // No result: show the raw response so the server's error is visible.
                    show_text.text = "error :" + responseText;
                }
                else
                {
                    show_text.text = acceptData.result[0];
                    MatchTheWords(acceptData.result[0]);
                }
            }
            catch (Exception ex)
            {
                show_text.text = "error :" + ex;
            }
        }
    }
}
- U3D C# 中文语音识别功能 之 Hololens篇----百度语音识别REST API
- 【记录】百度语音识别之REST API
- php 百度语音识别 REST API demo
- 百度语音识别REST API完整Demo
- Hololens入门之语音识别(语音命令)
- Hololens入门之语音识别(语音听写)
- 百度语音识别-REST-android
- Hololens入门之在线中文识别实现中文语音问答
- 百度语音识别REST API——通过使用Http网络请求方式获得语音识别功能
- C#调用百度语音识别API
- Qt:使用百度语音识别REST API,做全平台语音识别
- 百度语音识别服务 —— 语音识别 REST API 开发笔记
- 【Python】调用百度REST API实现语音识别
- 【Python】调用百度REST API实现语音识别
- 百度语音识别REST API C版本debug过程
- 百度语音识别API初探
- Hololens语音识别之holotoolkit1.5.7
- python实现百度语音之语音识别
- ContentValues
- calendar相关
- 关于chrome控制台警告:Synchronous XMLHttpRequest on the main thread终极解决办法
- Find Largest Value in Each Tree Row
- RDD、DataFrame和DataSet三者的关系
- U3D C# 中文语音识别功能 之 Hololens篇----百度语音识别REST API
- 向socket一次写多少数据合适?
- deepin--更改最低亮度
- Java_JSONObject
- cordova 安卓APP开发 版本更新解析
- iOS学习笔记之同步对象性能对比(iOS锁对比)
- lastIndexOf相关知识
- ContentResolver和ContentProvider
- Django——登陆模块的简单实现