U3D C# 中文语音识别功能 之 Hololens篇----百度语音识别REST API

来源:互联网 发布:淘宝怎么用照片找同款 编辑:程序博客网 时间:2024/06/07 05:53

下面介绍如何在U3D 开发适用于 Hololens 的 UWP 平台的 语音识别功能

首先,阅读这篇文章需要对 Hololens 有一定的了解。如果你了解它,应该也知道 Hololens 本身其实是支持语音识别的,而且效率和速度都不错。但是,它并不能识别中文。这是笔者在墙外查了不少资料后得到的结论。当然笔者能力有限,如果你发现它有办法支持中文语音识别,请告诉我!大恩不言谢。


如果您碰到其他问题,欢迎加入我自己建的讨论群 559666429,大家一起找答案,共同进步。

说实话,文中讲解的不是很详细。但是认真看的话,其实也不难。试着去学习,不会的先百度。实在不行的,请给我留言。我有空会给您解答。


  1. 首先你先得了解一下百度的语音识别REST的相关API以及使用方式,很简单的。(由于这不是文章主体,所以不多加描述)
    相关链接:百度语音识别服务 —— 语音识别 REST API 开发笔记 ,
    API请求方式基本说明
    如果你看懂了,那么应该知道,我们首先需要获取 token(令牌)。这个通过 U3D 的 `WWW` 类直接拿到就 OK 了,也很简单。最主要的是要上传给百度云的音频文件,所以对音频文件的处理也是这个项目的重点。

    所以我们将创建两个类:一个是用于网络处理的 SpeechRecognition,另一个是用于录制以及处理音频的 RecordingWav。

  2. 能读这篇文章的人大概都会面对同一个坑,那就是 UWP 不支持很多文件读写相关的类,比如 FileStream、Directory、File 等等。官方文档对此有所说明,并给出了推荐的替代类库及其使用方法:https://docs.unity3d.com/Manual/windowsstore-missingtypes.html 。所以我们也用这些类来对录音文件进行读写操作,具体做法见后面的代码。


1. 首先展示录制以及处理音频的类:RecordingWav

using UnityEngine;
using System;
using UnityEngine.UI;
using System.Collections;
using System.IO;
#if NETFX_CORE  // compiled for UWP
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif

/// <summary>
/// Records audio from the default microphone, converts the captured samples to
/// 16-bit PCM bytes held in <see cref="memoryStream"/>, and then asks
/// <see cref="speechRecognition"/> to upload them.
/// </summary>
[RequireComponent(typeof(AudioSource))]
public class RecordingWav : MonoBehaviour
{
    // Button image: tinted red while recording, white when stopped.
    public Image imageButton;
    // Animator that plays the button's "Pressed" / "Highlighted" / "Normal" states.
    public Animator recordingButton;

    // Location of the placeholder recording file; set in Start().
    string filePath = null;
    // Whole seconds recorded by the most recent recording (0 if none / too short).
    int audioLength_time;

    private AudioSource m_audioSource;
    private AudioClip m_audioClip;

    // Sample rate (Hz) requested from the microphone.
    public const int SamplingRate = 8000;
    // Not referenced in this class: no WAV header is written, only raw PCM bytes.
    private const int HEADER_SIZE = 44;
    // Length in seconds of the clip buffer requested from Microphone.Start.
    private const int MaxRecordingSeconds = 60;

    // Receives the UploadAudio() call once the PCM data is ready.
    public SpeechRecognition speechRecognition;

    // NOTE(review): this flag is never written by this class; the StartRecording
    // parameter of the same name shadows it. Confirm whether other scripts rely on it.
    [HideInInspector]
    public bool isRecording = false;

    // Reset to null by ConvertAndWrite(); not otherwise used in this class.
    [HideInInspector]
    public Byte[] speech_Byte;

    // Read-only stream over the converted PCM bytes; consumed by SpeechRecognition.
    [HideInInspector]
    public MemoryStream memoryStream;

    // Optional alternative recorder; when assigned, UIPressed() delegates to it.
    public MicrophoneManager microphoneManager;

    // Use this for initialization
    void Start()
    {
        m_audioSource = GetComponent<AudioSource>();
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");
    }

    /// <summary>
    /// Starts (<c>true</c>) or stops (<c>false</c>) a microphone recording.
    /// On stop, recordings of one second or less are discarded; longer ones are
    /// converted, queued for upload and played back.
    /// </summary>
    /// <param name="isRecording">True to begin recording, false to finish it.</param>
    public void StartRecording(bool isRecording)
    {
        if (isRecording)
        {
            // End any recording still in progress before starting a new one.
            Microphone.End(null);
            m_audioClip = Microphone.Start(null, false, MaxRecordingSeconds, SamplingRate);
            imageButton.color = Color.red;
            return;
        }

        imageButton.color = Color.white;
        audioLength_time = 0;

        // The position must be sampled before Microphone.End() is called.
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
        {
            audioLength_time = lastPos / SamplingRate;
        }
        else
        {
            Debug.Log("error : 录音时间太短");
        }
        Microphone.End(null);

        if (audioLength_time <= 1)
        {
            return;
        }

        // Nothing to save or play if no clip was ever produced.
        if (m_audioClip == null)
        {
            return;
        }

        SaveWav(filePath, m_audioClip);
        PlayAudioClip();
    }

    /// <summary>
    /// Plays the last recorded clip through the attached AudioSource when the
    /// clip exists and is longer than five seconds.
    /// </summary>
    public void PlayAudioClip()
    {
        // Null check must come first; the original dereferenced the clip before testing it.
        if (m_audioClip == null || m_audioClip.length <= 5)
        {
            return;
        }

        if (m_audioSource.isPlaying)
        {
            m_audioSource.Stop();
        }
        m_audioSource.clip = m_audioClip;
        m_audioSource.Play();
    }

    /// <summary>
    /// Recreates an empty file at <paramref name="filename"/> and converts
    /// <paramref name="clip"/> into the in-memory PCM stream.
    /// </summary>
    /// <param name="filename">Path of the file to recreate.</param>
    /// <param name="clip">Recorded clip to convert.</param>
    /// <returns>True on success, false if creating the file or converting failed.</returns>
    bool SaveWav(string filename, AudioClip clip)
    {
        try
        {
            File.Delete(filename);
        }
        catch (Exception ex)
        {
            // Best effort: a failed delete is logged and otherwise ignored.
            Debug.Log(ex);
        }

        try
        {
            FileInfo info = new FileInfo(filename);
            if (!info.Exists)
            {
                // Create() returns an open stream; dispose it so the handle is not leaked.
                using (info.Create())
                {
                }
            }
            ConvertAndWrite(clip);
            return true;
        }
        catch (Exception ex)
        {
            Debug.Log("error : " + ex);
            return false;
        }
    }

    /// <summary>
    /// Converts the recorded float samples to little-endian 16-bit PCM, stores the
    /// bytes in <see cref="memoryStream"/> and schedules the upload.
    /// </summary>
    /// <param name="clip">Recorded clip to read samples from.</param>
    void ConvertAndWrite(AudioClip clip)
    {
        // One extra second is added so the tail of the recording is not cut off.
        // NOTE(review): the trailing "* 2" doubles the sample count; it looks like a
        // bytes-vs-samples mix-up but is kept as-is — confirm before changing.
        int actual_Length = (audioLength_time + 1) * SamplingRate * 2;

        // Never ask for more samples than the clip holds: GetData would otherwise
        // wrap around and append the beginning of the recording again.
        int available = clip.samples * clip.channels;
        if (actual_Length > available)
        {
            actual_Length = available;
        }

        float[] samples = new float[actual_Length];
        clip.GetData(samples, 0);

        // Each float sample becomes one Int16, i.e. two bytes.
        Byte[] bytesData = new Byte[samples.Length * 2];
        const int rescaleFactor = 32767; // scales [-1, 1] floats to the Int16 range

        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first so an out-of-range sample cannot overflow the short cast.
            short pcm = (short)(Mathf.Clamp(samples[i], -1f, 1f) * rescaleFactor);
            BitConverter.GetBytes(pcm).CopyTo(bytesData, i * 2);
        }

        speech_Byte = null;
        // Cache the converted bytes in a read-only memory stream for the uploader.
        memoryStream = new MemoryStream(bytesData, false);
        StartCoroutine(WriteFileStream());
    }

    /// <summary>
    /// Waits one second, then asks <see cref="speechRecognition"/> to upload the audio.
    /// </summary>
    IEnumerator WriteFileStream()
    {
        yield return new WaitForSeconds(1);

        if (speechRecognition == null)
        {
            Debug.Log("error : speechRecognition is not assigned");
            yield break;
        }
        speechRecognition.UploadAudio();
    }

    /// <summary>Plays the button's "Pressed" animation state.</summary>
    public void UIHighlighted()
    {
        recordingButton.Play("Pressed");
    }

    // Toggled on every UIPressed() call: true while a recording is in progress.
    bool isPressed = false;

    /// <summary>
    /// Toggles recording. Delegates to <see cref="microphoneManager"/> when one is
    /// assigned, otherwise records with this component.
    /// </summary>
    public void UIPressed()
    {
        isPressed = !isPressed;
        recordingButton.Play("Highlighted");

        if (!microphoneManager)
        {
            StartRecording(isPressed);
            return;
        }

        if (isPressed)
        {
            microphoneManager.StartRecording();
            imageButton.color = Color.red;
        }
        else
        {
            microphoneManager.StopRecording();
            imageButton.color = Color.white;
        }
    }

    /// <summary>Plays the button's "Normal" animation state.</summary>
    public void UINormal()
    {
        recordingButton.Play("Normal");
    }
}

2. 接着展示网络处理类:SpeechRecognition

using UnityEngine;
using System.Collections;
using System.Text;
using System;
using UnityEngine.Networking;
using UnityEngine.UI;
using System.IO;
#if NETFX_CORE  // compiled for UWP
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif

/// <summary>
/// JSON request body posted to the recognition endpoint. Field names are the
/// JSON keys produced by JsonUtility, so they must not be renamed.
/// </summary>
[Serializable]
public class UploadData
{
    public string format;
    public int rate;
    public int channel;
    public string cuid;
    public string lan;
    public string token;
    // Base64 text of the audio bytes.
    public string speech;
    // Number of audio bytes before Base64 encoding.
    public int len;
}

/// <summary>
/// Shape of the token endpoint's JSON response; only <c>access_token</c> is read
/// by <see cref="SpeechRecognition"/>.
/// </summary>
[Serializable]
public class BaiDuTokenData
{
    public string access_token;
    public string session_key;
    public string scope;
    public string refresh_token;
    public string session_secret;
    public int expires_in;
}

/// <summary>
/// Shape of the recognition endpoint's JSON response; only <c>result</c> is read
/// by <see cref="SpeechRecognition"/>.
/// </summary>
[Serializable]
public class AcceptanceIdentification
{
    // NOTE(review): declared as string; if the service sends a JSON number here,
    // JsonUtility may leave it unset — confirm against the API response.
    public string err_no;
    public string err_msg;
    public string sn;
    public string[] result;
    public string corpus_no;
}

/// <summary>
/// Fetches an access token, uploads the audio cached by <see cref="RecordingWav"/>
/// to the recognition endpoint, and shows the recognised text.
/// </summary>
public class SpeechRecognition : MonoBehaviour
{
    // Parent of the Text elements whose content is matched against the recognised text.
    public GameObject text_Group_layout;
    private Text[] textgroup;
    // Displays the recognition result or an error message.
    public Text show_text;
    // Recorder on the same GameObject; supplies the audio stream.
    private RecordingWav recordingWav;

    private string filePath = null;
    private string format = "wav";
    private int rate = 8000;
    private int channel = 1;
    private string cuid = "xxxxx";   // fill in your own identifier here
    private string lan = "zh";
    private string token;
    private string speech;
    private int len;

    // Fill in your own client credentials here.
    private string client_id = ".........";
    private string client_secret = "..............";
    private string url_token = "https://openapi.baidu.com/oauth/2.0/token";
    private string url_api = "http://vop.baidu.com/server_api";
    private string post_string;

    // Use this for initialization
    void Start()
    {
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");
        recordingWav = GetComponent<RecordingWav>();
        if (text_Group_layout == null)
        {
            return;
        }
        textgroup = text_Group_layout.gameObject.GetComponentsInChildren<Text>();
    }

    /// <summary>Starts the token + recognition request sequence as a coroutine.</summary>
    public void UploadAudio()
    {
        StartCoroutine(GetIdentifyWords());
    }

    /// <summary>
    /// Colours each Text under <see cref="text_Group_layout"/> green when its content
    /// occurs in <paramref name="needTest"/>, red otherwise.
    /// </summary>
    /// <param name="needTest">Recognised text to search in.</param>
    void MatchTheWords(string needTest)
    {
        if (text_Group_layout == null)
        {
            return;
        }
        Debug.Log(needTest);

        if (needTest == null)
        {
            return;
        }

        // The layout may have been assigned after Start() ran; collect lazily.
        if (textgroup == null)
        {
            textgroup = text_Group_layout.gameObject.GetComponentsInChildren<Text>();
        }

        for (int i = 0; i < textgroup.Length; i++)
        {
            bool matched = needTest.Contains(textgroup[i].text);
            textgroup[i].color = matched ? Color.green : Color.red;
        }
    }

    /// <summary>
    /// Coroutine: encodes the cached audio, requests an access token, posts the
    /// recognition request and displays the outcome. Stops early with a message
    /// when the audio or token is missing or a request fails.
    /// </summary>
    IEnumerator GetIdentifyWords()
    {
        if (recordingWav == null || recordingWav.memoryStream == null)
        {
            ShowMessage("参数信息不够");
            yield break;
        }

        // ToArray copies the raw bytes directly; no text reader is needed for binary data.
        byte[] buffer = recordingWav.memoryStream.ToArray();
        recordingWav.memoryStream.Dispose();
        len = buffer.Length;
        speech = Convert.ToBase64String(buffer);

        #region GetToken
        WWWForm form1 = new WWWForm();
        form1.AddField("grant_type", "client_credentials");
        form1.AddField("client_id", client_id);
        form1.AddField("client_secret", client_secret);

        using (WWW w1 = new WWW(url_token, form1))
        {
            yield return w1;

            if (!string.IsNullOrEmpty(w1.error))
            {
                ShowMessage("error :" + w1.error);
                yield break;
            }
            token = ParseAccessToken(w1.text);
        }
        #endregion

        if (string.IsNullOrEmpty(token) || string.IsNullOrEmpty(speech))
        {
            // Without a token or audio the request cannot succeed, so stop here.
            ShowMessage("参数信息不够");
            yield break;
        }
        yield return 0;

        UploadData uploadData = new UploadData();
        uploadData.format = format;
        uploadData.rate = rate;
        uploadData.channel = channel;
        uploadData.cuid = cuid;
        uploadData.token = token;
        uploadData.speech = speech;
        uploadData.len = len;
        uploadData.lan = lan;

        Byte[] post_byte = Encoding.UTF8.GetBytes(JsonUtility.ToJson(uploadData));

        using (UnityWebRequest request = new UnityWebRequest(url_api, "POST"))
        {
            request.uploadHandler = (UploadHandler)new UploadHandlerRaw(post_byte);
            request.downloadHandler = (DownloadHandler)new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");

            yield return request.Send();

            if (!string.IsNullOrEmpty(request.error))
            {
                ShowMessage("error :" + request.error);
                yield break;
            }
            ShowRecognitionResult(request.downloadHandler.text);
        }
    }

    // Extracts access_token from the token response; returns null if it cannot be parsed.
    string ParseAccessToken(string json)
    {
        try
        {
            BaiDuTokenData getToken = new BaiDuTokenData();
            JsonUtility.FromJsonOverwrite(json, getToken);
            return getToken.access_token;
        }
        catch (Exception ex)
        {
            Debug.Log(ex);
            return null;
        }
    }

    // Parses the recognition response and shows the first result, or the raw
    // response as an error when no result is present.
    void ShowRecognitionResult(string responseText)
    {
        try
        {
            AcceptanceIdentification acceptData = new AcceptanceIdentification();
            JsonUtility.FromJsonOverwrite(responseText, acceptData);

            if (acceptData.result == null || acceptData.result.Length == 0)
            {
                ShowMessage("error :" + responseText);
                return;
            }

            ShowMessage(acceptData.result[0]);
            MatchTheWords(acceptData.result[0]);
        }
        catch (Exception ex)
        {
            ShowMessage("error :" + ex);
        }
    }

    // Writes a message to show_text, falling back to the log when no Text is assigned.
    void ShowMessage(string message)
    {
        if (show_text != null)
        {
            show_text.text = message;
        }
        else
        {
            Debug.Log(message);
        }
    }
}
原创粉丝点击