语音识别总结

来源:互联网 发布:js怎么获取多个id 编辑:程序博客网 时间:2024/05/21 09:22

<p>1. 试用了 Google 在线语音识别,长语音识别没有成功;思路是试用 GitHub 上的 speech_recognition 项目:<a href="https://github.com/Uberi/speech_recognition">https://github.com/Uberi/speech_recognition</a>。</p><p>2. 试用科大讯飞 SDK,识别成功,但背景噪音较大时基本无法识别。其中参数 ent=sms-en16k 用于识别英文。</p>
#include "stdlib.h"#include "stdio.h"#include <windows.h>#include <conio.h>#include <errno.h>#include "../../include/qisr.h"#include "../../include/msp_cmn.h"#include "../../include/msp_errors.h"#pragma comment(lib,"D:\\work\\Windows_voice_1.051_54a924a4\\lib\\msc.lib")//x86int ran_iat(){        int ret=0;        int error=0;        char rec_result[102400] = {0};        const char *sessionID = NULL;        FILE *f_pcm = NULL;        FILE* fout=NULL;        char *pPCM = NULL;        int lastAudio = 0 ;        int audStat = MSP_AUDIO_SAMPLE_CONTINUE ;        int epStatus = MSP_EP_LOOKING_FOR_SPEECH;        int recStatus = MSP_REC_STATUS_SUCCESS ;        long pcmCount = 0;        long pcmSize = 0;        int conutSession=0;char *fname = "D:\\work\\Windows_voice_1.051_54a924a4\\bin\\wav\\abcddel.wav";         f_pcm = fopen(fname, "rb");        if (NULL != f_pcm) {                fseek(f_pcm, 0, SEEK_END);                pcmSize = ftell(f_pcm);                fseek(f_pcm, 0, SEEK_SET);                pPCM = (char *)malloc(pcmSize);                fread((void *)pPCM, pcmSize, 1, f_pcm);                fclose(f_pcm);                f_pcm = NULL;        }//读取音频文件        else{                printf("Open audio failed");                return 0;        }        fout = fopen( "iat.txt" , "ab");        if( NULL == fout )        {                printf("failed to open file,please check the file.\n");        }        exit:        sessionID = QISRSessionBegin(NULL,"sub=iat,auf=audio/L16;rate=16000,aue=speex-wb,ent=sms-en16k,rst=plain,rse=gb2312,vad_speech_tail=900",&error);//vad_speech_tail=900,这是vad后端点检测,就靠这个来断句,默认2s,感觉太大了,900ms时,就是说话停顿,结束此次会话,开启下次会话        if (error !=0)        {                printf("session begin error %d",error);                return 0;        }        conutSession ++;        while (1) {                unsigned int len = 6400;                int countLen=6400;                if (pcmSize < 12800) {                        len = pcmSize;                       
 lastAudio = 1;//音频长度小于12800                }                audStat = MSP_AUDIO_SAMPLE_CONTINUE;//有后继音频                if (pcmCount == 0)                        audStat = MSP_AUDIO_SAMPLE_FIRST;                if (len<=0)                {                        break;                }                printf("csid=%s,count=%d,aus=%d,",sessionID,pcmCount/countLen,audStat);                ret = QISRAudioWrite(sessionID, (const void *)&pPCM[pcmCount], len, audStat, &epStatus, &recStatus);//写音频                printf("eps=%d,rss=%d,ret=%d\n",epStatus,recStatus,error);                if (epStatus >=3)//开发文档中epStatus>=3时,就会检测到一句话的尾端点,然后认为这句话结束和会话session结束,就另开一路会话                {                        QISRAudioWrite(sessionID, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &epStatus, &recStatus);//写一个0,告诉云端,这次会话结束了              //必须拿完所有结果才开启下次会话,msc不能多线程调用,必须按sessionbegin、audiowrite、getresult、sessionend流程来,所以后面不能单独调getresult,                        while (recStatus != MSP_REC_STATUS_COMPLETE && 0 == error)                        {                                const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//获取结果                                if (NULL != rslt)                                {                                        strcat(rec_result,rslt);                                        fwrite(rslt,1,strlen(rslt),fout);                                }                        }                        QISRSessionEnd(sessionID, NULL);                        goto exit;                }                if (ret != 0)                        break;                pcmCount += (long)len;                pcmSize -= (long)len;                if (recStatus == MSP_REC_STATUS_SUCCESS)                 {                        const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//服务端已经有识别结果,可以获取                        if (NULL != rslt)                        {                                strcat(rec_result,rslt);                                
fwrite(rslt,1,strlen(rslt),fout);                        }                }                if (epStatus == MSP_EP_AFTER_SPEECH)                        break;                _sleep(30);//需要sleep下,        }        QISRAudioWrite(sessionID, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &epStatus, &recStatus);        free(pPCM);        pPCM = NULL;        while (recStatus != MSP_REC_STATUS_COMPLETE && 0 == error) {                const char *rslt = QISRGetResult(sessionID, &recStatus, 0, &error);//获取结果                if (NULL != rslt)                {                        strcat(rec_result,rslt);                        fwrite(rslt,1,strlen(rslt),fout);                }                _sleep(50);        }        QISRSessionEnd(sessionID, NULL);        printf("=============================================================\n");        printf("The result is: %s\n",rec_result);        printf("=============================================================\n");        printf("session count %d\n",conutSession);        return 0;}        int main(){        int ret=0;        ret = MSPLogin(NULL,NULL,"appid = 54a924a4");        ran_iat();        MSPLogout();        system("pause");        return 0;}




0 0
原创粉丝点击