windows下利用微软的SpeechSDK实现语音控制

来源:互联网 发布:中国撤侨哭了警戒知乎 编辑:程序博客网 时间:2024/06/16 02:11

语音识别是当前研究的热点,我们可以利用微软的语音识别引擎来进行二次开发。首先需要安装微软的SpeechSDK,安装完成后配置项目的包含目录和库目录,这些都不是重点,不会的可以去百度。然后就可以写代码了,码农嘛,还是看代码有感觉。

源文件<Demo.cpp>

#include <Windows.h>#include <atlstr.h>#include <sphelper.h>#include <sapi.h>#include <comutil.h>#include <string.h>#pragma comment(lib,"sapi.lib")#ifdef _UNICODE#pragma   comment(lib,   "comsuppw.lib")  //_com_util::ConvertBSTRToString#else#pragma   comment(lib,   "comsupp.lib")  //_com_util::ConvertBSTRToString#endif#define GID_CMD_GR 333333#define WM_RECOEVENT WM_USER+1LRESULT CALLBACK WndProc(HWND,UINT,WPARAM,LPARAM);TCHAR szAppName[] = TEXT("语音控制Demo");BOOL b_initSR;BOOL b_Cmd_Grammar;CComPtr<ISpRecoContext> m_cpRecoCtxt;//语音识别程序接口CComPtr<ISpRecoGrammar> m_cpCmdGramma;//识别语法CComPtr<ISpRecognizer> m_cpRecoEngine; //语音识别引擎int speak(wchar_t *str);int WINAPI WinMain(HINSTANCE hInstance,HINSTANCE hPrevInstance,PSTR szCmdLine,int iCmdShow){HWND hwnd;MSG msg;WNDCLASS wndclass;//窗口类结构体初始化值wndclass.cbClsExtra = 0;wndclass.cbWndExtra = 0;wndclass.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH);wndclass.hCursor = LoadCursor(NULL,IDC_ARROW);wndclass.hIcon = LoadIcon(NULL,IDI_APPLICATION);wndclass.hInstance = hInstance;wndclass.lpfnWndProc = WndProc;wndclass.lpszClassName = szAppName;wndclass.lpszMenuName = NULL;wndclass.style = CS_HREDRAW|CS_VREDRAW;//注册窗口类if(!RegisterClass(&wndclass)){//失败后提示并返回MessageBox(NULL,TEXT("This program requires Windows NT!"),szAppName,MB_ICONERROR);return 0;}//创建窗口hwnd = CreateWindow(szAppName,TEXT("语音识别"),WS_OVERLAPPEDWINDOW,CW_USEDEFAULT,CW_USEDEFAULT,CW_USEDEFAULT,CW_USEDEFAULT,NULL,NULL,hInstance,NULL);//显示窗口ShowWindow(hwnd,iCmdShow);UpdateWindow(hwnd);//进入消息循环while(GetMessage(&msg,NULL,0,0)){TranslateMessage(&msg);//翻译消息DispatchMessage(&msg);//分发消息}return msg.wParam;}/**消息回调函数,由操作系统调用*/LRESULT CALLBACK WndProc(HWND hwnd, UINT message,WPARAM wParam,LPARAM lParam){HDC hdc;PAINTSTRUCT ps;switch(message){case WM_CREATE:{//初始化COM端口::CoInitializeEx(NULL,COINIT_APARTMENTTHREADED);//创建识别上下文接口HRESULT hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);if(SUCCEEDED(hr)){hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);}else{MessageBox(hwnd,TEXT("引擎实例化出错"),TEXT("提示"),S_OK);}//设置识别消息,使计算机时刻监听语音消息if(SUCCEEDED(hr)){hr = m_cpRecoCtxt->SetNotifyWindowMessage(hwnd,WM_RECOEVENT,0,0);}else{MessageBox(hwnd,TEXT("创建上下文接口出错"),TEXT("提示"),S_OK);}//设置我们感兴趣的事件if(SUCCEEDED(hr)){ULONGLONG ullMyEvents = SPFEI(SPEI_SOUND_START)|SPFEI(SPEI_RECOGNITION)|SPFEI(SPEI_SOUND_END);hr = m_cpRecoCtxt->SetInterest(ullMyEvents,ullMyEvents);}else{MessageBox(hwnd,TEXT("设置识别消息出错"),TEXT("提示"),S_OK);}//创建语法规则b_Cmd_Grammar = TRUE;if(FAILED(hr)){MessageBox(hwnd,TEXT("创建语法规则出错"),TEXT("提示"),S_OK);}hr = m_cpRecoCtxt->CreateGrammar(GID_CMD_GR,&m_cpCmdGramma);hr = m_cpCmdGramma->LoadCmdFromFile(L"cmd.xml",SPLO_DYNAMIC);if(FAILED(hr)){MessageBox(hwnd,TEXT("配置文件打开出错"),TEXT("提示"),S_OK);}b_initSR = TRUE;//在开始识别时,激活语法进行识别hr = m_cpCmdGramma->SetRuleState(NULL,NULL,SPRS_ACTIVE);break;}case WM_RECOEVENT:{RECT rect;GetClientRect(hwnd,&rect);hdc = GetDC(hwnd);USES_CONVERSION;CSpEvent event;while(event.GetFrom(m_cpRecoCtxt)==S_OK){switch(event.eEventId){case SPEI_RECOGNITION:{static const WCHAR wszUnrecognized[] = L"<Unrecognized>";CSpDynamicString dstrText;//取得消息结果if(FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL))){dstrText = wszUnrecognized;}BSTR SRout;dstrText.CopyToBSTR(&SRout);char * lpszText2 = _com_util::ConvertBSTRToString(SRout);if(b_Cmd_Grammar){//MessageBoxA(0,lpszText2,"内容",0);if(strcmp("腾讯QQ",lpszText2)==0){//MessageBox(0,TEXT("计算机"),TEXT("内容"),0);speak(L"好的");//打开QQ.exeShellExecuteA(NULL,"open","D:\\QQ\\QQProtect\\Bin\\QQProtect.exe",0,0,1);}if(strcmp("确定",lpszText2)==0){//按下回车键keybd_event(VK_RETURN,0,0,0);keybd_event(VK_RETURN,0,KEYEVENTF_KEYUP,0);}if(strcmp("音乐",lpszText2)==0){speak(L"好的");//调用系统程序wmplayer.exe播放音乐ShellExecuteA(NULL,"open","\"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe\"","C:\\Users\\KYT\\Desktop\\123.mp3",0,0);}}}}}break;}case WM_PAINT:hdc = BeginPaint(hwnd,&ps);EndPaint(hwnd,&ps);break;case WM_DESTROY:PostQuitMessage(0);break;}return DefWindowProc(hwnd,message,wParam,lParam);}#pragma comment(lib,"ole32.lib")//CoInitialize CoCreateInstance 需要调用ole32.dll/**语音合成函数,朗读字符串str*/int speak(wchar_t *str){ISpVoice * pVoice = NULL;::CoInitialize(NULL);//获得ISpVoice接口long hr = CoCreateInstance(CLSID_SpVoice,NULL,CLSCTX_ALL,IID_ISpVoice,(void **)&pVoice);hr = pVoice->Speak(str,0,NULL);pVoice->Release();pVoice = NULL;//千万不要忘记::CoUninitialize();return TRUE;}

只有源文件还不行,程序中还用到一个XML格式的语音配置文件cmd.xml。如果是利用编译器调试程序,cmd.xml需要和源代码同目录;发布程序以后,cmd.xml就得放到程序可执行文件所在的目录下了。总之,程序必须访问到cmd.xml文件,方便起见,可以在两个目录下都放一个cmd.xml文件。

配置文件<cmd.xml>

<?xml version="1.0" encoding="utf-8"?><GRAMMAR LANGID="804"> <DEFINE>    <ID NAME="CMD" VAL="10"/>    </DEFINE>  <RULE NAME="COMMAND" ID="CMD" TOPLEVEL="ACTIVE">   <L>    <P>腾讯QQ</P>    <P>确定</P>    <P>音乐</P>    </L>   </RULE></GRAMMAR>

到这里,一个简单的语音控制程序Demo就算完成了,你对它说:“腾迅QQ”,它就打开了一个QQ,你说:“确定”,默认的QQ账号就自己登陆了。你还可以说:“音乐”,于是一首曼妙的音乐响起(为什么看不见播放器呢,因为在代码
ShellExecuteA(NULL,"open","\"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe\"","C:\\Users\\KYT\\Desktop\\123.mp3",0,0);

设置了wmplayer.exe隐藏执行,想要关闭,嘻嘻,请打开任务管理器。另外,一定要设置你电脑上的音乐路径,不然你懂得)。

语音识别是不是挺有意思的,发挥你的想象,去震精别人吧。

0 0