微软TTS 使用

来源：互联网发布：mathcad读取数据编辑：程序博客网时间：2024/05/22 02:08

Win7环境下测试。

首先安装 Windows Speech SDK，下载地址为：http://www.microsoft.com/download/en/details.aspx?id=10121 。其中 SpeechSDK51.exe 是简体中文语音引擎，SpeechSDK51LangPack.exe 是中文男声语音库。

微软自带的中文语音库发音比较生硬，可以改为安装 NeoSpeech 的中文语音库，在百度上直接搜索“NeoSpeech 中文语音”即可找到。

下面的代码整理自网络并封装成了一个类，基于 Qt 5.5，在简单场景下可以满足需求，代码如下。

头文件

<pre name="code" class="cpp">#include <QObject>#include <QList>#include <sapi.h>class ISpVoice;class ISpObjectToken;class ISpAudio;class SpFormat;class SimpleTTS : public QObject{Q_OBJECTpublic:SimpleTTS(QObject *parent);~SimpleTTS();void speak(QString text);//当前的音频格式SPSTREAMFORMAT currFormat();private:void initVoices();//获取可用的语音库void initSpFormat();//设置可用的音频格式private:bool m_bTTSEnable;//语音库是否可用ISpVoice* m_pVoice;    //ms com tts 组件ISpAudio* m_pAudio;//voice 所使用的 音频相关QList<ISpObjectToken*> m_voices;//可用的语音库数组QList<SpFormat> m_spFmts;       //};class SpFormat//封装ms描述的音频格式{public:SpFormat(SPSTREAMFORMAT vl, QString sz);~SpFormat(){};QString discription()const { return m_discription; }SPSTREAMFORMAT getFormat()const{ return m_val; }DWORD rate()const{ return m_bytePS; }private:SPSTREAMFORMAT m_val;QString m_discription;DWORD m_bytePS;};

cpp

#include "simpletts.h"

#include <conio.h>
#include <cstdio>
#include <queue>
#include <string>
#include <vector>

#include <sphelper.h>
#include <spuihelp.h>

#include <QByteArray>
#include <QDebug>
#include <QStringList>

#include "jiontctrllmgr.h"
#include "simplelog.h"

// sapi.lib lives in the SDK's lib directory; the library path must be configured correctly.
#pragma comment(lib, "sapi.lib")

namespace {

// Renders an HRESULT as "error <code>: <system message>".
// Builds a QString directly instead of formatting into a fixed-size buffer,
// so a long system message cannot overflow anything, and a failed
// FormatMessage simply yields an empty message part.
QString describeHresult(HRESULT hr)
{
    LPWSTR systemText = nullptr;
    const DWORD length = FormatMessageW(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
        nullptr, static_cast<DWORD>(hr), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        reinterpret_cast<LPWSTR>(&systemText), 0, nullptr);

    QString text = QString::fromLatin1("error %1: ").arg(static_cast<long>(hr));
    if (length != 0 && systemText != nullptr)
        text += QString::fromWCharArray(systemText, static_cast<int>(length)).trimmed();
    if (systemText != nullptr)
        LocalFree(systemText);
    return text;
}

// Debug builds print to the Qt debug stream, release builds go to the project log.
void reportError(const QString &message)
{
#ifdef QT_DEBUG
    qDebug().noquote() << message;
#else
    LOGERROR(message);
#endif // QT_DEBUG
}

} // namespace

/// Initialises COM, creates the SAPI voice and audio output objects, and
/// applies the voice and audio format selected in the configuration.
///
/// m_bTTSEnable only becomes true when every mandatory step succeeded.
/// Invariant relied on by the destructor: this object holds a COM
/// initialisation reference exactly when m_pVoice is non-null.
SimpleTTS::SimpleTTS(QObject *parent)
    : QObject(parent), m_bTTSEnable(false), m_pVoice(nullptr), m_pAudio(nullptr)
{
    // Try the multithreaded apartment first; fall back to single-threaded
    // if the thread was already initialised in the other mode.
    HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
    if (FAILED(hr))
        hr = CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
    if (FAILED(hr))
    {
        reportError(QLatin1String("Error to intiliaze COM reason:") + describeHresult(hr));
        return;
    }

    hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice,
                          reinterpret_cast<void **>(&m_pVoice));
    if (FAILED(hr))
    {
        reportError(QLatin1String("Error to intiliaze ISPVoice component,reason: ") + describeHresult(hr));
        // No voice object: give the COM reference back now so the destructor
        // can use "m_pVoice != nullptr" as the sole ownership indicator.
        m_pVoice = nullptr;
        CoUninitialize();
        return;
    }

    hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOOUT, &m_pAudio);
    if (FAILED(hr))
    {
        reportError(QLatin1String("SpCreateDefaultObjectFromCategoryId failed,reason:") + describeHresult(hr));
        m_pAudio = nullptr;
        return;
    }

    initVoices();

    // Collect the voice descriptions for the log.
    QStringList voiceNames;
    for (int i = 0; i < m_voices.size(); ++i)
    {
        WCHAR *description = nullptr;
        if (SUCCEEDED(SpGetDescription(m_voices.at(i), &description)) && description != nullptr)
        {
            const QString name = QString::fromWCharArray(description);
            voiceNames << name;
            qDebug() << name;
        }
        // The description is allocated by SAPI with the COM task allocator.
        CoTaskMemFree(description);
    }
    if (voiceNames.empty())
    {
        LOGERROR(QString::fromLocal8Bit("TTS 没有可用的语音库"));
    }
    else
    {
        LOGINFO(QString::fromLocal8Bit("TTS 支持语音库：%1").arg(voiceNames.join(",")));
    }

    // Select the configured voice; index 0 is the first enumerated voice.
    int index = JiontCtrllMgr::getInstance()->config()->voiceIndex();
    if (index >= 0 && index < m_voices.size())
    {
        hr = m_pVoice->SetVoice(m_voices.at(index));
        if (FAILED(hr))
            reportError(QLatin1String("SetVoice failed,reason:") + describeHresult(hr));
    }
    else
    {
        reportError(QString::fromLocal8Bit("SetVoice index 无效，语音未正确设置！"));
    }

    // Output audio settings.
    initSpFormat();
    index = JiontCtrllMgr::getInstance()->config()->audioIndex();
    if (index >= 0 && index < m_spFmts.size())
    {
        CSpStreamFormat format;
        hr = format.AssignFormat(m_spFmts.at(index).getFormat());
        if (SUCCEEDED(hr))
            hr = m_pAudio->SetFormat(format.FormatId(), format.WaveFormatExPtr());
        if (FAILED(hr))
        {
            reportError(QString::fromLocal8Bit("SetFormat TTS 初始化失败,TTS 不可用！"));
            return;
        }

        hr = m_pVoice->SetOutput(m_pAudio, FALSE);
        if (FAILED(hr))
        {
            reportError(QString::fromLocal8Bit("SetOutput TTS 初始化失败，TTS 不可用！"));
            return;
        }
    }

    m_bTTSEnable = true;
}

/// Releases every COM object acquired by the constructor and balances the
/// COM initialisation. Safe to run after a partially failed construction.
SimpleTTS::~SimpleTTS()
{
    // Voice tokens were handed out with a reference by the enumerator.
    for (int i = 0; i < m_voices.size(); ++i)
    {
        if (m_voices.at(i) != nullptr)
            m_voices.at(i)->Release();
    }
    m_voices.clear();

    if (m_pAudio != nullptr)
    {
        m_pAudio->Release();
        m_pAudio = nullptr;
    }

    if (m_pVoice != nullptr)
    {
        m_pVoice->Release();
        m_pVoice = nullptr;
        // A non-null voice means the constructor's CoInitializeEx succeeded
        // and has not been undone yet.
        CoUninitialize();
    }
}

/// Speaks @p text asynchronously.
///
/// Every decimal digit is wrapped in square brackets and the configured
/// replacement pairs are applied before the text is handed to SAPI.
/// Does nothing (apart from a console note) when TTS is unavailable.
void SimpleTTS::speak(QString text)
{
    if (!m_bTTSEnable || m_pVoice == nullptr)
    {
        printf("tts library cant use!");
        return;
    }

    for (int digit = 0; digit <= 9; ++digit)
    {
        const QString digitText = QString::number(digit);
        text.replace(digitText, QLatin1Char('[') + digitText + QLatin1Char(']'));
    }

    const QList<QPair<QString, QString>> &replacements =
        JiontCtrllMgr::getInstance()->config()->replaceList();
    for (int i = 0; i < replacements.size(); ++i)
        text.replace(replacements.at(i).first, replacements.at(i).second);

    // ISpVoice::Speak takes a wide string; no TCHAR cast is needed or correct here.
    const std::wstring wideText = text.toStdWString();
    const HRESULT hr = m_pVoice->Speak(wideText.c_str(), SPF_ASYNC, nullptr);
    if (FAILED(hr))
        reportError(QLatin1String("Speak failed,reason:") + describeHresult(hr));
    // m_pVoice->WaitUntilDone(INFINITE);
}

/// Returns the format of the voice's current output stream, or SPSF_Default
/// when there is no voice or the format cannot be determined.
SPSTREAMFORMAT SimpleTTS::currFormat()
{
    if (m_pVoice == nullptr)
        return SPSF_Default;

    CComPtr<ISpStreamFormat> outputStream;
    if (m_pVoice->GetOutputStream(&outputStream) != S_OK)
        return SPSF_Default;

    CSpStreamFormat format;
    if (FAILED(format.AssignFormat(outputStream)))
        return SPSF_Default;

    return format.ComputeFormatEnum();
}

/// Enumerates the SPCAT_VOICES tokens into m_voices.
/// Each stored token carries a reference that the destructor releases.
void SimpleTTS::initVoices()
{
    CComPtr<IEnumSpObjectTokens> tokenEnum;
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &tokenEnum);
    if (FAILED(hr) || !tokenEnum)
        return;

    ULONG tokenCount = 0;
    hr = tokenEnum->GetCount(&tokenCount);
    for (ULONG i = 0; SUCCEEDED(hr) && i < tokenCount; ++i)
    {
        ISpObjectToken *token = nullptr;
        hr = tokenEnum->Next(1, &token, nullptr);
        // Anything other than S_OK means no token was fetched.
        if (hr != S_OK || token == nullptr)
            break;
        m_voices.push_back(token);
    }
}

/// Fills m_spFmts with the audio formats that can be selected by index
/// through the configuration. The order defines the meaning of that index.
void SimpleTTS::initSpFormat()
{
    static const struct
    {
        SPSTREAMFORMAT id;
        const char *name;
    } kFormats[] = {
        { SPSF_12kHz16BitStereo, "SPSF_12kHz16BitStereo" },
        { SPSF_16kHz16BitMono,   "SPSF_16kHz16BitMono" },
        { SPSF_16kHz16BitStereo, "SPSF_16kHz16BitStereo" },
        { SPSF_22kHz16BitMono,   "SPSF_22kHz16BitMono" },
        { SPSF_22kHz16BitStereo, "SPSF_22kHz16BitStereo" },
        { SPSF_24kHz16BitStereo, "SPSF_24kHz16BitStereo" },
        { SPSF_32kHz16BitStereo, "SPSF_32kHz16BitStereo" },
        { SPSF_44kHz16BitMono,   "SPSF_44kHz16BitMono" },
        { SPSF_44kHz16BitStereo, "SPSF_44kHz16BitStereo" },
        { SPSF_48kHz16BitMono,   "SPSF_48kHz16BitMono" },
        { SPSF_48kHz16BitStereo, "SPSF_48kHz16BitStereo" },
    };

    for (const auto &entry : kFormats)
        m_spFmts.push_back(SpFormat(entry.id, QString::fromLatin1(entry.name)));
}

////////////////////////////////////////////////////////////////////////////////

/// Stores the format id and name and derives the byte rate from the name.
///
/// The name is expected to look like "SPSF_<N>kHz<B>Bit<Mono|Stereo>".
/// Nominal 11/22/44 kHz are the CD-derived rates 11025/22050/44100 Hz; every
/// other value is taken as N * 1000 Hz. The rate is doubled for stereo.
/// If the name cannot be parsed the byte rate is left at 0.
SpFormat::SpFormat(SPSTREAMFORMAT vl, QString sz)
    : m_val(vl), m_discription(sz), m_bytePS(0)
{
    unsigned int sampleRate = 0;
    unsigned int bitsPerSample = 0;
    const QByteArray name = m_discription.toLatin1();
    if (sscanf(name.constData(), "SPSF_%ukHz%uBit", &sampleRate, &bitsPerSample) != 2)
        return;

    switch (sampleRate)
    {
    case 11: sampleRate = 11025; break;
    case 22: sampleRate = 22050; break;
    case 44: sampleRate = 44100; break;
    default: sampleRate *= 1000; break;
    }

    m_bytePS = static_cast<DWORD>(sampleRate) * bitsPerSample / 8;
    // The channel layout is the suffix of the name, not its first characters.
    if (m_discription.endsWith(QLatin1String("Stereo")))
        m_bytePS *= 2;
}

0 0