语音识别
来源:互联网 发布:网络大学有哪些专业 编辑:程序博客网 时间:2024/04/27 23:04
HMM算法,Nuance ASR关键实现
HMM(隐马尔可夫模型)是目前最流行的大规模语音识别算法。其基本思想是:把每个声音片段(约20ms)的特征信息(如MFCC)作为马尔可夫链的观察节点,说话的过程就是这些节点之间的转移;根据已知的每个音节对应的节点跳转关系,可以用维特比算法识别出当前语音最可能对应的音节。
//------------------------------------------------------
// Calculate the log probability of observation `ob` at
// state `s`. Each mixture component m is evaluated as a
// Gaussian with per-dimension means s->pfMean[m][n] and
// per-dimension variances s->pfCov[m][n], weighted by
// s->pfMixWt[m]; the weighted densities are summed.
//
//   s      - state to evaluate (s->nMixtures components)
//   ob     - observation vector; entries 0..veclen-1 are read
//   veclen - number of dimensions to evaluate
//
// Returns log(p) with p floored at PROBFLOOR.
// Returns 0.0 when s or ob is NULL.
//
// alpha = (2*pi)^(veclen/2) is the same constant factor for
// every state, so it does not matter if alpha is left out
// of the denominator when scores are only compared.
//------------------------------------------------------
float CalculateLogProb(STATE *s, float *ob, int veclen)
{
    int m, n, nMix;
    double alpha;
    double total = 0.0;
    const double floorProb = (float)PROBFLOOR;

    // NOTE(review): a NULL argument yields 0.0, which is indistinguishable
    // from log(1.0); kept unchanged so existing callers see the same value.
    if (!s || !ob)
        return 0.0f;

    // Use the pre-computed constant only when it was built for this length.
    // For any other length compute alpha locally and leave AlphaConst alone:
    // overwriting it here (while VecLength stays the same) would hand a
    // wrong constant to every later call that uses the default length.
    if (veclen == VecLength)
        alpha = AlphaConst;
    else
        alpha = pow(TWOPI, veclen / 2.0);

    // For each mixture, accumulate its weighted density.
    nMix = s->nMixtures;
    for (m = 0; m < nMix; m++) {
        double dist = 0.0;    // sum of squared, variance-scaled differences
        double logDet = 0.0;  // log of the product of the variances

        for (n = 0; n < veclen; n++) {
            double var = s->pfCov[m][n];
            double diff = ob[n] - s->pfMean[m][n];

            dist += diff * diff / var;
            // Summing logs instead of multiplying the variances keeps the
            // determinant from underflowing to 0 (or overflowing) when
            // veclen is large.
            logDet += log(var);
        }

        // exp(-0.5*dist) / sqrt(det) == exp(-0.5 * (dist + log(det)))
        total += s->pfMixWt[m] * exp(-0.5 * (dist + logDet)) / alpha;
    }

    // Floor the probability; the negated comparison also catches NaN
    // (e.g. produced by a zero or negative variance).
    if (!(total >= floorProb))
        total = floorProb;

    return (float)log(total);
}
HMM(隐马尔可夫模型)是目前最流行的大规模语音识别算法。其基本思想是:把每个声音片段(约20ms)的特征信息(如MFCC)作为马尔可夫链的观察节点,说话的过程就是这些节点之间的转移;根据已知的每个音节对应的节点跳转关系,可以用维特比算法识别出当前语音最可能对应的音节。
//------------------------------------------------------
// Calculate the log probability of observation `ob` at
// state `s`. Each mixture component m is evaluated as a
// Gaussian with per-dimension means s->pfMean[m][n] and
// per-dimension variances s->pfCov[m][n], weighted by
// s->pfMixWt[m]; the weighted densities are summed.
//
//   s      - state to evaluate (s->nMixtures components)
//   ob     - observation vector; entries 0..veclen-1 are read
//   veclen - number of dimensions to evaluate
//
// Returns log(p) with p floored at PROBFLOOR.
// Returns 0.0 when s or ob is NULL.
//
// alpha = (2*pi)^(veclen/2) is the same constant factor for
// every state, so it does not matter if alpha is left out
// of the denominator when scores are only compared.
//------------------------------------------------------
float CalculateLogProb(STATE *s, float *ob, int veclen)
{
    int m, n, nMix;
    double alpha;
    double total = 0.0;
    const double floorProb = (float)PROBFLOOR;

    // NOTE(review): a NULL argument yields 0.0, which is indistinguishable
    // from log(1.0); kept unchanged so existing callers see the same value.
    if (!s || !ob)
        return 0.0f;

    // Use the pre-computed constant only when it was built for this length.
    // For any other length compute alpha locally and leave AlphaConst alone:
    // overwriting it here (while VecLength stays the same) would hand a
    // wrong constant to every later call that uses the default length.
    if (veclen == VecLength)
        alpha = AlphaConst;
    else
        alpha = pow(TWOPI, veclen / 2.0);

    // For each mixture, accumulate its weighted density.
    nMix = s->nMixtures;
    for (m = 0; m < nMix; m++) {
        double dist = 0.0;    // sum of squared, variance-scaled differences
        double logDet = 0.0;  // log of the product of the variances

        for (n = 0; n < veclen; n++) {
            double var = s->pfCov[m][n];
            double diff = ob[n] - s->pfMean[m][n];

            dist += diff * diff / var;
            // Summing logs instead of multiplying the variances keeps the
            // determinant from underflowing to 0 (or overflowing) when
            // veclen is large.
            logDet += log(var);
        }

        // exp(-0.5*dist) / sqrt(det) == exp(-0.5 * (dist + log(det)))
        total += s->pfMixWt[m] * exp(-0.5 * (dist + logDet)) / alpha;
    }

    // Floor the probability; the negated comparison also catches NaN
    // (e.g. produced by a zero or negative variance).
    if (!(total >= floorProb))
        total = floorProb;

    return (float)log(total);
}