VAD (Voice Activity Detection) for Endpoint Detection

来源:互联网 发布:hdfs java创建目录 编辑:程序博客网 时间:2024/06/10 17:13

在常规端点检测函数部分做如下修改:

void CMyWaveView::NormalvadDefault(){CMyWaveDoc* pDoc = GetDocument();ASSERT_VALID(pDoc);if (!pDoc)return;OnPower();//修改一:预加重,加汉明窗 在methods类里OnZero();m_minForZcr=pDoc->minForOneZero;m_maxForZcr=pDoc->maxForOneZero;m_avgForZcr=pDoc->avgForOneZero;int i, j, nCount;nCount=pDoc->zero_one.size();pDoc->tag_Norm.clear();for (i=0;i < nCount;i++){pDoc->tag_Norm.push_back(0);}double max, min, avg;min=m_minForEnergy;max=m_maxForEnergy;avg=m_avgForEnergy;double max_zero=pDoc->maxForOneZero;double energy_high =avg*e1;double energy_low =avg*0.04;//修改2:门限值double energy_consonant=avg*e3;double energy_suspect=avg*e4;double zcrThreshold = m_avgForZcr+zz * (m_maxForZcr-m_avgForZcr);m_EnergyLevel1=energy_high;m_EnergyLevel2=energy_low;m_EnergyLevel3=energy_consonant;m_EnergyLevel4=energy_suspect;m_ZcrThreshold=zcrThreshold;/****寻找主音段**/std::vector<int> voiceIndex;for (i=0;i < nCount;i++){if (pDoc->dataout_energy[i] > energy_high){voiceIndex.push_back(i);pDoc->tag_Norm[i]=1;}}std::vector<int> sound;int nums=voiceIndex.size();sound.push_back(voiceIndex[0]);for (i=1;i < nums-1;i++){if (voiceIndex[i+1]-voiceIndex[i]>1){sound.push_back(voiceIndex[i]);sound.push_back(voiceIndex[i+1]);}}sound.push_back(voiceIndex[nums-1]);voiceIndex.clear();//释放voiceIndex容器内存vector<int>(voiceIndex).swap(voiceIndex);/********寻找中振幅和常规辅音段*****************/int head, tail;for (i=0;i<sound.size()/2;i++){head=sound[2*i];while ((head-1)>=0 && (pDoc->dataout_energy[head-1]>energy_low||(pDoc->dataout_energy[head-1]>energy_consonant && pDoc->zero_one[head-1] > zcrThreshold))){head--;pDoc->tag_Norm.at(head)=1;}sound[2*i]=head;tail=sound[2*i+1];while ((tail+1)<nCount && pDoc->dataout_energy[tail+1] > energy_low){tail++;pDoc->tag_Norm.at(tail)=1;}sound[2*i+1]=tail;}tail=head=0;/******寻找不依靠主音的广义振幅******/std::vector<int>soundSuspect;soundSuspect.clear();for (i=1;i<nCount;i++) //从1开始 跳过第0帧 避免phone噪声初始干扰{if (pDoc->tag_Norm.at(i)==0 && (pDoc->dataout_energy[i]>energy_low||(pDoc->dataout_energy[i]>energy_consonant && 
pDoc->zero_one[i]>zcrThreshold)))soundSuspect.push_back(i);}/****根据广义振幅寻找疑似辅音***/if (soundSuspect.size()>1){int k1=0, k2=0;std::vector<int>suspect_terminal;suspect_terminal.clear();suspect_terminal.push_back(soundSuspect[0]);for (i=0;i<soundSuspect.size()-1;i++){if (soundSuspect.at(i+1)-soundSuspect.at(i)>1){suspect_terminal.push_back(soundSuspect[i]);suspect_terminal.push_back(soundSuspect[i+1]);}}suspect_terminal.push_back(soundSuspect[soundSuspect.size()-1]);/******寻找疑似语音段***************/for (i=0;i<suspect_terminal.size()/2;i++){k1=suspect_terminal[2*i+1]-suspect_terminal[2*i]+1; k2=0;head=suspect_terminal[2*i];tail=suspect_terminal[2*i+1]+1;for (;pDoc->tag_Norm[tail]==0 && tail<nCount;tail++){if (pDoc->dataout_energy[tail]>energy_consonant||(pDoc->dataout_energy[tail]>energy_suspect&&pDoc->zero_one[tail]>zcrThreshold))k2++;else break;}if (k2<=k1 && pDoc->tag_Norm[tail]==1)    /***右边连通主音段 如清辅音***/{for (j=head;j<tail;j++)pDoc->tag_Norm[j]=1;}else if ( pDoc->tag_Norm[head-2]==1)   /***左边联通主音段 比如后鼻音***/{if (k2<=k1){if (pDoc->dataout_energy[head-1]>energy_suspect && pDoc->zero_one[head-1]>zcrThreshold)pDoc->tag_Norm[head-1]=1; //判断前面一帧for (j=head;j<tail;j++)pDoc->tag_Norm[j]=1; }else {if (pDoc->dataout_energy[head-1]>energy_suspect && pDoc->zero_one[head-1]>zcrThreshold)pDoc->tag_Norm[head-1]=1; //丢弃后面疑似帧for (j=head;j<tail-k2;j++)pDoc->tag_Norm[j]=1; }}   /***不连通 看出过零率与能量差距 比如男音落单低振幅***/else if (k2<=k1 && pDoc->zero_one[head]>zcrThreshold  &&   ( pDoc->dataout_energy[head]>1.5*energy_consonant || pDoc->dataout_energy[head+1]>1.5*energy_consonant)){if (pDoc->dataout_energy[head-1]>energy_suspect && pDoc->zero_one[head-1]>zcrThreshold)pDoc->tag_Norm[head-1]=1; //判断前面一帧for (j=head;j<tail;j++)pDoc->tag_Norm[j]=1;}}suspect_terminal.clear();vector<int>(suspect_terminal).swap(suspect_terminal);}//if大条件结束/*******修改3:保存各分离段语音的头尾*/int npoint=0;for( i=0;i<pDoc->tag_Norm.size()-1;i++){   if(pDoc->tag_Norm[i]==0 && pDoc->tag_Norm[i+1] == 1)   vad[npoint].head=i+1;   else 
if(pDoc->tag_Norm[i]==1 && pDoc->tag_Norm[i+1] == 0)   vad[npoint++].tail=i;}double av_eng=0;vector<double>avg_oneE;int x1=0;float gapv1,gapv2,gapv3,gapv4;gapv1=gapv2=gapv3=gapv4=0;for( i=0;i<npoint;i++){/*for(x1=vad[i].head; x1 <vad[i].tail; x1++)av_eng+=pDoc->dataout_energy[x1];av_eng/=x1;avg_oneE.push_back(av_eng);*/for(x1=vad[i].head+6; x1 < vad[i].tail-5; x1++)//避开头尾if(  pDoc->dataout_energy[x1] <  pDoc->dataout_energy[x1-1] && pDoc->dataout_energy[x1] < pDoc->dataout_energy[x1+1] ){gapv1=(pDoc->dataout_energy[x1-1] - pDoc->dataout_energy[x1])/pDoc->dataout_energy[x1];gapv2=(pDoc->dataout_energy[x1+1] - pDoc->dataout_energy[x1])/pDoc->dataout_energy[x1];gapv3=abs(pDoc->dataout_energy[x1+1] - pDoc->dataout_energy[x1-1])/pDoc->dataout_energy[x1];gapv4=pDoc->zero_one[x1+2]+pDoc->zero_one[x1+3]+pDoc->zero_one[x1+4]+ pDoc->zero_one[x1+5]-4*pDoc->zero_one[x1+1];if(gapv1 < gapv2){ float ttmp=gapv1; gapv1=gapv2; gapv2=ttmp;}if( gapv1 > 0.16 && gapv2 > 0.0568 && gapv3 >0.02 && !(pDoc->zero_one[x1] < pDoc->zero_one[x1-1] && pDoc->zero_one[x1] > pDoc->zero_one[x1+1] )&&( ( pDoc->zero_one[x1] < pDoc->zero_one[x1+1]  && pDoc->zero_one[x1] < pDoc->zero_one[x1+2] && pDoc->zero_one[x1] < pDoc->zero_one[x1+3] )||( gapv4 <10 && pDoc->zero_one[x1+1] <= pDoc->zero_one[x1+2]  && pDoc->zero_one[x1+1] <= pDoc->zero_one[x1+3] && pDoc->zero_one[x1] <= pDoc->zero_one[x1+4] && pDoc->zero_one[x1] < pDoc->zero_one[x1+5] )||( (pDoc->zero_one[x1+1] < pDoc->zero_one[x1+2] || (pDoc->zero_one[x1+1] == pDoc->zero_one[x1+2] && pDoc->zero_one[x1+2] <= pDoc->zero_one[x1+3]) )  && (pDoc->zero_one[x1+2] < pDoc->zero_one[x1+3] || (pDoc->zero_one[x1] == pDoc->zero_one[x1+1]  && pDoc->zero_one[x1+2] == pDoc->zero_one[x1+3] ) )  && ! pDoc->zero_one[x1] == pDoc->zero_one[x1-1]  )) )pDoc->tag_Norm[x1]=0;gapv1=gapv2=gapv3=0;}}soundSuspect.clear();vector<int>(soundSuspect).swap(soundSuspect);}

其中 vad为:

/// One detected speech segment, given as inclusive frame indices.
struct Endpoint
{
    int head;  ///< index of the first frame of the segment
    int tail;  ///< index of the last frame of the segment
};

/// Global table of detected segments, filled in order by the endpoint
/// detector; it has room for 500 entries.
Endpoint vad[500];



原创粉丝点击