离线版-端点检测代码重写

来源：互联网发布：淘宝店铺怎么经营模式编辑：程序博客网时间：2024/05/23 14:17
根据对双门限的理解和修改，重写了一个离线版端点检测。
function [st,en]=VAD(x, fs)x=double(x);x=x/max(abs(x));framelen= floor(fs*40/1000);%frameinc= floor(fs*10/1000);%y=enframe(x,framelen,frameinc);%计算短时间能量 amp=sum(abs(y),2);    %开始端点检测tmp1=enframe(x(1:length(x)-1),framelen,frameinc);tmp2=enframe(x(2:length(x)),framelen,frameinc);signs=(tmp1.*tmp2)<0;    %diffs=(tmp1-tmp2)>0.01;zcr=sum(signs.*diffs,2);  %    zcr=[zcr;zcr(end)];zcr_yu=0.2*mean(zcr);%yuzhi=0.2*mean(amp);%minspeech=10;count=0;%start=[];tail=[];N=length(amp);flag=0;%biaozhi2=0;biaozhi3=0;%%%%%%%%%%%%for n=1:N     if amp(n)<yuzhi || (zcr(n)<zcr_yu)        continue;    end    kaitou=n;    break;endfor n=N:-1:1    if amp(n)<yuzhi || (zcr(n)<zcr_yu)        continue;    end    jiewei=n;    break;endnoise=[amp(1:kaitou-1);amp(jiewei+1:end)];noise_mean=mean(noise);noise_var = std(noise);speech_mean=mean(amp(kaitou:jiewei));%noise_meanyuzhi1= 0.3*speech_mean; %yuzhi2= max(0.3*speech_mean , (noise_mean + noise_var)*1.3);%noise_zcr=[zcr(1:kaitou-1);zcr(jiewei+1:end)];noise_zcr_mean = mean(noise_zcr);noise_zcr_std = std(noise_zcr);speech_zcr_mean=mean(zcr(kaitou:jiewei));zcr_yu1 = 0.3*speech_zcr_mean;zcr_yu2 = max(0.3*speech_zcr_mean , (noise_zcr_mean+noise_zcr_std)*0.3);%%%%%%%%%%%%%%st = [];en = [];bstart_state = 0;bend_state = 0;segment = 0;unvoice = 0;voice_min_len = 7;% 最短语音长度70ms  unvoice_min_len = 5;%结束段最小持续50msst_candicate = 0;en_candicate = 0;for  i = 1:N        if( (amp(i) >= yuzhi2 && zcr(i) >= zcr_yu1) && ~bstart_state ) %find  start            bstart_state = 1;  %                 if(~st_candicate)                st_candicate = i;            end            segment = segment + 1;                            elseif( (amp(i) >= yuzhi2 || zcr(i) >= zcr_yu1) && bstart_state )%        if(unvoice >= unvoice_min_len) %            st = [st; st_candicate];            en_candicate = en_candicate + unvoice_min_len - 1;            en = [en; en_candicate];                   bstart_state = 0;            bend_state = 1;            unvoice = 0;            segment = 0;            st_candicate = 0;        else %            unvoice = 0;  %            bend_state = 0;            segment = segment + 1;               end    elseif( (amp(i) < yuzhi2 && zcr(i) < zcr_yu1)  && bstart_state )     %         if segment >= voice_min_len  %               unvoice = unvoice + 1; %             if ~bend_state  %                 en_candicate = i;             end             bend_state = 1; %         else %                           bstart_state = 0; %             segment = 0;             bend_state = 0;             unvoice = 0;                   st_candicate = 0;             %prepare_for_start = 0;             en_candicate = 0;         end                         elseif((amp(i) >= yuzhi2 || zcr(i) >= zcr_yu1) && ~bstart_state) %          if ~ st_candicate              st_candicate = i;          end                       else%        st_candicate = 0;        continue;    end    endif(unvoice >= unvoice_min_len )%    st = [st; st_candicate];                  en = [en; (en_candicate + unvoice_min_len -1)];    segment = 0;endif( segment >= voice_min_len) %    st = [st; st_candicate];      en = [en; N];endfigure(1);subplot(3,1,1)plot(x);  %原始语音波形axis([1,length(x),-1,1])ylabel('speech');xlabel('样本点');for  k=1:length(st)line([st(k)*frameinc,st(k)*frameinc],[-1,1],'linestyle',':','color','blue','LineWidth',2);line([en(k)*frameinc,en(k)*frameinc],[-1,1],'linestyle',':','color','red','LineWidth',2);endsubplot(3,1,2)plot(amp);%原始语音能量axis([1,length(amp),0,max(amp)])ylabel('energy');xlabel('帧数');line([1,N],[yuzhi1,yuzhi1],'color','yellow','LineWidth',2);%由语音能量line([1,N],[yuzhi2,yuzhi2],'color','red','LineWidth',2);%由噪声平均能量和语音能量比较而得for  k=1:length(st)line([st(k),st(k)],[min(amp),max(amp)],'linestyle',':','color','blue','LineWidth',2);line([en(k),en(k)],[min(amp),max(amp)],'linestyle',':','color','red','LineWidth',2);endsubplot(3,1,3)plot(zcr);%原始语音过零率axis([1,length(zcr),0,max(zcr)])ylabel('zcr');xlabel('帧数');line([1,N],[zcr_yu1,zcr_yu1],'color','yellow','LineWidth',2);%由语音能量line([1,N],[zcr_yu2,zcr_yu2],'color','red','LineWidth',2);%语音加噪音for  k=1:length(st)line([st(k),st(k)],[min(zcr),max(zcr)],'linestyle',':','color','blue','LineWidth',2);line([en(k),en(k)],[min(zcr),max(zcr)],'linestyle',':','color','red','LineWidth',2);endend
用语音识别的强制对齐自动切分连续语音汉字，工程量和资源巨大，此处做了个简易版的汉字自动切分，效果还可以。
0 0