PortAudio+webrtc+lame实现采集降噪增益mp3

来源:互联网 发布:东莞农村商业银行网络 编辑:程序博客网 时间:2024/05/15 02:10

一、使用PortAudio采集
首先下载并编译这个库,生成动态库,放好头文件和链接库(lib)。这些都不是重点,不再赘述。

//定义pa的sample类型为int16,这个可以配合webrtc模块#define PA_SAMPLE_TYPE paInt16      //采用双字节,一个sample=sizeof(int)=2字节#define SAMPLE_RATE  (32000)        //代表1秒有32000个sample#define FRAMES_PER_BUFFER (6400)    //代表回调一次提供6400个sample//说明:input是从设备读到的数据,字节长度是frameCount*sizeof(SAMPLE)//如果要echo,需要在初始化的时候指定好设备,把input拷到output即可.static int PACallback(        const void *input, void *output,        unsigned long frameCount,        const PaStreamCallbackTimeInfo* timeInfo,        PaStreamCallbackFlags statusFlags,        void *userData ){    Q_UNUSED(output);    Q_UNUSED(timeInfo);    Q_UNUSED(statusFlags);    //进一步处理数据    WcRecordMedia2 *media=(WcRecordMedia2*)userData;    bool rt = media->tHandle(input,frameCount);    return rt?paContinue:paComplete;}
    PaError err;    err = Pa_Initialize();    if( err != paNoError ) goto error;    PaDeviceIndex didx = Pa_GetDefaultInputDevice();    if(didx == paNoDevice){        //没有录音设备        return false;    }    PaStreamParameters inputDev;    inputDev.device = Pa_GetDefaultInputDevice();    inputDev.channelCount = 1;    inputDev.sampleFormat = PA_SAMPLE_TYPE;    inputDev.suggestedLatency = 1;    inputDev.hostApiSpecificStreamInfo = NULL;    PaStreamParameters outputDev;    //以下写paNoDevice会导致Pa_OpenStream失败,所以如果没喇叭输出设备,录音也玩不了啦?!    outputDev.device = Pa_GetDefaultOutputDevice();    outputDev.channelCount = 1;    outputDev.sampleFormat = PA_SAMPLE_TYPE;    outputDev.suggestedLatency = 1;    outputDev.hostApiSpecificStreamInfo = NULL;    PaStream* paStream = NULL;    err = Pa_OpenStream(                &paStream,                &inputDev,                &outputDev,                SAMPLE_RATE,                FRAMES_PER_BUFFER, /* frames per buffer */                paDitherOff,    /* paDitherOff, // flags */                PACallback,                this);   //userdata,这里是我的类指针.    if( err != paNoError ) goto error;    err = Pa_StartStream( paStream );    if( err != paNoError ) goto error;    //如果是demo,可以紧接着睡一分钟.    for(int i=0;i<6000;i++){        Pa_Sleep(10);    }    //...结束的时候    PaError err;    if(paStream_){        err = Pa_CloseStream( paStream );        qDebug()<<"Pa_CloseStream:"<<Pa_GetErrorText(err);        paStream = NULL;    }    Pa_Terminate();

有用经验:①webrtc只支持SAMPLE_RATE 为8000,16000,32000,常见的44100罢工!
②FRAMES_PER_BUFFER 可以比较自由,但是给webrtc处理的frameCount有限制..咳咳..请坐稳你的小板凳: webrtc提供的Ns(降噪),每次给的frameCount最多是480,因为拆分高低频后,count减半不能超过240; AGC(增益)只支持160或者320; 所以我选择了320的20倍,分20次把6400个sample给进去.


二、webrtc处理
这部分代码是从另一个帖子下载抄回来的,作者从webrtc代码里抠出来需要的部分,神!原贴在这里,遥谢一下原始作者!
http://www.jianshu.com/p/77a363960711
以下代码是我根据原贴封装的C++类

//webrtc的音频处理模块,支持降噪和增益.//只接受int16的sample,其他的..谁知道能不能支持呢!class WebRtcAudioModule{public:    WebRtcAudioModule()        : nsInst_(NULL)        , agcHandle_(NULL)        , micLevel_(0)    {        memset(internalBuf,0,sizeof(int)*24);        filter_state1 = internalBuf + 0;        filter_state2 = internalBuf + 6;        synthesis_state1 = internalBuf + 12;        synthesis_state2 = internalBuf + 18;        memset(handlerBuf,0,sizeof(short)*240*4);        //以下4个buf长度只需要单次处理frame的一半长度.(单位short2字节)        shInL = handlerBuf + 0;        shInH = handlerBuf + 240;        shOutL = handlerBuf + 240*2;        shOutH = handlerBuf + 240*3;    }    ~WebRtcAudioModule(){        if(nsInst_) WebRtcNsx_Free(nsInst_);        if(agcHandle_) WebRtcAgc_Free(agcHandle_);    }    bool initNs(uint32_t sampleRate)    {        int ret = WebRtcNsx_Create(&nsInst_);        if(ret != 0) return false;        ret = WebRtcNsx_Init(nsInst_,sampleRate);        if(ret != 0) return false;        ret = WebRtcNsx_set_policy(nsInst_,1);        if(ret != 0) return false;        return true;    }    bool initAgc(uint32_t sampleRate)    {        int ret = WebRtcAgc_Create(&agcHandle_);        if(ret != 0) return false;        int minLevel = 0;        int maxLevel = 255;        int agcMode  = kAgcModeFixedDigital;        ret = WebRtcAgc_Init(agcHandle_, minLevel, maxLevel, agcMode, sampleRate);        if(ret != 0) return false;        WebRtcAgc_config_t agcConfig;        agcConfig.compressionGaindB = 20;        agcConfig.limiterEnable     = 1;        agcConfig.targetLevelDbfs   = 3;        ret = WebRtcAgc_set_config(agcHandle_, agcConfig);        if(ret != 0) return false;        return true;    }    //处理数据. data里面的有效长度是frameCount*sizeof(short). 也就是为什么只支持int16的sample    bool handle(short* data, unsigned long frameCount)    {       //记住,一个frame目前是2字节!        int ret = 0;        Q_ASSERT(frameCount%320==0); //agc操作要求frame必须是160, 320.        
Q_ASSERT(nsInst_ || agcHandle_);        if(frameCount%320!=0) return false;        for(unsigned i=0;i<frameCount;i+=320)        {            //分拆高频低频.这个函数一定会成功,否则崩掉.            WebRtcSpl_AnalysisQMF(data+i,320,shInL,shInH,                                  filter_state1,filter_state2);            if(nsInst_){                ret = WebRtcNsx_Process(nsInst_, shInL, shInH, shOutL, shOutH);                if(ret != 0) break;            }            if(agcHandle_){                if(nsInst_){                    qSwap(shInL,shOutL);                    qSwap(shInH,shOutH);                }                uint8_t saturationWarning;                int inMicLevel = micLevel_;                ret = WebRtcAgc_Process(agcHandle_, shInL, shInH, 160,                                        shOutL ,shOutH, inMicLevel,                                        &micLevel_, 0, &saturationWarning);                if(ret != 0) break;            }            WebRtcSpl_SynthesisQMF(shOutL,shOutH, 160, data+i, synthesis_state1,synthesis_state2);        }        return (ret==0);    }private:    NsxHandle *nsInst_;    int internalBuf[24];    int *filter_state1;//[6];    int *filter_state2;//[6];    int *synthesis_state1;//[6];    int *synthesis_state2;//[6];    short handlerBuf[240*4]; //webrtc降噪函数限制最大240个short    short *shInL,*shInH,*shOutL,*shOutH;    void *agcHandle_;    int micLevel_;  //micLevel输入换输出,下一个处理再作为输入.};

有用经验:①我这里是先降噪后增益,噪音降低后增益把原声和降低的噪音又都放大了,效果是降噪不太明显,主声增大比较显著.
②如果想要降噪–增益–再降噪, 你需要多初始化一套NsxHandle *nsInst2_, 然后照着样子调用,否则不行.
③WebRtcNsx_Process和WebRtcNs_Process听不出差别。两者的区别在于实现方式:带x的Nsx是定点(fixed-point)实现,不带x的Ns是浮点实现,Nsx更适合没有浮点运算单元的设备。
④webrtc这部分代码免费取用http://download.csdn.net/detail/jinzeyu_cn/9894495
三、lame编码mp3

//sdk要求lame的buffer最少要7200用于lame_encode_flush#define MIN_LAME_BUFFERSIZE (7200)class LameMp3Encoder{public:    LameMp3Encoder()        : flags_(NULL)        , mp3buf(NULL)        , fp_(NULL)    {}    ~LameMp3Encoder()    {        this->finish();        if(flags_) lame_close(flags_);        if(mp3buf) JFree(mp3buf);        if(fp_) fclose(fp_);    }    bool init(const char* filePath, uint32_t sampleRate, int bufferSize=FRAMES_PER_BUFFER*2)    {        fp_ = fopen(filePath,"wb+");        if(fp_ == NULL) return false;        bufferSize_ = qMax(bufferSize,MIN_LAME_BUFFERSIZE);        mp3buf = (unsigned char*)JMalloc(bufferSize_);        flags_ = lame_init();        if(flags_==NULL) return false;        int ret = 0;        lame_set_in_samplerate(flags_,sampleRate);        lame_set_num_channels(flags_,1);        lame_set_VBR(flags_,vbr_default); //vbr_abr);        //关闭tag自动写入,因为lame_mp3_tags_fid函数崩溃.        //参考 http://mp3-encoding.31853.n2.nabble.com/Re-lame-mp3-tags-fid-and-file-access-callbacks-td34000.html        lame_set_write_id3tag_automatic(flags_,0);        lame_set_brate(flags_,32);   //好像是影响最低比特率.        lame_set_mode(flags_,MONO);  //单声道即可,源都是单声道.        lame_set_quality(flags_,2);  //最佳效果,webrtc的效果占主导,其实听不出来差别.        ret = lame_init_params(flags_);        if(ret < 0) return false;        ret = lame_get_id3v2_tag(flags_,mp3buf,bufferSize_);        if(ret > 0){            fwrite(mp3buf,1,ret,fp_);        }        tagPos_ = ftell(fp_);  //记录位置,结束前要回头写.        
return true;    }    bool handle(short* data, unsigned long frameCount)    {        if(fp_ == NULL) return false;        Q_ASSERT(frameCount*2 <= bufferSize_);        //int mp3bytes  = lame_encode_buffer_interleaved(flags_,data,frameCount,mp3buf,bufferSize_);        int mp3bytes = lame_encode_buffer(flags_,data,data,frameCount,mp3buf,bufferSize_);        if(mp3bytes < 0) return false;        fwrite(mp3buf,1,mp3bytes,fp_);        return true;    }    void finish()    {        if(flags_==NULL || bufferSize_<7200 || fp_==NULL) return;        int mp3bytes = lame_encode_flush(flags_,mp3buf,bufferSize_);        if(mp3bytes>0){            fwrite(mp3buf,1,mp3bytes,fp_);        }        //lame_mp3_tags_fid(flags_,fp_);  此函数崩溃,看堆栈在fseek.使用手动tag写入解决.        mp3bytes = lame_get_id3v1_tag(flags_,mp3buf,bufferSize_);        if(mp3bytes>0){            fwrite(mp3buf,1,mp3bytes,fp_);        }        mp3bytes = lame_get_lametag_frame(flags_,mp3buf,bufferSize_);        if(mp3bytes>0){            fseek(fp_,tagPos_,SEEK_SET);            fwrite(mp3buf,1,mp3bytes,fp_);        }        fclose(fp_);        fp_ = NULL;    }private:    lame_global_flags* flags_;    unsigned bufferSize_;    unsigned char* mp3buf;    FILE* fp_;    int tagPos_;};

有用经验:如果不用我的代码,mp3文件编完后很容易出现时长偏差,播放器和文件夹里看的时间不一致,那是因为没给mp3写好tag文件头。简单的方法是结束的时候调用lame_mp3_tags_fid,但是我一次也没成功,后找到老外一个帖子,手工添加tag完美解决,就是略麻烦一点,参见以上代码和url

最后

如果您和我有一样的需求,以上的经验足以让您节省至少一天的工时.算一下您月薪假如是12000,工作日22天,为您节省500+的人民币,真实在!!
当然为了我的工作,我将PortAudio的调用也C++类化,并且支持录音设备的热插拔,用空白语音填充无设备的情况.这一部分的有用经验:一旦Pa_Initialize();成功后Pa_GetDefaultInputDevice()返回值不会变,无法发现设备插入、移除或变更哦。手头没代码,先卖个关子吧!

原创粉丝点击