x265探索与研究（九）：compressFrame()函数

来源：互联网发布：最好广告过滤软件编辑：程序博客网时间：2024/06/06 13:05
x265探索与研究（九）：compressFrame()函数

compressFrame()函数负责编码一帧，功能繁杂、分析难度较大。其主要步骤包括：时间戳与统计变量的初始化、访问单元定界符（access unit delimiter，AUD）的输出、加权预测参数估计、运动参考帧的配置、通过码率控制确定当前Slice的QP值、熵编码相关信息的配置、根据是否并行（WPP）申请子码流空间、SEI相关配置、配合码率控制的线程控制、CTU行分析、Multi-pass Encoding统计信息的累加、滤波与去噪（NR）处理等等。其中大部分计算集中在CTU行分析阶段；当SAO使能时，还会调用encodeSlice()函数完成各CTU行的编码。
compressFrame()函数中调用的主要函数如下图所示：
下面给出compressFrame()函数的代码分析：
/*=============================================================*//* ====== Analysed by: RuiDong Fang  ====== Csdn Blog: http://blog.csdn.net/frd2009041510  ====== Date: 2016.04.18 ====== Funtion： compressFrame()函数,编码一帧。 *//*=============================================================*/void FrameEncoder::compressFrame(){    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////1、初始化一些变量ProfileScopeEvent(frameThread);//帧线程的档次范围    m_startCompressTime = x265_mdate();//编码的开始时间戳(timestamp时间戳 when frame encoder is given a frame)    m_totalActiveWorkerCount = 0;//统计m_activeWorkerCount的和，即经过CTU压缩后统计，进入帧前初始化为0（sum of m_activeWorkerCount sampled at end of each CTU）    m_activeWorkerCountSamples = 0;//当前帧已经编码分析完毕的CTU个数（count of times m_activeWorkerCount was sampled (think vbv restarts)）    m_totalWorkerElapsedTime = 0;//初始化所有CTU编码滤波占用的时间（total elapsed time spent by worker threads processing CTUs）    m_totalNoWorkerTime = 0;//初始化当前帧编码占用的时间（total elapsed time without any active worker threads）    m_countRowBlocks = 0;//正在运行的CTU行因为上一行没有完成完毕而强制退出的个数，在帧编码前初始化为0（count of workers forced to abandon a row because of top dependency）    m_allRowsAvailableTime = 0;//初始化当前帧所有CTU行准备好的时间点（timestamp when all reference dependencies are resolved）    m_stallStartTime = 0;//初始化正在进行编码的rows个数为0时的时间点（timestamp when worker count becomes 0）    m_completionCount = 0;    m_bAllRowsStop = false;//是否将所有CTU的编码停止,在每帧进入前初始化为false,在CTU编码决策中需要重新编码时将置为true    m_vbvResetTriggerRow = -1;//需要重新编码的CTU行号,每帧开始编码前初始化为-1    m_SSDY = m_SSDU = m_SSDV = 0;    m_ssim = 0;    m_ssimCnt = 0;    memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats));//将当前帧的统计信息初始化为0///////////////////////////////////////////////////////////////////////////////////////////////////////////////////2、存取单元access unit    /* Emit access unit delimiter unless this is the first frame and the user is     * not repeating headers (since AUD is supposed to be 
the first NAL in the access     * unit) *///当是第一帧时，需要给出一个定界符    Slice* slice = m_frame->m_encData->m_slice;//获取当前slice    if (m_param->bEnableAccessUnitDelimiters && (m_frame->m_poc || m_param->bRepeatHeaders))    {        m_bs.resetBits();        m_entropyCoder.setBitstream(&m_bs);        m_entropyCoder.codeAUD(*slice);        m_bs.writeByteAlignment();        m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);    }    if (m_frame->m_lowres.bKeyframe && m_param->bRepeatHeaders)        m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);//====================get stream headers///////////////////////////////////////////////////////////////////////////////////////////////////////////////////3、加权预测    // Weighted Prediction parameters estimation.    bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred;//P帧加权预测标志位    bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred;//B帧加权预测标志位    //加权预测的参数估计if (bUseWeightP || bUseWeightB)//使能加权预测    {#if DETAILED_CU_STATS        m_cuStats.countWeightAnalyze++;        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);#endif        WeightAnalysis wa(*this);//用于多线程加权分析        if (m_pool && wa.tryBondPeers(*this, 1))//从当前job中拥有核并且sleep状态的核可以触发多线程，如果没有可用核则在当前线程中完成进入else            /* use an idle worker for weight analysis */            wa.waitForExit();//一直等待到任务全部完成，这里等待的是核释放，内核释放了任务也就完成了        else            weightAnalyse(*slice, *m_frame, *m_param);//====================加权预测分析(每个list的第一帧分析加权与否，其它不加权)    }    else        slice->disableWeights();//不使能加权预测    ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////4、运动参考，配置参考帧信息// Generate motion references    int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 
2 : 0;//预测方向的数量，若为P帧则数量为1，若为B帧则数量为2，否则为0，即获取当前有几个list    for (int l = 0; l < numPredDir; l++)//根据预测方向的数量遍历list的个数    {        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)//遍历当前list的所有参考帧        {            WeightParam *w = NULL;            if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)//如果是P帧或B帧，并且bPresentFlag为1                w = slice->m_weightPredTable[l][ref];//获取加权参数            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);//获取参考帧信息，申请加权帧内存        }    }///////////////////////////////////////////////////////////////////////////////////////////////////////////////////5、根据码率控制确定QP的大小    /* Get the QP for this frame from rate control. This call may block until     * frames ahead of it in encode order have called rateControlEnd() */    int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top);//====================rateControlStart()函数，即码率控制开始    m_rce.newQp = qp;//获取当前估计的量化参数    /* Clip slice QP to 0-51 spec range before encoding */    slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);//编码前，根据Offset确定/修正当前Slice的具体QP，取值必须处在0~51内///////////////////////////////////////////////////////////////////////////////////////////////////////////////////6、熵编码前的一些准备    m_initSliceContext.resetEntropy(*slice);//====================重置熵编码相关信息    m_frameFilter.start(m_frame, m_initSliceContext, qp);//====================滤波，在后面的调用中若bEnableSAO使能，则SAO    /* ensure all rows are blocked prior to initializing row CTU counters */    WaveFront::clearEnabledRowMask();//将当前WPPmap全部初始化为不可执行    /* reset entropy coders */    m_entropyCoder.load(m_initSliceContext);    for (uint32_t i = 0; i < m_numRows; i++)        m_rows[i].init(m_initSliceContext);///////////////////////////////////////////////////////////////////////////////////////////////////////////////////7、并行与否，申请空间    uint32_t numSubstreams = m_param->bEnableWavefront ? 
slice->m_sps->numCuInHeight : 1;//一帧并行的流数（CTU行数）    //根据并行的流数申请相关空间if (!m_outStreams)    {        m_outStreams = new Bitstream[numSubstreams];//申请空间        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);//申请空间        if (!m_param->bEnableSAO)//若没有使能SAO            for (uint32_t i = 0; i < numSubstreams; i++)                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);    }    else        for (uint32_t i = 0; i < numSubstreams; i++)            m_outStreams[i].resetBits();///////////////////////////////////////////////////////////////////////////////////////////////////////////////////8、SEI（Supplemental Enhancement Information）相关配置    int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;    if (m_frame->m_lowres.bKeyframe)    {        if (m_param->bEmitHRDSEI)        {            SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;            // since the temporal layer HRD is not ready, we assumed it is fixed            bpSei->m_auCpbRemovalDelayDelta = 1;            bpSei->m_cpbDelayOffset = 0;            bpSei->m_dpbDelayOffset = 0;            // hrdFullness() calculates the initial CPB removal delay and offset            m_top->m_rateControl->hrdFullness(bpSei);            m_bs.resetBits();            bpSei->write(m_bs, *slice->m_sps);            m_bs.writeByteAlignment();            m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);            m_top->m_lastBPSEI = m_rce.encodeOrder;        }    }    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)    {        SEIPictureTiming *sei = m_rce.picTimingSEI;        const VUI *vui = &slice->m_sps->vuiParameters;        const HRDInfo *hrd = &vui->hrdParameters;        int poc = slice->m_poc;        if (vui->frameFieldInfoPresentFlag)        {            if (m_param->interlaceMode == 2)                sei->m_picStruct = (poc & 1) ? 1 /* top */ : 2 /* bottom */;            else if (m_param->interlaceMode == 1)                sei->m_picStruct = (poc & 1) ? 
2 /* bottom */ : 1 /* top */;            else                sei->m_picStruct = 0;            sei->m_sourceScanType = 0;            sei->m_duplicateFlag = false;        }        if (vui->hrdParametersPresentFlag)        {            // The m_aucpbremoval delay specifies how many clock ticks the            // access unit associated with the picture timing SEI message has to            // wait after removal of the access unit with the most recent            // buffering period SEI message            sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics + poc - m_rce.encodeOrder;        }        m_bs.resetBits();        sei->write(m_bs, *slice->m_sps);        m_bs.writeByteAlignment();        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);    }///////////////////////////////////////////////////////////////////////////////////////////////////////////////////9、配合RC的线程控制    /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to      * tune RateControl parameters for other frames.     * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in     * RateControlEnd here, after the slicecontexts are initialized. 
For the rest - ABR     * and VBV, unlock only after rateControlUpdateStats of this frame is called *///因为m_startEndOrder在rateControlUpdateStats中只对ABR或者VBV模式更新,在此更新为了配合RC的线程控制    if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)    {        m_top->m_rateControl->m_startEndOrder.incr();//更新计数        if (m_rce.encodeOrder < m_param->frameNumThreads - 1)//刚启动时多更新一次            m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames    }//================================================================================================================大部分的计算处理都在此处之下///////////////////////////////////////////////////////////////////////////////////////////////////////////////////10、CTU分析（分两种情况：WPP使能、WPP不使能）    /* Analyze CTU rows, most of the hard work is done here.  Frame is     * compressed in a wave-front pattern if WPP is enabled. Row based loop     * filters runs behind the CTU compression and reconstruction */    m_rows[0].active = true;//触发第一个CTU行    if (m_param->bEnableWavefront)//如果WPP使能    {        for (uint32_t row = 0; row < m_numRows; row++)//遍历所有CTU行        {            // block until all reference frames have reconstructed the rows we need            for (int l = 0; l < numPredDir; l++)//当前list的个数            {                for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)//当前list 中ref的个数                {                    Frame *refpic = slice->m_refPicList[l][ref];//获取参考帧                    uint32_t reconRowCount = refpic->m_reconRowCount.get();                    while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))                        reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);                    if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)                        m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);                }            }//当前外部参考块（如参考帧对应的参考块）准备完毕,将当前row对应位置的map置为1,标记可以执行            
enableRowEncoder(row); //===========================/* clear external dependency for this row */            if (!row)//如果是第一行            {                m_row0WaitTime = x265_mdate();//获取当前帧开始编码的的时间点,用于计算当前帧的编码时间                //当前内部参考块准备完毕,将当前row对应位置的map置为1,标记可以执行enqueueRowEncoder(0); //===========================/* clear internal dependency, start wavefront */            }            tryWakeOne();//CTU行准备好并触发wpp， 在findjob中运行        }        m_allRowsAvailableTime = x265_mdate();//当前帧所有CTU行准备好的时间点        tryWakeOne(); //多触发一次/* ensure one thread is active or help-wanted flag is set prior to blocking */        static const int block_ms = 250;//超时时间        //每250ms触发一次,保证全部CTU行都能够执行（如果m_completionEvent在某一位置触发，则会造成不超时，循环退出）while (m_completionEvent.timedWait(block_ms))            tryWakeOne();//触发    }    else//如果WPP不使能    {        for (uint32_t i = 0; i < m_numRows + m_filterRowDelay; i++)        {            // compress            if (i < m_numRows)            {                // block until all reference frames have reconstructed the rows we need                for (int l = 0; l < numPredDir; l++)                {                    int list = l;                    for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)                    {                        Frame *refpic = slice->m_refPicList[list][ref];//获取参考帧                        uint32_t reconRowCount = refpic->m_reconRowCount.get();                        while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))                            reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)                            m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows);                    }                }                if (!i)                    m_row0WaitTime = x265_mdate();                else if (i == m_numRows - 1)                    m_allRowsAvailableTime = 
x265_mdate();                processRowEncoder(i, m_tld[m_localTldIdx]);//===========================Called by worker threads            }            // filter            if (i >= m_filterRowDelay)                m_frameFilter.processRow(i - m_filterRowDelay);//===========================滤波每一行(Row based loop filters)        }    }///////////////////////////////////////////////////////////////////////////////////////////////////////////////////11、Multi-pass encoding    if (m_param->rc.bStatWrite)//Multi-pass encoding(Enable writing the stats in a multi-pass encode to the stat output file)    {        int totalI = 0, totalP = 0, totalSkip = 0;        // accumulate intra,inter,skip cu count per frame for 2 pass        for (uint32_t i = 0; i < m_numRows; i++)        {            m_frame->m_encData->m_frameStats.mvBits    += m_rows[i].rowStats.mvBits;            m_frame->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;            m_frame->m_encData->m_frameStats.miscBits  += m_rows[i].rowStats.miscBits;            totalI                                     += m_rows[i].rowStats.intra8x8Cnt;            totalP                                     += m_rows[i].rowStats.inter8x8Cnt;            totalSkip                                  += m_rows[i].rowStats.skip8x8Cnt;        }        int totalCuCount = totalI + totalP + totalSkip;//总CU数目        m_frame->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;        m_frame->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;        m_frame->m_encData->m_frameStats.percent8x8Skip  = (double)totalSkip / totalCuCount;    }    for (uint32_t i = 0; i < m_numRows; i++)    {        m_frame->m_encData->m_frameStats.cntIntraNxN      += m_rows[i].rowStats.cntIntraNxN;        m_frame->m_encData->m_frameStats.totalCu          += m_rows[i].rowStats.totalCu;        m_frame->m_encData->m_frameStats.totalCtu         += m_rows[i].rowStats.totalCtu;        
m_frame->m_encData->m_frameStats.lumaDistortion   += m_rows[i].rowStats.lumaDistortion;        m_frame->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;        m_frame->m_encData->m_frameStats.psyEnergy        += m_rows[i].rowStats.psyEnergy;        m_frame->m_encData->m_frameStats.lumaLevel        += m_rows[i].rowStats.lumaLevel;        if (m_rows[i].rowStats.maxLumaLevel > m_frame->m_encData->m_frameStats.maxLumaLevel)            m_frame->m_encData->m_frameStats.maxLumaLevel = m_rows[i].rowStats.maxLumaLevel;        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)        {            m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];            m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];            for (int m = 0; m < INTER_MODES; m++)                m_frame->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];            for (int n = 0; n < INTRA_MODES; n++)                m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];        }    }    m_frame->m_encData->m_frameStats.avgLumaDistortion   = (double)(m_frame->m_encData->m_frameStats.lumaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;    m_frame->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame->m_encData->m_frameStats.chromaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;    m_frame->m_encData->m_frameStats.avgPsyEnergy        = (double)(m_frame->m_encData->m_frameStats.psyEnergy) / m_frame->m_encData->m_frameStats.totalCtu;    m_frame->m_encData->m_frameStats.avgLumaLevel        = m_frame->m_encData->m_frameStats.lumaLevel / m_frame->m_encData->m_frameStats.totalCtu;    m_frame->m_encData->m_frameStats.percentIntraNxN     = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;    for 
(uint32_t depth = 0; depth <= g_maxCUDepth; depth++)    {        m_frame->m_encData->m_frameStats.percentSkipCu[depth]  = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;        m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;        for (int n = 0; n < INTRA_MODES; n++)            m_frame->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame->m_encData->m_frameStats.totalCu;        uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts        cuInterRectCnt += m_frame->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame->m_encData->m_frameStats.cuInterDistribution[depth][2];        m_frame->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame->m_encData->m_frameStats.totalCu;        m_frame->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame->m_encData->m_frameStats.totalCu;        m_frame->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame->m_encData->m_frameStats.totalCu;    }    m_bs.resetBits();    m_entropyCoder.load(m_initSliceContext);    m_entropyCoder.setBitstream(&m_bs);    m_entropyCoder.codeSliceHeader(*slice, *m_frame->m_encData);//===========================codeSliceHeader()函数///////////////////////////////////////////////////////////////////////////////////////////////////////////////////12、SAO，并调用encodeSlice()    // finish encode of each CTU row, only required when SAO is enabled    if (m_param->bEnableSAO)        encodeSlice();//===========================调用encodeSlice()    // serialize each row, record final lengths in 
slice header    uint32_t maxStreamSize = m_nalList.serializeSubstreams(m_substreamSizes, numSubstreams, m_outStreams);    // complete the slice header by writing WPP row-starts    m_entropyCoder.setBitstream(&m_bs);    if (slice->m_pps->bEntropyCodingSyncEnabled)        m_entropyCoder.codeSliceHeaderWPPEntryPoints(*slice, m_substreamSizes, maxStreamSize);    m_bs.writeByteAlignment();    m_nalList.serialize(slice->m_nalUnitType, m_bs);    if (m_param->decodedPictureHashSEI)    {        if (m_param->decodedPictureHashSEI == 1)        {            m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::MD5;            for (int i = 0; i < 3; i++)                MD5Final(&m_state[i], m_seiReconPictureDigest.m_digest[i]);        }        else if (m_param->decodedPictureHashSEI == 2)        {            m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CRC;            for (int i = 0; i < 3; i++)                crcFinish(m_crc[i], m_seiReconPictureDigest.m_digest[i]);        }        else if (m_param->decodedPictureHashSEI == 3)        {            m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CHECKSUM;            for (int i = 0; i < 3; i++)                checksumFinish(m_checksum[i], m_seiReconPictureDigest.m_digest[i]);        }        m_bs.resetBits();        m_seiReconPictureDigest.write(m_bs, *slice->m_sps);        m_bs.writeByteAlignment();        m_nalList.serialize(NAL_UNIT_SUFFIX_SEI, m_bs);    }    uint64_t bytes = 0;    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)    {        int type = m_nalList.m_nal[i].type;        // exclude SEI        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)        {            bytes += m_nalList.m_nal[i].sizeBytes;            // and exclude start code prefix            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 
4 : 3;        }    }    m_accessUnitBits = bytes << 3;    m_endCompressTime = x265_mdate();//timestamp after all CTUs are compressed//printf("编码时间(Compressinging Time): %d ms\n",m_endCompressTime-m_startCompressTime);//added by Fred    /* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */    if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce) < 0)        m_top->m_aborted = true;//一般不进入,错误返回    /* Decrement referenced frame reference counts, allow them to be recycled */    for (int l = 0; l < numPredDir; l++)    {        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)//遍历每个List的参考帧        {            Frame *refpic = slice->m_refPicList[l][ref];//获取参考帧地址            ATOMIC_DEC(&refpic->m_countRefEncoders);//被参考计数减一        }    }///////////////////////////////////////////////////////////////////////////////////////////////////////////////////13、去噪处理NR updates（Noise Reduction）    int numTLD;//TLD=Thread Local Data当前并行线程个数    if (m_pool)        numTLD = m_param->bEnableWavefront ? 
m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;//应用WPP，为当前机器的核数，否则为当前核数加上当前的线程个数    else        numTLD = 1;//不应用多线程，并行个数为1    if (m_nr)//如果noise reduction(更新去噪偏移值)    {        /* Accumulate NR statistics from all worker threads */        //加速所有线程的NR统计for (int i = 0; i < numTLD; i++)//遍历当前帧编码应用所有线程：累加当前帧各个系数的统计数字        {            //获取当前线程对应当前帧的去噪类；i 确定当前帧的所有线程，如每个WPP行  m_jpId 确定当前帧NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)//遍历所有TU类别            {                for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)                    m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];//累加当前帧所有TU对应位置的系数绝对值和                            m_nr->count[cat] += nr->count[cat];//累加当前帧的TU计数            }        }        noiseReductionUpdate();//===========================DCT-domain noise reduction / adaptive deadzone from libavcodec        /* Copy updated NR coefficients back to all worker threads *///拷贝NR系数至所有的工作线程        for (int i = 0; i < numTLD; i++)//遍历当前帧编码应用所有线程        {            //获取当前线程对应当前帧的去噪类；i 确定当前帧的所有线程，如每个WPP行  m_jpId 确定当前帧NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];            //将当前更新的去噪偏移值拷贝到各个线程中的去噪偏移中去memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);            memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);//初始为0            memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);//初始为0        }    }#if DETAILED_CU_STATS//统计数据    /* Accumulate CU statistics from each worker thread, we could report     * per-frame stats here, but currently we do not. */    for (int i = 0; i < numTLD; i++)        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId]);#endif    m_endFrameTime = x265_mdate();//帧编码的结束时间(timestamp after RCEnd, NR updates, etc)}
2 0