以Intra的亮度模式为例,主要代码实现于TEncSearch::estIntraPredQT方法中。该方法首先获取当前CU的分割子块的个数,并对每个子块分别进行预测、变换和量化操作(代码中称之为PU loop)。在每一次PU loop中,编码器首先遍历35种预测模式,对每一种模式进行参考像素配置、预测和率失真代价计算,并从中选择出若干个最优的候选模式;随后对这些选取出的候选模式递归地进行变换、量化和熵编码操作(代码中称之为Mode loop)。以下伪代码可以作为参考:

// Pseudo-code outline of the intra luma mode search.
// The argument lists of the inner calls are omitted, and uiPartOffset /
// uiQNumParts are used without being declared in this outline.
// For every PU of the CU: set up the reference samples once, evaluate all 35
// prediction modes while maintaining a candidate list, then run the recursive
// transform/quantisation coding.
Void TEncSearch::estIntraPredQT( TComDataCU* pcCU,
                                 TComYuv*    pcOrgYuv,
                                 TComYuv*    pcPredYuv,
                                 TComYuv*    pcResiYuv,
                                 TComYuv*    pcRecoYuv,
                                 UInt&       ruiDistC,
                                 Bool        bLumaOnly )
{
  UInt uiNumPU = pcCU->getNumPartInter(); // number of sub-blocks under the current CU's partition mode

  for( UInt uiPU = 0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
  {
    // fetch the reference samples and filter them
    pcCU->getPattern()->initAdiPattern();

    for( Int modeIdx = 0; modeIdx < 35; modeIdx++ )
    {
      predIntraLumaAng(); // prediction result for this mode

      // cost of this prediction mode
      UInt uiSad     = m_pcRdCost->calcHAD();
      UInt iModeBits = xModeBitsIntra();

      xUpdateCandList(); // update the cost values of the candidate modes
    } // Mode loop (over the 35 prediction modes)

    // recursively code the intra CU, including transform, quantisation, etc.
    xRecurIntraCodingQT();
  } // PU loop
}


VoidTEncSearch::xRecurIntraCodingLumaQT(TComYuv*    pcOrgYuv,                                    TComYuv*    pcPredYuv,                                    TComYuv*    pcResiYuv,                                    Pel         resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],                                    Distortion& ruiDistY,#if HHI_RQT_INTRA_SPEEDUP                                    Bool        bCheckFirst,#endif                                    Double&     dRDCost,                                    TComTU&     rTu                                    DEBUG_STRING_FN_DECLARE(sDebug)){  TComDataCU   *pcCU          = rTu.getCU();  const UInt    uiAbsPartIdx  = rTu.GetAbsPartIdxTU();  const UInt    uiFullDepth   = rTu.GetTransformDepthTotal();  const UInt    uiTrDepth     = rTu.GetTransformDepthRel();  const UInt    uiLog2TrSize  = rTu.GetLog2LumaTrSize();        Bool    bCheckFull    = ( uiLog2TrSize  <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() );        Bool    bCheckSplit   = ( uiLog2TrSize  >  pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );        Pel     resiLumaSplit [NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];        Pel     resiLumaSingle[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];        Bool    bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];        for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)        {          bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise        }        bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate());#if HHI_RQT_INTRA_SPEEDUP  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);  // don't check split if TU size is less or equal to max TU size   Bool noSplitIntraMaxTuSize = bCheckFull;  if(m_pcEncCfg->getRDpenalty() && ! 
isIntraSlice)  {    // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice(对于非帧内的片)如果TU大小小于或等于16x16 TU大小,则不会检查分割 这里non-intra我也不知道是什么    noSplitIntraMaxTuSize = ( uiLog2TrSize  <= min(maxTuSize,4) );    // if maximum RD-penalty don't check TU size 32x32 总之以上是三种不对TU继续分割的条件    if(m_pcEncCfg->getRDpenalty()==2)    {      bCheckFull    = ( uiLog2TrSize  <= min(maxTuSize,4));    }  }  if( bCheckFirst && noSplitIntraMaxTuSize )  {    bCheckSplit = false;  }#else  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);  // if maximum RD-penalty don't check TU size 32x32  if((m_pcEncCfg->getRDpenalty()==2)  && !isIntraSlice)  {    bCheckFull    = ( uiLog2TrSize  <= min(maxTuSize,4));  }#endif  //...  if ( m_pcEncCfg->getUseTransformSkipFast() )  {    checkTransformSkip       &= (pcCU->getPartitionSize(uiAbsPartIdx)==SIZE_NxN);  }  if( bCheckFull )  {//按照一整个TU进行变换、量化      if(checkTransformSkip == true)    {//skip模式        //...          xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 );//对亮度TU进行变换和量化编码          }        singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );        //----- determine rate and r-d cost ----- 确定码率和RDcost      //...      
//----- store original entropy coding status -----      if( bCheckSplit )      {        m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );      }      //----- code luma/chroma block with given intra prediction mode and store Cbf-----      dSingleCost   = 0.0;      if (rTu.ProcessComponentSection(COMPONENT_Y))      {        const UInt totalAdjustedDepthChan   = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);        pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );      }      xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug));      if( bCheckSplit )      {        uiSingleCbfLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );      }      //----- determine rate and r-d cost -----      UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );      if(m_pcEncCfg->getRDpenalty() && (uiLog2TrSize==5) && !isIntraSlice)      {        uiSingleBits=uiSingleBits*4;      }      dSingleCost       = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDistLuma );      if (pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction())      {        const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;        const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;        for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)        {          if (bMaintainResidual[storedResidualIndex])          {            xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);          }        }      }    }  }  if( bCheckSplit )  {//分割成4个TU,进行递归编码      //----- store full entropy coding status, load original entropy coding status -----    if( bCheckFull )    {      m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] );      m_pcRDGoOnSbacCoder->load ( 
m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );    }    else    {      m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );    }    //----- code splitted block -----    Double     dSplitCost      = 0.0;    Distortion uiSplitDistLuma = 0;    UInt       uiSplitCbfLuma  = 0;    TComTURecurse tuRecurseChild(rTu, false);    DEBUG_STRING_NEW(sSplit)    do    {      DEBUG_STRING_NEW(sChild)#if HHI_RQT_INTRA_SPEEDUP//< tests one best mode with full rqt      xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );//本函数递归调用  #else      xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );#endif//...}

上面的代码被删减得不成样子了(⊙﹏⊙)b。在该函数调用的xIntraCodingTUBlock中,先按照给定的预测模式获取CU相应的预测数据,并与实际的像素值求差,得到的差值数据作为残差;随后调用transformNxN函数进行变换编码。原博文提到这里会分别调用亮度和色度TU的变换和量化编码函数xIntraCodingLumaBlk和xIntraCodingChromaBlk,不过在我用的HM版本中没有找到这两个函数,应该是做了调整,这里只有xIntraCodingTUBlock。下面跳转到它的定义:

// Excerpt of the per-TU intra coding routine; parts that were cut out are
// marked with "//...". The visible steps are: set up the reference samples and
// run predIntraAng (skipped when default0Save1Load2 == 2 and DEBUG_STRING is
// not defined), forward transform/quantisation through transformNxN, then
// either the inverse transform (uiAbsSum > 0) or zeroing of the coefficients
// and the residual.
Void TEncSearch::xIntraCodingTUBlock(       TComYuv*    pcOrgYuv,
                                            TComYuv*    pcPredYuv,
                                            TComYuv*    pcResiYuv,
                                            Pel         resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],
                                      const Bool        checkCrossCPrediction,
                                            Distortion& ruiDist,
                                      const ComponentID compID,
                                            TComTU&     rTu
                                      DEBUG_STRING_FN_DECLARE(sDebug)
                                           ,Int         default0Save1Load2
                                     )
//... (the opening brace and the local declarations are cut from the excerpt)
  //===== init availability pattern =====
#ifndef DEBUG_STRING
  if( default0Save1Load2 != 2 )
#endif
  {
    const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(compID, uiChFinalMode, uiWidth, uiHeight, chFmt, pcCU->getSlice()->getSPS()->getDisableIntraReferenceSmoothing());
    initAdiPatternChType( rTu, bAboveAvail, bLeftAvail, compID, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sDebug) );
    //===== get prediction signal =====
    predIntraAng( compID, uiChFinalMode, piOrg, uiStride, piPred, uiStride, rTu, bAboveAvail, bLeftAvail, bUseFilteredPredictions ); // intra prediction for the given mode; yields the predicted sample block
      // save prediction
//... (the rest of this block, including its closing brace, is cut from the excerpt)
  //===== get residual signal =====
//...
  //===== transform and quantization ===== (transform and quantise the prediction residual)
  //--- init rate estimation arrays for RDOQ ---
  if( useTransformSkip ? m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ() )
  {
    m_pcEntropyCoder->estimateBit( m_pcTrQuant->m_pcEstBitsSbac, uiWidth, uiHeight, chType );
  }
  //--- transform and quantization ---
  TCoeff uiAbsSum = 0;
  if (bIsLuma)
  {
    pcCU       ->setTrIdxSubParts ( uiTrDepth, uiAbsPartIdx, uiFullDepth );
  }
  const QpParam cQP(*pcCU, compID);
#if RDOQ_CHROMA_LAMBDA
  m_pcTrQuant->selectLambda     (compID);
#endif
  m_pcTrQuant->transformNxN     ( rTu, compID, piResi, uiStride, pcCoeff,
#if ADAPTIVE_QP_SELECTION
    pcArlCoeff,
#endif
    uiAbsSum, cQP
    );
  //--- inverse transform ---
#ifdef DEBUG_STRING
  if ( (uiAbsSum > 0) || (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask) )
#else
  if ( uiAbsSum > 0 )
#endif
  {
    m_pcTrQuant->invTransformNxN ( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sDebug, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) );
  }
  else
  {
    // nothing survived quantisation: clear the coefficients and the residual rows
    Pel* pResi = piResi;
    memset( pcCoeff, 0, sizeof( TCoeff ) * uiWidth * uiHeight );
    for( UInt uiY = 0; uiY < uiHeight; uiY++ )
    {
      memset( pResi, 0, sizeof( Pel ) * uiWidth );
      pResi += uiStride;
    }
  }
  //===== reconstruction =====
  //===== update distortion =====
}


// Excerpt of the forward transform entry point for one TU component; the
// excerpt stops before the end of the function.
// Visible behaviour: rdpcmNxN is tried first. If it leaves rdpcmMode at
// RDPCM_OFF, the residual is either copied straight into rpcCoeff (CU
// transquant bypass, optionally in reversed order) or transformed into
// m_plTempCoeff by xTransformSkip / xT.
Void TComTrQuant::transformNxN(       TComTU        & rTu,
                                const ComponentID     compID,
                                      Pel          *  pcResidual,
                                const UInt            uiStride,
                                      TCoeff       *  rpcCoeff,
#if ADAPTIVE_QP_SELECTION
                                      TCoeff       *  pcArlCoeff,
#endif
                                      TCoeff        & uiAbsSum,
                                const QpParam       & cQP
                              )
{
  const TComRectangle &rect = rTu.getRect(compID);
  const UInt uiWidth        = rect.width;
  const UInt uiHeight       = rect.height;
  TComDataCU* pcCU          = rTu.getCU();
  const UInt uiAbsPartIdx   = rTu.GetAbsPartIdxTU();
  const UInt uiOrgTrDepth   = rTu.GetTransformDepthRel();

  uiAbsSum=0;

  RDPCMMode rdpcmMode = RDPCM_OFF;
  rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );

  if (rdpcmMode == RDPCM_OFF)
  {
    uiAbsSum = 0;
    //transform and quantise
    if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
    {
      const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
      const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;

      for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
      {
        for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
        {
          const Pel currentSample = pcResidual[(y * uiStride) + x];

          // when rotated, samples are written back-to-front into the coefficient buffer
          rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
          uiAbsSum += TCoeff(abs(currentSample));
        }
      }
    }
    else
    {
#ifdef DEBUG_TRANSFORM_AND_QUANTISE
      std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
      printBlock(pcResidual, uiWidth, uiHeight, uiStride);
#endif

      assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );

      if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
      {
        xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID ); // transform-skip path
      }
      else
      {
        xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight ); // regular residual transform
      }
//... (the excerpt ends here; the remainder of the function is not shown)


/** Forward transform of an iWidth x iHeight block, done in two passes.
 *
 *  Pass 1 reads \p block and writes a temporary buffer; its kernel is chosen
 *  by iWidth and it scales with shift_1st. Pass 2 reads the temporary buffer
 *  and writes \p coeff; its kernel is chosen by iHeight and it scales with
 *  shift_2nd. A 4x4 block with \p useDST set goes through fastForwardDst in
 *  both passes; every other supported case uses partialButterfly4/8/16/32.
 *
 *  \param bitDepth           added into the first-pass shift
 *  \param block              input samples
 *  \param coeff              output coefficients
 *  \param iWidth             block width; must be 4, 8, 16 or 32
 *  \param iHeight            block height; must be 4, 8, 16 or 32
 *  \param useDST             selects fastForwardDst for 4x4 blocks
 *  \param maxTrDynamicRange  subtracted from the first-pass shift
 *
 *  Any other width or height trips assert(0) and then calls exit(1).
 */
Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
{
  static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];

  const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) +  bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
  const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;

  assert(shift_1st >= 0);
  assert(shift_2nd >= 0);

  TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];

  // first pass: block -> tmp, kernel selected by the width
  switch (iWidth)
  {
    case 4:
      {
        if ((iHeight == 4) && useDST)    // Check for DCT or DST
        {
          fastForwardDst( block, tmp, shift_1st );
        }
        else
        {
          partialButterfly4 ( block, tmp, shift_1st, iHeight );
        }
      }
      break;

    case 8:     partialButterfly8 ( block, tmp, shift_1st, iHeight );  break;
    case 16:    partialButterfly16( block, tmp, shift_1st, iHeight );  break;
    case 32:    partialButterfly32( block, tmp, shift_1st, iHeight );  break;
    default:
      assert(0); exit (1); break;
  }

  // second pass: tmp -> coeff, kernel selected by the height
  switch (iHeight)
  {
    case 4:
      {
        if ((iWidth == 4) && useDST)    // Check for DCT or DST
        {
          fastForwardDst( tmp, coeff, shift_2nd );
        }
        else
        {
          partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
        }
      }
      break;

    case 8:     partialButterfly8 ( tmp, coeff, shift_2nd, iWidth );    break;
    case 16:    partialButterfly16( tmp, coeff, shift_2nd, iWidth );    break;
    case 32:    partialButterfly32( tmp, coeff, shift_2nd, iWidth );    break;
    default:
      assert(0); exit (1); break;
  }
}

