HEVC函数入门（8）——变换的实现

来源：互联网　发布：个性域名格式不正确　编辑：程序博客网　时间：2024/05/22 16:06

前面我们讲的都是预测，下面开始变换的实现。
本文参考：http://blog.csdn.net/shaqoneal/article/details/44856469
1、HM中Intra模式的主要实现逻辑
以Intra的亮度模式为例，主要代码位于TEncSearch::estIntraPredQT方法中。该方法首先获取当前CU的分割子块的个数，并且对每个子块分别进行预测、变换量化操作（代码中称之为PU Loop）。在每一次的PU Loop中，编码器首先遍历35种预测模式，对每一种模式进行参考像素配置、预测和率失真代价判断，并选择出几个最优的候选模式；然后对这几个选出的候选模式，递归地进行变换、量化、熵编码操作（代码中称之为Mode loop）。以下伪代码可以作为参考：

// Pseudo-code outline of TEncSearch::estIntraPredQT (the inner calls are shown without arguments).
// For each of the uiNumPU partitions: initAdiPattern() is called, then a loop over 35 mode indices calls
// predIntraLumaAng(), calcHAD(), xModeBitsIntra() and xUpdateCandList(); after that loop xRecurIntraCodingQT() is called.
Void   TEncSearch::estIntraPredQT( TComDataCU* pcCU, TComYuv*    pcOrgYuv, TComYuv*    pcPredYuv,  TComYuv*    pcResiYuv, TComYuv*    pcRecoYuv, UInt&       ruiDistC, Bool        bLumaOnly )  {    UInt    uiNumPU        = pcCU->getNumPartInter();//number of sub-blocks under the current CU's partition mode    for( UInt uiPU = 0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )    {      // fetch the reference samples and filter them      pcCU->getPattern()->initAdiPattern();      for( Int modeIdx = 0; modeIdx < 35; modeIdx++ )      {        predIntraLumaAng();//get the prediction result of each mode        //compute the cost of the prediction mode        UInt uiSad = m_pcRdCost->calcHAD();        UInt   iModeBits = xModeBitsIntra();        xUpdateCandList();//update the cost values of the candidate modes      }//Mode loop      //recursively code the Intra CU, including transform, quantization, etc.      xRecurIntraCodingQT();    }// PU loop  }

这里就不放源代码了，直接放原博主写的伪代码。
2、xRecurIntraCodingQT以及变换量化的实现
本段主要通过代码注释讨论变换和量化的方法(由于过长删除了一部分)：

VoidTEncSearch::xRecurIntraCodingLumaQT(TComYuv*    pcOrgYuv,                                    TComYuv*    pcPredYuv,                                    TComYuv*    pcResiYuv,                                    Pel         resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],                                    Distortion& ruiDistY,#if HHI_RQT_INTRA_SPEEDUP                                    Bool        bCheckFirst,#endif                                    Double&     dRDCost,                                    TComTU&     rTu                                    DEBUG_STRING_FN_DECLARE(sDebug)){  TComDataCU   *pcCU          = rTu.getCU();  const UInt    uiAbsPartIdx  = rTu.GetAbsPartIdxTU();  const UInt    uiFullDepth   = rTu.GetTransformDepthTotal();  const UInt    uiTrDepth     = rTu.GetTransformDepthRel();  const UInt    uiLog2TrSize  = rTu.GetLog2LumaTrSize();        Bool    bCheckFull    = ( uiLog2TrSize  <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() );        Bool    bCheckSplit   = ( uiLog2TrSize  >  pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );        Pel     resiLumaSplit [NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];        Pel     resiLumaSingle[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];        Bool    bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];        for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)        {          bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise        }        bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate());#if HHI_RQT_INTRA_SPEEDUP  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);  // don't check split if TU size is less or equal to max TU size   Bool noSplitIntraMaxTuSize = bCheckFull;  if(m_pcEncCfg->getRDpenalty() && ! 
isIntraSlice)  {    // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice（对于非帧内的片）如果TU大小小于或等于16x16 TU大小，则不会检查分割 这里non-intra我也不知道是什么    noSplitIntraMaxTuSize = ( uiLog2TrSize  <= min(maxTuSize,4) );    // if maximum RD-penalty don't check TU size 32x32 总之以上是三种不对TU继续分割的条件    if(m_pcEncCfg->getRDpenalty()==2)    {      bCheckFull    = ( uiLog2TrSize  <= min(maxTuSize,4));    }  }  if( bCheckFirst && noSplitIntraMaxTuSize )  {    bCheckSplit = false;  }#else  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);  // if maximum RD-penalty don't check TU size 32x32  if((m_pcEncCfg->getRDpenalty()==2)  && !isIntraSlice)  {    bCheckFull    = ( uiLog2TrSize  <= min(maxTuSize,4));  }#endif  //...  if ( m_pcEncCfg->getUseTransformSkipFast() )  {    checkTransformSkip       &= (pcCU->getPartitionSize(uiAbsPartIdx)==SIZE_NxN);  }  if( bCheckFull )  {//按照一整个TU进行变换、量化      if(checkTransformSkip == true)    {//skip模式        //...          xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 );//对亮度TU进行变换和量化编码          }        singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );        //----- determine rate and r-d cost ----- 确定码率和RDcost      //...      
//----- store original entropy coding status -----      if( bCheckSplit )      {        m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );      }      //----- code luma/chroma block with given intra prediction mode and store Cbf-----      dSingleCost   = 0.0;      if (rTu.ProcessComponentSection(COMPONENT_Y))      {        const UInt totalAdjustedDepthChan   = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);        pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );      }      xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug));      if( bCheckSplit )      {        uiSingleCbfLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );      }      //----- determine rate and r-d cost -----      UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );      if(m_pcEncCfg->getRDpenalty() && (uiLog2TrSize==5) && !isIntraSlice)      {        uiSingleBits=uiSingleBits*4;      }      dSingleCost       = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDistLuma );      if (pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction())      {        const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;        const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;        for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)        {          if (bMaintainResidual[storedResidualIndex])          {            xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);          }        }      }    }  }  if( bCheckSplit )  {//分割成4个TU，进行递归编码      //----- store full entropy coding status, load original entropy coding status -----    if( bCheckFull )    {      m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] );      m_pcRDGoOnSbacCoder->load ( 
m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );    }    else    {      m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );    }    //----- code splitted block -----    Double     dSplitCost      = 0.0;    Distortion uiSplitDistLuma = 0;    UInt       uiSplitCbfLuma  = 0;    TComTURecurse tuRecurseChild(rTu, false);    DEBUG_STRING_NEW(sSplit)    do    {      DEBUG_STRING_NEW(sChild)#if HHI_RQT_INTRA_SPEEDUP//< tests one best mode with full rqt      xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );//本函数递归调用  #else      xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );#endif//...}

上面的代码被剪得不成样子了(⊙﹏⊙)b。在该函数调用的xIntraCodingTUBlock中，按照给定的预测模式获取CU相应的预测数据，并与实际的像素值求取差值数据作为残差。随后，调用transformNxN函数进行变换编码。原博文提到亮度和色度TU的变换和量化编码分别由xIntraCodingLumaBlk和xIntraCodingChromaBlk完成，不过我用的HM版本中没找到这两个函数，应该是做了调整，这里只有xIntraCodingTUBlock。跳转到它的定义：

Void TEncSearch::xIntraCodingTUBlock(       TComYuv*    pcOrgYuv,                                            TComYuv*    pcPredYuv,                                            TComYuv*    pcResiYuv,                                            Pel         resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],                                      const Bool        checkCrossCPrediction,                                            Distortion& ruiDist,                                      const ComponentID compID,                                            TComTU&     rTu                                      DEBUG_STRING_FN_DECLARE(sDebug)                                           ,Int         default0Save1Load2                                     )//...  //===== init availability pattern =====  #ifndef DEBUG_STRING  if( default0Save1Load2 != 2 )#endif  {    const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(compID, uiChFinalMode, uiWidth, uiHeight, chFmt, pcCU->getSlice()->getSPS()->getDisableIntraReferenceSmoothing());    initAdiPatternChType( rTu, bAboveAvail, bLeftAvail, compID, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sDebug) );    //===== get prediction signal =====    predIntraAng( compID, uiChFinalMode, piOrg, uiStride, piPred, uiStride, rTu, bAboveAvail, bLeftAvail, bUseFilteredPredictions );//针对给定模式进行帧内预测，获取预测像素块      // save prediction//...  //===== get residual signal =====//...  /*//===== transform and quantization ===== 对预测残差进行变换和量化编码*/  //--- init rate estimation arrays for RDOQ ---  if( useTransformSkip ? 
m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ() )  {    m_pcEntropyCoder->estimateBit( m_pcTrQuant->m_pcEstBitsSbac, uiWidth, uiHeight, chType );  }  //--- transform and quantization ---  TCoeff uiAbsSum = 0;  if (bIsLuma)  {    pcCU       ->setTrIdxSubParts ( uiTrDepth, uiAbsPartIdx, uiFullDepth );  }  const QpParam cQP(*pcCU, compID);#if RDOQ_CHROMA_LAMBDA  m_pcTrQuant->selectLambda     (compID);#endif  m_pcTrQuant->transformNxN     ( rTu, compID, piResi, uiStride, pcCoeff, #if ADAPTIVE_QP_SELECTION    pcArlCoeff,#endif    uiAbsSum, cQP    );  //--- inverse transform ---#ifdef DEBUG_STRING  if ( (uiAbsSum > 0) || (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask) )#else  if ( uiAbsSum > 0 )#endif  {    m_pcTrQuant->invTransformNxN ( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sDebug, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) );  }  else  {    Pel* pResi = piResi;    memset( pcCoeff, 0, sizeof( TCoeff ) * uiWidth * uiHeight );    for( UInt uiY = 0; uiY < uiHeight; uiY++ )    {      memset( pResi, 0, sizeof( Pel ) * uiWidth );      pResi += uiStride;    }  }  //===== reconstruction =====  //===== update distortion =====}

其中，transformNxN函数实现了变换和量化功能，其核心功能在于调用了三个函数：xTransformSkip、xT和xQuant，分别实现skip模式变换、常规残差数据的变换和量化编码。

Void TComTrQuant::transformNxN(       TComTU        & rTu,                                const ComponentID     compID,                                      Pel          *  pcResidual,                                const UInt            uiStride,                                      TCoeff       *  rpcCoeff,#if ADAPTIVE_QP_SELECTION                                      TCoeff       *  pcArlCoeff,#endif                                      TCoeff        & uiAbsSum,                                const QpParam       & cQP                              ){  const TComRectangle &rect = rTu.getRect(compID);  const UInt uiWidth        = rect.width;  const UInt uiHeight       = rect.height;  TComDataCU* pcCU          = rTu.getCU();  const UInt uiAbsPartIdx   = rTu.GetAbsPartIdxTU();  const UInt uiOrgTrDepth   = rTu.GetTransformDepthRel();  uiAbsSum=0;  RDPCMMode rdpcmMode = RDPCM_OFF;  rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );  if (rdpcmMode == RDPCM_OFF)  {    uiAbsSum = 0;    //transform and quantise    if(pcCU->getCUTransquantBypass(uiAbsPartIdx))    {      const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);      const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;      for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)      {        for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)        {          const Pel currentSample = pcResidual[(y * uiStride) + x];          rpcCoeff[rotateResidual ? 
(uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;          uiAbsSum += TCoeff(abs(currentSample));        }      }    }    else    {#ifdef DEBUG_TRANSFORM_AND_QUANTISE      std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";      printBlock(pcResidual, uiWidth, uiHeight, uiStride);#endif      assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );      if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)      {        xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID ); //skip模式变换        }      else      {        xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );//常规残差变换      }

xT函数实现比较简单，主要为调用xTrMxN函数实现变换功能，该函数的实现为：

/**
 * Forward 2-D transform of a block, carried out as two consecutive 1-D passes.
 *
 * The first pass is selected by iWidth and writes into a local scratch buffer;
 * the second pass is selected by iHeight and writes the final result into coeff.
 * A 4x4 block with useDST set is routed through fastForwardDst in both passes;
 * every other supported size (4, 8, 16, 32) uses the matching partialButterflyN routine.
 *
 * \param bitDepth           added into the first-pass shift
 * \param block              input samples, read by the first pass
 * \param coeff              output coefficients, written by the second pass
 * \param iWidth             block width; must be 4, 8, 16 or 32
 * \param iHeight            block height; must be 4, 8, 16 or 32
 * \param useDST             for 4x4 blocks, use fastForwardDst instead of partialButterfly4
 * \param maxTrDynamicRange  subtracted from the first-pass shift
 *
 * An unsupported width or height trips assert(0) and then ends the process via exit(1).
 */
Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
{
  static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];

  // Shift amounts handed to the first (width-driven) and second (height-driven) pass.
  const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
  const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;

  assert(shift_1st >= 0);
  assert(shift_2nd >= 0);

  // Scratch buffer holding the output of the first pass.
  TCoeff intermediate[ MAX_TU_SIZE * MAX_TU_SIZE ];

  // Only a 4x4 block with useDST set takes the DST path; this holds for both passes.
  const Bool isDst4x4 = useDST && (iWidth == 4) && (iHeight == 4);

  // ---- first pass: routine chosen by width, line count is the height ----
  if (iWidth == 4)
  {
    if (isDst4x4)
    {
      fastForwardDst( block, intermediate, shift_1st );
    }
    else
    {
      partialButterfly4( block, intermediate, shift_1st, iHeight );
    }
  }
  else if (iWidth == 8)
  {
    partialButterfly8( block, intermediate, shift_1st, iHeight );
  }
  else if (iWidth == 16)
  {
    partialButterfly16( block, intermediate, shift_1st, iHeight );
  }
  else if (iWidth == 32)
  {
    partialButterfly32( block, intermediate, shift_1st, iHeight );
  }
  else
  {
    assert(0);
    exit (1);
  }

  // ---- second pass: routine chosen by height, line count is the width ----
  if (iHeight == 4)
  {
    if (isDst4x4)
    {
      fastForwardDst( intermediate, coeff, shift_2nd );
    }
    else
    {
      partialButterfly4( intermediate, coeff, shift_2nd, iWidth );
    }
  }
  else if (iHeight == 8)
  {
    partialButterfly8( intermediate, coeff, shift_2nd, iWidth );
  }
  else if (iHeight == 16)
  {
    partialButterfly16( intermediate, coeff, shift_2nd, iWidth );
  }
  else if (iHeight == 32)
  {
    partialButterfly32( intermediate, coeff, shift_2nd, iWidth );
  }
  else
  {
    assert(0);
    exit (1);
  }
}

xTrMxN根据块的宽和高分别调用预设的蝶形变换函数partialButterfly4/8/16/32（当块为4×4且useDST为真时改用fastForwardDst），先后完成两次一维变换，从而实现对相应大小残差信号矩阵的变换。以4×4的partialButterfly4为例：
Void partialButterfly4(TCoeff *src, TCoeff *dst, Int shift, Int line)
{
Int j;
TCoeff E[2],O[2];
TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;

for (j=0; j

阅读全文

0 0