HEVC函数入门(8)——变换的实现
来源:互联网 发布:个性域名格式不正确 编辑:程序博客网 时间:2024/05/22 16:06
前面我们讲的都是预测,下面开始变换的实现。
本文参考http://blog.csdn.net/shaqoneal/article/details/44856469
1、HM中Intra模式的主要实现逻辑
以Intra的亮度模式为例。主要代码实现位于TEncSearch::estIntraPredQT方法中。该方法首先获取当前CU的分割子块的个数,并且对每个子块分别进行预测、变换量化操作(代码中称之为PU Loop)。在每一次的PU Loop中,编码器首先遍历35种预测模式,对每一种模式进行参考像素配置、预测和率失真代价判断,并选择出某几个最优的候选模式。对这几个选取出的最优模式,递归地进行变换、量化、熵编码操作(代码中称之为Mode loop)。以下伪代码可以作为参考:
Void TEncSearch::estIntraPredQT( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, UInt& ruiDistC, Bool bLumaOnly ) { UInt uiNumPU = pcCU->getNumPartInter();//当前CU的分割模式下,子块的个数 for( UInt uiPU = 0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts ) { // 获取参考像素,对参考像素进行滤波 pcCU->getPattern()->initAdiPattern(); for( Int modeIdx = 0; modeIdx < 35; modeIdx++ ) { predIntraLumaAng();//获取各个预测模式的结果 //计算预测模式的代价 UInt uiSad = m_pcRdCost->calcHAD(); UInt iModeBits = xModeBitsIntra(); xUpdateCandList();//更新候选模式的cost值 }//Mode loop //递归编码Intra CU,包括变换、量化等 xRecurIntraCodingQT(); }// PU loop }
这里就不放原代码了,直接放的原博主写的伪代码。
2、xRecurIntraCodingQT以及变换量化的实现
本段主要通过代码注释讨论变换和量化的方法(由于过长删除了一部分):
VoidTEncSearch::xRecurIntraCodingLumaQT(TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE], Distortion& ruiDistY,#if HHI_RQT_INTRA_SPEEDUP Bool bCheckFirst,#endif Double& dRDCost, TComTU& rTu DEBUG_STRING_FN_DECLARE(sDebug)){ TComDataCU *pcCU = rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiFullDepth = rTu.GetTransformDepthTotal(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); Bool bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() ); Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ); Pel resiLumaSplit [NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]; Pel resiLumaSingle[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]; Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES]; for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++) { bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise } bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate());#if HHI_RQT_INTRA_SPEEDUP Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize(); Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE); // don't check split if TU size is less or equal to max TU size Bool noSplitIntraMaxTuSize = bCheckFull; if(m_pcEncCfg->getRDpenalty() && ! 
isIntraSlice) { // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice(对于非帧内的片)如果TU大小小于或等于16x16 TU大小,则不会检查分割 这里non-intra我也不知道是什么 noSplitIntraMaxTuSize = ( uiLog2TrSize <= min(maxTuSize,4) ); // if maximum RD-penalty don't check TU size 32x32 总之以上是三种不对TU继续分割的条件 if(m_pcEncCfg->getRDpenalty()==2) { bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4)); } } if( bCheckFirst && noSplitIntraMaxTuSize ) { bCheckSplit = false; }#else Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize(); Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE); // if maximum RD-penalty don't check TU size 32x32 if((m_pcEncCfg->getRDpenalty()==2) && !isIntraSlice) { bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4)); }#endif //... if ( m_pcEncCfg->getUseTransformSkipFast() ) { checkTransformSkip &= (pcCU->getPartitionSize(uiAbsPartIdx)==SIZE_NxN); } if( bCheckFull ) {//按照一整个TU进行变换、量化 if(checkTransformSkip == true) {//skip模式 //... xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 );//对亮度TU进行变换和量化编码 } singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth ); //----- determine rate and r-d cost ----- 确定码率和RDcost //... 
//----- store original entropy coding status ----- if( bCheckSplit ) { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } //----- code luma/chroma block with given intra prediction mode and store Cbf----- dSingleCost = 0.0; if (rTu.ProcessComponentSection(COMPONENT_Y)) { const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y); pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan ); } xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug)); if( bCheckSplit ) { uiSingleCbfLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth ); } //----- determine rate and r-d cost ----- UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false ); if(m_pcEncCfg->getRDpenalty() && (uiLog2TrSize==5) && !isIntraSlice) { uiSingleBits=uiSingleBits*4; } dSingleCost = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDistLuma ); if (pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) { const Int xOffset = rTu.getRect( COMPONENT_Y ).x0; const Int yOffset = rTu.getRect( COMPONENT_Y ).y0; for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++) { if (bMaintainResidual[storedResidualIndex]) { xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE); } } } } } if( bCheckSplit ) {//分割成4个TU,进行递归编码 //----- store full entropy coding status, load original entropy coding status ----- if( bCheckFull ) { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] ); m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } else { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } //----- code splitted block ----- Double dSplitCost = 0.0; Distortion uiSplitDistLuma = 0; UInt 
uiSplitCbfLuma = 0; TComTURecurse tuRecurseChild(rTu, false); DEBUG_STRING_NEW(sSplit) do { DEBUG_STRING_NEW(sChild)#if HHI_RQT_INTRA_SPEEDUP//< tests one best mode with full rqt xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );//本函数递归调用 #else xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );#endif//...}
上面的代码被剪得不成样子了(⊙﹏⊙)b。在该函数调用的xIntraCodingTUBlock中,按照给定的预测模式获取CU相应的预测数据,并与实际的像素值求差,得到残差数据。随后,调用transformNxN函数进行变换编码。原博文提到其中分别调用了亮度和色度TU的变换与量化编码函数xIntraCodingLumaBlk和xIntraCodingChromaBlk,不过我用的HM版本中没有找到,应该是做了调整;这里只有xIntraCodingTUBlock,跳转到它的定义:
Void TEncSearch::xIntraCodingTUBlock( TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE], const Bool checkCrossCPrediction, Distortion& ruiDist, const ComponentID compID, TComTU& rTu DEBUG_STRING_FN_DECLARE(sDebug) ,Int default0Save1Load2 )//... //===== init availability pattern ===== #ifndef DEBUG_STRING if( default0Save1Load2 != 2 )#endif { const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(compID, uiChFinalMode, uiWidth, uiHeight, chFmt, pcCU->getSlice()->getSPS()->getDisableIntraReferenceSmoothing()); initAdiPatternChType( rTu, bAboveAvail, bLeftAvail, compID, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sDebug) ); //===== get prediction signal ===== predIntraAng( compID, uiChFinalMode, piOrg, uiStride, piPred, uiStride, rTu, bAboveAvail, bLeftAvail, bUseFilteredPredictions );//针对给定模式进行帧内预测,获取预测像素块 // save prediction//... //===== get residual signal =====//... /*//===== transform and quantization ===== 对预测残差进行变换和量化编码*/ //--- init rate estimation arrays for RDOQ --- if( useTransformSkip ? 
m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ() ) { m_pcEntropyCoder->estimateBit( m_pcTrQuant->m_pcEstBitsSbac, uiWidth, uiHeight, chType ); } //--- transform and quantization --- TCoeff uiAbsSum = 0; if (bIsLuma) { pcCU ->setTrIdxSubParts ( uiTrDepth, uiAbsPartIdx, uiFullDepth ); } const QpParam cQP(*pcCU, compID);#if RDOQ_CHROMA_LAMBDA m_pcTrQuant->selectLambda (compID);#endif m_pcTrQuant->transformNxN ( rTu, compID, piResi, uiStride, pcCoeff, #if ADAPTIVE_QP_SELECTION pcArlCoeff,#endif uiAbsSum, cQP ); //--- inverse transform ---#ifdef DEBUG_STRING if ( (uiAbsSum > 0) || (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask) )#else if ( uiAbsSum > 0 )#endif { m_pcTrQuant->invTransformNxN ( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sDebug, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) ); } else { Pel* pResi = piResi; memset( pcCoeff, 0, sizeof( TCoeff ) * uiWidth * uiHeight ); for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { memset( pResi, 0, sizeof( Pel ) * uiWidth ); pResi += uiStride; } } //===== reconstruction ===== //===== update distortion =====}
其中,transformNxN函数实现了变换和量化功能,其核心功能在于调用了三个函数:xTransformSkip、xT和xQuant,分别实现skip模式变换、常规残差数据的变换和量化编码。
Void TComTrQuant::transformNxN( TComTU & rTu, const ComponentID compID, Pel * pcResidual, const UInt uiStride, TCoeff * rpcCoeff,#if ADAPTIVE_QP_SELECTION TCoeff * pcArlCoeff,#endif TCoeff & uiAbsSum, const QpParam & cQP ){ const TComRectangle &rect = rTu.getRect(compID); const UInt uiWidth = rect.width; const UInt uiHeight = rect.height; TComDataCU* pcCU = rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiOrgTrDepth = rTu.GetTransformDepthRel(); uiAbsSum=0; RDPCMMode rdpcmMode = RDPCM_OFF; rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode ); if (rdpcmMode == RDPCM_OFF) { uiAbsSum = 0; //transform and quantise if(pcCU->getCUTransquantBypass(uiAbsPartIdx)) { const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID); const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1; for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++) { for (UInt x = 0; x<uiWidth; x++, coefficientIndex++) { const Pel currentSample = pcResidual[(y * uiStride) + x]; rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample; uiAbsSum += TCoeff(abs(currentSample)); } } } else {#ifdef DEBUG_TRANSFORM_AND_QUANTISE std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n"; printBlock(pcResidual, uiWidth, uiHeight, uiStride);#endif assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) ); if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) { xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID ); //skip模式变换 } else { xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );//常规残差变换 }
xT函数实现比较简单,主要为调用xTrMxN函数实现变换功能,该函数的实现为:
/** Forward transform of a block, performed as two successive 1-D stages.
 *
 *  The first stage reads from \p block and writes into a local scratch buffer;
 *  its kernel is selected by \p iWidth. The second stage reads the scratch
 *  buffer and writes into \p coeff; its kernel is selected by \p iHeight.
 *  For a 4x4 block with \p useDST set, fastForwardDst is used in both stages
 *  instead of partialButterfly4.
 *
 *  \param bitDepth           bit depth, used to derive the first-stage shift
 *  \param block              input samples
 *  \param coeff              output coefficients
 *  \param iWidth             block width; must be 4, 8, 16 or 32
 *  \param iHeight            block height; must be 4, 8, 16 or 32
 *  \param useDST             select the DST kernel for 4x4 blocks
 *  \param maxTrDynamicRange  subtracted when deriving the first-stage shift
 */
Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
{
  static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];

  // Per-stage right shifts handed to the 1-D kernels.
  const Int widthTerm       = g_aucConvertToBit[iWidth] + 2;
  const Int heightTerm      = g_aucConvertToBit[iHeight] + 2;
  const Int firstStageShift = (widthTerm + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
  const Int secondStageShift = heightTerm + TRANSFORM_MATRIX_SHIFT;

  assert(firstStageShift >= 0);
  assert(secondStageShift >= 0);

  // The DST kernel is only taken when both dimensions are 4 and DST is requested.
  const Bool dst4x4 = useDST && (iWidth == 4) && (iHeight == 4);

  // Scratch buffer holding the output of the first stage.
  TCoeff stageBuffer[ MAX_TU_SIZE * MAX_TU_SIZE ];

  // ---- first stage: kernel chosen by width ----
  switch (iWidth)
  {
    case 4:
      if (dst4x4)
      {
        fastForwardDst( block, stageBuffer, firstStageShift );
      }
      else
      {
        partialButterfly4( block, stageBuffer, firstStageShift, iHeight );
      }
      break;

    case 8:
      partialButterfly8( block, stageBuffer, firstStageShift, iHeight );
      break;

    case 16:
      partialButterfly16( block, stageBuffer, firstStageShift, iHeight );
      break;

    case 32:
      partialButterfly32( block, stageBuffer, firstStageShift, iHeight );
      break;

    default:
      // Unsupported width: trap in debug builds, terminate otherwise.
      assert(0);
      exit (1);
  }

  // ---- second stage: kernel chosen by height ----
  switch (iHeight)
  {
    case 4:
      if (dst4x4)
      {
        fastForwardDst( stageBuffer, coeff, secondStageShift );
      }
      else
      {
        partialButterfly4( stageBuffer, coeff, secondStageShift, iWidth );
      }
      break;

    case 8:
      partialButterfly8( stageBuffer, coeff, secondStageShift, iWidth );
      break;

    case 16:
      partialButterfly16( stageBuffer, coeff, secondStageShift, iWidth );
      break;

    case 32:
      partialButterfly32( stageBuffer, coeff, secondStageShift, iWidth );
      break;

    default:
      // Unsupported height: trap in debug builds, terminate otherwise.
      assert(0);
      exit (1);
  }
}
xTrMxN函数根据块的宽度和高度调用不同的预设蝶形变换函数(partialButterfly4/8/16/32),实现对相应大小残差信号矩阵的变换,以4×4的partialButterfly4为例:
Void partialButterfly4(TCoeff *src, TCoeff *dst, Int shift, Int line)
{
Int j;
TCoeff E[2],O[2];
TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
for (j=0; j
- HEVC函数入门(8)——变换的实现
- HEVC函数入门(22)——变换&量化
- HEVC函数入门(13)——HEVC中容易混淆的类和结构
- HEVC学习(十) —— 与变换有关的几个主要函数及重要变量
- HEVC学习(十) —— 与变换有关的几个主要函数及重要变量
- HEVC学习(十) —— 与变换有关的几个主要函数及重要变量
- HEVC函数入门(9)——tile相关
- HEVC函数入门(16)——Slice编码
- HEVC函数入门(17)——编码一个CU
- HEVC函数入门(23)——熵编码&CABAC
- HEVC函数入门(24)——比特流
- 【HEVC学习与研究】43、HEVC变换编码的实现
- HEVC函数入门(1)——HM编码器的基本结构
- HEVC函数入门(18)——帧间预测的原理
- HEVC函数入门(20)——预测MV的获取
- HEVC函数入门(6)——帧内预测-详细概念和HM16.3实现(上)
- HEVC函数入门(7)——帧内预测-详细概念和HM16.3实现(下)
- HEVC函数入门(14)——建议先看:整个编码流程以及相关的函数
- hadoop2.7.3分布式集群搭建
- Android之 AudioManager媒体管理器
- iOS 的小功能代码段
- 交易趣谈——如何快速致亏的几个好主意
- 查找子串
- HEVC函数入门(8)——变换的实现
- 针对接口编程的理解
- 安卓studio的一些使用技巧
- 页面布局再也别用select控件了,样式不好控制
- socketpair(双向通信)--高级IO
- 事件冒泡与阻止
- 解决火狐httprequester链接不安全
- ASP.NET Core MVC 和 Visual Studio入门(五) 使用 SQL Server LocalDB
- Linux基础命令整理