x264_me_search_ref函数分析

来源:互联网 发布:网络歌手说唱歌曲大全 编辑:程序博客网 时间:2024/05/01 18:28
/**
 * Integer-pel motion search for one partition, followed by optional
 * sub-pel refinement.
 *
 * The search is seeded with the (clipped) predicted motion vector m->mvp and
 * the caller-supplied candidates, then refined with the pattern selected by
 * h->mb.i_me_method. The winning vector is stored in quarter-pel units.
 *
 * @param h                 encoder context; reads param.analyse.i_me_range,
 *                          mb.mv_{min,max}_fpel, mb.i_subpel_refine,
 *                          mb.i_me_method and pixf.fpelcmp.
 * @param m                 motion-estimation state; reads i_pixel, i_stride,
 *                          p_fenc, p_fref_w, mvp, p_cost_mv and writes mv,
 *                          cost and cost_mv.
 * @param mvc               extra candidate vectors in quarter-pel units (they
 *                          are clipped against the quarter-pel limits below).
 * @param i_mvc             number of entries in mvc.
 *                          NOTE(review): the full-pel path stores the
 *                          candidate index in 4 bits and uses a 16-entry
 *                          scratch array, so i_mvc must not exceed 15.
 * @param p_halfpel_thresh  forwarded unchanged to refine_subpel().
 *
 * NOTE(review): bw, bh, omx, omy and pix are not referenced directly in the
 * visible code; they are presumably consumed by the COST_MV* macros defined
 * elsewhere, so they are kept.
 */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    const int i_pixel = m->i_pixel;
    const int stride = m->i_stride[0];
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
    int omx, omy, pmx, pmy;
    pixel *p_fenc = m->p_fenc[0];
    pixel *p_fref_w = m->p_fref_w;
    ALIGNED_ARRAY_16( pixel, pix,[16*16] );

    int costs[16];

    /* Search window limits in full-pel units. */
    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];
    /* Same limits scaled by 4, i.e. in quarter-pel units. */
    int mv_x_min_qpel = mv_x_min << 2;
    int mv_y_min_qpel = mv_y_min << 2;
    int mv_x_max_qpel = mv_x_max << 2;
    int mv_y_max_qpel = mv_y_max << 2;
/* Special version of pack to allow shortcuts in CHECK_MVRANGE:
 * mx goes into the upper 16 bits, my into the low 15 bits.
 * Arguments are parenthesized so that expression arguments expand safely. */
#define pack16to32_mask2(mx,my) (((mx)<<16)|((my)&0x7FFF))
    uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
    uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
/* Range test for both components at once: the packed vector is added to
 * mv_min and subtracted from mv_max, and bits 31 and 14 (mask 0x80004000) are
 * inspected. The macro is non-zero only when neither flag bit is set, which is
 * intended to mean that (mx,my) lies inside the full-pel search window. */
#define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))

    /* MV cost tables re-based on the predictor; m->mvp is in quarter-pel
     * units (it is clipped against the quarter-pel limits just below). */
    const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    uint32_t pmv;
    bmx = x264_clip3( m->mvp[0], mv_x_min_qpel, mv_x_max_qpel );
    bmy = x264_clip3( m->mvp[1], mv_y_min_qpel, mv_y_max_qpel );
    /* Round the clipped predictor from quarter-pel to full-pel. */
    pmx = ( bmx + 2 ) >> 2;
    pmy = ( bmy + 2 ) >> 2;
    bcost = COST_MAX;

    /* try extra predictors if provided */
    if( h->mb.i_subpel_refine >= 3 )
    {
        /* Sub-pel predictor path: candidates are evaluated at their
         * quarter-pel positions via COST_MV_HPEL (defined elsewhere;
         * presumably it tracks the best one in bpred_mx/bpred_my/bpred_cost). */
        pmv = pack16to32_mask(bmx,bmy);
        if( i_mvc )
            COST_MV_HPEL( bmx, bmy );
        for( int i = 0; i < i_mvc; i++ )
        {
            /* Skip the zero vector and candidates identical to the predictor. */
            if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
            {
                int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
                int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
                COST_MV_HPEL( mx, my );
            }
        }
        /* Round the best sub-pel predictor to full-pel and evaluate it as the
         * starting point of the integer search. */
        bmx = ( bpred_mx + 2 ) >> 2;
        bmy = ( bpred_my + 2 ) >> 2;
        COST_MV( bmx, bmy );
    }
    else
    {
        /* check the MVP */
        bmx = pmx;
        bmy = pmy;
        /* Because we are rounding the predicted motion vector to fullpel, there will be
         * an extra MV cost in 15 out of 16 cases.  However, when the predicted MV is
         * chosen as the best predictor, it is often the case that the subpel search will
         * result in a vector at or next to the predicted motion vector.  Therefore, it is
         * sensible to omit the cost of the MV from the rounded MVP to avoid unfairly
         * biasing against use of the predicted motion vector. */
        /* Distortion only (no MV bits) at the rounded predictor. */
        bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
        pmv = pack16to32_mask( bmx, bmy );
        if( i_mvc > 0 )
        {
            ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
            x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
            /* Free the low 4 bits of bcost so that each candidate's 1-based
             * index can ride along with its cost through COPY1_IF_LT. */
            bcost <<= 4;
            for( int i = 1; i <= i_mvc; i++ )
            {
                /* NOTE(review): pmv is packed from full-pel values while
                 * mvc[i-1] is the unrounded quarter-pel candidate; comparing
                 * against mvc_fpel[i-1] looks like the intent - confirm. */
                if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
                {
                    int mx = mvc_fpel[i-1][0];
                    int my = mvc_fpel[i-1][1];
                    int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
                    cost = (cost << 4) + i;
                    COPY1_IF_LT( bcost, cost );
                }
            }
            /* Non-zero low bits: candidate (index-1) beat the rounded MVP. */
            if( bcost&15 )
            {
                bmx = mvc_fpel[(bcost&15)-1][0];
                bmy = mvc_fpel[(bcost&15)-1][1];
            }
            /* Drop the index bits to recover the real cost. */
            bcost >>= 4;
        }
    }

    /* The zero vector was skipped above; evaluate it unless the predictor
     * itself already is (0,0). */
    if( pmv )
        COST_MV( 0, 0 );

    switch( h->mb.i_me_method )
    {
        case X264_ME_DIA:
        {
            /* diamond search, radius 1 */
            bcost <<= 4;
            int i = i_me_range;
            do
            {
                /* Cost of the four neighbours: up, down, left, right. */
                COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs );
                /* The 4-bit tag encodes the step as two signed 2-bit fields:
                 * bits 3..2 = -dx, bits 1..0 = -dy
                 * (1 -> (0,-1), 3 -> (0,+1), 4 -> (-1,0), 12 -> (+1,0)). */
                COPY1_IF_LT( bcost, (costs[0]<<4)+1 );
                COPY1_IF_LT( bcost, (costs[1]<<4)+3 );
                COPY1_IF_LT( bcost, (costs[2]<<4)+4 );
                COPY1_IF_LT( bcost, (costs[3]<<4)+12 );
                /* Tag 0 means the centre is still the best: stop early. */
                if( !(bcost&15) )
                    break;
                /* Shift the tag field to the top of the word, then shift back
                 * by 30 to sign-extend it into an offset of -1, 0 or +1. */
                bmx -= (bcost<<28)>>30;
                bmy -= (bcost<<30)>>30;
                /* Clear the tag for the next iteration. */
                bcost &= ~15;
            } while( --i && CHECK_MVRANGE(bmx, bmy) );
            bcost >>= 4;
            break;
        }

        case X264_ME_HEX:
        {
    me_hex2:
            /* hexagon search, radius 2 */
    #if 0
            for( int i = 0; i < i_me_range/2; i++ )
            {
                omx = bmx; omy = bmy;
                COST_MV( omx-2, omy   );
                COST_MV( omx-1, omy+2 );
                COST_MV( omx+1, omy+2 );
                COST_MV( omx+2, omy   );
                COST_MV( omx+1, omy-2 );
                COST_MV( omx-1, omy-2 );
                if( bmx == omx && bmy == omy )
                    break;
                if( !CHECK_MVRANGE(bmx, bmy) )
                    break;
            }
    #else
            /* equivalent to the above, but eliminates duplicate candidates */

            /* hexagon: evaluate all six vertices, three per call */
            COST_MV_X3_DIR( -2,0, -1, 2,  1, 2, costs   );
            COST_MV_X3_DIR(  2,0,  1,-2, -1,-2, costs+3 );
            /* Low 3 bits carry the vertex tag (2..7); 0 means the centre. */
            bcost <<= 3;
            COPY1_IF_LT( bcost, (costs[0]<<3)+2 );
            COPY1_IF_LT( bcost, (costs[1]<<3)+3 );
            COPY1_IF_LT( bcost, (costs[2]<<3)+4 );
            COPY1_IF_LT( bcost, (costs[3]<<3)+5 );
            COPY1_IF_LT( bcost, (costs[4]<<3)+6 );
            COPY1_IF_LT( bcost, (costs[5]<<3)+7 );

            /* Some vertex beat the centre. */
            if( bcost&7 )
            {
                /* Tags start at 2, so subtract 2 to get the vertex index
                 * 0..5 in the order the vertices were evaluated above. */
                int dir = (bcost&7)-2;
                /* hex2 (defined elsewhere) is indexed with dir+1; presumably
                 * it carries a wrap-around entry in front - confirm against
                 * its definition. */
                bmx += hex2[dir+1][0];
                bmy += hex2[dir+1][1];

                /* half hexagon, not overlapping the previous iteration */
                for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )
                {
                    /* Only the three vertices around the direction of travel
                     * are new; the other three were covered by the previous
                     * hexagon. */
                    COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],
                                    hex2[dir+1][0], hex2[dir+1][1],
                                    hex2[dir+2][0], hex2[dir+2][1],
                                    costs );
                    /* Clear the tag before comparing the new candidates. */
                    bcost &= ~7;
                    COPY1_IF_LT( bcost, (costs[0]<<3)+1 );
                    COPY1_IF_LT( bcost, (costs[1]<<3)+2 );
                    COPY1_IF_LT( bcost, (costs[2]<<3)+3 );
                    /* Centre is still the best: the search has converged. */
                    if( !(bcost&7) )
                        break;
                    /* Turn by -1, 0 or +1 vertex and wrap via mod6m1
                     * (defined elsewhere). */
                    dir += (bcost&7)-2;
                    dir = mod6m1[dir+1];
                    bmx += hex2[dir+1][0];
                    bmy += hex2[dir+1][1];
                }
            }
            bcost >>= 3;
    #endif
            break;
        }

        /* NOTE(review): only the DIA and HEX methods are present in this
         * excerpt; any other i_me_method value performs no pattern search. */
        default:
            break;
    }

    /* -> qpel mv */
    /* Keep the best sub-pel predictor if it beats the integer search result;
     * otherwise convert the full-pel winner to quarter-pel units. */
    if( bpred_cost < bcost )
    {
        m->mv[0] = bpred_mx;
        m->mv[1] = bpred_my;
        m->cost = bpred_cost;
    }
    else
    {
        m->mv[0] = bmx << 2;
        m->mv[1] = bmy << 2;
        m->cost = bcost;
    }

    /* compute the real cost */
    m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
    /* On the full-pel predictor path the rounded MVP was costed without its
     * MV bits; add them back if that vector ended up winning. */
    if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 )
        m->cost += m->cost_mv;

    /* subpel refine: half-pel and quarter-pel iteration counts come from the
     * subpel_iterations table (defined elsewhere). */
    if( h->mb.i_subpel_refine >= 2 )
    {
        int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
        int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
        refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
    }
}

0 0
原创粉丝点击