[MMX指令版]黑白棋行动力的计算--指令拆分整理版(方便分析研究)

来源:互联网 发布:手机淘宝怎么复制网址 编辑:程序博客网 时间:2024/05/22 03:43

这个是去掉MMX指令配对和汇编指令配对,经过整理后的代码,阅读上应该更加直观一些,对于你理解其bitboard原理应该有很大的帮助,关于MMX指令的相关资料可以到google.com搜索一下,make_bitboard()函数是我的程序里的代码,由于篇幅不能列出所有代码,但我想聪明的你应该可以知道代码的大概意思。

有趣的是,经过整理,我发现有一句汇编指令是多余的,不知道是为了要配对还是什么其他原因。(不过是很久以前发现的,现在不记得了,你可以再次证实一下,在代码的末尾)

聪明的你也应该知道,这样不完全配对的指令,运行效率是很低的。

注:字体稍微增大了一号,CSDN这个编辑器好难用哦,不过好歹看起来已经不错了。

/*
 * Maps a colour code to the other side's code by inverting it and keeping
 * only the two low bits.  The argument is parenthesized so that compound
 * expressions (e.g. a | b) are inverted as a whole.
 */
#define   THITHER_COLOR(color)  ((~(color))&0x03)

/*
 * Board storage: one UINT8 cell per square in a grid that is two rows and
 * two columns larger than BOARD_ROWS x BOARD_COLS.  make_bitboard() reads
 * only indices 1..BOARD_ROWS and 1..BOARD_COLS; the outer ring is presumably
 * a border (confirm against the rest of the program).
 */
typedef struct {
    UINT8 board[BOARD_ROWS + 2][BOARD_COLS + 2];
} board_type;

/*
 * Per-direction masks, assigned by init_mmx().  bitboard_mobility() reads
 * dir_mask0 (shift 9), dir_mask1 (shift 8) and dir_mask2 (shift 7);
 * dir_mask3..dir_mask7 are not referenced by the code shown in this file.
 */
static unsigned __int64 dir_mask0, dir_mask1, dir_mask2, dir_mask3,
                        dir_mask4, dir_mask5, dir_mask6, dir_mask7;

/* Bit patterns used by the bit-counting tail of bitboard_mobility(). */
static unsigned __int64 c0f, c33, c55;

/*
 * Assigns the file-level direction masks and bit-count constants.
 * bitboard_mobility() reads several of them, so this has to run before it.
 */
void init_mmx( void )
{
    /* Rows 2..7, columns 2..7. */
    dir_mask0 = dir_mask2 = dir_mask5 = dir_mask7 = 0x007e7e7e7e7e7e00;

    /* Rows 2..7, every column. */
    dir_mask1 = dir_mask6 = 0x00ffffffffffff00;

    /* Every row, columns 2..7. */
    dir_mask3 = dir_mask4 = 0x7e7e7e7e7e7e7e7e;

    /* Alternating 1-bit, 2-bit and 4-bit groups for the population count. */
    c55 = 0x5555555555555555;
    c33 = 0x3333333333333333;
    c0f = 0x0f0f0f0f0f0f0f0f;
}

void make_bitboard( board_type *board_ptr,
                   BitBoard &my_bits,
                   BitBoard &opp_bits,
                   UINT8 objcolor )
{
    UINT8 curcolor;
    UINT8 thithercolor = THITHER_COLOR(objcolor);
/*
    my_bits.high  = 0;
    my_bits.low   = 0;
    opp_bits.high = 0;
    opp_bits.low  = 0;
*/
    unsigned __int64 power = 0x0000000000000001;
    unsigned __int64 my_bits64  = 0x0000000000000000;
    unsigned __int64 opp_bits64 = 0x0000000000000000;

    for(int i=BOARD_ROWS; i>=1; i--)
    {
        for(int j=BOARD_COLS; j>=1; j--)
        {
            curcolor = board_ptr->board[i][j];
            if(curcolor == objcolor)
                my_bits64 |= power;
            else if(curcolor == thithercolor)
                opp_bits64 |= power;
            power <<= 1;
        }
    }

    my_bits.high  = (unsigned long)(my_bits64 >> 32);
    my_bits.low   = (unsigned long)(my_bits64 & 0xffffffff);
    opp_bits.high = (unsigned long)(opp_bits64 >> 32);
    opp_bits.low  = (unsigned long)(opp_bits64 & 0xffffffff);
}

/*
 * Counts the empty squares that the side given by my_bits could move to,
 * with the opposing discs given by opp_bits.  Both arguments carry a 64-bit
 * board as separate 32-bit high and low halves.
 *
 * Method, as implemented below: for each direction the mover's discs are
 * shifted one step and ANDed with the edge-masked opponent discs; this is
 * repeated so that runs of opponent discs are followed, and one final shift
 * yields the square just past each run.  All such squares are ORed into mm2,
 * occupied squares are then removed, and the remaining bits are counted.
 *
 * The three shift amounts 9, 8 and 7 are handled in MMX registers, each in
 * both shift directions at once.  The two shift-by-1 directions are handled
 * in 32-bit integer registers, one half of the board per register.
 *
 * Written as an __asm block using 32-bit x86 registers and MMX.  It reads the
 * file-level dir_mask0, dir_mask1, dir_mask2, c55, c33 and c0f, which
 * init_mmx() assigns, so init_mmx() has to be called first.  The block ends
 * with emms.
 *
 * Returns the bit count ANDed with 63.
 */
int bitboard_mobility( const BitBoard my_bits, const BitBoard opp_bits )
{
    unsigned int count;
    //unsigned int count = 0;

    __asm {
        //push  eax                     ;
        /*
        push    ecx                     ;
        push    edx                     ;
        push    ebx                     ;
        push    esi                     ;
        push    edi                     ;
        //*/
        /* Ready for init data */
        //mov       eax, 0
        // ebx:ecx = my_bits high:low, edi:esi = opp_bits high:low.
        // These registers are reused later by the shift-by-1 passes.
        mov     ebx, my_bits.high       ;
        mov     ecx, my_bits.low        ;
        mov     edi, opp_bits.high      ;
        mov     esi, opp_bits.low       ;

        //
        // Assemble the two 64-bit boards: mm0 = my_bits, mm1 = opp_bits.
        // mm2 is cleared and accumulates the candidate squares of all directions.
        movd    mm0, ebx                ;
        psllq   mm0, 32                 ;
        movd    mm3, ecx                ;
        por     mm0, mm3                ; mm0 is BitBoard of my_bits
        movd    mm1, edi                ;
        psllq   mm1, 32                 ;
        movd    mm4, esi                ;
        por     mm1, mm4                ; mm1 is BitBoard of opp_bits
        pxor    mm2, mm2                ; mm2 <- 0x0000000000000000


        /* shift=-9   rowDelta=-1   colDelta=-1 */
        /* shift=+9   rowDelta=+1   colDelta=+1 */

        /* Disc #1, flip direction 0. */
        /* Disc #1, flip direction 7. */
        // mm3 = opponent discs restricted by dir_mask0.
        // mm4 follows the left-shift direction, mm6 the right-shift direction.
        movq    mm3, mm1                ; mm3 <- opp_bits
        movq    mm4, mm0                ; mm4 <- my_bits
        movq    mm6, mm0                ; mm6 <- backup of my_bits
        pand    mm3, dir_mask0          ; 0x007e7e7e7e7e7e00
                                        ; dir_mask0 of value: 
                                        ;   00000000
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   00000000
        psllq   mm4, 9                  ;
        psrlq   mm6, 9                  ;
        pand    mm4, mm3                ;
        pand    mm6, mm3                ;

        /* Disc #2, flip direction 0. */
        /* Disc #2, flip direction 7. */
        // Each of the following steps extends the runs by one more masked
        // opponent disc (mm5 and mm7 are scratch) and ORs it into mm4 / mm6.
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 9                  ;
        psrlq   mm7, 9                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #3, flip direction 0. */
        /* Disc #3, flip direction 7. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 9                  ;
        psrlq   mm7, 9                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #4, flip direction 0. */
        /* Disc #4, flip direction 7. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 9                  ;
        psrlq   mm7, 9                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #5, flip direction 0. */
        /* Disc #5, flip direction 7. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 9                  ;
        psrlq   mm7, 9                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #6, flip direction 0. */
        /* Disc #6, flip direction 7. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psrlq   mm7, 9                  ;
        psllq   mm5, 9                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;
        // One more unmasked shift moves past the end of every run; those
        // squares are added to the accumulator mm2.
        psllq   mm4, 9                  ;
        psrlq   mm6, 9                  ;
        por     mm2, mm4                ;
        por     mm2, mm6                ;


        /* *************************** */
        // Shift-left-by-1 pass in integer registers, one 32-bit half each
        // (ebx with edi for the high half, ecx with esi for the low half).
        // The four original halves are pushed first because ebx and ecx are
        // overwritten here and edi and esi are masked in place.
        push    esi                     ;
        push    edi                     ;
        push    ecx                     ;
        push    ebx                     ;

        // NOTE: 0x7e is 01111110 in each byte (eight bits); the bit picture
        // in the comment below shows nine digits per row.
        and     edi, 0x7e7e7e7e         ; 0x7e7e7e7e
        and     esi, 0x7e7e7e7e         ; 0x7e7e7e7e
                                        ; value of:
                                        ; 011111110
                                        ; 011111110
                                        ; 011111110
                                        ; 011111110
        // First step, followed by five extend steps using eax and edx as scratch.
        shl     ebx, 1                  ;
        shl     ecx, 1                  ;
        and     ebx, edi                ;
        and     ecx, esi                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shl     edx, 1                  ;
        shl     eax, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shl     edx, 1                  ;
        shl     eax, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shl     edx, 1                  ;
        shl     eax, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shl     edx, 1                  ;
        shl     eax, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shl     edx, 1                  ;
        shl     eax, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        // Final unmasked shift: ebx:ecx now hold the candidate squares of this
        // pass.  They are merged into mm2 further down.
        shl     ebx, 1                  ;
        shl     ecx, 1                  ;
        /* *************************** */


        /* shift=-8   rowDelta=-1   colDelta=0 */
        /* shift=+8   rowDelta=1   colDelta=0 */

        /* Disc #1, flip direction 1. */
        /* Disc #1, flip direction 6. */
        // Same scheme as the shift-9 pass, with shift 8 and dir_mask1.
        movq    mm3, mm1                ;
        movq    mm4, mm0                ;
        movq    mm6, mm0                ;
        pand    mm3, dir_mask1          ; 0x00ffffffffffff00;
                                        ; dir_mask1 of value:
                                        ;   00000000
                                        ;   11111111
                                        ;   11111111
                                        ;   11111111
                                        ;   11111111
                                        ;   11111111
                                        ;   11111111
                                        ;   00000000
        psllq   mm4, 8                  ;
        psrlq   mm6, 8                  ;

        pand    mm4, mm3                ;
        pand    mm6, mm3                ;

        /* Disc #2, flip direction 1. */
        /* Disc #2, flip direction 6. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 8                  ;
        psrlq   mm7, 8                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* serialize here: add horizontal shl flips. */

        // Rebuild a 64-bit value from ebx:ecx (result of the shl pass above)
        // and OR it into mm2.  mm5 and mm7 are free scratch at this point.
        movd    mm5, ebx                ;
        psllq   mm5, 32                 ;
        movd    mm7, ecx                ;
        por     mm5, mm7                ;
        por     mm2, mm5                ;

        /* Disc #3, flip direction 1. */
        /* Disc #3, flip direction 6. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 8                  ;
        psrlq   mm7, 8                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #4, flip direction 1. */
        /* Disc #4, flip direction 6. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 8                  ;
        psrlq   mm7, 8                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #5, flip direction 1. */
        /* Disc #5, flip direction 6. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 8                  ;
        psrlq   mm7, 8                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #6, flip direction 1. */
        /* Disc #6, flip direction 6. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 8                  ;
        psrlq   mm7, 8                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;
        // Step past the runs and accumulate.
        psllq   mm4, 8                  ;
        psrlq   mm6, 8                  ;
        por     mm2, mm4                ;
        por     mm2, mm6                ;


        /* *************************** */
        // Shift-right-by-1 pass.  The original my_bits halves are reloaded
        // from the stack and pushed straight back, so the stack still holds
        // all four saved values.  edi and esi keep the masked opponent halves.
        pop     ebx                     ;
        pop     ecx                     ;
        push    ecx                     ;
        push    ebx                     ;

        shr     ebx, 1                  ;
        shr     ecx, 1                  ;
        and     ebx, edi                ; edi = 0x7e7e7e7e
        and     ecx, esi                ; esi = 0x7e7e7e7e
                                        ; value of:
                                        ; 011111110
                                        ; 011111110
                                        ; 011111110
                                        ; 011111110
        // NOTE: edi and esi actually hold opp_bits ANDed with 0x7e7e7e7e
        // (set up before the shl pass), not the bare constant.
        // NOTE(review): this pass has six extend groups where the shl pass
        // has five.  The mask leaves six usable bits per byte, so the sixth
        // group looks redundant - confirm before removing anything.
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shr     eax, 1                  ;
        shr     edx, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shr     eax, 1                  ;
        shr     edx, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shr     eax, 1                  ;
        shr     edx, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shr     eax, 1                  ;
        shr     edx, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shr     eax, 1                  ;
        shr     edx, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        mov     eax, ebx                ;
        mov     edx, ecx                ;
        shr     eax, 1                  ;
        shr     edx, 1                  ;
        and     eax, edi                ;
        and     edx, esi                ;
        or      ebx, eax                ;
        or      ecx, edx                ;
        // Final unmasked shift; ebx:ecx are merged into mm2 further down.
        shr     ebx, 1                  ;
        shr     ecx, 1                  ;
        /* *************************** */


        /* shift=-7   rowDelta=-1   colDelta=1 */
        /* shift=+7   rowDelta=1   colDelta=-1 */

        /* Disc #1, flip direction 2. */
        /* Disc #1, flip direction 5. */
        // Same scheme again, with shift 7 and dir_mask2.
        movq    mm3, mm1                ;
        movq    mm4, mm0                ;
        movq    mm6, mm0                ;
        pand    mm3, dir_mask2          ; 0x007e7e7e7e7e7e00;
                                        ; dir_mask2 of value:
                                        ;   00000000
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   01111110
                                        ;   00000000
        psllq   mm4, 7                  ;
        psrlq   mm6, 7                  ;
        pand    mm4, mm3                ;
        pand    mm6, mm3                ;

        /* Disc #2, flip direction 2. */
        /* Disc #2, flip direction 5. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 7                  ;
        psrlq   mm7, 7                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #3, flip direction 2. */
        /* Disc #3, flip direction 5. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 7                  ;
        psrlq   mm7, 7                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #4, flip direction 2. */
        /* Disc #4, flip direction 5. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 7                  ;
        psrlq   mm7, 7                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* Disc #5, flip direction 2. */
        /* Disc #5, flip direction 5. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 7                  ;
        psrlq   mm7, 7                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;

        /* serialize here: add horizontal shr flips. */

        // Merge the result of the shr pass (ebx:ecx) into mm2.
        movd    mm5, ebx                ;
        psllq   mm5, 32                 ;
        movd    mm7, ecx                ;
        por     mm5, mm7                ;
        por     mm2, mm5                ;

        // Pop the four values pushed before the shl pass, in reverse order.
        // This balances the stack and restores the original register contents.
        pop     ebx                     ;
        pop     ecx                     ;
        pop     edi                     ;
        pop     esi                     ;

        /* Disc #6, flip direction 2. */
        /* Disc #6, flip direction 5. */
        movq    mm5, mm4                ;
        movq    mm7, mm6                ;
        psllq   mm5, 7                  ;
        psrlq   mm7, 7                  ;
        pand    mm5, mm3                ;
        pand    mm7, mm3                ;
        por     mm4, mm5                ;
        por     mm6, mm7                ;
        // Step past the runs and accumulate.
        psllq   mm4, 7                  ;
        psrlq   mm6, 7                  ;
        por     mm2, mm4                ;
        por     mm2, mm6                ;

        /* mm2 is the pseudo-feasible moves at this point. */
        /* Let mm7 be the feasible moves, i.e., mm2 restricted to empty squares. */

        // mm7 = NOT(my_bits OR opp_bits) AND mm2 (pandn inverts its destination).
        movq    mm7, mm0                ;
        por     mm7, mm1                ;
        pandn   mm7, mm2                ;

        /* Count the moves, i.e., the number of bits set in mm7. */

        // Parallel bit count on the two 32-bit lanes: 2-bit sums (c55),
        // 4-bit sums (c33), then per-byte sums (c0f).
        movq    mm1, mm7                ;
        psrld   mm7, 1                  ;
        pand    mm7, c55                ; c55 = 0x5555555555555555
        psubd   mm1, mm7                ;
        movq    mm7, mm1                ;
        psrld   mm1, 2                  ;
        pand    mm7, c33                ; c33 = 0x3333333333333333;
        pand    mm1, c33                ; c33 = 0x3333333333333333;
        paddd   mm7, mm1                ;
        movq    mm1, mm7                ;
        psrld   mm7, 4                  ;
        paddd   mm7, mm1                ;
        pand    mm7, c0f                ; c0f = 0x0f0f0f0f0f0f0f0f;
        // Fold the byte sums together: by 8 and 16 bits within each lane,
        // then the high lane onto the low lane.
        movq    mm1, mm7                ;
        psrld   mm7, 8                  ;
        paddd   mm7, mm1                ;
        movq    mm1, mm7                ;
        psrld   mm7, 16                 ;
        paddd   mm7, mm1                ;
        movq    mm1, mm7                ;
        psrlq   mm7, 32                 ;
        paddd   mm7, mm1                ;
        // The total sits in the low bits of eax; the AND discards the partial
        // sums left in the higher bytes.
        movd    eax, mm7                ;
        and     eax, 63                 ;
        mov     count, eax              ;
        //
        //
        // Leave MMX state so that later floating-point code can run.
        emms                            ;

        /*
        pop     edi                     ;
        pop     esi                     ;
        pop     ebx                     ;
        pop     edx                     ;
        pop     ecx                     ;
        //*/
        //pop       eax                     ;
    }
    return count;
}


附:

MMX的数据结构

多媒体软件具有如下显著的特点:
1、 小整型数据类型(图形数据为8位 ,声频数据为16位)
2、 对小整型数据的频繁且重复的计算操作(例如被频繁的调用的核心算法);
3、 许多操作具有内存的并行性(例如对大量的数据进行同一个加,减或乘法运算操作);

MMX技术设计了一套基本的,通用的紧缩整型指令,共57条。

所谓“紧缩整型数据”是指多个8/16/32位的整型数据组合成为一个64位的数据.MMX指令主要就是使用
这种紧缩整型数据,它又分成4种整型类型:紧缩字节、紧缩字、紧缩双字、紧缩4字

。紧缩字节(Packed Byte): 8个字节组合成一个64位的数据;
。紧缩字 (Packed Word): 4个字组合成一个64位的数据;
。紧缩双字(Packed Doubleword): 2个双字组合成一个64位的数据;
。紧缩4字 (Packed Quadword):一个64位数据

这样一条MMX指令就能够同时处理8/4/2个数据单元,这就是所谓的“单指令多数据SIMD”结构。这种结构
是MMX技术把机器性能提高的最根本因素。

为了方便使用64位紧缩整型数据,MMX技术含有8个64位的MMX寄存器(MM0-----MM7),只有MMX指令可以使用MMX寄存器。


值得一提的是,MMX寄存器是随机存取的,但实际上是借用了8个浮点数据寄存器实现的。浮点处理单元FPU有8个浮点寄存器FPR,以堆栈方式存取。每个浮点数据寄存器有80位,高16位用于指数和符号,低64位用于有效数字。MMX利用其64位有效数字部分用做随机存取的64位的MMX寄存器。


MMX指令集

1、算术运算:
PADD[B、W、D] 环绕加[字节,字,双字]
PADDS[B , W] 有符号饱和加[字节,字]
PADDUS[B , W] 无符号饱和加[字节,字]
PSUB[B、W、D] 环绕减[字节,字,双字]
PSUBS[B,W] 有符号饱和减[字节,字]
PSUBUS[B,W] 无符号饱和减【字节,字】
PMULHW 紧缩字乘后取高位
PMULLW 紧缩字乘后取低位
PMADDWD 紧缩字乘,积相加

2、比较:
PCMPEQ[B,W,D] 紧缩比较是否相等【字节,字,双字】
PCMPGT[B,W,D] 紧缩比较是否大于【字节,字,双字】

3、类型转换:
PACKUSWB 按无符号饱和压缩【字成字节】
PACKSS[WB,DW] 按有符号饱和压缩【字/双字成字节/字】
PUNPCKH[BW,WD,DQ] 扩展高位【字节,字,双字成字,双字,4字】
PUNPCKL[BW,WD,DQ] 扩展低位【字节,字,双字成字,双字,4字】

4、逻辑运算:
PAND 紧缩逻辑与
PANDN 紧缩逻辑与非
POR 紧缩逻辑或
PXOR 紧缩逻辑异或

5、位移:
PSLL[W,D,Q] 紧缩逻辑左移[字,双字,4字]
PSRL[W,D,Q] 紧缩逻辑右移[字,双字,4字]
PSRA[W,D] 紧缩算术右移【字,双字】

6、数据传送:
MOV[D,Q] 从MMX寄存器传入/传出【双字/4字】

7、状态清除
EMMS 清除MMX状态
原创粉丝点击