Nasm Intro - Understand nasm by OpenH264 WelsCPUId

来源:互联网 发布:ubuntu 打开anaconda 编辑:程序博客网 时间:2024/04/30 13:43



;****************************************************************************************************
;   void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
;****************************************************************************************************
%ifdef       WIN64

WELS_EXTERN WelsCPUId
    push     rbx
    push     rdx

    mov      eax,     ecx
    mov      ecx,     [r9]
    cpuid
    mov      [r9],    ecx
    mov      [r8],    ebx
    mov      rcx,    [rsp + 2*8 + 40]
    mov      [rcx],   edx
    pop      rdx
    mov      [rdx],   eax

    pop      rbx
    ret

%elifdef     UNIX64
WELS_EXTERN WelsCPUId
    push     rbx
    push     rcx
    push     rdx

    mov      eax,     edi
    mov      ecx,     [rcx]
    cpuid
    mov      [r8],    edx
    pop      rdx
    pop      r8
    mov      [r8],   ecx
    mov      [rdx],   ebx
    mov      [rsi],   eax

    pop      rbx
    ret

%elifdef     X86_32

WELS_EXTERN WelsCPUId
    push    ebx
    push    edi

    mov     eax, [esp+12]   ; operating index
    mov     edi, [esp+24]
    mov     ecx, [edi]
    cpuid                   ; cpuid

    ; processing various information return
    mov     edi, [esp+16]
    mov     [edi], eax
    mov     edi, [esp+20]
    mov     [edi], ebx
    mov     edi, [esp+24]
    mov     [edi], ecx
    mov     edi, [esp+28]
    mov     [edi], edx

    pop     edi
    pop     ebx
    ret

%endif



Notes:
1.   WELS_EXTERN
%macro WELS_EXTERN 1
    ALIGN 16
    %ifdef PREFIX
        global _%1
        %define %1 _%1
    %else
        global %1
    %endif
    %1:
%endmacro

WELS_EXTERN macro defined in asm_inc.asm,
Here it will expand function definition with prefix
global_ or global, this will cause one assembly function
defined, and may be referenced by others.

2. Call Conventions

When writing code for 64-bit Linux that integrates with a C library, you must follow the calling conventions explained in theAMD64 ABI Reference. You can also get this information fromWikipedia. The most important points are:

  • From left to right, pass as many parameters as will fit in registers. The order in which registers are allocated, are:
    • For integers and pointers, rdi, rsi, rdx,rcx, r8, r9.
    • For floating-point (float, double), xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6,xmm7.
  • Additional parameters are pushed on the stack, right to left, and are to be removed by the caller after the call.
  • After the parameters are pushed, the call instruction is made, so when the called function gets control, the return address is at[rsp], the first memory parameter is at [rsp+8], etc.
  • The stack pointer rsp must be aligned to a 16-byte boundary before making a call. Fine, but the process of making a call pushes the return address (8 bytes) on the stack, so when a function gets control,rsp is not aligned. You have to make that extra space yourself, by pushing something or subtracting 8 fromrsp.
  • The only registers that the called function is required to preserve (the calle-save registers) are:rbp, rbx, r12, r13, r14,r15. All others are free to be changed by the called function.
  • The callee is also supposed to save the control bits of the XMCSR and the x87 control word, but x87 instructions are rare in 64-bit code so you probably don't have to worry about this.
  • Integers are returned in rax or rdx:rax, and floating point values are returned inxmm0 or xmm1:xmm0.

3.   register define in asm_inc.asm of openh264

%ifdef WIN64 ; Windows x64 ;************************************

DEFAULT REL

BITS 64

%define arg1 rcx
%define arg2 rdx
%define arg3 r8
%define arg4 r9
%define arg5 [rsp + push_num*8 + 40]
%define arg6 [rsp + push_num*8 + 48]
%define arg7 [rsp + push_num*8 + 56]
%define arg8 [rsp + push_num*8 + 64]
%define arg9 [rsp + push_num*8 + 72]
%define arg10 [rsp + push_num*8 + 80]
%define arg11 [rsp + push_num*8 + 88]
%define arg12 [rsp + push_num*8 + 96]

%define r0 rcx
%define r1 rdx
%define r2 r8
%define r3 r9
%define r4 rax
%define r5 r10
%define r6 r11
%define r7 rsp

%define r0d ecx
%define r1d edx
%define r2d r8d
%define r3d r9d
%define r4d eax
%define r5d r10d
%define r6d r11d

%define r0w  cx
%define r1w  dx
%define r2w  r8w
%define r3w  r9w
%define r6w  r11w

%define r0b  cl
%define r1b  dl
%define r2b  r8l
%define r3b  r9l

%define  PUSHRFLAGS     pushfq
%define  POPRFLAGS      popfq
%define  retrq          rax
%define  retrd          eax

%elifdef UNIX64 ; Unix x64 ;************************************

DEFAULT REL

BITS 64

%ifidn __OUTPUT_FORMAT__,elf64
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%endif

%define arg1 rdi
%define arg2 rsi
%define arg3 rdx
%define arg4 rcx
%define arg5 r8
%define arg6 r9
%define arg7 [rsp + push_num*8 + 8]
%define arg8 [rsp + push_num*8 + 16]
%define arg9 [rsp + push_num*8 + 24]
%define arg10 [rsp + push_num*8 + 32]
%define arg11 [rsp + push_num*8 + 40]
%define arg12 [rsp + push_num*8 + 48]

%define r0 rdi
%define r1 rsi
%define r2 rdx
%define r3 rcx
%define r4 r8
%define r5 r9
%define r6 r10
%define r7 rsp

%define r0d edi
%define r1d esi
%define r2d edx
%define r3d ecx
%define r4d r8d
%define r5d r9d
%define r6d r10d

%define r0w  di
%define r1w  si
%define r2w  dx
%define r3w  cx
%define r6w  r10w

%define r0b  dil
%define r1b  sil
%define r2b  dl
%define r3b  cl

%define  PUSHRFLAGS     pushfq
%define  POPRFLAGS      popfq
%define  retrq          rax
%define  retrd          eax

%elifdef X86_32 ; X86_32 ;************************************

BITS 32

%ifidn __OUTPUT_FORMAT__,elf
SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-executable
%endif

%define arg1 [esp + push_num*4 + 4]
%define arg2 [esp + push_num*4 + 8]
%define arg3 [esp + push_num*4 + 12]
%define arg4 [esp + push_num*4 + 16]
%define arg5 [esp + push_num*4 + 20]
%define arg6 [esp + push_num*4 + 24]
%define arg7 [esp + push_num*4 + 28]
%define arg8 [esp + push_num*4 + 32]
%define arg9 [esp + push_num*4 + 36]
%define arg10 [esp + push_num*4 + 40]
%define arg11 [esp + push_num*4 + 44]
%define arg12 [esp + push_num*4 + 48]

%define r0 eax
%define r1 ecx
%define r2 edx
%define r3 ebx
%define r4 esi
%define r5 edi
%define r6 ebp
%define r7 esp

%define r0d eax
%define r1d ecx
%define r2d edx
%define r3d ebx
%define r4d esi
%define r5d edi
%define r6d ebp

%define r0w ax
%define r1w cx
%define r2w dx
%define r3w bx
%define r6w bp

%define r0b al
%define r1b cl
%define r2b dl
%define r3b bl

%define  PUSHRFLAGS     pushfd
%define  POPRFLAGS      popfd
%define  retrq          eax      ; 32 bit mode do not support 64 bits regesters
%define  retrd          eax

%endif




0 0