C++函数调用原理

来源：互联网发布：我太孤独知乎编辑：程序博客网时间：2024/06/05 23:44

首先看一个很简单的例子（此例子的更简单原型（一个add函数）来自于《加密与解密》一书第三版第4章的第2节），这里列举出了各种函数调用约定的ADD函数（其实还有PASCAL调用约定，但是作为C++程序员我忽略了那种，所以我提到的几种函数调用约定都是参数反向入栈的，此点在下面不再提起）这里的编译器选择了VC6，不是我喜欢仿古。。。但是，因为VS2005（我平时用的）的优化太过于激进。。。。不知道此词妥否。。。。起码一般的小测试程序几乎不能反汇编得到什么信息，一般就是直接在编译期完成了很多操作了。这个我在以前也提到过，

比如在《Inside C++ Object 》阅读笔记(1)， NRV（Named Return Value）一文中：

http://blog.csdn.net/vagrxie/archive/2008/12/09/3486211.aspx

示例源代码1：

1 int __cdecl Add1(int x,int y) // default
2 {
3         return(x+y);
4 }
5
6 int __stdcall Add2(int x,int y)
7 {
8         return(x+y);
9 }
10
11 int __fastcall Add3(int x,int y)
12 {
13         return(x+y);
14 }
15
16 int inline Add4(int x,int y)
17 {
18         return(x+y);
19 }
20
21 int main( )
22 {
23         int a=5,b=6;
24         Add1(a,b);
25         Add2(a,b);
26         Add3(a,b);
27         Add4(a,b);
28         return 0;
29  }

Release编译后，反汇编代码及其注释如下：

.text:00401030 ; main函数

.text:00401030

.text:00401030 ; int __cdecl main(int argc, const char **argv, const char *envp)

.text:00401030 _main proc near ; CODE XREF: _mainCRTStartup+AFp

.text:00401030 push 6

.text:00401032 push 5

.text:00401034 call Add1 ; 将参数压入栈中，并调用Add1（__cdecl调用约定函数，

.text:00401034 ; 即C语言的调用规范，调用者负责栈的维护）

.text:00401039 add esp, 8 ; 此处由调用者维护调用了Add1后的栈，

.text:00401039 ; esp加8是因为两个参数

.text:0040103C push 6

.text:0040103E push 5

.text:00401040 call Add2 ; 参数入栈，并调用Add2（__stdcall调用规范，windows

.text:00401040 ; API的默认调用规范，由被调用者负责维护栈）所以

.text:00401040 ; 此函数调用完后，main函数中不需要有维护栈的操作

.text:00401045 mov edx, 6

.text:0040104A mov ecx, 5

.text:0040104F call Add3 ; 将参数赋值给寄存器edx,ecx，调用Add3(Fastcall调用约定，

.text:0040104F ; 参数尽量通过寄存器传递，也是由被调用者自己维护栈）

.text:00401054 xor eax, eax ; 此处清空eax作为main函数的返回值返回了，注意到并没有

.text:00401054 ; Add4（inline函数）的调用，并且因为返回值并没有用，

.text:00401054 ; 所以此函数即使在VC6中，也忽略了。

.text:00401056 retn

.text:00401056 _main endp

例2，稍微复杂一点

源码：

1 #include <stdio.h>
2
3 int __cdecl Add1(int x,int y) // default
4 {
5     int z = 1;
6     return(x+y+z);
7 }
8
9 int __stdcall Add2(int x,int y)
10 {
11     int z = 1;
12     return(x+y+z);
13 }
14
15 int __fastcall Add3(int x,int y)
16 {
17     int z = 1;
18     return(x+y+z);
19 }
20
21 int inline Add4(int x,int y)
22 {
23     int z = 1;
24     return(x+y+z);
25 }
26
27 int main( )
28 {
29     int a=5,b=6;
30     int c = 0;
31
32     c += Add1(a,b);
33     c += Add2(a,b);
34     c += Add3(a,b);
35     c += Add4(a,b);
36     printf("%d",c);
37     return 0;
38  }
39

比前面的例子多了一个变量c来累加返回值并输出，每个函数中再多了一个局部变量。

Release编译后，反汇编代码及其注释如下：

.text:00401030 ; main函数

.text:00401030

.text:00401030 ; int __cdecl main(int argc, const char **argv, const char *envp)

.text:00401030 _main proc near ; CODE XREF: _mainCRTStartup+AFp

.text:00401030

.text:00401030 argc = dword ptr 4

.text:00401030 argv = dword ptr 8

.text:00401030 envp = dword ptr 0Ch

.text:00401030

.text:00401030 push esi ; 以下可以看到，esi后来一直用作局部变量c，

.text:00401030 ; 所以此处先保存以前的值

.text:00401031 push 6

.text:00401033 push 5

.text:00401035 call Add1

.text:0040103A add esp, 8

.text:0040103D mov esi, eax ; 默认约定eax是返回值，无论哪种调用约定都是一样的，

.text:0040103D ; 并且因为C/C++函数肯定只能有一个返回值，所以确定

.text:0040103D ; 是eax这一个寄存器也没有关系

.text:0040103F push 6

.text:00401041 push 5

.text:00401043 call Add2

.text:00401048 mov edx, 6

.text:0040104D mov ecx, 5

.text:00401052 add esi, eax

.text:00401054 call Add3

.text:00401059 lea eax, [esi+eax+0Ch] ; 内联的作用，Add4还是没有函数调用，并且用一个lea指令

.text:00401059 ; 实现了c+Add3（）+5+6+1的操作，其中Add4的结果5+6+1（即0Ch）在编译期确定

.text:0040105D push eax

.text:0040105E push offset aD ; "%d"

.text:00401063 call _printf

.text:00401068 add esp, 8 ; 可见C语言库函数的调用遵循的是__cdecl约定，所以此处

.text:00401068 ; 由main函数维护栈

.text:0040106B xor eax, eax

.text:0040106D pop esi

.text:0040106E retn

.text:0040106E _main endp

与前一个例子重复的内容我注释也就不重复了。

以下具体看看各个Add函数的内容

.text:00401000 Add1 proc near ; CODE XREF: _main+5p

.text:00401000

.text:00401000 arg_0 = dword ptr 4

.text:00401000 arg_4 = dword ptr 8

.text:00401000

.text:00401000 mov eax, [esp+arg_4] ; 因为函数是如此的简单，所以此处并没有将ebp入栈，也

.text:00401000 ; 并没有通过堆栈为z局部变量开辟空间，而是直接用esp

.text:00401000 ; 取参数，用lea指令来完成+1，以下几个函数相同

.text:00401004 mov ecx, [esp+arg_0]

.text:00401008 lea eax, [ecx+eax+1]

.text:0040100C retn ; 这里可以看到Add1函数并没有在内部维护栈，原因也说了

.text:0040100C Add1 endp ; __cdecl调用约定是由调用者来维护栈的

.text:0040100C

.text:0040100C ; ---------------------------------------------------------------------------

.text:0040100D align 10h

.text:00401010

.text:00401010 ; =============== S U B R O U T I N E =======================================

.text:00401010

.text:00401010 Add2 proc near ; CODE XREF: _main+13p

.text:00401010

.text:00401010 arg_0 = dword ptr 4

.text:00401010 arg_4 = dword ptr 8

.text:00401010

.text:00401010 mov eax, [esp+arg_4]

.text:00401014 mov ecx, [esp+arg_0]

.text:00401018 lea eax, [ecx+eax+1]

.text:0040101C retn 8 ; 此处可以看到Add2自己维护了栈，retn 8相当于

.text:0040101C Add2 endp ; 先弹出返回地址并返回（retn），

.text:0040101C ; 再执行add esp, 8释放两个参数占用的8字节

.text:0040101C ; ---------------------------------------------------------------------------

.text:0040101F align 10h

.text:00401020

.text:00401020 ; =============== S U B R O U T I N E =======================================

.text:00401020

.text:00401020 Add3 proc near ; CODE XREF: _main+24p

.text:00401020 lea eax, [ecx+edx+1] ; 通过寄存器来传递参数，速度自然快，也不破坏栈，所以

.text:00401020 ; 也不用维护，此处的参数较少，所以可以达到完全不用

.text:00401020 ; 栈操作

.text:00401024 retn

.text:00401024 Add3 endp

.text:00401024

至此，完全没有源码，看到一个函数的调用，大概也知道参数是什么，返回值是什么，栈维护的操作是在干什么了。

这里再看两个复杂点的例子，一个是局部变量多一点的Add5,一个是参数多一点的fastcall调用的函数Add6

1 #include <stdio.h>
2
3 int __cdecl Add5(int x,int y) // default
4 {
5     int z1 = 1;
6     int z2 = ++z1;
7     int z3 = ++z2;
8     return(x+y+z1+z2+z3);
9 }
10
11 int __fastcall Add6(int x,int y,int z)
12 {
13     return(x+y+z);
14 }
15
16
17 int main( )
18 {
19     int a=5,b=6;
20     int c = 0;
21
22     c += Add5(a,b);
23     c += Add6(a,b,c);
24
25     printf("%d",c);
26     return 0;
27  }

反汇编：

.text:00401020 ; int __cdecl main(int argc, const char **argv, const char *envp)

.text:00401020 _main proc near ; CODE XREF: _mainCRTStartup+AFp

.text:00401020

.text:00401020 argc = dword ptr 4

.text:00401020 argv = dword ptr 8

.text:00401020 envp = dword ptr 0Ch

.text:00401020

.text:00401020 push esi

.text:00401021 push 6

.text:00401023 push 5

.text:00401025 call Add5

.text:0040102A add esp, 8

.text:0040102D mov esi, eax ; 保存第3个参数（即Add5的返回值）到esi

.text:0040102F mov edx, 6

.text:00401034 mov ecx, 5

.text:00401039 push esi ; 虽然是fastcall,但是edx,ecx不够用的时候，还是使用了栈

.text:0040103A call Add6

.text:0040103F add esi, eax

.text:00401041 push esi

.text:00401042 push offset aD ; "%d"

.text:00401047 call _printf

.text:0040104C add esp, 8

.text:0040104F xor eax, eax

.text:00401051 pop esi

.text:00401052 retn

.text:00401052 _main endp

Add函数：

.text:00401000 ; =============== S U B R O U T I N E =======================================

.text:00401000

.text:00401000 Add5 proc near ; CODE XREF: _main+5p

.text:00401000

.text:00401000 arg_0 = dword ptr 4

.text:00401000 arg_4 = dword ptr 8

.text:00401000

.text:00401000 mov eax, [esp+arg_4]

.text:00401004 mov ecx, [esp+arg_0]

.text:00401008 lea eax, [ecx+eax+8] ; 虽然我尽量做了很多无用的操作，但是连VC6都要把这些

.text:00401008 ; 操作优化掉

.text:0040100C retn

.text:0040100C Add5 endp

.text:0040100C

.text:0040100C ; ---------------------------------------------------------------------------

.text:0040100D align 10h

.text:00401010

.text:00401010 ; =============== S U B R O U T I N E =======================================

.text:00401010

.text:00401010 Add6 proc near ; CODE XREF: _main+1Ap

.text:00401010

.text:00401010 arg_0 = dword ptr 4

.text:00401010

.text:00401010 lea eax, [ecx+edx]

.text:00401013 mov ecx, [esp+arg_0] ; fastcall在VC中只会使用ecx,edx两个寄存器来传递参数，

.text:00401013 ; 当参数超过2个时，还是得通过栈来传递

.text:00401017 add eax, ecx

.text:00401019 retn 4

.text:00401019 Add6 endp

.text:00401019

.text:00401019 ; ---------------------------------------------------------------------------

空程序:

int main()

{

00411360 push ebp ;压入ebp

00411361 mov ebp,esp ;ebp = esp,保留esp,待函数调用完再恢复,因为函数调用中肯定会用到esp.

00411363 sub esp,0C0h ;esp-=0C0h(192);为该函数留出临时存储区

;将其他指针或寄存器中的值入栈，以便在函数中使用这些寄存器。

00411369 push ebx ;压入ebx

0041136A push esi ;压入esi

0041136B push edi ;压入edi

0041136C lea edi,[ebp-0C0h] ;读入[ebp-0C0h]有效地址,即原esp-0C0h,正好是为该函数留出的临时存储区的最低位

00411372 mov ecx,30h ;ecx = 30h(48),30h*4 = 0C0h

00411377 mov eax,0CCCCCCCCh ;eax = 0CCCCCCCCh;

0041137C rep stos dword ptr es:[edi] ;重复在es:[edi]存入30h(48)个0CCCCCCCCh; Debug模式下把Stack上的变量初始化为0xcc，检查未初始化的问题

return 0;

0041137E xor eax,eax ;将eax清零,作为返回值

}

;各指针出栈

00411380 pop edi ;弹出edi

00411381 pop esi ;弹出esi

00411382 pop ebx ;弹出ebx

00411383 mov esp,ebp ;esp复原

00411385 pop ebp ;弹出ebp,也复原

00411386 ret ;返回

函数调用:

int _tmain(int argc, _TCHAR* argv[])

{

同上理解, 保存现场

004113D0 push ebp

004113D1 mov ebp,esp

004113D3 sub esp,0F0h ;一共留了0F0h(240)空间

004113D9 push ebx

004113DA push esi

004113DB push edi

004113DC lea edi,[ebp-0F0h]

004113E2 mov ecx,3Ch ; ecx = 3C(60),3C*4 = 0F0h,

004113E7 mov eax,0CCCCCCCCh

004113EC rep stos dword ptr es:[edi]

同上理解.

int a = 1, b = 2, c = 3;

定义a,b,c并存储在为函数留出的临时存储空间中.

004113EE mov dword ptr [a],1

004113F5 mov dword ptr [b],2

004113FC mov dword ptr [c],3

int d = Fun1(a, b, c);

参数反向入栈

00411403 mov eax,dword ptr [c]

00411406 push eax

00411407 mov ecx,dword ptr [b]

0041140A push ecx

0041140B mov edx,dword ptr [a]

0041140E push edx

调用Fun1

0041140F call Fun1 (4111DBh) ;Call调用时将下一行命令的ＥＩＰ压入堆栈

恢复因为Fun1参数入栈改变的栈指针,因为Fun1有3个参数,一个整数4个字节,共0Ch(12)个字节

00411414 add esp,0Ch

00411417 mov dword ptr [d],eax

将返回值保存在d中.

return 0;

返回值为0,让eax清零

0041141A xor eax,eax

}

恢复现场

0041141C pop edi

0041141D pop esi

0041141E pop ebx

以下全为运行时ESP检查:

先恢复因为为main预留空间而改变的栈指针

0041141F add esp,0F0h

00411425 cmp ebp,esp

00411427 call @ILT+320(__RTC_CheckEsp) (411145h)

正常时只需要以下两句就可以正常恢复esp,再出栈,又可以恢复ebp.

0041142C mov esp,ebp

0041142E pop ebp

0041142F ret ;main返回

int Fun1(int a, int b, int c)

{

同上理解, 保存现场

00411A70 push ebp

00411A71 mov ebp,esp

00411A73 sub esp,0E4h ;留了0E4H(228)空间,

00411A79 push ebx

00411A7A push esi

00411A7B push edi

00411A7C lea edi,[ebp-0E4h]

00411A82 mov ecx,39h ; 39H(57)*4 = 0E4H(228)

00411A87 mov eax,0CCCCCCCCh

00411A8C rep stos dword ptr es:[edi]

int d = 4, e = 5;

定义变量

00411A8E mov dword ptr [d],4

00411A95 mov dword ptr [e],5

int f = Fun2(a, b, c, d, e);

再次参数反向入栈

00411A9C mov eax,dword ptr [e]

00411A9F push eax

00411AA0 mov ecx,dword ptr [d]

00411AA3 push ecx

00411AA4 mov edx,dword ptr [c]

00411AA7 push edx

00411AA8 mov eax,dword ptr [b]

00411AAB push eax

00411AAC mov ecx,dword ptr [a]

00411AAF push ecx

调用Fun2

00411AB0 call Fun2 (4111D6h) ;Call调用时将下一行命令的ＥＩＰ压入堆栈

00411AB5 add esp,14h ;恢复因为参数入栈改变的栈指针,因为Fun2有5个参数,一个整数4个字节,共14h(20)个字节

将Fun2函数的返回值(保存在eax中),赋值给f;

00411AB8 mov dword ptr [f],eax

return f;

将保存在f中的值(即Fun2的返回值)放入eax,作为Fun1的返回值返回

00411ABB mov eax,dword ptr [f]

}

恢复现场

00411ABE pop edi

00411ABF pop esi

00411AC0 pop ebx

以下全为运行时ESP检查:

先恢复因为预留函数存储空间而改变的栈指针,

00411AC1 add esp,0E4h

再比较ebp,esp,假如程序运行正确,两个值应该相等.

00411AC7 cmp ebp,esp

00411AC9 call @ILT+320(__RTC_CheckEsp) (411145h)

正常时只需要以下两句就可以正常恢复esp,再出栈,又可以恢复ebp.

00411ACE mov esp,ebp

00411AD0 pop ebp

返回main从pop堆栈中的EIP开始执行

00411AD1 ret

int Fun2(int a, int b, int c, int d, int e)

{

同上理解, 保存现场

00412050 push ebp

00412051 mov ebp,esp

00412053 sub esp,0E4h ;保留0E4H(228)

00412059 push ebx

0041205A push esi

0041205B push edi

0041205C lea edi,[ebp-0E4h]

00412062 mov ecx,39h ; 39H(57)*4 = 0E4H(228)

00412067 mov eax,0CCCCCCCCh

0041206C rep stos dword ptr es:[edi]

int f = 6, g = 7;

定义变量

0041206E mov dword ptr [f],6

00412075 mov dword ptr [g],7

int h = a + b + c + d + e + f + g;

相加,存入eax,再保存在h

0041207C mov eax,dword ptr [a]

0041207F add eax,dword ptr [b]

00412082 add eax,dword ptr [c]

00412085 add eax,dword ptr [d]

00412088 add eax,dword ptr [e]

0041208B add eax,dword ptr [f]

0041208E add eax,dword ptr [g]

00412091 mov dword ptr [h],eax

return h;

将返回值h的值保存在eax中

00412094 mov eax,dword ptr [h]

}

恢复现场

00412097 pop edi

00412098 pop esi

00412099 pop ebx

0041209A mov esp,ebp

0041209C pop ebp

0041209D ret ;返回fun1 ,从pop堆栈中的EIP开始执行