linux编程之main()函数启动过程

来源:互联网 发布:jsp数据展示样式 编辑:程序博客网 时间:2024/04/26 12:29

1 最简单的程序 

1)编辑helloworld程序,$vim helloworld.c

  1 #include <stdio.h>  2   3 int main (int argc, char *argv[])  4 {  5         printf("Hello world!\n");  6   7         return 0;  8 }

2) 编译,$ gcc helloworld.c -o helloworld

3) 运行,$./helloworld 

Hello world!

2 最简单的程序其实不简单

上面这个helloworld程序已经再简单不过了,先来看一看它的反汇编代码:

$ objdump -d helloworldhelloworld:     file format elf32-i386Disassembly of section .init:080482b0 <_init>: 80482b0:53                   push   %ebx 80482b1:83 ec 08             sub    $0x8,%esp 80482b4:e8 00 00 00 00       call   80482b9 <_init+0x9> 80482b9:5b                   pop    %ebx 80482ba:81 c3 3b 1d 00 00    add    $0x1d3b,%ebx 80482c0:8b 83 fc ff ff ff    mov    -0x4(%ebx),%eax 80482c6:85 c0                test   %eax,%eax 80482c8:74 05                je     80482cf <_init+0x1f> 80482ca:e8 31 00 00 00       call   8048300 <__gmon_start__@plt> 80482cf:e8 dc 00 00 00       call   80483b0 <frame_dummy> 80482d4:e8 97 01 00 00       call   8048470 <__do_global_ctors_aux> 80482d9:83 c4 08             add    $0x8,%esp 80482dc:5b                   pop    %ebx 80482dd:c3                   ret    Disassembly of section .plt:080482e0 <puts@plt-0x10>: 80482e0:ff 35 f8 9f 04 08    pushl  0x8049ff8 80482e6:ff 25 fc 9f 04 08    jmp    *0x8049ffc 80482ec:00 00                add    %al,(%eax)...080482f0 <puts@plt>: 80482f0:ff 25 00 a0 04 08    jmp    *0x804a000 80482f6:68 00 00 00 00       push   $0x0 80482fb:e9 e0 ff ff ff       jmp    80482e0 <_init+0x30>08048300 <__gmon_start__@plt>: 8048300:ff 25 04 a0 04 08    jmp    *0x804a004 8048306:68 08 00 00 00       push   $0x8 804830b:e9 d0 ff ff ff       jmp    80482e0 <_init+0x30>08048310 <__libc_start_main@plt>: 8048310:ff 25 08 a0 04 08    jmp    *0x804a008 8048316:68 10 00 00 00       push   $0x10 804831b:e9 c0 ff ff ff       jmp    80482e0 <_init+0x30>Disassembly of section .text:08048320 <_start>: 8048320:31 ed                xor    %ebp,%ebp 8048322:5e                   pop    %esi 8048323:89 e1                mov    %esp,%ecx 8048325:83 e4 f0             and    $0xfffffff0,%esp 8048328:50                   push   %eax 8048329:54                   push   %esp 804832a:52                   push   %edx 804832b:68 60 84 04 08       push   $0x8048460 8048330:68 f0 83 04 08       push   $0x80483f0 8048335:51              
     push   %ecx 8048336:56                   push   %esi 8048337:68 d4 83 04 08       push   $0x80483d4 804833c:e8 cf ff ff ff       call   8048310 <__libc_start_main@plt> 8048341:f4                   hlt     8048342:90                   nop 8048343:90                   nop 8048344:90                   nop 8048345:90                   nop 8048346:90                   nop 8048347:90                   nop 8048348:90                   nop 8048349:90                   nop 804834a:90                   nop 804834b:90                   nop 804834c:90                   nop 804834d:90                   nop 804834e:90                   nop 804834f:90                   nop08048350 <__do_global_dtors_aux>: 8048350:55                   push   %ebp 8048351:89 e5                mov    %esp,%ebp 8048353:53                   push   %ebx 8048354:83 ec 04             sub    $0x4,%esp 8048357:80 3d 14 a0 04 08 00 cmpb   $0x0,0x804a014 804835e:75 3f                jne    804839f <__do_global_dtors_aux+0x4f> 8048360:a1 18 a0 04 08       mov    0x804a018,%eax 8048365:bb 20 9f 04 08       mov    $0x8049f20,%ebx 804836a:81 eb 1c 9f 04 08    sub    $0x8049f1c,%ebx 8048370:c1 fb 02             sar    $0x2,%ebx 8048373:83 eb 01             sub    $0x1,%ebx 8048376:39 d8                cmp    %ebx,%eax 8048378:73 1e                jae    8048398 <__do_global_dtors_aux+0x48> 804837a:8d b6 00 00 00 00    lea    0x0(%esi),%esi 8048380:83 c0 01             add    $0x1,%eax 8048383:a3 18 a0 04 08       mov    %eax,0x804a018 8048388:ff 14 85 1c 9f 04 08 call   *0x8049f1c(,%eax,4) 804838f:a1 18 a0 04 08       mov    0x804a018,%eax 8048394:39 d8                cmp    %ebx,%eax 8048396:72 e8                jb     8048380 <__do_global_dtors_aux+0x30> 8048398:c6 05 14 a0 04 08 01 movb   $0x1,0x804a014 804839f:83 c4 04             add    $0x4,%esp 80483a2:5b                   pop    %ebx 80483a3:5d                   pop    %ebp 80483a4:c3                   ret     80483a5:8d 74 26 00          lea    
0x0(%esi,%eiz,1),%esi 80483a9:8d bc 27 00 00 00 00 lea    0x0(%edi,%eiz,1),%edi080483b0 <frame_dummy>: 80483b0:55                   push   %ebp 80483b1:89 e5                mov    %esp,%ebp 80483b3:83 ec 18             sub    $0x18,%esp 80483b6:a1 24 9f 04 08       mov    0x8049f24,%eax 80483bb:85 c0                test   %eax,%eax 80483bd:74 12                je     80483d1 <frame_dummy+0x21> 80483bf:b8 00 00 00 00       mov    $0x0,%eax 80483c4:85 c0                test   %eax,%eax 80483c6:74 09                je     80483d1 <frame_dummy+0x21> 80483c8:c7 04 24 24 9f 04 08 movl   $0x8049f24,(%esp) 80483cf:ff d0                call   *%eax 80483d1:c9                   leave   80483d2:c3                   ret     80483d3:90                   nop080483d4 <main>: 80483d4:55                   push   %ebp 80483d5:89 e5                mov    %esp,%ebp 80483d7:83 e4 f0             and    $0xfffffff0,%esp 80483da:83 ec 10             sub    $0x10,%esp 80483dd:c7 04 24 c0 84 04 08 movl   $0x80484c0,(%esp) 80483e4:e8 07 ff ff ff       call   80482f0 <puts@plt> 80483e9:b8 00 00 00 00       mov    $0x0,%eax 80483ee:c9                   leave   80483ef:c3                   ret    080483f0 <__libc_csu_init>: 80483f0:55                   push   %ebp 80483f1:57                   push   %edi 80483f2:56                   push   %esi 80483f3:53                   push   %ebx 80483f4:e8 69 00 00 00       call   8048462 <__i686.get_pc_thunk.bx> 80483f9:81 c3 fb 1b 00 00    add    $0x1bfb,%ebx 80483ff:83 ec 1c             sub    $0x1c,%esp 8048402:8b 6c 24 30          mov    0x30(%esp),%ebp 8048406:8d bb 20 ff ff ff    lea    -0xe0(%ebx),%edi 804840c:e8 9f fe ff ff       call   80482b0 <_init> 8048411:8d 83 20 ff ff ff    lea    -0xe0(%ebx),%eax 8048417:29 c7                sub    %eax,%edi 8048419:c1 ff 02             sar    $0x2,%edi 804841c:85 ff                test   %edi,%edi 804841e:74 29                je     8048449 <__libc_csu_init+0x59> 8048420:31 f6                xor    
%esi,%esi 8048422:8d b6 00 00 00 00    lea    0x0(%esi),%esi 8048428:8b 44 24 38          mov    0x38(%esp),%eax 804842c:89 2c 24             mov    %ebp,(%esp) 804842f:89 44 24 08          mov    %eax,0x8(%esp) 8048433:8b 44 24 34          mov    0x34(%esp),%eax 8048437:89 44 24 04          mov    %eax,0x4(%esp) 804843b:ff 94 b3 20 ff ff ff call   *-0xe0(%ebx,%esi,4) 8048442:83 c6 01             add    $0x1,%esi 8048445:39 fe                cmp    %edi,%esi 8048447:75 df                jne    8048428 <__libc_csu_init+0x38> 8048449:83 c4 1c             add    $0x1c,%esp 804844c:5b                   pop    %ebx 804844d:5e                   pop    %esi 804844e:5f                   pop    %edi 804844f:5d                   pop    %ebp 8048450:c3                   ret     8048451:eb 0d                jmp    8048460 <__libc_csu_fini> 8048453:90                   nop 8048454:90                   nop 8048455:90                   nop 8048456:90                   nop 8048457:90                   nop 8048458:90                   nop 8048459:90                   nop 804845a:90                   nop 804845b:90                   nop 804845c:90                   nop 804845d:90                   nop 804845e:90                   nop 804845f:90                   nop08048460 <__libc_csu_fini>: 8048460:f3 c3                repz ret 08048462 <__i686.get_pc_thunk.bx>: 8048462:8b 1c 24             mov    (%esp),%ebx 8048465:c3                   ret     8048466:90                   nop 8048467:90                   nop 8048468:90                   nop 8048469:90                   nop 804846a:90                   nop 804846b:90                   nop 804846c:90                   nop 804846d:90                   nop 804846e:90                   nop 804846f:90                   nop08048470 <__do_global_ctors_aux>: 8048470:55                   push   %ebp 8048471:89 e5                mov    %esp,%ebp 8048473:53                   push   %ebx 8048474:83 ec 04             sub    $0x4,%esp 
8048477:a1 14 9f 04 08       mov    0x8049f14,%eax 804847c:83 f8 ff             cmp    $0xffffffff,%eax 804847f:74 13                je     8048494 <__do_global_ctors_aux+0x24> 8048481:bb 14 9f 04 08       mov    $0x8049f14,%ebx 8048486:66 90                xchg   %ax,%ax 8048488:83 eb 04             sub    $0x4,%ebx 804848b:ff d0                call   *%eax 804848d:8b 03                mov    (%ebx),%eax 804848f:83 f8 ff             cmp    $0xffffffff,%eax 8048492:75 f4                jne    8048488 <__do_global_ctors_aux+0x18> 8048494:83 c4 04             add    $0x4,%esp 8048497:5b                   pop    %ebx 8048498:5d                   pop    %ebp 8048499:c3                   ret     804849a:90                   nop 804849b:90                   nopDisassembly of section .fini:0804849c <_fini>: 804849c:53                   push   %ebx 804849d:83 ec 08             sub    $0x8,%esp 80484a0:e8 00 00 00 00       call   80484a5 <_fini+0x9> 80484a5:5b                   pop    %ebx 80484a6:81 c3 4f 1b 00 00    add    $0x1b4f,%ebx 80484ac:e8 9f fe ff ff       call   8048350 <__do_global_dtors_aux> 80484b1:83 c4 08             add    $0x8,%esp 80484b4:5b                   pop    %ebx 80484b5:c3                   ret 
从反汇编结果来看,helloworld可执行文件并不只是main函数,除main外,还有几个重要的函数,_init,_fini,_start,__libc_start_main,__libc_csu_init,__libc_csu_fini等。
除main函数外,其它函数从哪来的?分析一下编译好的可执行文件helloworld的库依赖关系如下:
$ ldd helloworld
	linux-gate.so.1 =>  (0x00876000)
	libc.so.6 => /lib/i386-linux-gnu/libc.so.6 (0x00110000)
	/lib/ld-linux.so.2 (0x006a3000)
其中linux-gate.so.1为内核提供的、与系统调用相关的虚拟动态库(vDSO),ld-linux.so.2为动态链接器(负责在程序启动时加载并链接共享库),libc.so.6为GNU C库。
从反汇编出来的相关函数名称(__libc_start_main)来看,我们把目标锁定在GNU C库,即glibc,也是GNU开源项目,可以从GNU 源码ftp站点(http://ftp.gnu.org/gnu/libc/)上找到。
__libc_start_main函数可以在glibc源码的csu/libc-start.c中找到:
STATIC int LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),    int argc,    char *__unbounded *__unbounded ubp_av,#ifdef LIBC_START_MAIN_AUXVEC_ARG    ElfW(auxv_t) *__unbounded auxvec,#endif    __typeof (main) init,    void (*fini) (void),    void (*rtld_fini) (void),    void *__unbounded stack_end)     __attribute__ ((noreturn));
__libc_csu_init,__libc_csu_fini函数可以在glibc源码的csu/elf-init.c中找到:
void__libc_csu_init (int argc, char **argv, char **envp){  /* For dynamically linked executables the preinit array is executed by     the dynamic linker (before initializing any shared object.  */#ifndef LIBC_NONSHARED  /* For static executables, preinit happens right before init.  */  {    const size_t size = __preinit_array_end - __preinit_array_start;    size_t i;    for (i = 0; i < size; i++)      (*__preinit_array_start [i]) (argc, argv, envp);  }#endif  _init ();  const size_t size = __init_array_end - __init_array_start;  for (size_t i = 0; i < size; i++)      (*__init_array_start [i]) (argc, argv, envp);}/* This function should not be used anymore.  We run the executable's   destructor now just like any other.  We cannot remove the function,   though.  */void__libc_csu_fini (void){#ifndef LIBC_NONSHARED  size_t i = __fini_array_end - __fini_array_start;  while (i-- > 0)    (*__fini_array_start [i]) ();  _fini ();#endif}
_start函数可以在glibc源码的sysdeps/i386/elf/start.S中找到:
#include "bp-sym.h".text.globl _start.type _start,@function_start:/* Clear the frame pointer.  The ABI suggests this be done, to mark   the outermost frame obviously.  */xorl %ebp, %ebp/* Extract the arguments as encoded on the stack and set up   the arguments for `main': argc, argv.  envp will be determined   later in __libc_start_main.  */popl %esi/* Pop the argument count.  */movl %esp, %ecx/* argv starts just at the current stack top.*//* Before pushing the arguments align the stack to a 16-byte(SSE needs 16-byte alignment) boundary to avoid penalties frommisaligned accesses.  Thanks to Edward Seidl <seidl@janed.com>for pointing this out.  */andl $0xfffffff0, %esppushl %eax/* Push garbage because we allocate   28 more bytes.  *//* Provide the highest stack address to the user code (for stacks   which grow downwards).  */pushl %esppushl %edx/* Push address of the shared library   termination function.  */#ifdef SHARED/* Load PIC register.  */call 1faddl $_GLOBAL_OFFSET_TABLE_, %ebx/* Push address of our own entry points to .fini and .init.  */leal __libc_csu_fini@GOTOFF(%ebx), %eaxpushl %eaxleal __libc_csu_init@GOTOFF(%ebx), %eaxpushl %eaxpushl %ecx/* Push second argument: argv.  */pushl %esi/* Push first argument: argc.  */pushl BP_SYM (main)@GOT(%ebx)/* Call the user's main function, and exit with its value.   But let the libc call main.    */call BP_SYM (__libc_start_main)@PLT#else/* Push address of our own entry points to .fini and .init.  */pushl $__libc_csu_finipushl $__libc_csu_initpushl %ecx/* Push second argument: argv.  */pushl %esi/* Push first argument: argc.  */pushl $BP_SYM (main)/* Call the user's main function, and exit with its value.   But let the libc call main.    */call BP_SYM (__libc_start_main)#endifhlt/* Crash if somehow `exit' does return.  */
_init,_fini函数可以在glibc源码的sysdeps/generic/initfini.c中找到:
/* The beginning of _init:  */asm ("\n/*@_init_PROLOG_BEGINS*/");static voidcall_gmon_start(void){  extern void __gmon_start__ (void) __attribute__ ((weak)); /*weak_extern (__gmon_start__);*/  void (*gmon_start) (void) = __gmon_start__;  if (gmon_start)    gmon_start ();}SECTION (".init");extern void __attribute__ ((section (".init"))) _init (void);void_init (void){  /* We cannot use the normal constructor mechanism in gcrt1.o because it     appears before crtbegin.o in the link, so the header elt of .ctors     would come after the elt for __gmon_start__.  One approach is for     gcrt1.o to reference a symbol which would be defined by some library     module which has a constructor; but then user code's constructors     would come first, and not be profiled.  */  call_gmon_start ();  asm ("ALIGN");  asm("END_INIT");  /* Now the epilog. */  asm ("\n/*@_init_PROLOG_ENDS*/");  asm ("\n/*@_init_EPILOG_BEGINS*/");  SECTION(".init");}asm ("END_INIT");/* End of the _init epilog, beginning of the _fini prolog. */asm ("\n/*@_init_EPILOG_ENDS*/");asm ("\n/*@_fini_PROLOG_BEGINS*/");SECTION (".fini");extern void __attribute__ ((section (".fini"))) _fini (void);void_fini (void){  /* End of the _fini prolog. */  asm ("ALIGN");  asm ("END_FINI");  asm ("\n/*@_fini_PROLOG_ENDS*/");  {    /* Let GCC know that _fini is not a leaf function by having a dummy       function call here.  We arrange for this call to be omitted from       either crt file.  */    extern void i_am_not_a_leaf (void);    i_am_not_a_leaf ();  }  /* Beginning of the _fini epilog. */  asm ("\n/*@_fini_EPILOG_BEGINS*/");  SECTION (".fini");}asm ("END_FINI");/* End of the _fini epilog.  Any further generated assembly (e.g. .ident)   is shared between both crt files. */asm ("\n/*@_fini_EPILOG_ENDS*/");asm ("\n/*@TRAILER_BEGINS*/");
但是为什么不能在libc.so.6中找到_start,_init,_fini这三个函数呢?
是因为GNU把这三个作为了程序启动和结束的最基本运行库函数,分别放在crt1.o,crti.o,crtn.o这三个object文件中供程序链接时使用。
从glibc的源码sysdeps/generic/initfini.c相关注释也可以看出:
/* This file is compiled into assembly code which is then munged by a sed   script into two files: crti.s and crtn.s.   * crti.s puts a function prologue at the beginning of the   .init and .fini sections and defines global symbols for   those addresses, so they can be called as functions.   * crtn.s puts the corresponding function epilogues   in the .init and .fini sections. */
从上面注释来看crti.o,crtn.o分别包含.init和.fini段的开头和结束部分,分析它们的反汇编代码也可以看出:
$ objdump -d /usr/lib/i386-linux-gnu/crti.o /usr/lib/i386-linux-gnu/crti.o:     file format elf32-i386Disassembly of section .init:00000000 <_init>:   0:53                   push   %ebx   1:83 ec 08             sub    $0x8,%esp   4:e8 00 00 00 00       call   9 <_init+0x9>   9:5b                   pop    %ebx   a:81 c3 03 00 00 00    add    $0x3,%ebx  10:8b 83 00 00 00 00    mov    0x0(%ebx),%eax  16:85 c0                test   %eax,%eax  18:74 05                je     1f <_init+0x1f>  1a:e8 fc ff ff ff       call   1b <_init+0x1b>Disassembly of section .fini:00000000 <_fini>:   0:53                   push   %ebx   1:83 ec 08             sub    $0x8,%esp   4:e8 00 00 00 00       call   9 <_fini+0x9>   9:5b                   pop    %ebx   a:81 c3 03 00 00 00    add    $0x3,%ebx
$ objdump -d /usr/lib/i386-linux-gnu/crtn.o /usr/lib/i386-linux-gnu/crtn.o:     file format elf32-i386Disassembly of section .init:00000000 <.init>:   0:83 c4 08             add    $0x8,%esp   3:5b                   pop    %ebx   4:c3                   ret    Disassembly of section .fini:00000000 <.fini>:   0:83 c4 08             add    $0x8,%esp   3:5b                   pop    %ebx   4:c3                   ret

回过头来看看helloworld程序反汇编代码中的_init,_fini函数的组成:

080482b0 <_init>: 80482b0:53                   push   %ebx 80482b1:83 ec 08             sub    $0x8,%esp 80482b4:e8 00 00 00 00       call   80482b9 <_init+0x9> 80482b9:5b                   pop    %ebx 80482ba:81 c3 3b 1d 00 00    add    $0x1d3b,%ebx 80482c0:8b 83 fc ff ff ff    mov    -0x4(%ebx),%eax 80482c6:85 c0                test   %eax,%eax 80482c8:74 05                je     80482cf <_init+0x1f>
 80482ca:e8 31 00 00 00       call   8048300 <__gmon_start__@plt> 80482cf:e8 dc 00 00 00       call   80483b0 <frame_dummy> 80482d4:e8 97 01 00 00       call   8048470 <__do_global_ctors_aux>
 80482d9:83 c4 08             add    $0x8,%esp 80482dc:5b                   pop    %ebx 80482dd:c3                   ret 
0804849c <_fini>: 804849c:53                   push   %ebx 804849d:83 ec 08             sub    $0x8,%esp 80484a0:e8 00 00 00 00       call   80484a5 <_fini+0x9> 80484a5:5b                   pop    %ebx 80484a6:81 c3 4f 1b 00 00    add    $0x1b4f,%ebx
 80484ac:e8 9f fe ff ff       call   8048350 <__do_global_dtors_aux>
 80484b1:83 c4 08             add    $0x8,%esp 80484b4:5b                   pop    %ebx 80484b5:c3                   ret 
helloworld程序反汇编代码中的_init,_fini函数中间多出来的部分是其它库或用户自定义的.init和.fini段代码。
从上面helloworld程序反汇编来看,程序启动的过程应该是:
_start -> __libc_start_main -> main. 具体一点就是:
_start -> __libc_start_main -> __libc_csu_init -> main. 再具体一点就是:
_start -> __libc_start_main -> __libc_csu_init -> _init -> main -> _fini.
可以通过分析__libc_start_main函数进一步了解,下面的__libc_start_main函数是我精简后的伪代码:
/* Note: the fini parameter is ignored here for shared library.  It   is registered with __cxa_atexit.  This had the disadvantage that   finalizers were called in more than one place.  */STATIC intLIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), int argc, char *__unbounded *__unbounded ubp_av,#ifdef LIBC_START_MAIN_AUXVEC_ARG ElfW(auxv_t) *__unbounded auxvec,#endif __typeof (main) init, void (*fini) (void), void (*rtld_fini) (void), void *__unbounded stack_end){  ...  /* Register the destructor of the dynamic linker if there is any.  */  if (__builtin_expect (rtld_fini != NULL, 1))    __cxa_atexit ((void (*) (void *)) rtld_fini, NULL, NULL);  /* Call the initializer of the libc.  This is only needed here if we     are compiling for the static library in which case we haven't     run the constructors in `_dl_start_user'.  */  __libc_init_first (argc, argv, __environ);  /* Register the destructor of the program, if any.  */  if (fini)    __cxa_atexit ((void (*) (void *)) fini, NULL, NULL);  /* Call the initializer of the program, if any.  */  if (init)    (*init) (argc, argv, __environ MAIN_AUXVEC_PARAM);  /* Nothing fancy, just call the function.  */  result = main (argc, argv, __environ MAIN_AUXVEC_PARAM);  exit (result);}

__libc_start_main函数的参数main, argc, ubp_av, init, fini, rtld_fini都是通过_start入栈得到。

其中argc, ubp_av为传递给main函数的参数argc, argv。init为__libc_csu_init函数指针,fini为__libc_csu_fini函数指针,rtld_fini为动态链接器提供的收尾(析构)函数指针。

从__libc_start_main函数可以看出,在call init之前还通过__cxa_atexit向exit函数注册了rtld_fini和fini函数,目的是为了在main结束后call exit时自动完成一些收尾工作。

这里的_init, _fini函数功能还主要负责完成C++程序全局/静态对象的构造与析构,有兴趣的可以深入一下。

__cxa_atexit函数用于注册main结束后程序退出时调用的函数,例如:

  1 #include <stdio.h>  2 #include <stdlib.h>  3   4 void pre_exit (void)  5 {  6         printf("Will be exit!\n");  7 }  8   9 int main (int argc, char *argv[]) 10 { 11         __cxa_atexit(pre_exit, NULL, NULL); 12  13         printf("Hello world!\n"); 14  15         return 0; 16 }
$ gcc helloworld2.c -o helloworld2
$ ./helloworld2
Hello world!
Will be exit!

可以看到pre_exit函数并不是靠main来执行的,而是靠__libc_start_main函数里面的exit函数来调用完成。

为了更进一步理解_init函数的作用,我们再改写一下代码:

因为链接器会自动把各个目标文件的.init段拼接到一起组成_init函数,所以为了不使_init函数半途返回退出,放进.init段的程序块中不能含有函数的返回指令。我们只能用下面内嵌汇编的方法指定哪些程序块需要被链接到_init函数,并且必须在该程序块结束的地方再加入一条汇编伪指令,告诉汇编器后面的代码不属于.init段。因此就有了下面这个测试函数pre_init,它的代码会被link到两个section,即.init和.text段。

  1 #include <stdio.h>  2 #include <stdlib.h>  3   4 void __attribute__((used)) pre_init (void)  5 {  6         asm (".section .init");  7         printf("pre-init section .init part\n");  8         printf("Call pre_init before main\n");  9         asm (".section .text"); 10         printf("pre-init section .text part\n"); 11         printf("call pre_init in main\n"); 12 } 13  14 void pre_exit (void) 15 { 16         printf("Call pre_exit after main!\n"); 17 } 18  19 int main (int argc, char *argv[]) 20 { 21         printf("Enter main\n"); 22  23         __cxa_atexit(pre_exit, NULL, NULL); 24  25         printf("Hello world!\n"); 26  27         pre_init(); 28  29         printf("Exit main\n"); 30  31         return 0; 32 }
$ gcc helloworld3.c -o helloworld3
$ ./helloworld3
pre-init section .init part
Call pre_init before main
Enter main
Hello world!
pre-init section .text part
call pre_init in main
Exit main
Call pre_exit after main!
从helloworld3程序的运行结果也可以看出该程序函数的调用过程:

pre_init (.init段部分 代码) -> main -> pre_init (.text段部分 代码) -> pre_exit.

同样我们也可以添加.fini段代码,链接器会把.fini段代码部分链接进_fini函数,这样_fini函数就会帮我们在main函数退出后自动执行这段代码:

 14 void __attribute__((used)) pre_fini (void) 15 { 16         asm (".section .fini"); 17         printf("pre-fini section .fini part\n"); 18         printf("Call pre_fini after main\n"); 19         asm (".section .text"); 20 }

$ gcc helloworld4.c -o helloworld4
$ ./helloworld4
pre-init section .init part
Call pre_init before main
Enter main
Hello world!
pre-init section .text part
call pre_init in main
Exit main
Call pre_exit after main!
pre-fini section .fini part
Call pre_fini after main

从以上运行结果可知pre_fini函数中.fini段部分的代码是在main函数退出后才被执行的,但是为什么pre_fini打印的内容比pre_exit打印的内容要晚出来呢?这是因为负责执行.fini段代码的收尾函数早在__libc_start_main调用main之前就已经通过__cxa_atexit注册了,而pre_exit是在main函数内部才注册的,注册得更晚。通过__cxa_atexit注册的过程其实就是建立一个供exit使用的函数指针栈,按栈的规则是后进先出,因此pre_exit比pre_fini要先被调用。

从helloworld4程序的运行结果也可以看出该程序函数的调用过程:

pre_init (.init段部分 代码) -> main -> pre_init (.text段部分 代码) -> pre_exit -> pre_fini (.fini段部分 代码).

为了让程序正常运行和结束,链接器ld帮我们做了好多事情,可以用下面来表示:

ld crt1.o crti.o [usr object] [lib] crtn.o 

最后总结一下,程序启动和结束的过程可以描述为: 

_start -> __libc_start_main ->  init -> main -> exit.

3 Self-check

如果想对main的启动及结束有进一步的理解,最好的方法就是亲自读一读glibc的相关源码。
另外,介绍一本好书《程序员的自我修养—链接、装载与库》。
(待完善)

原创粉丝点击