VS下EXE可执行文件启动代码剖析(4)_cinit 函数

来源:互联网 发布:金融交易软件开发 编辑:程序博客网 时间:2024/05/18 10:13

接着上文 在初始化IO之后的代码是这样的

        __try {            if ( _ioinit() < 0 )            /* initialize lowio */                _amsg_exit(_RT_LOWIOINIT);            /* get wide cmd line info */            _tcmdln = (_TSCHAR *)GetCommandLineT();            /* get wide environ info */            _tenvptr = (_TSCHAR *)GetEnvironmentStringsT();            if ( _tsetargv() < 0 )                _amsg_exit(_RT_SPACEARG);            if ( _tsetenvp() < 0 )                _amsg_exit(_RT_SPACEENV);            initret = _cinit(TRUE);                  /* do C data initialize */            if (initret != 0)                _amsg_exit(initret);#ifdef _WINMAIN_            lpszCommandLine = _twincmdln();            mainret = _tWinMain( (HINSTANCE)&__ImageBase,                                 NULL,                                 lpszCommandLine,                                 StartupInfo.dwFlags & STARTF_USESHOWWINDOW                                      ? StartupInfo.wShowWindow                                      : SW_SHOWDEFAULT                                );#else  /* _WINMAIN_ */            _tinitenv = _tenviron;            mainret = _tmain(__argc, _targv, _tenviron);#endif  /* _WINMAIN_ */            if ( !managedapp )                exit(mainret);            _cexit();        }        __except ( _XcptFilter(GetExceptionCode(), GetExceptionInformation()) )

_tsetargv()  和_tsetenvp() 前者获取命令行参数的字符串,并解析分割对argc和argv进行初始化,后者解析获取到的环境变量字符串_tenvptr把结果存放到全局变量_tenviron 

这里就不贴代码了,因为都是些解析字符串的操作没什么可看的。


这里仅分析_setargv 中最为核心的代码

_TSCHAR *p;_TSCHAR *cmdstart;                  /* start of command line to parse */int numargs, numchars;//MAX_PATH 是 260 ,很有趣 ,文件的完整路径最大255 + "." + 后缀(比如exe) + " \0"static _TSCHAR _pgmname[ MAX_PATH ];/*  * __initmbctable 只能被调用一次,因此会设置一个__mbctype_initialized标记。 * __initmbctable 内部会调用_setmbcp 函数去创建一个新的multibyte code page, * 随后置__mbctype_initialized=1 */if ( __mbctype_initialized == 0 )        __initmbctable();/* 将当前进程的exe完整路径复制到_pgmname数组中 * 注意GetModuleFileName 是拿不到 程序启动参数args的,它获得仅仅是程序的完整路径而已 */GetModuleFileName( NULL, _pgmname, sizeof( _pgmname ) / sizeof(_TSCHAR));_pgmptr = _pgmname;/* 如果之前解析出来的_acmdln为空,则采用_pgmptr */cmdstart = (*_acmdln == NULCHAR) ? _pgmptr : _acmdln;/* 计算出 numargs 和 numchars 的大小  */parse_cmdline(cmdstart, NULL, NULL, &numargs, &numchars);/*  * 为argv 分配所需的空间  * 先是numargs 个指针,前numargs-1 指向路径与参数,最后一个是NULL * 紧接着是numchars 个字符,用来存放numargs-1 个指针所指的内容 */p = _malloc_crt(numargs * sizeof(_TSCHAR *) + numchars * sizeof(_TSCHAR));if (p == NULL)        _amsg_exit(_RT_SPACEARG);/* 为指针P所指向的内存空间里填充argv  */parse_cmdline(cmdstart, (char **)p, p + numargs * sizeof(char *), &numargs, &numchars);/* 至此,argc 与  argv 已经全部现形 */__argc = numargs - 1;__argv = (char **)p; 




紧接着就是initret = _cinit(TRUE);   


_cinit 这个函数所完成的操作在所有由 VS 生成的应用程序的启动代码中都有,无论是 WIN32 的、MFC 的,还是 DLL 类型,也不管使用的是动态运行库还是静态运行库。

因为它主要工作是调用_initterm_e和_initterm 来完成C/C++中  全局变量和全局对象的初始化,调用全局对象的构造函数。


不妨先看看这两个函数的实现

首先看下_initterm

/* Pointer to a function that takes no arguments and returns nothing. */
typedef void (__cdecl *_PVFV)(void);

/* Pointer to a function that takes no arguments and returns an int. */
typedef int  (__cdecl *_PIFV)(void);


#ifdef CRTDLLvoid __cdecl _initterm (#else  /* CRTDLL */static void __cdecl _initterm (#endif  /* CRTDLL */        _PVFV * pfbegin,        _PVFV * pfend        ){        /*         * walk the table of function pointers from the bottom up, until         * the end is encountered.  Do not skip the first entry.  The initial         * value of pfbegin points to the first valid entry.  Do not try to         * execute what pfend points to.  Only entries before pfend are valid.         */  沿着函数指针表从下到上,直到遇到结束标志,并不跳过表的第一项,pfbegin指向了第一个有效的表项,不要指向pfend指向的函数,只有在pfend之前的有效        while ( pfbegin < pfend )        {            /*             * if current table entry is non-NULL, call thru it.             */            if ( *pfbegin != NULL )     //如何改函数指针不为空 执行它                (**pfbegin)();            ++pfbegin;        }
 _initterm_e 跟 _initterm 差不多,只是它要执行的函数表中的每一个函数都是有返回值的:一旦某个函数的返回值不为 0,就不再继续执行后面的函数,并把该返回值返回给调用者。
/*
 * _initterm_e - call the entries of a table of int-returning functions,
 * stopping at the first one that reports a non-zero result.
 *
 * pfbegin  points to the first valid entry of the table.
 * pfend    points one past the last valid entry; it is never called.
 *
 * Returns 0 when every non-NULL entry returned 0 (or the table is empty),
 * otherwise the first non-zero value returned by an entry.  Entries after
 * the failing one are not called.
 */
int __cdecl _initterm_e (
        _PIFV * pfbegin,
        _PIFV * pfend
        )
{
        _PIFV * entry;
        int result;

        for ( entry = pfbegin; entry < pfend; ++entry )
        {
            /* NULL slots are skipped. */
            if ( *entry == NULL )
                continue;

            result = (*entry)();
            if ( result != 0 )
                return result;
        }

        return 0;
}


从代码上来看,这两个函数的作用很简单:参数 pfbegin 和 pfend 是两个指向函数指针的指针,它们标识了一个由函数指针组成的数组,pfbegin 指向数组的第一个元素,pfend 指向数组末端之后的位置。函数的工作就是简单地遍历数组中的每一个元素,取得对应的函数地址,如果不为 NULL 就执行该函数(这些函数都是无参的)。


在cinit()中是这样调用两个函数的


/* call the entries between __xi_a and __xi_z; the result is kept in initret */
initret = _initterm_e( __xi_a, __xi_z );

 /* call the entries between __xc_a and __xc_z */
 _initterm( __xc_a, __xc_z );

这里引用到了四个全局变量 ,它们的定义是这样的

_CRTALLOC(".CRT$XIA") _PVFV __xi_a[] = { NULL };_CRTALLOC(".CRT$XIZ") _PVFV __xi_z[] = { NULL };_CRTALLOC(".CRT$XCA") _PVFV __xc_a[] = { NULL };_CRTALLOC(".CRT$XCZ") _PVFV __xc_z[] = { NULL };#define _CRTALLOC(x) __declspec(allocate(x))#pragma comment(linker, "/merge:.CRT=.rdata") //



可以看出这四个变量分别位于 .CRT$XIA、.CRT$XIZ、.CRT$XCA、.CRT$XCZ 这四个段(组)中。链接器在布局时,会把 .CRT 段中的各个组按 $ 后面的名称以字母顺序排列。这样,.CRT$XIA 中的变量就排在 .CRT$XIZ 的所有变量之前,两者之间的内容便形成了一张表;.CRT$XCA、.CRT$XCZ 同理。最后,按照上面的 #pragma comment(linker, "/merge:.CRT=.rdata"),整个 .CRT 段被合并到 .rdata 段中。

再看看这些变量的类型:__xc_a/__xc_z 所在的表由 _PVFV(typedef void (__cdecl *_PVFV)(void);)类型的函数指针组成;__xi_a/__xi_z 所在的表则交给 _initterm_e 处理,表项按 _PIFV(返回 int)来调用。所以这些变量界定了 2 个初始化函数指针表。

在MSDN中关于这两个表的描述是这样的

Initializing a Global Object
Consider the following code:
以带有颜色区分的格式查看复制到剪贴板打印
/* Returns the constant 3; used below to initialize gi. */
int func(void)
{
    return 3;
}


/* Global initialized by a function call, so func() must run before main(). */
int gi = func();


/* Returns the value stored in gi. */
int main()
{
    return gi;
}
/* Returns the constant 3; used below to initialize gi. */
int func(void)
{
    return 3;
}


/* Global initialized by a function call, so func() must run before main(). */
int gi = func();


/* Returns the value stored in gi. */
int main()
{
    return gi;
}
According to the C/C++ standard, func() must be called before main() is executed. But who calls it?
One way to determine this is to set a breakpoint in func(), debug the application, and examine the stack. This is possible because the CRT source code is included with Visual Studio.
When you browse the functions on the stack, you will find that the CRT is looping through a list of function pointers and calling each one as it encounters them. These functions are either similar to func() or constructors for class instances.
The CRT obtains the list of function pointers from the Visual C++ compiler. When the compiler sees a global initializer, it generates a dynamic initializer in the .CRT$XCU section (where CRT is the section name and XCU is the group name). To obtain a list of those dynamic initializers, run the command dumpbin /all main.obj, and then search the .CRT$XCU section (when main.cpp is compiled as a C++ file, not a C file). It will be similar to the following:






SECTION HEADER #6
.CRT$XCU name
       0 physical address
       0 virtual address
       4 size of raw data
     1F2 file pointer to raw data (000001F2 to 000001F5)
     1F6 file pointer to relocation table
       0 file pointer to line numbers
       1 number of relocations
       0 number of line numbers
40300040 flags
         Initialized Data
         4 byte align
         Read Only


RAW DATA #6
  00000000: 00 00 00 00                                      ....


RELOCATIONS #6
                                                Symbol    Symbol
 Offset    Type              Applied To         Index     Name
 --------  ----------------  -----------------  --------  ------
 00000000  DIR32                      00000000         C  ??__Egi@@YAXXZ (void __cdecl `dynamic initializer for 'gi''(void))
SECTION HEADER #6
.CRT$XCU name
       0 physical address
       0 virtual address
       4 size of raw data
     1F2 file pointer to raw data (000001F2 to 000001F5)
     1F6 file pointer to relocation table
       0 file pointer to line numbers
       1 number of relocations
       0 number of line numbers
40300040 flags
         Initialized Data
         4 byte align
         Read Only


RAW DATA #6
  00000000: 00 00 00 00                                      ....


RELOCATIONS #6
                                                Symbol    Symbol
 Offset    Type              Applied To         Index     Name
 --------  ----------------  -----------------  --------  ------
 00000000  DIR32                      00000000         C  ??__Egi@@YAXXZ (void __cdecl `dynamic initializer for 'gi''(void))
The CRT defines two pointers:
__xc_a in .CRT$XCA
__xc_z in .CRT$XCZ
Neither group has any other symbols defined except __xc_a and __xc_z.
Now, when the linker reads various .CRT groups, it combines them in one section and orders them alphabetically. This means that the user-defined global initializers (which the Visual C++ compiler puts in .CRT$XCU) will always come after .CRT$XCA and before .CRT$XCZ.
The section will resemble the following:
以带有颜色区分的格式查看复制到剪贴板打印
.CRT$XCA
            __xc_a
.CRT$XCU
            Pointer to Global Initializer 1
            Pointer to Global Initializer 2
.CRT$XCZ
            __xc_z
.CRT$XCA
            __xc_a
.CRT$XCU
            Pointer to Global Initializer 1
            Pointer to Global Initializer 2
.CRT$XCZ
            __xc_z
So, the CRT library uses both __xc_a and __xc_z to determine the start and end of the global initializers list because of the way in which they are laid out in memory after the image is loaded. 






根据微软的解释,当 VC 编译器看到一个需要动态初始化的全局对象时,就为该对象生成一个动态初始化函数,并把这个初始化函数的指针放入 .CRT 段的 XCU 组中。链接器生成应用程序时,是按照字母顺序组织段内各组的,因此当使用 C++ 编译、链接程序时,
这些函数指针就被放到了 __xc_a 和 __xc_z 之间。


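__xc_a 和 __xc_z 各自只含有一个 NULL 元素,本身并不指向任何函数:__xc_a 标记这组函数指针的起点(它后面紧跟着各个初始化函数的指针),__xc_z 标记这组函数指针的末尾(位于最后一个函数指针之后)。_initterm 在遍历时会跳过值为 NULL 的表项,所以这两个标记本身不会被调用。
同样的道理,另一组返回 int 的初始化函数(即 _cinit 中 "do initializations" 那一步,区别于后面的 "do C++ initializations")的指针被放到 __xi_a 和 __xi_z 之间,由 _initterm_e 负责调用。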


这样当我们调用
initret = _initterm_e( __xi_a, __xi_z );
 _initterm( __xc_a, __xc_z );
的时候,所有在编译和链接后被放入 .CRT$XIA 与 .CRT$XIZ 之间、以及 .CRT$XCA 与 .CRT$XCZ 之间的函数指针所指向的函数,都会被依次调用。



下面是完整的_cinit函数代码



/*
 * _cinit - perform the C runtime's data initialization.
 *
 * initFloatingPrecision  passed through unchanged to the floating point
 *                        initialization routine.
 *
 * Steps, in order:
 *   1. initialize the floating point package (if present);
 *   2. call the entries between __xi_a and __xi_z via _initterm_e;
 *   3. call the entries between __xc_a and __xc_z (C++ initializations)
 *      via _initterm;
 *   4. in non-CRTDLL builds, invoke the dynamic TLS initialization
 *      callback when one is present.
 *
 * Returns 0 on success, or the non-zero value reported by _initterm_e, in
 * which case steps 3 and 4 are skipped.
 */
int __cdecl _cinit (
        int initFloatingPrecision
        )
{
        int status;

        /* Step 1: floating point package. */
#ifdef CRTDLL
        _fpmath(initFloatingPrecision);
#else  /* CRTDLL */
        /*
         * Call through _FPinit only when it is set and passes the
         * _IsNonwritableInCurrentImage check.
         */
        if (_FPinit != NULL &&
            _IsNonwritableInCurrentImage((PBYTE)&_FPinit))
        {
            _FPinit(initFloatingPrecision);
        }
        _initp_misc_cfltcvt_tab();
#endif  /* CRTDLL */

        /* Step 2: initializers that can fail; give up on the first error. */
        status = _initterm_e( __xi_a, __xi_z );
        if ( status != 0 )
            return status;

#ifdef _RTC
        atexit(_RTC_Terminate);
#endif  /* _RTC */

        /* Step 3: C++ initializations. */
        _initterm( __xc_a, __xc_z );

#ifndef CRTDLL
        /*
         * Step 4: if there are dynamically initialized __declspec(thread)
         * variables, run their initialization for the current thread by
         * calling __dyn_tls_init through the callback defined in
         * tlsdyn.obj.  The OS call with DLL_PROCESS_ATTACH cannot be
         * relied on because, on Win2K3 and before, it happens before the
         * CRT is initialized.
         */
        if (__dyn_tls_init_callback != NULL &&
            _IsNonwritableInCurrentImage((PBYTE)&__dyn_tls_init_callback))
        {
            __dyn_tls_init_callback(NULL, DLL_THREAD_ATTACH, NULL);
        }
#endif  /* CRTDLL */

        return 0;
}



在完成了所有的初始化工作之后,启动代码根据是否定义了 _WINMAIN_ 来调用 _tWinMain() 或 _tmain()(也就是我们编写的 WinMain()/main() 函数),这之后我们的代码才获得了执行权。

0 0
原创粉丝点击