ld.so分析

来源:互联网 发布:赖昌星太子知乎 编辑:程序博客网 时间:2024/05/07 05:00

http://blog.chinaunix.net/uid-725631-id-253186.html

ld.so分析1

1.入口
elf/rtld.c中

#ifdef RTLD_START
RTLD_START
#else
# error "sysdeps/MACHINE/dl-machine.h fails to define RTLD_START"
#endif

该宏定义在sysdeps/i386/dl-machine.h

#define RTLD_START asm ("\n\
    .text\n\
    .align 16\n\
0:    movl (%esp), %ebx\n\
    ret\n\
    .align 16\n\
.globl _start\n\ ld.so入口
.globl _dl_start_user\n\
_start:\n\
    # Note that _dl_start gets the parameter in %eax.\n\
    movl %esp, %eax\n\ 当前esp值作为参数传递给_dl_start,_dl_start函数原型是static Elf32_Addr __attribute__ ((__used__)) __attribute__ ((regparm (3), stdcall)) _dl_start (void *arg)
    call _dl_start\n\ //调用_dl_start,完成动态链接,返回用户入口地址,_dl_start自己平栈
_dl_start_user:\n\
    # Save the user entry point address in %edi.\n\
    movl %eax, %edi\n\ 保存用户程序入口地址
    # Point %ebx at the GOT.\n\
    call 0b\n\ //等价于 call 1f;1:pop %ebx;addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx;获取GOT地址,存入%ebx
    addl $_GLOBAL_OFFSET_TABLE_, %ebx\n\//%ebx指向本条指令地址,加上GOT相对于本指令的偏移,即得到GOT地址
    # Store the highest stack address\n\
/*
00000020  0000950a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000026  00009603 R_386_GOT32       00000000   __libc_stack_end
0000002e  00009709 R_386_GOTOFF      00000004   _dl_skip_args
*/   
    movl __libc_stack_end@GOT(%ebx), %eax\n\ __libc_stack_end是GLOBAL变量,存在于GOT中
    movl %esp, (%eax)\n\ 存入esp
    # See if we were run as a command with the executable file\n\
    # name as an extra leading argument.\n\
    movl _dl_skip_args@GOTOFF(%ebx), %eax\n\ _dl_skip_args是LOCAL变量,不存在于GOT中
    # Pop the original argument count.\n\
    popl %edx\n\弹出原始参数个数
    # Adjust the stack pointer to skip _dl_skip_args words.\n\
    leal (%esp,%eax,4), %esp\n\//跳过需要skip的参数,这些参数被ld.so处理了
    # Subtract _dl_skip_args from argc.\n\
    subl %eax, %edx\n\减掉
    # Push argc back on the stack.\n\
    push %edx\n\//重新压回
    # The special initializer gets called with the stack just\n\
    # as the application's entry point will see it; it can\n\
    # switch stacks if it moves these contents over.\n\
" RTLD_START_SPECIAL_INIT "\n\ 空宏
    # Load the parameters again.\n\
    # (eax, edx, ecx, *--esp) = (_dl_loaded, argc, argv, envp)\n\
    //为_dl_init准备参数,_dl_init的原型是
    //void __attribute__((regparm(3),stdcall)) _dl_init (struct link_map *main_map, int argc, char **argv, char **env)
    movl _rtld_local@GOTOFF(%ebx), %eax\n\//取ld.so的_rtld_local入%eax
    leal 8(%esp,%edx,4), %esi\n\ esi指向envp
    leal 4(%esp), %ecx\n\ ecx指向argv
    pushl %esi\n\ 第四个参数使用堆栈传递
    # Call the function to run the initializers.\n\
    call _dl_init_internal@PLT\n\调用_dl_init
    # Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
    leal _dl_fini@GOTOFF(%ebx), %edx\n\ 取_dl_fini入edx,传给user
    # Jump to the user's entry point.\n\
    jmp *%edi\n\
    .previous\n\
");

结合crt1.o的分析,大家就很清楚控制和参数是如何被传递的。

注意扩展属性regparm(3)表示传参数时前三个参数使用%eax,%edx,%ecx寄存器,后面的参数仍然使用堆栈传递
stdcall属性表示函数自己平栈,除非使用了可变参数(仍然由调用者平栈)

2.内核传递给ld.so的参数在堆栈中的形式如下

  position            content                     size (bytes) + comment
  ------------------------------------------------------------------------
  stack pointer ->  [ argc = number of args ]     4
                    [ argv[0] (pointer) ]         4   (program name)
                    [ argv[1] (pointer) ]         4
                    [ argv[..] (pointer) ]        4 * x
                    [ argv[n - 1] (pointer) ]     4
                    [ argv[n] (pointer) ]         4   (= NULL)

                    [ envp[0] (pointer) ]         4
                    [ envp[1] (pointer) ]         4
                    [ envp[..] (pointer) ]        4
                    [ envp[term] (pointer) ]      4   (= NULL)

                    [ auxv[0] AT_PHDR (Elf32_auxv_t) ]    8
                    [ auxv[1] AT_PHENT (Elf32_auxv_t) ]    8
                    [ auxv[2] AT_PHNUM (Elf32_auxv_t) ]   8
                    [ auxv[3] AT_BASE (Elf32_auxv_t) ]   8
                    [ auxv[4] AT_FLAGS (Elf32_auxv_t) ]   8
                    [ auxv[5] AT_ENTRY (Elf32_auxv_t) ]   8
                    [ auxv[6] AT_UID (Elf32_auxv_t) ]   8
                    [ auxv[7] AT_EUID (Elf32_auxv_t) ]   8
                    [ auxv[8] AT_GID (Elf32_auxv_t) ]   8
                    [ auxv[9] AT_EGID (Elf32_auxv_t) ]   8
                    [ auxv[10] AT_HWCAP (Elf32_auxv_t) ]   8
                    [ auxv[11] AT_PAGESZ (Elf32_auxv_t) ]   8
                    [ auxv[12] AT_CLKTCK (Elf32_auxv_t) ]   8
                    [ auxv[13] AT_PLATFORM (Elf32_auxv_t) ]   8
                    [ auxv[14] (Elf32_auxv_t) ] 8   (= AT_NULL vector)

                    [ padding ]                   0 - 15                  
                    [ padding ]                   16                   
                    [ padding ]                   0 - 15                   

                    [k_platform]                  0 - 65
                    [ argument ASCIIZ strings ]   >= 0
                    [ environment ASCIIZ str. ]   >= 0
                    [filename] >=0

  (0xbffffffc)      [ end marker ]                4   (= NULL)

  (0xc0000000)      < top of stack >              0   (virtual)

 

ld.so分析2

内核是如何执行程序的,本分析基于内核版本2.4.0

1.用户空间接口
man execve显示如下的函数原型

       execve - execute program

SYNOPSIS
       #include <unistd.h>

       int  execve(const  char  *filename,  char  *const  argv [], char *const
       envp[]);

2.glibc中实现

在glibc中,execve对应的文件是
sysdeps/unix/sysv/linux/execve.c

/* glibc wrapper for the execve system call (K&R-style definition).
   FILE, ARGV and ENVP are handed to INLINE_SYSCALL (execve, 3, ...);
   the value of that expression is returned to the caller.  When
   __BOUNDED_POINTERS__ is enabled, ARGV and ENVP are first copied into
   alloca'd arrays whose entries have been passed through CHECK_STRING.  */
int
__execve (file, argv, envp)
     const char *file;
     char *const argv[];
     char *const envp[];
{
  /* If this is a threaded application kill all other threads.  */
  if (__pthread_kill_other_threads_np)
    __pthread_kill_other_threads_np ();
#if __BOUNDED_POINTERS__ // per the article's annotation: this macro is not defined
  {
    char *const *v;
    int i;
    char *__unbounded *__unbounded ubp_argv;
    char *__unbounded *__unbounded ubp_envp;
    char *__unbounded *__unbounded ubp_v;

    /* Walk to the terminating NULL of argv; i counts the entries
       including that terminator.  */
    for (v = argv; *v; v++)
      ;
    i = v - argv + 1;
    ubp_argv = (char *__unbounded *__unbounded) alloca (sizeof (*ubp_argv) * i);
    /* Copy the i - 1 real entries, then NULL-terminate the copy.  */
    for (v = argv, ubp_v = ubp_argv; --i; v++, ubp_v++)
      *ubp_v = CHECK_STRING (*v);
    *ubp_v = 0;

    /* Same procedure for the environment vector.  */
    for (v = envp; *v; v++)
      ;
    i = v - envp + 1;
    ubp_envp = (char *__unbounded *__unbounded) alloca (sizeof (*ubp_envp) * i);
    for (v = envp, ubp_v = ubp_envp; --i; v++, ubp_v++)
      *ubp_v = CHECK_STRING (*v);
    *ubp_v = 0;

    return INLINE_SYSCALL (execve, 3, CHECK_STRING (file), ubp_argv, ubp_envp);
  }
#else
  return INLINE_SYSCALL (execve, 3, file, argv, envp);// per the article: this is the branch actually compiled
#endif
}

INLINE_SYSCALL的定义在
sysdeps/unix/sysv/linux/i386/sysdeps.h

/* Issue system call NAME with NR arguments ARGS as a GCC statement
   expression.  The asm template is LOADARGS_<nr>, then a move of operand 1
   (the immediate __NR_<name>) into %eax, then "int $0x80", then
   RESTOREARGS_<nr>; the result is read back from %eax ("=a").
   A result >= 0xfffff001 is treated as an error: errno is set to the
   negated result and the expression evaluates to 0xffffffff cast to int;
   otherwise the expression evaluates to the result cast to int.  */
#define INLINE_SYSCALL(name, nr, args...) \
  ({                                          \
    unsigned int resultvar;                              \
    asm volatile (                                  \
    LOADARGS_##nr                                  \
    "movl %1, %%eax\n\t"                              \
    "int $0x80\n\t"                                  \
    RESTOREARGS_##nr                                  \
    : "=a" (resultvar)                                  \
    : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc");              \
    if (resultvar >= 0xfffff001)                          \
      {                                          \
    __set_errno (-resultvar);                          \
    resultvar = 0xffffffff;                              \
      }                                          \
    (int) resultvar; })

3.手工展开看看

  ({                                         
    unsigned int resultvar;                             
    asm volatile (                                 
    LOADARGS_3                                 
    "movl %1, %%eax\n\t"                             
    "int $0x80\n\t"                                 
    RESTOREARGS_3                                 
    : "=a" (resultvar)                                 
    : "i" (__NR_execve) ASMFMT_3(args) : "memory", "cc");             
    if (resultvar >= 0xfffff001)                         
      {                                         
    __set_errno (-resultvar);                         
    resultvar = 0xffffffff;                             
      }                                         
    (int) resultvar; })

其中__NR_execve是execve的系统调用号,为11,定义在头文件unistd.h中
这其中又涉及到三个宏

/* Asm prologue for the first syscall argument: emits the assembler macros
   bpushl and bmovl on operand 2, each given the symbol .L__X'<reg> and the
   register itself (%k2).  */
#define LOADARGS_1 \
    "bpushl .L__X'%k2, %k2\n\t"                              \
    "bmovl .L__X'%k2, %k2\n\t"
/* The three-argument form reuses the one-argument prologue.  */
#define LOADARGS_3    LOADARGS_1

/* Asm epilogue matching LOADARGS_1: emits bpopl on operand 2.  */
#define RESTOREARGS_1 \
    "bpopl .L__X'%k2, %k2\n\t"
/* The three-argument form reuses the one-argument epilogue.  */
#define RESTOREARGS_3    RESTOREARGS_1

/* Input-operand list for three arguments: arg1 under constraint "aCD",
   arg2 under "c" (%ecx), arg3 under "d" (%edx).  The leading comma lets
   the list follow the syscall-number operand in INLINE_SYSCALL.  */
#define ASMFMT_3(arg1, arg2, arg3) \
    , "aCD" (arg1), "c" (arg2), "d" (arg3)

展开
  ({                                         
    unsigned int resultvar;                             
    asm volatile (                                 
    "bpushl .L__X'%k2, %k2\n\t"                             
    "bmovl .L__X'%k2, %k2\n\t"   
    "movl %1, %%eax\n\t"                             
    "int $0x80\n\t"                                 
    "bpopl .L__X'%k2, %k2\n\t"
    : "=a" (resultvar)                                 
    : "i" (11) , "aCD" (arg1), "c" (arg2), "d" (arg3) : "memory", "cc");             
    if (resultvar >= 0xfffff001)                         
      {                                         
    __set_errno (-resultvar);                         
    resultvar = 0xffffffff;                             
      }                                         
    (int) resultvar; })

这里又涉及到三个asm宏,bpushl,bmovl,bpopl
定义如下(也在该文件sysdeps.h中)
/* File-scope asm that sets up the helpers used by LOADARGS_x / RESTOREARGS_x.
   It first assigns an assembler symbol .L__X'<reg> to each register:
   1 for %ebx, 2 for %ecx and %edx, 3 for every other register listed.
   It then defines three assembler macros taking (name, reg), where name is
   that symbol's value:
     bpushl: name == 1 -> nothing; name == 2 -> xchgl reg, %ebx;
             otherwise -> pushl %ebx.
     bpopl:  name == 1 -> nothing; name == 2 -> xchgl reg, %ebx;
             otherwise -> popl %ebx.
     bmovl:  name == 1 or name == 2 -> nothing; otherwise -> movl reg, %ebx.
   (".if expr" assembles its body when expr is non-zero, hence the
   "1 - name" / "2 - name" tests.)  */
asm (".L__X'%ebx = 1\n\t"
     ".L__X'%ecx = 2\n\t"
     ".L__X'%edx = 2\n\t"
     ".L__X'%eax = 3\n\t"
     ".L__X'%esi = 3\n\t"
     ".L__X'%edi = 3\n\t"
     ".L__X'%ebp = 3\n\t"
     ".L__X'%esp = 3\n\t"
     ".macro bpushl name reg\n\t"
     ".if 1 - \\name\n\t"
     ".if 2 - \\name\n\t"
     "pushl %ebx\n\t"
     ".else\n\t"
     "xchgl \\reg, %ebx\n\t"
     ".endif\n\t"
     ".endif\n\t"
     ".endm\n\t"
     ".macro bpopl name reg\n\t"
     ".if 1 - \\name\n\t"
     ".if 2 - \\name\n\t"
     "popl %ebx\n\t"
     ".else\n\t"
     "xchgl \\reg, %ebx\n\t"
     ".endif\n\t"
     ".endif\n\t"
     ".endm\n\t"
     ".macro bmovl name reg\n\t"
     ".if 1 - \\name\n\t"
     ".if 2 - \\name\n\t"
     "movl \\reg, %ebx\n\t"
     ".endif\n\t"
     ".endif\n\t"
     ".endm\n\t");

根据约束条件
%eax分配给resultvar
%ecx分配给argv
%edx分配给envp
则约束条件"aCD"中,a(%eax)已分配,C无效,因此分配%edi给file

手工展开
    mov     file,%edi
    mov     argv,%ecx
    mov     envp,%edx
    bpushl     .L__X'%edi, %edi
    bmovl     .L__X'%edi, %edi
    movl     $11, %eax
    int     $0x80
    bpopl     .L__X'%edi, %edi

手工展开
    mov     file,%edi
    mov     argv,%ecx
    mov     envp,%edx
    .if 1 - .L__X'%edi
    .if 2 - .L__X'%edi
    pushl %ebx
    .else
    xchgl %edi, %ebx
    .endif
    .endif
    .if 1 - .L__X'%edi
    .if 2 - .L__X'%edi
    movl %edi, %ebx
    .endif
    .endif
    movl     $11, %eax
    int     $0x80
    .if 1 - .L__X'%edi
    .if 2 - .L__X'%edi
    popl %ebx
    .else
    xchgl %edi, %ebx
    .endif
    .endif

由于.L__X'%edi = 3,展开

    mov     file,%edi
    mov     argv,%ecx
    mov     envp,%edx
    .if 1 - 3
    .if 2 - 3
    pushl %ebx
    .else
    xchgl %edi, %ebx
    .endif
    .endif
    .if 1 - 3
    .if 2 - 3
    movl %edi, %ebx
    .endif
    .endif
    movl     $11, %eax
    int     $0x80
    .if 1 - 3
    .if 2 - 3
    popl %ebx
    .else
    xchgl %edi, %ebx
    .endif
    .endif

.if为真的条件是不等于0,展开

    mov     file,%edi
    mov     argv,%ecx
    mov     envp,%edx
    pushl %ebx
    movl %edi, %ebx
    movl     $11, %eax
    int     $0x80
    popl %ebx


最终编译结果是
mov    0x8(%ebp),%edi
mov    0xc(%ebp),%ecx
mov    0x10(%ebp),%edx
push   %ebx
mov    %edi,%ebx
mov    $0xb,%eax
int    $0x80
pop    %ebx

正好一致

系统调用传参使用%ebx,%ecx,%edx,%esi,%edi这五个寄存器,因此这里的宏最多只处理五个参数(i386内核还可以用%ebp传递第六个参数,例如mmap2).

4.返回值的处理
#  define __set_errno(val) (*__errno_location ()) = (val)

    if (resultvar >= 0xfffff001)//如果返回值>=0xfffff001,则出错                         
      {                                         
    __set_errno (-resultvar);// 预处理时被替换成(*__errno_location ()) = (-resultvar);设置errno为-resultvar         
    resultvar = 0xffffffff;    //-1                         
      }


__errno_location的定义是
sysdeps/generic/errno-loc.c

/* Return the address of errno.  This is the function that the
   __set_errno macro shown earlier dereferences to store an error code.  */
int * __errno_location (void)
{
  return &errno;
}


5.也可使用如下宏生成调用系统调用execve的代码
linux/include/asm-i386/unistd.h

/* Generate the definition of a three-argument system call stub
   `type name(type1 arg1, type2 arg2, type3 arg3)`.
   The stub executes "int $0x80" with:
     %eax = __NR_<name>  (constraint "0" ties it to output operand 0, "=a"),
     %ebx = arg1, %ecx = arg2, %edx = arg3 (each cast to long).
   The value left in %eax is passed to __syscall_return(type, __res),
   which is defined elsewhere and produces the stub's return value.  */
#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
type name(type1 arg1,type2 arg2,type3 arg3) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res) \
    : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
          "d" ((long)(arg3))); \
__syscall_return(type,__res); \
}

例如
_syscall3(int,execve,const char *,file,char *const,argv[],char *const,envp[])
能生成和glibc相似的代码


6.sys_execve
linux/arch/i386/kernel/process.c

/*
 * sys_execve() executes a new program.
 *
 * The user-space arguments arrive in the saved registers: regs.ebx is the
 * file name pointer, regs.ecx is argv and regs.edx is envp.  The file name
 * is obtained through getname(); if that yields an error pointer, its
 * PTR_ERR() value is returned.  Otherwise the result of do_execve() is
 * returned.
 */
asmlinkage int sys_execve(struct pt_regs regs)
{
    int error;
    char * filename;

    filename = getname((char *) regs.ebx);
    error = PTR_ERR(filename);
    if (IS_ERR(filename))
        goto out;
    /* Article's note: once do_execve has replaced the process image,
       control passes to the new image only on return to user space. */
    error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
    if (error == 0)
        current->ptrace &= ~PT_DTRACE;/* clear PT_DTRACE (article: cancels single-step tracing) */
    putname(filename);
out:
    return error;
}

7.do_execve(sys_execve->do_execve)
fs/exec.c

/*
 * do_execve() does the work behind sys_execve().
 *
 * It opens the executable, initialises a struct linux_binprm, counts the
 * argv and envp vectors, and copies the file name, then the environment
 * strings, then the argument strings into the binprm argument pages,
 * working downwards from bprm.p.  Finally it calls
 * search_binary_handler() to find a loader for the image.
 *
 * Returns the (non-negative) result of search_binary_handler() on success.
 * On failure returns a negative value; on the `out` path the file
 * reference and any allocated argument pages are released first.
 */
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
    struct linux_binprm bprm;
    struct file *file;
    int retval;
    int i;

    file = open_exec(filename);

    retval = PTR_ERR(file);
    if (IS_ERR(file))
        return retval;

    bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);/* start one pointer below the end of the MAX_ARG_PAGES area (article: 32 pages, last word holds NULL) */
    memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));/* clear the page pointer array */

    bprm.file = file;
    bprm.filename = filename;
    bprm.sh_bang = 0;
    bprm.loader = 0;
    bprm.exec = 0;
    /* Count the entries of argv (article: the array is 0-terminated);
       a negative count is an error and is returned as-is. */
    if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
        allow_write_access(file);
        fput(file);
        return bprm.argc;
    }

    /* Count the entries of envp the same way. */
    if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
        allow_write_access(file);
        fput(file);
        return bprm.envc;
    }

    retval = prepare_binprm(&bprm);
    if (retval < 0)
        goto out;

    retval = copy_strings_kernel(1, &bprm.filename, &bprm);/* copy the file name */
    if (retval < 0)
        goto out;

    /* Remember the position reached after the file name was copied. */
    bprm.exec = bprm.p;
    retval = copy_strings(bprm.envc, envp, &bprm);/* copy envp strings */
    if (retval < 0)
        goto out;

    retval = copy_strings(bprm.argc, argv, &bprm);/* copy argv strings */
    if (retval < 0)
        goto out;

    retval = search_binary_handler(&bprm,regs);
    if (retval >= 0)
        /* execve success */
        return retval;

out:
    /* Something went wrong, return the inode and free the argument pages*/
    allow_write_access(bprm.file);
    if (bprm.file)
        fput(bprm.file);

    for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
        struct page * page = bprm.page[i];
        if (page)
            __free_page(page);
    }

    return retval;
}

8.copy_strings(sys_execve->do_execve->copy_strings)
fs/exec.c

/*
 * 'copy_strings()' copies argument/envelope strings from user
 * memory to free pages in kernel mem. These are in a format ready
 * to be put directly into the top of new user memory.
 *
 * The argc strings of argv are processed from the last to the first.
 * For each one, bprm->p is lowered by the string length and the bytes are
 * copied into bprm->page[], allocating pages on demand.
 *
 * Returns 0 on success, -EFAULT if a pointer or string cannot be read,
 * -E2BIG if the remaining space (bprm->p) is smaller than the string,
 * -ENOMEM if a page cannot be allocated.
 */
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
{
    while (argc-- > 0) {/* argc is decremented here ... */
        char *str;
        int len;
        unsigned long pos;
        /* ... so argv+argc below addresses the current last string */
        if (get_user(str, argv+argc) || !str || !(len = strnlen_user(str, bprm->p)))
            return -EFAULT;
        if (bprm->p < len) /* not enough room left */
            return -E2BIG;

        bprm->p -= len;/* strings are laid out from the end towards the start */
        /* XXX: add architecture specific overflow check here. */

        pos = bprm->p;
        while (len > 0) {
            char *kaddr;
            int i, new, err;
            struct page *page;
            int offset, bytes_to_copy;

            offset = pos % PAGE_SIZE;/* offset within the page */
            i = pos/PAGE_SIZE;/* page index */
            page = bprm->page[i];
            new = 0;
            if (!page) {
                page = alloc_page(GFP_HIGHUSER);
                bprm->page[i] = page;
                if (!page)
                    return -ENOMEM;
                new = 1;
            }
            kaddr = kmap(page);

            if (new && offset)/* fresh page with offset > 0: zero [0, offset) */
                memset(kaddr, 0, offset);
            bytes_to_copy = PAGE_SIZE - offset;
            if (bytes_to_copy > len) {
                bytes_to_copy = len;
                if (new)/* fresh page: zero [offset+len, PAGE_SIZE) */
                    memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
            }
            err = copy_from_user(kaddr + offset, str, bytes_to_copy);
            kunmap(page);

            if (err)
                return -EFAULT;

            pos += bytes_to_copy;/* the string may continue on the next page */
            str += bytes_to_copy;
            len -= bytes_to_copy;
        }
    }
    return 0;
}

执行到这里bprm->p内存空间布局如下
                    [ argument ASCIIZ strings ]   >= 0
                    [ environment ASCIIZ str. ]   >= 0
                    [filename]
  (0xbffffffc)      [ end marker ]                4   (= NULL)

  (0xc0000000)      < top of stack >              0   (virtual)

写一个程序验证一下
系统redhat7.2
[root@proxy ~]# uname -a
Linux proxy 2.4.7-10smp #1 SMP Thu Sep 6 17:09:31 EDT 2001 i686 unknown
[root@proxy ~]#
[root@proxy ~]# cat 1.c
#include <ctype.h>
#include <stdio.h>

/*
 * Demonstration program: print argc and the addresses of argv and envp,
 * then dump every byte from the start of the argv[] array up to
 * 0xc0000000 (the top-of-stack address used in the layout diagrams of
 * this article).  Printable bytes are written as characters, all other
 * bytes as a backslash followed by their hexadecimal value.
 *
 * The hard-coded upper bound only makes sense on the 32-bit Linux system
 * the article was written on; elsewhere the loop will fault.
 */
int main(int argc,char * argv[],char * envp[])
{
    unsigned char * p;

    /* %p requires void * arguments */
    printf("%d,%p,%p\n",argc,(void *)argv,(void *)envp);

    for (p = (unsigned char *)argv; p < (unsigned char *)0xc0000000; p++) {
        /* isprint() is declared in <ctype.h>; *p is unsigned char, so the
           argument is always in the valid range */
        if (isprint(*p))
            printf("%c",*p);
        else
            printf("\\%x",*p);
    }
    return 0;
}

[root@proxy ~]# ./a.out
1,0xbffffb04,0xbffffb0c
\3\fc\ff\bf\0\0\0\0\b\fc\ff\bf\15\fc\ff\bf$\fc\ff\bf<\fc\ff\bf^\fc\ff\bfj\fc\ff\bft\fc\ff\bf7\fe\ff\bfV\fe\ff\bfp\fe\ff\bf\85\fe\ff\bf\9c\fe\ff\bf\a7\fe\ff\bf\b4\fe\ff\bf\bc\fe\ff\bf\cc\fe\ff\bf\da\fe\ff\bf\e8\fe\ff\bf\f9\fe\ff\bf\7\ff\ff\bf\12\ff\ff\bf\1d\ff\ff\bfI\ff\ff\bf|\ff\ff\bf\d7\ff\ff\bf\ea\ff\ff\bf\0\0\0\0\10\0\0\0\ff\fb\83\3\6\0\0\0\0\10\0\0\11\0\0\0d\0\0\0\3\0\0\04\80\4\8\4\0\0\0 \0\0\0\5\0\0\0\6\0\0\0\7\0\0\0\0\0\0@\8\0\0\0\0\0\0\0\9\0\0\0\90\83\4\8\b\0\0\0\0\0\0\0\c\0\0\0\0\0\0\0\d\0\0\0\0\0\0\0\e\0\0\0\0\0\0\0\f\0\0\0\fe\fb\ff\bf\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0i686\0./a.out\0PWD=/root\0HOSTNAME=proxy\0QTDIR=/usr/lib/qt-2.3.1\0LESSOPEN=|/usr/bin/lesspipe.sh %s\0KDEDIR=/usr\0USER=root\0LS_COLORS=no=00:fi=00:di=01;34:ln=01;36:pi=40;33:so=01;35:bd=40;33;01:cd=40;33;01:or=01;05;37;41:mi=01;05;37;41:ex=01;32:*.cmd=01;32:*.exe=01;32:*.com=01;32:*.btm=01;32:*.bat=01;32:*.sh=01;32:*.csh=01;32:*.tar=01;31:*.tgz=01;31:*.arj=01;31:*.taz=01;31:*.lzh=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.gz=01;31:*.bz2=01;31:*.bz=01;31:*.tz=01;31:*.rpm=01;31:*.cpio=01;31:*.jpg=01;35:*.gif=01;35:*.bmp=01;35:*.xbm=01;35:*.xpm=01;35:*.png=01;35:*.tif=01;35:\0MACHTYPE=i386-redhat-linux-gnu\0MAIL=/var/spool/mail/root\0INPUTRC=/etc/inputrc\0BASH_ENV=/root/.bashrc\0LANG=en_US\0LOGNAME=root\0SHLVL=1\0SHELL=/bin/bash\0USERNAME=root\0HOSTTYPE=i386\0OSTYPE=linux-gnu\0HISTSIZE=1000\0HOME=/root\0TERM=linux\0SSH_AUTH_SOCK=/tmp/ssh-XXi40Qtw/agent.23262\0SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass\0PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/X11R6/bin:/root/bin\0SSH_TTY=/dev/pts/0\0_=./a.out\0./a.out\0\0\0\0\0



9.search_binary_handler(sys_execve->do_execve->search_binary_handler)
fs/exec.c

/*
 * cycle the list of binary formats handler, until one recognizes the image
 *
 * Each registered format's load_binary hook is tried in turn.  A result
 * >= 0 means the image was loaded: the file reference is dropped,
 * current->did_exec is set and the result is returned.  A result other
 * than -ENOEXEC stops the search.  With CONFIG_KMOD, if every format
 * answered -ENOEXEC and the first four bytes of bprm->buf are not all
 * printable, a module named "binfmt-XXXX" (XXXX taken from the 16-bit
 * value at bprm->buf[2]) is requested and the list is walked once more.
 *
 * Returns the last handler result (negative on failure).
 */
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
    int try,retval=0;
    struct linux_binfmt *fmt;
#ifdef __alpha__
    /* handle /sbin/loader.. */
    {
        struct exec * eh = (struct exec *) bprm->buf;

        if (!bprm->loader && eh->fh.f_magic == 0x183 &&
        (eh->fh.f_flags & 0x3000) == 0x3000)
        {
        char * dynloader[] = { "/sbin/loader" };
        struct file * file;
        unsigned long loader;

        allow_write_access(bprm->file);
        fput(bprm->file);
        bprm->file = NULL;

            loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);

        file = open_exec(dynloader[0]);
        retval = PTR_ERR(file);
        if (IS_ERR(file))
            return retval;
        bprm->file = file;
        bprm->loader = loader;
        retval = prepare_binprm(bprm);
        if (retval<0)
            return retval;
        /* should call search_binary_handler recursively here,
           but it does not matter */
        }
    }
#endif
    /* At most two passes over the format list; the second pass happens
       only after a module request in the CONFIG_KMOD branch below. */
    for (try=0; try<2; try++) {
        read_lock(&binfmt_lock);
        for (fmt = formats ; fmt ; fmt = fmt->next) {
            int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
            if (!fn)
                continue;
            if (!try_inc_mod_count(fmt->module))
                continue;
            /* The lock is released while the handler runs. */
            read_unlock(&binfmt_lock);
            retval = fn(bprm, regs);/* call this format's load_binary */
            if (retval >= 0) {/* success */
                put_binfmt(fmt);
                allow_write_access(bprm->file);/* allow write */
                if (bprm->file)
                    fput(bprm->file);
                bprm->file = NULL;
                current->did_exec = 1;/* record that the task has exec'd */
                return retval;
            }
            read_lock(&binfmt_lock);
            put_binfmt(fmt);
            if (retval != -ENOEXEC)
                break;
            if (!bprm->file) {
                read_unlock(&binfmt_lock);
                return retval;
            }
        }
        read_unlock(&binfmt_lock);
        if (retval != -ENOEXEC) {
            break;
#ifdef CONFIG_KMOD
        }else{
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
            char modname[20];
            if (printable(bprm->buf[0]) &&
                printable(bprm->buf[1]) &&
                printable(bprm->buf[2]) &&
                printable(bprm->buf[3]))
                break; /* -ENOEXEC: a header of four printable characters is not accepted */
            sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
            request_module(modname);
#endif
        }
    }
    return retval;
}

elf文件的相关处理结构在fs/binfmt_elf.c中

/* Initialisation hook: registers the ELF format descriptor elf_format
   via register_binfmt() and returns that call's result.  */
static int __init init_elf_binfmt(void)
{
    return register_binfmt(&elf_format);
}

/* ELF binary format descriptor, initialised positionally.  The field order
   of struct linux_binfmt is not shown here; search_binary_handler() reads
   ->next, ->module and ->load_binary, which presumably correspond to the
   first three entries (NULL, THIS_MODULE, load_elf_binary) -- confirm
   against the struct declaration.  */
static struct linux_binfmt elf_format = {
    NULL, THIS_MODULE, load_elf_binary, load_elf_library, elf_core_dump, ELF_EXEC_PAGESIZE
};

因此elf的load_binary函数是load_elf_binary

ld.so分析3

内核中load_elf_binary如何执行

1.load_elf_binary
fs/binfmt_elf.c

/*
* These are the functions used to load ELF style executables and shared
* libraries.  There is no binary dependent code anywhere else.
*/

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2


static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
    struct file *interpreter = NULL; /* to shut gcc up */
     unsigned long load_addr = 0, load_bias;
    int load_addr_set = 0;
    char * elf_interpreter = NULL;
    unsigned int interpreter_type = INTERPRETER_NONE;
    unsigned char ibcs2_interpreter = 0;
    mm_segment_t old_fs;
    unsigned long error;
    struct elf_phdr * elf_ppnt, *elf_phdata;
    unsigned long elf_bss, k, elf_brk;
    int elf_exec_fileno;
    int retval, size, i;
    unsigned long elf_entry, interp_load_addr = 0;
    unsigned long start_code, end_code, start_data, end_data;
    struct elfhdr elf_ex;
    struct elfhdr interp_elf_ex;
      struct exec interp_ex;
    char passed_fileno[6];

    /* Get the exec-header */
    elf_ex = *((struct elfhdr *) bprm->buf);

    retval = -ENOEXEC;
    /* First of all, some simple consistency checks */
    //检查magic
    if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
        goto out;

    //既非可执行文件又非动态链接库,动态链接库也可直接执行
    if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN)
        goto out;
    if (!elf_check_arch(&elf_ex))//体系结构检查
        goto out;
    if (!bprm->file->f_op||!bprm->file->f_op->mmap)//不能mmap,error
        goto out;

    /* Now read in all of the header information */

    retval = -ENOMEM;
    /*
    typedef struct elf32_hdr{
  unsigned char    e_ident[EI_NIDENT];
  Elf32_Half    e_type;
  Elf32_Half    e_machine;
  Elf32_Word    e_version;
  Elf32_Addr    e_entry; 
  Elf32_Off    e_phoff;
  Elf32_Off    e_shoff;
  Elf32_Word    e_flags;
  Elf32_Half    e_ehsize;
  Elf32_Half    e_phentsize;
  Elf32_Half    e_phnum;
  Elf32_Half    e_shentsize;
  Elf32_Half    e_shnum;
  Elf32_Half    e_shstrndx;
} Elf32_Ehdr;
    */
    //e_phentsize 该成员保存着在文件的程序头表(program header table)
    //中一个入口的大小(以字节计数)。所有的入口都是同样的大小。
   
    //e_phnum 该成员保存着在程序头表中入口的个数。因此,e_phentsize和e_phnum
    //的乘积就是表的大小(以字节计数).假如没有程序头表(program header table),
    //e_phnum变量为0。
    size = elf_ex.e_phentsize * elf_ex.e_phnum;
    if (size > 65536)
        goto out;
    elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
    if (!elf_phdata)
        goto out;

    //读入 program headers
    retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);
    if (retval < 0)
        goto out_free_ph;

    retval = get_unused_fd();
    if (retval < 0)
        goto out_free_ph;
    get_file(bprm->file);
    //保存原始打开文件
    fd_install(elf_exec_fileno = retval, bprm->file);//flush old exec不会关闭

    elf_ppnt = elf_phdata;//program headers
    elf_bss = 0;
    elf_brk = 0;

    start_code = ~0UL;// -1
    end_code = 0;
    start_data = 0;
    end_data = 0;

    for (i = 0; i < elf_ex.e_phnum; i++) {//处理每一个program headers,寻找PT_INTERP
        if (elf_ppnt->p_type == PT_INTERP) {
            retval = -EINVAL;
              if (elf_interpreter)//已经有interpreter
                goto out_free_dentry;

            /* This is the program interpreter used for
             * shared libraries - for now assume that this
             * is an a.out format binary
             */
            /*
typedef struct elf32_phdr{
  Elf32_Word    p_type;
  Elf32_Off    p_offset;
  Elf32_Addr    p_vaddr;
  Elf32_Addr    p_paddr;
  Elf32_Word    p_filesz;
  Elf32_Word    p_memsz;
  Elf32_Word    p_flags;
  Elf32_Word    p_align;
} Elf32_Phdr;
            */
            retval = -ENOMEM;
            elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz,
                               GFP_KERNEL);
            if (!elf_interpreter)
                goto out_free_file;

            retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                       elf_interpreter,
                       elf_ppnt->p_filesz);//读入interp
            if (retval < 0)
                goto out_free_interp;
            /* If the program interpreter is one of these two,
             * then assume an iBCS2 image. Otherwise assume
             * a native linux image.
             redhat 7.2 中是 /lib/ld-linux.so.2
             */
            if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
                strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
                ibcs2_interpreter = 1;
#if 0
            printk("Using ELF interpreter %s\n", elf_interpreter);
#endif
#ifdef __sparc__
            if (ibcs2_interpreter) {
                unsigned long old_pers = current->personality;
                struct exec_domain *old_domain = current->exec_domain;
                struct exec_domain *new_domain;
                struct fs_struct *old_fs = current->fs, *new_fs;
                get_exec_domain(old_domain);
                atomic_inc(&old_fs->count);

                set_personality(PER_SVR4);
                interpreter = open_exec(elf_interpreter);

                new_domain = current->exec_domain;
                new_fs = current->fs;
                current->personality = old_pers;
                current->exec_domain = old_domain;
                current->fs = old_fs;
                put_exec_domain(new_domain);
                put_fs_struct(new_fs);
            } else
#endif
            {
                interpreter = open_exec(elf_interpreter);//打开/lib/ld-linux.so.2
            }
            retval = PTR_ERR(interpreter);
            if (IS_ERR(interpreter))
                goto out_free_interp;
            retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);//读入头部
            if (retval < 0)
                goto out_free_dentry;

            /* Get the exec headers */
            interp_ex = *((struct exec *) bprm->buf);//可能是a.out
            interp_elf_ex = *((struct elfhdr *) bprm->buf);//可能是elf
        }
        elf_ppnt++;
    }

    /* Some simple consistency checks for the interpreter */
    if (elf_interpreter) {//有interp,执行/lib/ld-linux.so.2时,没有,或静态链接的可执行文件也没有
        interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

        /* Now figure out which format our binary is */
        if ((N_MAGIC(interp_ex) != OMAGIC) &&
            (N_MAGIC(interp_ex) != ZMAGIC) &&
            (N_MAGIC(interp_ex) != QMAGIC))
            interpreter_type = INTERPRETER_ELF;//是interp elf

        if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
            interpreter_type &= ~INTERPRETER_ELF;//是interp aout

        retval = -ELIBBAD;
        if (!interpreter_type)
            goto out_free_dentry;

        /* Make sure only one type was selected */
        if ((interpreter_type & INTERPRETER_ELF) &&
             interpreter_type != INTERPRETER_ELF) {
            printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
            interpreter_type = INTERPRETER_ELF;
        }
    }

    /* OK, we are done with that, now set up the arg stuff,
       and then start this sucker up */

    if (!bprm->sh_bang) {
        char * passed_p;

        if (interpreter_type == INTERPRETER_AOUT) {//a.out
          sprintf(passed_fileno, "%d", elf_exec_fileno);//原始打开文件号
          passed_p = passed_fileno;

          if (elf_interpreter) {//interp文件名
            retval = copy_strings_kernel(1,&passed_p,bprm);
            if (retval)
                goto out_free_dentry;
            bprm->argc++;//打开文件号作为参数
          }
        }
    }

    /* Flush all traces of the currently running executable */
    retval = flush_old_exec(bprm);//清除旧的执行影像
    if (retval)
        goto out_free_dentry;

    /* OK, This is the point of no return */
    current->mm->start_data = 0;
    current->mm->end_data = 0;
    current->mm->end_code = 0;
    current->mm->mmap = NULL;
    current->flags &= ~PF_FORKNOEXEC;
    elf_entry = (unsigned long) elf_ex.e_entry;//原文件代码入口

    /* Do this immediately, since STACK_TOP as used in setup_arg_pages
       may depend on the personality.  */
    SET_PERSONALITY(elf_ex, ibcs2_interpreter);

    /* Do this so that we can load the interpreter, if need be.  We will
       change some of these later */
    current->mm->rss = 0;
    setup_arg_pages(bprm); /* XXX: check error */

2.setup_arg_pages
load_elf_binary->setup_arg_pages
fs/exec.c

/*
 * Attach the argument pages collected in bprm to the current process.
 *
 * bprm->p, bprm->loader (if non-zero) and bprm->exec are converted from
 * offsets inside the argument area into addresses by adding
 * stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE.  A vm_area_struct
 * covering [page-aligned bprm->p, STACK_TOP) is inserted into current->mm,
 * and every non-NULL bprm->page[i] is handed to put_dirty_page() at
 * address stack_base + i*PAGE_SIZE.
 *
 * Returns 0 on success, -ENOMEM if the vm_area_struct cannot be allocated.
 */
int setup_arg_pages(struct linux_binprm *bprm)
{
    unsigned long stack_base;
    struct vm_area_struct *mpnt;
    int i;

    stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;/* base address of the argument/stack area */

    bprm->p += stack_base;/* turn the offset into an address */
    if (bprm->loader)
        bprm->loader += stack_base;
    bprm->exec += stack_base;

    mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);/* vm_area_struct for the stack segment */
    if (!mpnt)
        return -ENOMEM;
   
    down(&current->mm->mmap_sem);
    {
        mpnt->vm_mm = current->mm;
        mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;/* article: aligned downwards */
        mpnt->vm_end = STACK_TOP;
        mpnt->vm_page_prot = PAGE_COPY;
        mpnt->vm_flags = VM_STACK_FLAGS;
        mpnt->vm_ops = NULL;
        mpnt->vm_pgoff = 0;
        mpnt->vm_file = NULL;
        mpnt->vm_private_data = (void *) 0;
        insert_vm_struct(current->mm, mpnt);
        current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
    }

    /* Walk all argument page slots; stack_base advances one page per slot
       whether or not the slot holds a page. */
    for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
        struct page *page = bprm->page[i];
        if (page) {
            bprm->page[i] = NULL;
            current->mm->rss++;/* one more resident page */
            put_dirty_page(current,page,stack_base);/* article: hook the page into the process address space */
        }
        stack_base += PAGE_SIZE;
    }
    up(&current->mm->mmap_sem);
   
    return 0;
}


3.返回到load_elf_binary

    current->mm->start_stack = bprm->p;

    /* Try and get dynamic programs out of the way of the default mmap
       base, as well as whatever program they might try to exec.  This
       is because the brk will follow the loader, and is not movable.  */

    //普通可执行文件load_bias=0;动态链接库load_bias=0x8000 0000,即2G处(单独执行时,给ld-linux.so.2让路)
/*
例如/lib/ld-2.3.2.so执行时的maps如下
[root@mail /proc/30019]# cat maps
80000000-80015000 r-xp 00000000 08:01 272070     /lib/ld-2.3.2.so
80015000-80016000 rw-p 00014000 08:01 272070     /lib/ld-2.3.2.so
bfffe000-c0000000 rwxp fffff000 00:00 0
/lib/libc-2.3.2.so执行时的maps如下
[root@mail /proc/30097]# cat /proc/14541/maps 
40000000-40015000 r-xp 00000000 08:01 272070     /lib/ld-2.3.2.so
40015000-40016000 rw-p 00014000 08:01 272070     /lib/ld-2.3.2.so
80000000-80133000 r-xp 00000000 08:01 272077     /lib/libc-2.3.2.so
80133000-80137000 rw-p 00132000 08:01 272077     /lib/libc-2.3.2.so
80137000-80139000 rwxp 00000000 00:00 0
bfffe000-c0000000 rwxp fffff000 00:00 0
*/
    load_bias = ELF_PAGESTART(elf_ex.e_type==ET_DYN ? ELF_ET_DYN_BASE : 0);

    /* Now we do a little grungy work by mmaping the ELF image into
       the correct location in memory.  At this point, we assume that
       the image should be loaded at fixed address, not at a variable
       address. */

    old_fs = get_fs();
    set_fs(get_ds());
    for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) {
        //处理每一个program headers
/*
typedef struct elf32_phdr{
  Elf32_Word    p_type;
  Elf32_Off    p_offset;//该成员给出了该段的驻留位置相对于文件开始处的偏移。
  Elf32_Addr    p_vaddr;//该成员给出了该段在内存中的首字节地址。(连接器推荐的加载基址)
  Elf32_Addr    p_paddr;
  Elf32_Word    p_filesz;//该成员给出了文件映像中该段的字节数;它可能是 0 。
  Elf32_Word    p_memsz;//该成员给出了内存映像中该段的字节数;它可能是 0 。
  Elf32_Word    p_flags;//该成员给出了和该段相关的标志。定义的标志值如下所述。
  Elf32_Word    p_align;
} Elf32_Phdr;
*/
        int elf_prot = 0, elf_flags;
        unsigned long vaddr;

        if (elf_ppnt->p_type != PT_LOAD)//必须是PT_LOAD
            continue;

        if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
        if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
        if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;

        elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;

        vaddr = elf_ppnt->p_vaddr;
        if (elf_ex.e_type == ET_EXEC || load_addr_set) {
            //是可执行文件或者起始加载地址已设置
            elf_flags |= MAP_FIXED;
        }
        error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
        //不判断出错 ???
       

/*
elf文件有两种视图,一种节表,是从程序编译连接的角度看。一种是程序头,是从程序执行的角度看。

举例看看这两种视图的关系

[root@mail /proc/30097]# readelf -l /bin/ls

Elf file type is EXEC (Executable file)
Entry point 0x8049690
There are 7 program headers, starting at offset 52

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  PHDR           0x000034 0x08048034 0x08048034 0x000e0 0x000e0 R E 0x4
  INTERP         0x000114 0x08048114 0x08048114 0x00013 0x00013 R   0x1
      [Requesting program interpreter: /lib/ld-linux.so.2]
  LOAD           0x000000 0x08048000 0x08048000 0x0fa98 0x0fa98 R E 0x1000
  LOAD           0x010000 0x08058000 0x08058000 0x00348 0x006c8 RW  0x1000
  DYNAMIC        0x010114 0x08058114 0x08058114 0x000d0 0x000d0 RW  0x4
  NOTE           0x000128 0x08048128 0x08048128 0x00020 0x00020 R   0x4
  GNU_EH_FRAME   0x00f960 0x08057960 0x08057960 0x0002c 0x0002c R   0x4

Section to Segment mapping:
  Segment Sections...
   00    
   01     .interp
   02     .interp .note.ABI-tag .hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame
   03     .data .dynamic .ctors .dtors .jcr .got .bss
   04     .dynamic
   05     .note.ABI-tag
   06     .eh_frame_hdr

注意下面的Section to Segment mapping,说明了每个程序头包含了哪些节。我们关心的是代码段程序头和数据段程序头。
  LOAD           0x000000 0x08048000 0x08048000 0x0fa98 0x0fa98 R E 0x1000
  LOAD           0x010000 0x08058000 0x08058000 0x00348 0x006c8 RW  0x1000
分别对应
   02     .interp .note.ABI-tag .hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame
   03     .data .dynamic .ctors .dtors .jcr .got .bss

代码段开始文件地址是0,开始虚拟地址是0x8048000,文件大小是0xfa98,内存大小是0xfa98,flag是可读可执行,对齐大小是4k
数据段开始文件地址是0x10000,开始虚拟地址0x8058000,文件大小是0x348,内存大小是0x6c8,flag是可读可写,对齐大小是4k

下面列出节表
[zws@mail /proc/1]$readelf -S /bin/ls
There are 26 section headers, starting at offset 0x10444:

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .interp           PROGBITS        08048114 000114 000013 00   A  0   0  1
  [ 2] .note.ABI-tag     NOTE            08048128 000128 000020 00   A  0   0  4
  [ 3] .hash             HASH            08048148 000148 00028c 04   A  4   0  4
  [ 4] .dynsym           DYNSYM          080483d4 0003d4 0005e0 10   A  5   1  4
  [ 5] .dynstr           STRTAB          080489b4 0009b4 0003ea 00   A  0   0  1
  [ 6] .gnu.version      VERSYM          08048d9e 000d9e 0000bc 02   A  4   0  2
  [ 7] .gnu.version_r    VERNEED         08048e5c 000e5c 000070 00   A  5   1  4
  [ 8] .rel.dyn          REL             08048ecc 000ecc 000028 08   A  4   0  4
  [ 9] .rel.plt          REL             08048ef4 000ef4 000278 08   A  4  11  4
  [10] .init             PROGBITS        0804916c 00116c 000017 00  AX  0   0  4
  [11] .plt              PROGBITS        08049184 001184 000500 04  AX  0   0  4
  [12] .text             PROGBITS        08049690 001690 00ab4c 00  AX  0   0 16
  [13] .fini             PROGBITS        080541dc 00c1dc 00001b 00  AX  0   0  4
  [14] .rodata           PROGBITS        08054200 00c200 003760 00   A  0   0 32
  [15] .eh_frame_hdr     PROGBITS        08057960 00f960 00002c 00   A  0   0  4
  [16] .eh_frame         PROGBITS        0805798c 00f98c 00010c 00   A  0   0  4
  [17] .data             PROGBITS        08058000 010000 000114 00  WA  0   0 32
  [18] .dynamic          DYNAMIC         08058114 010114 0000d0 08  WA  5   0  4
  [19] .ctors            PROGBITS        080581e4 0101e4 000008 00  WA  0   0  4
  [20] .dtors            PROGBITS        080581ec 0101ec 000008 00  WA  0   0  4
  [21] .jcr              PROGBITS        080581f4 0101f4 000004 00  WA  0   0  4
  [22] .got              PROGBITS        080581f8 0101f8 000150 04  WA  0   0  4
  [23] .bss              NOBITS          08058360 010360 000368 00  WA  0   0 32
  [24] .gnu_debuglink    PROGBITS        00000000 010360 000010 00      0   0  4
  [25] .shstrtab         STRTAB          00000000 010370 0000d2 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

从开始文件地址和内存大小可看出
代码段包含的节从[0]到[16],数据段包含的节从[17]到[23],和前面的显示正好一致。

由于内存映射以页为单位,映射起始地址向下对齐到页边界,映射大小向上对齐到页边界,因此在进行内存映射的时候,
代码段映射关系是虚拟地址[0x8048000,0x8048000+0x10000)->文件偏移[0,0x10000)
数据段映射关系是虚拟地址[0x8058000,0x8058000+0x1000)->文件偏移[0x10000,0x10000+0x1000)

/bin/ls的文件大小是小于0x11000的,数据段的映射超出了文件末尾,不过没有关系,超出的部分会被当做零页分配。可见整个文件都被映射了。显然0x8048000就是这个文件镜像加载的起始地址,这个地址后面有用。

我还注意到[23].bss节type是NOBITS,说明文件中没有对应内容,这可以从[24].gnu_debuglink的文件偏移和[23].bss的文件偏移相等得到侧面证明。但是它却是有大小的,0x368,Flag是WA,说明可写且需要分配。

bss是未初始化节,程序中的未初始化变量都放在这个节中,由于未初始化变量的值默认都为0,因此也就不在文件中为其分配空间了。但是到了内存中就不同了,必须为其分配空间,且清0。这一点后面还会谈到。

还有就是程序中的常量被放在.rodata节中,常量只能读,不能写,代码段是可读,可执行,因此.rodata节被插入在代码段中,没有创建额外的程序头了。

可执行文件一般从0x08000000(128M)附近开始编址(例如上面/bin/ls的0x08048000),可执行文件加载时不重定位。
动态链接库一般从0开始编址.动态链接库加载时重定位,重定位地址从0x40000000(1G)开始。

映射的时候,代码段只读映射该页,而数据段COW映射该页.因此虽然上面代码段和数据段是连续的,但是页属性是不同的。


代码段和数据段在文件中的映射可能有重叠,例如
[zws@mail ~]$ readelf -l /lib/libc-2.3.2.so

Elf file type is DYN (Shared object file)
Entry point 0x159d0
There are 7 program headers, starting at offset 52

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  PHDR           0x000034 0x00000034 0x00000034 0x000e0 0x000e0 R E 0x4
  INTERP         0x1312f0 0x001312f0 0x001312f0 0x00013 0x00013 R   0x1
      [Requesting program interpreter: /lib/ld-linux.so.2]
  LOAD           0x000000 0x00000000 0x00000000 0x132804 0x132804 R E 0x1000
  LOAD           0x132820 0x00133820 0x00133820 0x02c90 0x056c4 RW  0x1000
  DYNAMIC        0x1350d4 0x001360d4 0x001360d4 0x000d8 0x000d8 RW  0x4
  NOTE           0x000114 0x00000114 0x00000114 0x00020 0x00020 R   0x4
  GNU_EH_FRAME   0x131304 0x00131304 0x00131304 0x0032c 0x0032c R   0x4

假设实际加载地址是x
代码段映射关系是虚拟地址[x+0,x+0+0x133000)->文件偏移[0,0x133000)
数据段映射关系是虚拟地址[x+0x133000,x+0x133000+0x6000)->文件偏移[0x132000,0x132000+0x4000)+零页[x+0x137000,x+0x139000)
(对照前面/lib/libc-2.3.2.so的maps:80133000-80137000映射文件偏移0x132000,80137000-80139000是匿名零页)

文件偏移有一页重叠,但这种重叠不会引起冲突和访问错误.

总之,ELF文件的节和程序头关系紧密,其中暗藏玄机,值得细细揣摩。

*/

4.计算start_code,end_code等


        if (!load_addr_set) {
            load_addr_set = 1;
            //load_addr 计算整个镜像加载基址
            /*
                    load_bias   load_addr   
            ET_EXEC        0x00000000  0x08048000  /bin/ls
             ET_DYN     0x80000000  0x80000000   /lib/ld-linux.so.2
            */
            //链接器推荐的镜像加载基址,对于可执行文件就是该地址,对于动态链接库,一般是0
            load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
            if (elf_ex.e_type == ET_DYN) {//load_bias是0x80000000
                //动态链接库实际加载地址-内核推荐的加载地址=加载偏移(一般是0)
                load_bias += error -
                             ELF_PAGESTART(load_bias + vaddr);
                load_addr += error;//一般load_addr==error
            }
        }
        k = elf_ppnt->p_vaddr;//通常代码段和数据段紧挨在一起,代码段在前,数据段在后
        if (k < start_code) start_code = k;//start_code 的初值为0xffffffff,定位代码段开始地址
        if (start_data < k) start_data = k;//start_data初值为0,定位数据段开始

        k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;//file size,不是mem size

        if (k > elf_bss)//elf_bss初值为0
            elf_bss = k;//计算bss起始地址
        if ((elf_ppnt->p_flags & PF_X) && end_code <  k)
            end_code = k;//代码段结束地址
        if (end_data < k)
            end_data = k;//数据段结束地址
        k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;//mem size
        if (k > elf_brk)
            elf_brk = k;
        //[elf_bss,elf_brk)之间是bss section
    }
    set_fs(old_fs);

    //全部重定位
    elf_entry += load_bias;
    elf_bss += load_bias;//bss未初始化段开始
    elf_brk += load_bias;//brk动态内存分配起始地址
    start_code += load_bias;
    end_code += load_bias;
    start_data += load_bias;
    end_data += load_bias;

    if (elf_interpreter) {
        //elf_entry被覆盖
        if (interpreter_type == INTERPRETER_AOUT)
            elf_entry = load_aout_interp(&interp_ex,
                             interpreter);
        else
            elf_entry = load_elf_interp(&interp_elf_ex,
                            interpreter,
                            &interp_load_addr);

5.load_elf_interp


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
                     struct file * interpreter,
                     unsigned long *interp_load_addr)
{
    struct elf_phdr *elf_phdata;
    struct elf_phdr *eppnt;
    unsigned long load_addr = 0;
    int load_addr_set = 0;
    unsigned long last_bss = 0, elf_bss = 0;
    unsigned long error = ~0UL;
    int retval, i, size;

    /* First of all, some simple consistency checks */
    if (interp_elf_ex->e_type != ET_EXEC &&
        interp_elf_ex->e_type != ET_DYN)
        goto out;
    if (!elf_check_arch(interp_elf_ex))
        goto out;
    if (!interpreter->f_op || !interpreter->f_op->mmap)
        goto out;

    /*
     * If the size of this structure has changed, then punt, since
     * we will be doing the wrong thing.
     */
    if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
        goto out;

    /* Now read in all of the header information */

    size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
    if (size > ELF_MIN_ALIGN)
        goto out;
    elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
    if (!elf_phdata)
        goto out;

    //读入interp的prgoram header
    retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
    error = retval;
    if (retval < 0)
        goto out_close;

    eppnt = elf_phdata;
    for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
      if (eppnt->p_type == PT_LOAD) {
        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
        int elf_prot = 0;
        unsigned long vaddr = 0;
        unsigned long k, map_addr;

        if (eppnt->p_flags & PF_R) elf_prot =  PROT_READ;
        if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
        if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
        vaddr = eppnt->p_vaddr;
        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
            elf_type |= MAP_FIXED;
/*
readelf -l /lib/ld-linux.so.2

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x000000 0x00000000 0x00000000 0x15229 0x15229 R E 0x1000
  LOAD           0x015240 0x00016240 0x00016240 0x00300 0x00758 RW  0x1000
  DYNAMIC        0x015490 0x00016490 0x00016490 0x000b0 0x000b0 RW  0x4

load_addr + vaddr=0,mmap将从1G处开始处映射
*/

        map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);

        if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
        //只计算ET_DYN
        load_addr = map_addr - ELF_PAGESTART(vaddr);
        load_addr_set = 1;
        }

        /*
         * Find the end of the file mapping for this phdr, and keep
         * track of the largest address we see for this.
         */
        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
        if (k > elf_bss)//elf bss
        elf_bss = k;

        /*
         * Do the same thing for the memory mapping - between
         * elf_bss and last_bss is the bss section.
         [elf_bss,last_bss)是bss section
         */
        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
        if (k > last_bss)//last bss
        last_bss = k;
      }
    }

    /* Now use mmap to map the library into memory. */

    /*
     * Now fill out the bss section.  First pad the last page up
     * to the page boundary, and then perform a mmap to make sure
     * that there are zero-mapped pages up to and including the
     * last bss page.
     */
    padzero(elf_bss);//清bss
    elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);    /* What we have mapped so far */

    /* Map the last of the bss segment */
    if (last_bss > elf_bss)//未映射到文件的部分,分配0页
        do_brk(elf_bss, last_bss - elf_bss);//匿名映射[addr,addr+len)

    *interp_load_addr = load_addr;//镜像加载基址
    error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;//入口地址

out_close:
    kfree(elf_phdata);
out:
    return error;
}


6.返回load_elf_binary

        allow_write_access(interpreter);
        fput(interpreter);
        kfree(elf_interpreter);

        if (elf_entry == ~0UL) {
            printk(KERN_ERR "Unable to load interpreter\n");
            kfree(elf_phdata);
            send_sig(SIGSEGV, current, 0);
            return 0;
        }
    }

    kfree(elf_phdata);

    if (interpreter_type != INTERPRETER_AOUT)
        sys_close(elf_exec_fileno);//ELF不需要

    set_binfmt(&elf_format);

    compute_creds(bprm);
    current->flags &= ~PF_FORKNOEXEC;
    //注意bprm->p被更新,指向argc地址
    bprm->p = (unsigned long)
      create_elf_tables((char *)bprm->p,
            bprm->argc,
            bprm->envc,
            (interpreter_type == INTERPRETER_ELF ? &elf_ex : NULL),
            load_addr, load_bias,
            interp_load_addr,
            (interpreter_type == INTERPRETER_AOUT ? 0 : 1));

7.create_elf_tables


static elf_addr_t *
create_elf_tables(char *p, int argc, int envc,
          struct elfhdr * exec,
          unsigned long load_addr,
          unsigned long load_bias,
          unsigned long interp_load_addr, int ibcs)
{
    elf_caddr_t *argv;
    elf_caddr_t *envp;
    elf_addr_t *sp, *csp;
    char *k_platform, *u_platform;
    long hwcap;
    size_t platform_len = 0;

    /*
     * Get hold of platform and hardware capabilities masks for
     * the machine we are running on.  In some cases (Sparc),
     * this info is impossible to get, in others (i386) it is
     * merely difficult.
     */

    hwcap = ELF_HWCAP;//CPU特性描述字
    k_platform = ELF_PLATFORM;//CPU类型名 例如i686

    //p指向argv字符串首地址 即下面的0xbffffc07处
/*
0xbffffc02:      "i686"
0xbffffc07:      "/root/3/88"
0xbffffc12:      "PWD=/root"
0xbffffc1c:      "HOSTNAME=proxy"
0xbffffc2b:      "QTDIR=/usr/lib/qt-2.3.1"
0xbffffc43:      "LESSOPEN=|/usr/bin/lesspipe.sh %s"
0xbffffc65:      "KDEDIR=/usr"
*/
    if (k_platform) {//一般不为空
        platform_len = strlen(k_platform) + 1;
        u_platform = p - platform_len;
        __copy_to_user(u_platform, k_platform, platform_len);//i686
    } else
        u_platform = p;

    /*
     * Force 16 byte _final_ alignment here for generality.
     * Leave an extra 16 bytes free so that on the PowerPC we
     * can move the aux table up to start on a 16-byte boundary.
     */
     //向低地址方向对齐到16字节边界,再减16字节
                //较新的内核使用一个随机数,因此布局有所不同
    sp = (elf_addr_t *)((~15UL & (unsigned long)(u_platform)) - 16UL);
    csp = sp;
    //DLINFO -> 动态链接信息?? 这些信息是为ld.so准备的,ld.so需要用到
    csp -= ((exec ? DLINFO_ITEMS*2 : 4) + (k_platform ? 2 : 0));//DLINFO_ITEMS*2 + 2,DLINFO_ITEMS定义为13
    csp -= envc+1;
    csp -= argc+1;
    //ibcs 0->a.out 1->elf
    csp -= (!ibcs ? 3 : 1);// 1-> argc    /* argc itself */
    if ((unsigned long)csp & 15UL)//不与16字节边界对齐
    //下移sp,使argc对齐到16字节边界
        sp -= ((unsigned long)csp & 15UL) / sizeof(*sp);
/*
内存布局如下
  position            content                     size (bytes) + comment
  ------------------------------------------------------------------------
  stack pointer ->  [ argc = number of args ]     4
                    [ argv[0] (pointer) ]         4   (program name)
                    [ argv[1] (pointer) ]         4
                    [ argv[..] (pointer) ]        4 * x
                    [ argv[n - 1] (pointer) ]     4
                    [ argv[n] (pointer) ]         4   (= NULL)

                    [ envp[0] (pointer) ]         4
                    [ envp[1] (pointer) ]         4
                    [ envp[..] (pointer) ]        4
                    [ envp[term] (pointer) ]      4   (= NULL)

                    [ auxv[0] AT_PHDR (Elf32_auxv_t) ]    8
                    [ auxv[1] AT_PHENT (Elf32_auxv_t) ]    8
                    [ auxv[2] AT_PHNUM (Elf32_auxv_t) ]   8
                    [ auxv[3] AT_BASE (Elf32_auxv_t) ]   8
                    [ auxv[4] AT_FLAGS (Elf32_auxv_t) ]   8
                    [ auxv[5] AT_ENTRY (Elf32_auxv_t) ]   8
                    [ auxv[6] AT_UID (Elf32_auxv_t) ]   8
                    [ auxv[7] AT_EUID (Elf32_auxv_t) ]   8
                    [ auxv[8] AT_GID (Elf32_auxv_t) ]   8
                    [ auxv[9] AT_EGID (Elf32_auxv_t) ]   8
                    [ auxv[10] AT_HWCAP (Elf32_auxv_t) ]   8
                    [ auxv[11] AT_PAGESZ (Elf32_auxv_t) ]   8
                    [ auxv[12] AT_CLKTCK (Elf32_auxv_t) ]   8
                    [ auxv[13] AT_PLATFORM (Elf32_auxv_t) ]   8
                    [ auxv[14] (Elf32_auxv_t) ] 8   (= AT_NULL vector)

         [ padding ]                   0 - 15                   
                    [ padding ]                   16                   
                    [ padding ]                   0 - 15                   

         [k_platform]                  0 - 65
                    [ argument ASCIIZ strings ]   >= 0
                    [ environment ASCIIZ str. ]   >= 0
                       [filename] >=0

  (0xbffffffc)      [ end marker ]                4   (= NULL)

  (0xc0000000)      < top of stack >              0   (virtual)
*/
    /*
     * Put the ELF interpreter info on the stack
     */
#define NEW_AUX_ENT(nr, id, val) \
      __put_user ((id), sp+(nr*2)); \
      __put_user ((val), sp+(nr*2+1)); \
    //开始存放辅助向量
    sp -= 2;
    NEW_AUX_ENT(0, AT_NULL, 0);//end of vector
    if (k_platform) {
        sp -= 2;
        NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform);
    }
    sp -= 3*2;
    NEW_AUX_ENT(0, AT_HWCAP, hwcap);
    NEW_AUX_ENT(1, AT_PAGESZ, ELF_EXEC_PAGESIZE);// 4096
    NEW_AUX_ENT(2, AT_CLKTCK, CLOCKS_PER_SEC);// 100

    if (exec) {//elf interp
        sp -= 10*2;

        NEW_AUX_ENT(0, AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(1, AT_PHENT, sizeof (struct elf_phdr));
        NEW_AUX_ENT(2, AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(3, AT_BASE, interp_load_addr);//interp加载基址,如果就是/lib/ld-linux.so.2或静态链

接可执行文件,则为0
        NEW_AUX_ENT(4, AT_FLAGS, 0);
        NEW_AUX_ENT(5, AT_ENTRY, load_bias + exec->e_entry);//原程序入口
        NEW_AUX_ENT(6, AT_UID, (elf_addr_t) current->uid);
        NEW_AUX_ENT(7, AT_EUID, (elf_addr_t) current->euid);
        NEW_AUX_ENT(8, AT_GID, (elf_addr_t) current->gid);
        NEW_AUX_ENT(9, AT_EGID, (elf_addr_t) current->egid);
    }
#undef NEW_AUX_ENT

    sp -= envc+1;
    envp = (elf_caddr_t *) sp;
    sp -= argc+1;
    argv = (elf_caddr_t *) sp;
    if (!ibcs) {//a.out
        __put_user((elf_addr_t)(unsigned long) envp,--sp);
        __put_user((elf_addr_t)(unsigned long) argv,--sp);
    }
    //处理argv数组
    __put_user((elf_addr_t)argc,--sp);//argc入栈
    current->mm->arg_start = (unsigned long) p;//arg_start
    while (argc-->0) {
        __put_user((elf_caddr_t)(unsigned long)p,argv++);
        p += strlen_user(p);//计算下一个字符串的长度,更新p
    }
    __put_user(NULL, argv);
    //处理envp数组
    current->mm->arg_end = current->mm->env_start = (unsigned long) p;
    while (envc-->0) {
        __put_user((elf_caddr_t)(unsigned long)p,envp++);
        p += strlen_user(p);
    }
    __put_user(NULL, envp);
    current->mm->env_end = (unsigned long) p;
    return sp;//返回argc地址
}


8.返回load_elf_binary

    /* N.B. passed_fileno might not be initialized? */
    if (interpreter_type == INTERPRETER_AOUT)
        current->mm->arg_start += strlen(passed_fileno) + 1;//多了passed_fileno参数
    current->mm->start_brk = current->mm->brk = elf_brk;//动态分配内存起始地址
    current->mm->end_code = end_code;
    current->mm->start_code = start_code;
    current->mm->start_data = start_data;
    current->mm->end_data = end_data;
    current->mm->start_stack = bprm->p;

    /* Calling set_brk effectively mmaps the pages that we need
     * for the bss and break sections
     */
    set_brk(elf_bss, elf_brk);//elf_bss上取整

9.set_brk

/*
 * set_brk - back the range between two addresses with do_brk().
 *
 * Both bounds are passed through ELF_PAGEALIGN first; nothing is done
 * when the aligned range is empty.
 */
static void set_brk(unsigned long start, unsigned long end)
{
    unsigned long first_page = ELF_PAGEALIGN(start);
    unsigned long limit_page = ELF_PAGEALIGN(end);

    if (limit_page > first_page)
        do_brk(first_page, limit_page - first_page);
}


10.返回load_elf_binary
    padzero(elf_bss);//对最后一映射文件的页中的bss清零

#if 0
    printk("(start_brk) %lx\n" , (long) current->mm->start_brk);
    printk("(end_code) %lx\n" , (long) current->mm->end_code);
    printk("(start_code) %lx\n" , (long) current->mm->start_code);
    printk("(start_data) %lx\n" , (long) current->mm->start_data);
    printk("(end_data) %lx\n" , (long) current->mm->end_data);
    printk("(start_stack) %lx\n" , (long) current->mm->start_stack);
    printk("(brk) %lx\n" , (long) current->mm->brk);
#endif

    if ( current->personality == PER_SVR4 )
    {
        /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
           and some applications "depend" upon this behavior.
           Since we do not have the power to recompile these, we
           emulate the SVr4 behavior.  Sigh.  */
        /* N.B. Shouldn't the size here be PAGE_SIZE?? */
        down(&current->mm->mmap_sem);
        error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC,
                MAP_FIXED | MAP_PRIVATE, 0);
        up(&current->mm->mmap_sem);
    }

#ifdef ELF_PLAT_INIT
    /*
     * The ABI may specify that certain registers be set up in special
     * ways (on i386 %edx is the address of a DT_FINI function, for
     * example.  This macro performs whatever initialization to
     * the regs structure is required.
     */
    ELF_PLAT_INIT(regs);//清空所有寄存器
#endif
/*
#define ELF_PLAT_INIT(_r)    do { \
    _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \
    _r->esi = 0; _r->edi = 0; _r->ebp = 0; \
    _r->eax = 0; \
} while (0)
*/

    start_thread(regs, elf_entry, bprm->p);//一般此处的elf_entry是interp的entry

11.start_thread

/*
 * start_thread - prepare the saved register frame for the new program image.
 *
 * Loads 0 into %fs and %gs, calls set_fs(USER_DS), sets the saved xds/xes/xss
 * fields to __USER_DS and xcs to __USER_CS, then stores the new instruction
 * pointer and stack pointer into regs.
 */
#define start_thread(regs, new_eip, new_esp) do {        \
    __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));    \
    set_fs(USER_DS);                    \
    regs->xds = __USER_DS;                    \
    regs->xes = __USER_DS;                    \
    regs->xss = __USER_DS;                    \
    regs->xcs = __USER_CS;                    \
    regs->eip = new_eip;/* set eip; per this walkthrough usually the entry of ld-linux.so.2 */                    \
    regs->esp = new_esp;/* set esp; per this walkthrough it points at argc */                    \
} while (0)

12.返回load_elf_binary

    if (current->ptrace & PT_PTRACED)
        send_sig(SIGTRAP, current, 0);//如果进程被调试,通知父进程
    retval = 0;
out:
    return retval;

    /* error cleanup */
out_free_dentry:
    allow_write_access(interpreter);
    fput(interpreter);
out_free_interp:
    if (elf_interpreter)
        kfree(elf_interpreter);
out_free_file:
    sys_close(elf_exec_fileno);
out_free_ph:
    kfree(elf_phdata);
    goto out;
}

 

ld.so分析4 PIC,GOT和PLT

1.PIC

PIC就是Position Independent Code(位置无关代码).那么何谓位置无关代码?

如果代码不需要被重定位,那么这种代码就是位置无关的。

我们要区分位置无关代码和可重入代码(Reentrant Code,下文简称RC)的不同,两者是无关的概念,不能混淆。

例如
/* Returns the constant 1; reads and writes no data outside the function. */
int f()
{
return 1;
}

[zws@mail ~]$gcc -S x.c
[zws@mail ~]$cat x.s
        .file   "x.c"
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        movl    $1, %eax
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

这是PIC也是RC

char * f()
{
return "a";
}

[zws@mail ~]$gcc -S x.c
[zws@mail ~]$cat x.s
        .file   "x.c"
        .section        .rodata
.LC0:
        .string "a"
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        movl    $.LC0, %eax
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

这不是PIC但是是RC

/* Increments a function-local static counter and returns the new value. */
int f()
{
static int a=0; /* static storage: the value persists across calls */
a++;
return a;
}
[zws@mail ~]$gcc -S x.c
[zws@mail ~]$cat x.s
        .file   "x.c"
        .data
        .align 4
        .type   a.0,@object
        .size   a.0,4
a.0:
        .long   0
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        incl    a.0
        movl    a.0, %eax
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

两者都不是

将上面的汇编语言改成PIC

[zws@mail ~]$cat x.s
        .file   "x.c"
        .data
        .align 4
        .type   a.0,@object
        .size   a.0,4
a.0:
        .long   0
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        call    .L2
.L2:
        popl    %ebx// %ebx中为当前指令的地址
        subl    $.L2-a.0, %ebx//该指令地址-相对a.0的偏移,即为a.0的地址,在%ebx中
        incl    (%ebx)
        movl    (%ebx), %eax
        popl    %ebx
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

现在这个代码是PIC而不是RC

综上,PIC代码中不能引用绝对地址,否则需要重定位.(上面的 subl $.L2-a.0, %ebx 不算引用绝对地址,因为gas会计算出这个偏移,最终形成的指令中存放的只是一个常数)
RC代码不能使用共享变量,否则需要锁。

位置无关代码有什么优点?多个可执行程序运行这样的代码(例如动态链接库)时,虽然加载地址可能不一样,但是该代码不需要重定位,也就是不需要修改代码,那么多个可执行程序就能共享一个代码副本,从而节省内存。缺点是占用一个寄存器计算地址,代码长度增加一点,执行时间增加一点。

2.GOT

GOT是GLOBAL OFFSET TABLE(全局偏移表).

我们称一个可执行文件或动态链接库为一个模块.

一个模块中的数据或函数只允许被自己访问,称为本地局部数据或本地局部函数.例如static 类型的变量或函数就是这种类型.
一个模块中的数据或函数不但允许被自己访问,也允许外部访问,称为本地全局数据或本地全局函数.例如没有static修饰的变量或函数就是这种类型.

相应地一个模块引用另一个模块中的数据或函数,则称为外部全局数据或外部全局函数.例如使用extern修饰的变量或函数就是这种类型.

局部肯定是本地的,外部一定是全局的。

GOT有以下几种功能:
>>为本地访问本地局部数据(静态变量或常量)提供PIC支持。
>>为本地访问本地全局数据提供PIC支持(配合.got节)
>>为本地访问外部全局数据提供PIC支持(配合.got节)
>>为本地调用本地全局函数提供PIC支持(配合.plt节和.got.plt节)
>>为本地调用外部全局函数提供PIC支持(配合.plt节和.got.plt节)
>>为动态链接提供支持(配合.rel.dyn节,.rel.plt节,.got节,.got.plt节)

由于函数调用使用的都是相对寻址,且本地局部函数地址已知,因此本地访问本地局部函数调用不需要GOT支持.

(1)为本地访问本地局部数据(静态变量或常量)访问提供PIC支持。
[zws@mail ~]$cat x.c
/* File-scope counter with internal linkage (static). */
static int a=0;

/* Increments the file-scope counter a and returns its new value. */
int f()
{
a++;
return a;
}


[zws@mail ~]$gcc -fPIC -S x.c
[zws@mail ~]$cat x.s
        .file   "x.c"
        .data
        .align 4
        .type   a,@object
        .size   a,4
a:
        .long   0
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        call    .L2
.L2:
        popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
        incl    a@GOTOFF(%ebx)
        movl    a@GOTOFF(%ebx), %eax
        popl    %ebx
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

[zws@mail ~]$readelf -a x.o
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              REL (Relocatable file)
  Machine:                           Intel 80386
  Version:                           0x1
  Entry point address:               0x0
  Start of program headers:          0 (bytes into file)
  Start of section headers:          196 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           0 (bytes)
  Number of program headers:         0
  Size of section headers:           40 (bytes)
  Number of section headers:         9
  Section header string table index: 6

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .text             PROGBITS        00000000 000034 00001f 00  AX  0   0  4
  [ 2] .rel.text         REL             00000000 0002dc 000018 08      7   1  4
  [ 3] .data             PROGBITS        00000000 000054 000004 00  WA  0   0  4
  [ 4] .bss              NOBITS          00000000 000058 000000 00  WA  0   0  4
  [ 5] .comment          PROGBITS        00000000 000058 000033 00      0   0  1
  [ 6] .shstrtab         STRTAB          00000000 00008b 000039 00      0   0  1
  [ 7] .symtab           SYMTAB          00000000 00022c 000090 10      8   7  4
  [ 8] .strtab           STRTAB          00000000 0002bc 00001f 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

There are no section groups in this file.

There are no program headers in this file.

Relocation section '.rel.text' at offset 0x2dc contains 3 entries:
Offset     Info    Type            Sym.Value  Sym. Name
0000000c  0000080a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000012  00000309 R_386_GOTOFF      00000000   .data
00000018  00000309 R_386_GOTOFF      00000000   .data

There are no unwind sections in this file.

Symbol table '.symtab' contains 9 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 00000000     0 FILE    LOCAL  DEFAULT  ABS x.c
     2: 00000000     0 SECTION LOCAL  DEFAULT    1
     3: 00000000     0 SECTION LOCAL  DEFAULT    3
     4: 00000000     0 SECTION LOCAL  DEFAULT    4
     5: 00000000     4 OBJECT  LOCAL  DEFAULT    3 a
     6: 00000000     0 SECTION LOCAL  DEFAULT    5
     7: 00000000    31 FUNC    GLOBAL DEFAULT    1 f
     8: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND _GLOBAL_OFFSET_TABLE_

No version information found in this file.

[zws@mail ~]$ objdump -d x.o

x.o:     file format elf32-i386

Disassembly of section .text:

00000000 <f>:
   0:   55                      push   %ebp
   1:   89 e5                   mov    %esp,%ebp
   3:   53                      push   %ebx
   4:   e8 00 00 00 00          call   9 <f+0x9>
   9:   5b                      pop    %ebx
   a:   81 c3 03 00 00 00       add    $0x3,%ebx
  10:   ff 83 00 00 00 00       incl   0x0(%ebx)
  16:   8b 83 00 00 00 00       mov    0x0(%ebx),%eax
  1c:   5b                      pop    %ebx
  1d:   c9                      leave 
  1e:   c3                      ret 
分析

call .L2,将下一条指令地址压栈
popl %ebx,将本指令地址弹出到%ebx寄存器
addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
$说明这个操作数是立即数,_GLOBAL_OFFSET_TABLE_是特殊符号,gas能够识别,并为该操作数生成R_386_GOTPC重定位类型.例如上面的
0000000c  0000080a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
地址0000000c指向指令   a:   81 c3 03 00 00 00       add    $0x3,%ebx的源操作数部分[03 00 00 00]
ld链接时,检查重定位表,发现包含R_386_GOTPC重定位项,创建.got和.got.plt节,.got节存放全局数据地址,.got.plt存放全局函数地址,GOT地址是.got.plt的地址(ld也可以合并这两个节成一个.got节),并计算地址GOT和地址0000000c的差值,加入0000000c处的值并写入,这就是R_386_GOTPC重定位的内容。

上面的[.-.L2]意思是计算当前指令地址和.L2地址之差,即popl %ebx指令长度,应该是1.但是为何最终的指令却是add $0x3,%ebx呢?操作数3是如

何计算出来的呢?

这是因为重定位R_386_GOTPC项时计算的是该操作数与GOT的差值,而不是该条指令与GOT的差值.因此需要计算该操作数的偏移,即指令
   a:   81 c3 03 00 00 00       add    $0x3,%ebx
地址0000000a加上81 c3这两字节操作码长度,形成最终地址0000000c.
相应地,以%ebx(popl %ebx指令的地址)为基准,该操作数的加载地址=(popl %ebx指令地址)+(popl %ebx指令长度)+(addl指令操作码长度)=%ebx+1+2=%ebx+3,所以需要的附加值是3,最终的指令就是add $0x3,%ebx

然而指令       
addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
并没有明确指出再加上addl的操作码长度,其实这是gas替我们隐含计算了.gas分析该指令的操作数,碰到是立即数,且含有符号_GLOBAL_OFFSET_TABLE_,会在形成最终的操作数时,自动加上操作码长度,得到我们想要的结果。

a++生成的指令是 incl    a@GOTOFF(%ebx)生成的机器指令是
  10:   ff 83 00 00 00 00       incl   0x0(%ebx)
重定位项是
00000012  00000309 R_386_GOTOFF      00000000   .data
在链接时,重定位类型R_386_GOTOFF执行的操作是计算该符号与GOT的偏移,并加入重定位处(GOTOFF即GOT OFFSET).
可见a@GOTOFF会指示gas生成R_386_GOTOFF重定位项,比较适合只被自己使用的变量。

在符号表中
     5: 00000000     4 OBJECT  LOCAL  DEFAULT    3 a
a的bind类型是local.

(2)为本地访问本地全局数据访问提供PIC支持(配合.got节)
[zws@mail ~]$cat x.c
int a=0;

int f()
{
a++;
return a;
}
[zws@mail ~]$cat x.s
        .file   "x.c"
.globl a
        .data
        .align 4
        .type   a,@object
        .size   a,4
a:
        .long   0
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        call    .L2
.L2:
        popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
        movl    a@GOT(%ebx), %eax
        incl    (%eax)
        movl    a@GOT(%ebx), %eax
        movl    (%eax), %eax
        popl    %ebx
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
[zws@mail ~]$readelf -a x.o
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              REL (Relocatable file)
  Machine:                           Intel 80386
  Version:                           0x1
  Entry point address:               0x0
  Start of program headers:          0 (bytes into file)
  Start of section headers:          200 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           0 (bytes)
  Number of program headers:         0
  Size of section headers:           40 (bytes)
  Number of section headers:         9
  Section header string table index: 6

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .text             PROGBITS        00000000 000034 000023 00  AX  0   0  4
  [ 2] .rel.text         REL             00000000 0002e0 000018 08      7   1  4
  [ 3] .data             PROGBITS        00000000 000058 000004 00  WA  0   0  4
  [ 4] .bss              NOBITS          00000000 00005c 000000 00  WA  0   0  4
  [ 5] .comment          PROGBITS        00000000 00005c 000033 00      0   0  1
  [ 6] .shstrtab         STRTAB          00000000 00008f 000039 00      0   0  1
  [ 7] .symtab           SYMTAB          00000000 000230 000090 10      8   6  4
  [ 8] .strtab           STRTAB          00000000 0002c0 00001f 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

There are no section groups in this file.

There are no program headers in this file.

Relocation section '.rel.text' at offset 0x2e0 contains 3 entries:
Offset     Info    Type            Sym.Value  Sym. Name
0000000c  0000080a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000012  00000603 R_386_GOT32       00000000   a
0000001a  00000603 R_386_GOT32       00000000   a

There are no unwind sections in this file.

Symbol table '.symtab' contains 9 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 00000000     0 FILE    LOCAL  DEFAULT  ABS x.c
     2: 00000000     0 SECTION LOCAL  DEFAULT    1
     3: 00000000     0 SECTION LOCAL  DEFAULT    3
     4: 00000000     0 SECTION LOCAL  DEFAULT    4
     5: 00000000     0 SECTION LOCAL  DEFAULT    5
     6: 00000000     4 OBJECT  GLOBAL DEFAULT    3 a
     7: 00000000    35 FUNC    GLOBAL DEFAULT    1 f
     8: 00000000     0 NOTYPE  GLOBAL DEFAULT  UND _GLOBAL_OFFSET_TABLE_

No version information found in this file.
[zws@mail ~]$

和前面的唯一差别就是对变量的访问方式.由a@GOTOFF变成a@GOT,重定位方式也从R_386_GOTOFF变成R_386_GOT32.
a@GOT的访问方式是,将变量a的地址值存入.got节,访问a时,先根据GOT计算存放变量a的地址值在.got中的地址,然后取该地址值,即为变量a的

地址,用一条指令就能实现
        movl    a@GOT(%ebx), %eax
然后就可以对该变量执行操作了
例如a++生成的指令是        incl    (%eax),对该地址处的值增一。

在符号表中
     6: 00000000     4 OBJECT  GLOBAL DEFAULT    3 a
a的bind类型是GLOBAL.

再写一个y.c
[zws@mail ~]$cat y.c
void f();

int main()
{
f();
return 0;
}
[zws@mail ~]$gcc y.c x.o

分析生成的可执行文件a.out可发现

ld在处理R_386_GOT32时,将该符号的地址x存入.got节,并记录其在.got节中的地址y,然后计算y相对于GOT的偏移,存入该符号所有的R_386_GOT32类型重定位地址处。最后在目标文件中为该符号生成R_386_GLOB_DAT类型重定位项,例如
readelf -a a.out

  [20] .got              PROGBITS        080494e0 0004e0 000008 04  WA  0   0  4
  [21] .got.plt          PROGBITS        080494e8 0004e8 000010 04  WA  0   0  4

080494e4  00000406 R_386_GLOB_DAT    08049504   a(显然地址080494e4在.got中)

R_386_GLOB_DAT类型执行的操作是,将模块加载地址加入该重定位处.这样变量的地址就确定了,可以通过y来访问,而且不需要对代码重定位。

如果该变量被其他模块访问(例如动态链接库中的变量被可执行文件访问或动态链接库中的变量被其他动态链接库访问),则执行动态链接时,只

需要将该变量所在的地址x存入引用模块的.got节y处,就能实现共享且PIC.

其实本地访问本地全局数据访问也可以使用GOTOFF方式(例如本例的x.c).想一想为什么不这样做?从指导ld的方面去想。

(3)为本地访问外部全局数据访问提供PIC支持(配合.got节)

[zws@mail ~]$cat x.c
extern int a;

int f()
{
a++;
return a;
}
[zws@mail ~]$gcc -fPIC -S x.c
[zws@mail ~]$cat x.s
        .file   "x.c"
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        call    .L2
.L2:
        popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
        movl    a@GOT(%ebx), %eax
        incl    (%eax)
        movl    a@GOT(%ebx), %eax
        movl    (%eax), %eax
        popl    %ebx
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"

[zws@mail ~]$gcc -shared x.s -o libx.so

readelf 查看一下是否和上面分析的一致

(4)为本地访问本地全局函数调用提供PIC支持(配合.plt节和.got.plt节)
[zws@mail ~]$cat x.c
void f()
{
}

void g()
{
f();
}
[zws@mail ~]$gcc -fPIC -S x.c
[zws@mail ~]$cat x.s
        .file   "x.c"
        .text
.globl f
        .type   f,@function
f:
        pushl   %ebp
        movl    %esp, %ebp
        leave
        ret
.Lfe1:
        .size   f,.Lfe1-f
.globl g
        .type   g,@function
g:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        subl    $4, %esp
        call    .L3
.L3:
        popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L3], %ebx
        call    f@PLT
        addl    $4, %esp
        popl    %ebx
        leave
        ret
.Lfe2:
        .size   g,.Lfe2-g
        .ident  "GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
[zws@mail ~]$as x.s -o x.o
[zws@mail ~]$readelf -r x.o

Relocation section '.rel.text' at offset 0x2dc contains 2 entries:
Offset     Info    Type            Sym.Value  Sym. Name
00000014  0000080a R_386_GOTPC       00000000   _GLOBAL_OFFSET_TABLE_
00000019  00000604 R_386_PLT32       00000000   f

本地调用本地全局函数生成的代码是        call    f@PLT
gas为call f@PLT生成的重定位项是R_386_PLT32       ,指导ld生成.plt节。
[zws@mail ~]$gcc -shared x.o -o libx.so
[zws@mail ~]$readelf -r libx.so

Relocation section '.rel.dyn' at offset 0x22c contains 5 entries:
Offset     Info    Type            Sym.Value  Sym. Name
00001500  00000008 R_386_RELATIVE  
00001504  00000008 R_386_RELATIVE  
000014dc  00000106 R_386_GLOB_DAT    00000000   __gmon_start__
000014e0  00000206 R_386_GLOB_DAT    00000000   _Jv_RegisterClasses
000014e4  00000806 R_386_GLOB_DAT    00000000   __cxa_finalize

Relocation section '.rel.plt' at offset 0x254 contains 3 entries:
Offset     Info    Type            Sym.Value  Sym. Name
000014f4  00000207 R_386_JUMP_SLOT   00000000   _Jv_RegisterClasses
000014f8  00000607 R_386_JUMP_SLOT   00000390   f
000014fc  00000807 R_386_JUMP_SLOT   00000000   __cxa_finalize

[zws@mail ~]$objdump -d libx.so
Disassembly of section .plt:

00000284 <_Jv_RegisterClasses@plt-0x10>:
284:   ff b3 04 00 00 00       pushl  0x4(%ebx)
28a:   ff a3 08 00 00 00       jmp    *0x8(%ebx)
290:   00 00                   add    %al,(%eax)
        ...

00000294 <_Jv_RegisterClasses@plt>:
294:   ff a3 0c 00 00 00       jmp    *0xc(%ebx)
29a:   68 00 00 00 00          push   $0x0
29f:   e9 e0 ff ff ff          jmp    284 <_init+0x18>

000002a4 <f@plt>:
2a4:   ff a3 10 00 00 00       jmp    *0x10(%ebx)
2aa:   68 08 00 00 00          push   $0x8
2af:   e9 d0 ff ff ff          jmp    284 <_init+0x18>

000002b4 <__cxa_finalize@plt>:
2b4:   ff a3 14 00 00 00       jmp    *0x14(%ebx)
2ba:   68 10 00 00 00          push   $0x10
2bf:   e9 c0 ff ff ff          jmp    284 <_init+0x18>

。。。。。。。。。。。。。
00000390 <f>:
390:   55                      push   %ebp
391:   89 e5                   mov    %esp,%ebp
393:   c9                      leave 
394:   c3                      ret   

00000395 <g>:
395:   55                      push   %ebp
396:   89 e5                   mov    %esp,%ebp
398:   53                      push   %ebx
399:   83 ec 04                sub    $0x4,%esp
39c:   e8 00 00 00 00          call   3a1 <g+0xc>
3a1:   5b                      pop    %ebx
3a2:   81 c3 47 11 00 00       add    $0x1147,%ebx
3a8:   e8 f7 fe ff ff          call   2a4 <f@plt>
3ad:   83 c4 04                add    $0x4,%esp
3b0:   5b                      pop    %ebx
3b1:   c9                      leave 
3b2:   c3                      ret   
3b3:   90                      nop

至于这里涉及到的原理,请看下面的分析.这里的%ebx存放的是本模块的GOT地址

(5)为本地访问外部全局函数调用提供PIC支持(配合.plt节和.got.plt节)
[zws@mail ~]$cat x.c
int a=0;

int f()
{
a++;
return a;
}
[zws@mail ~]$gcc -fPIC -shared x.c -o libx.so
[zws@mail ~]$cat y.c
void f();

int main()
{
f();
return 0;
}
[zws@mail ~]$gcc y.c libx.so
[zws@mail ~]$objdump -d a.out
看看外部全局函数调用使用什么方式

080483e8 <main>:
80483e8:       55                      push   %ebp
80483e9:       89 e5                   mov    %esp,%ebp
80483eb:       83 ec 08                sub    $0x8,%esp
80483ee:       83 e4 f0                and    $0xfffffff0,%esp
80483f1:       b8 00 00 00 00          mov    $0x0,%eax
80483f6:       29 c4                   sub    %eax,%esp
80483f8:       e8 2b ff ff ff          call   8048328 <f@plt>
80483fd:       b8 00 00 00 00          mov    $0x0,%eax
8048402:       c9                      leave 
8048403:       c3                      ret

call 8048328,这个地址在.plt节中
Disassembly of section .plt:

08048308 <__libc_start_main@plt-0x10>:
8048308:       ff 35 a0 95 04 08       pushl  0x80495a0
804830e:       ff 25 a4 95 04 08       jmp    *0x80495a4
8048314:       00 00                   add    %al,(%eax)
        ...

08048318 <__libc_start_main@plt>:
8048318:       ff 25 a8 95 04 08       jmp    *0x80495a8
804831e:       68 00 00 00 00          push   $0x0
8048323:       e9 e0 ff ff ff          jmp    8048308 <_init+0x18>

08048328 <f@plt>:
8048328:       ff 25 ac 95 04 08       jmp    *0x80495ac
804832e:       68 08 00 00 00          push   $0x8
8048333:       e9 d0 ff ff ff          jmp    8048308 <_init+0x18>

jmp *0x80495ac,这个地址在.got.plt节中
[zws@mail ~]$objdump -sj .got.plt a.out

a.out:     file format elf32-i386

Contents of section .got.plt:
804959c c8940408 00000000 00000000 1e830408  ................
80495ac 2e830408                             ....           

该地址处的值是0804832e,就是前面jmp *0x80495ac的下一条指令地址
push $0x8,压入立即数8,其实是f的重定位项在.rel.plt节中的偏移(一个重定位项占8字节)
Relocation section '.rel.plt' at offset 0x2e0 contains 2 entries:
Offset     Info    Type            Sym.Value  Sym. Name
080495a8  00000407 R_386_JUMP_SLOT   00000000   __libc_start_main
080495ac  00000807 R_386_JUMP_SLOT   00000000   f
该f符号的重定位偏移是080495ac(就是在前面的.got.plt节中),类型是R_386_JUMP_SLOT.这样动态连接时,查找到f的地址后,写入080495ac处.
这样下次调用f时,就会直接跳到f的真实地址。

push $0x8的下一条指令是jmp    8048308,8048308处的指令是
8048308:       ff 35 a0 95 04 08       pushl  0x80495a0
804830e:       ff 25 a4 95 04 08       jmp    *0x80495a4

第一条pushl 0x80495a0,将0x80495a0地址处的值压栈。0x80495a0在.got.plt中
[zws@mail ~]$objdump -sj .got.plt a.out

a.out:     file format elf32-i386

Contents of section .got.plt:
804959c c8940408 00000000 00000000 1e830408  ................
80495ac 2e830408                             ....          

.got.plt的前三项是有特殊意义的,它们都是地址,在执行动态连接时要用到.第0项080494c8是.dynamic节地址.第1项是本模块的link_map地址,这里是0,动态连接时会存入真实地址,第2项是_dl_runtime_resolve的地址,动态链接时存入.

将本模块的link_map地址压栈后,jmp    *0x80495a4, 显然是跳到_dl_runtime_resolve中,执行链接f任务,_dl_runtime_resolve解析到f地址后,

会存入80495ac处,并将该地址替换栈上的返回地址,这样,_dl_runtime_resolve返回时,直接返回到f中,并执行f.而下次再执行f时就不需要这么

麻烦了。

这种在需要执行时才进行符号链接是所谓的lazy方式动态链接,还有一种就是模块加载时一次性为所有的符号进行链接,无论用不用得到,所谓的

now方式动态链接。

综上,.got节存放的都是被本地引用的本地全局数据(没有被本地引用的不会出现)和外部全局数据的地址,.got.plt前三项特殊,后面都是被本地引用的本地全局函数(没有被本地引用的不会出现)和外部全局函数地址..plt存放过程链接信息(procedure linkage table)..rel.dyn重定位.got(类型为R_386_GLOB_DAT的项),.rel.plt重定位.got.plt.




ld.so分析5 _dl_start

对于不关心的地方,我们都//或/**/注释掉

1._dl_start中的变量声明

static Elf32_Addr //我们假设是i386 32位平台,ElfW(Addr)被宏扩展为Elf32_Addr
//ElfW(Addr)
//__attribute_used__ internal_function
//__attribute__ ((__used__)) __attribute__ ((regparm (3), stdcall))
_dl_start (void *arg)//arg参数的值是argc的地址
{
//#ifdef DONT_USE_BOOTSTRAP_MAP
# define bootstrap_map GL(dl_rtld_map)
//#else
//  struct dl_start_final_info info;
//# define bootstrap_map info.l
//#endif
//#if USE_TLS || (!DONT_USE_BOOTSTRAP_MAP && !HAVE_BUILTIN_MEMSET)
//  size_t cnt;
//#endif
//#ifdef USE_TLS
//  ElfW(Ehdr) *ehdr;
//  ElfW(Phdr) *phdr;
//  dtv_t initdtv[3];
//#endif


宏GL定义如下

#  define GL(name) _rtld_local._##name

展开

#define bootstrap_map _rtld_local._dl_rtld_map

_rtld_local是什么呢?

查看rtld.c的预处理文件可发现如下定义


struct rtld_global _rtld_global =
  {
# 1 "../sysdeps/unix/sysv/linux/i386/dl-procinfo.c" 1
# 47 "../sysdeps/unix/sysv/linux/i386/dl-procinfo.c"
  ._dl_x86_cap_flags
= {
    "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
    "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov",
    "pat", "pse36", "pn", "clflush", "20", "dts", "acpi", "mmx",
    "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "amd3d"
  }
,
  ._dl_x86_platforms
= {
    "i386", "i486", "i586", "i686"
  }
,
# 92 "rtld.c" 2
    ._dl_debug_fd = 2,
    ._dl_dynamic_weak = 1,
    ._dl_lazy = 1,
    ._dl_fpu_control = 0x037f,
    ._dl_correct_cache_id = 3,
    ._dl_hwcap_mask = HWCAP_IMPORTANT,
    ._dl_load_lock = {{0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, { 0, 0 }}}
  };

extern struct rtld_global _rtld_local __attribute__ ((visibility ("hidden")));
extern __typeof (_rtld_global) _rtld_local __attribute__ ((alias ("_rtld_global")));;

结构rtld_global的内容就不贴出来了,大家自己查吧
这里指出,_rtld_local是_rtld_global的别名.查看ld.so的符号表也能印证这一点
[zws@mail ~/glibc-2.3/build/elf]$readelf -s ld.so|grep _rtld
   332: 00012140   980 OBJECT  LOCAL  HIDDEN   14 _rtld_local
   462: 00012140   980 OBJECT  GLOBAL DEFAULT   14 _rtld_global

_rtld_local._dl_rtld_map的类型是struct link_map.这个类型非常重要,是动态链接的核心数据结构
注意这里的HIDDEN属性,这个属性保证访问_rtld_local使用_rtld_local@GOTOFF而不是_rtld_local@GOT,
从而_rtld_local不需要重定位,这个一定很重要


2._dl_start中的动态链接内联函数

  /* This #define produces dynamic linking inline functions for
     bootstrap relocation instead of general-purpose relocation.  */
#define RTLD_BOOTSTRAP
#define RESOLVE_MAP(sym, version, flags) \
  ((*(sym))->st_shndx == SHN_UNDEF ? 0 : &bootstrap_map)
#define RESOLVE(sym, version, flags) \
  ((*(sym))->st_shndx == SHN_UNDEF ? 0 : bootstrap_map.l_addr)
#include "dynamic-link.h"

这里先定义了三个宏,然后包含dynamic-link.h头文件,里面定义了几个动态链接需要用到的宏或函数。
这些宏或函数用到了前面定义的三个宏,因此,根据这三个宏定义的不同,动态链接宏或函数的功能会有所不同,
前面的注释也说明了这一点。至于这些动态链接宏或函数的功能,后面涉及到的时候再分析。

3.获取ld.so的加载基址

  if (HP_TIMING_INLINE && HP_TIMING_AVAIL)
//#ifdef DONT_USE_BOOTSTRAP_MAP
    HP_TIMING_NOW (start_time);//获得开始时间
//#else
//    HP_TIMING_NOW (info.start_time);
//#endif

  /* Partly clean the `bootstrap_map' structure up. 部分清空bootstrap_map结构. Don't use
     `memset' since it might not be built in or inlined and we cannot
     不使用memset是因为它可能不是内建的或内联的函数,而我们现在还不能进行函数调用.
     make function calls at this point.  Use '__builtin_memset' if we
    如果有效的话,使用__builtin_memset
     know it is available.  We do not have to clear the memory if we
     如果不必使用临时bootstrap_map则不需要清0
     do not have to use the temporary bootstrap_map.  Global variables
     全局变量缺省初始化为0
     are initialized to zero by default.  */
/*
#ifndef DONT_USE_BOOTSTRAP_MAP
# ifdef HAVE_BUILTIN_MEMSET
  __builtin_memset (bootstrap_map.l_info, '\0', sizeof (bootstrap_map.l_info));
# else
  for (cnt = 0;
       cnt < sizeof (bootstrap_map.l_info) / sizeof (bootstrap_map.l_info[0]);
       ++cnt)
    bootstrap_map.l_info[cnt] = 0;
# endif
#endif
*/
  /* Figure out the run-time load address of the dynamic linker itself.  */
  bootstrap_map.l_addr = elf_machine_load_address ();//  加载地址 _rtld_local._dl_rtld_map.l_addr = elf_machine_load_address ();

  /* Read our own dynamic section and fill in the info array.  */
  bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic ();//动态节地址
  elf_get_dynamic_info (&bootstrap_map);//取动态信息

4.elf_machine_dynamic和elf_machine_load_address (sysdeps/i386/dl-machine.h)

/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
   first element of the GOT, a special entry that is never relocated.  */
static inline Elf32_Addr //__attribute__ ((unused, const))
elf_machine_dynamic (void)
{
  /* This produces a GOTOFF reloc that resolves to zero at link time, so in
     fact just loads from the GOT register directly.  By doing it without
     an asm we can let the compiler choose any register.  */
  extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
  return _GLOBAL_OFFSET_TABLE_[0];
}


/* Return the run-time load address of the shared object.  */
static inline Elf32_Addr //__attribute__ ((unused))
elf_machine_load_address (void)
{
  /* Compute the difference between the runtime address of _DYNAMIC as seen
     by a GOTOFF reference, and the link-time address found in the special
     unrelocated first GOT entry.  */
  extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC");// attribute_hidden;
  return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
}
有点晦涩难懂,看看汇编代码

  bootstrap_map.l_addr = elf_machine_load_address ();
生成的汇编代码如下
    movl    _GLOBAL_OFFSET_TABLE_@GOTOFF(%ebx), %edx//取GOT[0],即ld.so的dynamic节被ld静态链接时安排的地址
    leal    _DYNAMIC@GOTOFF(%ebx), %eax//取dynamic节运行时加载到内存中的地址
    subl    %edx, %eax//dynamic的地址-got[0],即得镜像加载基址
    movl    %eax, 456+_rtld_local@GOTOFF(%ebx)//该地址存入l_addr

C代码和汇编代码对照着看,就能明白一二。

5.elf_get_dynamic_info (dynamic-link.h)
/* Read the dynamic section at DYN and fill in INFO with indices DT_*.  */

static inline void //__attribute__ ((unused, always_inline))
elf_get_dynamic_info (struct link_map *l)
{
  ElfW(Dyn) *dyn = l->l_ld;
  ElfW(Dyn) **info;

//#ifndef RTLD_BOOTSTRAP
  if (dyn == NULL)
    return;
//#endif
/*
[zws@mail elf]$ readelf -d ld.so

Dynamic section at offset 0x12000 contains 18 entries:
  Tag        Type                         Name/Value
0x0000000e (SONAME)                     Library soname: [ld-linux.so.2]
0x00000004 (HASH)                       0x94
0x00000005 (STRTAB)                     0x48c
0x00000006 (SYMTAB)                     0x1dc
0x0000000a (STRSZ)                      719 (bytes)
0x0000000b (SYMENT)                     16 (bytes)
0x00000003 (PLTGOT)                     0x120e8
0x00000002 (PLTRELSZ)                   72 (bytes)
0x00000014 (PLTREL)                     REL
0x00000017 (JMPREL)                     0x8c8
0x00000011 (REL)                        0x858
0x00000012 (RELSZ)                      112 (bytes)
0x00000013 (RELENT)                     8 (bytes)
0x6ffffffc (VERDEF)                     0x7b4
0x6ffffffd (VERDEFNUM)                  5
0x6ffffff0 (VERSYM)                     0x75c
0x6ffffffa (RELCOUNT)                   5
0x00000000 (NULL)                       0x0
[zws@mail elf]$ readelf -x 11 ld.so

Hex dump of section '.dynamic':
  0x00012000 0e000000 95020000 04000000 94000000 ................
  0x00012010 05000000 8c040000 06000000 dc010000 ................
  0x00012020 0a000000 cf020000 0b000000 10000000 ................
  0x00012030 03000000 e8200100 02000000 48000000 ..... ......H...
  0x00012040 14000000 11000000 17000000 c8080000 ................
  0x00012050 11000000 58080000 12000000 70000000 ....X.......p...
  0x00012060 13000000 08000000 fcffff6f b4070000 ...........o....
  0x00012070 fdffff6f 05000000 f0ffff6f 5c070000 ...o.......o\...
  0x00012080 faffff6f 05000000 00000000 00000000 ...o............
  0x00012090 00000000 00000000 00000000 00000000 ................
  0x000120a0 00000000 00000000 00000000 00000000 ................
*/
  info = l->l_info;//取保存dynamic信息的数据结构

  while (dyn->d_tag != DT_NULL)//遍历
    {
      if (dyn->d_tag < DT_NUM)//长度34,索引范围 [0,33]
    info[dyn->d_tag] = dyn;
      else if (dyn->d_tag >= DT_LOPROC &&
           dyn->d_tag < DT_LOPROC + DT_THISPROCNUM)//0,(0x70000000,0x70000000)
    info[dyn->d_tag - DT_LOPROC + DT_NUM] = dyn;
      else if ((Elf32_Word) DT_VERSIONTAGIDX (dyn->d_tag) < DT_VERSIONTAGNUM)// 16,[0x6ffffff0,0x6fffffff]->[49,34]
    info[VERSYMIDX (dyn->d_tag)] = dyn;
      else if ((Elf32_Word) DT_EXTRATAGIDX (dyn->d_tag) < DT_EXTRANUM)// 3,[0x7ffffffd,0x7fffffff]
    info[DT_EXTRATAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
         + DT_VERSIONTAGNUM] = dyn;
      else if ((Elf32_Word) DT_VALTAGIDX (dyn->d_tag) < DT_VALNUM)// 12,[0x6ffffdf4,0x6ffffdff]
    info[DT_VALTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
         + DT_VERSIONTAGNUM + DT_EXTRANUM] = dyn;
      else if ((Elf32_Word) DT_ADDRTAGIDX (dyn->d_tag) < DT_ADDRNUM)// 10 ,[0x6ffffef6,0x6ffffeff]
    info[DT_ADDRTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM
         + DT_VERSIONTAGNUM + DT_EXTRANUM + DT_VALNUM] = dyn;
      ++dyn;
    }
//#ifndef DL_RO_DYN_SECTION
  /* Don't adjust .dynamic unnecessarily.  */
  if (l->l_addr != 0)//加载地址
    {
    //调整地址
      ElfW(Addr) l_addr = l->l_addr;

      if (info[DT_HASH] != NULL)
    info[DT_HASH]->d_un.d_ptr += l_addr;
      if (info[DT_PLTGOT] != NULL)
    info[DT_PLTGOT]->d_un.d_ptr += l_addr;
      if (info[DT_STRTAB] != NULL)
    info[DT_STRTAB]->d_un.d_ptr += l_addr;
      if (info[DT_SYMTAB] != NULL)
    info[DT_SYMTAB]->d_un.d_ptr += l_addr;
//# if ! ELF_MACHINE_NO_RELA
      if (info[DT_RELA] != NULL)
    info[DT_RELA]->d_un.d_ptr += l_addr;
//# endif
//# if ! ELF_MACHINE_NO_REL
      if (info[DT_REL] != NULL)
    info[DT_REL]->d_un.d_ptr += l_addr;
//# endif
      if (info[DT_JMPREL] != NULL)
    info[DT_JMPREL]->d_un.d_ptr += l_addr;
      if (info[VERSYMIDX (DT_VERSYM)] != NULL)
    info[VERSYMIDX (DT_VERSYM)]->d_un.d_ptr += l_addr;
    }
//#endif
  if (info[DT_PLTREL] != NULL)
    {
//#if ELF_MACHINE_NO_RELA
//      assert (info[DT_PLTREL]->d_un.d_val == DT_REL);
//#elif ELF_MACHINE_NO_REL
//      assert (info[DT_PLTREL]->d_un.d_val == DT_RELA);
//#else
     assert (info[DT_PLTREL]->d_un.d_val == DT_REL
          || info[DT_PLTREL]->d_un.d_val == DT_RELA);
//#endif
    }
//#if ! ELF_MACHINE_NO_RELA
  if (info[DT_RELA] != NULL)
    assert (info[DT_RELAENT]->d_un.d_val == sizeof (ElfW(Rela)));
//# endif
//# if ! ELF_MACHINE_NO_REL
  if (info[DT_REL] != NULL)
    assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel)));
//#endif
  if (info[DT_FLAGS] != NULL)
    {
      /* Flags are used.  Translate to the old form where available.
     Since these l_info entries are only tested for NULL pointers it
     is ok if they point to the DT_FLAGS entry.  */
      l->l_flags = info[DT_FLAGS]->d_un.d_val;
//#ifdef RTLD_BOOTSTRAP
      /* These three flags must not be set for ld.so.  */
//      assert ((l->l_flags & (DF_SYMBOLIC | DF_TEXTREL | DF_BIND_NOW)) == 0);
//#else
      if (l->l_flags & DF_SYMBOLIC)
    info[DT_SYMBOLIC] = info[DT_FLAGS];
      if (l->l_flags & DF_TEXTREL)
    info[DT_TEXTREL] = info[DT_FLAGS];
      if (l->l_flags & DF_BIND_NOW)
    info[DT_BIND_NOW] = info[DT_FLAGS];
//#endif
    }
  if (info[VERSYMIDX (DT_FLAGS_1)] != NULL)
    l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
//#ifdef RTLD_BOOTSTRAP
  /* The dynamic linker should have none of these set.  */
//  assert (info[DT_RUNPATH] == NULL);
//  assert (info[DT_RPATH] == NULL);
//#else
  if (info[DT_RUNPATH] != NULL)
    /* If both RUNPATH and RPATH are given, the latter is ignored.  */
    info[DT_RPATH] = NULL;
//#endif
}


6._dl_start执行自我重定位

/*
#if USE_TLS
# if !defined HAVE___THREAD && !defined DONT_USE_BOOTSTRAP_MAP
  /* Signal that we have not found TLS data so far.  * /
  bootstrap_map.l_tls_modid = 0;
# endif

  /* Get the dynamic linker's own program header.  First we need the ELF
     file header.  The `_begin' symbol created by the linker script points
     to it.  When we have something like GOTOFF relocs, we can use a plain
     reference to find the runtime address.  Without that, we have to rely
     on the `l_addr' value, which is not the value we want when prelinked.  * /
#ifdef DONT_USE_BOOTSTRAP_MAP
  ehdr = (ElfW(Ehdr) *) &_begin;
#else
  ehdr = (ElfW(Ehdr) *) bootstrap_map.l_addr;
#endif
  phdr = (ElfW(Phdr) *) ((ElfW(Addr)) ehdr + ehdr->e_phoff);
  for (cnt = 0; cnt < ehdr->e_phnum; ++cnt)
    if (phdr[cnt].p_type == PT_TLS)
      {
    void *tlsblock;
    size_t max_align = MAX (TLS_INIT_TCB_ALIGN, phdr[cnt].p_align);
    char *p;

    bootstrap_map.l_tls_blocksize = phdr[cnt].p_memsz;
    bootstrap_map.l_tls_align = phdr[cnt].p_align;
    assert (bootstrap_map.l_tls_blocksize != 0);
    bootstrap_map.l_tls_initimage_size = phdr[cnt].p_filesz;
    bootstrap_map.l_tls_initimage = (void *) (bootstrap_map.l_addr
                          + phdr[cnt].p_vaddr);

    /* We can now allocate the initial TLS block.  This can happen
       on the stack.  We'll get the final memory later when we
       know all about the various objects loaded at startup
       time.  * /
# if TLS_TCB_AT_TP
    tlsblock = alloca (roundup (bootstrap_map.l_tls_blocksize,
                    TLS_INIT_TCB_ALIGN)
               + TLS_INIT_TCB_SIZE
               + max_align);
# elif TLS_DTV_AT_TP
    tlsblock = alloca (roundup (TLS_INIT_TCB_SIZE,
                    bootstrap_map.l_tls_align)
               + bootstrap_map.l_tls_blocksize
               + max_align);
# else
    /* In case a model with a different layout for the TCB and DTV
       is defined add another #elif here and in the following #ifs.  * /
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
    /* Align the TLS block.  * /
    tlsblock = (void *) (((uintptr_t) tlsblock + max_align - 1)
                 & ~(max_align - 1));

    /* Initialize the dtv.  [0] is the length, [1] the generation
       counter.  * /
    initdtv[0].counter = 1;
    initdtv[1].counter = 0;

    /* Initialize the TLS block.  * /
# if TLS_TCB_AT_TP
    initdtv[2].pointer = tlsblock;
# elif TLS_DTV_AT_TP
    bootstrap_map.l_tls_offset = roundup (TLS_INIT_TCB_SIZE,
                          bootstrap_map.l_tls_align);
    initdtv[2].pointer = (char *) tlsblock + bootstrap_map.l_tls_offset;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif
    p = __mempcpy (initdtv[2].pointer, bootstrap_map.l_tls_initimage,
               bootstrap_map.l_tls_initimage_size);
# ifdef HAVE_BUILTIN_MEMSET
    __builtin_memset (p, '\0', (bootstrap_map.l_tls_blocksize
                    - bootstrap_map.l_tls_initimage_size));
# else
    {
      size_t remaining = (bootstrap_map.l_tls_blocksize
                  - bootstrap_map.l_tls_initimage_size);
      while (remaining-- > 0)
        *p++ = '\0';
    }
#endif

    /* Install the pointer to the dtv.  * /

    /* Initialize the thread pointer.  * /
# if TLS_TCB_AT_TP
    bootstrap_map.l_tls_offset
      = roundup (bootstrap_map.l_tls_blocksize, TLS_INIT_TCB_ALIGN);

    INSTALL_DTV ((char *) tlsblock + bootstrap_map.l_tls_offset,
             initdtv);

    if (TLS_INIT_TP ((char *) tlsblock + bootstrap_map.l_tls_offset, 0)
        != 0)
      _dl_fatal_printf ("cannot setup thread-local storage\n");
# elif TLS_DTV_AT_TP
    INSTALL_DTV (tlsblock, initdtv);
    if (TLS_INIT_TP (tlsblock, 0) != 0)
      _dl_fatal_printf ("cannot setup thread-local storage\n");
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

    /* So far this is module number one.  * /
    bootstrap_map.l_tls_modid = 1;
    /* The TP got initialized.  * /
    bootstrap_map.l_tls_tp_initialized = 1;

    /* There can only be one PT_TLS entry.  * /
    break;
      }
#endif    /* use TLS * /
*/

//#ifdef ELF_MACHINE_BEFORE_RTLD_RELOC
//  ELF_MACHINE_BEFORE_RTLD_RELOC (bootstrap_map.l_info);
//#endif

  if (bootstrap_map.l_addr || ! bootstrap_map.l_info[VALIDX(DT_GNU_PRELINKED)])
    {
      /* Relocate ourselves so we can do normal function calls and 自我重定位,以便能够使用GOT调用函数和访问数据
     data access using the global offset table.  */

      ELF_DYNAMIC_RELOCATE (&bootstrap_map, 0, 0);
    }

7._dl_start->ELF_DYNAMIC_RELOCATE (dynamic-link.h)


/* Relocate the object described by MAP.

   The expansion first calls elf_machine_runtime_setup with MAP, LAZY
   and CONSIDER_PROFILE and keeps its result in the local edr_lazy,
   then passes MAP and edr_lazy to ELF_DYNAMIC_DO_REL and to
   ELF_DYNAMIC_DO_RELA, in that order.

   This can't just be an inline function because GCC is too dumb
   to inline functions containing inlines themselves.  */
# define ELF_DYNAMIC_RELOCATE(map, lazy, consider_profile) \
  do {                                          \
    int edr_lazy = elf_machine_runtime_setup ((map), (lazy),              \
                          (consider_profile));          \
    ELF_DYNAMIC_DO_REL ((map), edr_lazy);                      \
    ELF_DYNAMIC_DO_RELA ((map), edr_lazy);                      \
  } while (0)

8._dl_start->ELF_DYNAMIC_RELOCATE ->elf_machine_runtime_setup(sysdeps/i386/dl-machine.h)

/* Set up the loaded object described by L so its unrelocated PLT
   entries will jump to the on-demand fixup code in dl-runtime.c.  */

static inline int //__attribute__ ((unused))
elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{
  Elf32_Addr *got;
  extern void _dl_runtime_resolve (Elf32_Word);// attribute_hidden;
  extern void _dl_runtime_profile (Elf32_Word);// attribute_hidden;

  if (l->l_info[DT_JMPREL] && lazy)//有JMPREL且lazy
    {
      /* The GOT entries for functions in the PLT have not yet been filled
     in.  Their initial contents will arrange when called to push an
     offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1],
     and then jump to _GLOBAL_OFFSET_TABLE[2].  */
      got = (Elf32_Addr *) D_PTR (l, l_info[DT_PLTGOT]);//取PLTGOT地址
      /* If a library is prelinked but we have to relocate anyway,
     we have to be able to undo the prelinking of .got.plt.
     The prelinker saved us here address of .plt + 0x16.  */
/*
[zws@mail elf]$   readelf -x 21 a.out

Hex dump of section '.got.plt':
  0x080494e8 1c940408 00000000 00000000 5e820408 ............^...
  0x080494f8 6e820408                            n...

  第一个存放.dynamic节的地址
  第二个存放link_map地址
  第三个存放_dl_runtime_resolve地址
  */
      if (got[1])
    {
      l->l_mach.plt = got[1] + l->l_addr;
      l->l_mach.gotplt = (Elf32_Addr) &got[3];
    }
      got[1] = (Elf32_Addr) l;    /* Identify this shared object.存放本模块的link_map  */

      /* The got[2] entry contains the address of a function which gets
     called to get the address of a so far unresolved function and
     jump to it.  The profiling extension of the dynamic linker allows
     to intercept the calls to collect information.  In this case we
     don't store the address in the GOT so that all future calls also
     end in this function.  */
      if (__builtin_expect (profile, 0))
    {
      got[2] = (Elf32_Addr) &_dl_runtime_profile;

      if (_dl_name_match_p (GL(dl_profile), l))
        /* This is the object we are looking for.  Say that we really
           want profiling and the timers are started.  */
        GL(dl_profile_map) = l;
    }
      else
    /* This function will get called to fix up the GOT entry indicated by
       the offset on the stack, and then jump to the resolved address.  */
    got[2] = (Elf32_Addr) &_dl_runtime_resolve;//存放解析函数
    }

  return lazy;
}

前面传给lazy参数值为0,因此直接返回0,接下来的两个宏定义如下,注意lazy==0

/* As defined here, REL-type relocations are handed to
   _ELF_DYNAMIC_DO_RELOC, while ELF_DYNAMIC_DO_RELA expands to nothing,
   so no RELA processing takes place.  */
#define ELF_DYNAMIC_DO_REL(map,lazy) _ELF_DYNAMIC_DO_RELOC (REL, rel, map, lazy, _ELF_CHECK_REL)
#define ELF_DYNAMIC_DO_RELA(map,lazy)

9._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC(elf/dynamic-link.h)
处理.rel.dyn和.rel.plt重定位节

/* Collect the relocation ranges of MAP for the relocation kind
   RELOC/reloc (the DT_<RELOC> table and, when applicable, the PLT
   relocations found through DT_JMPREL) and hand them to
   elf_dynamic_do_<reloc>.

   When ELF_DURING_STARTUP is nonzero only ranges[0] is processed, with
   a lazy argument of 0; a PLT table that passes the DT_PLTREL test is
   then asserted to follow the DT_<RELOC> table directly and its size is
   merged into ranges[0].  Otherwise both ranges are processed, each
   with its own lazy flag.

   The inline notes quote the dynamic-section values of the ld.so
   examined in this walkthrough.  */
#  define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, do_lazy, test_rel) \
  do {                                          \
    struct { ElfW(Addr) start, size; int lazy; } ranges[2];              \
    ranges[0].lazy = 0;                                  \
    ranges[0].size = ranges[1].size = 0;                      \
    ranges[0].start = 0;                              \
                                          \
    if ((map)->l_info[DT_##RELOC])    /* DT_REL: is there a .rel.dyn section?  0x00000011 (REL)                        0x858 */                      \
      {                                          \
    ranges[0].start = D_PTR ((map), l_info[DT_##RELOC]);/* section address; the size follows */              \
    ranges[0].size = (map)->l_info[DT_##RELOC##SZ]->d_un.d_val;/* 0x00000012 (RELSZ)                      112 (bytes) */          \
      }                                          \
    if ((map)->l_info[DT_PLTREL]/* is there a .rel.plt section?  0x00000014 (PLTREL)                     REL */                          \
    && (!test_rel/* test_rel==0 */ || (map)->l_info[DT_PLTREL]->d_un.d_val == DT_##RELOC/* is the value DT_REL? */)) \
      {                                          \
    ElfW(Addr) start = D_PTR ((map), l_info[DT_JMPREL]);    /* address of .rel.plt, 0x00000017 (JMPREL)                     0x8c8 */          \
                                          \
    if (! ELF_DURING_STARTUP    /* this macro is defined as 1 here */                      \
        && ((do_lazy)/* do_lazy==0 */                              \
        /* This test does not only detect whether the relocation      \
           sections are in the right order, it also checks whether    \
           there is a DT_REL/DT_RELA section.  */              \
        || ranges[0].start + ranges[0].size != start))/* .rel.dyn and .rel.plt are not contiguous */              \
      {                                      \
        ranges[1].start = start;                          \
        ranges[1].size = (map)->l_info[DT_PLTRELSZ]->d_un.d_val;          \
        ranges[1].lazy = (do_lazy);                          \
      }                                      \
    else                                      \
      {                                      \
        /* Combine processing the sections.  Clearly this is the path taken here.  */                  \
        assert (ranges[0].start + ranges[0].size == start);    /* the addresses are contiguous */          \
        ranges[0].size += (map)->l_info[DT_PLTRELSZ]->d_un.d_val;/* merge the sizes, 0x00000002 (PLTRELSZ)                   72 (bytes) */          \
      }                                      \
      }                                          \
                                          \
    if (ELF_DURING_STARTUP)        /* 1 */                      \
      elf_dynamic_do_##reloc ((map), ranges[0].start, ranges[0].size, 0); /* calls elf_dynamic_do_rel */    \
    else                                      \
      {                                          \
    int ranges_index;                              \
    for (ranges_index = 0; ranges_index < 2; ++ranges_index)          \
      elf_dynamic_do_##reloc ((map),                      \
                  ranges[ranges_index].start,              \
                  ranges[ranges_index].size,              \
                  ranges[ranges_index].lazy);              \
      }                                          \
  } while (0)

看看ld.so的重定位信息
[zws@mail ~/glibc-2.3/build/elf]$readelf -r ld.so

Relocation section '.rel.dyn' at offset 0x858 contains 14 entries:
Offset     Info    Type            Sym.Value  Sym. Name
000120c0  00000008 R_386_RELATIVE  
000120c8  00000008 R_386_RELATIVE  
000120d8  00000008 R_386_RELATIVE  
000120dc  00000008 R_386_RELATIVE  
000120e0  00000008 R_386_RELATIVE  
000120b0  00000106 R_386_GLOB_DAT    000126d0   __libc_internal_tsd_se
000120b4  00000206 R_386_GLOB_DAT    00012140   _rtld_global
000120b8  00000606 R_386_GLOB_DAT    00000000   __pthread_mutex_lock
000120bc  00000706 R_386_GLOB_DAT    000126d4   __libc_stack_end
000120c4  00000a06 R_386_GLOB_DAT    00000000   __pthread_mutex_init
000120cc  00001106 R_386_GLOB_DAT    000126e4   __libc_internal_tsd_ge
000120d0  00001306 R_386_GLOB_DAT    00000000   __pthread_mutex_unlock
000120d4  00001806 R_386_GLOB_DAT    00000000   __pthread_mutex_destro
000120e4  00002606 R_386_GLOB_DAT    000126f8   _r_debug

Relocation section '.rel.plt' at offset 0x8c8 contains 9 entries:
Offset     Info    Type            Sym.Value  Sym. Name
000120f4  00000607 R_386_JUMP_SLOT   00000000   __pthread_mutex_lock
000120f8  00000907 R_386_JUMP_SLOT   0000bdc4   __libc_memalign
000120fc  00000a07 R_386_JUMP_SLOT   00000000   __pthread_mutex_init
00012100  00000b07 R_386_JUMP_SLOT   0000bea0   malloc
00012104  00001207 R_386_JUMP_SLOT   0000bec2   calloc
00012108  00001307 R_386_JUMP_SLOT   00000000   __pthread_mutex_unlock
0001210c  00001807 R_386_JUMP_SLOT   00000000   __pthread_mutex_destro
00012110  00001b07 R_386_JUMP_SLOT   0000bf25   realloc
00012114  00002907 R_386_JUMP_SLOT   0000beff   free
[zws@mail ~/glibc-2.3/build/elf]$

10._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel(elf/do-rel.h)
执行实质的重定位操作

/* Perform the relocations in MAP on the running program image as specified
   by RELTAG, SZTAG.  If LAZY is nonzero, this is the first pass on PLT
   relocations; they should be set up to call _dl_runtime_resolve, rather
   than fully resolved now.  */

static inline void
elf_dynamic_do_rel (struct link_map *map,
            ElfW(Addr) reladdr, ElfW(Addr) relsize,
            int lazy)
{
  const ElfW(Rel) *r = (const void *) reladdr;
  const ElfW(Rel) *end = (const void *) (reladdr + relsize);
  ElfW(Addr) l_addr = map->l_addr;
/*
#if (!defined DO_RELA || !defined ELF_MACHINE_PLT_REL) && !defined RTLD_BOOTSTRAP
  /* We never bind lazily during ld.so bootstrap.  Unfortunately gcc is
     not clever enough to see through all the function calls to realize
     that.  * /
  if (lazy)
    {
      /* Doing lazy PLT relocations; they need very little info.  * /
      for (; r < end; ++r)
    elf_machine_lazy_rel (map, l_addr, r);
    }
  else
#endif
*/
    {
      const ElfW(Sym) *const symtab =
    (const void *) D_PTR (map, l_info[DT_SYMTAB]);//取符号表
      ElfW(Word) nrelative = (map->l_info[RELCOUNT_IDX] == NULL
                  ? 0 : map->l_info[RELCOUNT_IDX]->d_un.d_val);//R_386_RELATIVE重定位项个数 0x6ffffffa (RELCOUNT)                   5
      const ElfW(Rel) *relative = r;// 0x00000011 (REL)                        0x858
      r = r + MIN (nrelative, relsize / sizeof (ElfW(Rel)));
/*
#ifndef RTLD_BOOTSTRAP
      /* This is defined in rtld.c, but nowhere in the static libc.a; make
     the reference weak so static programs can still link.  This
     declaration cannot be done when compiling rtld.c (i.e. #ifdef
     RTLD_BOOTSTRAP) because rtld.c contains the common defn for
     _dl_rtld_map, which is incompatible with a weak decl in the same
     file.  * /
# ifndef SHARED
      weak_extern (GL(dl_rtld_map));
# endif
      if (map != &GL(dl_rtld_map)) /* Already done in rtld itself.  * /
# if !defined DO_RELA || defined ELF_MACHINE_REL_RELATIVE
    /* Rela platforms get the offset from r_addend and this must
       be copied in the relocation address.  Therefore we can skip
       the relative relocations only if this is for rel
       relocations or rela relocations if they are computed as
       memory_loc += l_addr...  * /
    if (l_addr != 0)
# else
    /* ...or we know the object has been prelinked.  * /
    if (l_addr != 0 || ! map->l_info[VALIDX(DT_GNU_PRELINKED)])
# endif
#endif
*/
      for (; relative < r; ++relative)
        DO_ELF_MACHINE_REL_RELATIVE (map, l_addr, relative);//先处理前面的相对重定位

11._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel->DO_ELF_MACHINE_REL_RELATIVE (elf/do-rel.h)
重定位R_386_RELATIVE重定位项

/* Apply the relative relocation entry RELATIVE of an object loaded at
   L_ADDR by calling elf_machine_rel_relative; the location handed over
   for patching is L_ADDR + RELATIVE->r_offset.  MAP is not used by the
   expansion.

   Every argument is parenthesized in the expansion so that argument
   expressions containing operators keep their intended precedence.  */
# define DO_ELF_MACHINE_REL_RELATIVE(map, l_addr, relative) \
  elf_machine_rel_relative ((l_addr), (relative),                      \
                (void *) ((l_addr) + (relative)->r_offset))

11._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel->DO_ELF_MACHINE_REL_RELATIVE-> elf_machine_rel_relative (sysdeps/i386/dl-machine.h)

/* Apply one R_386_RELATIVE relocation: the word stored at RELOC_ADDR is
   increased by the load address L_ADDR of the module.  RELOC is only
   consulted by the assertion on its relocation type.  */
static inline void
elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc,
              Elf32_Addr *const reloc_addr)
{
  /* The entry must be of type R_386_RELATIVE.  */
  assert (R_386_RELATIVE == ELF32_R_TYPE (reloc->r_info));

  /* Original value plus the module's load address.  */
  *reloc_addr = *reloc_addr + l_addr;
}

12._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel(elf/do-rel.h)


//#ifdef RTLD_BOOTSTRAP
      /* The dynamic linker always uses versioning.  */
      assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL);//动态链接器总是使用版本信息
//#else
//      if (map->l_info[VERSYMIDX (DT_VERSYM)])
//#endif
    {
      const ElfW(Half) *const version =
        (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);//0x6ffffff0 (VERSYM)                     0x75c

      for (; r < end; ++r)
        {
          ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
          elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
                   &map->l_versions[ndx],
                   (void *) (l_addr + r->r_offset));
/*等价于
              Elf32_Half ndx = version[((r->r_info) >> 8)] & 0x7fff;
              elf_machine_rel (map, r, &symtab[((r->r_info) >> 8)],
                               &map->l_versions[ndx],
                               (void *) (l_addr + r->r_offset));

*/
        }
    }
/*
#ifndef RTLD_BOOTSTRAP
      else
    for (; r < end; ++r)
      elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
               (void *) (l_addr + r->r_offset));
#endif
*/
    }
}


ld.so的版本符号表是
[zws@mail ~/glibc-2.3/build/elf]$objdump -sj .gnu.version ld.so

ld.so:     file format elf32-i386

Contents of section .gnu.version:
075c 00000500 05000500 05000500 00000500  ................
076c 05000200 00000200 05000300 05000500  ................
077c 05000500 02000000 05000500 05000500  ................
078c 00000200 05000200 05000500 05000500  ................
079c 05000500 05000300 05000500 02000500  ................
07ac 04000200 0500                        ......   

typedef uint16_t Elf32_Half;

map->l_versions其实为空,不过elf_machine_rel 中没有用到

11._dl_start->ELF_DYNAMIC_RELOCATE ->_ELF_DYNAMIC_DO_RELOC->elf_dynamic_do_rel->elf_machine_rel (sysdeps/i386/dl-machine.h)

/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
   MAP is the object containing the reloc.  */

static inline void
elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
         const Elf32_Sym *sym, const struct r_found_version *version,
         Elf32_Addr *const reloc_addr)
{
  const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
/*
#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC
  if (__builtin_expect (r_type == R_386_RELATIVE, 0))
    {
# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC
      /* This is defined in rtld.c, but nowhere in the static libc.a;
     make the reference weak so static programs can still link.
     This declaration cannot be done when compiling rtld.c
     (i.e. #ifdef RTLD_BOOTSTRAP) because rtld.c contains the
     common defn for _dl_rtld_map, which is incompatible with a
     weak decl in the same file.  * /
#  ifndef SHARED
      weak_extern (_dl_rtld_map);
#  endif
      if (map != &GL(dl_rtld_map)) /* Already done in rtld itself.  * /
# endif
    *reloc_addr += map->l_addr;
    }
# ifndef RTLD_BOOTSTRAP
  else if (__builtin_expect (r_type == R_386_NONE, 0))
    return;
# endif
  else
#endif
*/
    {
      const Elf32_Sym *const refsym = sym;
//#if defined USE_TLS && !defined RTLD_BOOTSTRAP
//      struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
//      Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
//#else
      Elf32_Addr value = RESOLVE (&sym, version, r_type);//等价于Elf32_Addr value = ((*(&sym))->st_shndx == 0 ? 0 : _rtld_local._dl_rtld_map.l_addr);

//# ifndef RTLD_BOOTSTRAP
//      if (sym != NULL)
//# endif
    value += sym->st_value;//加上sym->st_value中的值
//#endif

      switch (r_type)
    {
    case R_386_GLOB_DAT: //ld.so中只有这两个
    case R_386_JMP_SLOT:
      *reloc_addr = value;
      break;




一路返回到_dl_start中,就完成重定位了。
大家想一想如何保证到现在还没有用到重定位的数据?
通过全部使用inline函数或宏,且只使用_rtld_local(vis为hidden)和局部变量来保证.

12.返回_dl_start,完成动态链接

  /* Please note that we don't allow profiling of this object and
     therefore need not test whether we have to allocate the array
     for the relocation results (as done in dl-reloc.c).  */

  /* Now life is sane; we can call functions and access global data.
     Set up to use the operating system facilities, and find out from
     the operating system's program loader where to find the program
     header table in core.  Put the rest of _dl_start into a separate
将_dl_start中剩下的工作放在独立的函数中,这样编译器就不会将需要
     function, that way the compiler cannot put accesses to the GOT
访问GOT的操作放在ELF_DYNAMIC_RELOCATE之前
     before ELF_DYNAMIC_RELOCATE.  */
  {
//#ifdef DONT_USE_BOOTSTRAP_MAP
    ElfW(Addr) entry = _dl_start_final (arg);//完成动态链接,返回可执行文件入口
//#else
//    ElfW(Addr) entry = _dl_start_final (arg, &info);
//#endif

//#ifndef ELF_MACHINE_START_ADDRESS
# define ELF_MACHINE_START_ADDRESS(map, start) (start)
//#endif

    return ELF_MACHINE_START_ADDRESS (GL(dl_loaded), entry);//等价于return entry;
  }
}
ld.so 分析之6 _dl_start_final

1.setup hash

/* This is the second half of _dl_start (below).  It can be inlined safely
这是_dl_start的第二部分.
   under DONT_USE_BOOTSTRAP_MAP, where it is careful not to make any GOT
在DONT_USE_BOOTSTRAP_MAP下它能被安全内联,在DONT_USE_BOOTSTRAP_MAP下不允许引用GOT
   references.  When the tools don't permit us to avoid using a GOT entry
   for _dl_rtld_global (no attribute_hidden support), we must make sure
   this function is not inlined (see below). 
当工具链无法让我们避免通过GOT访问_dl_rtld_global时(即不支持attribute_hidden),我们必须保证该函数不被内联
*/

//#ifdef DONT_USE_BOOTSTRAP_MAP
static inline Elf32_Addr//
//ElfW(Addr) __attribute__ ((always_inline)) 总是内联
_dl_start_final (void *arg)
//#else
//static ElfW(Addr) __attribute__ ((noinline))
//_dl_start_final (void *arg, struct dl_start_final_info *info)
//#endif
{
  ElfW(Addr) start_addr;

  if (HP_TIMING_AVAIL)// 1
    {
      /* If it hasn't happen yet record the startup time.  */
//      if (! HP_TIMING_INLINE)// 1
//    HP_TIMING_NOW (start_time);
//#if !defined DONT_USE_BOOTSTRAP_MAP && !defined HP_TIMING_NONAVAIL
//      else
//    start_time = info->start_time;
//#endif

      /* Initialize the timing functions.  */
      HP_TIMING_DIFF_INIT ();
    }

/* Transfer data about ourselves to the permanent link_map structure. */
/*
#ifndef DONT_USE_BOOTSTRAP_MAP
  GL(dl_rtld_map).l_addr = info->l.l_addr;
  GL(dl_rtld_map).l_ld = info->l.l_ld;
  memcpy (GL(dl_rtld_map).l_info, info->l.l_info,
      sizeof GL(dl_rtld_map).l_info);
  GL(dl_rtld_map).l_mach = info->l.l_mach;
#endif
*/
  _dl_setup_hash (&GL(dl_rtld_map));//  _dl_setup_hash (&_rtld_local._dl_rtld_map);
  GL(dl_rtld_map).l_opencount = 1;

2._dl_start_final->_dl_setup_hash

/* Cache the location of MAP's hash table.

   The DT_HASH table starts with two words, the bucket count and the
   chain count, followed by the bucket array and then the chain array.
   The bucket count and the addresses of both arrays are recorded in
   MAP; the chain count is skipped and not stored.  Nothing is done when
   MAP has no DT_HASH entry.  */

void
//internal_function
_dl_setup_hash (struct link_map *map)
{
  Elf_Symndx *words;	/* typedef uint32_t Elf_Symndx; */
  Elf_Symndx *buckets;

  /* For ld.so the entry is e.g. 0x00000004 (HASH) 0x94.  */
  if (map->l_info[DT_HASH])
    {
      /* map->l_info[DT_HASH]->d_un.d_ptr: address of the hash table.  */
      words = (void *) D_PTR (map, l_info[DT_HASH]);

      map->l_nbuckets = words[0];
      /* words[1] is nchain, which is not kept.  */
      buckets = words + 2;
      map->l_buckets = buckets;
      map->l_chain = buckets + map->l_nbuckets;
    }
}

ld.so hash表的内容是
[zws@mail ~/glibc-2.3/build/elf]$ objdump -sj .hash ld.so

ld.so:     file format elf32-i386

Contents of section .hash:
0094 25000000 2b000000 0d000000 21000000  %...+.......!...
00a4 28000000 00000000 06000000 22000000  (..........."...
00b4 00000000 00000000 00000000 08000000  ................
00c4 1e000000 00000000 1a000000 23000000  ............#...
00d4 26000000 0e000000 1d000000 17000000  &...............
00e4 25000000 24000000 00000000 13000000  %...$...........
00f4 00000000 0b000000 18000000 14000000  ................
0104 27000000 1b000000 00000000 15000000  '...............
0114 00000000 29000000 1c000000 00000000  ....)...........
0124 0c000000 2a000000 19000000 00000000  ....*...........
0134 00000000 00000000 00000000 00000000  ................
0144 00000000 00000000 00000000 00000000  ................
0154 00000000 00000000 00000000 00000000  ................
0164 05000000 02000000 03000000 07000000  ................
0174 00000000 10000000 00000000 00000000  ................
0184 00000000 00000000 00000000 00000000  ................
0194 00000000 00000000 00000000 11000000  ................
01a4 01000000 0a000000 00000000 0f000000  ................
01b4 00000000 09000000 20000000 04000000  ........ .......
01c4 00000000 16000000 12000000 00000000  ................
01d4 00000000 1f000000                    ........       


hash表的作用是加快链接速度。当在动态链接库中查找是否有需要被外部链接的函数时,
如果直接线性搜索库的动态符号表且表比较大,速度很慢。采用散列的方法查找就比较好。

这里l_nbuckets值为0x25=37,nchain 值为0x2b=43,l_buckets存放散列表入口,l_chain用于将散列值相同的符号连接成单链表。
nchain其实就是动态符号数。该链表中第一个符号索引值A存在l_buckets中,下一个符号的索引值B存放在索引值A在l_chain中的偏移处等等。

问题1.hash表大小是如何计算的?

由binutils 2.18 的bfd/elflink.c文件中compute_bucket_count 计算

/* Array used to determine the number of hash table buckets to use
   based on the number of symbols there are.  If there are fewer than
   3 symbols we use 1 bucket, fewer than 17 symbols we use 3 buckets,
   fewer than 37 we use 17 buckets, and so forth.  We never use more
   than 32771 buckets.  The trailing 0 terminates the table.  */

static const size_t elf_buckets[] =
{
  1, 3, 17, 37, 67, 97, 131, 197, 263, 521, 1031, 2053, 4099, 8209,
  16411, 32771, 0
};

/* Compute bucket count for hashing table.  We do not use a static set
   of possible tables sizes anymore.  Instead we determine for all
   possible reasonable sizes of the table the outcome (i.e., the
   number of collisions etc) and choose the best solution.  The
   weighting functions are not too simple to allow the table to grow
   without bounds.  Instead one of the weighting factors is the size.
   Therefore the result is always a good payoff between few collisions
   (= short chain lengths) and table size.

   Only the fixed-table fallback is shown in this walkthrough: the
   result is the last elf_buckets entry visited before NSYMS is found to
   be smaller than the following entry (32771 when NSYMS is larger than
   every entry), raised to 2 when GNU_HASH is nonzero and the result
   would be smaller.  INFO and HASHCODES are not used by the code shown;
   the locals dynsymcount and amt, which the fallback does not use
   either, have been dropped.  */
static size_t
compute_bucket_count (struct bfd_link_info *info, unsigned long int *hashcodes,
		      unsigned long int nsyms, int gnu_hash)
{
  size_t best_size = 0;
  unsigned long int i;

  /* We have a problem here.  The following code to optimize the table
     size requires an integer type with more the 32 bits.  If
     BFD_HOST_U_64_BIT is set we know about such a type.  */

#ifdef BFD_HOST_U_64_BIT
  /* ... the optimizing search that needs a 64-bit integer type is
     omitted from this walkthrough ...  */
#endif /* defined (BFD_HOST_U_64_BIT) */

  {
    /* This is the fallback solution if no 64bit type is available or if we
       are not supposed to spend much time on optimizations.  We select the
       bucket count using a fixed set of numbers.  */
    for (i = 0; elf_buckets[i] != 0; i++)	/* walk the elf_buckets table */
      {
	best_size = elf_buckets[i];		/* candidate bucket count */
	if (nsyms < elf_buckets[i + 1])		/* fewer symbols than the next step */
	  break;				/* found */
      }
    if (gnu_hash && best_size < 2)
      best_size = 2;
  }

  return best_size;
}

由于本例中符号数是43,根据计算得桶大小是37,和前面的桶值相等。


问题2:符号的hash值如何计算?

同样在elflink.c中,由bfd_elf_hash计算

/* Standard ELF hash function.  Do not change this function; you will
   cause invalid hash tables to be generated.

   NAMEARG is a NUL-terminated symbol name whose bytes are processed as
   unsigned char values.  The result is the hash value reduced to its
   low 32 bits.  */

unsigned long
bfd_elf_hash (const char *namearg)
{
  const unsigned char *cursor;
  unsigned long hash = 0;

  for (cursor = (const unsigned char *) namearg; *cursor != '\0'; ++cursor)
    {
      unsigned long top;

      hash = (hash << 4) + *cursor;
      top = hash & 0xf0000000;
      if (top != 0)
	{
	  /* Fold bits 28..31 into bits 4..7 and clear them.  The ELF ABI
	     says `h &= ~g' for the clearing step, but xor-ing with the
	     same bits is equivalent in this case.  */
	  hash ^= (top >> 24) ^ top;
	}
    }

  return hash & 0xffffffff;
}

一个符号的hash值%桶大小即得其在hash表中的索引.

问题3.动态符号表的内容是什么?

动态符号表中存放的是全局符号,包括本地和外地。
本地符号提供给其他模块使用,其Ndx是一个数字(节索引),指出该符号所在的节(ABS表示绝对符号)。外地符号的Ndx是UND,需要动态链接。

例如
readelf -s ld.so

Symbol table '.dynsym' contains 43 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 000126d0     4 OBJECT  GLOBAL DEFAULT   15 __libc_internal_tsd_set@@GLIBC_PRIVATE
     2: 00012140   980 OBJECT  GLOBAL DEFAULT   14 _rtld_global@@GLIBC_PRIVATE
     3: 00009f6b    44 FUNC    GLOBAL DEFAULT    9 _dl_debug_printf@@GLIBC_PRIVATE
     4: 0000a372  1066 FUNC    GLOBAL DEFAULT    9 _dl_check_map_versions@@GLIBC_PRIVATE
     5: 00006808   757 FUNC    GLOBAL DEFAULT    9 _dl_lookup_versioned_symb@@GLIBC_PRIVATE
     6: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __pthread_mutex_lock
     7: 000126d4     4 OBJECT  GLOBAL DEFAULT   15 __libc_stack_end@@GLIBC_PRIVATE
     8: 000096e0   307 FUNC    GLOBAL DEFAULT    9 _dl_init@@GLIBC_PRIVATE
     9: 0000bdc4   220 FUNC    WEAK   DEFAULT    9 __libc_memalign@@GLIBC_2.0
    10: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __pthread_mutex_init
    11: 0000bea0    34 FUNC    WEAK   DEFAULT    9 malloc@@GLIBC_2.0
    12: 00006189   605 FUNC    GLOBAL DEFAULT    9 _dl_lookup_symbol_skip@@GLIBC_PRIVATE
    13: 00000000     0 OBJECT  GLOBAL DEFAULT  ABS GLIBC_2.1
    14: 000063e6  1058 FUNC    GLOBAL DEFAULT    9 _dl_lookup_versioned_symb@@GLIBC_PRIVATE
    15: 00012514     4 OBJECT  GLOBAL DEFAULT   14 __libc_enable_secure@@GLIBC_PRIVATE
    16: 00005ec4   709 FUNC    GLOBAL DEFAULT    9 _dl_lookup_symbol@@GLIBC_PRIVATE
    17: 000126e4     4 OBJECT  GLOBAL DEFAULT   15 __libc_internal_tsd_get@@GLIBC_PRIVATE
    18: 0000bec2    61 FUNC    WEAK   DEFAULT    9 calloc@@GLIBC_2.0
    19: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __pthread_mutex_unlock
    20: 000126ec     4 OBJECT  GLOBAL DEFAULT   15 __libc_internal_tsd_addre@@GLIBC_PRIVATE
    21: 00009b86     5 FUNC    GLOBAL DEFAULT    9 _dl_debug_state@@GLIBC_PRIVATE
    22: 00012120     4 OBJECT  GLOBAL DEFAULT   14 _dl_argv@@GLIBC_PRIVATE
    23: 000030db   272 FUNC    GLOBAL DEFAULT    9 _dl_dst_substitute@@GLIBC_PRIVATE
    24: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __pthread_mutex_destroy
    25: 00000000     0 OBJECT  GLOBAL DEFAULT  ABS GLIBC_2.0
    26: 00000000     0 OBJECT  GLOBAL DEFAULT  ABS GLIBC_PRIVATE
    27: 0000bf25   139 FUNC    WEAK   DEFAULT    9 realloc@@GLIBC_2.0
    28: 0000b43c   373 FUNC    GLOBAL DEFAULT    9 _dl_get_origin@@GLIBC_PRIVATE
    29: 0000a868  1981 FUNC    GLOBAL DEFAULT    9 _dl_start_profile@@GLIBC_PRIVATE
    30: 00007ca0  1072 FUNC    GLOBAL DEFAULT    9 _dl_relocate_object@@GLIBC_PRIVATE
    31: 00003056   133 FUNC    GLOBAL DEFAULT    9 _dl_dst_count@@GLIBC_PRIVATE
    32: 00012124     4 OBJECT  GLOBAL DEFAULT   14 _dl_starting_up@@GLIBC_PRIVATE
    33: 00005b89    75 FUNC    GLOBAL DEFAULT    9 _dl_unload_cache@@GLIBC_PRIVATE
    34: 0000f460    14 OBJECT  GLOBAL DEFAULT   10 _dl_out_of_memory@@GLIBC_PRIVATE
    35: 0000b025   562 FUNC    GLOBAL DEFAULT    9 _dl_mcount@@GLIBC_2.1
    36: 00004b84  1917 FUNC    GLOBAL DEFAULT    9 _dl_map_object@@GLIBC_PRIVATE
    37: 000091cc   433 FUNC    GLOBAL DEFAULT    9 _dl_signal_error@@GLIBC_PRIVATE
    38: 000126f8    20 OBJECT  GLOBAL DEFAULT   15 _r_debug@@GLIBC_2.0
    39: 0000940d   318 FUNC    GLOBAL DEFAULT    9 _dl_catch_error@@GLIBC_PRIVATE
    40: 00000000     0 OBJECT  GLOBAL DEFAULT  ABS GLIBC_2.3
    41: 0000beff    38 FUNC    WEAK   DEFAULT    9 free@@GLIBC_2.0
    42: 00008255  3291 FUNC    GLOBAL DEFAULT    9 _dl_map_object_deps@@GLIBC_PRIVATE

符号表的第一个符号总是被保留的.因此实际可用的符号数是42

上面是通过节表来显示动态符号表的,我们也可以通过动态节来显示动态符号表

[zws@mail ~/glibc-2.3/build/elf]$readelf -Ds ld.so

Symbol table for image:
  Num Buc:    Value  Size   Type   Bind Vis      Ndx Name
   13   0: 00000000     0  OBJECT GLOBAL DEFAULT ABS GLIBC_2.1
    5   0: 00006808   757    FUNC GLOBAL DEFAULT   9 _dl_lookup_versioned_symbol_skip
   33   1: 00005b89    75    FUNC GLOBAL DEFAULT   9 _dl_unload_cache
   40   2: 00000000     0  OBJECT GLOBAL DEFAULT ABS GLIBC_2.3
    6   4: 00000000     0  NOTYPE   WEAK DEFAULT UND __pthread_mutex_lock
   34   5: 0000f460    14  OBJECT GLOBAL DEFAULT  10 _dl_out_of_memory
    9   5: 0000bdc4   220    FUNC   WEAK DEFAULT   9 __libc_memalign
    8   9: 000096e0   307    FUNC GLOBAL DEFAULT   9 _dl_init
   30  10: 00007ca0  1072    FUNC GLOBAL DEFAULT   9 _dl_relocate_object
   10  10: 00000000     0  NOTYPE   WEAK DEFAULT UND __pthread_mutex_init
   26  12: 00000000     0  OBJECT GLOBAL DEFAULT ABS GLIBC_PRIVATE
   35  13: 0000b025   562    FUNC GLOBAL DEFAULT   9 _dl_mcount
   32  13: 00012124     4  OBJECT GLOBAL DEFAULT  14 _dl_starting_up
   15  13: 00012514     4  OBJECT GLOBAL DEFAULT  14 __libc_enable_secure
    3  13: 00009f6b    44    FUNC GLOBAL DEFAULT   9 _dl_debug_printf
   38  14: 000126f8    20  OBJECT GLOBAL DEFAULT  15 _r_debug
   22  14: 00012120     4  OBJECT GLOBAL DEFAULT  14 _dl_argv
   14  15: 000063e6  1058    FUNC GLOBAL DEFAULT   9 _dl_lookup_versioned_symbol
    2  15: 00012140   980  OBJECT GLOBAL DEFAULT  14 _rtld_global
   29  16: 0000a868  1981    FUNC GLOBAL DEFAULT   9 _dl_start_profile
    1  16: 000126d0     4  OBJECT GLOBAL DEFAULT  15 __libc_internal_tsd_set
   23  17: 000030db   272    FUNC GLOBAL DEFAULT   9 _dl_dst_substitute
   37  18: 000091cc   433    FUNC GLOBAL DEFAULT   9 _dl_signal_error
   36  19: 00004b84  1917    FUNC GLOBAL DEFAULT   9 _dl_map_object
    4  19: 0000a372  1066    FUNC GLOBAL DEFAULT   9 _dl_check_map_versions
   19  21: 00000000     0  NOTYPE   WEAK DEFAULT UND __pthread_mutex_unlock
   11  23: 0000bea0    34    FUNC   WEAK DEFAULT   9 malloc
   24  24: 00000000     0  NOTYPE   WEAK DEFAULT UND __pthread_mutex_destroy
   20  25: 000126ec     4  OBJECT GLOBAL DEFAULT  15 __libc_internal_tsd_address
   39  26: 0000940d   318    FUNC GLOBAL DEFAULT   9 _dl_catch_error
   18  26: 0000bec2    61    FUNC   WEAK DEFAULT   9 calloc
   16  26: 00005ec4   709    FUNC GLOBAL DEFAULT   9 _dl_lookup_symbol
    7  26: 000126d4     4  OBJECT GLOBAL DEFAULT  15 __libc_stack_end
   27  27: 0000bf25   139    FUNC   WEAK DEFAULT   9 realloc
   21  29: 00009b86     5    FUNC GLOBAL DEFAULT   9 _dl_debug_state
   41  31: 0000beff    38    FUNC   WEAK DEFAULT   9 free
   28  32: 0000b43c   373    FUNC GLOBAL DEFAULT   9 _dl_get_origin
   17  32: 000126e4     4  OBJECT GLOBAL DEFAULT  15 __libc_internal_tsd_get
   12  34: 00006189   605    FUNC GLOBAL DEFAULT   9 _dl_lookup_symbol_skip
   42  35: 00008255  3291    FUNC GLOBAL DEFAULT   9 _dl_map_object_deps
   31  35: 00003056   133    FUNC GLOBAL DEFAULT   9 _dl_dst_count
   25  36: 00000000     0  OBJECT GLOBAL DEFAULT ABS GLIBC_2.0

这里Num列显示该符号在符号表中的索引,Buc列显示该符号在Hash表中的索引,索引值相同的符号按照先后顺序显示.
可见索引为0的保留符号是不进入hash表的

可用如下命令显示各种长度桶的直方图,用于分析散列效果.

[zws@mail ~/glibc-2.3/build/elf]$readelf -I ld.so

Histogram for bucket list length (total of 37 buckets):
Length  Number     % of total  Coverage
      0  10         ( 27.0%)                          //长度为0的桶占总桶的27%
      1  16         ( 43.2%)     38.1%           //长度为1的桶占总桶的43.2%,其总符号数占总符号的38.1%
      2  9          ( 24.3%)     81.0%            //....
      3  0          (  0.0%)     81.0%
      4  2          (  5.4%)    100.0%

length为桶长,number为相同桶长的个数,%  of total为相同桶长的个数占总桶的百分比,coverage为相同桶长的桶中总符号数
占总符号数的百分比。

问题4:.dynsym节和.symtab节的联系和区别?

[zws@mail ~/glibc-2.3/build/elf]$readelf -S ld.so
There are 30 section headers, starting at offset 0x96078:

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .hash             HASH            00000094 000094 000148 04   A  2   0  4
  [ 2] .dynsym           DYNSYM          000001dc 0001dc 0002b0 10   A  3   1  4
  [ 3] .dynstr           STRTAB          0000048c 00048c 0002cf 00   A  0   0  1
  [ 4] .gnu.version      VERSYM          0000075c 00075c 000056 02   A  2   0  2
  [ 5] .gnu.version_d    VERDEF          000007b4 0007b4 0000a4 00   A  3   5  4
  [ 6] .rel.dyn          REL             00000858 000858 000070 08   A  2   0  4
  [ 7] .rel.plt          REL             000008c8 0008c8 000048 08   A  2   8  4
  [ 8] .plt              PROGBITS        00000910 000910 0000a0 04  AX  0   0  4
  [ 9] .text             PROGBITS        000009b0 0009b0 00e6ce 00  AX  0   0 16
  [10] .rodata           PROGBITS        0000f080 00f080 002e60 00   A  0   0 32
  [11] .dynamic          DYNAMIC         00012000 012000 0000b0 08  WA  3   0  4
  [12] .got              PROGBITS        000120b0 0120b0 000038 04  WA  0   0  4
  [13] .got.plt          PROGBITS        000120e8 0120e8 000030 04  WA  0   0  4
  [14] .data             PROGBITS        00012120 012120 000408 00  WA  0   0 32
  [15] .bss              NOBITS          00012540 012528 0001cc 00  WA  0   0 32
  [16] .stab             PROGBITS        00000000 012528 0004f8 0c     17   0  4
  [17] .stabstr          STRTAB          00000000 012a20 000276 00      0   0  1
  [18] .comment          PROGBITS        00000000 012c96 0009f6 00      0   0  1
  [19] .debug_aranges    PROGBITS        00000000 01368c 0005e0 00      0   0  1
  [20] .debug_pubnames   PROGBITS        00000000 013c6c 000bd9 00      0   0  1
  [21] .debug_info       PROGBITS        00000000 014845 06722d 00      0   0  1
  [22] .debug_abbrev     PROGBITS        00000000 07ba72 006978 00      0   0  1
  [23] .debug_line       PROGBITS        00000000 0823ea 009e0e 00      0   0  1
  [24] .debug_frame      PROGBITS        00000000 08c1f8 001934 00      0   0  4
  [25] .debug_str        PROGBITS        00000000 08db2c 0083e3 01  MS  0   0  1
  [26] .gnu.warning.llse PROGBITS        00000000 095f20 00003f 00      0   0 32
  [27] .shstrtab         STRTAB          00000000 095f5f 000118 00      0   0  1
  [28] .symtab           SYMTAB          00000000 096528 001f70 10     29 461  4
  [29] .strtab           STRTAB          00000000 098498 00174a 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

strip ld.so后发现
[zws@mail ~/glibc-2.3/build/elf]$readelf -S ldx.so    
There are 19 section headers, starting at offset 0x12ffc:

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .hash             HASH            00000094 000094 000148 04   A  2   0  4
  [ 2] .dynsym           DYNSYM          000001dc 0001dc 0002b0 10   A  3   1  4
  [ 3] .dynstr           STRTAB          0000048c 00048c 0002cf 00   A  0   0  1
  [ 4] .gnu.version      VERSYM          0000075c 00075c 000056 02   A  2   0  2
  [ 5] .gnu.version_d    VERDEF          000007b4 0007b4 0000a4 00   A  3   5  4
  [ 6] .rel.dyn          REL             00000858 000858 000070 08   A  2   0  4
  [ 7] .rel.plt          REL             000008c8 0008c8 000048 08   A  2   8  4
  [ 8] .plt              PROGBITS        00000910 000910 0000a0 04  AX  0   0  4
  [ 9] .text             PROGBITS        000009b0 0009b0 00e6ce 00  AX  0   0 16
  [10] .rodata           PROGBITS        0000f080 00f080 002e60 00   A  0   0 32
  [11] .dynamic          DYNAMIC         00012000 012000 0000b0 08  WA  3   0  4
  [12] .got              PROGBITS        000120b0 0120b0 000038 04  WA  0   0  4
  [13] .got.plt          PROGBITS        000120e8 0120e8 000030 04  WA  0   0  4
  [14] .data             PROGBITS        00012120 012120 000408 00  WA  0   0 32
  [15] .bss              NOBITS          00012540 012528 0001cc 00  WA  0   0 32
  [16] .comment          PROGBITS        00000000 012528 0009f6 00      0   0  1
  [17] .gnu.warning.llse PROGBITS        00000000 012f20 00003f 00      0   0 32
  [18] .shstrtab         STRTAB          00000000 012f5f 00009c 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

少了11个节,分别是
  [16] .stab             PROGBITS        00000000 012528 0004f8 0c     17   0  4
  [17] .stabstr          STRTAB          00000000 012a20 000276 00      0   0  1
  [19] .debug_aranges    PROGBITS        00000000 01368c 0005e0 00      0   0  1
  [20] .debug_pubnames   PROGBITS        00000000 013c6c 000bd9 00      0   0  1
  [21] .debug_info       PROGBITS        00000000 014845 06722d 00      0   0  1
  [22] .debug_abbrev     PROGBITS        00000000 07ba72 006978 00      0   0  1
  [23] .debug_line       PROGBITS        00000000 0823ea 009e0e 00      0   0  1
  [24] .debug_frame      PROGBITS        00000000 08c1f8 001934 00      0   0  4
  [25] .debug_str        PROGBITS        00000000 08db2c 0083e3 01  MS  0   0  1
  [28] .symtab           SYMTAB          00000000 096528 001f70 10     29 461  4
  [29] .strtab           STRTAB          00000000 098498 00174a 00      0   0  1

前面9个都是调试信息节。后面两个,一个是.symtab,一个是为.symtab服务的字符串表节.strtab;.symtab的Lk值是29,指向的正是.strtab节.
说明这些信息都不是执行程序必须的。执行程序用的符号表是.dynsym节,以及为其服务的.hash和.dynstr.

.dynsym和.symtab分别占用文件空间,两者磁盘空间不相干。在内容上,.dynsym是.symtab的子集。.dynsym用于执行时动态链接,
.symtab由objdump等分析程序使用.这里也体现了ELF的执行和存储两种视图。

两者侧重点不同,但是相通。链接程序将所有执行时需要的节安排在一起,并安排在最前面。这样程序加载的时候,基址就在文件头处。
在hash节前面的是ELF文件头,然后是程序头。
执行时不需要的节放在后面,准确说是.bss后。.bss不占用文件空间,但是占用内存空间。

因此前面的
  [16] .comment          PROGBITS        00000000 012528 0009f6 00      0   0  1
  [17] .gnu.warning.llse PROGBITS        00000000 012f20 00003f 00      0   0 32
  [18] .shstrtab         STRTAB          00000000 012f5f 00009c 00      0   0  1
也可以删掉
还有节表本身,被安排在最后,也可以删掉,不会影响程序的执行。

我们可以作如下实验,编写一个简单的hello world程序,编译执行。

使用readelf -S hello,找到.bss节,假设文件偏移是x.
使用python脚本将其截断。
# x is the file offset of the .bss section, read from `readelf -S hello`.
# The ELF file is binary data, so it is opened in binary read/write mode.
f=open("hello","r+b")
f.seek(x)
f.truncate()   # cut the file at the current position, i.e. at offset x
f.close()      # the call parentheses are required; a bare `f.close` does nothing
再执行一下hello,看是否还是可执行.


3.设置map_start和map_end

  GL(dl_rtld_map).l_map_start = (ElfW(Addr)) _begin;// 0
  GL(dl_rtld_map).l_map_end = (ElfW(Addr)) _end;// bss 最后
  /* Copy the TLS related data if necessary.  */
/*#if USE_TLS && !defined DONT_USE_BOOTSTRAP_MAP
//# ifdef HAVE___THREAD
//  assert (info->l.l_tls_modid != 0);
//# else
  if (info->l.l_tls_modid != 0)
//# endif
    {
      GL(dl_rtld_map).l_tls_blocksize = info->l.l_tls_blocksize;
      GL(dl_rtld_map).l_tls_align = info->l.l_tls_align;
      GL(dl_rtld_map).l_tls_initimage_size = info->l.l_tls_initimage_size;
      GL(dl_rtld_map).l_tls_initimage = info->l.l_tls_initimage;
      GL(dl_rtld_map).l_tls_offset = info->l.l_tls_offset;
      GL(dl_rtld_map).l_tls_modid = 1;
      GL(dl_rtld_map).l_tls_tp_initialized
    = info->l.l_tls_tp_initialized;
    }
#endif
*/
//#if HP_TIMING_AVAIL
  HP_TIMING_NOW (GL(dl_cpuclock_offset));
//#endif

查看ld.so的符号表

405: 00000000     0 NOTYPE  LOCAL  DEFAULT  ABS _begin
417: 0001270c     0 NOTYPE  LOCAL  DEFAULT  ABS _end

对比节表可发现

_end正好指向bss结尾(是地址而不是文件偏移)

4._dl_sysdep_start


  /* Call the OS-dependent function to set up life so we can do things like
调用操作系统相关函数,建立操作环境,这样就能执行文件访问等操作
     file access.  It will call `dl_main' (below) to do all the real work
     of the dynamic linker, and then unwind our frame and run the user
这将会调用dl_main完成所有的动态链接工作
     entry point on the same stack we entered on.
最后退出并执行用户入口
*/
  start_addr =  _dl_sysdep_start (arg, &dl_main);//传递dl_main函数,返回用户入口地址

//#ifndef HP_TIMING_NONAVAIL
  if (HP_TIMING_AVAIL)// 1
    {
      hp_timing_t end_time;

      /* Get the current time.  */
      HP_TIMING_NOW (end_time);//记录end_time

      /* Compute the difference.  */
      HP_TIMING_DIFF (rtld_total_time, start_time, end_time);//计算耗时
    }
//#endif

  if (__builtin_expect (GL(dl_debug_mask) & DL_DEBUG_STATISTICS, 0))
    print_statistics ();//如果需要,输出统计信息

  return start_addr;
}

输出的统计信息如下

[zws@mail ~/glibc-2.3/build/elf]$  LD_DEBUG=statistics ls /proc/slabinfo
     30251:
     30251:     runtime linker statistics:
     30251:       total startup time in dynamic loader: 1141112 clock cycles
     30251:                 time needed for relocation: 498188 clock cycles (43.6%)
     30251:                      number of relocations: 103
     30251:           number of relocations from cache: 5
     30251:                time needed to load objects: 377760 clock cycles (33.1%)
/proc/slabinfo
     30251:
     30251:     runtime linker statistics:
     30251:                final number of relocations: 156
     30251:     final number of relocations from cache: 5
[zws@mail ~/glibc-2.3/build/elf]$

ld.so分析之7 _dl_sysdep_start

(sysdeps/generic/dl-sysdep.c)

1.获取内核传递过来的信息

Elf32_Addr//ElfW(Addr)
_dl_sysdep_start (void **start_argptr,
          void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum,
                   ElfW(Addr) *user_entry))
{
  const ElfW(Phdr) *phdr = NULL;
  ElfW(Word) phnum = 0;
  ElfW(Addr) user_entry;
  ElfW(auxv_t) *av;
  uid_t uid = 0;
  gid_t gid = 0;
//#ifdef HAVE_AUX_XID
//# define set_seen(tag) (tag) /* Evaluate for the side effects.  */
//#else
  unsigned int seen = 0;
# define M(type) (1 << (type))
# define set_seen(tag) seen |= M ((tag)->a_type)
//#endif

  DL_FIND_ARG_COMPONENTS (start_argptr, _dl_argc, INTUSE(_dl_argv), _environ,
              _dl_auxv);

2.DL_FIND_ARG_COMPONENTS

/* Decode the initial process stack that COOKIE points at.
   The word at COOKIE is stored into ARGC; ARGV is set to the address of
   the word right after it; ENVP is set to ARGV + ARGC + 1, i.e. just past
   the slot that follows the ARGC argument pointers.  The loop then walks
   ENVP until it reaches a null entry, and AUXP is set to the address
   immediately after that null entry.  */
# define DL_FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp)    \
  do {                                          \
    void **_tmp;                                  \
    (argc) = *(long int *) cookie;                          \
    (argv) = (char **) ((long int *) cookie + 1);                  \
    (envp) = (argv) + (argc) + 1;                          \
    for (_tmp = (void **) (envp); *_tmp; ++_tmp)                  \
      continue;                                      \
    (auxp) = (void *) ++_tmp;                              \
  } while (0)

start_argptr指向argc在栈上的地址,因此这个宏的目的很简单,取得argc,argv,envp,auxp变量值。现在再次把内核传递信息贴出

注意前面的实参
start_argptr, 是局部变量
_dl_argc,  有hidden属性
INTUSE(_dl_argv),即_dl_argv_internal 有hidden属性,是_dl_argv的alias,_dl_argv是全局变量
_environ, 有hidden属性,但是最终的符号属性是
  307: 00012524     4 OBJECT  LOCAL  DEFAULT   14 _environ
有点不一样,不知道什么原因?
_dl_auxv 是局部变量
因此会使用GOTOFF访问他们而不需要重定位



/*
内存布局如下
  position            content                     size (bytes) + comment
  ------------------------------------------------------------------------
  stack pointer ->  [ argc = number of args ]     4
                    [ argv[0] (pointer) ]         4   (program name)
                    [ argv[1] (pointer) ]         4
                    [ argv[..] (pointer) ]        4 * x
                    [ argv[n - 1] (pointer) ]     4
                    [ argv[n] (pointer) ]         4   (= NULL)

                    [ envp[0] (pointer) ]         4
                    [ envp[1] (pointer) ]         4
                    [ envp[..] (pointer) ]        4
                    [ envp[term] (pointer) ]      4   (= NULL)

                    [ auxv[0] AT_PHDR (Elf32_auxv_t) ]    8
                    [ auxv[1] AT_PHENT (Elf32_auxv_t) ]    8
                    [ auxv[2] AT_PHNUM (Elf32_auxv_t) ]   8
                    [ auxv[3] AT_BASE (Elf32_auxv_t) ]   8
                    [ auxv[4] AT_FLAGS (Elf32_auxv_t) ]   8
                    [ auxv[5] AT_ENTRY (Elf32_auxv_t) ]   8
                    [ auxv[6] AT_UID (Elf32_auxv_t) ]   8
                    [ auxv[7] AT_EUID (Elf32_auxv_t) ]   8
                    [ auxv[8] AT_GID (Elf32_auxv_t) ]   8
                    [ auxv[9] AT_EGID (Elf32_auxv_t) ]   8
                    [ auxv[10] AT_HWCAP (Elf32_auxv_t) ]   8
                    [ auxv[11] AT_PAGESZ (Elf32_auxv_t) ]   8
                    [ auxv[12] AT_CLKTCK (Elf32_auxv_t) ]   8
                    [ auxv[13] AT_PLATFORM (Elf32_auxv_t) ]   8
                    [ auxv[14] (Elf32_auxv_t) ] 8   (= AT_NULL vector)

         [ padding ]                   0 - 15                   
                    [ padding ]                   16                   
                    [ padding ]                   0 - 15                   

         [k_platform]                  0 - 65
                    [ argument ASCIIZ strings ]   >= 0
                    [ environment ASCIIZ str. ]   >= 0
                       [filename] >=0

  (0xbffffffc)      [ end marker ]                4   (= NULL)

  (0xc0000000)      < top of stack >              0   (virtual)
*/

3.解析auxv

  user_entry = (ElfW(Addr)) ENTRY_POINT;//_start,默认为ld.so的_start,_start有属性hidden
  GL(dl_platform) = NULL; /* Default to nothing known about the platform.  */

  for (av = _dl_auxv; av->a_type != AT_NULL; set_seen (av++))
    switch (av->a_type)
      {
      case AT_PHDR:
    phdr = av->a_un.a_ptr;
    break;
      case AT_PHNUM:
    phnum = av->a_un.a_val;
    break;
      case AT_PAGESZ:
    GL(dl_pagesize) = av->a_un.a_val;//4k
    break;
      case AT_ENTRY:
    user_entry = av->a_un.a_val;//用户入口
    break;
//#ifdef NEED_DL_BASE_ADDR
//      case AT_BASE:
//    _dl_base_addr = av->a_un.a_val;
//    break;
//#endif
      case AT_UID:
      case AT_EUID:
    uid ^= av->a_un.a_val;//等价于uid=0^AT_UID^AT_EUID=AT_UID^AT_EUID,即判断AT_UID和AT_EUID是否相等
    break;
      case AT_GID:
      case AT_EGID:
    gid ^= av->a_un.a_val;//同理判断AT_GID和AT_EGID是否相等
    break;
      case AT_PLATFORM:
    GL(dl_platform) = av->a_un.a_ptr;
    break;
      case AT_HWCAP:
    GL(dl_hwcap) = av->a_un.a_val;
    break;
      case AT_CLKTCK:
    GL(dl_clktck) = av->a_un.a_val;
    break;
      case AT_FPUCW:
    GL(dl_fpu_control) = av->a_un.a_val;
    break;
      }

//#ifdef DL_SYSDEP_OSCHECK
  DL_SYSDEP_OSCHECK (dl_fatal);//编译时为空
//#endif

  /* Fill in the values we have not gotten from the kernel through the
     auxiliary vector.  */
//#ifndef HAVE_AUX_XID
# define SEE(UID, var, uid) \
   if ((seen & M (AT_##UID)) == 0) var ^= __get##uid ()//如果没有该属性,就调用系统调用取得
  SEE (UID, uid, uid);//if ((seen & (1 << (AT_UID))) == 0) uid ^= __getuid ();
  SEE (EUID, uid, euid);//if ((seen & (1 << (AT_EUID))) == 0) uid ^= __geteuid ();
  SEE (GID, gid, gid);//if ((seen & (1 << (AT_GID))) == 0) gid ^= __getgid ();
  SEE (EGID, gid, egid);//if ((seen & (1 << (AT_EGID))) == 0) gid ^= __getegid ();
//#endif

  /* If one of the two pairs of IDs does not mattch this is a setuid
如果两对id中有一对不相等,则这是一个setuid或setgid程序
     or setgid run.  */
  INTUSE(__libc_enable_secure) = uid | gid;

//#ifndef HAVE_AUX_PAGESIZE
  if (GL(dl_pagesize) == 0)
    GL(dl_pagesize) = __getpagesize ();
//#endif

//#ifdef DL_SYSDEP_INIT
  DL_SYSDEP_INIT;
//#endif

//#ifdef DL_PLATFORM_INIT
  DL_PLATFORM_INIT;
//#endif

4.DL_SYSDEP_INIT(sysdeps/unix/sysv/linux/dl-sysdep.c)

#define DL_SYSDEP_INIT frob_brk ()

/* Issue a single __brk (0) call so the break bookkeeping is initialized
   before anything else relies on it.  */
static inline void
frob_brk (void)
{
  __brk (0);            /* Initialize the break: obtain the starting break address.  */
}


5.DL_SYSDEP_INIT->frob_brk ->__brk(sysdeps/unix/sysv/linux/i386/brk.c)

/* This must be initialized data because commons can't have aliases.  */
void *__curbrk = 0;

/* Old braindamage in GCC's crtstuff.c requires this symbol in an attempt
   to work around different old braindamage in the old Linux ELF dynamic
   linker.  */
weak_alias (__curbrk, ___brk_addr)

/* Ask the kernel to set the program break to ADDR and record the value
   the kernel hands back in __curbrk.
   Returns 0 on success.  If the break reported by the kernel is below
   ADDR, errno is set to ENOMEM and -1 is returned.  */
int
__brk (void *addr)
{
  void *__unbounded newbrk, *__unbounded scratch;

  asm ("movl %%ebx, %1\n"    /* Save %ebx in scratch register (the article notes %ebx holds the GOT address).  */
       "movl %3, %%ebx\n"    /* Put ADDR in %ebx to be syscall arg.  */
       "int $0x80 # %2\n"    /* Perform the system call.  */
       "movl %1, %%ebx\n"    /* Restore %ebx from scratch register.  */
       : "=a" (newbrk), "=r" (scratch)
       : "0" (SYS_ify (brk)), "g" (__ptrvalue (addr)));// SYS_ify (brk) is equivalent to __NR_brk, i.e. the sys_brk system call

  __curbrk = newbrk;// remember the break address the kernel returned

  /* A resulting break below the requested address is treated as failure.  */
  if (newbrk < addr)
    {
      __set_errno (ENOMEM);
      return -1;
    }

  return 0;
}

weak_alias (__brk, brk)//brk是__brk的alias且weak


6.DL_SYSDEP_INIT->frob_brk ->__brk->sys_brk(内核中 2.4.0)

asmlinkage unsigned long sys_brk(unsigned long brk)
{
    unsigned long rlim, retval;
    unsigned long newbrk, oldbrk;
    struct mm_struct *mm = current->mm;

    down(&mm->mmap_sem);

    if (brk < mm->end_code)
        goto out;

由于前面的调用参数是0,因此直接out
out:
    retval = mm->brk;
    up(&mm->mmap_sem);
    return retval;
}

返回mm->brk,由于到目前为止还没有调用sys_brk,应该返回的是mm->start_brk,即紧随bss后的地址.

验证
[zws@mail ~/glibc-2.3/build/elf]$strace -e brk ls
brk(0)                                  = 0x80586c8
...
[zws@mail ~]$readelf -S /bin/ls
  [23] .bss              NOBITS          08058360 010360 000368 00  WA  0   0 32

0x8058360+0x368=0x80586c8

7.DL_PLATFORM_INIT(sysdeps/i386/dl-machine.h)

/* We define an initialization functions.  This is called very early in
   _dl_sysdep_start.  */
#define DL_PLATFORM_INIT dl_platform_init ()

/* Normalize GL(dl_platform): a non-NULL pointer to an empty string is
   replaced by NULL, so later code only has to test for NULL.  */
static inline void //__attribute__ ((unused))
dl_platform_init (void)// article note: nothing much to say about this function
{
  if (GL(dl_platform) != NULL && *GL(dl_platform) == '\0')
    /* Avoid an empty string which would disturb us.  */
    GL(dl_platform) = NULL;
}

8.调用__sbrk

  /* Determine the length of the platform name.  */
  if (GL(dl_platform) != NULL)
    GL(dl_platformlen) = strlen (GL(dl_platform));

  if (__sbrk (0) == &_end)
    /* The dynamic linker was run as a program, and so the initial break
动态链接器本身直接运行,所以起始break就紧随bss,在&_end处
       starts just after our bss, at &_end.  The malloc in dl-minimal.c
       will consume the rest of this page, so tell the kernel to move the
在dl-minimal.c中的malloc将消耗掉该页剩下部分,所以告诉内核移动break跳过该部分
       break up that far.  When the user program examines its break, it
       will see this new value and not clobber our data. 
当用户程序检查它的break,它将会看到新值,而不会破坏我们的数据.

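不太明白这里的意思?(注:当ld.so被当作普通程序直接运行时,初始break紧随ld.so自己的bss,即&_end处;而dl-minimal.c中的malloc会用掉&_end所在页的剩余部分。所以这里先把break推到下一个页边界,这样用户程序之后检查并扩展break时,看到的是新值,不会覆盖ld.so已经使用的这部分数据。)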
*/
    __sbrk (GL(dl_pagesize) - ((&_end - (void *) 0) & (GL(dl_pagesize) - 1)));


9.__sbrk(sysdeps/generic/sbrk.c)
/* Extend the process's data space by INCREMENT.
   If INCREMENT is negative, shrink data space by - INCREMENT.
   Return start of new space allocated (the break value before the
   change), or -1 for errors.  When INCREMENT is 0 the current break
   held in __curbrk is returned unchanged.  */
void *
__sbrk (intptr_t increment)
{
  void *oldbrk;

  /* If this is not part of the dynamic library or the library is used
     via dynamic loading in a statically linked program update
     __curbrk from the kernel's brk value.  That way two separate
     instances of __brk and __sbrk can share the heap, returning
     interleaved pieces of it.  */
  if (__curbrk == NULL || __libc_multiple_libcs)// article note: __libc_multiple_libcs is 0 here, so the condition is false and __brk is not called (presumably __curbrk was already set by the earlier __brk (0))
    if (__brk (0) < 0)        /* Initialize the break.  */
      return (void *) -1;

  if (increment == 0)// a zero increment just reports the current break
    return __curbrk;

  oldbrk = __curbrk;
  if (__brk (oldbrk + increment) < 0)// move the break to oldbrk + increment
    return (void *) -1;

  return oldbrk;
}


10.返回_dl_sysdep_start

  /* If this is a SUID program we make sure that FDs 0, 1, and 2 are
     allocated.  If necessary we are doing it ourself.  If it is not
如果是SUID程序,确保FD 0,1,2都被分配,如果必须,我们自己分配它们。
     possible we stop the program. 
否则停止程序
*/
  if (__builtin_expect (INTUSE(__libc_enable_secure), 0))
    __libc_check_standard_fds ();

11.__libc_check_standard_fds (sysdeps/generic/check_fds.c)

/* Run check_one_fd on each of the three standard descriptors: stdin is
   checked with O_RDONLY, stdout and stderr with O_RDWR, all combined
   with O_NOFOLLOW.  */
void
__libc_check_standard_fds (void)
{
  /* This is really paranoid but some people actually are.  If /dev/null
     should happen to be a symlink to somewhere else and not the device
     commonly known as "/dev/null" we bail out.  We can detect this with
     the O_NOFOLLOW flag for open() but only on some system.  */
//#ifndef O_NOFOLLOW // article note: already defined in this build, value 0400000
//# define O_NOFOLLOW    0
//#endif
  /* Check all three standard file descriptors.  */
  check_one_fd (STDIN_FILENO, O_RDONLY | O_NOFOLLOW);
  check_one_fd (STDOUT_FILENO, O_RDWR | O_NOFOLLOW);
  check_one_fd (STDERR_FILENO, O_RDWR | O_NOFOLLOW);
}

12.__libc_check_standard_fds->check_one_fd  (sysdeps/generic/check_fds.c)

/* Make sure descriptor FD is open.  If fcntl (FD, F_GETFD) fails with
   EBADF, open _PATH_DEVNULL with MODE and verify that the result landed
   on FD and is a character device (and, when DEV_NULL_MAJOR and
   DEV_NULL_MINOR are defined, that it is exactly that device).  If any
   of those checks fails, execute ABORT_INSTRUCTION in an endless loop.

   Should other OSes (e.g., Hurd) have different versions which can
   be written in a better way?  */
static void
check_one_fd (int fd, int mode)
{
  if (__builtin_expect (__libc_fcntl (fd, F_GETFD), 0) == -1
      && errno == EBADF)// this fd is not open
    {
      struct stat64 st;

      /* Something is wrong with this descriptor, it's probably not
     opened.  Open /dev/null so that the SUID program we are
     about to start does not accidentally use this descriptor.  */
      int nullfd = __libc_open (_PATH_DEVNULL, mode);
      /* We are very paranoid here.  With all means we try to ensure
     that we are actually opening the /dev/null device and nothing
     else.

     Note that the following code assumes that STDIN_FILENO,
     STDOUT_FILENO, STDERR_FILENO are the three lowest file
     descriptor numbers, in this order.  */
      if (__builtin_expect (nullfd != fd, 0)// the descriptor we got is not the one we wanted
      || __builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) != 0// cannot stat this fd
      || __builtin_expect (S_ISCHR (st.st_mode), 1) == 0// this fd is not a character device
#if defined DEV_NULL_MAJOR && defined DEV_NULL_MINOR
      || st.st_rdev != makedev (DEV_NULL_MAJOR, DEV_NULL_MINOR)// the device is not the null device
#endif
      )
    /* We cannot even give an error message here since it would
       run into the same problems.  */
    while (1)
      /* Try for ever and ever.  */
      ABORT_INSTRUCTION;// article note: asm ("hlt");
    }
}

13.一切都准备好了,调用dl_main

  (*dl_main) (phdr, phnum, &user_entry);
  return user_entry;
}

 

ld.so分析8 dl_main->process_envars处理环境变量

dl_main函数是ld.so的真实主体,很大很复杂,想读懂它必须选择一条主线或情景.我们就看hello world程序如何被动态链接的吧。

1.准备例子

hello.c

#include <stdio.h>

/* Minimal dynamically linked example used for the rest of the
   walkthrough: prints one line and exits with status 0.  */
int main()
{
/* The printf call is what produces the printf entry in the .rel.plt
   and .dynsym listings shown further down.  */
printf("Hello World!\n");
return 0;
}

gcc hello.c -o hello

显示完整的elf信息
[zws@mail ~/glibc-2.3/build/elf]$readelf -a hello
ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           Intel 80386
  Version:                           0x1
  Entry point address:               0x8048278
  Start of program headers:          52 (bytes into file)
  Start of section headers:          7460 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         6
  Size of section headers:           40 (bytes)
  Number of section headers:         35
  Section header string table index: 32

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .interp           PROGBITS        080480f4 0000f4 000013 00   A  0   0  1
  [ 2] .note.ABI-tag     NOTE            08048108 000108 000020 00   A  0   0  4
  [ 3] .hash             HASH            08048128 000128 000028 04   A  4   0  4
  [ 4] .dynsym           DYNSYM          08048150 000150 000050 10   A  5   1  4
  [ 5] .dynstr           STRTAB          080481a0 0001a0 00004c 00   A  0   0  1
  [ 6] .gnu.version      VERSYM          080481ec 0001ec 00000a 02   A  4   0  2
  [ 7] .gnu.version_r    VERNEED         080481f8 0001f8 000020 00   A  5   1  4
  [ 8] .rel.dyn          REL             08048218 000218 000008 08   A  4   0  4
  [ 9] .rel.plt          REL             08048220 000220 000010 08   A  4  11  4
  [10] .init             PROGBITS        08048230 000230 000017 00  AX  0   0  4
  [11] .plt              PROGBITS        08048248 000248 000030 04  AX  0   0  4
  [12] .text             PROGBITS        08048278 000278 000160 00  AX  0   0  4
  [13] .fini             PROGBITS        080483d8 0003d8 00001b 00  AX  0   0  4
  [14] .rodata           PROGBITS        080483f4 0003f4 000016 00   A  0   0  4
  [15] .eh_frame         PROGBITS        0804840c 00040c 000004 00   A  0   0  4
  [16] .ctors            PROGBITS        08049410 000410 000008 00  WA  0   0  4
  [17] .dtors            PROGBITS        08049418 000418 000008 00  WA  0   0  4
  [18] .jcr              PROGBITS        08049420 000420 000004 00  WA  0   0  4
  [19] .dynamic          DYNAMIC         08049424 000424 0000c8 08  WA  5   0  4
  [20] .got              PROGBITS        080494ec 0004ec 000004 04  WA  0   0  4
  [21] .got.plt          PROGBITS        080494f0 0004f0 000014 04  WA  0   0  4
  [22] .data             PROGBITS        08049504 000504 00000c 00  WA  0   0  4
  [23] .bss              NOBITS          08049510 000510 000004 00  WA  0   0  4
  [24] .comment          PROGBITS        00000000 000510 000132 00      0   0  1
  [25] .debug_aranges    PROGBITS        00000000 000648 000078 00      0   0  8
  [26] .debug_pubnames   PROGBITS        00000000 0006c0 000025 00      0   0  1
  [27] .debug_info       PROGBITS        00000000 0006e5 000a84 00      0   0  1
  [28] .debug_abbrev     PROGBITS        00000000 001169 000138 00      0   0  1
  [29] .debug_line       PROGBITS        00000000 0012a1 00027c 00      0   0  1
  [30] .debug_frame      PROGBITS        00000000 001520 000014 00      0   0  4
  [31] .debug_str        PROGBITS        00000000 001534 0006ba 01  MS  0   0  1
  [32] .shstrtab         STRTAB          00000000 001bee 000134 00      0   0  1
  [33] .symtab           SYMTAB          00000000 00229c 0006a0 10     34  88  4
  [34] .strtab           STRTAB          00000000 00293c 0003ee 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

There are no section groups in this file.

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  PHDR           0x000034 0x08048034 0x08048034 0x000c0 0x000c0 R E 0x4
  INTERP         0x0000f4 0x080480f4 0x080480f4 0x00013 0x00013 R   0x1
      [Requesting program interpreter: /lib/ld-linux.so.2]
  LOAD           0x000000 0x08048000 0x08048000 0x00410 0x00410 R E 0x1000
  LOAD           0x000410 0x08049410 0x08049410 0x00100 0x00104 RW  0x1000
  DYNAMIC        0x000424 0x08049424 0x08049424 0x000c8 0x000c8 RW  0x4
  NOTE           0x000108 0x08048108 0x08048108 0x00020 0x00020 R   0x4

Section to Segment mapping:
  Segment Sections...
   00    
   01     .interp
   02     .interp .note.ABI-tag .hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame
   03     .ctors .dtors .jcr .dynamic .got .got.plt .data .bss
   04     .dynamic
   05     .note.ABI-tag

Dynamic section at offset 0x424 contains 20 entries:
  Tag        Type                         Name/Value
0x00000001 (NEEDED)                     Shared library: [libc.so.6]
0x0000000c (INIT)                       0x8048230
0x0000000d (FINI)                       0x80483d8
0x00000004 (HASH)                       0x8048128
0x00000005 (STRTAB)                     0x80481a0
0x00000006 (SYMTAB)                     0x8048150
0x0000000a (STRSZ)                      76 (bytes)
0x0000000b (SYMENT)                     16 (bytes)
0x00000015 (DEBUG)                      0x0
0x00000003 (PLTGOT)                     0x80494f0
0x00000002 (PLTRELSZ)                   16 (bytes)
0x00000014 (PLTREL)                     REL
0x00000017 (JMPREL)                     0x8048220
0x00000011 (REL)                        0x8048218
0x00000012 (RELSZ)                      8 (bytes)
0x00000013 (RELENT)                     8 (bytes)
0x6ffffffe (VERNEED)                    0x80481f8
0x6fffffff (VERNEEDNUM)                 1
0x6ffffff0 (VERSYM)                     0x80481ec
0x00000000 (NULL)                       0x0

Relocation section '.rel.dyn' at offset 0x218 contains 1 entries:
Offset     Info    Type            Sym.Value  Sym. Name
080494ec  00000106 R_386_GLOB_DAT    00000000   __gmon_start__

Relocation section '.rel.plt' at offset 0x220 contains 2 entries:
Offset     Info    Type            Sym.Value  Sym. Name
080494fc  00000207 R_386_JUMP_SLOT   00000000   __libc_start_main
08049500  00000407 R_386_JUMP_SLOT   00000000   printf

There are no unwind sections in this file.

Symbol table '.dynsym' contains 5 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __gmon_start__
     2: 00000000   251 FUNC    GLOBAL DEFAULT  UND __libc_start_main@GLIBC_2.0 (2)
     3: 080483f8     4 OBJECT  GLOBAL DEFAULT   14 _IO_stdin_used
     4: 00000000    57 FUNC    GLOBAL DEFAULT  UND printf@GLIBC_2.0 (2)

Symbol table '.symtab' contains 106 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
     1: 080480f4     0 SECTION LOCAL  DEFAULT    1
     2: 08048108     0 SECTION LOCAL  DEFAULT    2
     3: 08048128     0 SECTION LOCAL  DEFAULT    3
     4: 08048150     0 SECTION LOCAL  DEFAULT    4
     5: 080481a0     0 SECTION LOCAL  DEFAULT    5
     6: 080481ec     0 SECTION LOCAL  DEFAULT    6
     7: 080481f8     0 SECTION LOCAL  DEFAULT    7
     8: 08048218     0 SECTION LOCAL  DEFAULT    8
     9: 08048220     0 SECTION LOCAL  DEFAULT    9
    10: 08048230     0 SECTION LOCAL  DEFAULT   10
    11: 08048248     0 SECTION LOCAL  DEFAULT   11
    12: 08048278     0 SECTION LOCAL  DEFAULT   12
    13: 080483d8     0 SECTION LOCAL  DEFAULT   13
    14: 080483f4     0 SECTION LOCAL  DEFAULT   14
    15: 0804840c     0 SECTION LOCAL  DEFAULT   15
    16: 08049410     0 SECTION LOCAL  DEFAULT   16
    17: 08049418     0 SECTION LOCAL  DEFAULT   17
    18: 08049420     0 SECTION LOCAL  DEFAULT   18
    19: 08049424     0 SECTION LOCAL  DEFAULT   19
    20: 080494ec     0 SECTION LOCAL  DEFAULT   20
    21: 080494f0     0 SECTION LOCAL  DEFAULT   21
    22: 08049504     0 SECTION LOCAL  DEFAULT   22
    23: 08049510     0 SECTION LOCAL  DEFAULT   23
    24: 00000000     0 SECTION LOCAL  DEFAULT   24
    25: 00000000     0 SECTION LOCAL  DEFAULT   25
    26: 00000000     0 SECTION LOCAL  DEFAULT   26
    27: 00000000     0 SECTION LOCAL  DEFAULT   27
    28: 00000000     0 SECTION LOCAL  DEFAULT   28
    29: 00000000     0 SECTION LOCAL  DEFAULT   29
    30: 00000000     0 SECTION LOCAL  DEFAULT   30
    31: 00000000     0 SECTION LOCAL  DEFAULT   31
    32: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    33: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    34: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    35: 00000000     0 FILE    LOCAL  DEFAULT  ABS <built-in>
    36: 00000000     0 FILE    LOCAL  DEFAULT  ABS abi-note.S
    37: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    38: 00000000     0 FILE    LOCAL  DEFAULT  ABS abi-note.S
    39: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    40: 00000000     0 FILE    LOCAL  DEFAULT  ABS abi-note.S
    41: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    42: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    43: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    44: 00000000     0 FILE    LOCAL  DEFAULT  ABS <built-in>
    45: 00000000     0 FILE    LOCAL  DEFAULT  ABS abi-note.S
    46: 00000000     0 FILE    LOCAL  DEFAULT  ABS init.c
    47: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    48: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    49: 00000000     0 FILE    LOCAL  DEFAULT  ABS initfini.c
    50: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    51: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    52: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    53: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    54: 00000000     0 FILE    LOCAL  DEFAULT  ABS <built-in>
    55: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    56: 0804829c     0 FUNC    LOCAL  DEFAULT   12 call_gmon_start
    57: 00000000     0 FILE    LOCAL  DEFAULT  ABS crtstuff.c
    58: 08049410     0 OBJECT  LOCAL  DEFAULT   16 __CTOR_LIST__
    59: 08049418     0 OBJECT  LOCAL  DEFAULT   17 __DTOR_LIST__
    60: 0804840c     0 OBJECT  LOCAL  DEFAULT   15 __EH_FRAME_BEGIN__
    61: 08049420     0 OBJECT  LOCAL  DEFAULT   18 __JCR_LIST__
    62: 0804950c     0 OBJECT  LOCAL  DEFAULT   22 p.0
    63: 08049510     1 OBJECT  LOCAL  DEFAULT   23 completed.1
    64: 080482c0     0 FUNC    LOCAL  DEFAULT   12 __do_global_dtors_aux
    65: 080482fc     0 FUNC    LOCAL  DEFAULT   12 frame_dummy
    66: 00000000     0 FILE    LOCAL  DEFAULT  ABS crtstuff.c
    67: 08049414     0 OBJECT  LOCAL  DEFAULT   16 __CTOR_END__
    68: 0804941c     0 OBJECT  LOCAL  DEFAULT   17 __DTOR_END__
    69: 0804840c     0 OBJECT  LOCAL  DEFAULT   15 __FRAME_END__
    70: 08049420     0 OBJECT  LOCAL  DEFAULT   18 __JCR_END__
    71: 080483b4     0 FUNC    LOCAL  DEFAULT   12 __do_global_ctors_aux
    72: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    73: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    74: 00000000     0 FILE    LOCAL  DEFAULT  ABS initfini.c
    75: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    76: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    77: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    78: 00000000     0 FILE    LOCAL  DEFAULT  ABS <command line>
    79: 00000000     0 FILE    LOCAL  DEFAULT  ABS <built-in>
    80: 00000000     0 FILE    LOCAL  DEFAULT  ABS /usr/src/build/231499-i38
    81: 00000000     0 FILE    LOCAL  DEFAULT  ABS 1.c
    82: 08049410     0 NOTYPE  LOCAL  HIDDEN   16 __fini_array_end
    83: 080494f0     0 OBJECT  LOCAL  HIDDEN   21 _GLOBAL_OFFSET_TABLE_
    84: 08049410     0 NOTYPE  LOCAL  HIDDEN   16 __fini_array_start
    85: 08049410     0 NOTYPE  LOCAL  HIDDEN   16 __init_array_end
    86: 08049410     0 NOTYPE  LOCAL  HIDDEN   16 __init_array_start
    87: 08049424     0 OBJECT  LOCAL  HIDDEN   19 _DYNAMIC
    88: 08049504     0 NOTYPE  WEAK   DEFAULT   22 data_start
    89: 08048380    52 FUNC    GLOBAL DEFAULT   12 __libc_csu_fini
    90: 08048278     0 FUNC    GLOBAL DEFAULT   12 _start
    91: 00000000     0 NOTYPE  WEAK   DEFAULT  UND __gmon_start__
    92: 00000000     0 NOTYPE  WEAK   DEFAULT  UND _Jv_RegisterClasses
    93: 080483f4     4 OBJECT  GLOBAL DEFAULT   14 _fp_hw
    94: 080483d8     0 FUNC    GLOBAL DEFAULT   13 _fini
    95: 00000000   251 FUNC    GLOBAL DEFAULT  UND __libc_start_main@@GLIBC_
    96: 080483f8     4 OBJECT  GLOBAL DEFAULT   14 _IO_stdin_used
    97: 08049504     0 NOTYPE  GLOBAL DEFAULT   22 __data_start
    98: 08049508     0 OBJECT  GLOBAL HIDDEN   22 __dso_handle
    99: 08048350    48 FUNC    GLOBAL DEFAULT   12 __libc_csu_init
   100: 00000000    57 FUNC    GLOBAL DEFAULT  UND printf@@GLIBC_2.0
   101: 08049510     0 NOTYPE  GLOBAL DEFAULT  ABS __bss_start
   102: 08049514     0 NOTYPE  GLOBAL DEFAULT  ABS _end
   103: 08049510     0 NOTYPE  GLOBAL DEFAULT  ABS _edata
   104: 08048328    39 FUNC    GLOBAL DEFAULT   12 main
   105: 08048230     0 FUNC    GLOBAL DEFAULT   10 _init

Histogram for bucket list length (total of 3 buckets):
Length  Number     % of total  Coverage
      0  0          (  0.0%)
      1  2          ( 66.7%)     50.0%
      2  1          ( 33.3%)    100.0%

Version symbols section '.gnu.version' contains 5 entries:
Addr: 00000000080481ec  Offset: 0x0001ec  Link: 4 (.dynsym)
  000:   0 (*local*)       0 (*local*)       2 (GLIBC_2.0)     1 (*global*)  
  004:   2 (GLIBC_2.0) 

Version needs section '.gnu.version_r' contains 1 entries:
Addr: 0x00000000080481f8  Offset: 0x0001f8  Link to section: 5 (.dynstr)
  000000: Version: 1  File: libc.so.6  Cnt: 1
  0x0010:   Name: GLIBC_2.0  Flags: none  Version: 2

Notes at offset 0x00000108 with length 0x00000020:
  Owner         Data size       Description
  GNU           0x00000010      NT_GNU_ABI_TAG (ABI version tag)


我们发现共有以下三个重定位项
Relocation section '.rel.dyn' at offset 0x218 contains 1 entries:
Offset     Info    Type            Sym.Value  Sym. Name
080494ec  00000106 R_386_GLOB_DAT    00000000   __gmon_start__

Relocation section '.rel.plt' at offset 0x220 contains 2 entries:
Offset     Info    Type            Sym.Value  Sym. Name
080494fc  00000207 R_386_JUMP_SLOT   00000000   __libc_start_main
08049500  00000407 R_386_JUMP_SLOT   00000000   printf


2. dl_main 局部变量定义


static void
dl_main (const ElfW(Phdr) *phdr,
     ElfW(Word) phnum,
     ElfW(Addr) *user_entry)
{
  const ElfW(Phdr) *ph;
  enum mode mode;
  struct link_map **preloads;
  unsigned int npreloads;
  size_t file_size;
  char *file;
  bool has_interp = false;
  unsigned int i;
  bool prelinked = false;
  bool rtld_is_main = false;
//#ifndef HP_TIMING_NONAVAIL
  hp_timing_t start;
  hp_timing_t stop;
  hp_timing_t diff;
//#endif
//#ifdef USE_TLS
// void *tcbp;
//#endif

  /* Process the environment variable which control the behaviour.  */
  process_envvars (&mode);//处理环境变量

  /* Set up a flag which tells we are just starting.  */
  INTUSE(_dl_starting_up) = 1;

3.先处理环境变量 process_envvars

static void
process_envvars (enum mode *modep)
{
  char **runp = _environ;//指向_environ
  char *envline;
  enum mode mode = normal;
  char *debug_output = NULL;

  /* This is the default place for profiling data file.  */
  GL(dl_profile_output)
    = &"/var/tmp\0/var/profile"[INTUSE(__libc_enable_secure) ? 9 : 0];//根据libc_enable_secure的值不同取/var/tmp 或 /var/profile

  while ((envline = _dl_next_ld_env_entry (&runp)) != NULL)

4.process_envvars->_dl_next_ld_env_entry(sysdeps/generic/dl-environ.c)

/* Walk through the environment of the process and return, one per call,
   the entries whose name starts with `LD_'.

   *POSITION is the scan cursor.  On a match the returned pointer addresses
   the text that follows the "LD_" prefix, and *POSITION is advanced to the
   entry after the match so that the next call resumes there.  When no
   further entry matches, NULL is returned and *POSITION is left untouched.

   LD_ variables recognised by the dynamic linker include:
       LD_LIBRARY_PATH
       LD_PRELOAD
       LD_TRACE_LOADED_OBJECTS
       LD_BIND_NOW
       LD_WARN
       LD_DEBUG
       LD_DEBUG_OUTPUT
       LD_VERBOSE
   They are described in detail in `man ld.so'.  */
char *
//internal_function
_dl_next_ld_env_entry (char ***position)
{
  char **cursor;

  for (cursor = *position; *cursor != NULL; ++cursor)
    {
      char *entry = *cursor;

      /* Most entries are not LD_ variables, so the mismatch is the
         expected outcome.  */
      if (__builtin_expect (entry[0] != 'L', 1)
          || entry[1] != 'D' || entry[2] != '_')
        continue;

      /* Save the position for the next visit.  */
      *position = cursor + 1;

      /* Hand back the string that follows "LD_".  */
      return entry + 3;
    }

  return NULL;
}

5返回process_envvars
    {
      size_t len = 0;

      while (envline[len] != '\0' && envline[len] != '=')//查找=
    ++len;

      if (envline[len] != '=')//不是key=value格式
    /* This is a "LD_" variable at the end of the string without
这个LD_变量在字符串末尾没有=字符
       a '=' character.  Ignore it since otherwise we will access
为了避免后面访问无效内存,忽略它
       invalid memory below.  */
    continue;

      switch (len)
    {
    case 4:
      /* Warning level, verbose or not.  警告级别,详细或没有*/
      if (memcmp (envline, "WARN", 4) == 0)
        GL(dl_verbose) = envline[5] != '\0';//=号后面是否为空串,例如LD_WARN=1会详细显示
      break;

    case 5:
      /* Debugging of the dynamic linker?  */
      if (memcmp (envline, "DEBUG", 5) == 0)
        process_dl_debug (&envline[6]);//进一步处理
      break;

    case 7:
      /* Print information about versions.  */
      if (memcmp (envline, "VERBOSE", 7) == 0)
        {
          version_info = envline[8] != '\0';//=号后面是否为空串
          break;
        }

      /* List of objects to be preloaded.  */
      if (memcmp (envline, "PRELOAD", 7) == 0)
        {
          preloadlist = &envline[8];
          break;
        }

      /* Which shared object shall be profiled.  */
      if (memcmp (envline, "PROFILE", 7) == 0 && envline[8] != '\0')
        GL(dl_profile) = &envline[8];
      break;

    case 8:
      /* Do we bind early?  */
      if (memcmp (envline, "BIND_NOW", 8) == 0)
        {
          GL(dl_lazy) = envline[9] == '\0';//LD_BIND_NOW=1,立即bind,LD_BIND_NOW=,lazy bind
          break;
        }
      if (memcmp (envline, "BIND_NOT", 8) == 0)//LD_BIND_NOT非空时,解析函数符号后不更新GOT和PLT(见man ld.so)
        GL(dl_bind_not) = envline[9] != '\0';
      break;

    case 9:
      /* Test whether we want to see the content of the auxiliary
         array passed up from the kernel.  */
      if (memcmp (envline, "SHOW_AUXV", 9) == 0)//显示AUXV数组
        _dl_show_auxv ();
      break;

    case 10:
      /* Mask for the important hardware capabilities.  */
      if (memcmp (envline, "HWCAP_MASK", 10) == 0)
        GL(dl_hwcap_mask) = __strtoul_internal (&envline[11], NULL, 0, 0);
      break;

    case 11:
      /* Path where the binary is found.  */
      if (!INTUSE(__libc_enable_secure)
          && memcmp (envline, "ORIGIN_PATH", 11) == 0)
        GL(dl_origin_path) = &envline[12];
      break;

    case 12:
      /* The library search path.  */
      if (memcmp (envline, "LIBRARY_PATH", 12) == 0)
        {
          library_path = &envline[13];
          break;
        }

      /* Where to place the profiling data file.  */
      if (memcmp (envline, "DEBUG_OUTPUT", 12) == 0)
        {
          debug_output = &envline[13];
          break;
        }

      if (memcmp (envline, "DYNAMIC_WEAK", 12) == 0)
        GL(dl_dynamic_weak) = 1;
      break;

    case 14:
      /* Where to place the profiling data file.  */
      if (!INTUSE(__libc_enable_secure)
          && memcmp (envline, "PROFILE_OUTPUT", 14) == 0
          && envline[15] != '\0')
        GL(dl_profile_output) = &envline[15];
      break;

    case 16:
      /* The mode of the dynamic linker can be set.  */
      if (memcmp (envline, "TRACE_PRELINKING", 16) == 0)
        {
          mode = trace;
          GL(dl_verbose) = 1;
          GL(dl_debug_mask) |= DL_DEBUG_PRELINK;
          GL(dl_trace_prelink) = &envline[17];
        }
      break;

    case 20:
      /* The mode of the dynamic linker can be set.  */
      if (memcmp (envline, "TRACE_LOADED_OBJECTS", 20) == 0)
        mode = trace;
      break;

      /* We might have some extra environment variable to handle.  This
         is tricky due to the pre-processing of the length of the name
         in the switch statement here.  The code here assumes that added
         environment variables have a different length.  */
#ifdef EXTRA_LD_ENVVARS
      EXTRA_LD_ENVVARS
#endif
    }
    }

上面的处理流程很清晰,各个参数什么用,后面会涉及到

查看生成的汇编代码发现gcc对switch做了优化,使用跳转表(数组)索引各个case。memcmp虽然没有定义,但是也因为使用-O参数而被优化成内联

    subl    $4, %eax//%eax为len,减去4
    cmpl    $16, %eax//和16比较
    ja    .L718//如果大于16,则进入下一轮循环
    movl    .L770@GOTOFF(%ebx,%eax,4), %eax//取各个case的地址,%ebx为GOT基址,%eax为索引,4为元素大小,.L770@GOTOFF为.L770相对于GOT偏移
    addl    %ebx, %eax//GOT加上case的地址相对于GOT偏移
    jmp    *%eax//跳到该case处
    .section    .rodata//case数组存入.rodata
    .align 4
    .align 4
.L770:
    .long    .L728@GOTOFF//len=4
    .long    .L730@GOTOFF//len=5
    .long    .L718@GOTOFF//len=6,没有,置为下一轮循环地址
    .long    .L732@GOTOFF//len=7
    .long    .L736@GOTOFF//len=8
    .long    .L739@GOTOFF//len=9
    .long    .L741@GOTOFF//len=10
    .long    .L743@GOTOFF//len=11
    .long    .L745@GOTOFF//len=12
    .long    .L755@GOTOFF//len=13
    .long    .L749@GOTOFF//len=14
    .long    .L766@GOTOFF//len=15
    .long    .L751@GOTOFF//len=16
    .long    .L718@GOTOFF//len=17没有,置为下一轮循环地址
    .long    .L718@GOTOFF//len=18没有,置为下一轮循环地址
    .long    .L718@GOTOFF//len=19没有,置为下一轮循环地址
    .long    .L753@GOTOFF//len=20
    .text
.L728:
    .loc 1 1775 0
    movl    -40(%ebp), %esi
    leal    .LC62@GOTOFF(%ebx), %edi
    movl    $4, %eax
    cld //memcmp被内联
    movl    %eax, %ecx
    repz
    cmpsb
    seta    %dl
    setb    %al
    cmpb    %al, %dl
    jne    .L718
    .loc 1 1776 0
    movl    -40(%ebp), %eax
    cmpb    $0, 5(%eax)
    setne    %al
    movzbl    %al, %eax
    movl    %eax, 80+_rtld_local@GOTOFF(%ebx)
    .loc 1 1777 0
    jmp    .L718

6.process_envvars->process_dl_debug 分析debug选项

/* Nonzero if any of the debugging options is enabled.  */
static int any_debug;

/* Process the string given as the parameter which explains which debugging
   options are enabled.  */

static void
process_dl_debug (const char *dl_debug)
{
  /* When adding new entries make sure that the maximal length of a name
     is correctly handled in the LD_DEBUG_HELP code below.  */
//定义LD_DEBUG=value的value有哪些
  static const struct
  {
    unsigned char len;
    const char name[10];
    const char helptext[41];
    unsigned short int mask;
  }
debopts[] =
    {
#define LEN_AND_STR(str) sizeof (str) - 1, str
      { LEN_AND_STR ("libs"), "display library search paths",
    DL_DEBUG_LIBS | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("reloc"), "display relocation processing",
    DL_DEBUG_RELOC | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("files"), "display progress for input file",
    DL_DEBUG_FILES | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("symbols"), "display symbol table processing",
    DL_DEBUG_SYMBOLS | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("bindings"), "display information about symbol binding",
    DL_DEBUG_BINDINGS | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("versions"), "display version dependencies",
    DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("all"), "all previous options combined",
    DL_DEBUG_LIBS | DL_DEBUG_RELOC | DL_DEBUG_FILES | DL_DEBUG_SYMBOLS
    | DL_DEBUG_BINDINGS | DL_DEBUG_VERSIONS | DL_DEBUG_IMPCALLS },
      { LEN_AND_STR ("statistics"), "display relocation statistics",
    DL_DEBUG_STATISTICS },
      { LEN_AND_STR ("help"), "display this help message and exit",
    DL_DEBUG_HELP },
    };
#define ndebopts (sizeof (debopts) / sizeof (debopts[0]))

  /* Skip separating white spaces and commas.  跳过分界符空格和逗号*/
  while (*dl_debug != '\0')//value未结束
    {
      if (*dl_debug != ' ' && *dl_debug != ',' && *dl_debug != ':')//跳过多余的空格,逗号,冒号,如果有的话
    {
      size_t cnt;
      size_t len = 1;

      while (dl_debug[len] != '\0' && dl_debug[len] != ' '
         && dl_debug[len] != ',' && dl_debug[len] != ':')
        ++len;//查找本value后的分隔符

      for (cnt = 0; cnt < ndebopts; ++cnt)//在debopts中查找
        if (debopts[cnt].len == len
        && memcmp (dl_debug, debopts[cnt].name, len) == 0)
          {
        GL(dl_debug_mask) |= debopts[cnt].mask;//置位
        any_debug = 1;//有调试选项
        break;
          }

      if (cnt == ndebopts)//未找到,无效
        {
          /* Display a warning and skip everything until next 显示警告,在找到下一个分隔符之前跳过所有字符
         separator.  */
          char *copy = strndupa (dl_debug, len);
          _dl_error_printf ("\
warning: debug option `%s' unknown; try LD_DEBUG=help\n", copy);
        }

      dl_debug += len;
      continue;
    }

      ++dl_debug;
    }

  if (GL(dl_debug_mask) & DL_DEBUG_HELP)//显示help
    {
      size_t cnt;

      _dl_printf ("\
Valid options for the LD_DEBUG environment variable are:\n\n");

      for (cnt = 0; cnt < ndebopts; ++cnt)
    _dl_printf ("  %.*s%s%s\n", debopts[cnt].len, debopts[cnt].name,
            "         " + debopts[cnt].len - 3,
            debopts[cnt].helptext);

      _dl_printf ("\n\
To direct the debugging output into a file instead of standard output\n\
a filename can be specified using the LD_DEBUG_OUTPUT environment variable.\n");
      _exit (0);
    }
}

举例
[zws@mail ~/glibc-2.3/build/elf]$LD_DEBUG=help ls
Valid options for the LD_DEBUG environment variable are:

  libs        display library search paths
  reloc       display relocation processing
  files       display progress for input file
  symbols     display symbol table processing
  bindings    display information about symbol binding
  versions    display version dependencies
  all         all previous options combined
  statistics  display relocation statistics
  help        display this help message and exit

To direct the debugging output into a file instead of standard output
a filename can be specified using the LD_DEBUG_OUTPUT environment variable.

其他的选项大家自己试验一下看看

7.process_envvars->process_dl_debug->strndupa (string/string.h)

/* Return an alloca'd copy of at most N bytes of string S.
   __len is the strnlen of S limited to N; __len + 1 bytes are reserved
   with __builtin_alloca, the terminating NUL is stored at index __len,
   and then __len bytes are copied from S.  The value of the statement
   expression is the memcpy result cast to char *.  The storage comes
   from __builtin_alloca, so the caller must not free it.  */
# define strndupa(s, n)                                  \
  (__extension__                                  \
    ({                                          \
      __const char *__old = (s);                          \
      size_t __len = strnlen (__old, (n));                      \
      char *__new = (char *) __builtin_alloca (__len + 1);              \
      __new[__len] = '\0';                              \
      (char *) memcpy (__new, __old, __len);                      \
    }))

对应的汇编代码是
.LBB78:
    subl    $8, %esp
    pushl    -16(%ebp) //参数len
    pushl    8(%ebp) //参数dl_debug
.LCFI78:
    call    __strnlen@PLT//调用__strnlen,strnlen是__strnlen的weak_alias
    addl    $16, %esp//平栈,8+4+4
    leal    16(%eax), %edx//16+__len->%edx
    andl    $-16, %edx//%edx向低地址方向对齐到16字节边界
    subl    %edx, %esp//__builtin_alloca 在栈上分配空间,实际分配的大小为(__len+16)&-16,即>=__len+1且<=__len+16
    movl    %esp, %edx//__new->%edx
    movb    $0, (%esp,%eax)//__new[__len] = '\0';
    subl    $4, %esp
    pushl    %eax//参数__len
    pushl    8(%ebp)//__old
    pushl    %edx//__new
    call    memcpy@PLT
    addl    $12, %esp//平栈
.LBE78:
    .loc 1 1710 0
    pushl    %eax//copy
    leal    .LC56@GOTOFF(%ebx), %eax//warning: debug option `%s' unknown; try LD_DEBUG=help\n
    pushl    %eax
    pushl    $2//STDERR_FILENO
    call    _dl_dprintf
.LBE77:
    addl    $16, %esp//平栈,12+前面的subl $4,%esp的4字节
.L700:
    .loc 1 1714 0
    movl    -16(%ebp), %eax
    addl    %eax, 8(%ebp)//dl_debug += len;
    .loc 1 1715 0
    jmp    .L685


还要指出多次调用__builtin_alloca会不断的在栈上分配空间,即%esp向低地址方向增长。但是随着函数的返回,这些空间自然全部被释放。


8.process_envvars->_dl_show_auxv(sysdeps/generic/dl-sysdep.c)
显示AUXV信息

void
//internal_function
_dl_show_auxv (void)
{
  char buf[64];
  ElfW(auxv_t) *av;

  /* Terminate string.  */
  buf[63] = '\0';

  /* The following code assumes that the AT_* values are encoded
下面的代码假定AT_*值从0(AT_NULL)开始编码,1代表AT_IGNORE,其他值?
  starting from 0 with AT_NULL, 1 for AT_IGNORE, and all other values
  close by (otherwise the array will be too large).  In case we have
为避免必须支持不同平台,必须采用可定制实现
  to support a platform where these requirements are not fulfilled
  some alternative implementation has to be used.  */
  for (av = _dl_auxv; av->a_type != AT_NULL; ++av)
    {
      static const struct
      {
    const char label[20];
    enum { dec, hex, str } form;
      }
      auxvars[] =
    {
      [AT_EXECFD - 2] =        { "AT_EXECFD:      ", dec },
      [AT_PHDR - 2] =        { "AT_PHDR:        0x", hex },
      [AT_PHENT - 2] =        { "AT_PHENT:       ", dec },
      [AT_PHNUM - 2] =        { "AT_PHNUM:       ", dec },
      [AT_PAGESZ - 2] =        { "AT_PAGESZ:      ", dec },
      [AT_BASE - 2] =        { "AT_BASE:        0x", hex },
      [AT_FLAGS - 2] =        { "AT_FLAGS:       0x", hex },
      [AT_ENTRY - 2] =        { "AT_ENTRY:       0x", hex },
      [AT_NOTELF - 2] =        { "AT_NOTELF:      ", hex },
      [AT_UID - 2] =        { "AT_UID:         ", dec },
      [AT_EUID - 2] =        { "AT_EUID:        ", dec },
      [AT_GID - 2] =        { "AT_GID:         ", dec },
      [AT_EGID - 2] =        { "AT_EGID:        ", dec },
      [AT_PLATFORM - 2] =        { "AT_PLATFORM:    ", str },
      [AT_HWCAP - 2] =        { "AT_HWCAP:       ", hex },
      [AT_CLKTCK - 2] =        { "AT_CLKTCK:      ", dec },
      [AT_FPUCW - 2] =        { "AT_FPUCW:       ", hex },
      [AT_DCACHEBSIZE - 2] =    { "AT_DCACHEBSIZE: 0x", hex },
      [AT_ICACHEBSIZE - 2] =    { "AT_ICACHEBSIZE: 0x", hex },
      [AT_UCACHEBSIZE - 2] =    { "AT_UCACHEBSIZE: 0x", hex }
    };
      unsigned int idx = (unsigned int) (av->a_type - 2);// -2忽略0,和1

      assert (AT_NULL == 0);
      assert (AT_IGNORE == 1);
      if (idx < sizeof (auxvars) / sizeof (auxvars[0]))
    {
      if (av->a_type != AT_HWCAP || _dl_procinfo (av->a_un.a_val) < 0)//不是AT_HWCAP,或者是AT_HWCAP就调用_dl_procinfo,代码自己看
        {
          const char *val = av->a_un.a_ptr;

          if (__builtin_expect (auxvars[idx].form, dec) == dec)//十进制数据
        val = _itoa ((unsigned long int) av->a_un.a_val,
                 buf + sizeof buf - 1, 10, 0);//调用_itoa,代码自己看
          else if (__builtin_expect (auxvars[idx].form, hex) == hex)
        val = _itoa ((unsigned long int) av->a_un.a_val,
                 buf + sizeof buf - 1, 16, 0);

          _dl_printf ("%s%s\n", auxvars[idx].label, val);
        }
    }
    }
}

举例
[zws@mail elf]$ LD_SHOW_AUXV=1 ls
AT_SYSINFO:     0xffffe000
AT_HWCAP:    fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
AT_PAGESZ:      4096
AT_CLKTCK:      100
AT_PHDR:        0x8048034
AT_PHENT:       32
AT_PHNUM:       7
AT_BASE:        0x40000000
AT_FLAGS:       0x0
AT_ENTRY:       0x8049690
AT_UID:         503
AT_EUID:        503
AT_GID:         504
AT_EGID:        504
AT_PLATFORM:    i686

9.process_envvars->EXTRA_LD_ENVVARS (sysdeps/unix/sysv/linux/i386/dl-librecon.h)
额外的环境变量
/* Extra `case' labels for the switch on the variable-name length in
   process_envvars, which expands EXTRA_LD_ENVVARS at the end of that
   switch.

   case 13, LD_ASSUME_KERNEL=x.y.z: parses up to three numbers and packs
   them one byte each (x << 16 | y << 8 | z); a nonzero result is stored
   in GL(dl_osversion), a malformed value leaves it unchanged.

   case 15, LD_LIBRARY_VERSION: GL(dl_correct_cache_id) becomes 2 when the
   value starts with '5', and 3 otherwise.

   NOTE(review): the `break' of case 13 sits inside the `if' block, so a
   13-character name other than ASSUME_KERNEL falls through into the
   case 15 comparison.  */
#define EXTRA_LD_ENVVARS \
  case 13:                                      \
    if (memcmp (envline, "ASSUME_KERNEL", 13) == 0)/* requested kernel version */                  \
      {                                          \
    unsigned long int i, j, osversion = 0;                      \
    char *p = &envline[14], *q;                          \
                                          \
    for (i = 0; i < 3; i++, p = q + 1)/* i counts the components of x.y.z, at most three */                      \
      {                                      \
        j = __strtoul_internal (p, &q, 0, 0);/* convert one component; q is left after the parsed text */                  \
        if (j >= 255/* component too large */ || p == q /* nothing was parsed */|| (i < 2 && *q && *q != '.')/* first two components must be followed by '.' or end the string */)          \
          {                                      \
        osversion = 0;                              \
        break;                                  \
          }                                      \
        osversion |= j << (16 - 8 * i);/* i==0: j<<16; i==1: j<<8; i==2: j<<0 -- one byte per component */                      \
        if (!*q)                                  \
          break;                                  \
      }                                      \
    if (osversion)                                  \
      GL(dl_osversion) = osversion;                          \
    break;                                      \
      }                                          \
                                          \
  case 15:                                      \
    if (memcmp (envline, "LIBRARY_VERSION", 15) == 0)                  \
      {                                          \
    GL(dl_correct_cache_id) = envline[16] == '5' ? 2 : 3;/* value starting with '5' selects 2, anything else 3 */          \
    break;                                      \
      }

10.返回process_envvars

  /* The caller wants this information.  */
  *modep = mode;

  /* Extra security for SUID binaries.  Remove all dangerous environment
因为是SUID程序,移除所有危险环境变量
     variables.  */
  if (__builtin_expect (INTUSE(__libc_enable_secure), 0))
    {
      static const char unsecure_envvars[] =
//#ifdef EXTRA_UNSECURE_ENVVARS
/*该宏定义在sysdeps/unix/sysv/linux/i386/dl-librecon.h
/*
Extra unsecure variables.  The names are all stuffed in a single
特别不安全变量。这些名字被组合成单个字符串,因此需要显示使用'\0' 结束这些名字
   string which means they have to be terminated with a '\0' explicitly.  * /
#define EXTRA_UNSECURE_ENVVARS \
  "LD_AOUT_LIBRARY_PATH\0"                              \
  "LD_AOUT_PRELOAD\0"
*/
    EXTRA_UNSECURE_ENVVARS
//#endif
/*该宏定义在sysdeps/generic/unsecvars.h
/*
Environment variable to be removed for SUID programs.  The names are
   all stuffed in a single string which means they have to be terminated
   with a '\0' explicitly.  * /
#define UNSECURE_ENVVARS \
  "LD_PRELOAD\0"                                  \
  "LD_LIBRARY_PATH\0"                                  \
  "LD_ORIGIN_PATH\0"                                  \
  "LD_DEBUG_OUTPUT\0"                                  \
  "LD_PROFILE\0"                                  \
  "GCONV_PATH\0"                                  \
  "HOSTALIASES\0"                                  \
  "LOCALDOMAIN\0"                                  \
  "LOCPATH\0"                                      \
  "MALLOC_TRACE\0"                                  \
  "NLSPATH\0"                                      \
  "RESOLV_HOST_CONF\0"                                  \
  "RES_OPTIONS\0"                                  \
  "TMPDIR\0"                                      \
  "TZDIR\0"

*/
    UNSECURE_ENVVARS;
      const char *nextp;

      nextp = unsecure_envvars;
      do
    {
      unsetenv (nextp);//注销该环境变量,其实就是将后面的环境变量指针往前移,覆盖掉该变量指针
      /* We could use rawmemchr but this need not be fast.  */
      nextp = (char *) (strchr) (nextp, '\0') + 1;
    }
      while (*nextp != '\0');

      if (__access ("/etc/suid-debug", F_OK) != 0)//不存在文件/etc/suid-debug
    unsetenv ("MALLOC_CHECK_");//注销MALLOC_CHECK_
    }
  /* If we have to run the dynamic linker in debugging mode and the
不是SUID程序,如果我们必须运行dl在调试模式下,并且存在环境变量LD_DEBUG_OUTPUT
     LD_DEBUG_OUTPUT environment variable is given, we write the debug
则将调试信息写入这个文件
     messages to this file.  */
  else if (any_debug && debug_output != NULL)
    {
//#ifdef O_NOFOLLOW
      const int flags = O_WRONLY | O_APPEND | O_CREAT | O_NOFOLLOW;
//#else
//      const int flags = O_WRONLY | O_APPEND | O_CREAT;
//#endif
      size_t name_len = strlen (debug_output);
      char buf[name_len + 12];//在栈上动态分配
      char *startp;

      buf[name_len + 11] = '\0';
      startp = _itoa (__getpid (), &buf[name_len + 11], 10, 0);//存入buf的最后
      *--startp = '.';//前面加.
      startp = memcpy (startp - name_len, debug_output, name_len);//复制debug_output,组成x.y格式

      GL(dl_debug_fd) = __open (startp, flags, DEFFILEMODE);
      if (GL(dl_debug_fd) == -1)
    /* We use standard output if opening the file failed.  */
    GL(dl_debug_fd) = STDOUT_FILENO;
    }
}

原创粉丝点击