android linker 执行流程

来源:互联网 发布:广西广电网络营业厅 编辑:程序博客网 时间:2024/06/03 17:27

通过前一篇的博客,我们知道,在linux kernel加载完可执行程序后,在需要解释器的情况下,返回用户空间时会先跳到解释器的入口执行。对于android平台而言,就是先跑到linker的_start()函数,然后再跳转到__linker_init()。

bionic/linker/linker.cpp

extern "C" ElfW(Addr) __linker_init(void* raw_args) {4431    KernelArgumentBlock args(raw_args);4432  4433    ElfW(Addr) linker_addr = args.getauxval(AT_BASE);   //interp_load_addr 解释器 加载到内存的地址4434    ElfW(Addr) entry_point = args.getauxval(AT_ENTRY);   //可执行程序的入口地址4435    ElfW(Ehdr)* elf_hdr = reinterpret_cast<ElfW(Ehdr)*>(linker_addr);  //linker文件的起始位置  Elf64_Ehdr4436    ElfW(Phdr)* phdr = reinterpret_cast<ElfW(Phdr)*>(linker_addr + elf_hdr->e_phoff);   // linker中的程序头表  Elf64_Phdr4437  4438    soinfo linker_so(nullptr, nullptr, nullptr, 0, 0);    //soinfo 一个很重要的结构体...4452  4453    linker_so.base = linker_addr;4454    linker_so.size = phdr_table_get_load_size(phdr, elf_hdr->e_phnum);  //加载的PT_LOAD段的大小4455    linker_so.load_bias = get_elf_exec_load_bias(elf_hdr); //类似于一个加载的偏移4456    linker_so.dynamic = nullptr;4457    linker_so.phdr = phdr;   //4458    linker_so.phnum = elf_hdr->e_phnum;  //4459    linker_so.set_linker_flag();4460  4461    // Prelink the linker so we can access linker globals.4462    if (!linker_so.prelink_image()) __linker_cannot_link(args);  //解析(遍历)PT_DYNAMIC 段(.dynamic节)里面的内容...4470    if (!linker_so.link_image(g_empty_list, g_empty_list, nullptr)) __linker_cannot_link(args); //根据.rela.dyn .rela.plt进行重定位...4481    // Initialize the main thread (including TLS, so system calls really work).4482    __libc_init_main_thread(args);4483  4484    // We didn't protect the linker's RELRO pages in link_image because we4485    // couldn't make system calls on x86 at that point, but we can now...4486    if (!linker_so.protect_relro()) __linker_cannot_link(args); //将PT_GNU_RELRO段指向的内存地址通过mprotoct函数设置为PROT_READ4487  4488    // Initialize the linker's static libc's globals4489    __libc_init_globals(args);4490  4491    // Initialize the linker's own global variables4492    linker_so.call_constructors();4493  4494    // Initialize static variables. 
Note that in order to4495    // get correct libdl_info we need to call constructors4496    // before get_libdl_info().4497    solist = get_libdl_info();4498    sonext = get_libdl_info();4499    g_default_namespace.add_soinfo(get_libdl_info());4500  4501    // We have successfully fixed our own relocations. It's safe to run4502    // the main part of the linker now.4503    args.abort_message_ptr = &g_abort_message;4504    ElfW(Addr) start_address = __linker_init_post_relocation(args, linker_addr);4505  4506    INFO("[ Jumping to _start (%p)... ]", reinterpret_cast<void*>(start_address));4507  4508    // Return the address that the calling assembly stub should jump to.4509    return start_address;4510  }

KernelArgumentBlock 这个类是用来解析从kernel 拷贝过来的参数

bionic/libc/private/KernelArgumentBlock.h

/*   根据前一篇博客的分析,kernel拷贝参数到用户空间的分布如下 *                  |   0               | *                  |   0               | *                  |   elf_info[valn]  | *                  |   elf_info[id0]   | *                  |   ...             | *                  |   elf_info[val0]  | *                  |   elf_info[id0]   | *                  |    0              | *                  |   envn            | *                  |   ...             | *                  |   env0            | *                  |   0               | *                  |   argn            | *                  |   ....            | *                  |   arg0            | *    raw_args ->   |   argc            | */class KernelArgumentBlock {34   public:35    KernelArgumentBlock(void* raw_args) {36      uintptr_t* args = reinterpret_cast<uintptr_t*>(raw_args);37      argc = static_cast<int>(*args);  //传递的参数个数,不包括环境变量参数,也就是上面的argc38      argv = reinterpret_cast<char**>(args + 1); //指向arg0 的位置39      envp = argv + argc + 1;  //加上argc,再加1,就指到env0 的位置40  41      // Skip over all environment variable definitions to find the aux vector.42      // The end of the environment block is marked by a NULL pointer.43      char** p = envp;44      while (*p != NULL) { //等于NULL,就指到envn 后面 0的位置45        ++p;46      }47      ++p; // Skip the NULL itself.//再加1就指到elf_info[id0]位置48  49      auxv = reinterpret_cast<ElfW(auxv_t)*>(p); 50    }51  52    // Similar to ::getauxval but doesn't require the libc global variables to be set up,53    // so it's safe to call this really early on.54    unsigned long getauxval(unsigned long type) {55      for (ElfW(auxv_t)* v = auxv; v->a_type != AT_NULL; ++v) {56        if (v->a_type == type) {57          return v->a_un.a_val;  //返回a_type 对应的value58        }59      }60      return 0;61    }62  63    int argc;64    char** argv;65    char** envp;66    ElfW(auxv_t)* auxv;67  68    abort_msg_t** abort_message_ptr;69  70   private:71    DISALLOW_COPY_AND_ASSIGN(KernelArgumentBlock);72 
 };


解析完参数后,接下来创建一个soinfo结构体linker_so,这是一个局部变量,调用它的prelink_image()函数

bionic/linker/linker.cpp

3495  bool soinfo::prelink_image() {3496    /* Extract dynamic section */3497    ElfW(Word) dynamic_flags = 0;3498    phdr_table_get_dynamic_section(phdr, phnum, load_bias, &dynamic, &dynamic_flags); //获得linker加载到内存后,.dynamic这个节在内存中的位置.../*  ElfW(Dyn)对应的数据结构如下 *  linux-4.10/include/uapi/linux/elf.h *  typedef struct { *      Elf64_Sxword d_tag;/* entry tag value */ *      union { *          Elf64_Xword d_val; //保存值 *          Elf64_Addr d_ptr;  //保存地址 *      } d_un; *   } Elf64_Dyn;*/3529    for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) {  //解析(遍历)PT_DYNAMIC 段(.dynamic节)里面的内容3530      DEBUG("d = %p, d[0](tag) = %p d[1](val) = %p",3531            d, reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val));3532      switch (d->d_tag) {3533        case DT_SONAME:3534          // this is parsed after we have strtab initialized (see below).3535          break;...3566        case DT_STRTAB:  // str table 的位置3567          strtab_ = reinterpret_cast<const char*>(load_bias + d->d_un.d_ptr);3568          break;3569  3570        case DT_STRSZ: //str table 每一项的大小3571          strtab_size_ = d->d_un.d_val;3572          break;3573  3574        case DT_SYMTAB:  // sym table 的位置3575          symtab_ = reinterpret_cast<ElfW(Sym)*>(load_bias + d->d_un.d_ptr);3576          break;3577  /* *  typedef struct elf64_sym { *    Elf64_Word st_name;    //4 byte/* Symbol name, index in string tbl */ *    unsigned charst_info;  //1 byte/* Type and binding attributes */ *    unsigned charst_other; //1 byte/* No defined meaning, 0 */ *    Elf64_Half st_shndx;        //2 byte/* Associated section index */ *    Elf64_Addr st_value;        //8 byte/* Value of the symbol */ *    Elf64_Xword st_size;  //8 byte/* Associated symbol size */ *  } Elf64_Sym;*/3578        case DT_SYMENT:  // sym table 每一项的大小,和结构体Elf64_Sym 的大小一致,所以在64位上是 24 byte3579          if (d->d_un.d_val != sizeof(ElfW(Sym))) {3580            DL_ERR("invalid DT_SYMENT: %zd in \"%s\"",3581                
static_cast<size_t>(d->d_un.d_val), get_realpath());3582            return false;3583          }3584          break;...             //还定义了很多类型,这里不再一一描述,有兴趣的同学自己看源码吧3867        default:3868          if (!relocating_linker) {3869            DL_WARN("%s: unused DT entry: type %p arg %p", get_realpath(),3870                reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val));3871          }3872          break;3873      }3874    }...3904    // second pass - parse entries relying on strtab3905    for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) {3906      switch (d->d_tag) {3907        case DT_SONAME:3908          set_soname(get_string(d->d_un.d_val)); //获取需要加载的so,linker 不需要额外的so3909          break;3910        case DT_RUNPATH:3911          set_dt_runpath(get_string(d->d_un.d_val));3912          break;3913      }3914    }...3929    return true;3930  }

prelink_image()函数 的主要工作是解析(遍历)PT_DYNAMIC 段(.dynamic节)里面的内容,并保存到对应的成员变量中


bionic/linker/linker_phdr.cpp

890  /* Return the address and size of the ELF file's .dynamic section in memory,891   * or null if missing.892   *893   * Input:894   *   phdr_table  -> program header table895   *   phdr_count  -> number of entries in tables896   *   load_bias   -> load bias897   * Output:898   *   dynamic       -> address of table in memory (null on failure).899   *   dynamic_flags -> protection flags for section (unset on failure)900   * Return:901   *   void902   */903  void phdr_table_get_dynamic_section(const ElfW(Phdr)* phdr_table, size_t phdr_count,904                                      ElfW(Addr) load_bias, ElfW(Dyn)** dynamic,905                                      ElfW(Word)* dynamic_flags) {906    *dynamic = nullptr;907    for (size_t i = 0; i<phdr_count; ++i) {908      const ElfW(Phdr)& phdr = phdr_table[i];909      if (phdr.p_type == PT_DYNAMIC) {  //PT_DYNAMIC 这个段, 对应的是.dynamic 这个section910        *dynamic = reinterpret_cast<ElfW(Dyn)*>(load_bias + phdr.p_vaddr); //加载的偏移加上 物理地址偏移就是内存中的位置911        if (dynamic_flags) {912          *dynamic_flags = phdr.p_flags;913        }914        return;915      }916    }917  }

phdr_table_get_dynamic_section()函数用于获取.dynamic这个节(PT_DYNAMIC这个段)在内存中的位置,接下来调用link_image()对符号进行重定位

bionic/linker/linker.cpp

3932  bool soinfo::link_image(const soinfo_list_t& global_group, const soinfo_list_t& local_group,3933                          const android_dlextinfo* extinfo) {...3999  #if defined(USE_RELA)   //一般是走上面这个4000    if (rela_ != nullptr) {  //对应  DT_RELA 这个dyn,#define DT_RELA 7  ,内部定义的函数4001      DEBUG("[ relocating %s ]", get_realpath());/* *  linux-4.10/include/uapi/linux/elf.h *   typedef struct elf64_rela { *     Elf64_Addr r_offset;   //8 byte/* Location at which to apply the action */ *     Elf64_Xword r_info;    //8 byte/* index and type of relocation */ *     Elf64_Sxword r_addend; //8 byte/* Constant addend used to compute value */ *   } Elf64_Rela;*/4002      if (!relocate(version_tracker,  //对应 .rela.dyn 这个节,rela_count_ 就是这个节里面的entry 个数,大小就是 Elf64_Rela 结构体大小,24byte4003              plain_reloc_iterator(rela_, rela_count_), global_group, local_group)) { 4004        return false;4005      }4006    }4007    if (plt_rela_ != nullptr) {  //对应 DT_JMPREL 这个dyn,#define DT_JMPREL 23 ,需要调用的外部函数4008      DEBUG("[ relocating %s plt ]", get_realpath());4009      if (!relocate(version_tracker,   //对应.rela.plt 这个节,plt_rela_count_ 就是这个节里面的entry 个数,大小就是 Elf64_Rela 结构体大小,24byte4010              plain_reloc_iterator(plt_rela_, plt_rela_count_), global_group, local_group)) {4011        return false;4012      }4013    }4014  #else4015    if (rel_ != nullptr) {  //对应 DT_REL 这个dyn,#define DT_REL 174016      DEBUG("[ relocating %s ]", get_realpath());4017      if (!relocate(version_tracker,4018              plain_reloc_iterator(rel_, rel_count_), global_group, local_group)) {4019        return false;4020      }4021    }4022    if (plt_rel_ != nullptr) { //对应 DT_JMPREL 这个dyn,#define DT_JMPREL 234023      DEBUG("[ relocating %s plt ]", get_realpath());4024      if (!relocate(version_tracker,4025              plain_reloc_iterator(plt_rel_, plt_rel_count_), global_group, local_group)) {4026        return false;4027      }4028    }4029  #endif...4049  4050    // We can also turn on 
GNU RELRO protection if we're not linking the dynamic linker4051    // itself --- it can't make system calls yet, and will have to call protect_relro later.4052    if (!is_linker() && !protect_relro()) {4053      return false;4054    }4055  4056    /* Handle serializing/sharing the RELRO segment */4057    if (extinfo && (extinfo->flags & ANDROID_DLEXT_WRITE_RELRO)) {4058      if (phdr_table_serialize_gnu_relro(phdr, phnum, load_bias,4059                                         extinfo->relro_fd) < 0) {4060        DL_ERR("failed serializing GNU RELRO section for \"%s\": %s",4061               get_realpath(), strerror(errno));4062        return false;4063      }4064    } else if (extinfo && (extinfo->flags & ANDROID_DLEXT_USE_RELRO)) {4065      if (phdr_table_map_gnu_relro(phdr, phnum, load_bias,4066                                   extinfo->relro_fd) < 0) {4067        DL_ERR("failed mapping GNU RELRO section for \"%s\": %s",4068               get_realpath(), strerror(errno));4069        return false;4070      }4071    }4072  4073    notify_gdb_of_load(this);4074    return true;4075  }4076  4077  bool soinfo::protect_relro() {4078    if (phdr_table_protect_gnu_relro(phdr, phnum, load_bias) < 0) {4079      DL_ERR("can't enable GNU RELRO protection for \"%s\": %s",4080             get_realpath(), strerror(errno));4081      return false;4082    }4083    return true;4084  }

get_addend()函数的定义如下

2728  #if defined(USE_RELA)  //一般走上面这个了2729  static ElfW(Addr) get_addend(ElfW(Rela)* rela, ElfW(Addr) reloc_addr __unused) {2730    return rela->r_addend;2731  }2732  #else2733  static ElfW(Addr) get_addend(ElfW(Rel)* rel, ElfW(Addr) reloc_addr) {2734    if (ELFW(R_TYPE)(rel->r_info) == R_GENERIC_RELATIVE ||2735        ELFW(R_TYPE)(rel->r_info) == R_GENERIC_IRELATIVE) {2736      return *reinterpret_cast<ElfW(Addr)*>(reloc_addr);2737    }2738    return 0;2739  }2740  #endif

link_image()函数的主要工作是根据.rela.dyn、.rela.plt中的重定位信息修改got表(.got / .got.plt)中的内容,relocate() 就是具体的实现函数

template<typename ElfRelIteratorT>2743  bool soinfo::relocate(const VersionTracker& version_tracker, ElfRelIteratorT&& rel_iterator,2744                        const soinfo_list_t& global_group, const soinfo_list_t& local_group) {/* *  linux-4.10/include/uapi/linux/elf.h *   typedef struct elf64_rela { *     Elf64_Addr r_offset;   /* Location at which to apply the action */ *     Elf64_Xword r_info;    /* index and type of relocation */ *     Elf64_Sxword r_addend; /* Constant addend used to compute value */ *   } Elf64_Rela;*/2745    for (size_t idx = 0; rel_iterator.has_next(); ++idx) {  //根据上面的分析,我们知道rel_iterator 集合保存的类型是Elf64_Rela 结构体2746      const auto rel = rel_iterator.next();2747      if (rel == nullptr) {2748        return false;2749      }2750  2751      ElfW(Word) type = ELFW(R_TYPE)(rel->r_info);  // #define ELF64_R_TYPE(info)  (((info) >> 56) & 0xff)      位运算,取高8位,表示重定位类型2752      ElfW(Word) sym = ELFW(R_SYM)(rel->r_info);    // #define ELF64_R_SYM(info)   (((info) >> 0) & 0xffffffff) 位运算,取低56位,表示符号表的索引2753  2754      ElfW(Addr) reloc = static_cast<ElfW(Addr)>(rel->r_offset + load_bias); //.rela.dyn 修改.got 节里面的内容, 也就是got表中的位置,.rela.plt 修改.got.plt 节里面的内容2755      ElfW(Addr) sym_addr = 0;2756      const char* sym_name = nullptr;2757      ElfW(Addr) addend = get_addend(rel, reloc);  //等价于返回 rela->r_addend2758  2759      DEBUG("Processing \"%s\" relocation at index %zd", get_realpath(), idx);2760      if (type == R_GENERIC_NONE) {2761        continue;2762      }2763  2764      const ElfW(Sym)* s = nullptr;2765      soinfo* lsi = nullptr;  //2766  /* *  typedef struct elf64_sym { *    Elf64_Word st_name;    //4 byte/* Symbol name, index in string tbl */ *    unsigned charst_info;  //1 byte/* Type and binding attributes */ *    unsigned charst_other; //1 byte/* No defined meaning, 0 */ *    Elf64_Half st_shndx;        //2 byte/* Associated section index */ *    Elf64_Addr st_value;        //8 byte/* Value of the symbol */ *    Elf64_Xword st_size;  //8 
byte/* Associated symbol size */ *  } Elf64_Sym;*/2767      if (sym != 0) {/* //获取sym的名称,字符串名称,上面重新把Elf64_Sym 结构体贴出来, *  symtab_[sym] sym就是一个数组下标,所以symtab_[sym] 对应下标的 Elf64_Sym *  然后通过symtab_[sym].st_name 在 string table的偏移量找到对应的string,是偏移量哦,以\0作为字符串结束标志*/2768        sym_name = get_string(symtab_[sym].st_name);2769        const version_info* vi = nullptr;2770  2771        if (!lookup_version_info(version_tracker, sym, sym_name, &vi)) {2772          return false;2773        }2774        //global_group 和 local_group 都是一个soinfo 链表, sym_name 所在的so(lsi 是对应的soin)            // 先从this 当前的soinfo找sym_name 对应的sym,找到就返回,不然从global_group 中找,最后从local_group 找2775        if (!soinfo_do_lookup(this, sym_name, vi, &lsi, global_group, local_group, &s)) { //从加载的so 里面找到sym_name 对应的soinfo和 Elf64_Sym2776          return false;2777        }2778  2779        if (s == nullptr) {2780          // We only allow an undefined symbol if this is a weak reference...2781          s = &symtab_[sym];  //当前so 中的sym2782          if (ELF_ST_BIND(s->st_info) != STB_WEAK) {2783            DL_ERR("cannot locate symbol \"%s\" referenced by \"%s\"...", sym_name, get_realpath());2784            return false;2785          }...2835        } else { // We got a definition..../* *  typedef struct elf64_sym { *    Elf64_Word st_name;    //4 byte/* Symbol name, index in string tbl */ *    unsigned charst_info;  //1 byte/* Type and binding attributes */ *    unsigned charst_other; //1 byte/* No defined meaning, 0 */ *    Elf64_Half st_shndx;        //2 byte/* Associated section index */ *    Elf64_Addr st_value;        //8 byte/* Value of the symbol */ *    Elf64_Xword st_size;  //8 byte/* Associated symbol size */ *  } Elf64_Sym;*/2851          sym_addr = lsi->resolve_symbol_address(s); //找到Elf64_Sym 对应的函数地址,即 load_bias + st_value...2861        }2862        count_relocation(kRelocSymbol);2863      }2864  2865      switch (type) {   //Elf64_Xword r_info;    /* index and type of relocation *//*   #define R_AARCH64_COPY  
                1024 *   #define R_AARCH64_GLOB_DAT              1025    /* Create GOT entry.  */ *   #define R_AARCH64_JUMP_SLOT             1026    /* Create PLT entry.  */ *   #define R_AARCH64_RELATIVE              1027    /* Adjust by program base.  */ *   #define R_AARCH64_TLS_TPREL64           1030 *   #define R_AARCH64_TLS_DTPREL32          1031 *   #define R_AARCH64_IRELATIVE             1032*/2866        case R_GENERIC_JUMP_SLOT:  //arm64对应R_AARCH64_JUMP_SLOT,需要绑定外部函数的地址2867          count_relocation(kRelocAbsolute);2868          MARK(rel->r_offset);2869          TRACE_TYPE(RELO, "RELO JMP_SLOT %16p <- %16p %s\n",2870                     reinterpret_cast<void*>(reloc),2871                     reinterpret_cast<void*>(sym_addr + addend), sym_name);2872  2873          *reinterpret_cast<ElfW(Addr)*>(reloc) = (sym_addr + addend); //.got.plt 节, got.plt 表中保存外部的函数地址2874          break;2875        case R_GENERIC_GLOB_DAT:  //arm64对应 R_AARCH64_GLOB_DAT,2876          count_relocation(kRelocAbsolute);2877          MARK(rel->r_offset);2878          TRACE_TYPE(RELO, "RELO GLOB_DAT %16p <- %16p %s\n",2879                     reinterpret_cast<void*>(reloc),2880                     reinterpret_cast<void*>(sym_addr + addend), sym_name);2881          *reinterpret_cast<ElfW(Addr)*>(reloc) = (sym_addr + addend); 2882          break;2883        case R_GENERIC_RELATIVE:  //arm64对应 R_AARCH64_RELATIVE2884          count_relocation(kRelocRelative);2885          MARK(rel->r_offset);2886          TRACE_TYPE(RELO, "RELO RELATIVE %16p <- %16p\n",2887                     reinterpret_cast<void*>(reloc),2888                     reinterpret_cast<void*>(load_bias + addend));2889          *reinterpret_cast<ElfW(Addr)*>(reloc) = (load_bias + addend);2890          break;2891        case R_GENERIC_IRELATIVE:  //arm64对应 R_AARCH64_IRELATIVE2892          count_relocation(kRelocRelative);2893          MARK(rel->r_offset);2894          TRACE_TYPE(RELO, "RELO IRELATIVE %16p <- %16p\n",2895           
           reinterpret_cast<void*>(reloc),2896                      reinterpret_cast<void*>(load_bias + addend));2897          {...2910            ElfW(Addr) ifunc_addr = call_ifunc_resolver(load_bias + addend);...2921            *reinterpret_cast<ElfW(Addr)*>(reloc) = ifunc_addr;2922          }2923          break;...3097        default:3098          DL_ERR("unknown reloc type %d @ %p (%zu)", type, rel, idx);3099          return false;3100      }3101    }3102    return true;3103  }

下面给出一个例子,分析程序中如何通过got表调用到外部函数,以logcat 程序中调用printf 的过程为例

1、在执行的过程中调用printf()函数,对应如下的汇编指令

    4564:   97fff7df    bl  24e0 <printf@plt>
上面指令的意思是跳转到 0x24e0 这个地址执行,而0x24e0 这个地址在 .plt 这个节中

2、0x24e0 对应的指令

00000000000024e0 <printf@plt>:
    24e0:   d0000030    adrp    x16, 8000 <_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6insertEmPKcm+0x2a28>
    24e4:   f9479e11    ldr     x17, [x16,#3896]
    24e8:   913ce210    add     x16, x16, #0xf38
    24ec:   d61f0220    br      x17

 指令解析   
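   adrp x16, 8000          等于 x16 = (pc & ~0xfff) + (imm << 12) = 0x8000    也就是把当前pc的低12位(0-11位)清0后,再加上指令中编码的页偏移(这里 pc = 0x24e0,页基址 0x2000,加上 0x6000);反汇编中显示的 8000 是已经计算好的目标页地址(十六进制),即printf 对应的.got.plt 表项所在4KB页的基地址(地址生成指令)
   ldr x17, [x16,#3896]    等于 x17 = *(x16 + 3896)   也就是从 x16 + 3896(0xf38),即地址 0x8f38 处读取8字节到x17;这个.got.plt 表项中保存的就是printf 函数对应的外部地址
   add x16, x16, #0xf38    等于 x16 = x16 + 0xf38 = 0x8f38   也就是让x16 指向printf 对应的.got.plt 表项本身
   br x17     跳到 x17 的地址执行,也就是外部printf 函数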

3、查看.rela.plt section 中printf 对应的重定位项,linker会把 Offset(0x8f38,运行时还要加上load_bias)所指向的.got.plt 表项的内容修改成外部printf函数对应的地址(即前面relocate()中 R_GENERIC_JUMP_SLOT 分支所做的事情)

Relocation section '.rela.plt' at offset 0x18e0 contains 86 entries:
  Offset          Info           Type           Sym. Value    Sym. Name + Addend
000000008f38  004200000402 R_AARCH64_JUMP_SL 0000000000000000 printf + 0

也就是说 0x000000008f38 这个地址保存的值就是外部printf()函数的地址


__linker_init()函数继续往下执行,调用到__libc_init_main_thread()
bionic/libc/bionic/__libc_init_main_thread.cpp

52  void __libc_init_main_thread(KernelArgumentBlock& args) {53    __libc_auxv = args.auxv;...58    static pthread_internal_t main_thread;  //创建main_thread59  60    // The -fstack-protector implementation uses TLS, so make sure that's61    // set up before we call any function that might get a stack check inserted.62    __set_tls(main_thread.tls);63  64    // Tell the kernel to clear our tid field when we exit, so we're like any other pthread.65    // As a side-effect, this tells us our pid (which is the same as the main thread's tid).66    main_thread.tid = __set_tid_address(&main_thread.tid);67    main_thread.set_cached_pid(main_thread.tid);68  69    // We don't want to free the main thread's stack even when the main thread exits70    // because things like environment variables with global scope live on it.71    // We also can't free the pthread_internal_t itself, since that lives on the main72    // thread's stack rather than on the heap.73    // The main thread has no mmap allocated space for stack or pthread_internal_t.74    main_thread.mmap_size = 0;75    pthread_attr_init(&main_thread.attr);76    main_thread.attr.guard_size = 0; // The main thread has no guard page.77    main_thread.attr.stack_size = 0; // User code should never see this; we'll compute it when asked.78    // TODO: the main thread's sched_policy and sched_priority need to be queried.79  80    // The TLS stack guard is set from the global, so ensure that we've initialized the global81    // before we initialize the TLS.82    __libc_init_global_stack_chk_guard(args);  //栈保护初始化83  84    __init_thread(&main_thread);85    __init_tls(&main_thread);86  87    // Store a pointer to the kernel argument block in a TLS slot to be88    // picked up by the libc constructor.89    main_thread.tls[TLS_SLOT_BIONIC_PREINIT] = &args;90  91    __init_alternate_signal_stack(&main_thread);92  }
这个函数这里没有仔细分析,只需要知道栈保护的初始化是在这里面做的:通过在栈中的特殊位置写入特殊的值,当一个函数调用返回时,检查这些特殊的值是否被改变,以判断栈是否被破坏了。

bionic/libc/bionic/libc_init_common.cpp

86  void __libc_init_globals(KernelArgumentBlock& args) {...   //为libc设置辅助向量、vsdo和setjmp等全局变量90    // Initialize libc globals that are needed in both the linker and in libc.91    // In dynamic binaries, this is run at least twice for different copies of the92    // globals, once for the linker's copy and once for the one in libc.so.93    __libc_init_global_stack_chk_guard(args);94    __libc_auxv = args.auxv;95    __libc_globals.initialize();96    __libc_globals.mutate([&args](libc_globals* globals) {97      __libc_init_vdso(globals, args);98      __libc_init_setjmp_cookie(globals, args);99    });100  }

__libc_init_globals() 函数做了什么,我们也不深入分析,接下来看一下call_constructors()做了什么

bionic/linker/linker.cpp

void soinfo::call_constructors() {3143    if (constructors_called) {3144      return;3145    }...    //递归了get_childred()中的soinfo,执行DT_INIT段指向的函数和DT_INIT_ARRAY段包含的函数列表3157    constructors_called = true;3158  3159    if (!is_main_executable() && preinit_array_ != nullptr) {3160      // The GNU dynamic linker silently ignores these, but we warn the developer.3161      PRINT("\"%s\": ignoring DT_PREINIT_ARRAY in shared library!", get_realpath());3162    }3163  3164    get_children().for_each([] (soinfo* si) {3165      si->call_constructors();  //3166    });3167  3168    TRACE("\"%s\": calling constructors", get_realpath());3169  /* bionic/libc/include/elf.h * #define DT_INIT12 * #define DT_INIT_ARRAY 25*/3170    // DT_INIT should be called before DT_INIT_ARRAY if both are present.3171    call_function("DT_INIT", init_func_);  //调用.init 中指定的函数3172    call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false); //调用 .init_array 中指定的函数3173  }


接下来调用get_libdl_info()

bionic/linker/dlfcn.cpp

// This is used by the dynamic linker. Every process gets these symbols for free.//get_libdl_info()相当于在堆上拷贝linker_so构造了一个新的soinfo,然后添加到solist和sonext链表中//get_libdl_info()创建的soinfo,被命名为libdl.so, 所以dlopen, dlclose, dlsym, dladdr这几个函数实际上都是直接链接linker当中的符号265  soinfo* get_libdl_info() {266    if (__libdl_info == nullptr) {267      __libdl_info = new (__libdl_info_buf) soinfo(&g_default_namespace, "libdl.so", nullptr, 0, RTLD_GLOBAL);268      __libdl_info->flags_ |= FLAG_LINKED;269      __libdl_info->strtab_ = ANDROID_LIBDL_STRTAB;270      __libdl_info->symtab_ = g_libdl_symtab;271      __libdl_info->nbucket_ = sizeof(g_libdl_buckets)/sizeof(unsigned);272      __libdl_info->nchain_ = sizeof(g_libdl_chains)/sizeof(unsigned);273      __libdl_info->bucket_ = g_libdl_buckets;274      __libdl_info->chain_ = g_libdl_chains;275      __libdl_info->ref_count_ = 1;276      __libdl_info->strtab_size_ = sizeof(ANDROID_LIBDL_STRTAB);277      __libdl_info->local_group_root_ = __libdl_info;278      __libdl_info->soname_ = "libdl.so";279      __libdl_info->target_sdk_version_ = __ANDROID_API__;280      __libdl_info->generate_handle();281  #if defined(__work_around_b_24465209__)282      strlcpy(__libdl_info->old_name_, __libdl_info->soname_, sizeof(__libdl_info->old_name_));283  #endif284    }285  286    return __libdl_info;287  }
前面有介绍linker_so 是__linker_init 函数中的局部变量,这个函数执行完了,这个变量就会被释放。


最后调用__linker_init_post_relocation()对加载的可执行程序中的符号进行重定位。
bionic/linker/linker.cpp

4190  static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(Addr) linker_base) {...4196    // Sanitize the environment.4197    __libc_init_AT_SECURE(args);4198  4199    // Initialize system properties4200    __system_properties_init(); // may use 'environ'4201  4202    debuggerd_init();  //注册debuggerd对应的handle函数, 也就是当进程发生异常后,用于抓取异常信息,后面专门再讲...4239    const char* executable_path = get_executable_path(); 4240    soinfo* si = soinfo_alloc(&g_default_namespace, executable_path, &file_stat, 0, RTLD_GLOBAL);4241    if (si == nullptr) {4242      __libc_fatal("Couldn't allocate soinfo: out of memory?");4243    }4244  4245    /* bootstrap the link map, the main exe always needs to be first */4246    si->set_main_executable(); //标志这是一个可执行程序4247    link_map* map = &(si->link_map_head);4248  4249    // Register the main executable and the linker upfront to have4250    // gdb aware of them before loading the rest of the dependency4251    // tree.4252    map->l_addr = 0;4253    map->l_name = const_cast<char*>(executable_path);4254    insert_link_map_into_debug_map(map);4255    init_linker_info_for_gdb(linker_base);4256  /*  NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);  //load_addr可执行程序 加载到内存的地址 *  NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); *  NEW_AUX_ENT(AT_PHNUM, exec->e_phnum); *  NEW_AUX_ENT(AT_ENTRY, exec->e_entry);   //可执行程序的入口地址*/4257    // Extract information passed from the kernel.4258    si->phdr = reinterpret_cast<ElfW(Phdr)*>(args.getauxval(AT_PHDR));4259    si->phnum = args.getauxval(AT_PHNUM);4260    si->entry = args.getauxval(AT_ENTRY);  //4261  4262    /* Compute the value of si->base. We can't rely on the fact that4263     * the first entry is the PHDR because this will not be true4264     * for certain executables (e.g. 
some in the NDK unit test suite)4265     */4266    si->base = 0;4267    si->size = phdr_table_get_load_size(si->phdr, si->phnum);4268    si->load_bias = 0;4269    for (size_t i = 0; i < si->phnum; ++i) {4270      if (si->phdr[i].p_type == PT_PHDR) {4271        si->load_bias = reinterpret_cast<ElfW(Addr)>(si->phdr) - si->phdr[i].p_vaddr;4272        si->base = reinterpret_cast<ElfW(Addr)>(si->phdr) - si->phdr[i].p_offset;4273        break;4274      }4275    }4276    si->dynamic = nullptr;4277  4278    ElfW(Ehdr)* elf_hdr = reinterpret_cast<ElfW(Ehdr)*>(si->base);4279    if (elf_hdr->e_type != ET_DYN) {4280      __libc_fatal("\"%s\": error: only position independent executables (PIE) are supported.",4281                   args.argv[0]);4282    }4283  4284    // Use LD_LIBRARY_PATH and LD_PRELOAD (but only if we aren't setuid/setgid).4285    parse_LD_LIBRARY_PATH(ldpath_env);4286    parse_LD_PRELOAD(ldpreload_env);4287  4288    somain = si;4289  4290    init_default_namespace();  //4291  4292    if (!si->prelink_image()) { //和前面一样,解析.dynamic 段4293      __libc_fatal("CANNOT LINK EXECUTABLE \"%s\": %s", args.argv[0], linker_get_error_buffer());4294    }4295  4296    // add somain to global group4297    si->set_dt_flags_1(si->get_dt_flags_1() | DF_1_GLOBAL);4298  4299    // Load ld_preloads and dependencies.4300    StringLinkedList needed_library_name_list;4301    size_t needed_libraries_count = 0;4302    size_t ld_preloads_count = 0;4303  4304    for (const auto& ld_preload_name : g_ld_preload_names) {  //找出需要预加载的so4305      needed_library_name_list.push_back(ld_preload_name.c_str());4306      ++needed_libraries_count;4307      ++ld_preloads_count;4308    }4309  4310    for_each_dt_needed(si, [&](const char* name) {  //找出依赖的so4311      needed_library_name_list.push_back(name);4312      ++needed_libraries_count;4313    });4314  4315    const char* needed_library_names[needed_libraries_count];4316  4317    memset(needed_library_names, 0, sizeof(needed_library_names));4318  
  needed_library_name_list.copy_to_array(needed_library_names, needed_libraries_count);4319  4320    if (needed_libraries_count > 0 &&4321        !find_libraries(&g_default_namespace, si, needed_library_names, needed_libraries_count, //加载依赖的so4322                        nullptr, &g_ld_preloads, ld_preloads_count, RTLD_GLOBAL, nullptr,4323                        /* add_as_children */ true)) {4324      __libc_fatal("CANNOT LINK EXECUTABLE \"%s\": %s", args.argv[0], linker_get_error_buffer());4325    } else if (needed_libraries_count == 0) {4326      if (!si->link_image(g_empty_list, soinfo::soinfo_list_t::make_list(si), nullptr)) {  //对程序中的符号进行重定位4327        __libc_fatal("CANNOT LINK EXECUTABLE \"%s\": %s", args.argv[0], linker_get_error_buffer());4328      }4329      si->increment_ref_count();4330    }4331  4332    add_vdso(args);4333  4334    {4335      ProtectedDataGuard guard;4336  4337      si->call_pre_init_constructors(); //调用.preinit_array 指定的函数...4344      map->l_addr = si->load_bias;4345      si->call_constructors(); //调用.init .init_array中指定的函数4346    }...4390    return si->entry; //返回程序执行的入口函数4391  }

si->call_pre_init_constructors()、si->call_constructors() 这两个调用在main()函数执行之前就已经执行完了。



原创粉丝点击