7、Linux内核如何装载和启动一个可执行程序
来源:互联网 发布:网络客户服务的优势有 编辑:程序博客网 时间:2024/06/13 11:51
姓名:周毅原创作品转载请注明出处 《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000
一、可执行文件的创建——预处理、编译和链接
预处理,替换宏定义等等:gcc -E -o hello.cpp hello.c
编译,编译成汇编文本代码:gcc -x cpp-output -S -o hello.s hello.cpp
汇编,汇编成目标文件:gcc -x assembler -c hello.s -o hello.o
链接共享库,得到可以执行文件:gcc -o hello hello.o
静态链接:gcc -o hello.static hello.o -static
二、查看ELF目标文件格式
是UNIX系统实验室(USL)作为应用程序二进制接口(Application Binary Interface,ABI)而开发和发布的,也是Linux的主要可执行文件格式。
#define EI_NIDENT typedef struct{ unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; Elf32_Word e_version; Elf32_Addr e_entry; Elf32_Off e_phoff; Elf32_Off e_shoff; Elf32_Word e_flags; Elf32_Half e_ehsize; Elf32_Half e_phentsize; Elf32_Half e_phnum; Elf32_Half e_shentsize; Elf32_Half e_shnum; Elf32_Half e_shstrndx; }Elf32_Ehdr;
e_type 它标识的是该文件的类型。
e_machine 表明运行该程序需要的体系结构。
e_version 表示文件的版本。
e_entry 程序的入口地址。
e_phoff 表示Program header table 在文件中的偏移量(以字节计数)。
e_shoff 表示Section header table 在文件中的偏移量(以字节计数)。
e_flags 对IA32而言,此项为0。
e_ehsize 表示ELF header大小(以字节计数)。
e_phentsize 表示Program header table中每一个条目的大小。
e_phnum 表示Program header table中有多少个条目。
e_shentsize 表示Section header table中的每一个条目的大小。
e_shnum 表示Section header table中有多少个条目。
e_shstrndx 包含节名称的字符串是第几个节(从零开始计数)
可以看到elf头的各种信息。
三、可执行程序的装载
Shell本身不限制命令行参数的个数,命令行参数的个数受限于命令自身
例如,int main(int argc, char *argv[])
又如, int main(int argc, char *argv[], char *envp[])
Shell会调用execve将命令行参数和环境参数传递给可执行程序的main函数
int execve(const char * filename,char * const argv[ ],char * const envp[ ]);
库函数exec*都是execve的封装例程
下面通过内核代码看看sys_execve的执行过程:
在linux-3.18.6/fs/exec.c中我们找到sys_execve的系统调用,实际上执行的是do_execve:
SYSCALL_DEFINE3(execve, const char __user *, filename, const char __user *const __user *, argv, const char __user *const __user *, envp){ return do_execve(getname(filename), argv, envp);}
继续找到do_execve,最终执行的是do_execve_common,:
int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp){ struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; return do_execve_common(filename, argv, envp);}
找到do_execve_common,:
static int do_execve_common(struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp){ struct linux_binprm *bprm;// linux_binprm结构体用于维护程序执行过程中所使用的各种数据。 struct file *file; struct files_struct *displaced; int retval; if (IS_ERR(filename)) return PTR_ERR(filename); /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs * don't check setuid() return code. Here we additionally recheck * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } //下面是初始化bprm的过程 /* We're below the limit (still or again), so we don't want to make * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; retval = unshare_files(&displaced); if (retval) goto out_ret; retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) goto out_files; retval = prepare_bprm_creds(bprm); if (retval) goto out_free; check_unsafe_exec(bprm); current->in_execve = 1; file = do_open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; sched_exec(); bprm->file = file; bprm->filename = bprm->interp = filename->name; retval = bprm_mm_init(bprm); if (retval) goto out_unmark; bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) goto out; bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) goto out; retval = prepare_binprm(bprm); if (retval < 0) goto out; retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto out; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; retval = exec_binprm(bprm);//加载程序 if (retval < 0) goto out; /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); putname(filename); if (displaced) put_files_struct(displaced); return retval;out: if (bprm->mm) { acct_arg_size(bprm, 0); mmput(bprm->mm); }out_unmark: current->fs->in_exec = 0; current->in_execve = 0;out_free: free_bprm(bprm);out_files: if (displaced) reset_files_struct(displaced);out_ret: putname(filename); return retval;}
我们找到exec_binprm:
static int exec_binprm(struct linux_binprm *bprm){ pid_t old_pid, old_vpid; int ret; /* Need to fetch pid before load_binary changes it */ old_pid = current->pid; rcu_read_lock(); old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); rcu_read_unlock(); ret = search_binary_handler(bprm);//寻找文件格式对应的解析模块 if (ret >= 0) { audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); proc_exec_connector(current); } return ret;}
找到search_binary_handler:
int search_binary_handler(struct linux_binprm *bprm){ bool need_retry = IS_ENABLED(CONFIG_MODULES); struct linux_binfmt *fmt; int retval; /* This allows 4 levels of binfmt rewrites before failing hard. */ if (bprm->recursion_depth > 5) return -ELOOP; retval = security_bprm_check(bprm); if (retval) return retval; retval = -ENOENT; retry: read_lock(&binfmt_lock); list_for_each_entry(fmt, &formats, lh) { if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); bprm->recursion_depth++; retval = fmt->load_binary(bprm);//寻找文件格式对应的解析模块 read_lock(&binfmt_lock); put_binfmt(fmt); bprm->recursion_depth--; if (retval < 0 && !bprm->mm) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); force_sigsegv(SIGSEGV, current); return retval; } if (retval != -ENOEXEC || !bprm->file) { read_unlock(&binfmt_lock); return retval; } } read_unlock(&binfmt_lock); if (need_retry) { if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && printable(bprm->buf[2]) && printable(bprm->buf[3])) return retval; if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0) return retval; need_retry = false; goto retry; } return retval;}
于ELF格式的可执行文件fmt->load_binary(bprm);执行的应该是load_elf_binary其内部是和ELF文件格式解析:
static int load_elf_binary(struct linux_binprm *bprm){ ///解析elf文件格式 struct file *interpreter = NULL; /* to shut gcc up */ unsigned long load_addr = 0, load_bias = 0; int load_addr_set = 0; char * elf_interpreter = NULL; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; int retval, i; unsigned int size; unsigned long elf_entry; unsigned long interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; unsigned long reloc_func_desc __maybe_unused = 0; int executable_stack = EXSTACK_DEFAULT; struct pt_regs *regs = current_pt_regs(); struct { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; } *loc; loc = kmalloc(sizeof(*loc), GFP_KERNEL); if (!loc) { retval = -ENOMEM; goto out_ret; } /* Get the exec-header */ loc->elf_ex = *((struct elfhdr *)bprm->buf); retval = -ENOEXEC; /* First of all, some simple consistency checks */ if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0) goto out; if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; if (!bprm->file->f_op->mmap) goto out; /* Now read in all of the header information */ if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) goto out; if (loc->elf_ex.e_phnum < 1 || loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) goto out; size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); retval = -ENOMEM; elf_phdata = kmalloc(size, GFP_KERNEL); if (!elf_phdata) goto out; retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *)elf_phdata, size); if (retval != size) { if (retval >= 0) retval = -EIO; goto out_free_ph; } elf_ppnt = elf_phdata; elf_bss = 0; elf_brk = 0; start_code = ~0UL; end_code = 0; start_data = 0; end_data = 0; for (i = 0; i < loc->elf_ex.e_phnum; i++) { if (elf_ppnt->p_type == PT_INTERP) { /* This is the program interpreter used for * shared libraries - for now assume that this * is an a.out format binary */ retval = -ENOEXEC; if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2) goto out_free_ph; retval = -ENOMEM; elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL); if (!elf_interpreter) goto out_free_ph; retval = kernel_read(bprm->file, elf_ppnt->p_offset, elf_interpreter, elf_ppnt->p_filesz); if (retval != elf_ppnt->p_filesz) { if (retval >= 0) retval = -EIO; goto out_free_interp; } /* make sure path is NULL terminated */ retval = -ENOEXEC; if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) goto out_free_interp; /* * If the binary is not readable then enforce * mm->dumpable = 0 regardless of the interpreter's * permissions. */ would_dump(bprm, interpreter); retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); if (retval != BINPRM_BUF_SIZE) { if (retval >= 0) retval = -EIO; goto out_free_dentry; } /* Get the exec headers */ loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); break; } elf_ppnt++; } elf_ppnt = elf_phdata; for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) if (elf_ppnt->p_type == PT_GNU_STACK) { if (elf_ppnt->p_flags & PF_X) executable_stack = EXSTACK_ENABLE_X; else executable_stack = EXSTACK_DISABLE_X; break; } /* Some simple consistency checks for the interpreter */ if (elf_interpreter) { retval = -ELIBBAD; /* Not an ELF interpreter */ if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) goto out_free_dentry; /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; } /* Flush all traces of the currently running executable */ retval = flush_old_exec(bprm); if (retval) goto out_free_dentry; /* Do this immediately, since STACK_TOP as used in setup_arg_pages may depend on the personality. */ SET_PERSONALITY(loc->elf_ex); if (elf_read_implies_exec(loc->elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); if (retval < 0) goto out_free_dentry; current->mm->start_stack = bprm->p; /* Now we do a little grungy work by mmapping the ELF image into the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { int elf_prot = 0, elf_flags; unsigned long k, vaddr; if (elf_ppnt->p_type != PT_LOAD) continue; if (unlikely (elf_brk > elf_bss)) { unsigned long nbyte; /* There was a PT_LOAD segment with p_memsz > p_filesz before this one. Map anonymous pages, if needed, and clear the area. */ retval = set_brk(elf_bss + load_bias, elf_brk + load_bias); if (retval) goto out_free_dentry; nbyte = ELF_PAGEOFFSET(elf_bss); if (nbyte) { nbyte = ELF_MIN_ALIGN - nbyte; if (nbyte > elf_brk - elf_bss) nbyte = elf_brk - elf_bss; if (clear_user((void __user *)elf_bss + load_bias, nbyte)) { /* * This bss-zeroing can fail if the ELF * file specifies odd protections. So * we don't check the return value */ } } } if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ; if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; vaddr = elf_ppnt->p_vaddr; if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { elf_flags |= MAP_FIXED; } else if (loc->elf_ex.e_type == ET_DYN) { /* Try and get dynamic programs out of the way of the * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE /* Memory randomization might have been switched off * in runtime via sysctl or explicit setting of * personality flags. * If that is the case, retain the original non-zero * load_bias value in order to establish proper * non-randomized mappings. */ if (current->flags & PF_RANDOMIZE) load_bias = 0; else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);#else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);#endif } error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, 0); //目标文件映射到地址空间 if (BAD_ADDR(error)) { retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } if (!load_addr_set) { load_addr_set = 1; load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset); if (loc->elf_ex.e_type == ET_DYN) { load_bias += error - ELF_PAGESTART(load_bias + vaddr); load_addr += load_bias; reloc_func_desc = load_bias; } } k = elf_ppnt->p_vaddr; if (k < start_code) start_code = k; if (start_data < k) start_data = k; /* * Check to see if the section's size will overflow the * allowed task size. Note that p_filesz must always be * <= p_memsz so it is only necessary to check p_memsz. */ if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ retval = -EINVAL; goto out_free_dentry; } k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; if (k > elf_bss) elf_bss = k; if ((elf_ppnt->p_flags & PF_X) && end_code < k) end_code = k; if (end_data < k) end_data = k; k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; if (k > elf_brk) elf_brk = k; } loc->elf_ex.e_entry += load_bias; elf_bss += load_bias; elf_brk += load_bias; start_code += load_bias; end_code += load_bias; start_data += load_bias; end_data += load_bias; /* Calling set_brk effectively mmaps the pages that we need * for the bss and break sections. We must do this before * mapping in the interpreter, to make sure it doesn't wind * up getting placed where the bss needs to go. */ retval = set_brk(elf_bss, elf_brk); if (retval) goto out_free_dentry; if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { retval = -EFAULT; /* Nobody gets to see this, but.. */ goto out_free_dentry; } if (elf_interpreter) {//动态链接时的入口 unsigned long interp_map_addr = 0; elf_entry = load_elf_interp(&loc->interp_elf_ex, interpreter, &interp_map_addr, load_bias); if (!IS_ERR((void *)elf_entry)) { /* * load_elf_interp() returns relocation * adjustment */ interp_load_addr = elf_entry; elf_entry += loc->interp_elf_ex.e_entry; } if (BAD_ADDR(elf_entry)) { retval = IS_ERR((void *)elf_entry) ? (int)elf_entry : -EINVAL; goto out_free_dentry; } reloc_func_desc = interp_load_addr; allow_write_access(interpreter); fput(interpreter); kfree(elf_interpreter); } else { elf_entry = loc->elf_ex.e_entry;//无动态链接的程序入口 if (BAD_ADDR(elf_entry)) { retval = -EINVAL; goto out_free_dentry; } } kfree(elf_phdata); set_binfmt(&elf_format);#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES retval = arch_setup_additional_pages(bprm, !!elf_interpreter); if (retval < 0) goto out;#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm); retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); if (retval < 0) goto out; /* N.B. passed_fileno might not be initialized? */ current->mm->end_code = end_code; current->mm->start_code = start_code; current->mm->start_data = start_data; current->mm->end_data = end_data; current->mm->start_stack = bprm->p;#ifdef arch_randomize_brk if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm);#ifdef CONFIG_COMPAT_BRK current->brk_randomized = 1;#endif }#endif if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); }#ifdef ELF_PLAT_INIT /* * The ABI may specify that certain registers be set up in special * ways (on i386 %edx is the address of a DT_FINI function, for * example. In addition, it may also specify (eg, PowerPC64 ELF) * that the e_entry field is the address of the function descriptor * for the startup routine, rather than the address of the startup * routine itself. This macro performs whatever initialization to * the regs structure is required as well as any relocations to the * function descriptor entries when executing dynamically links apps. */ ELF_PLAT_INIT(regs, reloc_func_desc);#endif start_thread(regs, elf_entry, bprm->p);//执行程序 retval = 0;out: kfree(loc);out_ret: return retval; /* error cleanup */out_free_dentry: allow_write_access(interpreter); if (interpreter) fput(interpreter);out_free_interp: kfree(elf_interpreter);out_free_ph: kfree(elf_phdata); goto out;}
start_thread修改了寄存器的值:
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp){ set_user_gs(regs, 0); regs->fs = 0; regs->ds = __USER_DS; regs->es = __USER_DS; regs->ss = __USER_DS; regs->cs = __USER_CS; regs->ip = new_ip;//返回用户态的位置从int 0x80的下一条指令的位置变成新加载的可执行文件的entry位置(new_ip)。 regs->sp = new_sp; regs->flags = X86_EFLAGS_IF; /* * force it to the iret return path by making it look as if there was * some work pending. */ set_thread_flag(TIF_NOTIFY_RESUME);}
这样程序在执行时,从new_ip(加载的程序)开始执行。
- 7、Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序(Linux)
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- 初学《Linux内核如何装载和启动一个可执行程序》
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- Linux内核如何装载和启动一个可执行程序
- 6.Linux内核如何装载和启动一个可执行程序
- 7.Linux内核如何装载和启动一个可执行程序
- 2017年院赛C题 treat
- 俄罗斯方块控制台程序C++
- 赛门铁克抨击 Google 夸大其词误导用户
- 基础篇:wait与notify与notifyAll(八)
- js兼容问题
- 7、Linux内核如何装载和启动一个可执行程序
- 编码问题的一点小事
- Jquery插件-Jquery validate
- 53道Java线程面试题
- Android Button Library 按钮库
- 虚拟机常用命令
- 克隆二叉树
- HDU-1864-最大报销额
- STM32中LCD显示少量汉字