【进程管理】进程(线程)创建

来源:互联网 发布:linux文件同步 编辑:程序博客网 时间:2024/04/29 22:29

          本节主要研究进程(线程)创建的过程,下文将不区分进程和线程;


基本知识

在Linux系统中,第一个进程是系统固有的,是由内核的设计者安排好的;一个新的进程一定要由一个已存在的进程复制出来,而不是创造出来的,其实Linux系统并不提供直接创建进程的方法。创建了子进程以后,父进程有三种选择:第一,继续走自己的路,与子进程分道扬镳,但是如果子进程先行exit(),内核将向父进程发一个信号;第二,进入睡眠,等子进程exit()以后再继续执行,为此可使用wait4()等待某个特定的子进程,或使用wait3()等待任何一个子进程;第三,自己exit()(exit()是每一个可执行程序映像所必有的,因此子进程执行完目标程序后不会返回)。Linux将进程的创建与目标程序的执行分成两步:

(1)从一个已存在的父进程像细胞分裂一样地复制出一个子进程;实际复制出来的子进程有自己的task_struct和系统空间堆栈,但是与父进程共享其他资源;例如,要是父进程打开了5个文件,那么子进程也打开了这5个文件,而且这些文件的读写位置处于相同的位置;fork()是全部复制,父进程的所有资源全部通过数据结构复制给子进程,但进程号不一样;clone()则是带有参数的选择性复制,可复制出一个线程,其他资源通过指针与父进程共享;vfork()是除了task_struct和系统空间堆栈以外的资源全部通过指针与父进程共享(并不真正复制),因此复制出来的是个线程,效率很高;

(2)目标程序的执行:创建一个进程是因为有不同的目标程序要让新的进程去执行,所以复制完以后,子进程就要与父进程分道扬镳,用execve()执行以文件形式存在的可执行程序映像;

在(1)中,复制时只复制进程基本资源,如task_struct,系统空间堆栈,页面表等,不包括父进程的代码和全局变量,这些通过只读方式的共享,在需要写的时候,通过copy_on_write()为所涉及的页面建立一个新的副本;


fork,vfork,clone

(1)clone()主要是用来创建一个线程,包括用户线程和内核线程;创建用户线程时,可以给定子线程用户空间堆栈的位置;它也可以用来创建进程,有选择性地复制父进程的资源;fork()则是全面的复制;vfork()是为了提高创建时的效率,减少系统开销;

(2)Linux内核中确实有一个创建内核线程的函数kernel_thread(),供内核线程调用;它是对clone()的包装,并不执行execve(),而是执行内核中的某一个函数;该函数会返回,因此返回后要执行一个exit()系统调用;

(3)fork,vfork,clone这三个系统调用都调用do_fork(),只不过调用的参数不一样,下面主要来讲解do_fork();

int sys_fork(struct pt_regs *regs){//clone_flags中的SIGCHLDreturn do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);}

int sys_vfork(struct pt_regs *regs){//共享CLONE_VFORK和VMreturn do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,       NULL, NULL);}

//clone负责建立起轻量级进程(可以与其他进程共享地址空间,或打开文件等),newsp是指用户堆栈指针,parent_tid表示父进程的//的用户变量地址,child_tid表示新的轻量级进程的用户变量地址:longsys_clone(unsigned long clone_flags, unsigned long newsp,  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs){if (!newsp)newsp = regs->sp;  //有新的用户栈地址//其中clone_flags一般有参数SIGCHLD,占用一个字节,剩余的3个字节可制定,如共享内存描述符,页表,文件目录,信号处理标,跟踪等return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);}

说明几点

(1)newsp为子进程新的栈,该栈可能在另一个地址空间;


/* * Create a kernel thread */int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags){struct pt_regs regs;memset(&regs, 0, sizeof(regs));regs.si = (unsigned long) fn;regs.di = (unsigned long) arg;#ifdef CONFIG_X86_32regs.ds = __USER_DS;regs.es = __USER_DS;regs.fs = __KERNEL_PERCPU;:regs.gs = __KERNEL_STACK_CANARY;#elseregs.ss = __KERNEL_DS;#endifregs.orig_ax = -1;regs.ip = (unsigned long) kernel_thread_helper;regs.cs = __KERNEL_CS | get_kernel_rpl();regs.flags = X86_EFLAGS_IF | 0x2;/* Ok, create the new process.. *///其中CLONE_VM避免调用进程的页表,内核线程是不用访问用户态的地址空间;不会被跟踪的return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);}


do_fork

/* *  Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. *///sys_clone//regs是指通用寄存器指针,它是一个轻量级进程在用户态切换到内核态,保存到内核堆栈中long do_fork(unsigned long clone_flags,      unsigned long stack_start,//用户状态下栈的起始地址      struct pt_regs *regs,//指向寄存器集合的指针      unsigned long stack_size,//用户状态下,栈的大小      int __user *parent_tidptr,//指向用户空间中地址的两个指针      int __user *child_tidptr){struct task_struct *p;int trace = 0;long nr;/* * Do some preliminary argument and permissions checking before we * actually start allocating stuff */if (clone_flags & CLONE_NEWUSER) {//创建新的用户if (clone_flags & CLONE_THREAD) //但是没有创建新的线程return -EINVAL;/* hopefully this check will go away when userns support is * complete */if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||!capable(CAP_SETGID))return -EPERM;}/* * When called from kernel_thread, don't do user tracing stuff. */if (likely(user_mode(regs)))trace = tracehook_prepare_clone(clone_flags);//执行生成新进程的实际工作p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace);/* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */if (!IS_ERR(p)) {struct completion vfork;trace_sched_process_fork(current, p);nr = task_pid_vnr(p);//获得当前的局部nrif (clone_flags & CLONE_PARENT_SETTID)   put_user(nr, parent_tidptr);//将nr复制到对应的用户空间指向的地址if (clone_flags & CLONE_VFORK) {  //如果是执行vfork这个函数,父进程会睡眠下去p->vfork_done = &vfork;init_completion(&vfork);   //睡眠,此时父进程等子进程}//schedule_tailaudit_finish_fork(p);tracehook_report_clone(regs, clone_flags, nr, p);/* * We set PF_STARTING at creation in case tracing wants to * use this to distinguish a fully live task from one that * hasn't gotten to tracehook_report_clone() yet.  Now we * clear it and set the child going. 
*/p->flags &= ~PF_STARTING;wake_up_new_task(p, clone_flags);//将子进程的task_struct放入到新调度器队列中tracehook_report_clone_complete(trace, regs,clone_flags, nr, p);//如果设置了CLONE_VFORK,//就把父进程插入到等待队列中,直到子进程释放了自己的内存地址空间(也就是子进程结束或执行新的程序)if (clone_flags & CLONE_VFORK) {freezer_do_not_count();wait_for_completion(&vfork);//父进程将在改变量上进入睡眠状态freezer_count();tracehook_report_vfork_done(p, nr);}} else {nr = PTR_ERR(p);}return nr;}
说明几点

(1)p = copy_process(clone_flags, stack_start, regs, stack_size,  child_tidptr, NULL, trace);执行实际的进程复制工作;

(2)if (clone_flags & CLONE_VFORK) 表示如果是执行vfork这个函数,父进程会睡眠下去;


copy_process中关键代码1

设置task_struct和系统堆栈

//task_struct可以在内存中的任何位置p = dup_task_struct(current);//为子进程获取并设置进程描述符,并且设置好了thread_infoif (!p)goto fork_out;


static struct task_struct *dup_task_struct(struct task_struct *orig){struct task_struct *tsk;struct thread_info *ti;unsigned long *stackend;int err;prepare_to_copy(orig); //保存FPU等寄存器内容到thread_info中tsk = alloc_task_struct();//kem, 获取新的进程描述符task_struct的内存if (!tsk)return NULL;ti = alloc_thread_info(tsk); //task无用处,使用get_free_pages获得两个页大小的内存if (!ti) {//ti若分配失败,还要释放原内存free_task_struct(tsk);return NULL;} err = arch_dup_task_struct(tsk, orig);//将旧的task_struct复制给新的task_structif (err)goto out;tsk->stack = ti; //改变新进程的stack指向到新的thread_info中err = prop_local_init_single(&tsk->dirties);if (err)goto out;setup_thread_stack(tsk, orig);//链接task_struct和thread_info,确定内存布局,相互指向clear_user_return_notifier(tsk);clear_tsk_need_resched(tsk);stackend = end_of_stack(tsk);*stackend = STACK_END_MAGIC;/* for overflow detection */#ifdef CONFIG_CC_STACKPROTECTORtsk->stack_canary = get_random_int();#endif/* One for us, one for whoever does the "release_task()" (usually parent) */atomic_set(&tsk->usage,2);  //要将新进程的使用计数置为2atomic_set(&tsk->fs_excl, 0);#ifdef CONFIG_BLK_DEV_IO_TRACEtsk->btrace_seq = 0;#endiftsk->splice_pipe = NULL;account_kernel_stack(ti, 1);return tsk;out:free_thread_info(ti);free_task_struct(tsk);return NULL;}

/*
 * Copy the process descriptor @src into @dst. If @src has FPU state
 * allocated, @dst gets its own FPU area and the state is copied too.
 *
 * Returns 0 on success or the error returned by fpu_alloc().
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	int ret;

	/* plain structure copy of the whole descriptor */
	*dst = *src;
	if (fpu_allocated(&src->thread.fpu)) {
		/* drop the fpu fields inherited by the struct copy ... */
		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
		/* ... allocate a separate area for dst ... */
		ret = fpu_alloc(&dst->thread.fpu);
		if (ret)
			return ret;
		/* ... and copy the FPU state across */
		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
	}
	return 0;
}


copy_process中关键代码2

一些字段的设置

p->did_exec = 0;//记录进程发出execve()的次数delayacct_tsk_init(p);/* Must remain after dup_task_struct() */copy_flags(clone_flags, p);INIT_LIST_HEAD(&p->children);INIT_LIST_HEAD(&p->sibling);rcu_copy_process(p);p->vfork_done = NULL;spin_lock_init(&p->alloc_lock);init_sigpending(&p->pending);//初始化好信号处理//初始化cpu的统计字段p->utime = cputime_zero;p->stime = cputime_zero;p->gtime = cputime_zero;p->utimescaled = cputime_zero;p->stimescaled = cputime_zero;

copy_process中关键代码2(续)

设置子进程的调度信息

/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);  /* per the article: initialises the scheduler data structures of the new task */

copy_process中关键代码3

复制和共享进程的各个部分

if ((retval = audit_alloc(p)))goto bad_fork_cleanup_policy;/* copy all the process information */if ((retval = copy_semundo(clone_flags, p)))goto bad_fork_cleanup_audit;if ((retval = copy_files(clone_flags, p)))goto bad_fork_cleanup_semundo;if ((retval = copy_fs(clone_flags, p)))goto bad_fork_cleanup_files;if ((retval = copy_sighand(clone_flags, p)))goto bad_fork_cleanup_fs;if ((retval = copy_signal(clone_flags, p)))goto bad_fork_cleanup_sighand;if ((retval = copy_mm(clone_flags, p)))        //进程地址空间的处理goto bad_fork_cleanup_signal;if ((retval = copy_namespaces(clone_flags, p)))goto bad_fork_cleanup_mm;if ((retval = copy_io(clone_flags, p)))goto bad_fork_cleanup_namespaces;//设置子进程的内核栈retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);


copy_files

static int copy_files(unsigned long clone_flags, struct task_struct * tsk){struct files_struct *oldf, *newf;int error = 0;/* * A background process may not have any files ... */oldf = current->files; //原进程的files_structif (!oldf)goto out;if (clone_flags & CLONE_FILES) { //共享打开的文件表atomic_inc(&oldf->count); //增加引用计数goto out;}newf = dup_fd(oldf, &error);if (!newf)goto out;tsk->files = newf;error = 0;out:return error;}


copy_thread

int copy_thread(unsigned long clone_flags, unsigned long sp,unsigned long unused,struct task_struct *p, struct pt_regs *regs){struct pt_regs *childregs;struct task_struct *tsk;int err;//填充包含了所有的寄存器childregs = task_pt_regs(p);*childregs = *regs;childregs->ax = 0;             //子进程的返回值,为0childregs->sp = sp;            //子进程的用户空间栈地址p->thread.sp = (unsigned long) childregs;  //指向子进程的用户空间p->thread.sp0 = (unsigned long) (childregs+1); //指向子进程系统空间堆栈中的pt_regsp->thread.ip = (unsigned long) ret_from_fork;   //子进程开始调用的函数task_user_gs(p) = get_user_gs(regs);p->thread.io_bitmap_ptr = NULL;tsk = current;err = -ENOMEM;memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,IO_BITMAP_BYTES, GFP_KERNEL);if (!p->thread.io_bitmap_ptr) {p->thread.io_bitmap_max = 0;return -ENOMEM;}set_tsk_thread_flag(p, TIF_IO_BITMAP);}err = 0;/* * Set a new TLS for the child thread? */if (clone_flags & CLONE_SETTLS)err = do_set_thread_area(p, -1,(struct user_desc __user *)childregs->si, 0);if (err && p->thread.io_bitmap_ptr) {kfree(p->thread.io_bitmap_ptr);p->thread.io_bitmap_max = 0;}return err;}

copy_process中关键代码4

获得子进程pid

if (pid != &init_struct_pid) {retval = -ENOMEM;pid = alloc_pid(p->nsproxy->pid_ns);//分配好一个pidif (!pid)goto bad_fork_cleanup_io;if (clone_flags & CLONE_NEWPID) {retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);if (retval < 0)goto bad_fork_free_pid;}}p->pid = pid_nr(pid);//得到全局的nrp->tgid = p->pid;if (clone_flags & CLONE_THREAD)p->tgid = current->tgid;  //设置好线程组idif (current->nsproxy != p->nsproxy) {retval = ns_cgroup_clone(p, pid);if (retval)goto bad_fork_free_pid;}//改变子进程用户地址空间的child_tidptr的内存值p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;  //也保存在对应的值上面去/* * Clear TID on mm_release()? *///在mm_release时,将0写到child_tidptr中去p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;


copy_process中关键代码5

线程还是进程

/* CLONE_PARENT re-uses the old parent */if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {  //在父进程的同一进程组,同一个父亲p->real_parent = current->real_parent;//当前线程和创建的线程的父亲同一个线程p->parent_exec_id = current->parent_exec_id;} else {                          //否则real_parent指向本进程p->real_parent = current;p->parent_exec_id = current->self_exec_id;}

线程

/* Excerpt from copy_process(): a CLONE_THREAD child joins the creator's thread group. */
if (clone_flags & CLONE_THREAD) {
	current->signal->nr_threads++;
	atomic_inc(&current->signal->live);
	atomic_inc(&current->signal->sigcnt);
	/* point at the leader of the thread group */
	p->group_leader = current->group_leader;
	/* append the new thread to the leader's thread_group list */
	list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
}

execve

/*
 * sys_execve() executes a new program.
 *
 * name: user-space pointer to the path of the executable.
 * argv: user-space argument vector.
 * envp: user-space environment vector.
 * regs: saved registers of the caller.
 *
 * Returns the error encoded in getname()'s result if the name cannot be
 * fetched, otherwise the result of do_execve().
 */
long sys_execve(const char __user *name,
		const char __user *const __user *argv,
		const char __user *const __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	/* fetch the executable's file name */
	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);

#ifdef CONFIG_X86_32
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
#endif

	putname(filename);
	return error;
}

do_execve中关键代码

file = open_exec(filename);//找到fileretval = PTR_ERR(file);if (IS_ERR(file))goto out_unmark;sched_exec();bprm->file = file;//设置相应的参数bprm->filename = filename;//名称bprm->interp = filename;retval = bprm_mm_init(bprm);if (retval)goto out_file;bprm->argc = count(argv, MAX_ARG_STRINGS);//计算长度

retval = search_binary_handler(bprm,regs);  /* per the article: finds a suitable binary format handler, e.g. a.out or ELF */


int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs){unsigned int depth = bprm->recursion_depth;int try,retval;struct linux_binfmt *fmt;retval = security_bprm_check(bprm);if (retval)return retval;/* kernel module loader fixup *//* so we don't try to load run modprobe in kernel space. */set_fs(USER_DS);retval = audit_bprm(bprm);if (retval)return retval;retval = -ENOENT;for (try=0; try<2; try++) {//装入模块后,需再尝试一次read_lock(&binfmt_lock);list_for_each_entry(fmt, &formats, lh) {int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;if (!fn)continue;if (!try_module_get(fmt->module))continue;read_unlock(&binfmt_lock);retval = fn(bprm, regs);//执行对应的装入函数  <span style="font-family: Arial, Helvetica, sans-serif;">load_aout_binary</span>
说明几点

(1)load_aout_binary为a.out可执行文件格式的装入,此外还支持elf和脚本等格式文件的装入;


0 0
原创粉丝点击