linux/unix 进程及进程调度

来源：互联网发布：mac平时关机还是待机编辑：程序博客网时间：2024/05/08 22:16

进程是一个动态地使用系统资源、处于活动状态的程序。linux是一个多任务操作系统。

linux进程管理由进程控制块、进程调度、中断处理、任务队列、定时器、bottom half(任务延迟处理)队列、系统调用、进程通信等部分组成，它是linux存储管理、文件管理、设备管理的基础。

进程的概念

进程是一个具有独立功能的程序关于某个数据集合的一次可以并发执行的运行活动，是处于活动状态的计算机程序。进程作为构成系统的基本细胞，不仅是系统内部独立运行的实体，而且是独立竞争资源的基本实体。了解进程的本质，对于理解、描述和设计操作系统有着极为重要的意义。了解进程的活动、状态，也有利于编制复杂程序

进程的运行状态

      linux进程提供了下面的几种运行的状态：
       (1) 运行状态(TASK_RUNNING)
             进程正在运行或是准备运行的状态，也就是就绪态。
             头文件位置：$KERNELPATH/include/linux/sched.h
             宏定义表达式：

/* Task state: the process is running or is ready to run. */
#define TASK_RUNNING 0

(2) 等待状态(TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE)
进程在等待某一个事件的发生或是等待自己需要的资源，分为两种进程状态：TASK_INTERRUPTIBLE和TASK_UNINTERRUPTIBLE。TASK_INTERRUPTIBLE是可以中断的，等待期间可以被信号唤醒；而TASK_UNINTERRUPTIBLE是不可中断的，不受信号的打扰，而是一直等待硬件状态的改变。

/* Task state: waiting for an event or resource; the wait can be interrupted by a signal. */
#define TASK_INTERRUPTIBLE 1

/* Task state: waiting for an event or resource; the wait is not interrupted by signals. */
#define TASK_UNINTERRUPTIBLE 2

(3) 停止状态(TASK_STOPPED)
进程进入到停止状态，通常是通过接收一个信号。正在被调试的进程可能处于停止状态。

/* Task state: the process has been stopped, typically after receiving a signal. */
#define TASK_STOPPED 4

(4) 僵尸状态(EXIT_ZOMBIE，旧版内核中称为TASK_ZOMBIE)
进程因为某种原因结束后，task_struct中仍然保留着该进程的信息。

/* Zombie: the process has ended but its information is still kept in task_struct. */
#define EXIT_ZOMBIE 16

进程的模式和类型

在linux/unix系统中，进程分为两种模式，即用户模式和内核模式，也就是用户态和核心态。如果当前运行的是用户程序、应用程序或者内核之外的系统程序，那么对应进程就在用户模式下运行；如果在用户程序执行过程中出现系统调用或者发生中断事件，就要运行操作系统（即核心）程序，进程模式就变成内核模式。在内核模式下运行的进程可以执行机器的特权指令；而且，此时该进程的运行不受用户的干预，即使是root用户也不能干预内核模式下进程的运行。

按照进程的功能和运行的程序分类，进程可划分为两大类：一类是系统进程，只运行在内核模式，执行操作系统代码，完成一些管理性的工作，例如内存分配和进程切换；另外一类是用户进程，通常在用户模式中执行，并通过系统调用或在出现中断、异常时进入内核模式。

进程的数据结构

    所在位置：$KERNELPATH/include/linux/sched.h
    数据结构名：task_struct
    每一个进程都具有自己的属性，用一个task_struct结构的指针来表示，系统中最大的进程数目受task数组大小的限制，一般都为512。创建新进程时，linux会自动地从系统的内存中分配一个task_struct的数据结构，并加入到task的数组中。
    下面我们开始深入的看一下这个数据结构，因为本人的能力原因，只能逐步的完善。

/*kernel linux-2.6.20*/

struct task_struct {

volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */

struct thread_info *thread_info;

atomic_t usage;

unsigned long flags; /* per process flags, defined below */

unsigned long ptrace;

int lock_depth; /* BKL lock depth */

#ifdef CONFIG_SMP

#ifdef __ARCH_WANT_UNLOCKED_CTXSW

int oncpu;

#endif

int load_weight; /* for niceness load balancing purposes */

int prio, static_prio, normal_prio;

struct list_head run_list;

struct prio_array *array;

unsigned short ioprio;

#ifdef CONFIG_BLK_DEV_IO_TRACE

unsigned int btrace_seq;

#endif

unsigned long sleep_avg;

unsigned long long timestamp, last_ran;

unsigned long long sched_time; /* sched_clock time spent running */

enum sleep_type sleep_type;

unsigned long policy;

cpumask_t cpus_allowed;

unsigned int time_slice, first_time_slice;

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)

struct sched_info sched_info;

#endif

struct list_head tasks;

* ptrace_list/ptrace_children forms the list of my children

* that were stolen by a ptracer.

struct list_head ptrace_children;

struct list_head ptrace_list;

struct mm_struct *mm, *active_mm;

/* task state */

struct linux_binfmt *binfmt;

long exit_state;

int exit_code, exit_signal;

int pdeath_signal; /* The signal sent when the parent dies */

/* ??? */

unsigned long personality;

unsigned did_exec:1;

pid_t pid;

pid_t tgid;

#ifdef CONFIG_CC_STACKPROTECTOR

/* Canary value for the -fstack-protector gcc feature */

unsigned long stack_canary;

#endif

* pointers to (original) parent process, youngest child, younger sibling,

* older sibling, respectively. (p->father can be replaced with

* p->parent->pid)

struct task_struct *real_parent; /* real parent process (when being debugged) */

struct task_struct *parent; /* parent process */

* children/sibling forms the list of my children plus the

* tasks I'm ptracing.

struct list_head children; /* list of my children */

struct list_head sibling; /* linkage in my parent's children list */

struct task_struct *group_leader; /* threadgroup leader */

/* PID/PID hash table linkage. */

struct pid_link pids[PIDTYPE_MAX];

struct list_head thread_group;

struct completion *vfork_done; /* for vfork() */

int __user *set_child_tid; /* CLONE_CHILD_SETTID */

int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */

unsigned long rt_priority;

cputime_t utime, stime;

unsigned long nvcsw, nivcsw; /* context switch counts */

struct timespec start_time;

/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */

unsigned long min_flt, maj_flt;

cputime_t it_prof_expires, it_virt_expires;

unsigned long long it_sched_expires;

struct list_head cpu_timers[3];

/* process credentials */

uid_t uid,euid,suid,fsuid;

gid_t gid,egid,sgid,fsgid;

struct group_info *group_info;

kernel_cap_t cap_effective, cap_inheritable, cap_permitted;

unsigned keep_capabilities:1;

struct user_struct *user;

#ifdef CONFIG_KEYS

struct key *request_key_auth; /* assumed request_key authority */

struct key *thread_keyring; /* keyring private to this thread */

unsigned char jit_keyring; /* default keyring to attach requested keys to */

#endif

* fpu_counter contains the number of consecutive context switches

* that the FPU is used. If this is over a threshold, the lazy fpu

* saving becomes unlazy to save the trap. This is an unsigned char

* so that after 256 times the counter wraps and the behavior turns

* lazy again; this to deal with bursty apps that only use FPU for

* a short time

unsigned char fpu_counter;

int oomkilladj; /* OOM kill score adjustment (bit shift). */

char comm[TASK_COMM_LEN]; /* executable name excluding path

- access with [gs]et_task_comm (which lock

it with task_lock())

- initialized normally by flush_old_exec */

/* file system info */

int link_count, total_link_count;

#ifdef CONFIG_SYSVIPC

/* ipc stuff */

struct sysv_sem sysvsem;

#endif

/* CPU-specific state of this task */

struct thread_struct thread;

/* filesystem information */

struct fs_struct *fs;

/* open file information */

struct files_struct *files;

/* namespaces */

struct nsproxy *nsproxy;

/* signal handlers */

struct signal_struct *signal;

struct sighand_struct *sighand;

sigset_t blocked, real_blocked;

sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */

struct sigpending pending;

unsigned long sas_ss_sp;

size_t sas_ss_size;

int (*notifier)(void *priv);

void *notifier_data;

sigset_t *notifier_mask;

void *security;

struct audit_context *audit_context;

seccomp_t seccomp;

/* Thread group tracking */

u32 parent_exec_id;

u32 self_exec_id;

/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */

spinlock_t alloc_lock;

/* Protection of the PI data structures: */

spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES

/* PI waiters blocked on a rt_mutex held by this task */

struct plist_head pi_waiters;

/* Deadlock detection and priority inheritance handling */

struct rt_mutex_waiter *pi_blocked_on;

#endif

#ifdef CONFIG_DEBUG_MUTEXES

/* mutex deadlock detection */

struct mutex_waiter *blocked_on;

#endif

#ifdef CONFIG_TRACE_IRQFLAGS

unsigned int irq_events;

int hardirqs_enabled;

unsigned long hardirq_enable_ip;

unsigned int hardirq_enable_event;

unsigned long hardirq_disable_ip;

unsigned int hardirq_disable_event;

int softirqs_enabled;

unsigned long softirq_disable_ip;

unsigned int softirq_disable_event;

unsigned long softirq_enable_ip;

unsigned int softirq_enable_event;

int hardirq_context;

int softirq_context;

#endif

#ifdef CONFIG_LOCKDEP

# define MAX_LOCK_DEPTH 30UL

u64 curr_chain_key;

int lockdep_depth;

struct held_lock held_locks[MAX_LOCK_DEPTH];

unsigned int lockdep_recursion;

#endif

/* journalling filesystem info */

void *journal_info;

/* VM state */

struct reclaim_state *reclaim_state;

struct backing_dev_info *backing_dev_info;

struct io_context *io_context;

unsigned long ptrace_message;

siginfo_t *last_siginfo; /* For ptrace use. */

* current io wait handle: wait queue entry to use for io waits

* If this thread is processing aio, this points at the waitqueue

* inside the currently handled kiocb. It may be NULL (i.e. default

* to a stack based synchronous wait) if its doing sync IO.

wait_queue_t *io_wait;

/* i/o counters(bytes read/written, #syscalls */

u64 rchar, wchar, syscr, syscw;

struct task_io_accounting ioac;

#if defined(CONFIG_TASK_XACCT)

u64 acct_rss_mem1; /* accumulated rss usage */

u64 acct_vm_mem1; /* accumulated virtual memory usage */

cputime_t acct_stimexpd;/* stime since last update */

#endif

#ifdef CONFIG_NUMA

struct mempolicy *mempolicy;

short il_next;

#endif

#ifdef CONFIG_CPUSETS

struct cpuset *cpuset;

nodemask_t mems_allowed;

int cpuset_mems_generation;

int cpuset_mem_spread_rotor;

#endif

struct robust_list_head __user *robust_list;

#ifdef CONFIG_COMPAT

struct compat_robust_list_head __user *compat_robust_list;

#endif

struct list_head pi_state_list;

struct futex_pi_state *pi_state_cache;

atomic_t fs_excl; /* holding fs exclusive resources */

struct rcu_head rcu;

* cache last used pipe for splice

struct pipe_inode_info *splice_pipe;

#ifdef CONFIG_TASK_DELAY_ACCT

struct task_delay_info *delays;

#endif

#ifdef CONFIG_FAULT_INJECTION

int make_it_fail;

#endif

};

恐怖的开始：这个数据结构竟然有2xx行！

(1)进程的状态(volatile long state)

      进程运行的状态有以下几种：
      和我们上面讨论的进程的状态相对应。

/*
 * Values for the state member of task_struct and, where marked below,
 * for exit_state.  Apart from TASK_RUNNING (0), each value is a distinct
 * power of two.
 */

/* running, or ready to run */
#define TASK_RUNNING 0

/* waiting; the wait can be interrupted by a signal */
#define TASK_INTERRUPTIBLE 1

/* waiting; the wait is not interrupted by signals */
#define TASK_UNINTERRUPTIBLE 2

/* stopped, typically after receiving a signal */
#define TASK_STOPPED 4

/* NOTE(review): presumably set while the task is being traced (ptrace) - confirm */
#define TASK_TRACED 8

/* in tsk->exit_state */

/* ended, but its information is still kept in task_struct */
#define EXIT_ZOMBIE 16

#define EXIT_DEAD 32

/* in tsk->state again */

#define TASK_NONINTERACTIVE 64

#define TASK_DEAD 128

(2)进程的优先级

头文件位置：$KERNELPATH/include/linux/sched.h 中定义的宏定义。

所在的数据结构：struct task_struct

定义原型：int prio;

优先级的取值范围为0~MAX_PRIO-1(其中MAX_PRIO的值为140)。其中0~MAX_RT_PRIO-1(MAX_RT_PRIO=100)属于实时进程，而MAX_RT_PRIO~MAX_PRIO-1属于非实时进程，数值越大，表示进程的优先级越低。

进程优先级的宏定义

/*$KERNELPATH/include/linux/sched.h*/

/*
 * Priority range limits.  Valid priorities run from 0 to MAX_PRIO-1:
 * 0..MAX_RT_PRIO-1 is used by real-time processes and
 * MAX_RT_PRIO..MAX_PRIO-1 by non-real-time processes.
 */

/* One past the largest priority value; expands to 100 + 40 = 140 here. */
#define MAX_PRIO (MAX_RT_PRIO + 40)

/* First non-real-time priority; same as MAX_USER_RT_PRIO. */
#define MAX_RT_PRIO MAX_USER_RT_PRIO

#define MAX_USER_RT_PRIO 100

深入了解main()

    函数原型：int main(int argc,char *argv[])
    这个是最常见的原型，先看一个例子，后面会详细的解释这个main函数的调用过程。

int main(int argc ,char *argv[])

形参列表：int argc 命令行参数的个数

char *argv[] 命令行的参数的指针数组

定时器

操作系统经常需要能把一项活动安排在将来某个时间进行。需要有一种机制保证能够把任务安排到一个相对比较精确的时间进行处理。任何能够支持操作系统的微处理器必定有一个可编程的间隔定时器，能够周期性地中断处理器。这个周期性的中断就是系统时钟的滴答。如果把系统比作一个交响乐队，它就像一个节拍器， Linux 对于时间的理解很简单：它从系统启动开始记录系统时钟的滴答数目，以此衡量时间。系统中用到时间的地方都以此为衡量的基础，这就是所说的 jiffy 单位的由来。核心中有一个全局变量 jiffies, 就是这个记录的值。

linux系统中提供两种定时器，都可以实现把任务排队，等到某个时间再处理。但是两种定时器的数据结构稍有不同。

第一种，是老式的定时器机制，由一个包含32个timer_struct数据结构的静态数组，以及一个用屏蔽位指示活动定时器的整数timer_active组成。由于列表中的定时器是静态分配的，所以一般都是在初始化的时候加入。

第二种，是新的机制，使用timer_list数据链表，把定时器按到期时间从早到晚链接起来。

参考资料

资料1：linux2.6内核调度分析

http://www.ibm.com/developerworks/cn/linux/kernel/l-kn26sch/index.html

资料2：linux用到的32个系统调用详解

http://www.linuxmine.com/3685.html