Linux schedule 的发展历史.
来源:互联网 发布:win7红警网络点不进去 编辑:程序博客网 时间:2024/04/30 04:20
慢慢来吧~~
Linux V0.11
支持定时器和信号
流程图:
源码:
/*
 * schedule() as quoted for Linux 0.11.
 *
 * Pass 1 walks the task slots from LAST_TASK down to (but not including)
 * FIRST_TASK, turns expired alarms into a pending SIGALRM and makes
 * interruptible sleepers with a deliverable signal runnable.
 * Pass 2 picks the runnable task with the largest counter and switches to it.
 */
void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			/* alarm is non-zero and already in the past: raise SIGALRM once */
			if ((*p)->alarm && (*p)->alarm < jiffies) {
				(*p)->signal |= (1<<(SIGALRM-1));
				(*p)->alarm = 0;
			}
			/*
			 * A signal counts unless it is both blockable and blocked;
			 * only TASK_INTERRUPTIBLE sleepers are woken by it.
			 */
			if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
			(*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;	/* slot 0 is the fallback when nothing is runnable */
		i = NR_TASKS;
		p = &task[NR_TASKS];
		/* visits slots NR_TASKS-1 .. 1; slot 0 is never examined here */
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		/*
		 * c == -1: no runnable task found, fall back to slot 0.
		 * c  >  0: a task with time left was found.
		 * c ==  0: every runnable task has used up its counter.
		 */
		if (c) break;
		/* refill every task (sleeping ones too): counter/2 + priority */
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);
}
重新计算时间片:
time = oldtime / 2 + priority
相关接口:
1. pause , 暂时放弃CPU . 可打断.
/*
 * pause(): mark the caller as an interruptible sleeper and call the
 * scheduler. Returns 0 once the task runs again.
 */
int sys_pause(void)
{
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	return 0;
}
2. sleep_on 睡眠等待.不可打断. 比如等到某种资源的时候
/*
 * Put the current task to uninterruptible sleep on the wait slot *p.
 *
 * p: address of a single task pointer used as the head of an implicit
 *    chain; the previous occupant is remembered in the local 'tmp'.
 * Panics if called by init_task.task (task[0]).
 */
void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;		/* remember whoever was waiting before us */
	*p = current;		/* we become the visible waiter */
	current->state = TASK_UNINTERRUPTIBLE;
	schedule();
	/*
	 * Running again: pass the wake-up on to the previous waiter.
	 * State 0 is presumably TASK_RUNNING -- confirm against sched.h.
	 */
	if (tmp)
		tmp->state=0;
}
3 . 可打断睡眠 , 多用于多进程等待同一个资源的时候. 可以形成等待队列.
/*
 * Interruptible variant of sleep_on(): the task sleeps in
 * TASK_INTERRUPTIBLE on the wait slot *p.
 *
 * Panics if called by init_task.task (task[0]).
 */
void interruptible_sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp=*p;			/* previous waiter, woken when we leave */
	*p=current;
repeat:	current->state = TASK_INTERRUPTIBLE;
	schedule();
	/*
	 * If another task has taken over the slot since we went to sleep,
	 * set its state to 0 and go back to sleep ourselves.
	 */
	if (*p && *p != current) {
		(**p).state=0;
		goto repeat;
	}
	*p=NULL;
	if (tmp)
		tmp->state=0;
}
4. 唤醒进程 . 不论进程处于何种状态都唤醒它.
/*
 * Wake the task currently stored in the wait slot *p: its state is set
 * to 0 whatever it was before, and the slot is cleared.
 * Does nothing when p or *p is NULL.
 */
void wake_up(struct task_struct **p)
{
	if (p && *p) {
		(**p).state=0;
		*p=NULL;
	}
}
5. nice . 降低优先级. 更愿意出让CPU . 但是设置的increment 必须小于当前的优先级数值 (即 priority - increment > 0), 否则本次调用不做任何修改.
/*
 * nice(): lower the caller's priority by 'increment'.
 * The change is applied only when the result stays above 0; otherwise the
 * request is silently ignored. Always returns 0.
 */
int sys_nice(long increment)
{
	if (current->priority-increment>0)
		current->priority -= increment;
	return 0;
}
schedule 的调用者: (system_call.s)
reschedule: pushl $ret_from_sys_call jmp _schedule_system_call:cmpl $nr_system_calls-1,%eax # 检查系统调用号 ja bad_sys_call... # 各种压栈call _sys_call_table(,%eax,4) # 调用对应接口 pushl %eaxmovl _current,%eax # 拿到scheduel后的进程结构体指针cmpl $0,state(%eax)# 如果不是运行态 就回去重新调度jne reschedulecmpl $0,counter(%eax)# 如果时间片刚好消耗没了, 就回去重新调度je reschedule
相关结构体:
保存进程指针的数组. 所有的进程保存在一个固定大小的数组中,所以Linux系统支持的最大进程数目是固定的.
进程之间由指针组成链表.
struct task_struct * task[NR_TASKS] = {&(init_task.task), };
Linux V0.12
支持限时操作, 比如 select 支持最长等待时间.
schedule 流程图:
源代码:
/*
 * schedule() as quoted for Linux 0.12.
 *
 * In addition to the alarm and signal checks, the first pass handles a
 * per-task 'timeout': once it has expired it is cleared, and an
 * interruptible sleeper is made runnable. The selection loop follows.
 */
void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			/* expired timeout: cleared unconditionally, wakes only interruptible sleepers */
			if ((*p)->timeout && (*p)->timeout < jiffies) {
				(*p)->timeout = 0;
				if ((*p)->state == TASK_INTERRUPTIBLE)
					(*p)->state = TASK_RUNNING;
			}
			/* expired alarm: raise SIGALRM once */
			if ((*p)->alarm && (*p)->alarm < jiffies) {
				(*p)->signal |= (1<<(SIGALRM-1));
				(*p)->alarm = 0;
			}
			if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
			(*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		/* pick the runnable task with the largest counter (slots NR_TASKS-1 .. 1) */
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		/* c == 0 means all runnable tasks are out of time: refill and retry */
		if (c) break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);
}
相关函数修改:
1. 添加接口 __sleep_on , 睡眠等待和可打断睡眠均直接调用此接口
/*
 * Common sleep helper (Linux 0.12): sleep on the wait slot *p in the
 * given task state.
 *
 * p:     address of the wait slot; NULL is ignored.
 * state: task state to sleep in (the two wrappers below pass
 *        TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE).
 * Panics if called by init_task.task (task[0]).
 */
static inline void __sleep_on(struct task_struct **p, int state)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;		/* previous waiter */
	*p = current;
	current->state = state;
repeat:	schedule();
	/*
	 * A different task now occupies the slot: set its state to 0 and
	 * go back to sleep, this time uninterruptibly.
	 */
	if (*p && *p != current) {
		(**p).state = 0;
		current->state = TASK_UNINTERRUPTIBLE;
		goto repeat;
	}
	if (!*p)
		printk("Warning: *P = NULL\n\r");
	/* intentional assignment: restore the previous waiter into the slot, then wake it */
	if (*p = tmp)
		tmp->state=0;
}

/* Sleep on *p in TASK_INTERRUPTIBLE. */
void interruptible_sleep_on(struct task_struct **p)
{
	__sleep_on(p,TASK_INTERRUPTIBLE);
}

/* Sleep on *p in TASK_UNINTERRUPTIBLE. */
void sleep_on(struct task_struct **p)
{
	__sleep_on(p,TASK_UNINTERRUPTIBLE);
}
2. 唤醒进程的时候对已经停止的进程和僵尸进程进行警告
/*
 * Wake the task stored in the wait slot *p (Linux 0.12).
 * A message is printed when the task is TASK_STOPPED or TASK_ZOMBIE, but
 * its state is set to 0 regardless. The slot itself is not cleared here.
 */
void wake_up(struct task_struct **p)
{
	if (p && *p) {
		if ((**p).state == TASK_STOPPED)
			printk("wake_up: TASK_STOPPED");
		if ((**p).state == TASK_ZOMBIE)
			printk("wake_up: TASK_ZOMBIE");
		(**p).state=0;
	}
}
Linux V0.95
仅仅修改了一点, 关于signal 屏蔽位:
if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&(*p)->state==TASK_INTERRUPTIBLE)(*p)->state=TASK_RUNNING;
==>
if (((*p)->signal & ~(*p)->blocked) &&(*p)->state==TASK_INTERRUPTIBLE)(*p)->state=TASK_RUNNING;
因为V0.95 版本在sys_ssetmask函数中已经去掉了SIGKILL 和 SIGSTOP :
/*
 * Replace the caller's blocked-signal mask with 'newmask', with the
 * SIGKILL and SIGSTOP bits always cleared so they can never be blocked.
 * Returns the previous mask.
 */
int sys_ssetmask(int newmask)
{
	int old=current->blocked;

	current->blocked = newmask & ~(1<<(SIGKILL-1)) & ~(1<<(SIGSTOP-1));
	return old;
}
相关接口修改:
1. 暂时出让CPU的进程对SIG_IGN信号进行了屏蔽 :
/*
 * pause() (Linux 0.95): sleep interruptibly, temporarily blocking every
 * signal whose handler is SIG_IGN. The original blocked mask is restored
 * afterwards. Returns -EINTR.
 */
int sys_pause(void)
{
	unsigned long old_blocked;
	unsigned long mask;
	struct sigaction * sa = current->sigaction;

	old_blocked = current->blocked;
	/*
	 * mask doubles each step (1, 2, 4, ...) and the loop ends when it
	 * overflows to 0; sa advances in lock-step, one entry per bit.
	 */
	for (mask=1 ; mask ; sa++,mask += mask)
		if (sa->sa_handler == SIG_IGN)
			current->blocked |= mask;
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	current->blocked = old_blocked;
	return -EINTR;
}
2. __sleep_on 接口对标志寄存器进行了保护 (进入时用 pushfl 保存, 退出时用 popfl 恢复), 并在调度前使用 STI 指令确保中断处于打开状态.
/*
 * Common sleep helper (Linux 0.95): as before, but the caller's EFLAGS
 * are saved on entry (pushfl/popl) and restored on exit (pushl/popfl),
 * and interrupts are explicitly enabled before scheduling.
 *
 * p:     address of the wait slot; NULL is ignored.
 * state: task state to sleep in.
 * Panics if called by init_task.task (task[0]).
 */
static inline void __sleep_on(struct task_struct **p, int state)
{
	struct task_struct *tmp;
	unsigned int flags;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	__asm__("pushfl ; popl %0":"=r" (flags));	/* save EFLAGS */
	tmp = *p;
	*p = current;
	current->state = state;
/* make sure interrupts are enabled: there should be no more races here */
	sti();
repeat:	schedule();
	/* someone else took over the slot: wake it and sleep again */
	if (*p && *p != current) {
		current->state = TASK_UNINTERRUPTIBLE;
		(**p).state = 0;
		goto repeat;
	}
	/* intentional assignment: put the previous waiter back and wake it */
	if (*p = tmp)
		tmp->state=0;
	__asm__("pushl %0 ; popfl"::"r" (flags));	/* restore EFLAGS */
}
3. nice函数处理了increment 比当前优先值大的情况: 直接减成 1
/*
 * nice() (Linux 0.95).
 * A negative increment (raising priority) requires suser(); otherwise
 * -EPERM is returned. An increment larger than the current priority is
 * clamped so the priority ends at 1.
 * Note: when increment == priority the clamp does not trigger and the
 * priority becomes 0.
 */
int sys_nice(long increment)
{
	if (increment < 0 && !suser())
		return -EPERM;
	if (increment > current->priority)
		increment = current->priority-1;
	current->priority -= increment;
	return 0;
}
Linux V0.95a
修改对限时操作的BUG:
if ((*p)->timeout && (*p)->timeout < jiffies) {(*p)->timeout = 0;if ((*p)->state == TASK_INTERRUPTIBLE)(*p)->state = TASK_RUNNING;}
==>
if ((*p)->timeout && (*p)->timeout < jiffies) if ((*p)->state == TASK_INTERRUPTIBLE) {(*p)->timeout = 0;(*p)->state = TASK_RUNNING;}
相关的接口修改:
1. 将next_wait 加入结构体 task_struct. 从而使用它来维护睡眠等待列表, 不再唤醒僵尸进程和已停止(TASK_STOPPED)的进程.
void wake_up(struct task_struct **p){struct task_struct * wakeup_ptr, * tmp;if (p && *p) {wakeup_ptr = *p;*p = NULL;while (wakeup_ptr && wakeup_ptr != task[0]) {if (wakeup_ptr->state == TASK_STOPPED)printk("wake_up: TASK_STOPPED\n");else if (wakeup_ptr->state == TASK_ZOMBIE)printk("wake_up: TASK_ZOMBIE\n");elsewakeup_ptr->state = TASK_RUNNING;tmp = wakeup_ptr->next_wait;wakeup_ptr->next_wait = task[0];wakeup_ptr = tmp;}}}static inline void __sleep_on(struct task_struct **p, int state){ unsigned int flags; if (!p) return; if (current == task[0]) panic("task[0] trying to sleep"); __asm__("pushfl ; popl %0":"=r" (flags)); current->next_wait = *p; task[0]->next_wait = NULL; *p = current; current->state = state; sti(); schedule(); if (current->next_wait != task[0]) wake_up(p); current->next_wait = NULL; __asm__("pushl %0 ; popfl"::"r" (flags));}
Linux V0.95c
无相关修改
Linux V0.96a
添加了一项高优先级抢占功能:
当新唤醒的进程更优先的时候, 就重新调度.
实现:
1. schedule 函数仅仅在开始加上一句代码:
need_resched = 0;
2. wake_up 函数检查新唤醒进程的剩余时间片 (counter) 是否大于当前进程的. 是则设置need_resched = 1
/*
 * Wake every task chained behind the wait slot *p (Linux 0.96a).
 *
 * Zombies are reported, stopped tasks are skipped silently, all others
 * become TASK_RUNNING. If a woken task has a larger counter than the
 * current task, need_resched is set to request a reschedule.
 */
void wake_up(struct task_struct **p)
{
	struct task_struct * wakeup_ptr, * tmp;

	if (p && *p) {
		wakeup_ptr = *p;
		*p = NULL;
		while (wakeup_ptr && wakeup_ptr != task[0]) {
			if (wakeup_ptr->state == TASK_ZOMBIE)
				printk("wake_up: TASK_ZOMBIE\n");
			else if (wakeup_ptr->state != TASK_STOPPED) {
				wakeup_ptr->state = TASK_RUNNING;
				/* woken task has more time left than we do: ask for preemption */
				if (wakeup_ptr->counter > current->counter)
					need_resched = 1;
			}
			tmp = wakeup_ptr->next_wait;
			wakeup_ptr->next_wait = task[0];
			wakeup_ptr = tmp;
		}
	}
}
3. system_call 检查need_resched, 若非0 就重新调度:
reschedule:pushl $ret_from_sys_calljmp _schedule.align 2_system_call:pushl %eax# save orig_eaxSAVE_ALLcmpl _NR_syscalls,%eaxjae bad_sys_callcall _sys_call_table(,%eax,4)movl %eax,EAX(%esp)# save the return valueret_from_sys_call:cmpw $0x0f,CS(%esp)# was old code segment supervisor ?jne 2fcmpw $0x17,OLDSS(%esp)# was stack segment = 0x17 ?jne 2f1:movl _current,%eaxcmpl _task,%eax# task[0] cannot have signalsje 2cmpl $0,_need_resched # 检查need_resched, 若非0 就重新调度jne reschedulecmpl $0,state(%eax)# statejne reschedulecmpl $0,counter(%eax)# counterje reschedule
相关接口修改:
1.修改nice 接口 ,保证优先级不会被改成0
/*
 * nice() (Linux 0.96a).
 * A negative increment requires suser(), else -EPERM. An increment that
 * would bring the priority to 0 or below is clamped so the priority ends
 * at 1.
 */
int sys_nice(long increment)
{
	if (increment < 0 && !suser())
		return -EPERM;
	/* the previous version tested "increment > current->priority" */
	if (increment >= current->priority)
		increment = current->priority-1;
	current->priority -= increment;
	return 0;
}
2. 修改system_call , 若进程调度后仍为同一进程则不再检查状态和时间片
1:movl _current,%eaxcmpl _task,%eax# task[0] cannot have signalsje 2fcmpl $0,_need_reschedjne reschedulecmpl $0,state(%eax)# statejne reschedulecmpl $0,counter(%eax)# counterje reschedule
Linux V0.96b
将定时器部分移出 schedule, 这部分功能由 do_timer 接管.
流程图:
源代码:
/*
 * schedule() as quoted for Linux 0.96b.
 *
 * The alarm handling is no longer present in this function. The first
 * pass only wakes interruptible sleepers whose timeout expired or who
 * have an unblocked signal pending; need_resched is cleared on entry.
 */
void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	need_resched = 0;
	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			/* the timeout is consumed only when it actually wakes an interruptible sleeper */
			if ((*p)->timeout && (*p)->timeout < jiffies)
				if ((*p)->state == TASK_INTERRUPTIBLE) {
					(*p)->timeout = 0;
					(*p)->state = TASK_RUNNING;
				}
			if (((*p)->signal & ~(*p)->blocked) &&
			    (*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		/* pick the runnable task with the largest counter (slots NR_TASKS-1 .. 1) */
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		/* c == 0: all runnable tasks are out of time, refill every counter and retry */
		if (c) break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);
}
Linux V0.96c
无相关修改
Linux V0.97
添加 wake_one_task 接口, 修复了 schedule 的 BUG, 使得调度的时候被唤醒的高优先级的进程也可以抢占.
1. 新的 schedule
void schedule(void){int i,next,c;struct task_struct ** p;/* check alarm, wake up any interruptible tasks that have got a signal */need_resched = 0;for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)if (*p) {if ((*p)->timeout && (*p)->timeout < jiffies)if ((*p)->state == TASK_INTERRUPTIBLE) {(*p)->timeout = 0;wake_one_task(*p); // 原来直接改变p->state}if (((*p)->signal & ~(*p)->blocked) && (*p)->state==TASK_INTERRUPTIBLE)wake_one_task(*p); // 原来直接改变p->state}/* this is the scheduler proper: */while (1) {c = -1;next = 0;i = NR_TASKS;p = &task[NR_TASKS];while (--i) {if (!*--p)continue;if ((*p)->state == TASK_RUNNING && (*p)->counter > c)c = (*p)->counter, next = i;}if (c)break;for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)if (*p)(*p)->counter = ((*p)->counter >> 1) +(*p)->priority;}sti(); // 保证原子操作switch_to(next);}
辅助接口 :
/*
 * Make task p runnable and request a reschedule (need_resched = 1) when
 * p has a larger counter than the current task.
 */
void wake_one_task(struct task_struct * p)
{
	p->state = TASK_RUNNING;
	if (p->counter > current->counter)
		need_resched = 1;
}
2. 定义了 wait_queue .在 sched.h 里面添加了接口 : add_wait_queue 和 remove_wait_queue . 使得这个环状链表的操作更美观.
接口代码:
/*
 * Insert 'wait' into the circular wait queue whose head pointer is *p.
 *
 * EFLAGS are saved and interrupts disabled (pushfl/popl/cli) for the
 * duration of the list update, then restored.
 * After the call *p points at 'wait'. On an empty queue 'wait' ends up
 * linked to itself.
 */
extern inline void add_wait_queue(struct wait_queue ** p, struct wait_queue * wait)
{
	unsigned long flags;
	struct wait_queue * tmp;

	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
	wait->next = *p;
	tmp = wait;
	/* advance to the node whose successor is the old head (the ring's tail) */
	while (tmp->next)
		if ((tmp = tmp->next)->next == *p)
			break;
	/* close the ring through 'wait' and make it the new head */
	*p = tmp->next = wait;
	__asm__ __volatile__("pushl %0 ; popfl"::"r" (flags));
}

/*
 * Remove 'wait' from the circular wait queue whose head pointer is *p.
 *
 * Runs with interrupts disabled, restoring the saved EFLAGS at the end.
 * If 'wait' was the head, the head moves to its successor, or becomes
 * NULL when 'wait' was the only element. wait->next is reset to NULL.
 */
extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue * wait)
{
	unsigned long flags;
	struct wait_queue * tmp;

	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
	if (*p == wait)
		if ((*p = wait->next) == wait)
			*p = NULL;
	/* find the predecessor of 'wait' by walking the ring starting at 'wait' */
	tmp = wait;
	while (tmp && tmp->next != wait)
		tmp = tmp->next;
	if (tmp)
		tmp->next = wait->next;
	wait->next = NULL;
	__asm__ __volatile__("pushl %0 ; popfl"::"r" (flags));
}
然后对对应的 __sleep_on 接口进行了修改
/*
 * Sleep on the wait queue *p in the given state (Linux 0.97).
 *
 * The task's embedded wait entry (current->wait) is linked into the
 * queue before scheduling and unlinked afterwards. EFLAGS are saved with
 * interrupts disabled on entry and restored on exit.
 *
 * p:     wait queue head pointer; NULL is ignored.
 * state: task state to sleep in.
 * Panics if called by task[0]; prints a warning when current->wait
 * already has a successor.
 */
static inline void __sleep_on(struct wait_queue **p, int state)
{
	unsigned long flags;

	if (!p)
		return;
	if (current == task[0])
		panic("task[0] trying to sleep");
	if (current->wait.next)
		printk("__sleep_on: wait->next exists\n");
	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
	current->state = state;
	add_wait_queue(p,&current->wait);
	sti();			/* interrupts back on before giving up the CPU */
	schedule();
	remove_wait_queue(p,&current->wait);
	__asm__("pushl %0 ; popfl"::"r" (flags));
}
3. wake_up 函数也有改变, 但是只是将 while 改成了 do .. . while .
Linux V1.0
首先, schedule 函数重新接管了定时器, 当然代码更加复杂了. 但是流程图回到了 V0.12 版.
其次, 为了产生更高效率的机器码, 使用 for(;;) 代替while , 使用goto 代替if
最后, 不再依靠遍历task数组来遍历所有的进程,改为遍历环状链表.
源代码:
asmlinkage void schedule(void){int c;struct task_struct * p;struct task_struct * next;unsigned long ticks;/* check alarm, wake up any interruptible tasks that have got a signal */cli();ticks = itimer_ticks;itimer_ticks = 0;itimer_next = ~0;sti();need_resched = 0;p = &init_task;for (;;) {if ((p = p->next_task) == &init_task)goto confuse_gcc1;if (ticks && p->it_real_value) {if (p->it_real_value <= ticks) {send_sig(SIGALRM, p, 1);if (!p->it_real_incr) {p->it_real_value = 0;goto end_itimer;}do {p->it_real_value += p->it_real_incr;} while (p->it_real_value <= ticks);}p->it_real_value -= ticks;if (p->it_real_value < itimer_next)itimer_next = p->it_real_value;}end_itimer:if (p->state != TASK_INTERRUPTIBLE)continue;if (p->signal & ~p->blocked) {p->state = TASK_RUNNING;continue;}if (p->timeout && p->timeout <= jiffies) {p->timeout = 0;p->state = TASK_RUNNING;}}confuse_gcc1:/* this is the scheduler proper: */#if 0/* give processes that go to sleep a bit higher priority.. *//* This depends on the values for TASK_XXX *//* This gives smoother scheduling for some things, but *//* can be very unfair under some circumstances, so.. */ if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state && current->counter < current->priority*2) {++current->counter;}#endifc = -1;next = p = &init_task;for (;;) {if ((p = p->next_task) == &init_task)goto confuse_gcc2;if (p->state == TASK_RUNNING && p->counter > c)c = p->counter, next = p;}confuse_gcc2:if (!c) {for_each_task(p)p->counter = (p->counter >> 1) + p->priority;}if(current != next)kstat.context_swtch++;switch_to(next);/* Now maybe reload the debug registers */if(current->debugreg[7]){loaddebug(0);loaddebug(1);loaddebug(2);loaddebug(3);loaddebug(6);};}
相关函数修改:
1. wake_up 对环状列表的所有睡眠的进程进行唤醒
/*
 * Wake every sleeping task on the circular wait queue *q (Linux 1.0).
 *
 * Tasks in TASK_UNINTERRUPTIBLE or TASK_INTERRUPTIBLE become
 * TASK_RUNNING; need_resched is set when a woken task has a larger
 * counter than the current task. A NULL next pointer is reported as a
 * corrupted queue and ends the walk.
 */
void wake_up(struct wait_queue **q)
{
	struct wait_queue *tmp;
	struct task_struct * p;

	if (!q || !(tmp = *q))
		return;
	do {
		if ((p = tmp->task) != NULL) {
			if ((p->state == TASK_UNINTERRUPTIBLE) ||
			    (p->state == TASK_INTERRUPTIBLE)) {
				p->state = TASK_RUNNING;
				if (p->counter > current->counter)
					need_resched = 1;
			}
		}
		/* the ring must never contain a NULL link */
		if (!tmp->next) {
			printk("wait_queue is bad (eip = %08lx)\n",((unsigned long *) q)[-1]);
			printk(" q = %p\n",q);
			printk(" *q = %p\n",*q);
			printk(" tmp = %p\n",tmp);
			break;
		}
		tmp = tmp->next;
	} while (tmp != *q);	/* stop after one full lap around the ring */
}
2. 加入接口wake_up_interruptible, 仅唤醒可中断睡眠的进程
/*
 * Like wake_up(), but only tasks in TASK_INTERRUPTIBLE are made runnable
 * (Linux 1.0). need_resched is set when a woken task has a larger counter
 * than the current task.
 */
void wake_up_interruptible(struct wait_queue **q)
{
	struct wait_queue *tmp;
	struct task_struct * p;

	if (!q || !(tmp = *q))
		return;
	do {
		if ((p = tmp->task) != NULL) {
			if (p->state == TASK_INTERRUPTIBLE) {
				p->state = TASK_RUNNING;
				if (p->counter > current->counter)
					need_resched = 1;
			}
		}
		/* a NULL link means the ring is corrupted: report and stop */
		if (!tmp->next) {
			printk("wait_queue is bad (eip = %08lx)\n",((unsigned long *) q)[-1]);
			printk(" q = %p\n",q);
			printk(" *q = %p\n",*q);
			printk(" tmp = %p\n",tmp);
			break;
		}
		tmp = tmp->next;
	} while (tmp != *q);
}
3. 为了保护竞争资源加入了__down接口实现了计数的信号量机制
/*
 * Slow path of a semaphore "down": block the current task until
 * sem->count is above 0.
 *
 * A wait entry for the current task is queued on sem->wait for the whole
 * wait. The task sleeps in TASK_UNINTERRUPTIBLE, re-checking the count
 * after every return from schedule(). The count itself is not modified
 * in this function.
 */
void __down(struct semaphore * sem)
{
	struct wait_queue wait = { current, NULL };

	add_wait_queue(&sem->wait, &wait);
	current->state = TASK_UNINTERRUPTIBLE;
	while (sem->count <= 0) {
		schedule();
		/* re-arm the sleeping state before testing the count again */
		current->state = TASK_UNINTERRUPTIBLE;
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&sem->wait, &wait);
}
4. 通过nice接口, 限制优先级数值为1-35 .
/*
 * nice() (Linux 1.0).
 * A negative increment requires suser(), else -EPERM. The resulting
 * priority is clamped to the range 1..35.
 */
asmlinkage int sys_nice(long increment)
{
	int newprio;

	if (increment < 0 && !suser())
		return -EPERM;
	newprio = current->priority - increment;
	if (newprio < 1)
		newprio = 1;
	if (newprio > 35)
		newprio = 35;
	current->priority = newprio;
	return 0;
}
Linux V1.1
加入了个对中断的输出:
if (intr_count) {printk("Aiee: scheduling in interrupt\n");intr_count = 0;}
Linux V1.2
删除了system_call.s 文件. 添加了arch文件夹, 系统调用入口移动到对应体系结构目录下的 entry.S
Linux V1.3
1. 添加了在 schedule 时处理 tq_scheduler 的支持.
run_task_queue(&tq_scheduler);
2. 每次统计当前运行的进程数目
nr_running = 0;...nr_running++;
Linux V2.0
大量的修改 , 先看源码 ,再一一解释:
asmlinkage void schedule(void){int c;struct task_struct * p;struct task_struct * prev, * next;unsigned long timeout = 0;int this_cpu=smp_processor_id();/* check alarm, wake up any interruptible tasks that have got a signal */if (intr_count)goto scheduling_in_interrupt;if (bh_active & bh_mask) {intr_count = 1;do_bottom_half(); // 1. 添加了对 buttom half 的支持.intr_count = 0;}run_task_queue(&tq_scheduler); need_resched = 0;prev = current;cli();/* move an exhausted RR process to be last.. */if (!prev->counter && prev->policy == SCHED_RR) {prev->counter = prev->priority;move_last_runqueue(prev); //2. 加入了runquene的概念}switch (prev->state) {case TASK_INTERRUPTIBLE:if (prev->signal & ~prev->blocked)goto makerunnable;timeout = prev->timeout;if (timeout && (timeout <= jiffies)) {prev->timeout = 0;timeout = 0;makerunnable:prev->state = TASK_RUNNING;break;}default:del_from_runqueue(prev);case TASK_RUNNING:}p = init_task.next_run;sti();#ifdef __SMP__/* *This is safe as we do not permit re-entry of schedule() */prev->processor = NO_PROC_ID;#define idle_task (task[cpu_number_map[this_cpu]])#else#define idle_task (&init_task)#endif/* * Note! there may appear new tasks on the run-queue during this, as * interrupts are enabled. However, they will be put on front of the * list, so our list starting at "p" is essentially fixed. *//* this is the scheduler proper: */c = -1000;next = idle_task;while (p != &init_task) {int weight = goodness(p, prev, this_cpu); //3. 新的优先级计算方式if (weight > c)c = weight, next = p;p = p->next_run;}/* if all runnable processes have "counter == 0", re-calculate counters */if (!c) {for_each_task(p)p->counter = (p->counter >> 1) + p->priority;}#ifdef __SMP__ // 4. 
多CPU支持/* *Allocate process to CPU */ next->processor = this_cpu; next->last_processor = this_cpu;#endif #ifdef __SMP_PROF__ /* mark processor running an idle thread */if (0==next->pid)set_bit(this_cpu,&smp_idle_map);elseclear_bit(this_cpu,&smp_idle_map);#endifif (prev != next) {struct timer_list timer;kstat.context_swtch++;if (timeout) {init_timer(&timer);timer.expires = timeout;timer.data = (unsigned long) prev;timer.function = process_timeout;add_timer(&timer);}get_mmu_context(next);switch_to(prev,next);if (timeout)del_timer(&timer);}return;scheduling_in_interrupt:printk("Aiee: scheduling in interrupt %p\n",__builtin_return_address(0));}
1. 添加了对 bottom half 的支持.
利用 bh_active 和 bh_mask 两个掩码来记录软中断信息. 每次 schedule 统一执行之 .
if (bh_active & bh_mask) {intr_count = 1;do_bottom_half();intr_count = 0;}
2. 定义了 runqueue 的概念(task_struct 加入俩指针) , 加入接口 add_to_runqueue , del_from_runqueue, move_last_runqueue , 来实现对 runqueue 的操作.
其中除了双向环状链表的操作, 就是对多CPU的支持.
static inline void add_to_runqueue(struct task_struct * p){#ifdef __SMP__int cpu=smp_processor_id();#endif#if 1/* sanity tests */if (p->next_run || p->prev_run) {printk("task already on run-queue\n");return;}#endifif (p->counter > current->counter + 3)need_resched = 1;nr_running++;(p->prev_run = init_task.prev_run)->next_run = p;p->next_run = &init_task;init_task.prev_run = p;#ifdef __SMP__/* this is safe only if called with cli()*/while(set_bit(31,&smp_process_available)){while(test_bit(31,&smp_process_available)){if(clear_bit(cpu,&smp_invalidate_needed)){local_flush_tlb();set_bit(cpu,&cpu_callin_map[0]);}}}smp_process_available++;clear_bit(31,&smp_process_available);if ((0!=p->pid) && smp_threads_ready){int i;for (i=0;i<smp_num_cpus;i++){if (0==current_set[cpu_logical_map[i]]->pid) {smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);break;}}}#endif}static inline void del_from_runqueue(struct task_struct * p){struct task_struct *next = p->next_run;struct task_struct *prev = p->prev_run;#if 1/* sanity tests */if (!next || !prev) {printk("task not on run-queue\n");return;}#endifif (p == &init_task) {static int nr = 0;if (nr < 5) {nr++;printk("idle task may not sleep\n");}return;}nr_running--;next->prev_run = prev;prev->next_run = next;p->next_run = NULL;p->prev_run = NULL;}static inline void move_last_runqueue(struct task_struct * p){struct task_struct *next = p->next_run;struct task_struct *prev = p->prev_run;/* remove from list */next->prev_run = prev;prev->next_run = next;/* add back to list */p->next_run = &init_task;prev = init_task.prev_run;init_task.prev_run = p;p->prev_run = prev;prev->next_run = p;}
3. 加入新的优先级计算方式.
加入了进程调度策略概念:
/*
 * Scheduling policies
 */
#define SCHED_OTHER	0	/* ordinary process */
#define SCHED_FIFO	1	/* realtime: one process runs to completion before the next */
#define SCHED_RR	2	/* realtime: fixed time slice, processes take turns round-robin */
添加了优先级计算接口 goodness
/* * This is the function that decides how desirable a process is.. * You can weigh different processes against each other depending * on what CPU they've run on lately etc to try to handle cache * and TLB miss penalties. * * Return values: * -1000: never select this * 0: out of time, recalculate counters (but it might still be *selected) * +ve: "goodness" value (the larger, the better) * +1000: realtime process, select this. */static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu){int weight;#ifdef __SMP__/* We are not permitted to run a task someone else is running */if (p->processor != NO_PROC_ID)return -1000; // CPU 不支持直接#ifdef PAST_2_0/* This process is locked to a processor group */if (p->processor_mask && !(p->processor_mask & (1<<this_cpu))return -1000; // 绑定特定CPU , 不需要当前CPU#endif#endif/* * Realtime process, select the first one on the * runqueue (taking priorities within processes * into account). */if (p->policy != SCHED_OTHER)return 1000 + p->rt_priority; // 实时程序, 立刻执行/* * Give the process a first-approximation goodness value * according to the number of clock-ticks it has left. * * Don't do any other calculations if the time slice is * over.. */weight = p->counter;if (weight) {#ifdef __SMP__/* Give a largish advantage to the same processor... *//* (this is equivalent to penalizing other processors) */if (p->last_processor == this_cpu)weight += PROC_CHANGE_PENALTY; // 同一CPU#endif/* .. and a slight advantage to the current process */if (p == prev)weight += 1; // 还是上一个进程}return weight;}
4. 支持多CPU调度
相关接口修改:
加入接口 wake_up_process , 内部利用 add_to_runqueue 把进程放回运行队列. wake_up 和 wake_up_interruptible 改为调用它, 取代原来直接设置 p->state = TASK_RUNNING 的做法.
/*
 * Make task p runnable and put it on the run queue if it is not already
 * linked in (p->next_run is NULL). Runs with interrupts disabled; the
 * caller's flags are saved and restored.
 */
inline void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	p->state = TASK_RUNNING;
	if (!p->next_run)
		add_to_runqueue(p);
	restore_flags(flags);
}
nice接口策略修改, 限定 优先值 1 - DEF_PRIORITY*2
/*
 * nice() (Linux 2.0).
 *
 * The magnitude of 'increment' is capped at 40 and rescaled by
 * DEF_PRIORITY/20 (with rounding) before being applied. A negative
 * increment requires suser(), else -EPERM. The result is clamped to
 * 1..DEF_PRIORITY*2.
 */
asmlinkage int sys_nice(int increment)
{
	unsigned long newprio;
	int increase = 0;

	/* work on the absolute value; 'increase' remembers the sign */
	newprio = increment;
	if (increment < 0) {
		if (!suser())
			return -EPERM;
		newprio = -increment;
		increase = 1;
	}
	if (newprio > 40)
		newprio = 40;
	/*
	 * do a "normalization" of the priority (traditionally
	 * unix nice values are -20..20, linux doesn't really
	 * use that kind of thing, but uses the length of the
	 * timeslice instead (default 150 msec). The rounding is
	 * why we want to avoid negative values.
	 */
	newprio = (newprio * DEF_PRIORITY + 10) / 20;
	increment = newprio;
	if (increase)
		increment = -increment;
	newprio = current->priority - increment;
	/* newprio is unsigned here, so this comparison only matches the value 0 */
	if (newprio < 1)
		newprio = 1;
	if (newprio > DEF_PRIORITY*2)
		newprio = DEF_PRIORITY*2;
	current->priority = newprio;
	return 0;
}
Linux V2.1
相关函数修改:
唤醒函数修改,统一使用wait_queue
/*
 * Wake every sleeping task on the wait queue *q (Linux 2.1).
 *
 * The walk runs from *q until it reaches WAIT_QUEUE_HEAD(q). Tasks in
 * TASK_UNINTERRUPTIBLE or TASK_INTERRUPTIBLE are handed to
 * wake_up_process(). A NULL link is reported as a corrupted queue.
 */
void wake_up(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;	/* advance before waking the task */
		if (p != NULL) {
			if ((p->state == TASK_UNINTERRUPTIBLE) ||
			    (p->state == TASK_INTERRUPTIBLE))
				wake_up_process(p);
		}
		if (!next)
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk(" q = %p\n",q);
	printk(" *q = %p\n",*q);
}

/*
 * Like wake_up(), but only tasks in TASK_INTERRUPTIBLE are handed to
 * wake_up_process().
 */
void wake_up_interruptible(struct wait_queue **q)
{
	struct wait_queue *next;
	struct wait_queue *head;

	if (!q || !(next = *q))
		return;
	head = WAIT_QUEUE_HEAD(q);
	while (next != head) {
		struct task_struct *p = next->task;
		next = next->next;	/* advance before waking the task */
		if (p != NULL) {
			if (p->state == TASK_INTERRUPTIBLE)
				wake_up_process(p);
		}
		if (!next)
			goto bad;
	}
	return;
bad:
	printk("wait_queue is bad (eip = %p)\n",
		__builtin_return_address(0));
	printk(" q = %p\n",q);
	printk(" *q = %p\n",*q);
}
对 sleep_on 接口加入原子保护
修改nice接口的一个bug .
if (newprio < 1)
==>
if ((signed) newprio < 1)
Linux V2.2
修改较多.
1. 添加对多CPU的更多支持.
2. 对每个队列操作添加了锁保护 (自旋锁 spin_lock / 读写锁 read_lock)
源码:
asmlinkage void schedule(void){struct schedule_data * sched_data;struct task_struct * prev, * next;int this_cpu;prev = current;this_cpu = prev->processor;/* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */sched_data = & aligned_data[this_cpu].schedule_data;if (in_interrupt())goto scheduling_in_interrupt;release_kernel_lock(prev, this_cpu);/* Do "administrative" work here while we don't hold any locks */if (bh_active & bh_mask)do_bottom_half();run_task_queue(&tq_scheduler);spin_lock(&scheduler_lock);spin_lock_irq(&runqueue_lock);/* move an exhausted RR process to be last.. */prev->need_resched = 0;if (!prev->counter && prev->policy == SCHED_RR) {prev->counter = prev->priority;move_last_runqueue(prev);}switch (prev->state) {case TASK_INTERRUPTIBLE:if (signal_pending(prev)) {prev->state = TASK_RUNNING;break;}default:del_from_runqueue(prev);case TASK_RUNNING:}sched_data->prevstate = prev->state;{struct task_struct * p = init_task.next_run;/* * This is subtle. * Note how we can enable interrupts here, even * though interrupts can add processes to the run- * queue. This is because any new processes will * be added to the front of the queue, so "p" above * is a safe starting point. * run-queue deletion and re-ordering is protected by * the scheduler lock */spin_unlock_irq(&runqueue_lock);#ifdef __SMP__prev->has_cpu = 0;#endif/* * Note! there may appear new tasks on the run-queue during this, as * interrupts are enabled. However, they will be put on front of the * list, so our list starting at "p" is essentially fixed. *//* this is the scheduler proper: */{int c = -1000;next = idle_task;while (p != &init_task) {if (can_schedule(p)) {int weight = goodness(p, prev, this_cpu);if (weight > c)c = weight, next = p;}p = p->next_run;}/* Do we need to re-calculate counters? 
*/if (!c) {struct task_struct *p;read_lock(&tasklist_lock);for_each_task(p)p->counter = (p->counter >> 1) + p->priority;read_unlock(&tasklist_lock);}}} /* * maintain the per-process 'average timeslice' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP: */#ifdef __SMP__{cycles_t t, this_slice;t = get_cycles();this_slice = t - sched_data->last_schedule;sched_data->last_schedule = t;/* * Simple, exponentially fading average calculation: */prev->avg_slice = this_slice + prev->avg_slice;prev->avg_slice >>= 1;}/* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */prev->has_cpu = 1; next->has_cpu = 1; next->processor = this_cpu;spin_unlock(&scheduler_lock);#endif /* __SMP__ */ if (prev != next) {#ifdef __SMP__sched_data->prev = prev;#endif kstat.context_swtch++;get_mmu_context(next);switch_to(prev,next);__schedule_tail();} reacquire_kernel_lock(current);return;scheduling_in_interrupt:printk("Scheduling in interrupt\n");*(int *)0 = 0;}
相关接口修改:
添加接口reschedule_idle, 对唤醒进程后重新调度之前做了更多工作
/*
 * Make task p runnable (Linux 2.2). Under runqueue_lock (taken with
 * spin_lock_irqsave) the state is set to TASK_RUNNING; if p is not yet
 * linked into the run queue it is added and reschedule_idle(p) is
 * called for it.
 */
void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	spin_lock_irqsave(&runqueue_lock, flags);
	p->state = TASK_RUNNING;
	if (!p->next_run) {
		add_to_runqueue(p);
		reschedule_idle(p);
	}
	spin_unlock_irqrestore(&runqueue_lock, flags);
}
主要是对多CPU的支持. 同时对优先级更高有了新的定义 : 至少大于3
/*
 * Decide whether waking task p should trigger a reschedule.
 *
 * On the current CPU a reschedule is requested when p is not
 * SCHED_OTHER, or when its counter exceeds the current task's counter by
 * more than 3. The SMP part below additionally tries to place p on
 * another CPU.
 */
static inline void reschedule_idle(struct task_struct * p)
{

	if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) {
		current->need_resched = 1;
		return;
	}

#ifdef __SMP__
	/*
	 * ("wakeup()" should not be called before we've initialized
	 * SMP completely.
	 * Basically a not-yet initialized SMP subsystem can be
	 * considered as a not-yet working scheduler, simply dont use
	 * it before it's up and running ...)
	 *
	 * SMP rescheduling is done in 2 passes:
	 *  - pass #1: faster: 'quick decisions'
	 *  - pass #2: slower: 'lets try and find another CPU'
	 */

	/*
	 * Pass #1
	 *
	 * There are two metrics here:
	 *
	 * first, a 'cutoff' interval, currently 0-200 usecs on
	 * x86 CPUs, depending on the size of the 'SMP-local cache'.
	 * If the current process has longer average timeslices than
	 * this, then we utilize the idle CPU.
	 *
	 * second, if the wakeup comes from a process context,
	 * then the two processes are 'related'. (they form a
	 * 'gang')
	 *
	 * An idle CPU is almost always a bad thing, thus we skip
	 * the idle-CPU utilization only if both these conditions
	 * are true. (ie. a 'process-gang' rescheduling with rather
	 * high frequency should stay on the same CPU).
	 *
	 * [We can switch to something more finegrained in 2.3.]
	 */
	if ((current->avg_slice < cacheflush_time) && related(current, p))
		return;

	reschedule_idle_slow(p);
#endif /* __SMP__ */
}
添加了新的睡眠接口,支持定时睡眠
signed long schedule_timeout(signed long timeout){ struct timer_list timer; unsigned long expire; switch (timeout) { case MAX_SCHEDULE_TIMEOUT: /* * These two special cases are useful to be comfortable * in the caller. Nothing more. We could take * MAX_SCHEDULE_TIMEOUT from one of the negative value * but I' d like to return a valid offset (>=0) to allow * the caller to do everything it want with the retval. */ schedule(); goto out; default: /* * Another bit of PARANOID. Note that the retval will be * 0 since no piece of kernel is supposed to do a check * for a negative retval of schedule_timeout() (since it * should never happens anyway). You just have the printk() * that will tell you if something is gone wrong and where. */ if (timeout < 0) { printk(KERN_ERR "schedule_timeout: wrong timeout " "value %lx from %p\n", timeout, __builtin_return_address(0)); goto out; } } expire = timeout + jiffies; init_timer(&timer); timer.expires = expire; timer.data = (unsigned long) current; timer.function = process_timeout; add_timer(&timer); schedule(); del_timer(&timer); timeout = expire - jiffies; out: return timeout < 0 ? 0 : timeout;}/* * This one aligns per-CPU data on cacheline boundaries. */static union { struct schedule_data { struct task_struct * prev; long prevstate; cycles_t last_schedule; } schedule_data; char __pad [L1_CACHE_BYTES];} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};static inline void __schedule_tail (void){#ifdef __SMP__ struct schedule_data * sched_data; /* * We might have switched CPUs: */ sched_data = & aligned_data[smp_processor_id()].schedule_data; /* * Subtle. In the rare event that we got a wakeup to 'prev' just * during the reschedule (this is possible, the scheduler is pretty * parallel), we should do another reschedule in the next task's * context. schedule() will do the right thing next time around. * this is equivalent to 'delaying' the wakeup until the reschedule * has finished. 
*/ if (sched_data->prev->state != sched_data->prevstate) current->need_resched = 1; /* * Release the previous process ... * * We have dropped all locks, and we must make sure that we * only mark the previous process as no longer having a CPU * after all other state has been seen by other CPU's. Thus * the write memory barrier! */ wmb(); sched_data->prev->has_cpu = 0;#endif /* __SMP__ */}void interruptible_sleep_on(struct wait_queue **p){SLEEP_ON_VARcurrent->state = TASK_INTERRUPTIBLE;SLEEP_ON_HEADschedule();SLEEP_ON_TAIL}long interruptible_sleep_on_timeout(struct wait_queue **p, long timeout){SLEEP_ON_VARcurrent->state = TASK_INTERRUPTIBLE;SLEEP_ON_HEADtimeout = schedule_timeout(timeout);SLEEP_ON_TAILreturn timeout;}void sleep_on(struct wait_queue **p){SLEEP_ON_VARcurrent->state = TASK_UNINTERRUPTIBLE;SLEEP_ON_HEADschedule();SLEEP_ON_TAIL}long sleep_on_timeout(struct wait_queue **p, long timeout){SLEEP_ON_VARcurrent->state = TASK_UNINTERRUPTIBLE;SLEEP_ON_HEADtimeout = schedule_timeout(timeout);SLEEP_ON_TAILreturn timeout;}
Linux V2.3
1. 对多CPU支持进行了更多修改, 这里不讨论.
2. schedule 函数本身逻辑并无太大修改,但是大量使用goto 替换原来的if { ... }
3. 更多地使用锁 (自旋锁、读写锁) 保护.
源码:
asmlinkage void schedule(void){struct schedule_data * sched_data;struct task_struct *prev, *next, *p;int this_cpu, c;if (tq_scheduler)goto handle_tq_scheduler;tq_scheduler_back:prev = current;this_cpu = prev->processor;if (in_interrupt())goto scheduling_in_interrupt;release_kernel_lock(prev, this_cpu);/* Do "administrative" work here while we don't hold any locks */if (bh_mask & bh_active)goto handle_bh;handle_bh_back:/* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */sched_data = & aligned_data[this_cpu].schedule_data;spin_lock_irq(&runqueue_lock);/* move an exhausted RR process to be last.. */if (prev->policy == SCHED_RR)goto move_rr_last;move_rr_back:switch (prev->state) {case TASK_INTERRUPTIBLE:if (signal_pending(prev)) {prev->state = TASK_RUNNING;break;}default:del_from_runqueue(prev);case TASK_RUNNING:}prev->need_resched = 0;repeat_schedule:/* * this is the scheduler proper: */p = init_task.next_run;/* Default process to select.. */next = idle_task(this_cpu);c = -1000;if (prev->state == TASK_RUNNING)goto still_running;still_running_back:/* * This is subtle. * Note how we can enable interrupts here, even * though interrupts can add processes to the run- * queue. This is because any new processes will * be added to the front of the queue, so "p" above * is a safe starting point. * run-queue deletion and re-ordering is protected by * the scheduler lock *//* * Note! there may appear new tasks on the run-queue during this, as * interrupts are enabled. However, they will be put on front of the * list, so our list starting at "p" is essentially fixed. */while (p != &init_task) {if (can_schedule(p)) {int weight = goodness(prev, p, this_cpu);if (weight > c)c = weight, next = p;}p = p->next_run;}/* Do we need to re-calculate counters? */if (!c)goto recalculate;/* * from this point on nothing can prevent us from * switching to the next task, save this fact in * sched_data. 
*/sched_data->curr = next;#ifdef __SMP__ next->has_cpu = 1;next->processor = this_cpu;#endifspin_unlock_irq(&runqueue_lock);if (prev == next)goto same_process;#ifdef __SMP__ /* * maintain the per-process 'average timeslice' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP, * and it's approximate, so we do not have to maintain * it while holding the runqueue spinlock. */{cycles_t t, this_slice;t = get_cycles();this_slice = t - sched_data->last_schedule;sched_data->last_schedule = t;/* * Exponentially fading average calculation, with * some weight so it doesnt get fooled easily by * smaller irregularities. */prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2;}/* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */#endif /* __SMP__ */kstat.context_swtch++;get_mmu_context(next);switch_to(prev, next, prev);__schedule_tail(prev);same_process: reacquire_kernel_lock(current);return;recalculate:{struct task_struct *p;spin_unlock_irq(&runqueue_lock);read_lock(&tasklist_lock);for_each_task(p)p->counter = (p->counter >> 1) + p->priority;read_unlock(&tasklist_lock);spin_lock_irq(&runqueue_lock);goto repeat_schedule;}still_running:c = prev_goodness(prev, prev, this_cpu);next = prev;goto still_running_back;handle_bh:do_bottom_half();goto handle_bh_back;handle_tq_scheduler:run_task_queue(&tq_scheduler);goto tq_scheduler_back;move_rr_last:if (!prev->counter) {prev->counter = prev->priority;move_last_runqueue(prev);}goto move_rr_back;scheduling_in_interrupt:printk("Scheduling in interrupt\n");*(int *)0 = 0;return;}
相关函数修改:
唤醒函数
/*
 * __wake_up() - wake every task on wait queue *q whose state matches 'mode'.
 *
 * @q:    address of the wait-queue pointer; a NULL q is ignored.
 * @mode: bit mask tested against each task's ->state.
 *
 * The queue is walked under the waitqueue_lock read lock. When the task
 * about to be woken is the last entry of the walk, the lock is dropped
 * before wake_up_process() is called.
 */
void __wake_up(struct wait_queue **q, unsigned int mode)
{
	struct wait_queue *head, *cursor;

	if (!q)
		return;

	/*
	 * Computing the head before looking at *q is safe: it is pure
	 * pointer arithmetic, nothing is dereferenced.
	 */
	head = WAIT_QUEUE_HEAD(q);

	read_lock(&waitqueue_lock);
	cursor = *q;
	if (cursor) {
		while (cursor != head) {
			struct task_struct *task = cursor->task;

			/* advance first, so "cursor == head" means 'task' was the last one */
			cursor = cursor->next;
			if (!(task->state & mode))
				continue;

			if (cursor == head) {
				/* only/last process: drop the read lock early */
				read_unlock(&waitqueue_lock);
				wake_up_process(task);
				return;
			}
			wake_up_process(task);
		}
	}
	read_unlock(&waitqueue_lock);
}
同时 __schedule_tail 接口也开始调用 reschedule_idle . 仍然是针对多CPU支持
/*
 * schedule_tail() is getting called from the fork return path. This
 * cleans up all remaining scheduler things, without impacting the
 * common case.
 *
 * On SMP builds: if 'prev' is still TASK_RUNNING and is not this CPU's
 * idle task, reschedule_idle() is called for it; then, after a write
 * barrier, prev->has_cpu is cleared. Without __SMP__ the body is empty.
 */
static inline void __schedule_tail (struct task_struct *prev)
{
#ifdef __SMP__
	if ((prev->state == TASK_RUNNING) &&
			(prev != idle_task(smp_processor_id())))
		reschedule_idle(prev);
	wmb();
	prev->has_cpu = 0;
#endif /* __SMP__ */
}

/* Out-of-line wrapper around __schedule_tail(). */
void schedule_tail (struct task_struct *prev)
{
	__schedule_tail(prev);
}
Linux V2.4
1. schedule 入口处添加了对内存描述符的检查: 当前进程必须有有效的 active_mm, 否则触发 BUG().
/* Sanity check at the top of schedule(): hit BUG() when the current task has no active_mm. */
if (!current->active_mm) BUG();
2. 添加了 prepare_to_switch 部分.
/*
 * Excerpt from schedule(): address-space hand-over that runs right
 * before switch_to().
 */
prepare_to_switch();
{
	struct mm_struct *mm = next->mm;
	struct mm_struct *oldmm = prev->active_mm;
	if (!mm) {
		/* 'next' has no mm of its own: it borrows prev's active_mm and takes a reference on it */
		if (next->active_mm) BUG();
		next->active_mm = oldmm;
		atomic_inc(&oldmm->mm_count);
		enter_lazy_tlb(oldmm, next, this_cpu);
	} else {
		/* 'next' owns an mm: its active_mm must already be that mm */
		if (next->active_mm != mm) BUG();
		switch_mm(oldmm, mm, next, this_cpu);
	}

	if (!prev->mm) {
		/* 'prev' was itself running on a borrowed mm: give the reference back */
		prev->active_mm = NULL;
		mmdrop(oldmm);
	}
}
schedule 源码:
asmlinkage void schedule(void){struct schedule_data * sched_data;struct task_struct *prev, *next, *p;struct list_head *tmp;int this_cpu, c;if (!current->active_mm) BUG();need_resched_back:prev = current;this_cpu = prev->processor;if (in_interrupt())goto scheduling_in_interrupt;release_kernel_lock(prev, this_cpu);/* Do "administrative" work here while we don't hold any locks */if (softirq_active(this_cpu) & softirq_mask(this_cpu))goto handle_softirq;handle_softirq_back:/* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */sched_data = & aligned_data[this_cpu].schedule_data;spin_lock_irq(&runqueue_lock);/* move an exhausted RR process to be last.. */if (prev->policy == SCHED_RR)goto move_rr_last;move_rr_back:switch (prev->state) {case TASK_INTERRUPTIBLE:if (signal_pending(prev)) {prev->state = TASK_RUNNING;break;}default:del_from_runqueue(prev);case TASK_RUNNING:}prev->need_resched = 0;/* * this is the scheduler proper: */repeat_schedule:/* * Default process to select.. */next = idle_task(this_cpu);c = -1000;if (prev->state == TASK_RUNNING)goto still_running;still_running_back:list_for_each(tmp, &runqueue_head) {p = list_entry(tmp, struct task_struct, run_list);if (can_schedule(p, this_cpu)) {int weight = goodness(p, this_cpu, prev->active_mm);if (weight > c)c = weight, next = p;}}/* Do we need to re-calculate counters? */if (!c)goto recalculate;/* * from this point on nothing can prevent us from * switching to the next task, save this fact in * sched_data. */sched_data->curr = next;#ifdef CONFIG_SMP next->has_cpu = 1;next->processor = this_cpu;#endifspin_unlock_irq(&runqueue_lock);if (prev == next)goto same_process;#ifdef CONFIG_SMP /* * maintain the per-process 'last schedule' value. * (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP, * and it's approximate, so we do not have to maintain * it while holding the runqueue spinlock. 
*/ sched_data->last_schedule = get_cycles();/* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */#endif /* CONFIG_SMP */kstat.context_swtch++;/* * there are 3 processes which are affected by a context switch: * * prev == .... ==> (last => next) * * It's the 'much more previous' 'prev' that is on next's stack, * but prev is set to (the just run) 'last' process by switch_to(). * This might sound slightly confusing but makes tons of sense. */prepare_to_switch();{struct mm_struct *mm = next->mm;struct mm_struct *oldmm = prev->active_mm;if (!mm) {if (next->active_mm) BUG();next->active_mm = oldmm;atomic_inc(&oldmm->mm_count);enter_lazy_tlb(oldmm, next, this_cpu);} else {if (next->active_mm != mm) BUG();switch_mm(oldmm, mm, next, this_cpu);}if (!prev->mm) {prev->active_mm = NULL;mmdrop(oldmm);}}/* * This just switches the register state and the * stack. */switch_to(prev, next, prev);__schedule_tail(prev);same_process:reacquire_kernel_lock(current);if (current->need_resched)goto need_resched_back;return;recalculate:{struct task_struct *p;spin_unlock_irq(&runqueue_lock);read_lock(&tasklist_lock);for_each_task(p)p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);read_unlock(&tasklist_lock);spin_lock_irq(&runqueue_lock);}goto repeat_schedule;still_running:c = goodness(prev, this_cpu, prev->active_mm);next = prev;goto still_running_back;handle_softirq:do_softirq();goto handle_softirq_back;move_rr_last:if (!prev->counter) {prev->counter = NICE_TO_TICKS(prev->nice);move_last_runqueue(prev);}goto move_rr_back;scheduling_in_interrupt:printk("Scheduling in interrupt\n");BUG();return;}
相关代码修改:
1. 修改了对 runqueue 的操作. 双向链表操作抽象在 list.h 中, 提供各种接口
使用例子:
/* Link 'p' in at the front of runqueue_head and bump nr_running. */
static inline void add_to_runqueue(struct task_struct * p)
{
	list_add(&p->run_list, &runqueue_head);
	nr_running++;
}

/* Unlink 'p' from its current position and re-insert it at the tail of runqueue_head. */
static inline void move_last_runqueue(struct task_struct * p)
{
	list_del(&p->run_list);
	list_add_tail(&p->run_list, &runqueue_head);
}

/* Unlink 'p' from its current position and re-insert it at the front of runqueue_head. */
static inline void move_first_runqueue(struct task_struct * p)
{
	list_del(&p->run_list);
	list_add(&p->run_list, &runqueue_head);
}
2. reschedule_idle对多CPU做了更多的工作.
3. 添加同步唤醒接口 wake_up_process_synchronous: 唤醒后只把进程放回运行队列, 不去判断是否重新调度.
/*
 * wake_up_process_synchronous() - mark 'p' runnable without asking for
 * a reschedule.
 *
 * With runqueue_lock held and interrupts saved/disabled, p->state is set
 * to TASK_RUNNING and p is added to the run queue unless it is already
 * on it. Nothing else is done after the insertion.
 */
static inline void wake_up_process_synchronous(struct task_struct * p)
{
	unsigned long irq_state;

	spin_lock_irqsave(&runqueue_lock, irq_state);
	p->state = TASK_RUNNING;
	if (!task_on_runqueue(p))
		add_to_runqueue(p);
	spin_unlock_irqrestore(&runqueue_lock, irq_state);
}
4. 添加一系列唤醒接口, 可以根据不同的模式,选择CPU ,选择唤醒方式.
/*
 * __wake_up_common() - wake tasks sleeping on wait queue 'q'.
 *
 * @q:       wait-queue head; a NULL q is ignored.
 * @mode:    bit mask tested against each sleeper's ->state.
 * @wq_mode: masked with each entry's ->flags and WQ_FLAG_EXCLUSIVE to
 *           decide whether the entry is treated as exclusive.
 * @sync:    non-zero selects wake_up_process_synchronous() instead of
 *           wake_up_process().
 *
 * The queue is walked under q->lock (write lock, irqs saved). Matching
 * non-exclusive waiters are woken as they are met; the walk stops after
 * the first exclusive waiter that gets woken. When called from interrupt
 * context, a matching exclusive waiter is not woken immediately: the
 * first one is remembered, one whose ->processor equals the current CPU
 * replaces it and ends the walk, and the remembered task is woken after
 * the loop.
 */
static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
				     unsigned int wq_mode, const int sync)
{
	struct list_head *tmp, *head;
	struct task_struct *p, *best_exclusive;
	unsigned long flags;
	int best_cpu, irq;

	if (!q)
		goto out;

	best_cpu = smp_processor_id();
	irq = in_interrupt();
	best_exclusive = NULL;
	wq_write_lock_irqsave(&q->lock, flags);

#if WAITQUEUE_DEBUG
	CHECK_MAGIC_WQHEAD(q);
#endif

	head = &q->task_list;
#if WAITQUEUE_DEBUG
	if (!head->next || !head->prev)
		WQ_BUG();
#endif
	tmp = head->next;
	while (tmp != head) {
		unsigned int state;
		wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);

		tmp = tmp->next;

#if WAITQUEUE_DEBUG
		CHECK_MAGIC(curr->__magic);
#endif
		p = curr->task;
		state = p->state;
		if (state & mode) {
#if WAITQUEUE_DEBUG
			curr->__waker = (long)__builtin_return_address(0);
#endif
			/*
			 * If waking up from an interrupt context then
			 * prefer processes which are affine to this
			 * CPU.
			 */
			if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) {
				if (!best_exclusive)
					best_exclusive = p;
				if (p->processor == best_cpu) {
					best_exclusive = p;
					break;
				}
			} else {
				if (sync)
					wake_up_process_synchronous(p);
				else
					wake_up_process(p);
				if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)
					break;
			}
		}
	}
	/* deferred wake-up of the exclusive waiter chosen in interrupt context */
	if (best_exclusive) {
		if (sync)
			wake_up_process_synchronous(best_exclusive);
		else
			wake_up_process(best_exclusive);
	}
	wq_write_unlock_irqrestore(&q->lock, flags);
out:
	return;
}

/* Wake waiters on 'q' via __wake_up_common() with sync == 0. */
void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
{
	__wake_up_common(q, mode, wq_mode, 0);
}

/* Wake waiters on 'q' via __wake_up_common() with sync == 1. */
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
{
	__wake_up_common(q, mode, wq_mode, 1);
}
Linux V2.5
1. 不再大量的使用 goto , 作为替代对 if 使用 unlikely .
源码:
asmlinkage void schedule(void){struct schedule_data * sched_data;struct task_struct *prev, *next, *p;struct list_head *tmp;int this_cpu, c;spin_lock_prefetch(&runqueue_lock);if (!current->active_mm) BUG();need_resched_back:prev = current;this_cpu = prev->processor;if (unlikely(in_interrupt())) {printk("Scheduling in interrupt\n");BUG();}release_kernel_lock(prev, this_cpu);/* * 'sched_data' is protected by the fact that we can run * only one process per CPU. */sched_data = & aligned_data[this_cpu].schedule_data;spin_lock_irq(&runqueue_lock);/* move an exhausted RR process to be last.. */if (unlikely(prev->policy == SCHED_RR))if (!prev->counter) {prev->counter = NICE_TO_TICKS(prev->nice);move_last_runqueue(prev);}switch (prev->state) {case TASK_INTERRUPTIBLE:if (signal_pending(prev)) {prev->state = TASK_RUNNING;break;}default:del_from_runqueue(prev);case TASK_RUNNING:;}prev->need_resched = 0;/* * this is the scheduler proper: */repeat_schedule:/* * Default process to select.. */next = idle_task(this_cpu);c = -1000;list_for_each(tmp, &runqueue_head) {p = list_entry(tmp, struct task_struct, run_list);if (can_schedule(p, this_cpu)) {int weight = goodness(p, this_cpu, prev->active_mm);if (weight > c)c = weight, next = p;}}/* Do we need to re-calculate counters? */if (unlikely(!c)) {struct task_struct *p;spin_unlock_irq(&runqueue_lock);read_lock(&tasklist_lock);for_each_task(p)p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);read_unlock(&tasklist_lock);spin_lock_irq(&runqueue_lock);goto repeat_schedule;}/* * from this point on nothing can prevent us from * switching to the next task, save this fact in * sched_data. */sched_data->curr = next;task_set_cpu(next, this_cpu);spin_unlock_irq(&runqueue_lock);if (unlikely(prev == next)) {/* We won't go through the normal tail, so do this by hand */prev->policy &= ~SCHED_YIELD;goto same_process;}#ifdef CONFIG_SMP /* * maintain the per-process 'last schedule' value. 
* (this has to be recalculated even if we reschedule to * the same process) Currently this is only used on SMP, * and it's approximate, so we do not have to maintain * it while holding the runqueue spinlock. */ sched_data->last_schedule = get_cycles();/* * We drop the scheduler lock early (it's a global spinlock), * thus we have to lock the previous process from getting * rescheduled during switch_to(). */#endif /* CONFIG_SMP */kstat.context_swtch++;/* * there are 3 processes which are affected by a context switch: * * prev == .... ==> (last => next) * * It's the 'much more previous' 'prev' that is on next's stack, * but prev is set to (the just run) 'last' process by switch_to(). * This might sound slightly confusing but makes tons of sense. */prepare_to_switch();{struct mm_struct *mm = next->mm;struct mm_struct *oldmm = prev->active_mm;if (!mm) {if (next->active_mm) BUG();next->active_mm = oldmm;atomic_inc(&oldmm->mm_count);enter_lazy_tlb(oldmm, next, this_cpu);} else {if (next->active_mm != mm) BUG();switch_mm(oldmm, mm, next, this_cpu);}if (!prev->mm) {prev->active_mm = NULL;mmdrop(oldmm);}}/* * This just switches the register state and the * stack. */switch_to(prev, next, prev);__schedule_tail(prev);same_process:reacquire_kernel_lock(current);if (current->need_resched)goto need_resched_back;return;}
相关函数修改:
1. 唤醒函数整合, 添加了参数有效性代码
/*
 * try_to_wake_up() - make 'p' runnable.
 *
 * @p:           task to wake.
 * @synchronous: when non-zero, reschedule_idle() is skipped as long as
 *               the current CPU's bit is set in p->cpus_allowed.
 *
 * With runqueue_lock held and interrupts saved/disabled, p->state is set
 * to TASK_RUNNING. If p was not yet on the run queue it is added, and
 * reschedule_idle(p) is called subject to the rule above.
 *
 * Returns 1 when p was added to the run queue by this call, 0 when it
 * was already on it.
 */
static inline int try_to_wake_up(struct task_struct * p, int synchronous)
{
	unsigned long irq_state;
	int woken = 0;

	spin_lock_irqsave(&runqueue_lock, irq_state);
	p->state = TASK_RUNNING;
	if (!task_on_runqueue(p)) {
		add_to_runqueue(p);
		if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
			reschedule_idle(p);
		woken = 1;
	}
	spin_unlock_irqrestore(&runqueue_lock, irq_state);

	return woken;
}

/* Non-synchronous wake-up: try_to_wake_up() with synchronous == 0. */
inline int wake_up_process(struct task_struct * p)
{
	return try_to_wake_up(p, 0);
}
/*
 * __wake_up() - wake waiters on 'q' via __wake_up_common() with the
 * last argument set to 0.
 *
 * @q:    wait-queue head; a NULL q is ignored.
 * @mode: passed through to __wake_up_common().
 * @nr:   passed through to __wake_up_common().
 *
 * The walk runs under q->lock (read lock, irqs saved).
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
{
	if (q) {
		unsigned long flags;

		wq_read_lock_irqsave(&q->lock, flags);
		__wake_up_common(q, mode, nr, 0);
		wq_read_unlock_irqrestore(&q->lock, flags);
	}
}

/*
 * __wake_up_sync() - same as __wake_up(), but calls __wake_up_common()
 * with the last argument set to 1.
 */
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
{
	if (q) {
		unsigned long flags;

		wq_read_lock_irqsave(&q->lock, flags);
		__wake_up_common(q, mode, nr, 1);
		wq_read_unlock_irqrestore(&q->lock, flags);
	}
}
- Linux shedule 的发展历史.
- Linux发展的历史脉络
- unix linux的历史、区别以及发展
- Linux历史及其发展
- UNIX/Linux发展历史
- Linux发展历史
- 计算机的发展历史
- RSS的发展历史
- ERP的发展历史
- FreeBSD的发展历史
- Flash的发展历史
- 计算机的发展历史
- Unix的历史发展
- FreeBSD的发展历史
- Unix的发展历史
- Symbian的历史发展
- 外汇的发展历史
- vb的发展历史
- Opencv源码解析------二值化
- 前端资源工具网址
- ActiveMQ初识及安装
- 文本框输入限制【不允许空格、只允许数字】
- 表单自动提交注意事项
- Linux shedule 的发展历史.
- 游戏任务成就体系的实现(一):业务分析及技术架构
- linux命令大全之ps命令详解(查看进程结果)
- nginx详细配置
- 常用NoSQL比较
- POJ 3259 Wormholes (bellman_ford算法判负环)
- HTML5表单及其验证
- 写入HDFS
- SyntaxError: Non-ASCII character ‘\xe5′ in file