sys_nice源码分析

来源：互联网发布：数据资源共享交换平台编辑：程序博客网时间：2024/05/21 07:05

sys_nice源码分析

sys_nice系统调用用于改变进程的优先级，下面来看。

sys_nice
kernel/sched/core.c

/*
 * sys_nice - change the nice value of the calling task by a relative amount.
 * @increment: requested change; clamped to [-NICE_WIDTH, NICE_WIDTH].
 *
 * The new nice value is the current one plus @increment, clamped to
 * [MIN_NICE, MAX_NICE].  A negative increment (a lower nice value) is only
 * applied when can_nice() allows the resulting value.
 *
 * Returns 0 on success, or -EPERM when can_nice() rejects the request.
 */
SYSCALL_DEFINE1(nice, int, increment)
{
    long nice;

    increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH);
    nice = task_nice(current) + increment;
    nice = clamp_val(nice, MIN_NICE, MAX_NICE);

    /* Only a request that lowers the nice value needs a permission check. */
    if (increment < 0 && !can_nice(current, nice))
        return -EPERM;

    set_user_nice(current, nice);
    return 0;
}

clamp宏将increment变量限制在[-NICE_WIDTH, NICE_WIDTH]闭区间内。NICE_WIDTH的默认值为40，即用户通过nice系统调用提供的优先级增量会被限制在-40到40的范围内。

/*
 * clamp - restrict val to the inclusive range [lo, hi].
 * max(val, lo) is cast back to the type of val before min() compares it
 * with hi.
 */
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)

接下来通过task_nice函数获取进程当前优先级对应的nice值，并与increment相加获得新的nice值。
clamp_val宏和clamp类似，将nice值限制在[MIN_NICE, MAX_NICE]范围内。MIN_NICE为-20，MAX_NICE为19。
再往下调用can_nice函数检查新的nice值是否会超过系统的限制值。
最后通过set_user_nice函数将新的nice值设置到task_struct中。

sys_nice->task_nice
include/linux/sched.h

static inline int task_nice(const struct task_struct *p){    return PRIO_TO_NICE((p)->static_prio);}#define PRIO_TO_NICE(prio)  ((prio) - DEFAULT_PRIO)#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)#define MAX_USER_RT_PRIO    100#define MAX_RT_PRIO     MAX_USER_RT_PRIO

task_nice首先从进程task_struct结构中获得静态优先级static_prio，然后通过PRIO_TO_NICE宏将其转化成nice值。PRIO_TO_NICE宏其实就是(prio - 120)。相反，NICE_TO_PRIO宏其实就是(nice + 120)。

sys_nice->can_nice
kernel/sched/core.c

/*
 * nice_to_rlimit - convert a nice value to the RLIMIT_NICE scale.
 * @nice: nice value to convert.
 *
 * The mapping is reversed: the result is MAX_NICE - nice + 1, so a lower
 * nice value produces a larger rlimit-style value.
 */
static inline long nice_to_rlimit(long nice)
{
    return (MAX_NICE - nice + 1);
}

/*
 * can_nice - check whether task p may be given the nice value @nice.
 * @p: the task in question.
 * @nice: the nice value being requested.
 *
 * Returns non-zero when the rlimit-style value of @nice does not exceed
 * the task's RLIMIT_NICE limit, zero otherwise.
 *
 * NOTE(review): the upstream kernel version also accepts callers that hold
 * CAP_SYS_NICE; this excerpt shows only the rlimit comparison -- confirm
 * against the kernel tree before relying on it.
 */
int can_nice(const struct task_struct *p, const int nice)
{
    int nice_rlim = nice_to_rlimit(nice);

    return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
}

nice_to_rlimit将nice值从-20到19反向对应到1到40（MAX_NICE为19，因此结果等于20 - nice），即-20对应40，19对应1。
转化后检查该值是否小于等于进程的RLIMIT_NICE资源限制值。task_rlimit并不是系统调用，它直接从进程的signal->rlim数组中读取该资源的软限制值（rlim_cur）；该限制值并非在内核启动时固定，可以通过setrlimit/prlimit系统调用修改。

sys_nice->set_user_nice
kernel/sched/core.c

/*
 * set_user_nice - apply a new nice value to a task.
 * @p: the task to update.
 * @nice: the new nice value.
 *
 * Runs with the task's runqueue locked.  For deadline and realtime policies
 * only static_prio is updated.  For every other policy the task is taken off
 * the runqueue (if it was queued), its static_prio, load weight and prio are
 * recomputed, and it is put back; a reschedule of the runqueue's current
 * task is requested when the priority value dropped, or when it rose while
 * the task was running.
 */
void set_user_nice(struct task_struct *p, long nice)
{
    struct rq *rq;
    unsigned long flags;
    int was_queued, prev_prio, prio_delta;

    rq = task_rq_lock(p, &flags);

    /* Deadline/realtime tasks: record the new static_prio and stop. */
    if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
        p->static_prio = NICE_TO_PRIO(nice);
        goto out_unlock;
    }

    /* Take the task off the runqueue while its weight is changed. */
    was_queued = task_on_rq_queued(p);
    if (was_queued)
        dequeue_task(rq, p, 0);

    p->static_prio = NICE_TO_PRIO(nice);
    set_load_weight(p);

    prev_prio = p->prio;
    p->prio = effective_prio(p);
    prio_delta = p->prio - prev_prio;

    if (was_queued) {
        enqueue_task(rq, p, 0);
        /*
         * Reschedule if the prio value decreased, or if it increased
         * while the task was running.
         */
        if (prio_delta < 0 || (prio_delta > 0 && task_running(rq, p)))
            resched_curr(rq);
    }

out_unlock:
    task_rq_unlock(rq, p, &flags);
}

task_rq_lock获得进程p所属的运行队列rq并对其加锁，函数返回前再通过task_rq_unlock解锁。

如果当前进程的调度策略是SCHED_DEADLINE、SCHED_FIFO和SCHED_RR的一种，则直接通过NICE_TO_PRIO宏将nice值转化为优先级值并设置到static_prio中即可。
SCHED_DEADLINE采用了EDF调度算法，主要针对运行时间比较敏感的进程，SCHED_FIFO和SCHED_RR调度策略主要是针对实时进程。

如果是其他的调度策略，最典型的是SCHED_NORMAL，此时首先通过task_on_rq_queued检查当前进程是否在运行队列上，如果是则返回1，否则返回0。如果当前进程在运行队列上，就要通过dequeue_task函数将其移除当前运行队列，等待重新设置进程权重后再放回运行队列。dequeue_task函数和后面的enqueue_task函数在《enqueue_task和dequeue_task源码分析》文章中分析了。

接下来也要设置static_prio。根据前面的分析，nice值的范围是-20到19，加上120后，该值的范围在100到139之间。

set_load_weight函数根据当前进程的静态优先级设置其对应的调度实体sched_entity的权重，内核在调度进程时，最终会通过计算该权重将进程对应的调度实体插入到一个红黑树中，然后再从该树中找到一个最合适的进程运行。

下面以CFS调度策略为例，effective_prio对于CFS调度的普通进程而言其实就是获得static_prio。再计算delta，表示新旧两个prio（对普通进程而言即static_prio）的差。然后如果前面将进程从运行队列中出队，这里就要通过enqueue_task函数将其重新入队。如果delta小于0，表示进程的优先级提高；如果delta大于0并且此时进程正在运行，表示正在运行的进程优先级降低。这两种情况都要通过resched_curr函数设置运行队列当前正在运行的进程的TIF_NEED_RESCHED标志位，让其重新调度一次。task_running函数检查进程是否正在运行中。

/*
 * task_running - report whether a task is running.
 * @rq: runqueue argument; not consulted by this variant.
 * @p: the task to test.
 *
 * Returns the value of p->on_cpu.
 */
static inline int task_running(struct rq *rq, struct task_struct *p)
{
    return p->on_cpu;
}

sys_nice->set_user_nice->task_has_dl_policy
kernel/sched/sched.h

static inline int task_has_dl_policy(struct task_struct *p){    return dl_policy(p->policy);}static inline int dl_policy(int policy){    return policy == SCHED_DEADLINE;}static inline int task_has_rt_policy(struct task_struct *p){    return rt_policy(p->policy);}static inline int rt_policy(int policy){    return policy == SCHED_FIFO || policy == SCHED_RR;}

task_has_dl_policy函数通过dl_policy函数检查进程的调度策略是否是SCHED_DEADLINE；task_has_rt_policy函数通过rt_policy函数检查进程的调度策略是否是SCHED_FIFO或SCHED_RR。

sys_nice->set_user_nice->set_load_weight
kernel/sched/core.c

/*
 * set_load_weight - set the load weight of task p from its static priority.
 * Uses (p->static_prio - MAX_RT_PRIO) as the index into prio_to_weight and
 * prio_to_wmult and stores the results in p->se.load.weight and
 * p->se.load.inv_weight.  SCHED_IDLE tasks take a separate path that
 * returns early; its body is elided ("...") in this excerpt.
 */
static void set_load_weight(struct task_struct *p){    int prio = p->static_prio - MAX_RT_PRIO;    struct load_weight *load = &p->se.load;    if (p->policy == SCHED_IDLE) {        ...        return;    }    load->weight = prio_to_weight[prio];    load->inv_weight = prio_to_wmult[prio];}

根据前面的分析可知这里的静态优先级static_prio的范围在100到139之间，减去MAX_RT_PRIO即100后，prio的范围限制在0到39之间，正好对应下面两个长度为40的数组的下标范围。
接下来获得进程对应的调度实体sched_entity的权重load_weight，这里只考虑使用CFS策略调度的普通进程，最后将刚刚计算的prio值作为数组下标，在prio_to_weight和prio_to_wmult数组中查找对应的权重值及它的倒数，并设置到load_weight的weight和inv_weight变量中。

/*
 * Lookup tables with 40 entries each, indexed by set_load_weight() with
 * (static_prio - MAX_RT_PRIO).  The inline markers label every fifth slot
 * with its nice level, from -20 at index 0 up to 15 at index 35.
 *
 * prio_to_weight holds the load weight for each level (1024 at the slot
 * marked 0).  prio_to_wmult holds the matching precomputed multiplier; the
 * values are approximately 2^32 / weight (4194304 for weight 1024).
 */
static const int prio_to_weight[40] = { /* -20 */     88761,     71755,     56483,     46273,     36291, /* -15 */     29154,     23254,     18705,     14949,     11916, /* -10 */      9548,      7620,      6100,      4904,      3906, /*  -5 */      3121,      2501,      1991,      1586,      1277, /*   0 */      1024,       820,       655,       526,       423, /*   5 */       335,       272,       215,       172,       137, /*  10 */       110,        87,        70,        56,        45, /*  15 */        36,        29,        23,        18,        15,};static const u32 prio_to_wmult[40] = { /* -20 */     48388,     59856,     76040,     92818,    118348, /* -15 */    147320,    184698,    229616,    287308,    360437, /* -10 */    449829,    563644,    704093,    875809,   1099582, /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326, /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587, /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126, /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717, /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,};

设置prio_to_wmult数组是为了提升算法速度，例如在CFS调度策略中，会选择一个进程并计算其运行的虚拟时间，这个计算过程就会使用进程权重的倒数，因此在这里提前计算。

sys_nice->set_user_nice->effective_prio
kernel/sched/core.c

static int effective_prio(struct task_struct *p){    p->normal_prio = normal_prio(p);    if (!rt_prio(p->prio))        return p->normal_prio;    return p->prio;}static inline int normal_prio(struct task_struct *p){    int prio;    if (task_has_dl_policy(p))        prio = MAX_DL_PRIO-1;    else if (task_has_rt_policy(p))        prio = MAX_RT_PRIO-1 - p->rt_priority;    else        prio = __normal_prio(p);    return prio;}static inline int __normal_prio(struct task_struct *p){    return p->static_prio;}

effective_prio对于采用CFS调度策略的普通进程而言，最终返回的就是进程的static_prio。

sys_nice->set_user_nice->resched_curr
kernel/sched/core.c

/*
 * resched_curr - flag the task currently running on rq for rescheduling.
 * Returns at once if rq->curr already has TIF_NEED_RESCHED set.  When rq
 * belongs to the executing CPU (cpu_of(rq) == smp_processor_id()), it sets
 * TIF_NEED_RESCHED on rq->curr and calls set_preempt_need_resched(); the
 * handling for a different CPU is elided ("...") in this excerpt.
 *
 * test_tsk_need_resched / set_tsk_need_resched test and set the
 * TIF_NEED_RESCHED thread flag of a task.
 * set_preempt_need_resched ANDs __preempt_count with ~PREEMPT_NEED_RESCHED,
 * i.e. it clears that bit (NOTE(review): presumably the bit is stored
 * inverted, so clearing it signals a pending reschedule -- confirm).
 */
void resched_curr(struct rq *rq){    struct task_struct *curr = rq->curr;    int cpu;    if (test_tsk_need_resched(curr))        return;    cpu = cpu_of(rq);    if (cpu == smp_processor_id()) {        set_tsk_need_resched(curr);        set_preempt_need_resched();        return;    }    ...}static inline int test_tsk_need_resched(struct task_struct *tsk){    return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));}static inline void set_tsk_need_resched(struct task_struct *tsk){    set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);}static __always_inline void set_preempt_need_resched(void){    raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);}

test_tsk_need_resched检查thread_info结构的标志位中是否已经设置了TIF_NEED_RESCHED。
cpu_of宏获得运行队列rq对应的cpu，smp_processor_id宏则获得当前进程对应的cpu的id。下面只考虑两者相等的情况，此时，通过set_tsk_need_resched增加TIF_NEED_RESCHED到进程thread_info结构的标志位，再通过set_preempt_need_resched函数清除per-cpu变量__preempt_count中的PREEMPT_NEED_RESCHED位（该位采用反向逻辑，清零表示需要重新调度）。

阅读全文

0 0