Kernel启动流程源码解析 9 sched_init

来源:互联网 发布:矩阵分解 als 编辑:程序博客网 时间:2024/05/01 10:07
一 sched_init

这里只是简单过了一下sched_init,收集一些疑问,以后看书或阅读代码的时候再来寻找答案。

1.0 sched_init

定义在kernel/sched/core.c中
/*
 * sched_init - boot-time initialization of the process scheduler.
 *
 * Called exactly once from start_kernel(), before interrupts and SMP are
 * up.  It (1) carves out the per-CPU pointer arrays used by the root task
 * group when group scheduling is configured, (2) initializes the default
 * root scheduling domain and the real-time bandwidth limits, (3) sets up
 * every possible CPU's runqueue, (4) turns the current (boot) thread into
 * the idle task of the boot CPU, and (5) flips scheduler_running so later
 * code may rely on the scheduler being usable.
 */
void __init sched_init(void)
{
    int i, j;
    unsigned long alloc_size = 0, ptr;

    /* Size one contiguous allocation for all group-scheduling pointer
     * arrays: one sched-entity pointer plus one runqueue pointer per CPU
     * for each enabled scheduling class, plus off-stack cpumask storage. */
#ifdef CONFIG_FAIR_GROUP_SCHED
    alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
#ifdef CONFIG_RT_GROUP_SCHED
    alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
    alloc_size += num_possible_cpus() * cpumask_size();
#endif
    if (alloc_size) {
        /* NOTE(review): the allocation result is used without a NULL
         * check — presumably acceptable this early in boot, but worth
         * confirming against the upstream tree. */
        ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); // allocate the backing memory

#ifdef CONFIG_FAIR_GROUP_SCHED
        root_task_group.se = (struct sched_entity **)ptr; // per-CPU scheduling entities of the root task group
        ptr += nr_cpu_ids * sizeof(void **);

        root_task_group.cfs_rq = (struct cfs_rq **)ptr; // per-CPU CFS runqueues of the root task group (CFS: Completely Fair Scheduler)
        ptr += nr_cpu_ids * sizeof(void **);

#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_RT_GROUP_SCHED
        root_task_group.rt_se = (struct sched_rt_entity **)ptr; // per-CPU real-time scheduling entities of the root task group
        ptr += nr_cpu_ids * sizeof(void **);

        root_task_group.rt_rq = (struct rt_rq **)ptr; // per-CPU real-time runqueues of the root task group
        ptr += nr_cpu_ids * sizeof(void **);

#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CPUMASK_OFFSTACK
        /* Hand each CPU its slice of off-stack cpumask storage. */
        for_each_possible_cpu(i) {
            per_cpu(load_balance_mask, i) = (void *)ptr;
            ptr += cpumask_size();
        }
#endif /* CONFIG_CPUMASK_OFFSTACK */
    }

#ifdef CONFIG_SMP
    init_defrootdomain(); // initialize the default root scheduling domain
#endif

    init_rt_bandwidth(&def_rt_bandwidth,
            global_rt_period(), global_rt_runtime()); // cap how much CPU time real-time tasks may consume per period

#ifdef CONFIG_RT_GROUP_SCHED
    init_rt_bandwidth(&root_task_group.rt_bandwidth,
            global_rt_period(), global_rt_runtime());
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_CGROUP_SCHED
    list_add(&root_task_group.list, &task_groups); // register root_task_group on the global task_groups list
    INIT_LIST_HEAD(&root_task_group.children);
    INIT_LIST_HEAD(&root_task_group.siblings);
    autogroup_init(&init_task); // initialize init_task->signal->autogroup

#endif /* CONFIG_CGROUP_SCHED */

    for_each_possible_cpu(i) { // walk every possible CPU and initialize its runqueue
        struct rq *rq;

        rq = cpu_rq(i);
        raw_spin_lock_init(&rq->lock);
        rq->nr_running = 0;
        rq->calc_load_active = 0;
        rq->calc_load_update = jiffies + LOAD_FREQ;
        init_cfs_rq(&rq->cfs);
        init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
        root_task_group.shares = ROOT_TASK_GROUP_LOAD;
        INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
        /*
         * How much cpu bandwidth does root_task_group get?
         *
         * In case of task-groups formed thr' the cgroup filesystem, it
         * gets 100% of the cpu resources in the system. This overall
         * system cpu resource is divided among the tasks of
         * root_task_group and its child task-groups in a fair manner,
         * based on each entity's (task or task-group's) weight
         * (se->load.weight).
         *
         * In other words, if root_task_group has 10 tasks of weight
         * 1024) and two child groups A0 and A1 (of weight 1024 each),
         * then A0's share of the cpu resource is:
         *
         *    A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
         *
         * We achieve this by letting root_task_group's tasks sit
         * directly in rq->cfs (i.e root_task_group->se[] = NULL).
         */
        init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
        init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */

        rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
        INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
        init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif

        for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
            rq->cpu_load[j] = 0;

        rq->last_load_update_tick = jiffies;

#ifdef CONFIG_SMP
        rq->sd = NULL;
        rq->rd = NULL;
        rq->cpu_power = SCHED_POWER_SCALE;
        rq->post_schedule = 0;
        rq->active_balance = 0;
        rq->next_balance = jiffies;
        rq->push_cpu = 0;
        rq->cpu = i;
        rq->online = 0;
        rq->idle_stamp = 0;
        rq->avg_idle = 2*sysctl_sched_migration_cost;
        rq->cur_freq = 0;
        rq->max_freq = 0;
        rq->min_freq = 0;
        rq->cumulative_runnable_avg = 0;

        INIT_LIST_HEAD(&rq->cfs_tasks);

        rq_attach_root(rq, &def_root_domain); // attach this runqueue to the default root domain
#ifdef CONFIG_NO_HZ_COMMON
        rq->nohz_flags = 0;
#endif
#ifdef CONFIG_NO_HZ_FULL
        rq->last_sched_tick = 0;
#endif
#endif
        init_rq_hrtick(rq);
        atomic_set(&rq->nr_iowait, 0);
    }

    set_load_weight(&init_task); // set init_task's load weight from its priority

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* Fix: the original listing had this comment as bare text after the
     * semicolon (a syntax error); it belongs in a comment. */
    INIT_HLIST_HEAD(&init_task.preempt_notifiers); // initialize init_task's preempt_notifiers notifier list
#endif

#ifdef CONFIG_RT_MUTEXES
    plist_head_init(&init_task.pi_waiters);
#endif

    /*
     * The boot idle thread does lazy MMU switching as well:
     */
    atomic_inc(&init_mm.mm_count);
    enter_lazy_tlb(&init_mm, current); // a no-op on arm64

    /*
     * Make us the idle thread. Technically, schedule() should not be
     * called from this thread, however somewhere below it might be,
     * but because we are the idle thread, we just pick up running again
     * when this runqueue becomes "idle".
     */
    init_idle(current, smp_processor_id()); // turn the current task (init_task) into this CPU's idle task

    calc_load_update = jiffies + LOAD_FREQ; // schedule the next load-average update; LOAD_FREQ is 5 seconds

    /*
     * During early bootup we pretend to be a normal task:
     */
    current->sched_class = &fair_sched_class; // put the current task (init_task) under the CFS scheduling class

#ifdef CONFIG_SMP
    zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
    /* May be allocated at isolcpus cmdline parse time */
    if (cpu_isolated_map == NULL)
        zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
    idle_thread_set_boot_cpu(); // point the boot CPU's per-cpu idle_threads at the current task
#endif
    init_sched_fair_class(); // initialize the CFS scheduling class (softirq etc.)

    scheduler_running = 1; // mark the scheduler as fully initialized
}

0 0