Linux启动代码分析

来源:互联网 发布:json里面有html标签 编辑:程序博客网 时间:2024/03/29 16:46
本文系转载,原文地址:http://blog.chinaunix.net/u/6071/showart_205152.html
Linux启动代码分析
Kernel: 2.6.10-rc2 Finished: 01/01/05 /* * Activate the first processor. */asmlinkage void __init start_kernel(void){char * command_line;extern struct kernel_param __start___param[], __stop___param[];/* * Interrupts are still disabled. Do necessary setups, then * enable them */lock_kernel();/* 给kernel上锁 */page_address_init();/* 在配置highmem才作工作 */printk(linux_banner);/* 打印kernel版本信息 */setup_arch(&command_line); /* 设置体系结构相关信息,包括页面映射,acpi等 */setup_per_cpu_areas();/* 设置smp中每个cpu区域偏移量信息 *//* * Mark the boot cpu "online" so that it can call console drivers in * printk() and can access its per-cpu storage. 设置引导cpu在工作状态 */smp_prepare_boot_cpu();/* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() * time - but meanwhile we still have a functioning scheduler. */sched_init();/* runqueue设置 */build_all_zonelists();/* 建立分配策略 */page_alloc_init();/* hotplug CPU设置 */printk("Kernel command line: %s\n", saved_command_line);parse_early_param();parse_args("Booting kernel", command_line, __start___param,   __stop___param - __start___param,   &unknown_bootoption);/* 对传入内核参数作分析,并作相应设置 */sort_main_extable();/* 异常处理调用函数表排序 */trap_init();/* 重新设置中断向量表 */rcu_init();/* 初始化RCU(Read-Copy Update),主要是一个per_cpu_rcu_tasklet */init_IRQ();/* 中断服务队列初始化,但没有具体中断处理函数入口,在request_irq()向系统注册 */pidhash_init();/* pidhash表初始化,共5个,是不是每个表中保存不同类型pid? */init_timers();/* 初始化一个per_cpu_tvec_bases队列,并设置TIMER_SOFTIRQ */softirq_init();/* 初始化软中断和tasklet */time_init();/* 硬件时钟及其中断初始化 *//* * HACK ALERT! This is early. We're enabling the console before * we've done PCI setups etc, and console_init() must be aware of * this. But we do want output early, in case something goes wrong. 
*/console_init();if (panic_later)panic(panic_later, panic_param);profile_init();/* profile设置 */local_irq_enable();/* 开中断 */#ifdef CONFIG_BLK_DEV_INITRDif (initrd_start && !initrd_below_start_ok &&initrd_start < min_low_pfn << PAGE_SHIFT) {printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "    "disabling it./n",initrd_start,min_low_pfn << PAGE_SHIFT);initrd_start = 0;}#endifvfs_caches_init_early();/* 初始化dentry和inode缓冲队列的hash表 */mem_init();/* 最后内存初始化,释放前边标志为保留的所有页面 */kmem_cache_init();/* slab初始化 */numa_policy_init();/* ?????????????????????? */if (late_time_init)late_time_init();calibrate_delay();/* 计算BogoMIPS  */pidmap_init();/* 初始化pid位图 */pgtable_cache_init();/* pgd,pmd slab初始化 */prio_tree_init();/* 初始化index_bits_to_maxindex,For (struct page)->mapping->i_map*/anon_vma_init();/* anon_vma slab初始化,用于对rmap支持 */#ifdef CONFIG_X86if (efi_enabled)efi_enter_virtual_mode();#endiffork_init(num_physpages);/* 计算系统最大安全进程数,设置当前进程最大进程数 */proc_caches_init();/* 其他slab初始化 */buffer_init();/* buffer head初始化 */unnamed_dev_init();/* ?????what is idr????? */security_init();/* security 初始化 */vfs_caches_init(num_physpages);/* **vfs需要的cache初始化** */radix_tree_init();/* radix_tree初始化,该功能主要加速look up dirty or writeback pages */signals_init();/* 创建sigqueue slab *//* rootfs populating might need page-writeback */page_writeback_init();/* 计算当前系统vm-radio等,设置是否需要回写操作 */#ifdef CONFIG_PROC_FSproc_root_init();/* proc文件系统初始化,并根据配置建立相应的目录和文件 */#endifcheck_bugs();acpi_early_init(); /* before LAPIC and SMP init *//* Do the rest non-__init'ed, we're now alive */rest_init();/* 建立init进程 */}/* arch/i386/kernel/setup.c *//* * Determine if we were loaded by an EFI loader.  If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures * for initialization.  Note, the efi init code path is determined by the * global efi_enabled. This allows the same kernel image to be used on existing * systems (with a traditional BIOS) as well as on EFI systems. 
* 检测是否是通过EFI引导kernel.如果是,将通过efi导入memmap, systab等,因此用此数据 * 结构进行初始化。 * Note: efi初始化路径是在全局efi_enabled决定的(是否配置efi_enable?)。 */void __init setup_arch(char **cmdline_p){unsigned long max_low_pfn;memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));pre_setup_arch_hook();/* 执行某些体系结构相关的hook程序, i386是空 */early_cpu_init();/* 设置获取的cpu信息 *//* * FIXME: This isn't an official loader_type right * now but does currently work with elilo. * If we were configured as an EFI kernel, check to make * sure that we were loaded correctly from elilo and that * the system table is valid.  If not, then initialize normally. */#ifdef CONFIG_EFIif ((LOADER_TYPE == 0x50) && EFI_SYSTAB)efi_enabled = 1;#endif/* 从setup中取得BIOS自检后取得的信息,复制到内核内存空间中(原来保存在一个临时页面中) */ ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); drive_info = DRIVE_INFO; screen_info = SCREEN_INFO;edid_info = EDID_INFO;apm_info.bios = APM_BIOS_INFO;ist_info = IST_INFO;saved_videomode = VIDEO_MODE;if( SYS_DESC_TABLE.length != 0 ) {MCA_bus = SYS_DESC_TABLE.table[3] &0x2;machine_id = SYS_DESC_TABLE.table[0];machine_submodel_id = SYS_DESC_TABLE.table[1];BIOS_revision = SYS_DESC_TABLE.table[2];}aux_device_present = AUX_DEVICE_INFO;#ifdef CONFIG_BLK_DEV_RAMrd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);#endifARCH_SETUP/* x86系列没有任何的动作 */if (efi_enabled)efi_init();else {printk(KERN_INFO "BIOS-provided physical RAM map:\n");print_memory_map(machine_specific_memory_setup()); /* 处理内存图,最后保存在e820中 */}copy_edd();/* 复制增强磁盘参数(来自setup自检信息),实验性质,CONFIG_EDD */if (!MOUNT_ROOT_RDONLY)root_mountflags &= ~MS_RDONLY;init_mm.start_code = (unsigned long) _text;init_mm.end_code = (unsigned long) _etext;init_mm.end_data = (unsigned long) _edata;init_mm.brk = init_pg_tables_end + PAGE_OFFSET;code_resource.start = virt_to_phys(_text);code_resource.end = virt_to_phys(_etext)-1;data_resource.start = virt_to_phys(_etext);data_resource.end = 
virt_to_phys(_edata)-1;parse_cmdline_early(cmdline_p);/* 分析引导时用户提供的启动参数(例如mem=xxx,acpi=xx,and so on) */max_low_pfn = setup_memory();/* 为页面映射作基础工作(生成map) *//* * NOTE: before this point _nobody_ is allowed to allocate到现在依然不可以用bootmem内存分配器来 * any memory using the bootmem allocator.  Although the分配内存,在执行paging_init()以前必须 * alloctor is now initialised only the first 8Mb of the kernel用alloc_bootmem_low_pages()来分配内存 * virtual address space has been mapped.  All allocations before * paging_init() has completed must use the alloc_bootmem_low_pages() * variant (which allocates DMA'able memory) and care must be taken * not to exceed the 8Mb limit. */#ifdef CONFIG_SMPsmp_alloc_memory(); /* AP processor realmode stacks in low memory 为启动smp其他cpu分配内存 */#endifpaging_init();/* 页面信息初始化 *//* * NOTE: at this point the bootmem allocator is fully available. */#ifdef CONFIG_EARLY_PRINTK{char *s = strstr(*cmdline_p, "earlyprintk=");if (s) {extern void setup_early_printk(char *);setup_early_printk(s);printk("early console enabled/n");}}#endifdmi_scan_machine(); /* DMI=Desktop Management Interface */#ifdef CONFIG_X86_GENERICARCHgeneric_apic_probe(*cmdline_p);/* 检测APIC(高级可编程中断器) */#endifif (efi_enabled)efi_map_memmap();/* * Parse the ACPI tables for possible boot-time SMP configuration. 
*/acpi_boot_init();#ifdef CONFIG_X86_LOCAL_APICif (smp_found_config)get_smp_config();#endifregister_memory(max_low_pfn);/* 对系统I/O资源生成资源树 */#ifdef CONFIG_VT#if defined(CONFIG_VGA_CONSOLE)if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))conswitchp = &vga_con;#elif defined(CONFIG_DUMMY_CONSOLE)conswitchp = &dummy_con;#endif#endif}/* arch/i386/kernel/cpu/common.c */void __init early_cpu_init(void){/* 目前支持9种x386系列cpu,分别赋值给cpu_devs */intel_cpu_init();/* Intel CPU结构赋值 */cyrix_init_cpu();nsc_init_cpu();amd_init_cpu();centaur_init_cpu();transmeta_init_cpu();rise_init_cpu();nexgen_init_cpu();umc_init_cpu();early_cpu_detect();/* 检测cpu信息,并将检测得到信息给boot_cpu_data */#ifdef CONFIG_DEBUG_PAGEALLOC/* pse is not compatible with on-the-fly unmapping, * disable it even if the cpus claim to support it. */clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);disable_pse = 1;#endif}/* arch/i386/kernel/setup.c */static void __init print_memory_map(char *who){int i;for (i = 0; i < e820.nr_map; i++) {printk(" %s: %016Lx - %016Lx ", who,e820.map[i].addr,e820.map[i].addr + e820.map[i].size);switch (e820.map[i].type) {case E820_RAM:printk("(usable)\n");break;case E820_RESERVED:printk("(reserved)\n");break;case E820_ACPI:printk("(ACPI data)\n");break;case E820_NVS:printk("(ACPI NVS)\n");break;default:printk("type %lu\n", e820.map[i].type);break;}}}/* arch/i386/kernel/setup.c */static void __init parse_cmdline_early (char ** cmdline_p){char c = ' ', *to = command_line, *from = saved_command_line;int len = 0;int userdef = 0;/* Save unparsed command line copy for /proc/cmdline */saved_command_line[COMMAND_LINE_SIZE-1] = '\0';for (;;) {/* * "mem=nopentium" disables the 4MB page tables. * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM * to <mem>, overriding the bios size. * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from * <start> to <start>+<mem>, overriding the bios size. 
* * HPA tells me bootloaders need to parse mem=, so no new * option should be mem=  [also see Documentation/i386/boot.txt] */if (c == ' ' && !memcmp(from, "mem=", 4)) {if (to != command_line)to--;if (!memcmp(from+4, "nopentium", 9)) {from += 9+4;clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);disable_pse = 1;} else {/* If the user specifies memory size, we * limit the BIOS-provided memory map to * that size. exactmap can be used to specify * the exact map. mem=number can be used to * trim the existing memory map. */unsigned long long mem_size; mem_size = memparse(from+4, &from);limit_regions(mem_size);userdef=1;}}if (c == ' ' && !memcmp(from, "memmap=", 7)) {if (to != command_line)to--;if (!memcmp(from+7, "exactmap", 8)) {from += 8+7;e820.nr_map = 0;userdef = 1;} else {/* If the user specifies memory size, we * limit the BIOS-provided memory map to * that size. exactmap can be used to specify * the exact map. mem=number can be used to * trim the existing memory map. */unsigned long long start_at, mem_size; mem_size = memparse(from+7, &from);if (*from == '@') {start_at = memparse(from+1, &from);add_memory_region(start_at, mem_size, E820_RAM);} else if (*from == '#') {start_at = memparse(from+1, &from);add_memory_region(start_at, mem_size, E820_ACPI);} else if (*from == '$') {start_at = memparse(from+1, &from);add_memory_region(start_at, mem_size, E820_RESERVED);} else {limit_regions(mem_size);userdef=1;}}}#ifdef  CONFIG_X86_SMP/* * If the BIOS enumerates physical processors before logical, * maxcpus=N at enumeration-time can be used to disable HT. 
*/else if (!memcmp(from, "maxcpus=", 8)) {extern unsigned int maxcpus;maxcpus = simple_strtoul(from + 8, NULL, 0);}#endif#ifdef CONFIG_ACPI_BOOT/* "acpi=off" disables both ACPI table parsing and interpreter */else if (!memcmp(from, "acpi=off", 8)) {disable_acpi();}/* acpi=force to over-ride black-list */else if (!memcmp(from, "acpi=force", 10)) {acpi_force = 1;acpi_ht = 1;acpi_disabled = 0;}/* acpi=strict disables out-of-spec workarounds */else if (!memcmp(from, "acpi=strict", 11)) {acpi_strict = 1;}/* Limit ACPI just to boot-time to enable HT */else if (!memcmp(from, "acpi=ht", 7)) {if (!acpi_force)disable_acpi();acpi_ht = 1;}/* "pci=noacpi" disable ACPI IRQ routing and PCI scan */else if (!memcmp(from, "pci=noacpi", 10)) {acpi_disable_pci();}/* "acpi=noirq" disables ACPI interrupt routing */else if (!memcmp(from, "acpi=noirq", 10)) {acpi_noirq_set();}else if (!memcmp(from, "acpi_sci=edge", 13))acpi_sci_flags.trigger =  1;else if (!memcmp(from, "acpi_sci=level", 14))acpi_sci_flags.trigger = 3;else if (!memcmp(from, "acpi_sci=high", 13))acpi_sci_flags.polarity = 1;else if (!memcmp(from, "acpi_sci=low", 12))acpi_sci_flags.polarity = 3;#ifdef CONFIG_X86_IO_APICelse if (!memcmp(from, "acpi_skip_timer_override", 24))acpi_skip_timer_override = 1;#endif#ifdef CONFIG_X86_LOCAL_APIC/* disable IO-APIC */else if (!memcmp(from, "noapic", 6))disable_ioapic_setup();#endif /* CONFIG_X86_LOCAL_APIC */#endif /* CONFIG_ACPI_BOOT *//* * highmem=size forces highmem to be exactly 'size' bytes.使用用户定义的highmem大小 * This works even on boxes that have no highmem otherwise.即使配置内核没有选择此选项 * This also works to reduce highmem size on bigger boxes.如果选择此选项也可能减少hignmem大小 */if (c == ' ' && !memcmp(from, "highmem=", 8))highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;/* * vmalloc=size forces the vmalloc area to be exactly 'size' * bytes. This can be used to increase (or decrease) the * vmalloc area - the default is 128m. 
用户指定vmalloc大小代替缺省128m */if (c == ' ' && !memcmp(from, "vmalloc=", 8))__VMALLOC_RESERVE = memparse(from+8, &from);c = *(from++);if (!c)break;if (COMMAND_LINE_SIZE <= ++len)break;*(to++) = c;}*to = '\0';*cmdline_p = command_line;if (userdef) {printk(KERN_INFO "user-defined physical RAM map:\n");print_memory_map("user");}}static unsigned long __init setup_memory(void){unsigned long bootmap_size, start_pfn, max_low_pfn;/* * partially used pages are not usable - thus * we are rounding upwards: */start_pfn = PFN_UP(init_pg_tables_end);find_max_pfn();max_low_pfn = find_max_low_pfn();#ifdef CONFIG_HIGHMEMhighstart_pfn = highend_pfn = max_pfn;if (max_pfn > max_low_pfn) {highstart_pfn = max_low_pfn;}printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",pages_to_mb(highend_pfn - highstart_pfn));#endifprintk(KERN_NOTICE "%ldMB LOWMEM available.\n",pages_to_mb(max_low_pfn));/* * Initialize the boot-time allocator (with low memory only): */bootmap_size = init_bootmem(start_pfn, max_low_pfn);/* 设置此区间页面为保留,好像结果在node_data[0]->bdata */register_bootmem_low_pages(max_low_pfn);/* 设置所有可以使用内存页面位图 *//* * Reserve the bootmem bitmap itself as well. We do this in two * steps (first step was init_bootmem()) because this catches * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. */reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));/* 保留内核在内存中的映像 *//* * reserve physical page 0 - it's a special BIOS page on many boxes, * enabling clean reboots, SMP operation, laptop functions. */reserve_bootmem(0, PAGE_SIZE);/* 保留物理页面0, 主要是和启动有关的信息以及bios信息 *//* reserve EBDA region, it's a 4K region */reserve_ebda_region();    /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent       PCI prefetch into it (errata #56). Usually the page is reserved anyways,       unless you have no PS/2 mouse plugged in. 
*/if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&    boot_cpu_data.x86 == 6)     reserve_bootmem(0xa0000 - 4096, 4096);#ifdef CONFIG_SMP/* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */reserve_bootmem(PAGE_SIZE, PAGE_SIZE);/* 在SMP系统中需要使用 */#endif#ifdef CONFIG_ACPI_SLEEP/* * Reserve low memory region for sleep support. */acpi_reserve_bootmem();#endif#ifdef CONFIG_X86_FIND_SMP_CONFIG/* * Find and reserve possible boot-time SMP configuration: */find_smp_config();#endif#ifdef CONFIG_BLK_DEV_INITRDif (LOADER_TYPE && INITRD_START) {if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {reserve_bootmem(INITRD_START, INITRD_SIZE);initrd_start =INITRD_START ? INITRD_START + PAGE_OFFSET : 0;initrd_end = initrd_start+INITRD_SIZE;}else {printk(KERN_ERR "initrd extends beyond end of memory "    "(0x%08lx > 0x%08lx)/ndisabling initrd/n",    INITRD_START + INITRD_SIZE,    max_low_pfn << PAGE_SHIFT);initrd_start = 0;}}#endifreturn max_low_pfn;}/* arch/i386/mm/init.c *//* * paging_init() sets up the page tables - note that the first 8MB are * already mapped by head.S. * 已经有8MB内存在head.S中映射完成 * This routines also unmaps the page at virtual kernel address 0, so * that we can trap those pesky NULL-reference errors in the kernel. */void __init paging_init(void){#ifdef CONFIG_X86_PAEset_nx();if (nx_enabled)printk("NX (Execute Disable) protection: active/n");#endifpagetable_init();/* 修改系统空间页面表信息,原来在系统setup时已经设置好,但都是空的 */load_cr3(swapper_pg_dir);#ifdef CONFIG_X86_PAE/* * We will bail out later - printk doesn't work right now so * the user would just see a hanging kernel. 
*/if (cpu_has_pae)set_in_cr4(X86_CR4_PAE);#endif__flush_tlb_all();/* 刷新mmu */kmap_init();/* highmem使用内存设定 */zone_sizes_init(); /* 内存初始化 pgdat_list->zone */}static void __init pagetable_init (void){unsigned long vaddr;pgd_t *pgd_base = swapper_pg_dir;#ifdef CONFIG_X86_PAE/* 用三级页面映射表(Physical Address Extension) */int i;/* Init entries of the first-level page table to the zero page */for (i = 0; i < PTRS_PER_PGD; i++)/* PTRS_PER_PGD=4 */set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));#endif/* Enable PSE if available(Page Size Extensions)4MB页面表 */if (cpu_has_pse) {set_in_cr4(X86_CR4_PSE);}/* Enable PGE if available (PTE Global Bit)*/if (cpu_has_pge) {set_in_cr4(X86_CR4_PGE);__PAGE_KERNEL |= _PAGE_GLOBAL;__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;}kernel_physical_mapping_init(pgd_base);/* 系统空间映射(0xC0000000..=>0-max_low_pfn) */remap_numa_kva(); /* 重新初始化numa的内核虚拟地址空间???? *//* * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): 固定使用的地址 */vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; /* 在内存高地址区域 */page_table_range_init(vaddr, 0, pgd_base); /* 页面表固定地址初始化,包括acpi地址等 */permanent_kmaps_init(pgd_base);  /* 固定地址初始化(pkmap),此地址干什么用?????是不是用作highmem分配使用 */#ifdef CONFIG_X86_PAE/* * Add low memory identity-mappings - SMP needs it when * starting up on an AP from real-mode. In the non-PAE * case we already have these mappings through head.S. * All user-space mappings are explicitly cleared after * SMP startup. 
*/pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];#endif}/* * This maps the physical memory to kernel virtual address space, a total  * of max_low_pfn pages, by creating page tables starting from address  * PAGE_OFFSET.(映射物理内存到系统空间虚拟地址,共max_low_pfn页面,从0xc0000000地址开始) */static void __init kernel_physical_mapping_init(pgd_t *pgd_base){unsigned long pfn;pgd_t *pgd;pmd_t *pmd;pte_t *pte;int pgd_idx, pmd_idx, pte_ofs;pgd_idx = pgd_index(PAGE_OFFSET);/* 映射开始地址是系统空间 */pgd = pgd_base + pgd_idx;pfn = 0;for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {pmd = one_md_table_init(pgd);/* 初始化二级目录表 */if (pfn >= max_low_pfn)continue;for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;/* Map with big pages if possible, otherwise create normal page tables. */if (cpu_has_pse) {/* 4MB页面表初始化,如果用此,将没有第三级页面 */unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;if (is_kernel_text(address) || is_kernel_text(address2))set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));elseset_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));pfn += PTRS_PER_PTE;} else {pte = one_page_table_init(pmd);for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {if (is_kernel_text(address))set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));elseset_pte(pte, pfn_pte(pfn, PAGE_KERNEL));}}}}}void __init sched_init(void){runqueue_t *rq;int i, j, k;/* 初始化每个cpu运行队列 */for (i = 0; i < NR_CPUS; i++) {prio_array_t *array;rq = cpu_rq(i);spin_lock_init(&rq->lock);rq->active = rq->arrays; /* 活动队列 */rq->expired = rq->arrays + 1; /* 过期队列 */rq->best_expired_prio = MAX_PRIO; /* 优先级最低 */#ifdef CONFIG_SMPrq->sd = &sched_domain_dummy;rq->cpu_load = 0; /* cpu负载 */rq->active_balance = 0;/* ???? */rq->push_cpu = 0;/* ???? 
*/rq->migration_thread = NULL;INIT_LIST_HEAD(&rq->migration_queue);#endifatomic_set(&rq->nr_iowait, 0);for (j = 0; j < 2; j++) {array = rq->arrays + j;for (k = 0; k < MAX_PRIO; k++) {INIT_LIST_HEAD(array->queue + k);__clear_bit(k, array->bitmap);}/* delimiter for bitsearch */__set_bit(MAX_PRIO, array->bitmap);}}/* * The boot idle thread does lazy MMU switching as well: */atomic_inc(&init_mm.mm_count);enter_lazy_tlb(&init_mm, current);/* * Make us the idle thread. Technically, schedule() should not be * called from this thread, however somewhere below it might be, * but because we are the idle thread, we just pick up running again * when this runqueue becomes "idle". */init_idle(current, smp_processor_id()); /* 设置idle进程,并将runqueue中curr指向该进程 */}void __init trap_init(void)/* 中断向量重新设置(在初始化时设置指向ignore_int) */{#ifdef CONFIG_EISAif (isa_readl(0x0FFFD9) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {EISA_bus = 1;}#endif#ifdef CONFIG_X86_LOCAL_APICinit_apic_mappings();#endifset_trap_gate(0,&divide_error);/* 陷阱门设置 */set_intr_gate(1,&debug);/* 中断门设置 */set_intr_gate(2,&nmi);set_system_intr_gate(3, &int3); /* int3-5 can be called from all */set_system_gate(4,&overflow);set_system_gate(5,&bounds);set_trap_gate(6,&invalid_op);set_trap_gate(7,&device_not_available);set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);set_trap_gate(9,&coprocessor_segment_overrun);set_trap_gate(10,&invalid_TSS);set_trap_gate(11,&segment_not_present);set_trap_gate(12,&stack_segment);set_trap_gate(13,&general_protection);set_intr_gate(14,&page_fault);set_trap_gate(15,&spurious_interrupt_bug);set_trap_gate(16,&coprocessor_error);set_trap_gate(17,&alignment_check);#ifdef CONFIG_X86_MCEset_trap_gate(18,&machine_check);#endifset_trap_gate(19,&simd_coprocessor_error);set_system_gate(SYSCALL_VECTOR,&system_call);/* 系统调用中断设置 *//* * Should be a barrier for any external CPU state. 
*/cpu_init();/* 重新装入gdt,ldt */trap_init_hook(); /* do nothing on i386 */}void __init init_IRQ(void){int i;/* all the set up before the call gates are initialised */pre_intr_init_hook();/* 中断请求队列初始化 *//* * Cover the whole vector space, no vector can escape设置中断向量 * us. (some of these will be overridden and become * 'special' SMP interrupts) */for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {int vector = FIRST_EXTERNAL_VECTOR + i;if (i >= NR_IRQS)break;if (vector != SYSCALL_VECTOR) set_intr_gate(vector, interrupt[i]);}/* setup after call gates are initialised (usually add in * the architecture specific gates) 在系统调用初始化完毕后特殊设置,和结构相关  */intr_init_hook();/* * Set the clock to HZ Hz, we already have a valid * vector now: 设置时钟hz */setup_pit_timer();/* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. */if (boot_cpu_data.hard_math && !cpu_has_fpu)setup_irq(FPU_IRQ, &fpu_irq);irq_ctx_init(smp_processor_id());}void __init mem_init(void){extern int ppro_with_ram_bug(void);/* 检测pentium是否是有bug的cpu */int codesize, reservedpages, datasize, initsize;int tmp;int bad_ppro;#ifndef CONFIG_DISCONTIGMEMif (!mem_map)BUG();#endifbad_ppro = ppro_with_ram_bug();#ifdef CONFIG_HIGHMEM/* check that fixmap and pkmap do not overlap 确认fixmap和pkmap没有重叠 */if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {printk(KERN_ERR "fixmap and kmap areas overlap - this will crash/n");printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh/n",PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);BUG();}#endif set_max_mapnr_init();/* 设置highmem区域 */#ifdef CONFIG_HIGHMEMhigh_memory = (void *) __va(highstart_pfn * PAGE_SIZE);#elsehigh_memory = (void *) __va(max_low_pfn * PAGE_SIZE);#endif/* this will put all low memory onto the freelists,根据页面位图释放内存中所有可供动态分配的页面 */totalram_pages += __free_all_bootmem();reservedpages = 0;for (tmp = 0; tmp < max_low_pfn; tmp++)/* * Only count reserved RAM pages */if (page_is_ram(tmp) && 
PageReserved(pfn_to_page(tmp)))reservedpages++;set_highmem_pages_init(bad_ppro);codesize =  (unsigned long) &_etext - (unsigned long) &_text;datasize =  (unsigned long) &_edata - (unsigned long) &_etext;initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); /* 初始化kcore_mem,应该是实际内存? */kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,    VMALLOC_END-VMALLOC_START);/* 虚拟内存初始化 */printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)/n",(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),num_physpages << (PAGE_SHIFT-10),codesize >> 10,reservedpages << (PAGE_SHIFT-10),datasize >> 10,initsize >> 10,(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))       );#ifdef CONFIG_X86_PAEif (!cpu_has_pae)panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");#endifif (boot_cpu_data.wp_works_ok < 0)test_wp_bit();/* * Subtle. SMP is doing it's boot stuff late (because it has to * fork idle threads) - but it also needs low mappings for the * protected-mode entry to work. We zap these entries only after * the WP-bit has been tested. */#ifndef CONFIG_SMPzap_low_mappings();#endif}/* Initialisation. * Called after the gfp() functions have been enabled, and before smp_init(). */void __init kmem_cache_init(void){size_t left_over;struct cache_sizes *sizes;struct cache_names *names;/* * Fragmentation(分裂) resistance(阻力) on low memory - only use bigger * page orders on machines with more than 32MB of memory. */if (num_physpages > (32 << 20) >> PAGE_SHIFT)/* 系统有多于32MB内存 */slab_break_gfp_order = BREAK_GFP_ORDER_HI;/* Bootstrap is tricky, because several objects are allocated * from caches that do not exist yet: * 1) initialize the cache_cache cache: it contains the kmem_cache_t *    structures of all caches, except cache_cache itself: cache_cache *    is statically allocated. 
*    Initially an __init data area is used for the head array, it's *    replaced with a kmalloc allocated array at the end of the bootstrap. * 2) Create the first kmalloc cache. *    The kmem_cache_t for the new cache is allocated normally. An __init *    data area is used for the head array. * 3) Create the remaining kmalloc caches, with minimally sized head arrays. * 4) Replace the __init data head arrays for cache_cache and the first *    kmalloc cache with kmalloc allocated arrays. * 5) Resize the head arrays of the kmalloc caches to their final sizes. *//* 1) create the cache_cache */init_MUTEX(&cache_chain_sem);/* 初始化cache链表信号量 */INIT_LIST_HEAD(&cache_chain);/* 初始化cache链表 */list_add(&cache_cache.next, &cache_chain);/* 是不是把自己加入到队列头???? */cache_cache.colour_off = cache_line_size();/* 128 */cache_cache.array[smp_processor_id()] = &initarray_cache.cache;cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());cache_estimate(0, cache_cache.objsize, cache_line_size(), 0,&left_over, &cache_cache.num);if (!cache_cache.num)BUG();cache_cache.colour = left_over/cache_cache.colour_off;cache_cache.colour_next = 0;cache_cache.slab_size = ALIGN(cache_cache.num*sizeof(kmem_bufctl_t) +sizeof(struct slab), cache_line_size());/* 2+3) create the kmalloc caches */sizes = malloc_sizes;names = cache_names;while (sizes->cs_size) {/* For performance, all the general caches are L1 aligned. * This should be particularly beneficial on SMP boxes, as it * eliminates "false sharing". * Note for systems short on memory removing the alignment will * allow tighter packing of the smaller caches. */sizes->cs_cachep = kmem_cache_create(names->name,sizes->cs_size, ARCH_KMALLOC_MINALIGN,(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);/* Inc off-slab bufctl limit until the ceiling is hit. 
*/if (!(OFF_SLAB(sizes->cs_cachep))) {offslab_limit = sizes->cs_size-sizeof(struct slab);offslab_limit /= sizeof(kmem_bufctl_t);}sizes->cs_dmacachep = kmem_cache_create(names->name_dma,sizes->cs_size, ARCH_KMALLOC_MINALIGN,(ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),NULL, NULL);sizes++;names++;}/* 4) Replace the bootstrap head arrays */{void * ptr;ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);local_irq_disable();BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);memcpy(ptr, ac_data(&cache_cache), sizeof(struct arraycache_init));cache_cache.array[smp_processor_id()] = ptr;local_irq_enable();ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);local_irq_disable();BUG_ON(ac_data(malloc_sizes[0].cs_cachep) != &initarray_generic.cache);memcpy(ptr, ac_data(malloc_sizes[0].cs_cachep),sizeof(struct arraycache_init));malloc_sizes[0].cs_cachep->array[smp_processor_id()] = ptr;local_irq_enable();}/* 5) resize the head arrays to their final sizes */{kmem_cache_t *cachep;down(&cache_chain_sem);list_for_each_entry(cachep, &cache_chain, next)enable_cpucache(cachep);/* 激活cpu缓存 */up(&cache_chain_sem);}/* Done! */g_cpucache_up = FULL;/* Register a cpu startup notifier callback * that initializes ac_data for all new cpus */register_cpu_notifier(&cpucache_notifier);/* The reap timers are started later, with a module init call: * That part of the kernel is not yet operational. */}void __init pidmap_init(void){int i;pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);set_bit(0, pidmap_array->page);atomic_dec(&pidmap_array->nr_free);/* * Allocate PID 0, and hash it via all PID types: */for (i = 0; i < PIDTYPE_MAX; i++)/* 将当前进程加入到hash表中.pid,pgid,tgid,sid */attach_pid(current, i, 0);}/* * We need to finalize in a non-__init function or else race conditions * between the root thread and the init thread may cause start_kernel to * be reaped by free_initmem before the root thread has proceeded to * cpu_idle. 
* * gcc-3.4 accidentally inlines this function, so use noinline. */static void noinline rest_init(void)__releases(kernel_lock){kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); /* 启动init内核进程 */numa_default_policy();unlock_kernel(); cpu_idle();} static int init(void * unused){lock_kernel();/* * Tell the world that we're going to be the grim * reaper of innocent orphaned children. 所有进程的父进程 * * We don't want people to have to make incorrect * assumptions about where in the task array this * can be found. */child_reaper = current;/* Sets up cpus_possible() */smp_prepare_cpus(max_cpus); /*主cpu会依次启动各个从cpu。见smp_boot_cpus->do_boot_cpu()*/do_pre_smp_initcalls();/* 启动migration_thread,ksoftirqd等CPU进程 */fixup_cpu_present_map();smp_init();/* 主要设置APIC */sched_init_smp();/* * Do this before initcalls, because some drivers want to access * firmware files. */populate_rootfs();/* 生成initrd文件 */do_basic_setup();/* * check if there is an early userspace init.  If yes, let it do all * the work */if (sys_access((const char __user *) "/init", 0) == 0)execute_command = "/init";elseprepare_namespace();/* 装载initrd,安装模块,mount根文件系统 *//* * Ok, we have completed the initial bootup, and * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */free_initmem();unlock_kernel();system_state = SYSTEM_RUNNING;numa_default_policy();if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)printk("Warning: unable to open an initial console./n");(void) sys_dup(0);(void) sys_dup(0);/* * We try each of these until one succeeds. * * The Bourne shell can be used instead of init if we are  * trying to recover a really broken machine. */if (execute_command)run_init_process(execute_command);run_init_process("/sbin/init");run_init_process("/etc/init");run_init_process("/bin/init");run_init_process("/bin/sh");panic("No init found.  Try passing init= option to kernel.");}
原创粉丝点击