smp boot up sequence

来源:互联网 发布:英雄联盟观战软件 编辑:程序博客网 时间:2024/05/17 12:53

8. SMP Boot

There are a few SMP-related macros, like CONFIG_SMP, CONFIG_X86_LOCAL_APIC, CONFIG_X86_IO_APIC, CONFIG_MULTIQUAD and CONFIG_VISWS. I will ignore code that requires CONFIG_MULTIQUAD or CONFIG_VISWS, which most people don't care about (unless they are using an IBM high-end multiprocessor server or an SGI Visual Workstation).

BSP executes start_kernel() -> smp_init() -> smp_boot_cpus() -> do_boot_cpu() -> wakeup_secondary_via_INIT() to trigger APs. Check the MultiProcessor Specification and IA-32 Manual Vol.3 (Ch.7. Multiple-Processor Management, and Ch.8. Advanced Programmable Interrupt Controller) for technical details.

8.1. Before smp_init()

Before calling smp_init(), start_kernel() did some work to set up the SMP environment:

start_kernel()
|-- setup_arch()
|   |-- parse_cmdline_early();  // SMP looks for "noht" and "acpismp=force"
|   |   `-- /* "noht" disables HyperThreading (2 logical cpus per Xeon) */
|   |       if (!memcmp(from, "noht", 4)) {
|   |           disable_x86_ht = 1;
|   |           set_bit(X86_FEATURE_HT, disabled_x86_caps);
|   |       }
|   |       /* "acpismp=force" forces parsing and use of the ACPI SMP table */
|   |       else if (!memcmp(from, "acpismp=force", 13))
|   |           enable_acpi_smp_table = 1;
|   |-- setup_memory();         // reserve memory for MP configuration table
|   |   |-- reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
|   |   `-- find_smp_config();
|   |       `-- find_intel_smp();
|   |           `-- smp_scan_config();
|   |               |-- set flag smp_found_config
|   |               |-- set MP floating pointer mpf_found
|   |               `-- reserve_bootmem(mpf_found, PAGE_SIZE);
|   |-- if (disable_x86_ht) {   // if HyperThreading feature disabled
|   |       clear_bit(X86_FEATURE_HT, &boot_cpu_data.x86_capability[0]);
|   |       set_bit(X86_FEATURE_HT, disabled_x86_caps);
|   |       enable_acpi_smp_table = 0;
|   |   }
|   |-- if (test_bit(X86_FEATURE_HT, &boot_cpu_data.x86_capability[0]))
|   |       enable_acpi_smp_table = 1;
|   |-- smp_alloc_memory();
|   |   `-- /* reserve AP processor's real-mode code space in low memory */
|   |       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
|   `-- get_smp_config();     /* get boot-time MP configuration */
|       |-- config_acpi_tables();
|       |   |-- memset(&acpi_boot_ops, 0, sizeof(acpi_boot_ops));
|       |   |-- acpi_boot_ops[ACPI_APIC] = acpi_parse_madt;
|       |   `-- /* Set have_acpi_tables to indicate using
|       |        * MADT in the ACPI tables; Use MPS tables if failed. */
|       |       if (enable_acpi_smp_table && !acpi_tables_init())
|       |           have_acpi_tables = 1;
|       |-- set pic_mode
|       |   /* =1, if the IMCR is present and PIC Mode is implemented;
|       |    * =0, otherwise Virtual Wire Mode is implemented. */
|       |-- save local APIC address in mp_lapic_addr
|       `-- scan for MP configuration table entries, like
|             MP_PROCESSOR, MP_BUS, MP_IOAPIC, MP_INTSRC and MP_LINTSRC.
|-- trap_init();
|   `-- init_apic_mappings();   // setup PTE for APIC
|       |-- /* If no local APIC can be found then set up a fake all
|       |    * zeroes page to simulate the local APIC and another
|       |    * one for the IO-APIC. */
|       |   if (!smp_found_config && detect_init_APIC()) {
|       |       apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
|       |       apic_phys = __pa(apic_phys);
|       |   } else
|       |       apic_phys = mp_lapic_addr;
|       |-- /* map local APIC address,
|       |    *   mp_lapic_addr (0xfee00000) in most case,
|       |    *   to linear address FIXADDR_TOP (0xffffe000) */
|       |   set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
|       |-- /* Fetch the APIC ID of the BSP in case we have a
|       |    * default configuration (or the MP table is broken). */
|       |   if (boot_cpu_physical_apicid == -1U)
|       |       boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
|       `-- // map IOAPIC address to uncacheable linear address
|           set_fixmap_nocache(idx, ioapic_phys);
|       // Now we can use linear address to access APIC space.
|-- init_IRQ();
|   |-- init_ISA_irqs();
|   |   |-- /* An initial setup of the virtual wire mode. */
|   |   |   init_bsp_APIC();
|   |   `-- init_8259A(auto_eoi=0);
|   `-- setup SMP/APIC interrupt handlers, esp. IPI.
`-- mem_init();
    `-- /* delay zapping low mapping entries for SMP: zap_low_mappings() */

IPI (InterProcessor Interrupt), CPU-to-CPU interrupt through local APIC, is the mechanism used by BSP to trigger APs.

Be aware that "one local APIC per CPU is required" in an MP-compliant system. Processors do not share APIC local units address space (physical address 0xFEE00000 - 0xFEEFFFFF), but will share APIC I/O units (0xFEC00000 - 0xFECFFFFF). Both address spaces are uncacheable.

8.2. smp_init()

BSP calls start_kernel() -> smp_init() -> smp_boot_cpus() to set up data structures for each CPU and activate the remaining APs.

///////////////////////////////////////////////////////////////////////////////
/* NOTE(review): this listing is annotated pseudo-code, not compilable C.
 * The `callee(...) { ... }` groups appear to show the callee's body inline,
 * and `x[a..b] = v` appears to abbreviate a loop over that index range. */

/* smp_init(): boots the other processors through smp_boot_cpus(), releases
 * them by setting smp_commenced to 1, then spins until every bit in
 * wait_init_idle has been cleared. */
static void __init smp_init(void)
{
        /* Get other processors into their bootup holding patterns. */
        smp_boot_cpus();
        wait_init_idle = cpu_online_map;
        clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */

        smp_threads_ready=1;
        smp_commence() {
                /* Lets the callins below out of their loop. */
                Dprintk("Setting commenced=1, go go go\n");
                wmb();
                atomic_set(&smp_commenced,1);
        }

        /* Wait for the other cpus to set up their idle processes */
        printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle);
        while (wait_init_idle) {
                cpu_relax();    // i.e. "rep;nop"
                barrier();
        }
        printk("All processors have done init_idle\n");
}

///////////////////////////////////////////////////////////////////////////////
/* smp_boot_cpus(): resets the per-CPU profiling counters and the
 * CPU <-> APIC ID mapping tables, records the boot CPU as CPU 0 and marks it
 * online, sets up its local APIC, then walks bits 0..NR_CPUS-1 and calls
 * do_boot_cpu() for every present CPU other than the boot CPU (subject to
 * max_cpus). Finishes by setting up the APIC clocks, synchronizing the TSC
 * when APs were started, and calling zap_low_mappings(). */
void __init smp_boot_cpus(void)
{
        // ... something not very interesting :-)

        /* Initialize the logical to physical CPU number mapping
         * and the per-CPU profiling router/multiplier */
        prof_counter[0..NR_CPUS-1] = 0;
        prof_old_multiplier[0..NR_CPUS-1] = 0;
        prof_multiplier[0..NR_CPUS-1] = 0;

        init_cpu_to_apicid() {
                physical_apicid_2_cpu[0..MAX_APICID-1] = -1;
                logical_apicid_2_cpu[0..MAX_APICID-1] = -1;
                cpu_2_physical_apicid[0..NR_CPUS-1] = 0;
                cpu_2_logical_apicid[0..NR_CPUS-1] = 0;
        }

        /* Setup boot CPU information */
        smp_store_cpu_info(0); /* Final full version of the data */
        printk("CPU%d: ", 0);
        print_cpu_info(&cpu_data[0]);

        /* We have the boot CPU online for sure. */
        set_bit(0, &cpu_online_map);
        boot_cpu_logical_apicid = logical_smp_processor_id() {
                GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
        }
        map_cpu_to_boot_apicid(0, boot_cpu_apicid) {
               physical_apicid_2_cpu[boot_cpu_apicid] = 0;
               cpu_2_physical_apicid[0] = boot_cpu_apicid;
        }
        global_irq_holder = 0;
        current->processor = 0;
        init_idle();    // will clear corresponding bit in wait_init_idle
        smp_tune_scheduling();

        // ... some conditions checked

        connect_bsp_APIC();     // enable APIC mode if used to be PIC mode
        setup_local_APIC();

        if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
                BUG();

        /* Scan the CPU present map and fire up the other CPUs
         *   via do_boot_cpu() */
        Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
        for (bit = 0; bit < NR_CPUS; bit++) {
                apicid = cpu_present_to_apicid(bit);
                /* Don't even attempt to start the boot CPU! */
                if (apicid == boot_cpu_apicid)
                        continue;

                if (!(phys_cpu_present_map & (1 << bit)))
                        continue;
                if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
                        continue;

                do_boot_cpu(apicid);

                /* Make sure we unmap all failed CPUs */
                if ((boot_apicid_to_cpu(apicid) == -1) &&
                                (phys_cpu_present_map & (1 << bit)))
                        printk("CPU #%d not responding - cannot use it.\n",
                                                                apicid);
        }

        // ... SMP BogoMIPS
        // ... B stepping processor warning
        // ... HyperThreading handling

        /* Set up all local APIC timers in the system */
        setup_APIC_clocks();

        /* Synchronize the TSC with the AP */
        if (cpu_has_tsc && cpucount)
                synchronize_tsc_bp();

smp_done:
        zap_low_mappings();
}

///////////////////////////////////////////////////////////////////////////////
/* do_boot_cpu(): brings up the processor whose APIC ID is `apicid`.
 *   1. Creates an idle task for it with fork_by_hand(), binds the task to
 *      the new CPU number and points its thread.eip at start_secondary.
 *   2. Copies the trampoline code to trampoline_base, publishes the AP's
 *      initial stack in stack_start.esp and writes the warm-reset vector.
 *   3. Kicks the AP with wakeup_secondary_via_INIT() and checks
 *      cpu_callin_map for its response.
 * On failure the CPU's mapping and bitmap entries are undone and cpucount
 * is decremented. */
static void __init do_boot_cpu (int apicid)
{
        cpu = ++cpucount;

        // 1. prepare "idle process" task struct for next AP

        /* We can't use kernel_thread since we must avoid to
         * reschedule the child. */
        if (fork_by_hand() < 0)
                panic("failed fork for CPU %d", cpu);
        /* We remove it from the pidhash and the runqueue
         * once we got the process: */
        idle = init_task.prev_task;
        if (!idle)
                panic("No idle process for CPU %d", cpu);

        /* we schedule the first task manually */
        idle->processor = cpu;
        idle->cpus_runnable = 1 << cpu; // only on this AP!

        map_cpu_to_boot_apicid(cpu, apicid) {
                physical_apicid_2_cpu[apicid] = cpu;
                cpu_2_physical_apicid[cpu] = apicid;
        }

        idle->thread.eip = (unsigned long) start_secondary;

        del_from_runqueue(idle);
        unhash_process(idle);
        init_tasks[cpu] = idle;

        // 2. prepare stack and code (CS:IP) for next AP

        /* start_eip had better be page-aligned! */
        start_eip = setup_trampoline() {
                memcpy(trampoline_base, trampoline_data,
                        trampoline_end - trampoline_data);
                /* trampoline_base was reserved in
                 * start_kernel() -> setup_arch() -> smp_alloc_memory(),
                 * and will be shared by all APs (one by one) */
                return virt_to_phys(trampoline_base);
        }

        /* So we see what's up */
        printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
        stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
        /* this value is used by next AP when it executes
         *   "lss stack_start,%esp" in
         *   linux/arch/i386/kernel/head.S:startup_32(). */

        /* This grunge runs the startup process for
         * the targeted processor. */
        atomic_set(&init_deasserted, 0);
        Dprintk("Setting warm reset code and vector.\n");

        CMOS_WRITE(0xa, 0xf);
        local_flush_tlb();
        Dprintk("1.\n");
        *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
        Dprintk("2.\n");
        *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
        Dprintk("3.\n");
        // we have setup 0:467 to start_eip (trampoline_base)

        // 3. kick AP to run (AP gets CS:IP from 0:467)

        // Starting actual IPI sequence...
        boot_error = wakeup_secondary_via_INIT(apicid, start_eip);

        if (!boot_error) {      // looks OK
                /* allow APs to start initializing. */
                set_bit(cpu, &cpu_callout_map);

                /* ... Wait 5s total for a response */

                // bit cpu in cpu_callin_map is set by AP in smp_callin()
                if (test_bit(cpu, &cpu_callin_map)) {
                        print_cpu_info(&cpu_data[cpu]);
                } else {
                        boot_error= 1;
                        // marker 0xA5 set by AP in trampoline_data()
                        if (*((volatile unsigned char *)phys_to_virt(8192))
                                        == 0xA5)
                                /* trampoline started but... */
                                printk("Stuck ??\n");
                        else
                                /* trampoline code not run */
                                printk("Not responding.\n");
                }
        }

        if (boot_error) {
                /* Try to put things back the way they were before ... */
                unmap_cpu_to_boot_apicid(cpu, apicid);
                clear_bit(cpu, &cpu_callout_map); /* set in do_boot_cpu() */
                clear_bit(cpu, &cpu_initialized); /* set in cpu_init() */
                clear_bit(cpu, &cpu_online_map);  /* set in smp_callin() */
                cpucount--;
        }

        /* mark "stuck" area as not stuck */
        *((volatile unsigned long *)phys_to_virt(8192)) = 0;
}

Don't confuse start_secondary() with trampoline_data(). The former is the EIP value stored in the AP's "idle" process task struct, and the latter is the real-mode code that the AP runs after the BSP kicks it (using wakeup_secondary_via_INIT()).

8.3. linux/arch/i386/kernel/trampoline.S

This file contains the 16-bit real-mode AP startup code. BSP reserved memory space trampoline_base in start_kernel() -> setup_arch() -> smp_alloc_memory(). Before BSP triggers AP, it copies the trampoline code, between trampoline_data and trampoline_end, to trampoline_base (in do_boot_cpu() -> setup_trampoline()). BSP sets up 0:467 to point to trampoline_base, so that AP will run from here.

///////////////////////////////////////////////////////////////////////////////
// Pseudo-code rendering of the AP trampoline: disables interrupts, writes the
// 0xA5A5A5A5 "I am running" marker, loads the IDT/GDT descriptors below,
// sets bit 0 via lmsw to enter protected mode and jumps to CS:100000
// (startup_32() in head.S) with BX = 1.
trampoline_data()
{
r_base:
        wbinvd;         // Needed for NUMA-Q should be harmless for other
        DS = CS;
        BX = 1;         // Flag an SMP trampoline
        cli;

        // write marker for master knows we're running
        trampoline_base = 0xA5A5A5A5;

        lidt idt_48;
        lgdt gdt_48;

        AX = 1;
        lmsw AX;        // protected mode!
        goto flush_instr;
flush_instr:
        goto CS:100000; // see linux/arch/i386/kernel/head.S:startup_32()
}

idt_48:
        .word   0                       # idt limit = 0
        .word   0, 0                    # idt base = 0L

gdt_48:
        .word   0x0800                  # gdt limit = 2048, 256 GDT entries
        .long   gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU)

.globl SYMBOL_NAME(trampoline_end)
SYMBOL_NAME_LABEL(trampoline_end)

Note that BX=1 when AP jumps to linux/arch/i386/kernel/head.S:startup_32(), which is different from that of BSP (BX=0). See Section 6.

8.4. initialize_secondary()

Unlike BSP, at the end of linux/arch/i386/kernel/head.S:startup_32() in Section 6.4, AP will call initialize_secondary() instead of start_kernel().

/* Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure
 * This function must not return. */
void __init initialize_secondary(void)
{
        /* We don't actually need to load the full TSS,
         * basically just the stack pointer and the eip. */
        asm volatile(
                /* %0 = current->thread.esp: switch to the saved stack */
                "movl %0,%%esp\n\t"
                /* %1 = current->thread.eip: jump there instead of returning */
                "jmp *%1"
                :
                :"r" (current->thread.esp),"r" (current->thread.eip));
}

As BSP called do_boot_cpu() to set thread.eip to start_secondary(), control of AP is passed to this function. AP uses a new stack frame, which was set up by BSP in do_boot_cpu() -> fork_by_hand() -> do_fork().

8.5. start_secondary()

All APs wait for signal smp_commenced from BSP, triggered in Section 8.2 smp_init() -> smp_commence(). After getting this signal, they will run "idle" processes.

///////////////////////////////////////////////////////////////////////////////int __init start_secondary(void *unused){        /* Dont put anything before smp_callin(), SMP         * booting is too fragile that we want to limit the         * things done here to the most necessary things. */        cpu_init();        smp_callin();        while (!atomic_read(&smp_commenced))                rep_nop();        /* low-memory mappings have been cleared, flush them from         * the local TLBs too. */        local_flush_tlb();        return cpu_idle();      // never return, see Section 7.3}

cpu_idle() -> init_idle() will clear corresponding bit in wait_init_idle, and finally make BSP finish smp_init() and continue with the following function in start_kernel() (i.e. rest_init()).




from:

http://tldp.org/HOWTO/Linux-i386-Boot-Code-HOWTO/smpboot.html

0 0