FreeBSD 基礎建設 - Context Switch

来源:互联网 发布:米表cms 编辑:程序博客网 时间:2024/05/17 02:14
FreeBSD 基礎建設 - Context Switch
by thinker
2 Columns
關鍵字:
FreeBSD

Context Switch

在 multi-tasking 的作業系統中,kernel 不斷的切換 CPU 於各個工作間。FreeBSD kernel 以 thread 為工作的單位,透過不斷的切換,達成多工的目的。切換 thread 的過程,可以切割成三個步驟:

  1. 選擇下一個 thread
  2. 保存目前 thread 的狀態
  3. 切換到新的 thread
這裡不討論如何選擇下一個 thread,只討論保存和切換 thread 的方法。

下面是 FreeBSD kernel 實作 context switch 的三個重要 function:

  • mi_switch(int flags, struct thread *newtd) /* in kern_synch.c */
  • void sched_switch(struct thread *td, struct thread *newtd, int flags) /* in sched_*.c */
  • void cpu_switch(struct thread *old, struct thread *new) /* in sys/proc.h , i386/i386/swtch.s */

cpu_switch()

cpu_switch() 將 CPU 狀態儲存在 thread object 的 PCB 裡,並從 new thread 的 PCB 裡,將 CPU 狀態回復,以切換成 new thread 。由於 CPU 的不同,CPU 的狀態內容也不同,下面是 i386 定義的 PCB ,儲存 CPU 的 register 內容和 gs 和 fs 的 segment descriptor 。PCB 內容,於 kernel 其它部分而言,是一個黑盒子,不知其內容,只是將之視為一塊存放 CPU 狀態的記憶體。

/*
 * i386 process control block: the per-thread save area for CPU state.
 * cpu_switch() stores the outgoing thread's registers here and reloads
 * the incoming thread's registers from here.
 * NOTE(review): the assembly reaches these fields through PCB_* offset
 * constants, presumably derived from this layout -- confirm before
 * reordering or resizing any field.
 */
struct pcb {                                                                    
/* Page directory base; cpu_switch() loads it into %cr3 on switch-in. */
int pcb_cr3;
/* General registers saved and restored by cpu_switch(). */
int pcb_edi;
int pcb_esi;
int pcb_ebp;
int pcb_esp;
int pcb_ebx;
/* Resume address: the return address found at (%esp) in cpu_switch(). */
int pcb_eip;

/* Debug registers; saved/restored only when PCB_DBREGS is set in pcb_flags. */
int pcb_dr0;
int pcb_dr1;
int pcb_dr2;
int pcb_dr3;
int pcb_dr6;
int pcb_dr7;

/* FPU save area; presumably what npxsave() fills in -- TODO confirm. */
union savefpu pcb_save;
/* Bit mask built from the flag values defined below. */
u_int pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
#define PCB_NPXTRAP 0x04 /* npx trap pending */
#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */
#define PCB_VM86CALL 0x10 /* in vm86 call */

caddr_t pcb_onfault; /* copyin/out fault recovery */
/* Saved %gs selector; reloaded at cpu_switch_load_gs. */
int pcb_gs;
/*
 * %fs and %gs segment descriptors. cpu_switch() copies them into the
 * per-CPU GDT as 16 contiguous bytes starting at pcb_fsd, so pcb_gsd
 * must stay immediately after pcb_fsd.
 */
struct segment_descriptor pcb_fsd;
struct segment_descriptor pcb_gsd;
/* When non-NULL, cpu_switch() takes a private TSS descriptor from it. */
struct pcb_ext *pcb_ext; /* optional pcb extension */
/* Saved EFLAGS (pushfl on save, popfl on restore). */
int pcb_psl; /* process status long */
u_long pcb_vm86[2]; /* vm86bios scratch space */
};

下面程式碼,是取自 i386/i386/swtch.s ,儲存 CPU 狀態,包括 eip (返回位址)、ebx、esp、ebp、esi、edi、gs 和 EFLAGS 的內容;必要時也會儲存 debug register 和 FPU 狀態。

/*
 * void cpu_switch(struct thread *old, struct thread *new)
 *
 * Part 1: save the outgoing thread's CPU state into its PCB.
 *
 * In:    4(%esp) = old thread, 8(%esp) = new thread (stack arguments)
 * Uses:  %ecx = old thread, %edx = old thread's PCB, %eax = scratch
 * Saves: return address (as eip), %ebx, %esp, %ebp, %esi, %edi, %gs,
 *        eflags; debug registers only if PCB_DBREGS is set; FPU state
 *        only if this thread is PCPU(FPCURTHREAD).
 */
ENTRY(cpu_switch)

/* Switch to new thread. First, save context. */
movl 4(%esp),%ecx /* %ecx = old thread */

#ifdef INVARIANTS
testl %ecx,%ecx /* no thread? */
jz badsw2 /* no, panic */
#endif

movl TD_PCB(%ecx),%edx /* %edx = old thread's PCB */

movl (%esp),%eax /* Hardware registers */
movl %eax,PCB_EIP(%edx) /* return address becomes the resume eip */
movl %ebx,PCB_EBX(%edx)
movl %esp,PCB_ESP(%edx)
movl %ebp,PCB_EBP(%edx)
movl %esi,PCB_ESI(%edx)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
pushfl /* PSL */
popl PCB_PSL(%edx)
/* Test if debug registers should be saved. */
testl $PCB_DBREGS,PCB_FLAGS(%edx)
jz 1f /* no, skip over */
movl %dr7,%eax /* yes, do the save */
movl %eax,PCB_DR7(%edx)
andl $0x0000fc00, %eax /* disable all watchpoints */
movl %eax,%dr7
movl %dr6,%eax
movl %eax,PCB_DR6(%edx)
movl %dr3,%eax
movl %eax,PCB_DR3(%edx)
movl %dr2,%eax
movl %eax,PCB_DR2(%edx)
movl %dr1,%eax
movl %eax,PCB_DR1(%edx)
movl %dr0,%eax
movl %eax,PCB_DR0(%edx)
1:

#ifdef DEV_NPX
/* have we used fp, and need a save? */
cmpl %ecx,PCPU(FPCURTHREAD)
jne 1f
/*
 * %edx stops pointing at the PCB from here on; that is fine because
 * the next stage reloads %edx from the new thread.
 */
addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */
pushl %edx /* argument: address of the FPU save area */
call npxsave /* do it in a big C function */
popl %eax /* discard the argument */
1:
#endif

將 cr3 切換到新 thread 的 page table (page directory table) 。

        /* Save is done.  Now fire up new thread. Leave old vmspace. */         
/*
 * Part 2: pick up the incoming thread and switch address space.
 * On exit: %ecx = new thread, %edx = new thread's PCB,
 * %esi = PCPU(CPUID); %eax and %ebx are used as scratch.
 * %cr3 is reloaded only when the new PCB's cr3 differs from both the
 * idle page directory (IdlePTD, or IdlePDPT under PAE) and the value
 * currently in %cr3. Otherwise control jumps straight to sw1, which
 * also skips the pm_active update that follows the %cr3 load.
 */
movl 8(%esp),%ecx /* New thread */
#ifdef INVARIANTS
testl %ecx,%ecx /* no thread? */
jz badsw3 /* no, panic */
#endif
movl TD_PCB(%ecx),%edx /* %edx = new thread's PCB */
movl PCPU(CPUID), %esi /* %esi = this CPU's id, used as a bit index later */

/* switch address space */
movl PCB_CR3(%edx),%eax
#ifdef PAE
cmpl %eax,IdlePDPT /* Kernel address space? */
#else
cmpl %eax,IdlePTD /* Kernel address space? */
#endif
je sw1
movl %cr3,%ebx /* The same address space? */
cmpl %ebx,%eax
je sw1
movl %eax,%cr3 /* new address space */

修改新舊 pmap 的 active CPU:在舊 pmap 的 pm_active 中清除目前 CPU 的 bit,並在新 pmap 的 pm_active 中設定目前 CPU 的 bit 。

        /* Release bit from old pmap->pm_active */                              
/*
 * Part 3: move this CPU's bit from the old pmap's pm_active mask to the
 * new one. %esi is the bit index (loaded earlier from PCPU(CPUID)) and
 * %ecx is the new thread. Under SMP the bit operations carry a lock
 * prefix.
 */
movl PCPU(CURPMAP), %ebx /* %ebx = pmap recorded as current on this CPU */
#ifdef SMP
lock
#endif
btrl %esi, PM_ACTIVE(%ebx) /* clear old */

/* Set bit in new pmap->pm_active */
movl TD_PROC(%ecx),%eax /* newproc */
movl P_VMSPACE(%eax), %ebx
addl $VM_PMAP, %ebx /* %ebx = address of the pmap at offset VM_PMAP in the vmspace */
movl %ebx, PCPU(CURPMAP) /* record it as this CPU's current pmap */
#ifdef SMP
lock
#endif
btsl %esi, PM_ACTIVE(%ebx) /* set new */

將 CPU 狀況恢復成新 thread 前一次執行的最後狀態

/*
 * Part 4 of cpu_switch(): load the incoming thread's context and return
 * into it.
 *
 * In:    %ecx = new thread, %edx = new thread's PCB
 * Steps: select the TSS (private one from pcb_ext, or the common one),
 *        copy the %fs/%gs descriptors into the per-CPU GDT, restore the
 *        saved registers and eflags, publish CURPCB/CURTHREAD, load the
 *        LDT, load %gs, and optionally restore the debug registers.
 * Out:   returns on the new thread's stack to its saved eip.
 */
sw1:
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
*/
cmpl $0, PCB_EXT(%edx) /* has pcb extension? */
je 1f /* If not, use the default */
movl $1, PCPU(PRIVATE_TSS) /* mark use of private tss */
movl PCB_EXT(%edx), %edi /* new tss descriptor */
jmp 2f /* Load it up */

1: /*
* Use the common default TSS instead of our own.
* Set our stack pointer into the TSS, it's set to just
* below the PCB. In C, common_tss.tss_esp0 = &pcb - 16;
*/
leal -16(%edx), %ebx /* leave space for vm86 */
movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0

/*
* Test this CPU's bit in the bitmap to see if this
* CPU was using a private TSS.
*/
cmpl $0, PCPU(PRIVATE_TSS) /* Already using the common? */
je 3f /* if so, skip reloading */
movl $0, PCPU(PRIVATE_TSS)
PCPU_ADDR(COMMON_TSSD, %edi)
2:
/* Move correct tss descriptor into GDT slot, then reload tr. */
movl PCPU(TSS_GDT), %ebx /* entry in GDT */
movl 0(%edi), %eax /* copy the 8-byte descriptor via %eax/%esi */
movl 4(%edi), %esi
movl %eax, 0(%ebx)
movl %esi, 4(%ebx)
movl $GPROC0_SEL*8, %esi /* GSEL(GPROC0_SEL, SEL_KPL) */
ltr %si
3:

/*
* Copy the %fs and %gs selectors into this pcpu gdt.
* Both descriptors are read as 16 contiguous bytes starting at
* PCB_FSD, so the %gs descriptor must directly follow the %fs one.
*/
leal PCB_FSD(%edx), %esi
movl PCPU(FSGS_GDT), %edi
movl 0(%esi), %eax /* %fs selector */
movl 4(%esi), %ebx
movl %eax, 0(%edi)
movl %ebx, 4(%edi)
movl 8(%esi), %eax /* %gs selector, comes straight after */
movl 12(%esi), %ebx
movl %eax, 8(%edi)
movl %ebx, 12(%edi)

/* Restore context. */
movl PCB_EBX(%edx),%ebx
movl PCB_ESP(%edx),%esp /* now on the new thread's stack */
movl PCB_EBP(%edx),%ebp
movl PCB_ESI(%edx),%esi
movl PCB_EDI(%edx),%edi
movl PCB_EIP(%edx),%eax
movl %eax,(%esp) /* final ret resumes at the saved eip */
pushl PCB_PSL(%edx)
popfl /* restore saved eflags */

movl %edx, PCPU(CURPCB)
movl %ecx, PCPU(CURTHREAD) /* into next thread */

/*
* Determine the LDT to use and load it if is the default one and
* that is not the current one.
*/
movl TD_PROC(%ecx),%eax
cmpl $0,P_MD+MD_LDT(%eax)
jnz 1f /* process has its own LDT */
movl _default_ldt,%eax
cmpl PCPU(CURRENTLDT),%eax
je 2f /* default LDT already loaded */
lldt _default_ldt
movl %eax,PCPU(CURRENTLDT)
jmp 2f
1:
/* Load the LDT when it is not the default one. */
pushl %edx /* Preserve pointer to pcb. */
addl $P_MD,%eax /* Pointer to mdproc is arg. */
pushl %eax
call set_user_ldt
addl $4,%esp /* drop the argument */
popl %edx
2:

/* This must be done after loading the user LDT. */
/*
* NOTE(review): the label is exported, presumably so other code can
* recognise a fault raised by this segment load -- confirm.
*/
.globl cpu_switch_load_gs
cpu_switch_load_gs:
movl PCB_GS(%edx),%gs

/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%edx)
jz 1f

/*
* Restore debug registers. The special code for dr7 is to
* preserve the current values of its reserved bits.
*/
movl PCB_DR6(%edx),%eax
movl %eax,%dr6
movl PCB_DR3(%edx),%eax
movl %eax,%dr3
movl PCB_DR2(%edx),%eax
movl %eax,%dr2
movl PCB_DR1(%edx),%eax
movl %eax,%dr1
movl PCB_DR0(%edx),%eax
movl %eax,%dr0
movl %dr7,%eax
andl $0x0000fc00,%eax /* keep the current bits under mask 0xfc00 */
movl PCB_DR7(%edx),%ecx /* %ecx is free: new thread pointer no longer needed */
andl $~0x0000fc00,%ecx /* take every other bit from the saved value */
orl %ecx,%eax
movl %eax,%dr7
1:
ret

FreeBSD 以 cpu_switch() 為基礎,提供跨 CPU 平臺的 context switch 能力。

sched_switch()

sched_switch() 是由 scheduler 提供的 function ,從 run queue 挑選下一個被執行的 thread ,並呼叫 cpu_switch() 切換 CPU 狀態,執行新的 thread 。目前 FreeBSD 提供三種 scheduler ,分別於 kern/sched_core.c 、 kern/sched_ule.c 和 kern/sched_4bsd.c ,各自有自己的 schedule 方式,應該有各自的 sched_switch() ,以挑選下一個 thread 。

sched_switch() 的主要功能是決定執行順序,真正進行 context switch ,還是呼叫 cpu_switch() 進行。因此,如果你想設計新的 scheduler ,最重要的就是 implement sched_switch() ,決定 thread 的執行次序,並呼叫 cpu_switch() 進行切換。

mi_switch()

mi_switch() 是屬於和 scheduler 及 CPU 平臺都無關的部分。主要工作是進行資料統計,和環境的檢查。統計資料包括 CPU 使用量,執行時間長短,切換的時間點。做完這些統計之後,就呼叫 sched_switch() ,以切換到下一個合適的 thread。

 
转载自:http://heaven.branda.to/~thinker/GinGin_CGI.py/show_id_doc/172
原创粉丝点击