vmx_vcpu_run函数分析(基于kernel 3.10.0)

来源:互联网 发布:淘宝投诉电话人工接听 编辑:程序博客网 时间:2024/06/05 10:45

转发请注明引用地址:http://blog.163.com/eric_liufeng/blog/static/19738268320156624253216

 

代码位于: arch/x86/kvm/vmx.c

vmx_vcpu_run函数实现了VM entry和VM exit的处理:保存host状态并加载guest寄存器,通过VMLAUNCH/VMRESUME进入Guest;VM exit之后保存guest寄存器、恢复host寄存器,并从VMCS中读取退出原因等信息。

 


/**
 * vmx_vcpu_run - run the guest on this vCPU until the next VM exit.
 * @vcpu: the vCPU to run; the enclosing struct vcpu_vmx is obtained with
 *        to_vmx().
 *
 * Writes a dirty guest RSP/RIP to the VMCS, saves host registers on the
 * stack, loads the guest general-purpose registers and CR2 from @vcpu and
 * enters the guest (VMX non-root mode) through the VMLAUNCH or VMRESUME
 * macro. Once control is back in the host, the guest registers and CR2 are
 * stored back into @vcpu, the host registers are popped, and the
 * IDT-vectoring info and exit reason are read from the VMCS into the
 * vcpu_vmx.
 *
 * Returns early, without entering the guest, when vmx->emulation_required
 * is set.
 */
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
     struct vcpu_vmx *vmx = to_vmx(vcpu);
     unsigned long debugctlmsr;

     /* Record the guest's net vcpu time for enforced NMI injections. */
     /*
      * Only taken when the CPU has no virtual-NMI support and
      * soft_vnmi_blocked is set.
      * NOTE(review): the original annotator asked whether this is related to
      * nmi_watchdog -- unconfirmed from this function alone.
      */
     if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
          vmx->entry_time = ktime_get();

     /* Don't enter VMX if guest state is invalid, let the exit handler
        start emulation until we arrive back to a valid state */
     if (vmx->emulation_required)
          return;

     /* One-shot request: copy vmcs12 to the shadow VMCS, then clear the flag. */
     if (vmx->nested.sync_shadow_vmcs) {
          copy_vmcs12_to_shadow(vmx);
          vmx->nested.sync_shadow_vmcs = false;
     }

     /*
      * If the cached guest RSP is marked dirty, write it to the GUEST_RSP
      * field of the VMCS.
      */
     if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
          vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
     /*
      * If the cached guest RIP is marked dirty, write it to the GUEST_RIP
      * field of the VMCS.
      */
     if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
          vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);

     /* When single-stepping over STI and MOV SS, we must clear the
      * corresponding interruptibility bits in the guest state. Otherwise
      * vmentry fails as it then expects bit 14 (BS) in pending debug
      * exceptions being set, but that's not correct for the guest debugging
      * case. */
     /*
      * I.e. when the guest is being single-stepped (KVM_GUESTDBG_SINGLESTEP),
      * the interrupt shadow is reset to 0 before entry.
      */
     if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
          vmx_set_interrupt_shadow(vcpu, 0);

     atomic_switch_perf_msrs(vmx);
     /* Sampled before entry so it can be restored after the VM exit below. */
     debugctlmsr = get_debugctlmsr();

     /*
      * vmx->__launched is what the asm below tests to choose between
      * VMLAUNCH (value 0) and VMRESUME (non-zero).
      */
     vmx->__launched = vmx->loaded_vmcs->launched;

     /*
      * Switch the CPU into guest mode: the asm block saves host state, loads
      * guest state, executes VMLAUNCH/VMRESUME and, after the VM exit, saves
      * guest state and restores host state.
      */
     asm(
          /* Store host registers */
          /* Push the host DX and BP onto the stack. */
          "push %%" _ASM_DX "; push %%" _ASM_BP ";"
          /*
           * CX (which holds the vmx pointer, operand %0) is pushed twice: the
           * first slot is a placeholder that later receives the guest's rcx,
           * the second keeps the vmx pointer so it can be popped back into %0
           * after the VM exit.
           */
          "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
          "push %%" _ASM_CX " \n\t"

          /*
           * First, make sure the host stack pointer recorded for this vCPU is
           * up to date.
           */
          /*
           * %c[name] prints a constant operand without the '$' immediate
           * prefix that AT&T syntax would otherwise use, so the constant can
           * serve as the displacement of a memory operand (using "$imm" there
           * would be a syntax error).
           * [host_rsp] is a symbolic operand name defined in the input list
           * below; %0 is the first operand, i.e. vmx (passed in cx).
           * %c[host_rsp](%0) is therefore the host_rsp member of *vmx, and
           * the instruction below compares the current SP with vmx->host_rsp.
           */
          /*
           * If SP already equals vmx->host_rsp nothing needs to be written;
           * otherwise store the current SP into vmx->host_rsp.
           */
          "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
          "je 1f \n\t"
          "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
          /*
           * Run ASM_VMX_VMWRITE_RSP_RDX; dx was loaded with HOST_RSP by the
           * "d" input constraint, so this presumably writes SP into the
           * HOST_RSP VMCS field. Per the original annotation, __ex() makes a
           * fault on this instruction lead to a reboot -- the macro is not
           * visible here, confirm against its definition.
           */
          __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
          "1: \n\t"
          /* Reload cr2 if changed */
          /*
           * Load the guest CR2 value saved in vmx (vcpu.arch.cr2) into AX and
           * the current hardware CR2 into DX; only when they differ is CR2
           * rewritten from AX.
           */
          "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
          "mov %%cr2, %%" _ASM_DX " \n\t"
          "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
          "je 2f \n\t"
          "mov %%" _ASM_AX", %%cr2 \n\t"
          "2: \n\t"
          /* Check if vmlaunch of vmresume is needed */
          /*
           * Compare vmx->__launched with 0. The resulting flags are consumed
           * by the "jne 1f" after the register loads: zero selects VMLAUNCH,
           * non-zero (this VMCS was already launched) selects VMRESUME.
           */
          "cmpl $0, %c[launched](%0) \n\t"

          /*
           * Load the guest general-purpose registers from the regs[] array
           * inside vmx. Only mov is used, which leaves the flags set by the
           * cmpl above intact.
           */
          /* Load guest registers.  Don't clobber flags. */
          "mov %c[rax](%0), %%" _ASM_AX " \n\t"
          "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
          "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
          "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
          "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
          "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
#ifdef CONFIG_X86_64
          "mov %c[r8](%0),  %%r8  \n\t"
          "mov %c[r9](%0),  %%r9  \n\t"
          "mov %c[r10](%0), %%r10 \n\t"
          "mov %c[r11](%0), %%r11 \n\t"
          "mov %c[r12](%0), %%r12 \n\t"
          "mov %c[r13](%0), %%r13 \n\t"
          "mov %c[r14](%0), %%r14 \n\t"
          "mov %c[r15](%0), %%r15 \n\t"
#endif
          "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */

          /*
           * Enter the guest through ASM_VMX_VMLAUNCH or ASM_VMX_VMRESUME.
           * Per the original annotation, host execution does not proceed
           * past this point until a VM exit occurs.
           */
          /* Enter guest mode */
          "jne 1f \n\t"
          /* __launched == 0: first entry on this VMCS, use VMLAUNCH. */
          __ex(ASM_VMX_VMLAUNCH) "\n\t"
          "jmp 2f \n\t"
          /* __launched != 0: the VMCS was launched before, use VMRESUME. */
          "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
          "2: "
          /*
           * Label 2 is the address published as vmx_return below
           * (presumably installed as the host RIP elsewhere -- not visible
           * here), so this is where the host continues after a VM exit.
           */
          /*
           * After the VM exit: save the guest registers and reload the host
           * registers.
           */
          /* Save guest registers, load host registers, keep flags */
          /*
           * %0 (cx) currently holds the guest's rcx. Store it into the
           * placeholder slot one word above the top of the stack, then pop
           * the saved vmx pointer back into %0. The later
           * "pop %c[rcx](%0)" moves the guest rcx from the placeholder slot
           * into the regs[] array.
           */
          "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
          "pop %0 \n\t"
          "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
          "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
          __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
          "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
          "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
          "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
          "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
#ifdef CONFIG_X86_64
          "mov %%r8,  %c[r8](%0) \n\t"
          "mov %%r9,  %c[r9](%0) \n\t"
          "mov %%r10, %c[r10](%0) \n\t"
          "mov %%r11, %c[r11](%0) \n\t"
          "mov %%r12, %c[r12](%0) \n\t"
          "mov %%r13, %c[r13](%0) \n\t"
          "mov %%r14, %c[r14](%0) \n\t"
          "mov %%r15, %c[r15](%0) \n\t"
#endif
          /* Save the guest's CR2 into vmx (vcpu.arch.cr2), going through AX. */
          "mov %%cr2, %%" _ASM_AX "   \n\t"
          "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
          /* Restore the host BP and DX pushed at the top of the asm block. */
          "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
          /*
           * setbe writes 1 to vmx->fail when CF or ZF is set, 0 otherwise.
           * None of the mov/pop instructions since VMLAUNCH/VMRESUME modify
           * flags, so this presumably records a failed VM entry -- confirm
           * against the VMX instruction reference.
           */
          "setbe %c[fail](%0) \n\t"
          /*
           * Emit a global, pointer-sized object named vmx_return in .rodata
           * whose value is the address of label 2 above.
           */
          ".pushsection .rodata \n\t"
          ".global vmx_return \n\t"
          "vmx_return: " _ASM_PTR " 2b \n\t"
          ".popsection"
           : : "c"(vmx), "d"((unsigned long)HOST_RSP),
          [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
          [fail]"i"(offsetof(struct vcpu_vmx, fail)),
          /*
           * [host_rsp] is a symbolic operand name; the template refers to it
           * as %[host_rsp] (here with the 'c' modifier, %c[host_rsp]).
           */
          [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
          [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
          [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
          [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
          [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
          [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
          [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
          [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
#ifdef CONFIG_X86_64
          [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
          [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
          [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
          [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
          [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
          [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
          [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
          [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
          [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
          /*
           * [wordsize] is the last input operand. The clobber list follows
           * it: "cc" tells the compiler the condition codes (flags) are
           * modified, "memory" that the asm reads/writes memory; the named
           * registers are those overwritten with guest values and not
           * restored inside the asm.
           */
          [wordsize]"i"(sizeof(ulong))
           : "cc", "memory"
#ifdef CONFIG_X86_64
          , "rax", "rbx", "rdi", "rsi"
          , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
          , "eax", "ebx", "edi", "esi"
#endif
           );

     /* Reaching this point means a VM exit happened and we are back in root mode. */
     /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
     if (debugctlmsr)
          update_debugctlmsr(debugctlmsr);

#ifndef CONFIG_X86_64
     /*
      * The sysexit path does not restore ds/es, so we must set them to
      * a reasonable value ourselves.
      *
      * We can't defer this to vmx_load_host_state() since that function
      * may be executed in interrupt context, which saves and restore segments
      * around it, nullifying its effect.
      */
     /* 32-bit builds only: reload ds and es with __USER_DS. */
     loadsegment(ds, __USER_DS);
     loadsegment(es, __USER_DS);
#endif

     /*
      * Clear the "available" bits for RIP, RSP and the listed extended
      * registers (every other bit is set) and reset the dirty mask.
      * Presumably the cleared registers are re-read from the VMCS on demand
      * -- the accessors are not visible here.
      */
     vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
                      | (1 << VCPU_EXREG_RFLAGS)
                      | (1 << VCPU_EXREG_CPL)
                      | (1 << VCPU_EXREG_PDPTR)
                      | (1 << VCPU_EXREG_SEGMENTS)
                      | (1 << VCPU_EXREG_CR3));
     vcpu->arch.regs_dirty = 0;

     /*
      * Read the IDT-vectoring information field from the VMCS.
      */
     vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

    /*
     * Mark this VMCS as launched; the next call copies this into
     * vmx->__launched, which makes the asm above take the VMRESUME path.
     */
     vmx->loaded_vmcs->launched = 1;

    /*
     * Read the VM exit reason from the VMCS. Per the original annotation,
     * this field is filled in by the hardware during the VM exit.
     */
     vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
     /*
      * Emit the kvm_exit trace event (presumably a kernel tracepoint
      * consumable by perf/ftrace -- confirm against its definition).
      */
     trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);

     /*
      * Post-exit processing. Per the original annotation, this covers
      * machine-check (MCE) and NMI handling; the helpers are defined
      * elsewhere, so see them for the exact behaviour.
      */
     vmx_complete_atomic_exit(vmx);
     vmx_recover_nmi_blocking(vmx);
     vmx_complete_interrupts(vmx);
}

 

 

 

0 0