1.2 Qemu KVM 内核态架构

来源:互联网 发布:内锥形螺纹编程实例 编辑:程序博客网 时间:2024/06/08 02:37


内核态代码位于Linux内核代码的 virt 和arch/x86/kvm 两个目录下;本节将分析内核态代码的架构与模块划分以及内核态对用户空间提供的接口。

1.2.1 对外字符设备

Intel 的虚拟化模块初始化入口如下:

vmx_init (arch/x86/kvm/vmx.c)

该函数为msr_bitmap 和 io_bitmap分配内存, 并调用

kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),

__alignof__(struct vcpu_vmx),THIS_MODULE);

  

static struct kvm_x86_ops vmx_x86_ops = {

      ......

};

该结构为virt 层 调用arch/x86/kvm的接口

 

int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,

         struct module *module)

 

该函数流程如下:

a.  kvm_arch_init 注册 x86 arch 函数操作结构

b.  kvm_irqfd_init; //初始化irqfd_cleanup_wq workqueue; KVM_IRQFD ioctl时会用到

c.  kvm_arch_hardware_setup ==》 kvm_x86_ops->hardware_setup

d.  对每个 Cpu 执行kvm_x86_ops->check_processor_compatibility(rtn);

e.  注册register_cpu_notifier(&kvm_cpu_notifier);

f.  注册register_reboot_notifier(&kvm_reboot_notifier);

g.  注册char 设备 misc_register(&kvm_dev);//kvm_dev是kvm对应用层的访问接口

h.  register_syscore_ops(&kvm_syscore_ops);注册电源管理回调

i.  kvm_preempt_ops 初始化, 该结构在创建vcpu时使用(kvm_vm_ioctl_create_vcpu)

 

static struct syscore_ops kvm_syscore_ops = { //当VMM host收到电源管理事件(挂起/恢复)时被调用

    .suspend = kvm_suspend,==》 kvm_x86_ops->hardware_disable

    .resume = kvm_resume,==》 kvm_x86_ops->hardware_enable

};

   当vcpu所在进程被调度运行(sched_in)或被调度出去(sched_out)时,会触发下面的回调。

    kvm_preempt_ops.sched_in= kvm_sched_in;

    kvm_preempt_ops.sched_out= kvm_sched_out;

 

static struct miscdevice kvm_dev = {

    KVM_MINOR,

    "kvm",

    &kvm_chardev_ops,

};

 

static struct file_operations kvm_chardev_ops = {

    .unlocked_ioctl =kvm_dev_ioctl,

    .compat_ioctl   = kvm_dev_ioctl,

    .llseek       = noop_llseek,

};

应用层qemu通过kvm接口调用内核,其中KVM_CREATE_VM用于创建虚拟机。

static long kvm_dev_ioctl(struct file *filp,

             unsigned int ioctl, unsigned long arg)

{

    long r = -EINVAL;

    switch (ioctl) {

    case KVM_GET_API_VERSION:

       r = KVM_API_VERSION;

       break;

    case KVM_CREATE_VM:

       r = kvm_dev_ioctl_create_vm(arg); //建立vm对应用层的char设备

       break;

    case KVM_CHECK_EXTENSION:

       r = kvm_dev_ioctl_check_extension_generic(arg);

       break;

    case KVM_GET_VCPU_MMAP_SIZE:

       r = -EINVAL;

       if (arg)

           goto out;

       r = PAGE_SIZE;     /* struct kvm_run */

#ifdef CONFIG_X86

       r += PAGE_SIZE;    /* pio data page */

#endif

#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET

       r += PAGE_SIZE;    /* coalesced mmio ring page */

#endif

       break;

    ......

    default:

       return kvm_arch_dev_ioctl(filp, ioctl, arg);

    }

out:

    return r;

}

 

kvm_dev_ioctl_create_vm 创建VM虚拟机,流程如下:

a.  kvm_create_vm (虚拟机结构 为 struct kvm)

        ==> kvm_arch_init_vm(arch/x86/kvm/x86.c)

       ==> kvm_init_mmu_notifier注册 vmm内存通知结构kvm_mmu_notifier_ops

 

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {

    .invalidate_page  = kvm_mmu_notifier_invalidate_page,

    .invalidate_range_start  = kvm_mmu_notifier_invalidate_range_start,

    .invalidate_range_end    = kvm_mmu_notifier_invalidate_range_end,

    .clear_flush_young   = kvm_mmu_notifier_clear_flush_young,

    .test_young       = kvm_mmu_notifier_test_young,

    .change_pte       = kvm_mmu_notifier_change_pte,

    .release      = kvm_mmu_notifier_release,

};

b. hardware_enable_all ==》 kvm_x86_ops->hardware_enable

c. kvm_eventfd_init

d. 注册虚拟机字符设备 (kvm_vm_fops)

 

1.2.2 VM 字符设备

static struct file_operations kvm_vm_fops = {

    .release        = kvm_vm_release,

    .unlocked_ioctl =kvm_vm_ioctl,

#ifdef CONFIG_COMPAT

    .compat_ioctl   = kvm_vm_compat_ioctl,

#endif

    .llseek       = noop_llseek,

};

 

Ioctl Cmd

Implement function

Src file

KVM_CREATE_VCPU

kvm_vm_ioctl_create_vcpu

Kvm_main.c

KVM_SET_USER_MEMORY_REGION

kvm_vm_ioctl_set_memory_region

Kvm_main.c

KVM_GET_DIRTY_LOG

kvm_vm_ioctl_get_dirty_log

Arch/x86/kvm/x86.c

KVM_REGISTER_COALESCED_MMIO

kvm_vm_ioctl_register_coalesced_mmio

coalesced_mmio.c

KVM_UNREGISTER_COALESCED_MMIO

kvm_vm_ioctl_unregister_coalesced_mmio

coalesced_mmio.c

KVM_IRQFD

kvm_irqfd

Eventfd.c

KVM_IOEVENTFD

kvm_ioeventfd

Eventfd.c

KVM_SIGNAL_MSI

kvm_send_userspace_msi

Irqchip.c

KVM_SET_GSI_ROUTING

kvm_set_irq_routing

Irqchip.c

KVM_CREATE_DEVICE

kvm_ioctl_create_device

Kvm_main.c

default

kvm_arch_vm_ioctl

kvm_vm_ioctl_assigned_device

Arch/x86/kvm/x86.c

Assigned-dev.c

下面是vcpu创建的代码分析:

kvm_vm_ioctl_create_vcpu

a.  kvm_arch_vcpu_create ==> kvm_x86_ops -> vcpu_create

b.  preempt_notifier_init(&vcpu->preempt_notifier,&kvm_preempt_ops);

c.  kvm_arch_vcpu_setup ==>  kvm_x86_ops->vcpu_load

d.  create_vcpu_fd ==>  anon_inode_getfd("kvm-vcpu",&kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); //创建vcpu 的字符设备

 

static struct file_operations kvm_vcpu_fops = {

    .release        = kvm_vcpu_release,

    .unlocked_ioctl =kvm_vcpu_ioctl,

#ifdef CONFIG_COMPAT

    .compat_ioctl   = kvm_vcpu_compat_ioctl,

#endif

    .mmap           = kvm_vcpu_mmap,

    .llseek       = noop_llseek,

};

 

kvm_vcpu_mmap: vma->vm_ops = &kvm_vcpu_vm_ops;

static const struct vm_operations_struct kvm_vcpu_vm_ops = {

    .fault =kvm_vcpu_fault,

};

 

kvm_vcpu_fault:  映射vcpu->run 的地址,这样qemu可以直接访问该内存。

 

kvm_x86_ops -> vcpu_create = vmx_create_vcpu  ==> kvm_vcpu_init 会分配 vcpu->run的空间

 

kvm_vcpu_ioctl

Ioctl Cmd

Implement function

KVM_RUN

kvm_arch_vcpu_ioctl_run 进入vm-entry

KVM_GET_REGS

kvm_arch_vcpu_ioctl_get_regs==>

kvm_register_read(kvm_cache_reg.h)

KVM_SET_REGS

kvm_arch_vcpu_ioctl_set_regs ==>

kvm_register_write

KVM_GET_SREGS

kvm_arch_vcpu_ioctl_get_sregs

KVM_SET_SREGS

kvm_arch_vcpu_ioctl_set_sregs

KVM_GET_MP_STATE

kvm_arch_vcpu_ioctl_get_mpstate

KVM_SET_MP_STATE

kvm_arch_vcpu_ioctl_set_mpstate

KVM_TRANSLATE

kvm_arch_vcpu_ioctl_translate

KVM_SET_SIGNAL_MASK

kvm_vcpu_ioctl_set_sigmask

KVM_GET_FPU

kvm_arch_vcpu_ioctl_get_fpu

KVM_SET_FPU

kvm_arch_vcpu_ioctl_set_fpu

kvm_arch_vcpu_ioctl

 

 

1.2.3 KVM 内核空间模块

(1) arch/x86/kvm 下的模块

File

Description

Interface func

Function note

Cpuid.c

处理cpuid指令相关代码

kvm_emulate_cpuid

handle_cpuid

响应cpuid造成的vm-exit

kvm_cpuid

响应kvm_x86_ops--》ioctl

get_cpuid

Emulate.c

模拟指令的执行

x86_emulate_insn

当异常造成vm_exits时x86_emulate_instruction

I8254.c

管理虚拟8254 控制器

kvm_create_pit

ioctl KVM_CREATE_PIT2

kvm_create_pit

I8259.c

管理虚拟8259控制器

kvm_create_pic

KVM_CREATE_IRQCHIP:

kvm_create_pic

Irq.c

虚拟中断管理

kvm_cpu_get_interrupt

kvm_inject_pending_timer_irqs

……

 

Lapic.c

 

 

Irq.c依赖该文件

Mmu.c

虚拟机内存管理

kvm_mmu_page_fault

kvm_mmu_invlpg

 

页与缓存管理

Pmu.c

虚拟机性能监控管理

 

 

Vmx.c

X86虚拟机管理主函数

各种异常处理函数入口

kvm_vmx_exit_handlers

 

x86 arch的入口文件

 

X86.c

Virt 模块到x86 arch的中间层

 

 

 

(2) virt下的模块

File

Description

Detail

Kvm_main.c

内核态的主模块

Kvm的初始化,对应用层接口的实现。Os回调的接口实现

Ioapic.c

Irqchip.c

Irq_comm.c

中断模块

Kvm虚拟机中断控制器的实现入口

coalesced_mmio.c

eventfd.c

io虚拟化

 

Pci_assign.c

Vfio.c

直接io虚拟化

Pci_assign与vfio是两种不同的直接io实现方式

Iommu.c

Iommu入口

用于直接io,调用drivers/iommu模块完成相应功能,第7章会分析该部分

Async_pf.c

异步任务

用于cpu内存与中断的异步任务执行

 

 

(3) guest_os下的模块

  Kvm的内核模块除了在vm host上运行外,在采用半虚拟化的情况下,还需要在guest os上有对应的驱动(假设guest os 也运行Linux)。下面是其代码路径与说明:

 

File

Description

Detail

Drivers/virtio/*.c

Virtio模块

第6章分析

Arch/x86/ (arch\x86\kernel\  kvmclock.c pvclock.c)

时间虚拟化

4.3节分析

 

 

0 0