Message ID | 1374041153-32235-3-git-send-email-yzt356@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 2013-07-17 08:05, Arthur Chunqi Li wrote: > This is the first version for VMX nested environment test case. It > contains the basic VMX instructions test cases, including VMXON/ > VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch > also tests the basic execution routine in VMX nested environment and > let the VM print "Hello World" to inform its successfully run. > > New files added: > x86/vmx.h : contains all VMX related macro declerations > x86/vmx.c : main file for VMX nested test case > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > --- > config-x86-common.mak | 2 + > config-x86_64.mak | 1 + > lib/x86/msr.h | 5 + > x86/cstart64.S | 4 + > x86/unittests.cfg | 6 + > x86/vmx.c | 676 +++++++++++++++++++++++++++++++++++++++++++++++++ > x86/vmx.h | 419 ++++++++++++++++++++++++++++++ > 7 files changed, 1113 insertions(+) > create mode 100644 x86/vmx.c > create mode 100644 x86/vmx.h > > diff --git a/config-x86-common.mak b/config-x86-common.mak > index 455032b..34a41e1 100644 > --- a/config-x86-common.mak > +++ b/config-x86-common.mak > @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o > > $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o > > +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o > + > arch_clean: > $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ > $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o > diff --git a/config-x86_64.mak b/config-x86_64.mak > index 91ffcce..5d9b22a 100644 > --- a/config-x86_64.mak > +++ b/config-x86_64.mak > @@ -11,5 +11,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ > $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ > $(TEST_DIR)/pcid.flat > tests += $(TEST_DIR)/svm.flat > +tests += $(TEST_DIR)/vmx.flat > > include config-x86-common.mak > diff --git a/lib/x86/msr.h b/lib/x86/msr.h > index 509a421..281255a 100644 > --- a/lib/x86/msr.h > +++ b/lib/x86/msr.h > @@ -396,6 +396,11 @@ > #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a > #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b > #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c > +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d > +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e > +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f > +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 > + > > /* AMD-V MSRs */ > > diff --git a/x86/cstart64.S b/x86/cstart64.S > index 24df5f8..0fe76da 100644 > --- a/x86/cstart64.S > +++ b/x86/cstart64.S > @@ -4,6 +4,10 @@ > .globl boot_idt > boot_idt = 0 > > +.globl idt_descr > +.globl tss_descr > +.globl gdt64_desc > + > ipi_vector = 0x20 > > max_cpus = 64 > diff --git a/x86/unittests.cfg b/x86/unittests.cfg > index bc9643e..85c36aa 100644 > --- a/x86/unittests.cfg > +++ b/x86/unittests.cfg > @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" > file = pcid.flat > extra_params = -cpu qemu64,+pcid > arch = x86_64 > + > +[vmx] > +file = vmx.flat > +extra_params = -cpu host,+vmx > +arch = x86_64 > + > diff --git a/x86/vmx.c b/x86/vmx.c > new file mode 100644 > index 0000000..af48fce > --- /dev/null > +++ b/x86/vmx.c > @@ -0,0 +1,676 @@ > +#include "libcflat.h" > +#include "processor.h" > +#include "vm.h" > +#include "desc.h" > +#include "vmx.h" > +#include "msr.h" > +#include "smp.h" > +#include "io.h" > +#include "setjmp.h" > + > +int fails = 0, tests = 0; > +u32 *vmxon_region; > +struct vmcs *vmcs_root; > +void *io_bmp1, *io_bmp2; > +void *msr_bmp; > +u32 vpid_ctr; > +char *guest_stack, *host_stack; > +char *guest_syscall_stack, *host_syscall_stack; > +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; > +ulong fix_cr0_set, fix_cr0_clr; > +ulong fix_cr4_set, fix_cr4_clr; > +struct regs regs; > +jmp_buf env; > +struct vmx_test *current; > + > +extern u64 gdt64_desc[]; > +extern u64 idt_descr[]; > +extern u64 tss_descr[]; > +extern void *entry_vmx; > +extern void *entry_sysenter; > +extern void *guest_entry; > + > +void report(const char *name, int result) > +{ > + ++tests; > + if (result) > + printf("PASS: %s\n", name); > + else { > + printf("FAIL: %s\n", name); > + ++fails; > + } > +} > + > +inline u64 get_rflags(void) > +{ > + u64 r; > + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); > + return r; > +} > + > +inline void set_rflags(u64 r) > +{ > + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); > +} > + > +int vmcs_clear(struct vmcs *vmcs) > +{ > + bool ret; > + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > + return ret; > +} > + > +u64 vmcs_read(enum Encoding enc) > +{ > + u64 val; > + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); > + return val; > +} > + > +int vmcs_write(enum Encoding enc, u64 val) > +{ > + bool ret; > + asm volatile ("vmwrite %1, %2; setbe %0" > + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); > + return ret; > +} > + > +int make_vmcs_current(struct vmcs *vmcs) > +{ > + bool ret; > + > + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > + return ret; > +} > + > +int save_vmcs(struct vmcs **vmcs) > +{ > + bool ret; > + > + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); > + return ret; > +} > + > +/* entry_vmx */ > +asm( > + ".align 4, 0x90\n\t" > + ".globl entry_vmx\n\t" > + "entry_vmx:\n\t" > + SAVE_GPR > + " call default_exit_handler\n\t" > + /* Should not reach here*/ > + " mov $1, %eax\n\t" > + " call exit\n\t" > +); > + > +/* entry_sysenter */ > +asm( > + ".align 4, 0x90\n\t" > + ".globl entry_sysenter\n\t" > + "entry_sysenter:\n\t" > + SAVE_GPR > + " and $0xf, %rax\n\t" > + " push %rax\n\t" > + " call default_syscall_handler\n\t" > +); > + > +void default_syscall_handler(u64 syscall_no) > +{ > + if (current != NULL && current->syscall_handler != NULL) { > + current->syscall_handler(syscall_no); > + return; > + } > + printf("Here in default syscall_handler,"); > + printf("syscall_no = %d\n", syscall_no); > + printf("Nothing is done here.\n"); > +} > + > +static inline void vmx_run() > +{ > + bool ret; > + /* printf("Now run vm.\n"); */ > + asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret)); > + printf("VMLAUNCH error, ret=%d\n", ret); > +} > + > +static inline void vmx_resume() > +{ > + asm volatile(LOAD_GPR > + "vmresume\n\t"); > + /* VMRESUME fail if reach here */ > +} > + > +static inline int vmx_on() > +{ > + bool ret; > + asm volatile ("vmxon %1; setbe %0\n\t" > + : "=q"(ret) : "m"(vmxon_region) : "cc"); > + return ret; > +} > + > +static inline int vmx_off() > +{ > + bool ret; > + asm volatile("vmxoff; setbe %0\n\t" > + : "=q"(ret) : : "cc"); > + return ret; > +} > + > +void print_vmexit_info() > +{ > + u64 guest_rip, guest_rsp; > + ulong reason = vmcs_read(EXI_REASON) & 0xff; > + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); > + guest_rip = vmcs_read(GUEST_RIP); > + guest_rsp = vmcs_read(GUEST_RSP); > + printf("VMEXIT info:\n"); > + printf("\tvmexit reason = %d\n", reason); > + printf("\texit qualification = 0x%x\n", exit_qual); > + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); > + printf("\tguest_rip = 0x%llx\n", guest_rip); > + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", > + regs.rax, regs.rbx, regs.rcx, regs.rdx); > + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", > + guest_rsp, regs.rbp, regs.rsi, regs.rdi); > + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", > + regs.r8, regs.r9, regs.r10, regs.r11); > + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", > + regs.r12, regs.r13, regs.r14, regs.r15); > +} > + > +void test_vmclear(void) > +{ > + u64 rflags; > + > + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + set_rflags(rflags); > + report("test vmclear", vmcs_clear(vmcs_root) == 0); > +} > + > +void test_vmxoff(void) > +{ > + int ret; > + u64 rflags; > + > + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + set_rflags(rflags); > + ret = vmx_off(); > + report("test vmxoff", !ret); > +} > + > +/* This function should not return directly, only three kinds > + of return is permitted : goto vm_exit, goto vm_resume > + and longjmp to previous set jmp_buf. > + For test case defined exit_handler, only three kinds of return > + value are permitted : VMX_EXIT, VMX_RESUME and VMX_HALT, > + which coressponding to three actions above, other return > + values will cause error message. */ > +void default_exit_handler() > +{ > + u64 guest_rip; > + ulong reason; > + int ret; > + > + if (current != NULL && current->exit_handler != NULL) { > + current->exits ++; > + current->guest_regs = regs; > + ret = current->exit_handler(); > + regs = current->guest_regs; > + switch (ret) { > + case VMX_HALT: > + goto vmx_halt; > + case VMX_RESUME: > + goto vmx_resume; > + case VMX_EXIT: > + goto vmx_exit; > + default: > + printf("ERROR : Invalid exit_handler return val, %d.\n" > + , ret); > + goto vmx_exit; > + } > + } > + > + if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG) > + && !(rdmsr(MSR_EFER) & EFER_LMA)) > + printf("ERROR : PDPTEs should be checked\n"); > + > + guest_rip = vmcs_read(GUEST_RIP); > + reason = vmcs_read(EXI_REASON) & 0xff; > + > + switch (reason) { > + case VMX_VMCALL: > + print_vmexit_info(); > + vmcs_write(GUEST_RIP, guest_rip + 3); > + goto vmx_resume; > + case VMX_HLT: > + goto vmx_halt; > + default: > + break; > + } > + printf("ERROR : Unhandled vmx exit.\n"); > + print_vmexit_info(); > +vmx_halt: > + /* printf("VM exit.\n"); */ > + longjmp(env, 1); > + /* Should not reach here */ > +vmx_exit: > + exit(-1); > +vmx_resume: > + vmx_resume(); > + /* Should not reach here */ > + exit(-1); > +} > + > +/* guest_entry */ > +asm( > + ".align 4, 0x90\n\t" > + ".globl entry_guest\n\t" > + "guest_entry:\n\t" > + " call default_guest_main\n\t" > + " hlt\n\t" > +); > + > +void default_guest_main(void) > +{ > + if (current != NULL && current->guest_main != NULL) { > + current->guest_main(); > + return; > + } > + /* Here is default guest_main, print Hello World */ > + printf("\tHello World, this is default guest main!\n"); > +} > + > +void init_vmcs_ctrl(void) > +{ > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > + /* 26.2.1.1 */ > + vmcs_write(PIN_CONTROLS, ctrl_pin); > + /* Disable VMEXIT of IO instruction */ > + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { > + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; > + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); > + } > + vmcs_write(CR3_TARGET_COUNT, 0); > + io_bmp1 = alloc_page(); > + io_bmp2 = alloc_page(); > + memset(io_bmp1, 0, PAGE_SIZE); > + memset(io_bmp2, 0, PAGE_SIZE); > + vmcs_write(IO_BITMAP_A, (u64)io_bmp1); > + vmcs_write(IO_BITMAP_B, (u64)io_bmp2); > + msr_bmp = alloc_page(); > + memset(msr_bmp, 0, PAGE_SIZE); > + vmcs_write(MSR_BITMAP, (u64)msr_bmp); > + vmcs_write(VPID, ++vpid_ctr); > +} > + > +void init_vmcs_host(void) > +{ > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > + /* 26.2.1.2 */ > + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); > + > + /* 26.2.1.3 */ > + vmcs_write(ENT_CONTROLS, ctrl_enter); > + vmcs_write(EXI_CONTROLS, ctrl_exit); > + > + /* 26.2.2 */ > + vmcs_write(HOST_CR0, read_cr0()); > + vmcs_write(HOST_CR3, read_cr3()); > + vmcs_write(HOST_CR4, read_cr4()); > + vmcs_write(HOST_SYSENTER_ESP, > + (u64)(host_syscall_stack + PAGE_SIZE - 1)); > + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); > + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); > + > + /* 26.2.3 */ > + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); > + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); > + vmcs_write(HOST_BASE_TR, (u64)tss_descr); > + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); > + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); > + vmcs_write(HOST_BASE_FS, 0); > + vmcs_write(HOST_BASE_GS, 0); > + > + /* Set other vmcs area */ > + vmcs_write(PF_ERROR_MASK, 0); > + vmcs_write(PF_ERROR_MATCH, 0); > + vmcs_write(VMCS_LINK_PTR, ~0ul); > + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); > + vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1)); > + vmcs_write(HOST_RIP, (u64)(&entry_vmx)); > +} > + > +void init_vmcs_guest(void) > +{ > + /* 26.3 CHECKING AND LOADING GUEST STATE */ > + ulong guest_cr0, guest_cr4, guest_cr3; > + /* 26.3.1.1 */ > + guest_cr0 = read_cr0(); > + guest_cr4 = read_cr4(); > + guest_cr3 = read_cr3(); > + if (ctrl_enter & ENT_GUEST_64) { > + guest_cr0 |= CR0_PG; > + guest_cr4 |= CR4_PAE; > + } > + if ((ctrl_enter & ENT_GUEST_64) == 0) > + guest_cr4 &= (~CR4_PCIDE); > + if (guest_cr0 & CR0_PG) > + guest_cr0 |= CR0_PE; > + vmcs_write(GUEST_CR0, guest_cr0); > + vmcs_write(GUEST_CR3, guest_cr3); > + vmcs_write(GUEST_CR4, guest_cr4); > + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); > + vmcs_write(GUEST_SYSENTER_ESP, > + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); > + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); > + vmcs_write(GUEST_DR7, 0); > + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); > + > + /* 26.3.1.2 */ > + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); > + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); > + vmcs_write(GUEST_SEL_LDTR, 0); > + > + vmcs_write(GUEST_BASE_CS, 0); > + vmcs_write(GUEST_BASE_ES, 0); > + vmcs_write(GUEST_BASE_SS, 0); > + vmcs_write(GUEST_BASE_DS, 0); > + vmcs_write(GUEST_BASE_FS, 0); > + vmcs_write(GUEST_BASE_GS, 0); > + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); > + vmcs_write(GUEST_BASE_LDTR, 0); > + > + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); > + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); > + > + vmcs_write(GUEST_AR_CS, 0xa09b); > + vmcs_write(GUEST_AR_DS, 0xc093); > + vmcs_write(GUEST_AR_ES, 0xc093); > + vmcs_write(GUEST_AR_FS, 0xc093); > + vmcs_write(GUEST_AR_GS, 0xc093); > + vmcs_write(GUEST_AR_SS, 0xc093); > + vmcs_write(GUEST_AR_LDTR, 0x82); > + vmcs_write(GUEST_AR_TR, 0x8b); > + > + /* 26.3.1.3 */ > + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); > + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); > + vmcs_write(GUEST_LIMIT_GDTR, > + ((struct descr *)gdt64_desc)->limit & 0xffff); > + vmcs_write(GUEST_LIMIT_IDTR, > + ((struct descr *)idt_descr)->limit & 0xffff); > + > + /* 26.3.1.4 */ > + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); > + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); > + vmcs_write(GUEST_RFLAGS, 0x2); > + > + /* 26.3.1.5 */ > + vmcs_write(GUEST_ACTV_STATE, 0); > + vmcs_write(GUEST_INTR_STATE, 0); > +} > + > +int init_vmcs(struct vmcs **vmcs) > +{ > + *vmcs = alloc_page(); > + memset(*vmcs, 0, PAGE_SIZE); > + (*vmcs)->revision_id = basic.revision; > + /* vmclear first to init vmcs */ > + if (vmcs_clear(*vmcs)) { > + printf("%s : vmcs_clear error\n", __func__); > + return 1; > + } > + > + if (make_vmcs_current(*vmcs)) { > + printf("%s : make_vmcs_current error\n", __func__); > + return 1; > + } > + > + /* All settings to pin/exit/enter/cpu > + control fields should be placed here */ > + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; > + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; > + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); > + ctrl_cpu[0] |= CPU_HLT; > + /* DIsable IO instruction VMEXIT now */ > + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); > + ctrl_cpu[1] = 0; > + > + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; > + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; > + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; > + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; > + > + init_vmcs_ctrl(); > + init_vmcs_host(); > + init_vmcs_guest(); > + return 0; > +} > + > +void init_vmx(void) > +{ > + vmxon_region = alloc_page(); > + memset(vmxon_region, 0, PAGE_SIZE); > + > + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); > + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); > + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); > + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); > + basic.val = rdmsr(MSR_IA32_VMX_BASIC); > + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN > + : MSR_IA32_VMX_PINBASED_CTLS); > + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT > + : MSR_IA32_VMX_EXIT_CTLS); > + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY > + : MSR_IA32_VMX_ENTRY_CTLS); > + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC > + : MSR_IA32_VMX_PROCBASED_CTLS); > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) > + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); > + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) > + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); > + > + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); > + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE); > + > + *vmxon_region = basic.revision; > + current = NULL; > + > + guest_stack = alloc_page(); > + memset(guest_stack, 0, PAGE_SIZE); > + guest_syscall_stack = alloc_page(); > + memset(guest_syscall_stack, 0, PAGE_SIZE); > + host_stack = alloc_page(); > + memset(host_stack, 0, PAGE_SIZE); > + host_syscall_stack = alloc_page(); > + memset(host_syscall_stack, 0, PAGE_SIZE); > +} > + > +int test_vmx_capability(void) > +{ > + struct cpuid r; > + u64 ret1, ret2; > + u64 ia32_feature_control; > + r = cpuid(1); > + ret1 = ((r.c) >> 5) & 1; > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { > + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > + } > + report("test vmx capability", ret1 & ret2); > + return !(ret1 & ret2); > +} > + > +int test_vmxon(void) > +{ > + int ret; > + u64 rflags; > + > + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + set_rflags(rflags); > + ret = vmx_on(); > + report("test vmxon", !ret); > + return ret; > +} > + > +void test_vmptrld(void) > +{ > + u64 rflags; > + struct vmcs *vmcs; > + > + vmcs = alloc_page(); > + vmcs->revision_id = basic.revision; > + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + set_rflags(rflags); > + report("test vmptrld", make_vmcs_current(vmcs) == 0); > +} > + > +void test_vmptrst(void) > +{ > + u64 rflags; > + int ret; > + struct vmcs *vmcs1, *vmcs2; > + > + vmcs1 = alloc_page(); > + memset(vmcs1, 0, PAGE_SIZE); > + init_vmcs(&vmcs1); > + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + set_rflags(rflags); > + ret = save_vmcs(&vmcs2); > + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); > +} > + > +int test_run(struct vmx_test *test) > +{ > + if (test == NULL) { Not possible, is it? > + printf("%s : test is NULL.\n", __func__); > + return 1; > + } > + if (test->name == NULL) > + test->name = "(no name)"; > + if (vmx_on()) { > + printf("%s : vmxon failed.\n", __func__); > + return 2; > + } > + init_vmcs(&(test->vmcs)); > + /* Directly call test->init is ok here, init_vmcs has done > + vmcs init, vmclear and vmptrld*/ > + if (test->init) > + test->init(test->vmcs); > + test->exits = 0; > + current = test; > + regs = test->guest_regs; > + printf("\nTest suite : %s\n", test->name); > + if (setjmp(env) == 0) { > + vmx_run(); > + /* Should not reach here */ > + printf("%s : vmx_run failed.\n", __func__); > + } > + if (vmx_off()) { > + printf("%s : vmxoff failed.\n", __func__); > + return 2; > + } > + return 0; > +} > + > +void vmenter_main() > +{ > + u64 rax; > + u64 rsp, resume_rsp; > + > + report("test vmlaunch", 1); > + > + rax = 0; Can already be initialized above. > + asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp)); > + asm volatile("mov %2, %%rax\n\t" > + "vmcall\n\t" > + "mov %%rax, %0\n\t" > + "mov %%rsp, %1\n\t" > + : "=r"(rax), "=r"(resume_rsp) > + : "g"(0xABCD)); > + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); > +} > + > +int vmenter_exit_handler() > +{ > + u64 guest_rip; > + ulong reason; > + > + guest_rip = vmcs_read(GUEST_RIP); > + reason = vmcs_read(EXI_REASON) & 0xff; > + if (reason == VMX_VMCALL) { I suppose any reason != VMX_VMCALL or VMX_HALT means test failure, no? Then catch it. > + if (current->guest_regs.rax != 0xABCD) { > + report("test vmresume", 0); > + return VMX_HALT; > + } > + current->guest_regs.rax = 0xFFFF; > + vmcs_write(GUEST_RIP, guest_rip + 3); > + return VMX_RESUME; > + } > + return VMX_HALT; > +} > + > + > +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs > + NULL means use default func */ > +static struct vmx_test vmx_tests[] = { > + { "null", NULL, NULL, NULL, NULL, {0} }, Let's convert the "null" test into a proper one, removing all the current != NULL checks from the default handlers. > + { "vmenter", NULL, vmenter_main, vmenter_exit_handler, NULL, {0} }, > +}; > + > +int main(void) > +{ > + int nr, i; > + > + setup_vm(); > + setup_idt(); > + > + if (test_vmx_capability() != 0) { > + printf("ERROR : vmx not supported, check +vmx option\n"); > + goto exit; > + } > + init_vmx(); > + if (test_vmxon() != 0) > + goto exit; > + test_vmptrld(); > + test_vmclear(); > + test_vmptrst(); > + init_vmcs(&vmcs_root); > + if (setjmp(env) == 0) { > + vmx_run(); > + /* Should not reach here */ > + report("test vmlaunch", 0); > + goto exit; > + } > + test_vmxoff(); > + > + nr = ARRAY_SIZE(vmx_tests); No need for a separate nr variable here. > + for (i = 0; i < nr; ++i) { > + if (test_run(&vmx_tests[i])) > + goto exit; > + } > + > +exit: > + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); > + return fails ? 1 : 0; > +} Nice work! I was just wondering how hard it would be to prepare the infrastructure for multiple L2 guests as well. But that could also be done later on once there is a concrete test on the table that requires this. Jan
On Wed, Jul 17, 2013 at 2:26 PM, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2013-07-17 08:05, Arthur Chunqi Li wrote: >> This is the first version for VMX nested environment test case. It >> contains the basic VMX instructions test cases, including VMXON/ >> VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch >> also tests the basic execution routine in VMX nested environment and >> let the VM print "Hello World" to inform its successfully run. >> >> New files added: >> x86/vmx.h : contains all VMX related macro declerations >> x86/vmx.c : main file for VMX nested test case >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> --- >> config-x86-common.mak | 2 + >> config-x86_64.mak | 1 + >> lib/x86/msr.h | 5 + >> x86/cstart64.S | 4 + >> x86/unittests.cfg | 6 + >> x86/vmx.c | 676 +++++++++++++++++++++++++++++++++++++++++++++++++ >> x86/vmx.h | 419 ++++++++++++++++++++++++++++++ >> 7 files changed, 1113 insertions(+) >> create mode 100644 x86/vmx.c >> create mode 100644 x86/vmx.h >> >> diff --git a/config-x86-common.mak b/config-x86-common.mak >> index 455032b..34a41e1 100644 >> --- a/config-x86-common.mak >> +++ b/config-x86-common.mak >> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o >> >> $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o >> >> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o >> + >> arch_clean: >> $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ >> $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o >> diff --git a/config-x86_64.mak b/config-x86_64.mak >> index 91ffcce..5d9b22a 100644 >> --- a/config-x86_64.mak >> +++ b/config-x86_64.mak >> @@ -11,5 +11,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ >> $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ >> $(TEST_DIR)/pcid.flat >> tests += $(TEST_DIR)/svm.flat >> +tests += $(TEST_DIR)/vmx.flat >> >> include config-x86-common.mak >> diff --git a/lib/x86/msr.h b/lib/x86/msr.h >> index 509a421..281255a 100644 >> --- a/lib/x86/msr.h >> +++ b/lib/x86/msr.h >> @@ -396,6 +396,11 @@ >> #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a >> #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b >> #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c >> +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d >> +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e >> +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f >> +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 >> + >> >> /* AMD-V MSRs */ >> >> diff --git a/x86/cstart64.S b/x86/cstart64.S >> index 24df5f8..0fe76da 100644 >> --- a/x86/cstart64.S >> +++ b/x86/cstart64.S >> @@ -4,6 +4,10 @@ >> .globl boot_idt >> boot_idt = 0 >> >> +.globl idt_descr >> +.globl tss_descr >> +.globl gdt64_desc >> + >> ipi_vector = 0x20 >> >> max_cpus = 64 >> diff --git a/x86/unittests.cfg b/x86/unittests.cfg >> index bc9643e..85c36aa 100644 >> --- a/x86/unittests.cfg >> +++ b/x86/unittests.cfg >> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" >> file = pcid.flat >> extra_params = -cpu qemu64,+pcid >> arch = x86_64 >> + >> +[vmx] >> +file = vmx.flat >> +extra_params = -cpu host,+vmx >> +arch = x86_64 >> + >> diff --git a/x86/vmx.c b/x86/vmx.c >> new file mode 100644 >> index 0000000..af48fce >> --- /dev/null >> +++ b/x86/vmx.c >> @@ -0,0 +1,676 @@ >> +#include "libcflat.h" >> +#include "processor.h" >> +#include "vm.h" >> +#include "desc.h" >> +#include "vmx.h" >> +#include "msr.h" >> +#include "smp.h" >> +#include "io.h" >> +#include "setjmp.h" >> + >> +int fails = 0, tests = 0; >> +u32 *vmxon_region; >> +struct vmcs *vmcs_root; >> +void *io_bmp1, *io_bmp2; >> +void *msr_bmp; >> +u32 vpid_ctr; >> +char *guest_stack, *host_stack; >> +char *guest_syscall_stack, *host_syscall_stack; >> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; >> +ulong fix_cr0_set, fix_cr0_clr; >> +ulong fix_cr4_set, fix_cr4_clr; >> +struct regs regs; >> +jmp_buf env; >> +struct vmx_test *current; >> + >> +extern u64 gdt64_desc[]; >> +extern u64 idt_descr[]; >> +extern u64 tss_descr[]; >> +extern void *entry_vmx; >> +extern void *entry_sysenter; >> +extern void *guest_entry; >> + >> +void report(const char *name, int result) >> +{ >> + ++tests; >> + if (result) >> + printf("PASS: %s\n", name); >> + else { >> + printf("FAIL: %s\n", name); >> + ++fails; >> + } >> +} >> + >> +inline u64 get_rflags(void) >> +{ >> + u64 r; >> + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); >> + return r; >> +} >> + >> +inline void set_rflags(u64 r) >> +{ >> + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); >> +} >> + >> +int vmcs_clear(struct vmcs *vmcs) >> +{ >> + bool ret; >> + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); >> + return ret; >> +} >> + >> +u64 vmcs_read(enum Encoding enc) >> +{ >> + u64 val; >> + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); >> + return val; >> +} >> + >> +int vmcs_write(enum Encoding enc, u64 val) >> +{ >> + bool ret; >> + asm volatile ("vmwrite %1, %2; setbe %0" >> + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); >> + return ret; >> +} >> + >> +int make_vmcs_current(struct vmcs *vmcs) >> +{ >> + bool ret; >> + >> + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); >> + return ret; >> +} >> + >> +int save_vmcs(struct vmcs **vmcs) >> +{ >> + bool ret; >> + >> + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); >> + return ret; >> +} >> + >> +/* entry_vmx */ >> +asm( >> + ".align 4, 0x90\n\t" >> + ".globl entry_vmx\n\t" >> + "entry_vmx:\n\t" >> + SAVE_GPR >> + " call default_exit_handler\n\t" >> + /* Should not reach here*/ >> + " mov $1, %eax\n\t" >> + " call exit\n\t" >> +); >> + >> +/* entry_sysenter */ >> +asm( >> + ".align 4, 0x90\n\t" >> + ".globl entry_sysenter\n\t" >> + "entry_sysenter:\n\t" >> + SAVE_GPR >> + " and $0xf, %rax\n\t" >> + " push %rax\n\t" >> + " call default_syscall_handler\n\t" >> +); >> + >> +void default_syscall_handler(u64 syscall_no) >> +{ >> + if (current != NULL && current->syscall_handler != NULL) { >> + current->syscall_handler(syscall_no); >> + return; >> + } >> + printf("Here in default syscall_handler,"); >> + printf("syscall_no = %d\n", syscall_no); >> + printf("Nothing is done here.\n"); >> +} >> + >> +static inline void vmx_run() >> +{ >> + bool ret; >> + /* printf("Now run vm.\n"); */ >> + asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret)); >> + printf("VMLAUNCH error, ret=%d\n", ret); >> +} >> + >> +static inline void vmx_resume() >> +{ >> + asm volatile(LOAD_GPR >> + "vmresume\n\t"); >> + /* VMRESUME fail if reach here */ >> +} >> + >> +static inline int vmx_on() >> +{ >> + bool ret; >> + asm volatile ("vmxon %1; setbe %0\n\t" >> + : "=q"(ret) : "m"(vmxon_region) : "cc"); >> + return ret; >> +} >> + >> +static inline int vmx_off() >> +{ >> + bool ret; >> + asm volatile("vmxoff; setbe %0\n\t" >> + : "=q"(ret) : : "cc"); >> + return ret; >> +} >> + >> +void print_vmexit_info() >> +{ >> + u64 guest_rip, guest_rsp; >> + ulong reason = vmcs_read(EXI_REASON) & 0xff; >> + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); >> + guest_rip = vmcs_read(GUEST_RIP); >> + guest_rsp = vmcs_read(GUEST_RSP); >> + printf("VMEXIT info:\n"); >> + printf("\tvmexit reason = %d\n", reason); >> + printf("\texit qualification = 0x%x\n", exit_qual); >> + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); >> + printf("\tguest_rip = 0x%llx\n", guest_rip); >> + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", >> + regs.rax, regs.rbx, regs.rcx, regs.rdx); >> + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", >> + guest_rsp, regs.rbp, regs.rsi, regs.rdi); >> + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", >> + regs.r8, regs.r9, regs.r10, regs.r11); >> + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", >> + regs.r12, regs.r13, regs.r14, regs.r15); >> +} >> + >> +void test_vmclear(void) >> +{ >> + u64 rflags; >> + >> + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + set_rflags(rflags); >> + report("test vmclear", vmcs_clear(vmcs_root) == 0); >> +} >> + >> +void test_vmxoff(void) >> +{ >> + int ret; >> + u64 rflags; >> + >> + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + set_rflags(rflags); >> + ret = vmx_off(); >> + report("test vmxoff", !ret); >> +} >> + >> +/* This function should not return directly, only three kinds >> + of return is permitted : goto vm_exit, goto vm_resume >> + and longjmp to previous set jmp_buf. >> + For test case defined exit_handler, only three kinds of return >> + value are permitted : VMX_EXIT, VMX_RESUME and VMX_HALT, >> + which coressponding to three actions above, other return >> + values will cause error message. */ >> +void default_exit_handler() >> +{ >> + u64 guest_rip; >> + ulong reason; >> + int ret; >> + >> + if (current != NULL && current->exit_handler != NULL) { >> + current->exits ++; >> + current->guest_regs = regs; >> + ret = current->exit_handler(); >> + regs = current->guest_regs; >> + switch (ret) { >> + case VMX_HALT: >> + goto vmx_halt; >> + case VMX_RESUME: >> + goto vmx_resume; >> + case VMX_EXIT: >> + goto vmx_exit; >> + default: >> + printf("ERROR : Invalid exit_handler return val, %d.\n" >> + , ret); >> + goto vmx_exit; >> + } >> + } >> + >> + if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG) >> + && !(rdmsr(MSR_EFER) & EFER_LMA)) >> + printf("ERROR : PDPTEs should be checked\n"); >> + >> + guest_rip = vmcs_read(GUEST_RIP); >> + reason = vmcs_read(EXI_REASON) & 0xff; >> + >> + switch (reason) { >> + case VMX_VMCALL: >> + print_vmexit_info(); >> + vmcs_write(GUEST_RIP, guest_rip + 3); >> + goto vmx_resume; >> + case VMX_HLT: >> + goto vmx_halt; >> + default: >> + break; >> + } >> + printf("ERROR : Unhandled vmx exit.\n"); >> + print_vmexit_info(); >> +vmx_halt: >> + /* printf("VM exit.\n"); */ >> + longjmp(env, 1); >> + /* Should not reach here */ >> +vmx_exit: >> + exit(-1); >> +vmx_resume: >> + vmx_resume(); >> + /* Should not reach here */ >> + exit(-1); >> +} >> + >> +/* guest_entry */ >> +asm( >> + ".align 4, 0x90\n\t" >> + ".globl entry_guest\n\t" >> + "guest_entry:\n\t" >> + " call default_guest_main\n\t" >> + " hlt\n\t" >> +); >> + >> +void default_guest_main(void) >> +{ >> + if (current != NULL && current->guest_main != NULL) { >> + current->guest_main(); >> + return; >> + } >> + /* Here is default guest_main, print Hello World */ >> + printf("\tHello World, this is default guest main!\n"); >> +} >> + >> +void init_vmcs_ctrl(void) >> +{ >> + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ >> + /* 26.2.1.1 */ >> + vmcs_write(PIN_CONTROLS, ctrl_pin); >> + /* Disable VMEXIT of IO instruction */ >> + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); >> + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { >> + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; >> + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); >> + } >> + vmcs_write(CR3_TARGET_COUNT, 0); >> + io_bmp1 = alloc_page(); >> + io_bmp2 = alloc_page(); >> + memset(io_bmp1, 0, PAGE_SIZE); >> + memset(io_bmp2, 0, PAGE_SIZE); >> + vmcs_write(IO_BITMAP_A, (u64)io_bmp1); >> + vmcs_write(IO_BITMAP_B, (u64)io_bmp2); >> + msr_bmp = alloc_page(); >> + memset(msr_bmp, 0, PAGE_SIZE); >> + vmcs_write(MSR_BITMAP, (u64)msr_bmp); >> + vmcs_write(VPID, ++vpid_ctr); >> +} >> + >> +void init_vmcs_host(void) >> +{ >> + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ >> + /* 26.2.1.2 */ >> + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); >> + >> + /* 26.2.1.3 */ >> + vmcs_write(ENT_CONTROLS, ctrl_enter); >> + vmcs_write(EXI_CONTROLS, ctrl_exit); >> + >> + /* 26.2.2 */ >> + vmcs_write(HOST_CR0, read_cr0()); >> + vmcs_write(HOST_CR3, read_cr3()); >> + vmcs_write(HOST_CR4, read_cr4()); >> + vmcs_write(HOST_SYSENTER_ESP, >> + (u64)(host_syscall_stack + PAGE_SIZE - 1)); >> + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); >> + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); >> + >> + /* 26.2.3 */ >> + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); >> + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); >> + vmcs_write(HOST_BASE_TR, (u64)tss_descr); >> + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); >> + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); >> + vmcs_write(HOST_BASE_FS, 0); >> + vmcs_write(HOST_BASE_GS, 0); >> + >> + /* Set other vmcs area */ >> + vmcs_write(PF_ERROR_MASK, 0); >> + vmcs_write(PF_ERROR_MATCH, 0); >> + vmcs_write(VMCS_LINK_PTR, ~0ul); >> + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); >> + vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1)); >> + vmcs_write(HOST_RIP, (u64)(&entry_vmx)); >> +} >> + >> +void init_vmcs_guest(void) >> +{ >> + /* 26.3 CHECKING AND LOADING GUEST STATE */ >> + ulong guest_cr0, guest_cr4, guest_cr3; >> + /* 26.3.1.1 */ >> + guest_cr0 = read_cr0(); >> + guest_cr4 = read_cr4(); >> + guest_cr3 = read_cr3(); >> + if (ctrl_enter & ENT_GUEST_64) { >> + guest_cr0 |= CR0_PG; >> + guest_cr4 |= CR4_PAE; >> + } >> + if ((ctrl_enter & ENT_GUEST_64) == 0) >> + guest_cr4 &= (~CR4_PCIDE); >> + if (guest_cr0 & CR0_PG) >> + guest_cr0 |= CR0_PE; >> + vmcs_write(GUEST_CR0, guest_cr0); >> + vmcs_write(GUEST_CR3, guest_cr3); >> + vmcs_write(GUEST_CR4, guest_cr4); >> + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); >> + vmcs_write(GUEST_SYSENTER_ESP, >> + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); >> + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); >> + vmcs_write(GUEST_DR7, 0); >> + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); >> + >> + /* 26.3.1.2 */ >> + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); >> + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); >> + vmcs_write(GUEST_SEL_LDTR, 0); >> + >> + vmcs_write(GUEST_BASE_CS, 0); >> + vmcs_write(GUEST_BASE_ES, 0); >> + vmcs_write(GUEST_BASE_SS, 0); >> + vmcs_write(GUEST_BASE_DS, 0); >> + vmcs_write(GUEST_BASE_FS, 0); >> + vmcs_write(GUEST_BASE_GS, 0); >> + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); >> + vmcs_write(GUEST_BASE_LDTR, 0); >> + >> + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); >> + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); >> + >> + vmcs_write(GUEST_AR_CS, 0xa09b); >> + vmcs_write(GUEST_AR_DS, 0xc093); >> + vmcs_write(GUEST_AR_ES, 0xc093); >> + vmcs_write(GUEST_AR_FS, 0xc093); >> + vmcs_write(GUEST_AR_GS, 0xc093); >> + vmcs_write(GUEST_AR_SS, 0xc093); >> + vmcs_write(GUEST_AR_LDTR, 0x82); >> + vmcs_write(GUEST_AR_TR, 0x8b); >> + >> + /* 26.3.1.3 */ >> + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); >> + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); >> + vmcs_write(GUEST_LIMIT_GDTR, >> + ((struct descr *)gdt64_desc)->limit & 0xffff); >> + vmcs_write(GUEST_LIMIT_IDTR, >> + ((struct descr *)idt_descr)->limit & 0xffff); >> + >> + /* 26.3.1.4 */ >> + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); >> + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); >> + vmcs_write(GUEST_RFLAGS, 0x2); >> + >> + /* 26.3.1.5 */ >> + vmcs_write(GUEST_ACTV_STATE, 0); >> + vmcs_write(GUEST_INTR_STATE, 0); >> +} >> + >> +int init_vmcs(struct vmcs **vmcs) >> +{ >> + *vmcs = alloc_page(); >> + memset(*vmcs, 0, PAGE_SIZE); >> + (*vmcs)->revision_id = basic.revision; >> + /* vmclear first to init vmcs */ >> + if (vmcs_clear(*vmcs)) { >> + printf("%s : vmcs_clear error\n", __func__); >> + return 1; >> + } >> + >> + if (make_vmcs_current(*vmcs)) { >> + printf("%s : make_vmcs_current error\n", __func__); >> + return 1; >> + } >> + >> + /* All settings to pin/exit/enter/cpu >> + control fields should be placed here */ >> + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; >> + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; >> + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); >> + ctrl_cpu[0] |= CPU_HLT; >> + /* DIsable IO instruction VMEXIT now */ >> + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); >> + ctrl_cpu[1] = 0; >> + >> + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; >> + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; >> + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; >> + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; >> + >> + init_vmcs_ctrl(); >> + init_vmcs_host(); >> + init_vmcs_guest(); >> + return 0; >> +} >> + >> +void init_vmx(void) >> +{ >> + vmxon_region = alloc_page(); >> + memset(vmxon_region, 0, PAGE_SIZE); >> + >> + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); >> + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); >> + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); >> + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); >> + basic.val = rdmsr(MSR_IA32_VMX_BASIC); >> + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN >> + : MSR_IA32_VMX_PINBASED_CTLS); >> + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT >> + : MSR_IA32_VMX_EXIT_CTLS); >> + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY >> + : MSR_IA32_VMX_ENTRY_CTLS); >> + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC >> + : MSR_IA32_VMX_PROCBASED_CTLS); >> + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) >> + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); >> + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) >> + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); >> + >> + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); >> + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE); >> + >> + *vmxon_region = basic.revision; >> + current = NULL; >> + >> + guest_stack = alloc_page(); >> + memset(guest_stack, 0, PAGE_SIZE); >> + guest_syscall_stack = alloc_page(); >> + memset(guest_syscall_stack, 0, PAGE_SIZE); >> + host_stack = alloc_page(); >> + memset(host_stack, 0, PAGE_SIZE); >> + host_syscall_stack = alloc_page(); >> + memset(host_syscall_stack, 0, PAGE_SIZE); >> +} >> + >> +int test_vmx_capability(void) >> +{ >> + struct cpuid r; >> + u64 ret1, ret2; >> + u64 ia32_feature_control; >> + r = cpuid(1); >> + ret1 = ((r.c) >> 5) & 1; >> + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); >> + ret2 = ((ia32_feature_control & 0x5) == 0x5); >> + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { >> + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); >> + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); >> + ret2 = ((ia32_feature_control & 0x5) == 0x5); >> + } >> + report("test vmx capability", ret1 & ret2); >> + return !(ret1 & ret2); >> +} >> + >> +int test_vmxon(void) >> +{ >> + int ret; >> + u64 rflags; >> + >> + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + set_rflags(rflags); >> + ret = vmx_on(); >> + report("test vmxon", !ret); >> + return ret; >> +} >> + >> +void test_vmptrld(void) >> +{ >> + u64 rflags; >> + struct vmcs *vmcs; >> + >> + vmcs = alloc_page(); >> + vmcs->revision_id = basic.revision; >> + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + set_rflags(rflags); >> + report("test vmptrld", make_vmcs_current(vmcs) == 0); >> +} >> + >> +void test_vmptrst(void) >> +{ >> + u64 rflags; >> + int ret; >> + struct vmcs *vmcs1, *vmcs2; >> + >> + vmcs1 = alloc_page(); >> + memset(vmcs1, 0, PAGE_SIZE); >> + init_vmcs(&vmcs1); >> + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + set_rflags(rflags); >> + ret = save_vmcs(&vmcs2); >> + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); >> +} >> + >> +int test_run(struct vmx_test *test) >> +{ >> + if (test == NULL) { > > Not possible, is it? > >> + printf("%s : test is NULL.\n", __func__); >> + return 1; >> + } >> + if (test->name == NULL) >> + test->name = "(no name)"; >> + if (vmx_on()) { >> + printf("%s : vmxon failed.\n", __func__); >> + return 2; >> + } >> + init_vmcs(&(test->vmcs)); >> + /* Directly call test->init is ok here, init_vmcs has done >> + vmcs init, vmclear and vmptrld*/ >> + if (test->init) >> + test->init(test->vmcs); >> + test->exits = 0; >> + current = test; >> + regs = test->guest_regs; >> + printf("\nTest suite : %s\n", test->name); >> + if (setjmp(env) == 0) { >> + vmx_run(); >> + /* Should not reach here */ >> + printf("%s : vmx_run failed.\n", __func__); >> + } >> + if (vmx_off()) { >> + printf("%s : vmxoff failed.\n", __func__); >> + return 2; >> + } >> + return 0; >> +} >> + >> +void vmenter_main() >> +{ >> + u64 rax; >> + u64 rsp, resume_rsp; >> + >> + report("test vmlaunch", 1); >> + >> + rax = 0; > > Can already be initialized above. > >> + asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp)); >> + asm volatile("mov %2, %%rax\n\t" >> + "vmcall\n\t" >> + "mov %%rax, %0\n\t" >> + "mov %%rsp, %1\n\t" >> + : "=r"(rax), "=r"(resume_rsp) >> + : "g"(0xABCD)); >> + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); >> +} >> + >> +int vmenter_exit_handler() >> +{ >> + u64 guest_rip; >> + ulong reason; >> + >> + guest_rip = vmcs_read(GUEST_RIP); >> + reason = vmcs_read(EXI_REASON) & 0xff; >> + if (reason == VMX_VMCALL) { > > I suppose any reason != VMX_VMCALL or VMX_HALT means test failure, no? > Then catch it. > >> + if (current->guest_regs.rax != 0xABCD) { >> + report("test vmresume", 0); >> + return VMX_HALT; >> + } >> + current->guest_regs.rax = 0xFFFF; >> + vmcs_write(GUEST_RIP, guest_rip + 3); >> + return VMX_RESUME; >> + } >> + return VMX_HALT; >> +} >> + >> + >> +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs >> + NULL means use default func */ >> +static struct vmx_test vmx_tests[] = { >> + { "null", NULL, NULL, NULL, NULL, {0} }, > > Let's convert the "null" test into a proper one, removing all the > current != NULL checks from the default handlers. > >> + { "vmenter", NULL, vmenter_main, vmenter_exit_handler, NULL, {0} }, >> +}; >> + >> +int main(void) >> +{ >> + int nr, i; >> + >> + setup_vm(); >> + setup_idt(); >> + >> + if (test_vmx_capability() != 0) { >> + printf("ERROR : vmx not supported, check +vmx option\n"); >> + goto exit; >> + } >> + init_vmx(); >> + if (test_vmxon() != 0) >> + goto exit; >> + test_vmptrld(); >> + test_vmclear(); >> + test_vmptrst(); >> + init_vmcs(&vmcs_root); >> + if (setjmp(env) == 0) { >> + vmx_run(); >> + /* Should not reach here */ >> + report("test vmlaunch", 0); >> + goto exit; >> + } >> + test_vmxoff(); >> + >> + nr = ARRAY_SIZE(vmx_tests); > > No need for a separate nr variable here. > >> + for (i = 0; i < nr; ++i) { >> + if (test_run(&vmx_tests[i])) >> + goto exit; >> + } >> + >> +exit: >> + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); >> + return fails ? 1 : 0; >> +} > > Nice work! > > I was just wondering how hard it would be to prepare the infrastructure > for multiple L2 guests as well. But that could also be done later on > once there is a concrete test on the table that requires this. I have also considered about this scenario and I think it is not hard to convert this to a multiple L2 guests one. We just need to enclose the guest_main function to something like exec_ctxt with its own registers and stack. Besides, if we want to support SMP, we can write the pCPU and vCPU module below all current work and set current test_suite module on them. I also think we can handle them later on. Arthur > > Jan > > -- Arthur Chunqi Li Department of Computer Science School of EECS Peking University Beijing, China -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 17/07/2013 08:05, Arthur Chunqi Li ha scritto: > + guest_rip = vmcs_read(GUEST_RIP); > + reason = vmcs_read(EXI_REASON) & 0xff; > + > + switch (reason) { > + case VMX_VMCALL: > + print_vmexit_info(); > + vmcs_write(GUEST_RIP, guest_rip + 3); > + goto vmx_resume; > + case VMX_HLT: > + goto vmx_halt; > + default: > + break; > + } Could you reorganize this code, so that it reuses the switch statement you have in the "current != NULL && current->exit_handler != NULL" case? In other words, it looks like this code: > + if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG) > + && !(rdmsr(MSR_EFER) & EFER_LMA)) > + printf("ERROR : PDPTEs should be checked\n"); > + > + guest_rip = vmcs_read(GUEST_RIP); > + reason = vmcs_read(EXI_REASON) & 0xff; > + > + switch (reason) { > + case VMX_VMCALL: > + print_vmexit_info(); > + vmcs_write(GUEST_RIP, guest_rip + 3); > + goto vmx_resume; > + case VMX_HLT: > + goto vmx_halt; > + default: > + break; > + } > + printf("ERROR : Unhandled vmx exit.\n"); > + print_vmexit_info(); is only alternative to this: > + current->exits ++; > + current->guest_regs = regs; > + ret = current->exit_handler(); > + regs = current->guest_regs; Everything that comes after "regs = current->guest_regs;" should be outside the "if". Please tell me if I'm not clear. :) > +#define VMX_HALT 1 > +#define VMX_EXIT 2 > +#define VMX_RESUME 3 These are not VMX constants, so perhaps you can rename them to VMXTEST_{HANDLE,EXIT,RESUME}? Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/config-x86-common.mak b/config-x86-common.mak index 455032b..34a41e1 100644 --- a/config-x86-common.mak +++ b/config-x86-common.mak @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o + arch_clean: $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o diff --git a/config-x86_64.mak b/config-x86_64.mak index 91ffcce..5d9b22a 100644 --- a/config-x86_64.mak +++ b/config-x86_64.mak @@ -11,5 +11,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ $(TEST_DIR)/pcid.flat tests += $(TEST_DIR)/svm.flat +tests += $(TEST_DIR)/vmx.flat include config-x86-common.mak diff --git a/lib/x86/msr.h b/lib/x86/msr.h index 509a421..281255a 100644 --- a/lib/x86/msr.h +++ b/lib/x86/msr.h @@ -396,6 +396,11 @@ #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 + /* AMD-V MSRs */ diff --git a/x86/cstart64.S b/x86/cstart64.S index 24df5f8..0fe76da 100644 --- a/x86/cstart64.S +++ b/x86/cstart64.S @@ -4,6 +4,10 @@ .globl boot_idt boot_idt = 0 +.globl idt_descr +.globl tss_descr +.globl gdt64_desc + ipi_vector = 0x20 max_cpus = 64 diff --git a/x86/unittests.cfg b/x86/unittests.cfg index bc9643e..85c36aa 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" file = pcid.flat extra_params = -cpu qemu64,+pcid arch = x86_64 + +[vmx] +file = vmx.flat +extra_params = -cpu host,+vmx +arch = x86_64 + diff --git a/x86/vmx.c b/x86/vmx.c new file mode 100644 index 0000000..af48fce --- /dev/null +++ b/x86/vmx.c @@ -0,0 +1,676 @@ +#include "libcflat.h" +#include "processor.h" +#include "vm.h" +#include "desc.h" +#include "vmx.h" +#include "msr.h" +#include "smp.h" +#include "io.h" +#include "setjmp.h" + +int fails = 0, tests = 0; +u32 *vmxon_region; +struct vmcs *vmcs_root; +void *io_bmp1, *io_bmp2; +void *msr_bmp; +u32 vpid_ctr; +char *guest_stack, *host_stack; +char *guest_syscall_stack, *host_syscall_stack; +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; +ulong fix_cr0_set, fix_cr0_clr; +ulong fix_cr4_set, fix_cr4_clr; +struct regs regs; +jmp_buf env; +struct vmx_test *current; + +extern u64 gdt64_desc[]; +extern u64 idt_descr[]; +extern u64 tss_descr[]; +extern void *entry_vmx; +extern void *entry_sysenter; +extern void *guest_entry; + +void report(const char *name, int result) +{ + ++tests; + if (result) + printf("PASS: %s\n", name); + else { + printf("FAIL: %s\n", name); + ++fails; + } +} + +inline u64 get_rflags(void) +{ + u64 r; + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); + return r; +} + +inline void set_rflags(u64 r) +{ + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); +} + +int vmcs_clear(struct vmcs *vmcs) +{ + bool ret; + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); + return ret; +} + +u64 vmcs_read(enum Encoding enc) +{ + u64 val; + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); + return val; +} + +int vmcs_write(enum Encoding enc, u64 val) +{ + bool ret; + asm volatile ("vmwrite %1, %2; setbe %0" + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); + return ret; +} + +int make_vmcs_current(struct vmcs *vmcs) +{ + bool ret; + + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); + return ret; +} + +int save_vmcs(struct vmcs **vmcs) +{ + bool ret; + + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); + return ret; +} + +/* entry_vmx */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_vmx\n\t" + "entry_vmx:\n\t" + SAVE_GPR + " call default_exit_handler\n\t" + /* Should not reach here*/ + " mov $1, %eax\n\t" + " call exit\n\t" +); + +/* entry_sysenter */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_sysenter\n\t" + "entry_sysenter:\n\t" + SAVE_GPR + " and $0xf, %rax\n\t" + " push %rax\n\t" + " call default_syscall_handler\n\t" +); + +void default_syscall_handler(u64 syscall_no) +{ + if (current != NULL && current->syscall_handler != NULL) { + current->syscall_handler(syscall_no); + return; + } + printf("Here in default syscall_handler,"); + printf("syscall_no = %d\n", syscall_no); + printf("Nothing is done here.\n"); +} + +static inline void vmx_run() +{ + bool ret; + /* printf("Now run vm.\n"); */ + asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret)); + printf("VMLAUNCH error, ret=%d\n", ret); +} + +static inline void vmx_resume() +{ + asm volatile(LOAD_GPR + "vmresume\n\t"); + /* VMRESUME fail if reach here */ +} + +static inline int vmx_on() +{ + bool ret; + asm volatile ("vmxon %1; setbe %0\n\t" + : "=q"(ret) : "m"(vmxon_region) : "cc"); + return ret; +} + +static inline int vmx_off() +{ + bool ret; + asm volatile("vmxoff; setbe %0\n\t" + : "=q"(ret) : : "cc"); + return ret; +} + +void print_vmexit_info() +{ + u64 guest_rip, guest_rsp; + ulong reason = vmcs_read(EXI_REASON) & 0xff; + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); + guest_rip = vmcs_read(GUEST_RIP); + guest_rsp = vmcs_read(GUEST_RSP); + printf("VMEXIT info:\n"); + printf("\tvmexit reason = %d\n", reason); + printf("\texit qualification = 0x%x\n", exit_qual); + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); + printf("\tguest_rip = 0x%llx\n", guest_rip); + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", + regs.rax, regs.rbx, regs.rcx, regs.rdx); + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", + guest_rsp, regs.rbp, regs.rsi, regs.rdi); + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", + regs.r8, regs.r9, regs.r10, regs.r11); + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", + regs.r12, regs.r13, regs.r14, regs.r15); +} + +void test_vmclear(void) +{ + u64 rflags; + + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + set_rflags(rflags); + report("test vmclear", vmcs_clear(vmcs_root) == 0); +} + +void test_vmxoff(void) +{ + int ret; + u64 rflags; + + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + set_rflags(rflags); + ret = vmx_off(); + report("test vmxoff", !ret); +} + +/* This function should not return directly, only three kinds + of return is permitted : goto vm_exit, goto vm_resume + and longjmp to previous set jmp_buf. + For test case defined exit_handler, only three kinds of return + value are permitted : VMX_EXIT, VMX_RESUME and VMX_HALT, + which coressponding to three actions above, other return + values will cause error message. */ +void default_exit_handler() +{ + u64 guest_rip; + ulong reason; + int ret; + + if (current != NULL && current->exit_handler != NULL) { + current->exits ++; + current->guest_regs = regs; + ret = current->exit_handler(); + regs = current->guest_regs; + switch (ret) { + case VMX_HALT: + goto vmx_halt; + case VMX_RESUME: + goto vmx_resume; + case VMX_EXIT: + goto vmx_exit; + default: + printf("ERROR : Invalid exit_handler return val, %d.\n" + , ret); + goto vmx_exit; + } + } + + if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG) + && !(rdmsr(MSR_EFER) & EFER_LMA)) + printf("ERROR : PDPTEs should be checked\n"); + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + + switch (reason) { + case VMX_VMCALL: + print_vmexit_info(); + vmcs_write(GUEST_RIP, guest_rip + 3); + goto vmx_resume; + case VMX_HLT: + goto vmx_halt; + default: + break; + } + printf("ERROR : Unhandled vmx exit.\n"); + print_vmexit_info(); +vmx_halt: + /* printf("VM exit.\n"); */ + longjmp(env, 1); + /* Should not reach here */ +vmx_exit: + exit(-1); +vmx_resume: + vmx_resume(); + /* Should not reach here */ + exit(-1); +} + +/* guest_entry */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_guest\n\t" + "guest_entry:\n\t" + " call default_guest_main\n\t" + " hlt\n\t" +); + +void default_guest_main(void) +{ + if (current != NULL && current->guest_main != NULL) { + current->guest_main(); + return; + } + /* Here is default guest_main, print Hello World */ + printf("\tHello World, this is default guest main!\n"); +} + +void init_vmcs_ctrl(void) +{ + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ + /* 26.2.1.1 */ + vmcs_write(PIN_CONTROLS, ctrl_pin); + /* Disable VMEXIT of IO instruction */ + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); + } + vmcs_write(CR3_TARGET_COUNT, 0); + io_bmp1 = alloc_page(); + io_bmp2 = alloc_page(); + memset(io_bmp1, 0, PAGE_SIZE); + memset(io_bmp2, 0, PAGE_SIZE); + vmcs_write(IO_BITMAP_A, (u64)io_bmp1); + vmcs_write(IO_BITMAP_B, (u64)io_bmp2); + msr_bmp = alloc_page(); + memset(msr_bmp, 0, PAGE_SIZE); + vmcs_write(MSR_BITMAP, (u64)msr_bmp); + vmcs_write(VPID, ++vpid_ctr); +} + +void init_vmcs_host(void) +{ + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ + /* 26.2.1.2 */ + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); + + /* 26.2.1.3 */ + vmcs_write(ENT_CONTROLS, ctrl_enter); + vmcs_write(EXI_CONTROLS, ctrl_exit); + + /* 26.2.2 */ + vmcs_write(HOST_CR0, read_cr0()); + vmcs_write(HOST_CR3, read_cr3()); + vmcs_write(HOST_CR4, read_cr4()); + vmcs_write(HOST_SYSENTER_ESP, + (u64)(host_syscall_stack + PAGE_SIZE - 1)); + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); + + /* 26.2.3 */ + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); + vmcs_write(HOST_BASE_TR, (u64)tss_descr); + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); + vmcs_write(HOST_BASE_FS, 0); + vmcs_write(HOST_BASE_GS, 0); + + /* Set other vmcs area */ + vmcs_write(PF_ERROR_MASK, 0); + vmcs_write(PF_ERROR_MATCH, 0); + vmcs_write(VMCS_LINK_PTR, ~0ul); + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); + vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1)); + vmcs_write(HOST_RIP, (u64)(&entry_vmx)); +} + +void init_vmcs_guest(void) +{ + /* 26.3 CHECKING AND LOADING GUEST STATE */ + ulong guest_cr0, guest_cr4, guest_cr3; + /* 26.3.1.1 */ + guest_cr0 = read_cr0(); + guest_cr4 = read_cr4(); + guest_cr3 = read_cr3(); + if (ctrl_enter & ENT_GUEST_64) { + guest_cr0 |= CR0_PG; + guest_cr4 |= CR4_PAE; + } + if ((ctrl_enter & ENT_GUEST_64) == 0) + guest_cr4 &= (~CR4_PCIDE); + if (guest_cr0 & CR0_PG) + guest_cr0 |= CR0_PE; + vmcs_write(GUEST_CR0, guest_cr0); + vmcs_write(GUEST_CR3, guest_cr3); + vmcs_write(GUEST_CR4, guest_cr4); + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); + vmcs_write(GUEST_SYSENTER_ESP, + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); + vmcs_write(GUEST_DR7, 0); + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); + + /* 26.3.1.2 */ + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); + vmcs_write(GUEST_SEL_LDTR, 0); + + vmcs_write(GUEST_BASE_CS, 0); + vmcs_write(GUEST_BASE_ES, 0); + vmcs_write(GUEST_BASE_SS, 0); + vmcs_write(GUEST_BASE_DS, 0); + vmcs_write(GUEST_BASE_FS, 0); + vmcs_write(GUEST_BASE_GS, 0); + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); + vmcs_write(GUEST_BASE_LDTR, 0); + + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); + + vmcs_write(GUEST_AR_CS, 0xa09b); + vmcs_write(GUEST_AR_DS, 0xc093); + vmcs_write(GUEST_AR_ES, 0xc093); + vmcs_write(GUEST_AR_FS, 0xc093); + vmcs_write(GUEST_AR_GS, 0xc093); + vmcs_write(GUEST_AR_SS, 0xc093); + vmcs_write(GUEST_AR_LDTR, 0x82); + vmcs_write(GUEST_AR_TR, 0x8b); + + /* 26.3.1.3 */ + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); + vmcs_write(GUEST_LIMIT_GDTR, + ((struct descr *)gdt64_desc)->limit & 0xffff); + vmcs_write(GUEST_LIMIT_IDTR, + ((struct descr *)idt_descr)->limit & 0xffff); + + /* 26.3.1.4 */ + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); + vmcs_write(GUEST_RFLAGS, 0x2); + + /* 26.3.1.5 */ + vmcs_write(GUEST_ACTV_STATE, 0); + vmcs_write(GUEST_INTR_STATE, 0); +} + +int init_vmcs(struct vmcs **vmcs) +{ + *vmcs = alloc_page(); + memset(*vmcs, 0, PAGE_SIZE); + (*vmcs)->revision_id = basic.revision; + /* vmclear first to init vmcs */ + if (vmcs_clear(*vmcs)) { + printf("%s : vmcs_clear error\n", __func__); + return 1; + } + + if (make_vmcs_current(*vmcs)) { + printf("%s : make_vmcs_current error\n", __func__); + return 1; + } + + /* All settings to pin/exit/enter/cpu + control fields should be placed here */ + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); + ctrl_cpu[0] |= CPU_HLT; + /* DIsable IO instruction VMEXIT now */ + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); + ctrl_cpu[1] = 0; + + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; + + init_vmcs_ctrl(); + init_vmcs_host(); + init_vmcs_guest(); + return 0; +} + +void init_vmx(void) +{ + vmxon_region = alloc_page(); + memset(vmxon_region, 0, PAGE_SIZE); + + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); + basic.val = rdmsr(MSR_IA32_VMX_BASIC); + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN + : MSR_IA32_VMX_PINBASED_CTLS); + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT + : MSR_IA32_VMX_EXIT_CTLS); + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY + : MSR_IA32_VMX_ENTRY_CTLS); + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC + : MSR_IA32_VMX_PROCBASED_CTLS); + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE); + + *vmxon_region = basic.revision; + current = NULL; + + guest_stack = alloc_page(); + memset(guest_stack, 0, PAGE_SIZE); + guest_syscall_stack = alloc_page(); + memset(guest_syscall_stack, 0, PAGE_SIZE); + host_stack = alloc_page(); + memset(host_stack, 0, PAGE_SIZE); + host_syscall_stack = alloc_page(); + memset(host_syscall_stack, 0, PAGE_SIZE); +} + +int test_vmx_capability(void) +{ + struct cpuid r; + u64 ret1, ret2; + u64 ia32_feature_control; + r = cpuid(1); + ret1 = ((r.c) >> 5) & 1; + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + ret2 = ((ia32_feature_control & 0x5) == 0x5); + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + ret2 = ((ia32_feature_control & 0x5) == 0x5); + } + report("test vmx capability", ret1 & ret2); + return !(ret1 & ret2); +} + +int test_vmxon(void) +{ + int ret; + u64 rflags; + + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + set_rflags(rflags); + ret = vmx_on(); + report("test vmxon", !ret); + return ret; +} + +void test_vmptrld(void) +{ + u64 rflags; + struct vmcs *vmcs; + + vmcs = alloc_page(); + vmcs->revision_id = basic.revision; + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + set_rflags(rflags); + report("test vmptrld", make_vmcs_current(vmcs) == 0); +} + +void test_vmptrst(void) +{ + u64 rflags; + int ret; + struct vmcs *vmcs1, *vmcs2; + + vmcs1 = alloc_page(); + memset(vmcs1, 0, PAGE_SIZE); + init_vmcs(&vmcs1); + rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + set_rflags(rflags); + ret = save_vmcs(&vmcs2); + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); +} + +int test_run(struct vmx_test *test) +{ + if (test == NULL) { + printf("%s : test is NULL.\n", __func__); + return 1; + } + if (test->name == NULL) + test->name = "(no name)"; + if (vmx_on()) { + printf("%s : vmxon failed.\n", __func__); + return 2; + } + init_vmcs(&(test->vmcs)); + /* Directly call test->init is ok here, init_vmcs has done + vmcs init, vmclear and vmptrld*/ + if (test->init) + test->init(test->vmcs); + test->exits = 0; + current = test; + regs = test->guest_regs; + printf("\nTest suite : %s\n", test->name); + if (setjmp(env) == 0) { + vmx_run(); + /* Should not reach here */ + printf("%s : vmx_run failed.\n", __func__); + } + if (vmx_off()) { + printf("%s : vmxoff failed.\n", __func__); + return 2; + } + return 0; +} + +void vmenter_main() +{ + u64 rax; + u64 rsp, resume_rsp; + + report("test vmlaunch", 1); + + rax = 0; + asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp)); + asm volatile("mov %2, %%rax\n\t" + "vmcall\n\t" + "mov %%rax, %0\n\t" + "mov %%rsp, %1\n\t" + : "=r"(rax), "=r"(resume_rsp) + : "g"(0xABCD)); + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); +} + +int vmenter_exit_handler() +{ + u64 guest_rip; + ulong reason; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + if (reason == VMX_VMCALL) { + if (current->guest_regs.rax != 0xABCD) { + report("test vmresume", 0); + return VMX_HALT; + } + current->guest_regs.rax = 0xFFFF; + vmcs_write(GUEST_RIP, guest_rip + 3); + return VMX_RESUME; + } + return VMX_HALT; +} + + +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs + NULL means use default func */ +static struct vmx_test vmx_tests[] = { + { "null", NULL, NULL, NULL, NULL, {0} }, + { "vmenter", NULL, vmenter_main, vmenter_exit_handler, NULL, {0} }, +}; + +int main(void) +{ + int nr, i; + + setup_vm(); + setup_idt(); + + if (test_vmx_capability() != 0) { + printf("ERROR : vmx not supported, check +vmx option\n"); + goto exit; + } + init_vmx(); + if (test_vmxon() != 0) + goto exit; + test_vmptrld(); + test_vmclear(); + test_vmptrst(); + init_vmcs(&vmcs_root); + if (setjmp(env) == 0) { + vmx_run(); + /* Should not reach here */ + report("test vmlaunch", 0); + goto exit; + } + test_vmxoff(); + + nr = ARRAY_SIZE(vmx_tests); + for (i = 0; i < nr; ++i) { + if (test_run(&vmx_tests[i])) + goto exit; + } + +exit: + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); + return fails ? 1 : 0; +} diff --git a/x86/vmx.h b/x86/vmx.h new file mode 100644 index 0000000..7e412c5 --- /dev/null +++ b/x86/vmx.h @@ -0,0 +1,419 @@ +#ifndef __HYPERVISOR_H +#define __HYPERVISOR_H + +#include "libcflat.h" + +struct vmcs { + u32 revision_id; /* vmcs revision identifier */ + u32 abort; /* VMX-abort indicator */ + /* VMCS data */ + char data[0]; +}; + +struct regs { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 cr2; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; +}; + +struct vmx_test { + const char *name; + void (*init)(struct vmcs *vmcs); + void (*guest_main)(); + int (*exit_handler)(); + void (*syscall_handler)(u64 syscall_no); + struct regs guest_regs; + struct vmcs *vmcs; + int exits; +}; + +static union vmx_basic { + u64 val; + struct { + u32 revision; + u32 size:13, + : 3, + width:1, + dual:1, + type:4, + insouts:1, + ctrl:1; + }; +} basic; + +static union vmx_ctrl_pin { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_pin_rev; + +static union vmx_ctrl_cpu { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_cpu_rev[2]; + +static union vmx_ctrl_exit { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_exit_rev; + +static union vmx_ctrl_ent { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_enter_rev; + +static union vmx_ept_vpid { + u64 val; + struct { + u32:16, + super:2, + : 2, + invept:1, + : 11; + u32 invvpid:1; + }; +} ept_vpid; + +struct descr { + u16 limit; + u64 addr; +}; + +enum Encoding { + /* 16-Bit Control Fields */ + VPID = 0x0000ul, + /* Posted-interrupt notification vector */ + PINV = 0x0002ul, + /* EPTP index */ + EPTP_IDX = 0x0004ul, + + /* 16-Bit Guest State Fields */ + GUEST_SEL_ES = 0x0800ul, + GUEST_SEL_CS = 0x0802ul, + GUEST_SEL_SS = 0x0804ul, + GUEST_SEL_DS = 0x0806ul, + GUEST_SEL_FS = 0x0808ul, + GUEST_SEL_GS = 0x080aul, + GUEST_SEL_LDTR = 0x080cul, + GUEST_SEL_TR = 0x080eul, + GUEST_INT_STATUS = 0x0810ul, + + /* 16-Bit Host State Fields */ + HOST_SEL_ES = 0x0c00ul, + HOST_SEL_CS = 0x0c02ul, + HOST_SEL_SS = 0x0c04ul, + HOST_SEL_DS = 0x0c06ul, + HOST_SEL_FS = 0x0c08ul, + HOST_SEL_GS = 0x0c0aul, + HOST_SEL_TR = 0x0c0cul, + + /* 64-Bit Control Fields */ + IO_BITMAP_A = 0x2000ul, + IO_BITMAP_B = 0x2002ul, + MSR_BITMAP = 0x2004ul, + EXIT_MSR_ST_ADDR = 0x2006ul, + EXIT_MSR_LD_ADDR = 0x2008ul, + ENTER_MSR_LD_ADDR = 0x200aul, + VMCS_EXEC_PTR = 0x200cul, + TSC_OFFSET = 0x2010ul, + TSC_OFFSET_HI = 0x2011ul, + APIC_VIRT_ADDR = 0x2012ul, + APIC_ACCS_ADDR = 0x2014ul, + EPTP = 0x201aul, + EPTP_HI = 0x201bul, + + /* 64-Bit Readonly Data Field */ + INFO_PHYS_ADDR = 0x2400ul, + + /* 64-Bit Guest State */ + VMCS_LINK_PTR = 0x2800ul, + VMCS_LINK_PTR_HI = 0x2801ul, + GUEST_DEBUGCTL = 0x2802ul, + GUEST_DEBUGCTL_HI = 0x2803ul, + GUEST_EFER = 0x2806ul, + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, + GUEST_PDPTE = 0x280aul, + + /* 64-Bit Host State */ + HOST_EFER = 0x2c02ul, + HOST_PERF_GLOBAL_CTRL = 0x2c04ul, + + /* 32-Bit Control Fields */ + PIN_CONTROLS = 0x4000ul, + CPU_EXEC_CTRL0 = 0x4002ul, + EXC_BITMAP = 0x4004ul, + PF_ERROR_MASK = 0x4006ul, + PF_ERROR_MATCH = 0x4008ul, + CR3_TARGET_COUNT = 0x400aul, + EXI_CONTROLS = 0x400cul, + EXI_MSR_ST_CNT = 0x400eul, + EXI_MSR_LD_CNT = 0x4010ul, + ENT_CONTROLS = 0x4012ul, + ENT_MSR_LD_CNT = 0x4014ul, + ENT_INTR_INFO = 0x4016ul, + ENT_INTR_ERROR = 0x4018ul, + ENT_INST_LEN = 0x401aul, + TPR_THRESHOLD = 0x401cul, + CPU_EXEC_CTRL1 = 0x401eul, + + /* 32-Bit R/O Data Fields */ + VMX_INST_ERROR = 0x4400ul, + EXI_REASON = 0x4402ul, + EXI_INTR_INFO = 0x4404ul, + EXI_INTR_ERROR = 0x4406ul, + IDT_VECT_INFO = 0x4408ul, + IDT_VECT_ERROR = 0x440aul, + EXI_INST_LEN = 0x440cul, + EXI_INST_INFO = 0x440eul, + + /* 32-Bit Guest State Fields */ + GUEST_LIMIT_ES = 0x4800ul, + GUEST_LIMIT_CS = 0x4802ul, + GUEST_LIMIT_SS = 0x4804ul, + GUEST_LIMIT_DS = 0x4806ul, + GUEST_LIMIT_FS = 0x4808ul, + GUEST_LIMIT_GS = 0x480aul, + GUEST_LIMIT_LDTR = 0x480cul, + GUEST_LIMIT_TR = 0x480eul, + GUEST_LIMIT_GDTR = 0x4810ul, + GUEST_LIMIT_IDTR = 0x4812ul, + GUEST_AR_ES = 0x4814ul, + GUEST_AR_CS = 0x4816ul, + GUEST_AR_SS = 0x4818ul, + GUEST_AR_DS = 0x481aul, + GUEST_AR_FS = 0x481cul, + GUEST_AR_GS = 0x481eul, + GUEST_AR_LDTR = 0x4820ul, + GUEST_AR_TR = 0x4822ul, + GUEST_INTR_STATE = 0x4824ul, + GUEST_ACTV_STATE = 0x4826ul, + GUEST_SMBASE = 0x4828ul, + GUEST_SYSENTER_CS = 0x482aul, + + /* 32-Bit Host State Fields */ + HOST_SYSENTER_CS = 0x4c00ul, + + /* Natural-Width Control Fields */ + CR0_MASK = 0x6000ul, + CR4_MASK = 0x6002ul, + CR0_READ_SHADOW = 0x6004ul, + CR4_READ_SHADOW = 0x6006ul, + CR3_TARGET_0 = 0x6008ul, + CR3_TARGET_1 = 0x600aul, + CR3_TARGET_2 = 0x600cul, + CR3_TARGET_3 = 0x600eul, + + /* Natural-Width R/O Data Fields */ + EXI_QUALIFICATION = 0x6400ul, + IO_RCX = 0x6402ul, + IO_RSI = 0x6404ul, + IO_RDI = 0x6406ul, + IO_RIP = 0x6408ul, + GUEST_LINEAR_ADDRESS = 0x640aul, + + /* Natural-Width Guest State Fields */ + GUEST_CR0 = 0x6800ul, + GUEST_CR3 = 0x6802ul, + GUEST_CR4 = 0x6804ul, + GUEST_BASE_ES = 0x6806ul, + GUEST_BASE_CS = 0x6808ul, + GUEST_BASE_SS = 0x680aul, + GUEST_BASE_DS = 0x680cul, + GUEST_BASE_FS = 0x680eul, + GUEST_BASE_GS = 0x6810ul, + GUEST_BASE_LDTR = 0x6812ul, + GUEST_BASE_TR = 0x6814ul, + GUEST_BASE_GDTR = 0x6816ul, + GUEST_BASE_IDTR = 0x6818ul, + GUEST_DR7 = 0x681aul, + GUEST_RSP = 0x681cul, + GUEST_RIP = 0x681eul, + GUEST_RFLAGS = 0x6820ul, + GUEST_PENDING_DEBUG = 0x6822ul, + GUEST_SYSENTER_ESP = 0x6824ul, + GUEST_SYSENTER_EIP = 0x6826ul, + + /* Natural-Width Host State Fields */ + HOST_CR0 = 0x6c00ul, + HOST_CR3 = 0x6c02ul, + HOST_CR4 = 0x6c04ul, + HOST_BASE_FS = 0x6c06ul, + HOST_BASE_GS = 0x6c08ul, + HOST_BASE_TR = 0x6c0aul, + HOST_BASE_GDTR = 0x6c0cul, + HOST_BASE_IDTR = 0x6c0eul, + HOST_SYSENTER_ESP = 0x6c10ul, + HOST_SYSENTER_EIP = 0x6c12ul, + HOST_RSP = 0x6c14ul, + HOST_RIP = 0x6c16ul +}; + +enum Reason { + VMX_EXC_NMI = 0, + VMX_EXTINT = 1, + VMX_TRIPLE_FAULT = 2, + VMX_INIT = 3, + VMX_SIPI = 4, + VMX_SMI_IO = 5, + VMX_SMI_OTHER = 6, + VMX_INTR_WINDOW = 7, + VMX_NMI_WINDOW = 8, + VMX_TASK_SWITCH = 9, + VMX_CPUID = 10, + VMX_GETSEC = 11, + VMX_HLT = 12, + VMX_INVD = 13, + VMX_INVLPG = 14, + VMX_RDPMC = 15, + VMX_RDTSC = 16, + VMX_RSM = 17, + VMX_VMCALL = 18, + VMX_VMCLEAR = 19, + VMX_VMLAUNCH = 20, + VMX_VMPTRLD = 21, + VMX_VMPTRST = 22, + VMX_VMREAD = 23, + VMX_VMRESUME = 24, + VMX_VMWRITE = 25, + VMX_VMXOFF = 26, + VMX_VMXON = 27, + VMX_CR = 28, + VMX_DR = 29, + VMX_IO = 30, + VMX_RDMSR = 31, + VMX_WRMSR = 32, + VMX_FAIL_STATE = 33, + VMX_FAIL_MSR = 34, + VMX_MWAIT = 36, + VMX_MTF = 37, + VMX_MONITOR = 39, + VMX_PAUSE = 40, + VMX_FAIL_MCHECK = 41, + VMX_TPR_THRESHOLD = 43, + VMX_APIC_ACCESS = 44, + VMX_GDTR_IDTR = 46, + VMX_LDTR_TR = 47, + VMX_EPT_VIOLATION = 48, + VMX_EPT_MISCONFIG = 49, + VMX_INVEPT = 50, + VMX_PREEMPT = 52, + VMX_INVVPID = 53, + VMX_WBINVD = 54, + VMX_XSETBV = 55 +}; + +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ + +enum Ctrl_exi { + EXI_HOST_64 = 1UL << 9, + EXI_LOAD_PERF = 1UL << 12, + EXI_INTA = 1UL << 15, + EXI_LOAD_EFER = 1UL << 21, +}; + +enum Ctrl_ent { + ENT_GUEST_64 = 1UL << 9, + ENT_LOAD_EFER = 1UL << 15, +}; + +enum Ctrl_pin { + PIN_EXTINT = 1ul << 0, + PIN_NMI = 1ul << 3, + PIN_VIRT_NMI = 1ul << 5, +}; + +enum Ctrl0 { + CPU_INTR_WINDOW = 1ul << 2, + CPU_HLT = 1ul << 7, + CPU_INVLPG = 1ul << 9, + CPU_CR3_LOAD = 1ul << 15, + CPU_CR3_STORE = 1ul << 16, + CPU_TPR_SHADOW = 1ul << 21, + CPU_NMI_WINDOW = 1ul << 22, + CPU_IO = 1ul << 24, + CPU_IO_BITMAP = 1ul << 25, + CPU_SECONDARY = 1ul << 31, +}; + +enum Ctrl1 { + CPU_EPT = 1ul << 1, + CPU_VPID = 1ul << 5, + CPU_URG = 1ul << 7, +}; + +#define SEL_NULL_DESC 0x0 +#define SEL_KERN_CODE_64 0x8 +#define SEL_KERN_DATA_64 0x10 +#define SEL_USER_CODE_64 0x18 +#define SEL_USER_DATA_64 0x20 +#define SEL_CODE_32 0x28 +#define SEL_DATA_32 0x30 +#define SEL_CODE_16 0x38 +#define SEL_DATA_16 0x40 +#define SEL_TSS_RUN 0x48 + +#define SAVE_GPR \ + "xchg %rax, regs\n\t" \ + "xchg %rbx, regs+0x8\n\t" \ + "xchg %rcx, regs+0x10\n\t" \ + "xchg %rdx, regs+0x18\n\t" \ + "xchg %rbp, regs+0x28\n\t" \ + "xchg %rsi, regs+0x30\n\t" \ + "xchg %rdi, regs+0x38\n\t" \ + "xchg %r8, regs+0x40\n\t" \ + "xchg %r9, regs+0x48\n\t" \ + "xchg %r10, regs+0x50\n\t" \ + "xchg %r11, regs+0x58\n\t" \ + "xchg %r12, regs+0x60\n\t" \ + "xchg %r13, regs+0x68\n\t" \ + "xchg %r14, regs+0x70\n\t" \ + "xchg %r15, regs+0x78\n\t" + +#define LOAD_GPR SAVE_GPR + +#define CR0_PE (1ul << 0) +#define CR0_PG (1ul << 31) +#define CR4_VMXE (1ul << 0) +#define CR4_PAE (1ul << 5) +#define CR4_PCIDE (1ul << 17) + +#define VMX_IO_SIZE_MASK 0x7 +#define _VMX_IO_BYTE 1 +#define _VMX_IO_WORD 2 +#define _VMX_IO_LONG 3 +#define VMX_IO_DIRECTION_MASK (1ul << 3) +#define VMX_IO_IN (1ul << 3) +#define VMX_IO_OUT 0 +#define VMX_IO_STRING (1ul << 4) +#define VMX_IO_REP (1ul << 5) +#define VMX_IO_OPRAND_DX (1ul << 6) +#define VMX_IO_PORT_MASK 0xFFFF0000 +#define VMX_IO_PORT_SHIFT 16 + +#define VMX_HALT 1 +#define VMX_EXIT 2 +#define VMX_RESUME 3 + +#endif +
This is the first version for VMX nested environment test case. It contains the basic VMX instructions test cases, including VMXON/ VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the basic execution routine in VMX nested environment and let the VM print "Hello World" to inform its successfully run. New files added: x86/vmx.h : contains all VMX related macro declerations x86/vmx.c : main file for VMX nested test case Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> --- config-x86-common.mak | 2 + config-x86_64.mak | 1 + lib/x86/msr.h | 5 + x86/cstart64.S | 4 + x86/unittests.cfg | 6 + x86/vmx.c | 676 +++++++++++++++++++++++++++++++++++++++++++++++++ x86/vmx.h | 419 ++++++++++++++++++++++++++++++ 7 files changed, 1113 insertions(+) create mode 100644 x86/vmx.c create mode 100644 x86/vmx.h