Message ID | 1375020009-6225-1-git-send-email-yzt356@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Gleb, It suddenly occurred to me that this patch also fails to handle GUEST_RFLAGS on VMRESUME. I have decided to remove rflags from struct regs, since rflags can be read and set via vmcs_read/vmcs_write in the functions defined by each test suite (init and exit_handler), while the other general registers can only be set in the framework code. I will then wait for Paolo's and Gleb's further feedback and commit the final patch. Arthur On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote: > This is the first version of VMX nested environment. It contains the > basic VMX instructions test cases, including VMXON/VMXOFF/VMPTRLD/ > VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the > basic execution routine in VMX nested environment and lets the VM print > "Hello World" to indicate that it ran successfully. > > The first release also includes a test suite for vmenter (vmlaunch and > vmresume). Besides, a hypercall mechanism is included, and currently it is > used to invoke a normal VM exit. > > New files added: > x86/vmx.h : contains all VMX related macro declarations > x86/vmx.c : main file for VMX nested test case > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > --- > ChangeLog: > 1. Refine the code in function vmx_run() > 2. Fix bug of setting GUEST_RFLAGS > 3. Move defines of selectors to lib/x86/vm.h > 4. Move CR0/4 defines to lib/x86/vm.h, as well as some defines from lib/x86/vm.c > 5. Move some inline functions to lib/x86/processor.h > 6. 
Move some inline functions (vmcs related) to x86/vmx.h > --- > config-x86-common.mak | 2 + > config-x86_64.mak | 1 + > lib/x86/msr.h | 5 + > lib/x86/processor.h | 15 ++ > lib/x86/vm.c | 4 - > lib/x86/vm.h | 21 ++ > x86/cstart64.S | 4 + > x86/unittests.cfg | 6 + > x86/vmx.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++ > x86/vmx.h | 466 ++++++++++++++++++++++++++++++++++ > 10 files changed, 1194 insertions(+), 4 deletions(-) > create mode 100644 x86/vmx.c > create mode 100644 x86/vmx.h > > diff --git a/config-x86-common.mak b/config-x86-common.mak > index 455032b..34a41e1 100644 > --- a/config-x86-common.mak > +++ b/config-x86-common.mak > @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o > > $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o > > +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o > + > arch_clean: > $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ > $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o > diff --git a/config-x86_64.mak b/config-x86_64.mak > index 4e525f5..bb8ee89 100644 > --- a/config-x86_64.mak > +++ b/config-x86_64.mak > @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ > $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ > $(TEST_DIR)/pcid.flat > tests += $(TEST_DIR)/svm.flat > +tests += $(TEST_DIR)/vmx.flat > > include config-x86-common.mak > diff --git a/lib/x86/msr.h b/lib/x86/msr.h > index 509a421..281255a 100644 > --- a/lib/x86/msr.h > +++ b/lib/x86/msr.h > @@ -396,6 +396,11 @@ > #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a > #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b > #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c > +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d > +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e > +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f > +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 > + > > /* AMD-V MSRs */ > > diff --git a/lib/x86/processor.h b/lib/x86/processor.h > index e46d8d0..f0c11cc 100644 > --- a/lib/x86/processor.h > +++ 
b/lib/x86/processor.h > @@ -307,4 +307,19 @@ static inline void safe_halt(void) > { > asm volatile("sti; hlt"); > } > + > +#ifdef __x86_64__ > +static inline u64 read_rflags(void) > +{ > + u64 r; > + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); > + return r; > +} > + > +static inline void write_rflags(u64 r) > +{ > + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); > +} > +#endif > + > #endif > diff --git a/lib/x86/vm.c b/lib/x86/vm.c > index 260ec45..188bf57 100644 > --- a/lib/x86/vm.c > +++ b/lib/x86/vm.c > @@ -9,10 +9,6 @@ > #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) > #endif > > -#define X86_CR0_PE 0x00000001 > -#define X86_CR0_WP 0x00010000 > -#define X86_CR0_PG 0x80000000 > -#define X86_CR4_PSE 0x00000010 > static void *free = 0; > static void *vfree_top = 0; > > diff --git a/lib/x86/vm.h b/lib/x86/vm.h > index 0b5b5c7..eff6f72 100644 > --- a/lib/x86/vm.h > +++ b/lib/x86/vm.h > @@ -16,6 +16,27 @@ > #define PTE_USER (1ull << 2) > #define PTE_ADDR (0xffffffffff000ull) > > +#define X86_CR0_PE 0x00000001 > +#define X86_CR0_WP 0x00010000 > +#define X86_CR0_PG 0x80000000 > +#define X86_CR4_VMXE 0x00000001 > +#define X86_CR4_PSE 0x00000010 > +#define X86_CR4_PAE 0x00000020 > +#define X86_CR4_PCIDE 0x00020000 > + > +#ifdef __x86_64__ > +#define SEL_NULL_DESC 0x0 > +#define SEL_KERN_CODE_64 0x8 > +#define SEL_KERN_DATA_64 0x10 > +#define SEL_USER_CODE_64 0x18 > +#define SEL_USER_DATA_64 0x20 > +#define SEL_CODE_32 0x28 > +#define SEL_DATA_32 0x30 > +#define SEL_CODE_16 0x38 > +#define SEL_DATA_16 0x40 > +#define SEL_TSS_RUN 0x48 > +#endif > + > void setup_vm(); > > void *vmalloc(unsigned long size); > diff --git a/x86/cstart64.S b/x86/cstart64.S > index 24df5f8..0fe76da 100644 > --- a/x86/cstart64.S > +++ b/x86/cstart64.S > @@ -4,6 +4,10 @@ > .globl boot_idt > boot_idt = 0 > > +.globl idt_descr > +.globl tss_descr > +.globl gdt64_desc > + > ipi_vector = 0x20 > > max_cpus = 64 > diff --git a/x86/unittests.cfg b/x86/unittests.cfg > index bc9643e..85c36aa 
100644 > --- a/x86/unittests.cfg > +++ b/x86/unittests.cfg > @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" > file = pcid.flat > extra_params = -cpu qemu64,+pcid > arch = x86_64 > + > +[vmx] > +file = vmx.flat > +extra_params = -cpu host,+vmx > +arch = x86_64 > + > diff --git a/x86/vmx.c b/x86/vmx.c > new file mode 100644 > index 0000000..7467927 > --- /dev/null > +++ b/x86/vmx.c > @@ -0,0 +1,674 @@ > +#include "libcflat.h" > +#include "processor.h" > +#include "vm.h" > +#include "desc.h" > +#include "vmx.h" > +#include "msr.h" > +#include "smp.h" > +#include "io.h" > + > +int fails = 0, tests = 0; > +u32 *vmxon_region; > +struct vmcs *vmcs_root; > +u32 vpid_cnt; > +void *guest_stack, *guest_syscall_stack; > +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; > +ulong fix_cr0_set, fix_cr0_clr; > +ulong fix_cr4_set, fix_cr4_clr; > +struct regs regs; > +struct vmx_test *current; > +u64 hypercall_field = 0; > +bool launched; > + > +extern u64 gdt64_desc[]; > +extern u64 idt_descr[]; > +extern u64 tss_descr[]; > +extern void *vmx_return; > +extern void *entry_sysenter; > +extern void *guest_entry; > + > +static void report(const char *name, int result) > +{ > + ++tests; > + if (result) > + printf("PASS: %s\n", name); > + else { > + printf("FAIL: %s\n", name); > + ++fails; > + } > +} > + > +static int make_vmcs_current(struct vmcs *vmcs) > +{ > + bool ret; > + > + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > + return ret; > +} > + > +/* entry_sysenter */ > +asm( > + ".align 4, 0x90\n\t" > + ".globl entry_sysenter\n\t" > + "entry_sysenter:\n\t" > + SAVE_GPR > + " and $0xf, %rax\n\t" > + " mov %rax, %rdi\n\t" > + " call syscall_handler\n\t" > + LOAD_GPR > + " vmresume\n\t" > +); > + > +static void __attribute__((__used__)) syscall_handler(u64 syscall_no) > +{ > + current->syscall_handler(syscall_no); > +} > + > +static inline int vmx_on() > +{ > + bool ret; > + asm volatile ("vmxon %1; setbe %0\n\t" > + : "=q"(ret) : 
"m"(vmxon_region) : "cc"); > + return ret; > +} > + > +static inline int vmx_off() > +{ > + bool ret; > + asm volatile("vmxoff; setbe %0\n\t" > + : "=q"(ret) : : "cc"); > + return ret; > +} > + > +static void print_vmexit_info() > +{ > + u64 guest_rip, guest_rsp; > + ulong reason = vmcs_read(EXI_REASON) & 0xff; > + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); > + guest_rip = vmcs_read(GUEST_RIP); > + guest_rsp = vmcs_read(GUEST_RSP); > + printf("VMEXIT info:\n"); > + printf("\tvmexit reason = %d\n", reason); > + printf("\texit qualification = 0x%x\n", exit_qual); > + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); > + printf("\tguest_rip = 0x%llx\n", guest_rip); > + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", > + regs.rax, regs.rbx, regs.rcx, regs.rdx); > + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", > + guest_rsp, regs.rbp, regs.rsi, regs.rdi); > + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", > + regs.r8, regs.r9, regs.r10, regs.r11); > + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", > + regs.r12, regs.r13, regs.r14, regs.r15); > +} > + > +static void test_vmclear(void) > +{ > + u64 rflags; > + > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + write_rflags(rflags); > + report("test vmclear", vmcs_clear(vmcs_root) == 0); > +} > + > +static void test_vmxoff(void) > +{ > + int ret; > + u64 rflags; > + > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + write_rflags(rflags); > + ret = vmx_off(); > + report("test vmxoff", !ret); > +} > + > +static void __attribute__((__used__)) guest_main(void) > +{ > + current->guest_main(); > +} > + > +/* guest_entry */ > +asm( > + ".align 4, 0x90\n\t" > + ".globl entry_guest\n\t" > + "guest_entry:\n\t" > + " call guest_main\n\t" > + " mov $1, %edi\n\t" > + " call hypercall\n\t" > +); > + > +static void init_vmcs_ctrl(void) > +{ > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > + /* 26.2.1.1 */ > + 
vmcs_write(PIN_CONTROLS, ctrl_pin); > + /* Disable VMEXIT of IO instruction */ > + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { > + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; > + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); > + } > + vmcs_write(CR3_TARGET_COUNT, 0); > + vmcs_write(VPID, ++vpid_cnt); > +} > + > +static void init_vmcs_host(void) > +{ > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > + /* 26.2.1.2 */ > + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); > + > + /* 26.2.1.3 */ > + vmcs_write(ENT_CONTROLS, ctrl_enter); > + vmcs_write(EXI_CONTROLS, ctrl_exit); > + > + /* 26.2.2 */ > + vmcs_write(HOST_CR0, read_cr0()); > + vmcs_write(HOST_CR3, read_cr3()); > + vmcs_write(HOST_CR4, read_cr4()); > + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); > + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); > + > + /* 26.2.3 */ > + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); > + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); > + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); > + vmcs_write(HOST_BASE_TR, (u64)tss_descr); > + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); > + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); > + vmcs_write(HOST_BASE_FS, 0); > + vmcs_write(HOST_BASE_GS, 0); > + > + /* Set other vmcs area */ > + vmcs_write(PF_ERROR_MASK, 0); > + vmcs_write(PF_ERROR_MATCH, 0); > + vmcs_write(VMCS_LINK_PTR, ~0ul); > + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); > + vmcs_write(HOST_RIP, (u64)(&vmx_return)); > +} > + > +static void init_vmcs_guest(void) > +{ > + /* 26.3 CHECKING AND LOADING GUEST STATE */ > + ulong guest_cr0, guest_cr4, guest_cr3; > + /* 26.3.1.1 */ > + guest_cr0 = read_cr0(); > + guest_cr4 = read_cr4(); > + guest_cr3 = read_cr3(); > + if (ctrl_enter & ENT_GUEST_64) { > + guest_cr0 |= X86_CR0_PG; > + guest_cr4 |= X86_CR4_PAE; 
> + } > + if ((ctrl_enter & ENT_GUEST_64) == 0) > + guest_cr4 &= (~X86_CR4_PCIDE); > + if (guest_cr0 & X86_CR0_PG) > + guest_cr0 |= X86_CR0_PE; > + vmcs_write(GUEST_CR0, guest_cr0); > + vmcs_write(GUEST_CR3, guest_cr3); > + vmcs_write(GUEST_CR4, guest_cr4); > + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); > + vmcs_write(GUEST_SYSENTER_ESP, > + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); > + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); > + vmcs_write(GUEST_DR7, 0); > + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); > + > + /* 26.3.1.2 */ > + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); > + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); > + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); > + vmcs_write(GUEST_SEL_LDTR, 0); > + > + vmcs_write(GUEST_BASE_CS, 0); > + vmcs_write(GUEST_BASE_ES, 0); > + vmcs_write(GUEST_BASE_SS, 0); > + vmcs_write(GUEST_BASE_DS, 0); > + vmcs_write(GUEST_BASE_FS, 0); > + vmcs_write(GUEST_BASE_GS, 0); > + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); > + vmcs_write(GUEST_BASE_LDTR, 0); > + > + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); > + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); > + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); > + > + vmcs_write(GUEST_AR_CS, 0xa09b); > + vmcs_write(GUEST_AR_DS, 0xc093); > + vmcs_write(GUEST_AR_ES, 0xc093); > + vmcs_write(GUEST_AR_FS, 0xc093); > + vmcs_write(GUEST_AR_GS, 0xc093); > + vmcs_write(GUEST_AR_SS, 0xc093); > + vmcs_write(GUEST_AR_LDTR, 0x82); > + vmcs_write(GUEST_AR_TR, 0x8b); > + > + /* 26.3.1.3 */ > + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); > + vmcs_write(GUEST_BASE_IDTR, 
(u64)idt_descr); > + vmcs_write(GUEST_LIMIT_GDTR, > + ((struct descr *)gdt64_desc)->limit & 0xffff); > + vmcs_write(GUEST_LIMIT_IDTR, > + ((struct descr *)idt_descr)->limit & 0xffff); > + > + /* 26.3.1.4 */ > + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); > + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); > + vmcs_write(GUEST_RFLAGS, 0x2); > + > + /* 26.3.1.5 */ > + vmcs_write(GUEST_ACTV_STATE, 0); > + vmcs_write(GUEST_INTR_STATE, 0); > +} > + > +static int init_vmcs(struct vmcs **vmcs) > +{ > + *vmcs = alloc_page(); > + memset(*vmcs, 0, PAGE_SIZE); > + (*vmcs)->revision_id = basic.revision; > + /* vmclear first to init vmcs */ > + if (vmcs_clear(*vmcs)) { > + printf("%s : vmcs_clear error\n", __func__); > + return 1; > + } > + > + if (make_vmcs_current(*vmcs)) { > + printf("%s : make_vmcs_current error\n", __func__); > + return 1; > + } > + > + /* All settings to pin/exit/enter/cpu > + control fields should be placed here */ > + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; > + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; > + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); > + ctrl_cpu[0] |= CPU_HLT; > + /* DIsable IO instruction VMEXIT now */ > + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); > + ctrl_cpu[1] = 0; > + > + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; > + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; > + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; > + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; > + > + init_vmcs_ctrl(); > + init_vmcs_host(); > + init_vmcs_guest(); > + return 0; > +} > + > +static void init_vmx(void) > +{ > + vmxon_region = alloc_page(); > + memset(vmxon_region, 0, PAGE_SIZE); > + > + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); > + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); > + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); > + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); > + basic.val = rdmsr(MSR_IA32_VMX_BASIC); > + ctrl_pin_rev.val = 
rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN > + : MSR_IA32_VMX_PINBASED_CTLS); > + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT > + : MSR_IA32_VMX_EXIT_CTLS); > + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY > + : MSR_IA32_VMX_ENTRY_CTLS); > + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC > + : MSR_IA32_VMX_PROCBASED_CTLS); > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) > + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); > + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) > + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); > + > + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); > + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); > + > + *vmxon_region = basic.revision; > + > + guest_stack = alloc_page(); > + memset(guest_stack, 0, PAGE_SIZE); > + guest_syscall_stack = alloc_page(); > + memset(guest_syscall_stack, 0, PAGE_SIZE); > +} > + > +static int test_vmx_capability(void) > +{ > + struct cpuid r; > + u64 ret1, ret2; > + u64 ia32_feature_control; > + r = cpuid(1); > + ret1 = ((r.c) >> 5) & 1; > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { > + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > + } > + report("test vmx capability", ret1 & ret2); > + return !(ret1 & ret2); > +} > + > +static int test_vmxon(void) > +{ > + int ret; > + u64 rflags; > + > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + write_rflags(rflags); > + ret = vmx_on(); > + report("test vmxon", !ret); > + return ret; > +} > + > +static void test_vmptrld(void) > +{ > + u64 rflags; > + struct vmcs *vmcs; > + > + vmcs = alloc_page(); > + vmcs->revision_id = basic.revision; > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + write_rflags(rflags); > + 
report("test vmptrld", make_vmcs_current(vmcs) == 0); > +} > + > +static void test_vmptrst(void) > +{ > + u64 rflags; > + int ret; > + struct vmcs *vmcs1, *vmcs2; > + > + vmcs1 = alloc_page(); > + memset(vmcs1, 0, PAGE_SIZE); > + init_vmcs(&vmcs1); > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > + write_rflags(rflags); > + ret = vmcs_save(&vmcs2); > + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); > +} > + > +/* This function can only be called in guest */ > +static void __attribute__((__used__)) hypercall(u32 hypercall_no) > +{ > + u64 val = 0; > + val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; > + hypercall_field = val; > + asm volatile("vmcall\n\t"); > +} > + > +static bool is_hypercall() > +{ > + ulong reason, hyper_bit; > + > + reason = vmcs_read(EXI_REASON) & 0xff; > + hyper_bit = hypercall_field & HYPERCALL_BIT; > + if (reason == VMX_VMCALL && hyper_bit) > + return true; > + return false; > +} > + > +static int handle_hypercall() > +{ > + ulong hypercall_no; > + > + hypercall_no = hypercall_field & HYPERCALL_MASK; > + hypercall_field = 0; > + switch (hypercall_no) { > + case HYPERCALL_VMEXIT: > + return VMX_TEST_VMEXIT; > + default: > + printf("ERROR : Invalid hypercall number : %d\n", hypercall_no); > + } > + return VMX_TEST_EXIT; > +} > + > +static int exit_handler() > +{ > + int ret; > + > + current->exits++; > + current->guest_regs = regs; > + if (is_hypercall()) > + ret = handle_hypercall(); > + else > + ret = current->exit_handler(); > + regs = current->guest_regs; > + switch (ret) { > + case VMX_TEST_VMEXIT: > + case VMX_TEST_RESUME: > + return ret; > + case VMX_TEST_EXIT: > + break; > + default: > + printf("ERROR : Invalid exit_handler return val %d.\n" > + , ret); > + } > + print_vmexit_info(); > + exit(-1); > + return 0; > +} > + > +static int vmx_run() > +{ > + u32 ret = 0, fail = 0; > + > + while (1) { > + asm volatile ( > + "mov %%rsp, %%rsi\n\t" > + "mov %2, %%rdi\n\t" > + "vmwrite %%rsi, %%rdi\n\t" > + > + LOAD_GPR_C 
> + "cmpl $0, %1\n\t" > + "jne 1f\n\t" > + LOAD_RFLAGS > + "vmlaunch\n\t" > + "jmp 2f\n\t" > + "1: " > + "vmresume\n\t" > + "2: " > + "setbe %0\n\t" > + "vmx_return:\n\t" > + SAVE_GPR_C > + SAVE_RFLAGS > + : "=m"(fail) > + : "m"(launched), "i"(HOST_RSP) > + : "rdi", "rsi", "memory", "cc" > + > + ); > + if (fail) > + ret = launched ? VMX_TEST_RESUME_ERR : > + VMX_TEST_LAUNCH_ERR; > + else { > + launched = 1; > + ret = exit_handler(); > + } > + if (ret != VMX_TEST_RESUME) > + break; > + } > + launched = 0; > + switch (ret) { > + case VMX_TEST_VMEXIT: > + return 0; > + case VMX_TEST_LAUNCH_ERR: > + printf("%s : vmlaunch failed.\n", __func__); > + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) > + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) > + printf("\tvmlaunch set wrong flags\n"); > + report("test vmlaunch", 0); > + break; > + case VMX_TEST_RESUME_ERR: > + printf("%s : vmresume failed.\n", __func__); > + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) > + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) > + printf("\tvmresume set wrong flags\n"); > + report("test vmresume", 0); > + break; > + default: > + printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret); > + break; > + } > + return 1; > +} > + > +static int test_run(struct vmx_test *test) > +{ > + if (test->name == NULL) > + test->name = "(no name)"; > + if (vmx_on()) { > + printf("%s : vmxon failed.\n", __func__); > + return 1; > + } > + init_vmcs(&(test->vmcs)); > + /* Directly call test->init is ok here, init_vmcs has done > + vmcs init, vmclear and vmptrld*/ > + if (test->init) > + test->init(test->vmcs); > + test->exits = 0; > + current = test; > + regs = test->guest_regs; > + vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); > + launched = 0; > + printf("\nTest suite : %s\n", test->name); > + vmx_run(); > + if (vmx_off()) { > + printf("%s : vmxoff failed.\n", __func__); > + return 1; > + } > + return 
0; > +} > + > +static void basic_init() > +{ > +} > + > +static void basic_guest_main() > +{ > + /* Here is null guest_main, print Hello World */ > + printf("\tHello World, this is null_guest_main!\n"); > +} > + > +static int basic_exit_handler() > +{ > + u64 guest_rip; > + ulong reason; > + > + guest_rip = vmcs_read(GUEST_RIP); > + reason = vmcs_read(EXI_REASON) & 0xff; > + > + switch (reason) { > + case VMX_VMCALL: > + print_vmexit_info(); > + vmcs_write(GUEST_RIP, guest_rip + 3); > + return VMX_TEST_RESUME; > + default: > + break; > + } > + printf("ERROR : Unhandled vmx exit.\n"); > + print_vmexit_info(); > + return VMX_TEST_EXIT; > +} > + > +static void basic_syscall_handler(u64 syscall_no) > +{ > +} > + > +static void vmenter_main() > +{ > + u64 rax; > + u64 rsp, resume_rsp; > + > + report("test vmlaunch", 1); > + > + asm volatile( > + "mov %%rsp, %0\n\t" > + "mov %3, %%rax\n\t" > + "vmcall\n\t" > + "mov %%rax, %1\n\t" > + "mov %%rsp, %2\n\t" > + : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) > + : "g"(0xABCD)); > + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); > +} > + > +static int vmenter_exit_handler() > +{ > + u64 guest_rip; > + ulong reason; > + > + guest_rip = vmcs_read(GUEST_RIP); > + reason = vmcs_read(EXI_REASON) & 0xff; > + switch (reason) { > + case VMX_VMCALL: > + if (current->guest_regs.rax != 0xABCD) { > + report("test vmresume", 0); > + return VMX_TEST_VMEXIT; > + } > + current->guest_regs.rax = 0xFFFF; > + vmcs_write(GUEST_RIP, guest_rip + 3); > + return VMX_TEST_RESUME; > + default: > + report("test vmresume", 0); > + print_vmexit_info(); > + } > + return VMX_TEST_VMEXIT; > +} > + > + > +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs > + basic_* just implement some basic functions */ > +static struct vmx_test vmx_tests[] = { > + { "null", basic_init, basic_guest_main, basic_exit_handler, > + basic_syscall_handler, {0} }, > + { "vmenter", basic_init, vmenter_main, vmenter_exit_handler, > + basic_syscall_handler, 
{0} }, > +}; > + > +int main(void) > +{ > + int i; > + > + setup_vm(); > + setup_idt(); > + > + if (test_vmx_capability() != 0) { > + printf("ERROR : vmx not supported, check +vmx option\n"); > + goto exit; > + } > + init_vmx(); > + /* Set basic test ctxt the same as "null" */ > + current = &vmx_tests[0]; > + if (test_vmxon() != 0) > + goto exit; > + test_vmptrld(); > + test_vmclear(); > + test_vmptrst(); > + init_vmcs(&vmcs_root); > + if (vmx_run()) { > + report("test vmlaunch", 0); > + goto exit; > + } > + test_vmxoff(); > + > + for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) { > + if (test_run(&vmx_tests[i])) > + goto exit; > + } > + > +exit: > + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); > + return fails ? 1 : 0; > +} > diff --git a/x86/vmx.h b/x86/vmx.h > new file mode 100644 > index 0000000..1fb9738 > --- /dev/null > +++ b/x86/vmx.h > @@ -0,0 +1,466 @@ > +#ifndef __HYPERVISOR_H > +#define __HYPERVISOR_H > + > +#include "libcflat.h" > + > +struct vmcs { > + u32 revision_id; /* vmcs revision identifier */ > + u32 abort; /* VMX-abort indicator */ > + /* VMCS data */ > + char data[0]; > +}; > + > +struct regs { > + u64 rax; > + u64 rcx; > + u64 rdx; > + u64 rbx; > + u64 cr2; > + u64 rbp; > + u64 rsi; > + u64 rdi; > + u64 r8; > + u64 r9; > + u64 r10; > + u64 r11; > + u64 r12; > + u64 r13; > + u64 r14; > + u64 r15; > + u64 rflags; > +}; > + > +struct vmx_test { > + const char *name; > + void (*init)(struct vmcs *vmcs); > + void (*guest_main)(); > + int (*exit_handler)(); > + void (*syscall_handler)(u64 syscall_no); > + struct regs guest_regs; > + struct vmcs *vmcs; > + int exits; > +}; > + > +static union vmx_basic { > + u64 val; > + struct { > + u32 revision; > + u32 size:13, > + : 3, > + width:1, > + dual:1, > + type:4, > + insouts:1, > + ctrl:1; > + }; > +} basic; > + > +static union vmx_ctrl_pin { > + u64 val; > + struct { > + u32 set, clr; > + }; > +} ctrl_pin_rev; > + > +static union vmx_ctrl_cpu { > + u64 val; > + struct { > + u32 set, clr; > + 
}; > +} ctrl_cpu_rev[2]; > + > +static union vmx_ctrl_exit { > + u64 val; > + struct { > + u32 set, clr; > + }; > +} ctrl_exit_rev; > + > +static union vmx_ctrl_ent { > + u64 val; > + struct { > + u32 set, clr; > + }; > +} ctrl_enter_rev; > + > +static union vmx_ept_vpid { > + u64 val; > + struct { > + u32:16, > + super:2, > + : 2, > + invept:1, > + : 11; > + u32 invvpid:1; > + }; > +} ept_vpid; > + > +struct descr { > + u16 limit; > + u64 addr; > +}; > + > +enum Encoding { > + /* 16-Bit Control Fields */ > + VPID = 0x0000ul, > + /* Posted-interrupt notification vector */ > + PINV = 0x0002ul, > + /* EPTP index */ > + EPTP_IDX = 0x0004ul, > + > + /* 16-Bit Guest State Fields */ > + GUEST_SEL_ES = 0x0800ul, > + GUEST_SEL_CS = 0x0802ul, > + GUEST_SEL_SS = 0x0804ul, > + GUEST_SEL_DS = 0x0806ul, > + GUEST_SEL_FS = 0x0808ul, > + GUEST_SEL_GS = 0x080aul, > + GUEST_SEL_LDTR = 0x080cul, > + GUEST_SEL_TR = 0x080eul, > + GUEST_INT_STATUS = 0x0810ul, > + > + /* 16-Bit Host State Fields */ > + HOST_SEL_ES = 0x0c00ul, > + HOST_SEL_CS = 0x0c02ul, > + HOST_SEL_SS = 0x0c04ul, > + HOST_SEL_DS = 0x0c06ul, > + HOST_SEL_FS = 0x0c08ul, > + HOST_SEL_GS = 0x0c0aul, > + HOST_SEL_TR = 0x0c0cul, > + > + /* 64-Bit Control Fields */ > + IO_BITMAP_A = 0x2000ul, > + IO_BITMAP_B = 0x2002ul, > + MSR_BITMAP = 0x2004ul, > + EXIT_MSR_ST_ADDR = 0x2006ul, > + EXIT_MSR_LD_ADDR = 0x2008ul, > + ENTER_MSR_LD_ADDR = 0x200aul, > + VMCS_EXEC_PTR = 0x200cul, > + TSC_OFFSET = 0x2010ul, > + TSC_OFFSET_HI = 0x2011ul, > + APIC_VIRT_ADDR = 0x2012ul, > + APIC_ACCS_ADDR = 0x2014ul, > + EPTP = 0x201aul, > + EPTP_HI = 0x201bul, > + > + /* 64-Bit Readonly Data Field */ > + INFO_PHYS_ADDR = 0x2400ul, > + > + /* 64-Bit Guest State */ > + VMCS_LINK_PTR = 0x2800ul, > + VMCS_LINK_PTR_HI = 0x2801ul, > + GUEST_DEBUGCTL = 0x2802ul, > + GUEST_DEBUGCTL_HI = 0x2803ul, > + GUEST_EFER = 0x2806ul, > + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, > + GUEST_PDPTE = 0x280aul, > + > + /* 64-Bit Host State */ > + HOST_EFER = 0x2c02ul, > + 
HOST_PERF_GLOBAL_CTRL = 0x2c04ul, > + > + /* 32-Bit Control Fields */ > + PIN_CONTROLS = 0x4000ul, > + CPU_EXEC_CTRL0 = 0x4002ul, > + EXC_BITMAP = 0x4004ul, > + PF_ERROR_MASK = 0x4006ul, > + PF_ERROR_MATCH = 0x4008ul, > + CR3_TARGET_COUNT = 0x400aul, > + EXI_CONTROLS = 0x400cul, > + EXI_MSR_ST_CNT = 0x400eul, > + EXI_MSR_LD_CNT = 0x4010ul, > + ENT_CONTROLS = 0x4012ul, > + ENT_MSR_LD_CNT = 0x4014ul, > + ENT_INTR_INFO = 0x4016ul, > + ENT_INTR_ERROR = 0x4018ul, > + ENT_INST_LEN = 0x401aul, > + TPR_THRESHOLD = 0x401cul, > + CPU_EXEC_CTRL1 = 0x401eul, > + > + /* 32-Bit R/O Data Fields */ > + VMX_INST_ERROR = 0x4400ul, > + EXI_REASON = 0x4402ul, > + EXI_INTR_INFO = 0x4404ul, > + EXI_INTR_ERROR = 0x4406ul, > + IDT_VECT_INFO = 0x4408ul, > + IDT_VECT_ERROR = 0x440aul, > + EXI_INST_LEN = 0x440cul, > + EXI_INST_INFO = 0x440eul, > + > + /* 32-Bit Guest State Fields */ > + GUEST_LIMIT_ES = 0x4800ul, > + GUEST_LIMIT_CS = 0x4802ul, > + GUEST_LIMIT_SS = 0x4804ul, > + GUEST_LIMIT_DS = 0x4806ul, > + GUEST_LIMIT_FS = 0x4808ul, > + GUEST_LIMIT_GS = 0x480aul, > + GUEST_LIMIT_LDTR = 0x480cul, > + GUEST_LIMIT_TR = 0x480eul, > + GUEST_LIMIT_GDTR = 0x4810ul, > + GUEST_LIMIT_IDTR = 0x4812ul, > + GUEST_AR_ES = 0x4814ul, > + GUEST_AR_CS = 0x4816ul, > + GUEST_AR_SS = 0x4818ul, > + GUEST_AR_DS = 0x481aul, > + GUEST_AR_FS = 0x481cul, > + GUEST_AR_GS = 0x481eul, > + GUEST_AR_LDTR = 0x4820ul, > + GUEST_AR_TR = 0x4822ul, > + GUEST_INTR_STATE = 0x4824ul, > + GUEST_ACTV_STATE = 0x4826ul, > + GUEST_SMBASE = 0x4828ul, > + GUEST_SYSENTER_CS = 0x482aul, > + > + /* 32-Bit Host State Fields */ > + HOST_SYSENTER_CS = 0x4c00ul, > + > + /* Natural-Width Control Fields */ > + CR0_MASK = 0x6000ul, > + CR4_MASK = 0x6002ul, > + CR0_READ_SHADOW = 0x6004ul, > + CR4_READ_SHADOW = 0x6006ul, > + CR3_TARGET_0 = 0x6008ul, > + CR3_TARGET_1 = 0x600aul, > + CR3_TARGET_2 = 0x600cul, > + CR3_TARGET_3 = 0x600eul, > + > + /* Natural-Width R/O Data Fields */ > + EXI_QUALIFICATION = 0x6400ul, > + IO_RCX = 0x6402ul, > + IO_RSI = 
0x6404ul, > + IO_RDI = 0x6406ul, > + IO_RIP = 0x6408ul, > + GUEST_LINEAR_ADDRESS = 0x640aul, > + > + /* Natural-Width Guest State Fields */ > + GUEST_CR0 = 0x6800ul, > + GUEST_CR3 = 0x6802ul, > + GUEST_CR4 = 0x6804ul, > + GUEST_BASE_ES = 0x6806ul, > + GUEST_BASE_CS = 0x6808ul, > + GUEST_BASE_SS = 0x680aul, > + GUEST_BASE_DS = 0x680cul, > + GUEST_BASE_FS = 0x680eul, > + GUEST_BASE_GS = 0x6810ul, > + GUEST_BASE_LDTR = 0x6812ul, > + GUEST_BASE_TR = 0x6814ul, > + GUEST_BASE_GDTR = 0x6816ul, > + GUEST_BASE_IDTR = 0x6818ul, > + GUEST_DR7 = 0x681aul, > + GUEST_RSP = 0x681cul, > + GUEST_RIP = 0x681eul, > + GUEST_RFLAGS = 0x6820ul, > + GUEST_PENDING_DEBUG = 0x6822ul, > + GUEST_SYSENTER_ESP = 0x6824ul, > + GUEST_SYSENTER_EIP = 0x6826ul, > + > + /* Natural-Width Host State Fields */ > + HOST_CR0 = 0x6c00ul, > + HOST_CR3 = 0x6c02ul, > + HOST_CR4 = 0x6c04ul, > + HOST_BASE_FS = 0x6c06ul, > + HOST_BASE_GS = 0x6c08ul, > + HOST_BASE_TR = 0x6c0aul, > + HOST_BASE_GDTR = 0x6c0cul, > + HOST_BASE_IDTR = 0x6c0eul, > + HOST_SYSENTER_ESP = 0x6c10ul, > + HOST_SYSENTER_EIP = 0x6c12ul, > + HOST_RSP = 0x6c14ul, > + HOST_RIP = 0x6c16ul > +}; > + > +enum Reason { > + VMX_EXC_NMI = 0, > + VMX_EXTINT = 1, > + VMX_TRIPLE_FAULT = 2, > + VMX_INIT = 3, > + VMX_SIPI = 4, > + VMX_SMI_IO = 5, > + VMX_SMI_OTHER = 6, > + VMX_INTR_WINDOW = 7, > + VMX_NMI_WINDOW = 8, > + VMX_TASK_SWITCH = 9, > + VMX_CPUID = 10, > + VMX_GETSEC = 11, > + VMX_HLT = 12, > + VMX_INVD = 13, > + VMX_INVLPG = 14, > + VMX_RDPMC = 15, > + VMX_RDTSC = 16, > + VMX_RSM = 17, > + VMX_VMCALL = 18, > + VMX_VMCLEAR = 19, > + VMX_VMLAUNCH = 20, > + VMX_VMPTRLD = 21, > + VMX_VMPTRST = 22, > + VMX_VMREAD = 23, > + VMX_VMRESUME = 24, > + VMX_VMWRITE = 25, > + VMX_VMXOFF = 26, > + VMX_VMXON = 27, > + VMX_CR = 28, > + VMX_DR = 29, > + VMX_IO = 30, > + VMX_RDMSR = 31, > + VMX_WRMSR = 32, > + VMX_FAIL_STATE = 33, > + VMX_FAIL_MSR = 34, > + VMX_MWAIT = 36, > + VMX_MTF = 37, > + VMX_MONITOR = 39, > + VMX_PAUSE = 40, > + VMX_FAIL_MCHECK = 41, > + 
VMX_TPR_THRESHOLD = 43, > + VMX_APIC_ACCESS = 44, > + VMX_GDTR_IDTR = 46, > + VMX_LDTR_TR = 47, > + VMX_EPT_VIOLATION = 48, > + VMX_EPT_MISCONFIG = 49, > + VMX_INVEPT = 50, > + VMX_PREEMPT = 52, > + VMX_INVVPID = 53, > + VMX_WBINVD = 54, > + VMX_XSETBV = 55 > +}; > + > +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ > +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ > + > +enum Ctrl_exi { > + EXI_HOST_64 = 1UL << 9, > + EXI_LOAD_PERF = 1UL << 12, > + EXI_INTA = 1UL << 15, > + EXI_LOAD_EFER = 1UL << 21, > +}; > + > +enum Ctrl_ent { > + ENT_GUEST_64 = 1UL << 9, > + ENT_LOAD_EFER = 1UL << 15, > +}; > + > +enum Ctrl_pin { > + PIN_EXTINT = 1ul << 0, > + PIN_NMI = 1ul << 3, > + PIN_VIRT_NMI = 1ul << 5, > +}; > + > +enum Ctrl0 { > + CPU_INTR_WINDOW = 1ul << 2, > + CPU_HLT = 1ul << 7, > + CPU_INVLPG = 1ul << 9, > + CPU_CR3_LOAD = 1ul << 15, > + CPU_CR3_STORE = 1ul << 16, > + CPU_TPR_SHADOW = 1ul << 21, > + CPU_NMI_WINDOW = 1ul << 22, > + CPU_IO = 1ul << 24, > + CPU_IO_BITMAP = 1ul << 25, > + CPU_SECONDARY = 1ul << 31, > +}; > + > +enum Ctrl1 { > + CPU_EPT = 1ul << 1, > + CPU_VPID = 1ul << 5, > + CPU_URG = 1ul << 7, > +}; > + > +#define SAVE_GPR \ > + "xchg %rax, regs\n\t" \ > + "xchg %rbx, regs+0x8\n\t" \ > + "xchg %rcx, regs+0x10\n\t" \ > + "xchg %rdx, regs+0x18\n\t" \ > + "xchg %rbp, regs+0x28\n\t" \ > + "xchg %rsi, regs+0x30\n\t" \ > + "xchg %rdi, regs+0x38\n\t" \ > + "xchg %r8, regs+0x40\n\t" \ > + "xchg %r9, regs+0x48\n\t" \ > + "xchg %r10, regs+0x50\n\t" \ > + "xchg %r11, regs+0x58\n\t" \ > + "xchg %r12, regs+0x60\n\t" \ > + "xchg %r13, regs+0x68\n\t" \ > + "xchg %r14, regs+0x70\n\t" \ > + "xchg %r15, regs+0x78\n\t" > + > +#define LOAD_GPR SAVE_GPR > + > +#define SAVE_GPR_C \ > + "xchg %%rax, regs\n\t" \ > + "xchg %%rbx, regs+0x8\n\t" \ > + "xchg %%rcx, regs+0x10\n\t" \ > + "xchg %%rdx, regs+0x18\n\t" \ > + "xchg %%rbp, regs+0x28\n\t" \ > + "xchg %%rsi, regs+0x30\n\t" \ > + "xchg %%rdi, regs+0x38\n\t" \ > + "xchg %%r8, regs+0x40\n\t" \ > + "xchg %%r9, 
regs+0x48\n\t" \ > + "xchg %%r10, regs+0x50\n\t" \ > + "xchg %%r11, regs+0x58\n\t" \ > + "xchg %%r12, regs+0x60\n\t" \ > + "xchg %%r13, regs+0x68\n\t" \ > + "xchg %%r14, regs+0x70\n\t" \ > + "xchg %%r15, regs+0x78\n\t" > + > +#define LOAD_GPR_C SAVE_GPR_C > + > +#define SAVE_RFLAGS \ > + "pushf\n\t" \ > + "pop regs+0x80\n\t" > + > +#define LOAD_RFLAGS \ > + "push regs+0x80\n\t" \ > + "popf\n\t" > + > +#define VMX_IO_SIZE_MASK 0x7 > +#define _VMX_IO_BYTE 1 > +#define _VMX_IO_WORD 2 > +#define _VMX_IO_LONG 3 > +#define VMX_IO_DIRECTION_MASK (1ul << 3) > +#define VMX_IO_IN (1ul << 3) > +#define VMX_IO_OUT 0 > +#define VMX_IO_STRING (1ul << 4) > +#define VMX_IO_REP (1ul << 5) > +#define VMX_IO_OPRAND_DX (1ul << 6) > +#define VMX_IO_PORT_MASK 0xFFFF0000 > +#define VMX_IO_PORT_SHIFT 16 > + > +#define VMX_TEST_VMEXIT 1 > +#define VMX_TEST_EXIT 2 > +#define VMX_TEST_RESUME 3 > +#define VMX_TEST_LAUNCH_ERR 4 > +#define VMX_TEST_RESUME_ERR 5 > + > +#define HYPERCALL_BIT (1ul << 12) > +#define HYPERCALL_MASK 0xFFF > +#define HYPERCALL_VMEXIT 0x1 > + > +static inline int vmcs_clear(struct vmcs *vmcs) > +{ > + bool ret; > + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > + return ret; > +} > + > +static inline u64 vmcs_read(enum Encoding enc) > +{ > + u64 val; > + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); > + return val; > +} > + > +static inline int vmcs_write(enum Encoding enc, u64 val) > +{ > + bool ret; > + asm volatile ("vmwrite %1, %2; setbe %0" > + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); > + return ret; > +} > + > +static inline int vmcs_save(struct vmcs **vmcs) > +{ > + bool ret; > + > + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); > + return ret; > +} > + > +#endif > + > -- > 1.7.9.5 > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Jul 28, 2013 at 10:24 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote: > Hi Gleb, > > It suddenly occurred to me that this patch also fails to handle > GUEST_RFLAGS on VMRESUME. > > I decided to remove rflags from struct regs since rflags can be read and > set via vmcs_read/vmcs_write in test-suite-defined functions (init > and exit_handler), and other general registers can only be set in the > framework code. Besides, I previously used regs.rflags as host rflags (in vmx_run()), so I changed it to regs.host_rflags to avoid confusion. In the previous version, regs.rflags is also not used in SAVE_GPR and LOAD_GPR, so it is reasonable to leave it for the user to set up. Arthur > > Then I will wait for Paolo and Gleb's further feedback and commit the > final patch. > > Arthur > > On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote: >> This is the first version of VMX nested environment. It contains the >> basic VMX instructions test cases, including VMXON/VMXOFF/VMPTRLD/ >> VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the >> basic execution routine in VMX nested environment and lets the VM print >> "Hello World" to report that it ran successfully. >> >> The first release also includes a test suite for vmenter (vmlaunch and >> vmresume). Besides, hypercall mechanism is included and currently it is >> used to invoke VM normal exit. >> >> New files added: >> x86/vmx.h : contains all VMX related macro declarations >> x86/vmx.c : main file for VMX nested test case >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> --- >> ChangeLog: >> 1. Refine the code in function vmx_run() >> 2. Fix bug of setting GUEST_RFLAGS >> 3. Move defines of selectors to lib/x86/vm.h >> 4. Move CR0/4 defines to lib/x86/vm.h, as well as some defines in lib/x86/vm.c >> 5. Move some inline functions to lib/x86/processor.h >> 6. 
Move some inline functions (vmcs related) to x86/vmx.h >> --- >> config-x86-common.mak | 2 + >> config-x86_64.mak | 1 + >> lib/x86/msr.h | 5 + >> lib/x86/processor.h | 15 ++ >> lib/x86/vm.c | 4 - >> lib/x86/vm.h | 21 ++ >> x86/cstart64.S | 4 + >> x86/unittests.cfg | 6 + >> x86/vmx.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++ >> x86/vmx.h | 466 ++++++++++++++++++++++++++++++++++ >> 10 files changed, 1194 insertions(+), 4 deletions(-) >> create mode 100644 x86/vmx.c >> create mode 100644 x86/vmx.h >> >> diff --git a/config-x86-common.mak b/config-x86-common.mak >> index 455032b..34a41e1 100644 >> --- a/config-x86-common.mak >> +++ b/config-x86-common.mak >> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o >> >> $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o >> >> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o >> + >> arch_clean: >> $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ >> $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o >> diff --git a/config-x86_64.mak b/config-x86_64.mak >> index 4e525f5..bb8ee89 100644 >> --- a/config-x86_64.mak >> +++ b/config-x86_64.mak >> @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ >> $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ >> $(TEST_DIR)/pcid.flat >> tests += $(TEST_DIR)/svm.flat >> +tests += $(TEST_DIR)/vmx.flat >> >> include config-x86-common.mak >> diff --git a/lib/x86/msr.h b/lib/x86/msr.h >> index 509a421..281255a 100644 >> --- a/lib/x86/msr.h >> +++ b/lib/x86/msr.h >> @@ -396,6 +396,11 @@ >> #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a >> #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b >> #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c >> +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d >> +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e >> +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f >> +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 >> + >> >> /* AMD-V MSRs */ >> >> diff --git a/lib/x86/processor.h b/lib/x86/processor.h >> index 
e46d8d0..f0c11cc 100644 >> --- a/lib/x86/processor.h >> +++ b/lib/x86/processor.h >> @@ -307,4 +307,19 @@ static inline void safe_halt(void) >> { >> asm volatile("sti; hlt"); >> } >> + >> +#ifdef __x86_64__ >> +static inline u64 read_rflags(void) >> +{ >> + u64 r; >> + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); >> + return r; >> +} >> + >> +static inline void write_rflags(u64 r) >> +{ >> + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); >> +} >> +#endif >> + >> #endif >> diff --git a/lib/x86/vm.c b/lib/x86/vm.c >> index 260ec45..188bf57 100644 >> --- a/lib/x86/vm.c >> +++ b/lib/x86/vm.c >> @@ -9,10 +9,6 @@ >> #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) >> #endif >> >> -#define X86_CR0_PE 0x00000001 >> -#define X86_CR0_WP 0x00010000 >> -#define X86_CR0_PG 0x80000000 >> -#define X86_CR4_PSE 0x00000010 >> static void *free = 0; >> static void *vfree_top = 0; >> >> diff --git a/lib/x86/vm.h b/lib/x86/vm.h >> index 0b5b5c7..eff6f72 100644 >> --- a/lib/x86/vm.h >> +++ b/lib/x86/vm.h >> @@ -16,6 +16,27 @@ >> #define PTE_USER (1ull << 2) >> #define PTE_ADDR (0xffffffffff000ull) >> >> +#define X86_CR0_PE 0x00000001 >> +#define X86_CR0_WP 0x00010000 >> +#define X86_CR0_PG 0x80000000 >> +#define X86_CR4_VMXE 0x00000001 >> +#define X86_CR4_PSE 0x00000010 >> +#define X86_CR4_PAE 0x00000020 >> +#define X86_CR4_PCIDE 0x00020000 >> + >> +#ifdef __x86_64__ >> +#define SEL_NULL_DESC 0x0 >> +#define SEL_KERN_CODE_64 0x8 >> +#define SEL_KERN_DATA_64 0x10 >> +#define SEL_USER_CODE_64 0x18 >> +#define SEL_USER_DATA_64 0x20 >> +#define SEL_CODE_32 0x28 >> +#define SEL_DATA_32 0x30 >> +#define SEL_CODE_16 0x38 >> +#define SEL_DATA_16 0x40 >> +#define SEL_TSS_RUN 0x48 >> +#endif >> + >> void setup_vm(); >> >> void *vmalloc(unsigned long size); >> diff --git a/x86/cstart64.S b/x86/cstart64.S >> index 24df5f8..0fe76da 100644 >> --- a/x86/cstart64.S >> +++ b/x86/cstart64.S >> @@ -4,6 +4,10 @@ >> .globl boot_idt >> boot_idt = 0 >> >> +.globl idt_descr >> +.globl tss_descr >> 
+.globl gdt64_desc >> + >> ipi_vector = 0x20 >> >> max_cpus = 64 >> diff --git a/x86/unittests.cfg b/x86/unittests.cfg >> index bc9643e..85c36aa 100644 >> --- a/x86/unittests.cfg >> +++ b/x86/unittests.cfg >> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" >> file = pcid.flat >> extra_params = -cpu qemu64,+pcid >> arch = x86_64 >> + >> +[vmx] >> +file = vmx.flat >> +extra_params = -cpu host,+vmx >> +arch = x86_64 >> + >> diff --git a/x86/vmx.c b/x86/vmx.c >> new file mode 100644 >> index 0000000..7467927 >> --- /dev/null >> +++ b/x86/vmx.c >> @@ -0,0 +1,674 @@ >> +#include "libcflat.h" >> +#include "processor.h" >> +#include "vm.h" >> +#include "desc.h" >> +#include "vmx.h" >> +#include "msr.h" >> +#include "smp.h" >> +#include "io.h" >> + >> +int fails = 0, tests = 0; >> +u32 *vmxon_region; >> +struct vmcs *vmcs_root; >> +u32 vpid_cnt; >> +void *guest_stack, *guest_syscall_stack; >> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; >> +ulong fix_cr0_set, fix_cr0_clr; >> +ulong fix_cr4_set, fix_cr4_clr; >> +struct regs regs; >> +struct vmx_test *current; >> +u64 hypercall_field = 0; >> +bool launched; >> + >> +extern u64 gdt64_desc[]; >> +extern u64 idt_descr[]; >> +extern u64 tss_descr[]; >> +extern void *vmx_return; >> +extern void *entry_sysenter; >> +extern void *guest_entry; >> + >> +static void report(const char *name, int result) >> +{ >> + ++tests; >> + if (result) >> + printf("PASS: %s\n", name); >> + else { >> + printf("FAIL: %s\n", name); >> + ++fails; >> + } >> +} >> + >> +static int make_vmcs_current(struct vmcs *vmcs) >> +{ >> + bool ret; >> + >> + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); >> + return ret; >> +} >> + >> +/* entry_sysenter */ >> +asm( >> + ".align 4, 0x90\n\t" >> + ".globl entry_sysenter\n\t" >> + "entry_sysenter:\n\t" >> + SAVE_GPR >> + " and $0xf, %rax\n\t" >> + " mov %rax, %rdi\n\t" >> + " call syscall_handler\n\t" >> + LOAD_GPR >> + " vmresume\n\t" >> +); >> + >> +static void 
__attribute__((__used__)) syscall_handler(u64 syscall_no) >> +{ >> + current->syscall_handler(syscall_no); >> +} >> + >> +static inline int vmx_on() >> +{ >> + bool ret; >> + asm volatile ("vmxon %1; setbe %0\n\t" >> + : "=q"(ret) : "m"(vmxon_region) : "cc"); >> + return ret; >> +} >> + >> +static inline int vmx_off() >> +{ >> + bool ret; >> + asm volatile("vmxoff; setbe %0\n\t" >> + : "=q"(ret) : : "cc"); >> + return ret; >> +} >> + >> +static void print_vmexit_info() >> +{ >> + u64 guest_rip, guest_rsp; >> + ulong reason = vmcs_read(EXI_REASON) & 0xff; >> + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); >> + guest_rip = vmcs_read(GUEST_RIP); >> + guest_rsp = vmcs_read(GUEST_RSP); >> + printf("VMEXIT info:\n"); >> + printf("\tvmexit reason = %d\n", reason); >> + printf("\texit qualification = 0x%x\n", exit_qual); >> + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); >> + printf("\tguest_rip = 0x%llx\n", guest_rip); >> + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", >> + regs.rax, regs.rbx, regs.rcx, regs.rdx); >> + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", >> + guest_rsp, regs.rbp, regs.rsi, regs.rdi); >> + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", >> + regs.r8, regs.r9, regs.r10, regs.r11); >> + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", >> + regs.r12, regs.r13, regs.r14, regs.r15); >> +} >> + >> +static void test_vmclear(void) >> +{ >> + u64 rflags; >> + >> + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + write_rflags(rflags); >> + report("test vmclear", vmcs_clear(vmcs_root) == 0); >> +} >> + >> +static void test_vmxoff(void) >> +{ >> + int ret; >> + u64 rflags; >> + >> + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + write_rflags(rflags); >> + ret = vmx_off(); >> + report("test vmxoff", !ret); >> +} >> + >> +static void __attribute__((__used__)) guest_main(void) >> +{ >> + current->guest_main(); >> +} >> + >> +/* guest_entry */ >> +asm( >> + ".align 
4, 0x90\n\t" >> + ".globl entry_guest\n\t" >> + "guest_entry:\n\t" >> + " call guest_main\n\t" >> + " mov $1, %edi\n\t" >> + " call hypercall\n\t" >> +); >> + >> +static void init_vmcs_ctrl(void) >> +{ >> + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ >> + /* 26.2.1.1 */ >> + vmcs_write(PIN_CONTROLS, ctrl_pin); >> + /* Disable VMEXIT of IO instruction */ >> + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); >> + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { >> + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; >> + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); >> + } >> + vmcs_write(CR3_TARGET_COUNT, 0); >> + vmcs_write(VPID, ++vpid_cnt); >> +} >> + >> +static void init_vmcs_host(void) >> +{ >> + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ >> + /* 26.2.1.2 */ >> + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); >> + >> + /* 26.2.1.3 */ >> + vmcs_write(ENT_CONTROLS, ctrl_enter); >> + vmcs_write(EXI_CONTROLS, ctrl_exit); >> + >> + /* 26.2.2 */ >> + vmcs_write(HOST_CR0, read_cr0()); >> + vmcs_write(HOST_CR3, read_cr3()); >> + vmcs_write(HOST_CR4, read_cr4()); >> + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); >> + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); >> + >> + /* 26.2.3 */ >> + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); >> + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); >> + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); >> + vmcs_write(HOST_BASE_TR, (u64)tss_descr); >> + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); >> + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); >> + vmcs_write(HOST_BASE_FS, 0); >> + vmcs_write(HOST_BASE_GS, 0); >> + >> + /* Set other vmcs area */ >> + vmcs_write(PF_ERROR_MASK, 0); >> + vmcs_write(PF_ERROR_MATCH, 0); >> + vmcs_write(VMCS_LINK_PTR, ~0ul); >> + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); >> + vmcs_write(HOST_RIP, (u64)(&vmx_return)); >> +} >> 
+ >> +static void init_vmcs_guest(void) >> +{ >> + /* 26.3 CHECKING AND LOADING GUEST STATE */ >> + ulong guest_cr0, guest_cr4, guest_cr3; >> + /* 26.3.1.1 */ >> + guest_cr0 = read_cr0(); >> + guest_cr4 = read_cr4(); >> + guest_cr3 = read_cr3(); >> + if (ctrl_enter & ENT_GUEST_64) { >> + guest_cr0 |= X86_CR0_PG; >> + guest_cr4 |= X86_CR4_PAE; >> + } >> + if ((ctrl_enter & ENT_GUEST_64) == 0) >> + guest_cr4 &= (~X86_CR4_PCIDE); >> + if (guest_cr0 & X86_CR0_PG) >> + guest_cr0 |= X86_CR0_PE; >> + vmcs_write(GUEST_CR0, guest_cr0); >> + vmcs_write(GUEST_CR3, guest_cr3); >> + vmcs_write(GUEST_CR4, guest_cr4); >> + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); >> + vmcs_write(GUEST_SYSENTER_ESP, >> + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); >> + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); >> + vmcs_write(GUEST_DR7, 0); >> + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); >> + >> + /* 26.3.1.2 */ >> + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); >> + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); >> + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); >> + vmcs_write(GUEST_SEL_LDTR, 0); >> + >> + vmcs_write(GUEST_BASE_CS, 0); >> + vmcs_write(GUEST_BASE_ES, 0); >> + vmcs_write(GUEST_BASE_SS, 0); >> + vmcs_write(GUEST_BASE_DS, 0); >> + vmcs_write(GUEST_BASE_FS, 0); >> + vmcs_write(GUEST_BASE_GS, 0); >> + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); >> + vmcs_write(GUEST_BASE_LDTR, 0); >> + >> + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); >> + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); >> + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); >> + >> + 
vmcs_write(GUEST_AR_CS, 0xa09b); >> + vmcs_write(GUEST_AR_DS, 0xc093); >> + vmcs_write(GUEST_AR_ES, 0xc093); >> + vmcs_write(GUEST_AR_FS, 0xc093); >> + vmcs_write(GUEST_AR_GS, 0xc093); >> + vmcs_write(GUEST_AR_SS, 0xc093); >> + vmcs_write(GUEST_AR_LDTR, 0x82); >> + vmcs_write(GUEST_AR_TR, 0x8b); >> + >> + /* 26.3.1.3 */ >> + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); >> + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); >> + vmcs_write(GUEST_LIMIT_GDTR, >> + ((struct descr *)gdt64_desc)->limit & 0xffff); >> + vmcs_write(GUEST_LIMIT_IDTR, >> + ((struct descr *)idt_descr)->limit & 0xffff); >> + >> + /* 26.3.1.4 */ >> + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); >> + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); >> + vmcs_write(GUEST_RFLAGS, 0x2); >> + >> + /* 26.3.1.5 */ >> + vmcs_write(GUEST_ACTV_STATE, 0); >> + vmcs_write(GUEST_INTR_STATE, 0); >> +} >> + >> +static int init_vmcs(struct vmcs **vmcs) >> +{ >> + *vmcs = alloc_page(); >> + memset(*vmcs, 0, PAGE_SIZE); >> + (*vmcs)->revision_id = basic.revision; >> + /* vmclear first to init vmcs */ >> + if (vmcs_clear(*vmcs)) { >> + printf("%s : vmcs_clear error\n", __func__); >> + return 1; >> + } >> + >> + if (make_vmcs_current(*vmcs)) { >> + printf("%s : make_vmcs_current error\n", __func__); >> + return 1; >> + } >> + >> + /* All settings to pin/exit/enter/cpu >> + control fields should be placed here */ >> + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; >> + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; >> + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); >> + ctrl_cpu[0] |= CPU_HLT; >> + /* DIsable IO instruction VMEXIT now */ >> + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); >> + ctrl_cpu[1] = 0; >> + >> + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; >> + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; >> + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; >> + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; >> + >> + 
init_vmcs_ctrl(); >> + init_vmcs_host(); >> + init_vmcs_guest(); >> + return 0; >> +} >> + >> +static void init_vmx(void) >> +{ >> + vmxon_region = alloc_page(); >> + memset(vmxon_region, 0, PAGE_SIZE); >> + >> + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); >> + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); >> + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); >> + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); >> + basic.val = rdmsr(MSR_IA32_VMX_BASIC); >> + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN >> + : MSR_IA32_VMX_PINBASED_CTLS); >> + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT >> + : MSR_IA32_VMX_EXIT_CTLS); >> + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY >> + : MSR_IA32_VMX_ENTRY_CTLS); >> + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC >> + : MSR_IA32_VMX_PROCBASED_CTLS); >> + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) >> + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); >> + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) >> + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); >> + >> + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); >> + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); >> + >> + *vmxon_region = basic.revision; >> + >> + guest_stack = alloc_page(); >> + memset(guest_stack, 0, PAGE_SIZE); >> + guest_syscall_stack = alloc_page(); >> + memset(guest_syscall_stack, 0, PAGE_SIZE); >> +} >> + >> +static int test_vmx_capability(void) >> +{ >> + struct cpuid r; >> + u64 ret1, ret2; >> + u64 ia32_feature_control; >> + r = cpuid(1); >> + ret1 = ((r.c) >> 5) & 1; >> + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); >> + ret2 = ((ia32_feature_control & 0x5) == 0x5); >> + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { >> + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); >> + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); >> + ret2 = ((ia32_feature_control & 0x5) == 0x5); >> + } >> + report("test vmx capability", ret1 
& ret2); >> + return !(ret1 & ret2); >> +} >> + >> +static int test_vmxon(void) >> +{ >> + int ret; >> + u64 rflags; >> + >> + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + write_rflags(rflags); >> + ret = vmx_on(); >> + report("test vmxon", !ret); >> + return ret; >> +} >> + >> +static void test_vmptrld(void) >> +{ >> + u64 rflags; >> + struct vmcs *vmcs; >> + >> + vmcs = alloc_page(); >> + vmcs->revision_id = basic.revision; >> + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + write_rflags(rflags); >> + report("test vmptrld", make_vmcs_current(vmcs) == 0); >> +} >> + >> +static void test_vmptrst(void) >> +{ >> + u64 rflags; >> + int ret; >> + struct vmcs *vmcs1, *vmcs2; >> + >> + vmcs1 = alloc_page(); >> + memset(vmcs1, 0, PAGE_SIZE); >> + init_vmcs(&vmcs1); >> + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; >> + write_rflags(rflags); >> + ret = vmcs_save(&vmcs2); >> + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); >> +} >> + >> +/* This function can only be called in guest */ >> +static void __attribute__((__used__)) hypercall(u32 hypercall_no) >> +{ >> + u64 val = 0; >> + val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; >> + hypercall_field = val; >> + asm volatile("vmcall\n\t"); >> +} >> + >> +static bool is_hypercall() >> +{ >> + ulong reason, hyper_bit; >> + >> + reason = vmcs_read(EXI_REASON) & 0xff; >> + hyper_bit = hypercall_field & HYPERCALL_BIT; >> + if (reason == VMX_VMCALL && hyper_bit) >> + return true; >> + return false; >> +} >> + >> +static int handle_hypercall() >> +{ >> + ulong hypercall_no; >> + >> + hypercall_no = hypercall_field & HYPERCALL_MASK; >> + hypercall_field = 0; >> + switch (hypercall_no) { >> + case HYPERCALL_VMEXIT: >> + return VMX_TEST_VMEXIT; >> + default: >> + printf("ERROR : Invalid hypercall number : %d\n", hypercall_no); >> + } >> + return VMX_TEST_EXIT; >> +} >> + >> +static int exit_handler() >> +{ >> + int ret; >> + >> + current->exits++; >> + current->guest_regs = regs; >> 
+ if (is_hypercall()) >> + ret = handle_hypercall(); >> + else >> + ret = current->exit_handler(); >> + regs = current->guest_regs; >> + switch (ret) { >> + case VMX_TEST_VMEXIT: >> + case VMX_TEST_RESUME: >> + return ret; >> + case VMX_TEST_EXIT: >> + break; >> + default: >> + printf("ERROR : Invalid exit_handler return val %d.\n" >> + , ret); >> + } >> + print_vmexit_info(); >> + exit(-1); >> + return 0; >> +} >> + >> +static int vmx_run() >> +{ >> + u32 ret = 0, fail = 0; >> + >> + while (1) { >> + asm volatile ( >> + "mov %%rsp, %%rsi\n\t" >> + "mov %2, %%rdi\n\t" >> + "vmwrite %%rsi, %%rdi\n\t" >> + >> + LOAD_GPR_C >> + "cmpl $0, %1\n\t" >> + "jne 1f\n\t" >> + LOAD_RFLAGS >> + "vmlaunch\n\t" >> + "jmp 2f\n\t" >> + "1: " >> + "vmresume\n\t" >> + "2: " >> + "setbe %0\n\t" >> + "vmx_return:\n\t" >> + SAVE_GPR_C >> + SAVE_RFLAGS >> + : "=m"(fail) >> + : "m"(launched), "i"(HOST_RSP) >> + : "rdi", "rsi", "memory", "cc" >> + >> + ); >> + if (fail) >> + ret = launched ? VMX_TEST_RESUME_ERR : >> + VMX_TEST_LAUNCH_ERR; >> + else { >> + launched = 1; >> + ret = exit_handler(); >> + } >> + if (ret != VMX_TEST_RESUME) >> + break; >> + } >> + launched = 0; >> + switch (ret) { >> + case VMX_TEST_VMEXIT: >> + return 0; >> + case VMX_TEST_LAUNCH_ERR: >> + printf("%s : vmlaunch failed.\n", __func__); >> + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) >> + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) >> + printf("\tvmlaunch set wrong flags\n"); >> + report("test vmlaunch", 0); >> + break; >> + case VMX_TEST_RESUME_ERR: >> + printf("%s : vmresume failed.\n", __func__); >> + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) >> + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) >> + printf("\tvmresume set wrong flags\n"); >> + report("test vmresume", 0); >> + break; >> + default: >> + printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret); >> + break; >> + } >> + return 1; >> 
+} >> + >> +static int test_run(struct vmx_test *test) >> +{ >> + if (test->name == NULL) >> + test->name = "(no name)"; >> + if (vmx_on()) { >> + printf("%s : vmxon failed.\n", __func__); >> + return 1; >> + } >> + init_vmcs(&(test->vmcs)); >> + /* Directly call test->init is ok here, init_vmcs has done >> + vmcs init, vmclear and vmptrld*/ >> + if (test->init) >> + test->init(test->vmcs); >> + test->exits = 0; >> + current = test; >> + regs = test->guest_regs; >> + vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); >> + launched = 0; >> + printf("\nTest suite : %s\n", test->name); >> + vmx_run(); >> + if (vmx_off()) { >> + printf("%s : vmxoff failed.\n", __func__); >> + return 1; >> + } >> + return 0; >> +} >> + >> +static void basic_init() >> +{ >> +} >> + >> +static void basic_guest_main() >> +{ >> + /* Here is null guest_main, print Hello World */ >> + printf("\tHello World, this is null_guest_main!\n"); >> +} >> + >> +static int basic_exit_handler() >> +{ >> + u64 guest_rip; >> + ulong reason; >> + >> + guest_rip = vmcs_read(GUEST_RIP); >> + reason = vmcs_read(EXI_REASON) & 0xff; >> + >> + switch (reason) { >> + case VMX_VMCALL: >> + print_vmexit_info(); >> + vmcs_write(GUEST_RIP, guest_rip + 3); >> + return VMX_TEST_RESUME; >> + default: >> + break; >> + } >> + printf("ERROR : Unhandled vmx exit.\n"); >> + print_vmexit_info(); >> + return VMX_TEST_EXIT; >> +} >> + >> +static void basic_syscall_handler(u64 syscall_no) >> +{ >> +} >> + >> +static void vmenter_main() >> +{ >> + u64 rax; >> + u64 rsp, resume_rsp; >> + >> + report("test vmlaunch", 1); >> + >> + asm volatile( >> + "mov %%rsp, %0\n\t" >> + "mov %3, %%rax\n\t" >> + "vmcall\n\t" >> + "mov %%rax, %1\n\t" >> + "mov %%rsp, %2\n\t" >> + : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) >> + : "g"(0xABCD)); >> + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); >> +} >> + >> +static int vmenter_exit_handler() >> +{ >> + u64 guest_rip; >> + ulong reason; >> + >> + guest_rip = vmcs_read(GUEST_RIP); >> + 
reason = vmcs_read(EXI_REASON) & 0xff; >> + switch (reason) { >> + case VMX_VMCALL: >> + if (current->guest_regs.rax != 0xABCD) { >> + report("test vmresume", 0); >> + return VMX_TEST_VMEXIT; >> + } >> + current->guest_regs.rax = 0xFFFF; >> + vmcs_write(GUEST_RIP, guest_rip + 3); >> + return VMX_TEST_RESUME; >> + default: >> + report("test vmresume", 0); >> + print_vmexit_info(); >> + } >> + return VMX_TEST_VMEXIT; >> +} >> + >> + >> +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs >> + basic_* just implement some basic functions */ >> +static struct vmx_test vmx_tests[] = { >> + { "null", basic_init, basic_guest_main, basic_exit_handler, >> + basic_syscall_handler, {0} }, >> + { "vmenter", basic_init, vmenter_main, vmenter_exit_handler, >> + basic_syscall_handler, {0} }, >> +}; >> + >> +int main(void) >> +{ >> + int i; >> + >> + setup_vm(); >> + setup_idt(); >> + >> + if (test_vmx_capability() != 0) { >> + printf("ERROR : vmx not supported, check +vmx option\n"); >> + goto exit; >> + } >> + init_vmx(); >> + /* Set basic test ctxt the same as "null" */ >> + current = &vmx_tests[0]; >> + if (test_vmxon() != 0) >> + goto exit; >> + test_vmptrld(); >> + test_vmclear(); >> + test_vmptrst(); >> + init_vmcs(&vmcs_root); >> + if (vmx_run()) { >> + report("test vmlaunch", 0); >> + goto exit; >> + } >> + test_vmxoff(); >> + >> + for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) { >> + if (test_run(&vmx_tests[i])) >> + goto exit; >> + } >> + >> +exit: >> + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); >> + return fails ? 
1 : 0; >> +} >> diff --git a/x86/vmx.h b/x86/vmx.h >> new file mode 100644 >> index 0000000..1fb9738 >> --- /dev/null >> +++ b/x86/vmx.h >> @@ -0,0 +1,466 @@ >> +#ifndef __HYPERVISOR_H >> +#define __HYPERVISOR_H >> + >> +#include "libcflat.h" >> + >> +struct vmcs { >> + u32 revision_id; /* vmcs revision identifier */ >> + u32 abort; /* VMX-abort indicator */ >> + /* VMCS data */ >> + char data[0]; >> +}; >> + >> +struct regs { >> + u64 rax; >> + u64 rcx; >> + u64 rdx; >> + u64 rbx; >> + u64 cr2; >> + u64 rbp; >> + u64 rsi; >> + u64 rdi; >> + u64 r8; >> + u64 r9; >> + u64 r10; >> + u64 r11; >> + u64 r12; >> + u64 r13; >> + u64 r14; >> + u64 r15; >> + u64 rflags; >> +}; >> + >> +struct vmx_test { >> + const char *name; >> + void (*init)(struct vmcs *vmcs); >> + void (*guest_main)(); >> + int (*exit_handler)(); >> + void (*syscall_handler)(u64 syscall_no); >> + struct regs guest_regs; >> + struct vmcs *vmcs; >> + int exits; >> +}; >> + >> +static union vmx_basic { >> + u64 val; >> + struct { >> + u32 revision; >> + u32 size:13, >> + : 3, >> + width:1, >> + dual:1, >> + type:4, >> + insouts:1, >> + ctrl:1; >> + }; >> +} basic; >> + >> +static union vmx_ctrl_pin { >> + u64 val; >> + struct { >> + u32 set, clr; >> + }; >> +} ctrl_pin_rev; >> + >> +static union vmx_ctrl_cpu { >> + u64 val; >> + struct { >> + u32 set, clr; >> + }; >> +} ctrl_cpu_rev[2]; >> + >> +static union vmx_ctrl_exit { >> + u64 val; >> + struct { >> + u32 set, clr; >> + }; >> +} ctrl_exit_rev; >> + >> +static union vmx_ctrl_ent { >> + u64 val; >> + struct { >> + u32 set, clr; >> + }; >> +} ctrl_enter_rev; >> + >> +static union vmx_ept_vpid { >> + u64 val; >> + struct { >> + u32:16, >> + super:2, >> + : 2, >> + invept:1, >> + : 11; >> + u32 invvpid:1; >> + }; >> +} ept_vpid; >> + >> +struct descr { >> + u16 limit; >> + u64 addr; >> +}; >> + >> +enum Encoding { >> + /* 16-Bit Control Fields */ >> + VPID = 0x0000ul, >> + /* Posted-interrupt notification vector */ >> + PINV = 0x0002ul, >> + /* EPTP index 
*/ >> + EPTP_IDX = 0x0004ul, >> + >> + /* 16-Bit Guest State Fields */ >> + GUEST_SEL_ES = 0x0800ul, >> + GUEST_SEL_CS = 0x0802ul, >> + GUEST_SEL_SS = 0x0804ul, >> + GUEST_SEL_DS = 0x0806ul, >> + GUEST_SEL_FS = 0x0808ul, >> + GUEST_SEL_GS = 0x080aul, >> + GUEST_SEL_LDTR = 0x080cul, >> + GUEST_SEL_TR = 0x080eul, >> + GUEST_INT_STATUS = 0x0810ul, >> + >> + /* 16-Bit Host State Fields */ >> + HOST_SEL_ES = 0x0c00ul, >> + HOST_SEL_CS = 0x0c02ul, >> + HOST_SEL_SS = 0x0c04ul, >> + HOST_SEL_DS = 0x0c06ul, >> + HOST_SEL_FS = 0x0c08ul, >> + HOST_SEL_GS = 0x0c0aul, >> + HOST_SEL_TR = 0x0c0cul, >> + >> + /* 64-Bit Control Fields */ >> + IO_BITMAP_A = 0x2000ul, >> + IO_BITMAP_B = 0x2002ul, >> + MSR_BITMAP = 0x2004ul, >> + EXIT_MSR_ST_ADDR = 0x2006ul, >> + EXIT_MSR_LD_ADDR = 0x2008ul, >> + ENTER_MSR_LD_ADDR = 0x200aul, >> + VMCS_EXEC_PTR = 0x200cul, >> + TSC_OFFSET = 0x2010ul, >> + TSC_OFFSET_HI = 0x2011ul, >> + APIC_VIRT_ADDR = 0x2012ul, >> + APIC_ACCS_ADDR = 0x2014ul, >> + EPTP = 0x201aul, >> + EPTP_HI = 0x201bul, >> + >> + /* 64-Bit Readonly Data Field */ >> + INFO_PHYS_ADDR = 0x2400ul, >> + >> + /* 64-Bit Guest State */ >> + VMCS_LINK_PTR = 0x2800ul, >> + VMCS_LINK_PTR_HI = 0x2801ul, >> + GUEST_DEBUGCTL = 0x2802ul, >> + GUEST_DEBUGCTL_HI = 0x2803ul, >> + GUEST_EFER = 0x2806ul, >> + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, >> + GUEST_PDPTE = 0x280aul, >> + >> + /* 64-Bit Host State */ >> + HOST_EFER = 0x2c02ul, >> + HOST_PERF_GLOBAL_CTRL = 0x2c04ul, >> + >> + /* 32-Bit Control Fields */ >> + PIN_CONTROLS = 0x4000ul, >> + CPU_EXEC_CTRL0 = 0x4002ul, >> + EXC_BITMAP = 0x4004ul, >> + PF_ERROR_MASK = 0x4006ul, >> + PF_ERROR_MATCH = 0x4008ul, >> + CR3_TARGET_COUNT = 0x400aul, >> + EXI_CONTROLS = 0x400cul, >> + EXI_MSR_ST_CNT = 0x400eul, >> + EXI_MSR_LD_CNT = 0x4010ul, >> + ENT_CONTROLS = 0x4012ul, >> + ENT_MSR_LD_CNT = 0x4014ul, >> + ENT_INTR_INFO = 0x4016ul, >> + ENT_INTR_ERROR = 0x4018ul, >> + ENT_INST_LEN = 0x401aul, >> + TPR_THRESHOLD = 0x401cul, >> + CPU_EXEC_CTRL1 = 0x401eul, >> + 
>> + /* 32-Bit R/O Data Fields */ >> + VMX_INST_ERROR = 0x4400ul, >> + EXI_REASON = 0x4402ul, >> + EXI_INTR_INFO = 0x4404ul, >> + EXI_INTR_ERROR = 0x4406ul, >> + IDT_VECT_INFO = 0x4408ul, >> + IDT_VECT_ERROR = 0x440aul, >> + EXI_INST_LEN = 0x440cul, >> + EXI_INST_INFO = 0x440eul, >> + >> + /* 32-Bit Guest State Fields */ >> + GUEST_LIMIT_ES = 0x4800ul, >> + GUEST_LIMIT_CS = 0x4802ul, >> + GUEST_LIMIT_SS = 0x4804ul, >> + GUEST_LIMIT_DS = 0x4806ul, >> + GUEST_LIMIT_FS = 0x4808ul, >> + GUEST_LIMIT_GS = 0x480aul, >> + GUEST_LIMIT_LDTR = 0x480cul, >> + GUEST_LIMIT_TR = 0x480eul, >> + GUEST_LIMIT_GDTR = 0x4810ul, >> + GUEST_LIMIT_IDTR = 0x4812ul, >> + GUEST_AR_ES = 0x4814ul, >> + GUEST_AR_CS = 0x4816ul, >> + GUEST_AR_SS = 0x4818ul, >> + GUEST_AR_DS = 0x481aul, >> + GUEST_AR_FS = 0x481cul, >> + GUEST_AR_GS = 0x481eul, >> + GUEST_AR_LDTR = 0x4820ul, >> + GUEST_AR_TR = 0x4822ul, >> + GUEST_INTR_STATE = 0x4824ul, >> + GUEST_ACTV_STATE = 0x4826ul, >> + GUEST_SMBASE = 0x4828ul, >> + GUEST_SYSENTER_CS = 0x482aul, >> + >> + /* 32-Bit Host State Fields */ >> + HOST_SYSENTER_CS = 0x4c00ul, >> + >> + /* Natural-Width Control Fields */ >> + CR0_MASK = 0x6000ul, >> + CR4_MASK = 0x6002ul, >> + CR0_READ_SHADOW = 0x6004ul, >> + CR4_READ_SHADOW = 0x6006ul, >> + CR3_TARGET_0 = 0x6008ul, >> + CR3_TARGET_1 = 0x600aul, >> + CR3_TARGET_2 = 0x600cul, >> + CR3_TARGET_3 = 0x600eul, >> + >> + /* Natural-Width R/O Data Fields */ >> + EXI_QUALIFICATION = 0x6400ul, >> + IO_RCX = 0x6402ul, >> + IO_RSI = 0x6404ul, >> + IO_RDI = 0x6406ul, >> + IO_RIP = 0x6408ul, >> + GUEST_LINEAR_ADDRESS = 0x640aul, >> + >> + /* Natural-Width Guest State Fields */ >> + GUEST_CR0 = 0x6800ul, >> + GUEST_CR3 = 0x6802ul, >> + GUEST_CR4 = 0x6804ul, >> + GUEST_BASE_ES = 0x6806ul, >> + GUEST_BASE_CS = 0x6808ul, >> + GUEST_BASE_SS = 0x680aul, >> + GUEST_BASE_DS = 0x680cul, >> + GUEST_BASE_FS = 0x680eul, >> + GUEST_BASE_GS = 0x6810ul, >> + GUEST_BASE_LDTR = 0x6812ul, >> + GUEST_BASE_TR = 0x6814ul, >> + GUEST_BASE_GDTR = 
0x6816ul, >> + GUEST_BASE_IDTR = 0x6818ul, >> + GUEST_DR7 = 0x681aul, >> + GUEST_RSP = 0x681cul, >> + GUEST_RIP = 0x681eul, >> + GUEST_RFLAGS = 0x6820ul, >> + GUEST_PENDING_DEBUG = 0x6822ul, >> + GUEST_SYSENTER_ESP = 0x6824ul, >> + GUEST_SYSENTER_EIP = 0x6826ul, >> + >> + /* Natural-Width Host State Fields */ >> + HOST_CR0 = 0x6c00ul, >> + HOST_CR3 = 0x6c02ul, >> + HOST_CR4 = 0x6c04ul, >> + HOST_BASE_FS = 0x6c06ul, >> + HOST_BASE_GS = 0x6c08ul, >> + HOST_BASE_TR = 0x6c0aul, >> + HOST_BASE_GDTR = 0x6c0cul, >> + HOST_BASE_IDTR = 0x6c0eul, >> + HOST_SYSENTER_ESP = 0x6c10ul, >> + HOST_SYSENTER_EIP = 0x6c12ul, >> + HOST_RSP = 0x6c14ul, >> + HOST_RIP = 0x6c16ul >> +}; >> + >> +enum Reason { >> + VMX_EXC_NMI = 0, >> + VMX_EXTINT = 1, >> + VMX_TRIPLE_FAULT = 2, >> + VMX_INIT = 3, >> + VMX_SIPI = 4, >> + VMX_SMI_IO = 5, >> + VMX_SMI_OTHER = 6, >> + VMX_INTR_WINDOW = 7, >> + VMX_NMI_WINDOW = 8, >> + VMX_TASK_SWITCH = 9, >> + VMX_CPUID = 10, >> + VMX_GETSEC = 11, >> + VMX_HLT = 12, >> + VMX_INVD = 13, >> + VMX_INVLPG = 14, >> + VMX_RDPMC = 15, >> + VMX_RDTSC = 16, >> + VMX_RSM = 17, >> + VMX_VMCALL = 18, >> + VMX_VMCLEAR = 19, >> + VMX_VMLAUNCH = 20, >> + VMX_VMPTRLD = 21, >> + VMX_VMPTRST = 22, >> + VMX_VMREAD = 23, >> + VMX_VMRESUME = 24, >> + VMX_VMWRITE = 25, >> + VMX_VMXOFF = 26, >> + VMX_VMXON = 27, >> + VMX_CR = 28, >> + VMX_DR = 29, >> + VMX_IO = 30, >> + VMX_RDMSR = 31, >> + VMX_WRMSR = 32, >> + VMX_FAIL_STATE = 33, >> + VMX_FAIL_MSR = 34, >> + VMX_MWAIT = 36, >> + VMX_MTF = 37, >> + VMX_MONITOR = 39, >> + VMX_PAUSE = 40, >> + VMX_FAIL_MCHECK = 41, >> + VMX_TPR_THRESHOLD = 43, >> + VMX_APIC_ACCESS = 44, >> + VMX_GDTR_IDTR = 46, >> + VMX_LDTR_TR = 47, >> + VMX_EPT_VIOLATION = 48, >> + VMX_EPT_MISCONFIG = 49, >> + VMX_INVEPT = 50, >> + VMX_PREEMPT = 52, >> + VMX_INVVPID = 53, >> + VMX_WBINVD = 54, >> + VMX_XSETBV = 55 >> +}; >> + >> +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ >> +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ >> + >> +enum Ctrl_exi { >> + 
EXI_HOST_64 = 1UL << 9, >> + EXI_LOAD_PERF = 1UL << 12, >> + EXI_INTA = 1UL << 15, >> + EXI_LOAD_EFER = 1UL << 21, >> +}; >> + >> +enum Ctrl_ent { >> + ENT_GUEST_64 = 1UL << 9, >> + ENT_LOAD_EFER = 1UL << 15, >> +}; >> + >> +enum Ctrl_pin { >> + PIN_EXTINT = 1ul << 0, >> + PIN_NMI = 1ul << 3, >> + PIN_VIRT_NMI = 1ul << 5, >> +}; >> + >> +enum Ctrl0 { >> + CPU_INTR_WINDOW = 1ul << 2, >> + CPU_HLT = 1ul << 7, >> + CPU_INVLPG = 1ul << 9, >> + CPU_CR3_LOAD = 1ul << 15, >> + CPU_CR3_STORE = 1ul << 16, >> + CPU_TPR_SHADOW = 1ul << 21, >> + CPU_NMI_WINDOW = 1ul << 22, >> + CPU_IO = 1ul << 24, >> + CPU_IO_BITMAP = 1ul << 25, >> + CPU_SECONDARY = 1ul << 31, >> +}; >> + >> +enum Ctrl1 { >> + CPU_EPT = 1ul << 1, >> + CPU_VPID = 1ul << 5, >> + CPU_URG = 1ul << 7, >> +}; >> + >> +#define SAVE_GPR \ >> + "xchg %rax, regs\n\t" \ >> + "xchg %rbx, regs+0x8\n\t" \ >> + "xchg %rcx, regs+0x10\n\t" \ >> + "xchg %rdx, regs+0x18\n\t" \ >> + "xchg %rbp, regs+0x28\n\t" \ >> + "xchg %rsi, regs+0x30\n\t" \ >> + "xchg %rdi, regs+0x38\n\t" \ >> + "xchg %r8, regs+0x40\n\t" \ >> + "xchg %r9, regs+0x48\n\t" \ >> + "xchg %r10, regs+0x50\n\t" \ >> + "xchg %r11, regs+0x58\n\t" \ >> + "xchg %r12, regs+0x60\n\t" \ >> + "xchg %r13, regs+0x68\n\t" \ >> + "xchg %r14, regs+0x70\n\t" \ >> + "xchg %r15, regs+0x78\n\t" >> + >> +#define LOAD_GPR SAVE_GPR >> + >> +#define SAVE_GPR_C \ >> + "xchg %%rax, regs\n\t" \ >> + "xchg %%rbx, regs+0x8\n\t" \ >> + "xchg %%rcx, regs+0x10\n\t" \ >> + "xchg %%rdx, regs+0x18\n\t" \ >> + "xchg %%rbp, regs+0x28\n\t" \ >> + "xchg %%rsi, regs+0x30\n\t" \ >> + "xchg %%rdi, regs+0x38\n\t" \ >> + "xchg %%r8, regs+0x40\n\t" \ >> + "xchg %%r9, regs+0x48\n\t" \ >> + "xchg %%r10, regs+0x50\n\t" \ >> + "xchg %%r11, regs+0x58\n\t" \ >> + "xchg %%r12, regs+0x60\n\t" \ >> + "xchg %%r13, regs+0x68\n\t" \ >> + "xchg %%r14, regs+0x70\n\t" \ >> + "xchg %%r15, regs+0x78\n\t" >> + >> +#define LOAD_GPR_C SAVE_GPR_C >> + >> +#define SAVE_RFLAGS \ >> + "pushf\n\t" \ >> + "pop regs+0x80\n\t" >> + >> 
+#define LOAD_RFLAGS \ >> + "push regs+0x80\n\t" \ >> + "popf\n\t" >> + >> +#define VMX_IO_SIZE_MASK 0x7 >> +#define _VMX_IO_BYTE 1 >> +#define _VMX_IO_WORD 2 >> +#define _VMX_IO_LONG 3 >> +#define VMX_IO_DIRECTION_MASK (1ul << 3) >> +#define VMX_IO_IN (1ul << 3) >> +#define VMX_IO_OUT 0 >> +#define VMX_IO_STRING (1ul << 4) >> +#define VMX_IO_REP (1ul << 5) >> +#define VMX_IO_OPRAND_DX (1ul << 6) >> +#define VMX_IO_PORT_MASK 0xFFFF0000 >> +#define VMX_IO_PORT_SHIFT 16 >> + >> +#define VMX_TEST_VMEXIT 1 >> +#define VMX_TEST_EXIT 2 >> +#define VMX_TEST_RESUME 3 >> +#define VMX_TEST_LAUNCH_ERR 4 >> +#define VMX_TEST_RESUME_ERR 5 >> + >> +#define HYPERCALL_BIT (1ul << 12) >> +#define HYPERCALL_MASK 0xFFF >> +#define HYPERCALL_VMEXIT 0x1 >> + >> +static inline int vmcs_clear(struct vmcs *vmcs) >> +{ >> + bool ret; >> + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); >> + return ret; >> +} >> + >> +static inline u64 vmcs_read(enum Encoding enc) >> +{ >> + u64 val; >> + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); >> + return val; >> +} >> + >> +static inline int vmcs_write(enum Encoding enc, u64 val) >> +{ >> + bool ret; >> + asm volatile ("vmwrite %1, %2; setbe %0" >> + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); >> + return ret; >> +} >> + >> +static inline int vmcs_save(struct vmcs **vmcs) >> +{ >> + bool ret; >> + >> + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); >> + return ret; >> +} >> + >> +#endif >> + >> -- >> 1.7.9.5 >> -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Jul 28, 2013 at 10:24:34PM +0800, Arthur Chunqi Li wrote: > Hi Gleb, > > It suddenly occurred to me that this patch also fails to handle > GUEST_RFLAGS on VMRESUME. > As you say below, if a test wants to change rflags in the middle of a run it can use vmcs_write, so this is not a big deal. > I have decided to remove rflags from struct regs since rflags can be read and > set via vmcs_read/vmcs_write in test-suite-defined functions (init > and exit_handler), and other general registers can only be set in the > framework code. > The code that prints the vmlaunch/vmresume error in vmx_run() relies on rflags being saved by the assembly code, so be careful. > Then I will wait for Paolo and Gleb's further feedback and commit the > final patch. > Yes, please wait for Paolo's comments. I want to hear his opinion on the assembly code. I will not be surprised if he finds a reason it cannot work :) > Arthur > > On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote: > > This is the first version of VMX nested environment. It contains the > > basic VMX instructions test cases, including VMXON/VMXOFF/VMPTRLD/ > > VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the > > basic execution routine in VMX nested environment and lets the VM print > > "Hello World" to report that it ran successfully. > > > > The first release also includes a test suite for vmenter (vmlaunch and > > vmresume). Besides, hypercall mechanism is included and currently it is > > used to invoke VM normal exit. > > > > New files added: > > x86/vmx.h : contains all VMX related macro declarations > > x86/vmx.c : main file for VMX nested test case > > > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > > --- > > ChangeLog: > > 1. Refine code in function vmx_run() > > 2. Fix bug of setting GUEST_RFLAGS > > 3. Move defines of selectors to lib/x86/vm.h > > 4. Move CR0/4 defines to lib/x86/vm.h, as well as some defines from lib/x86/vm.c > > 5.
Move some inline functions to lib/x86/processor.h > > 6. Move some inline functions (vmcs related) to x86/vmx.h > > --- > > config-x86-common.mak | 2 + > > config-x86_64.mak | 1 + > > lib/x86/msr.h | 5 + > > lib/x86/processor.h | 15 ++ > > lib/x86/vm.c | 4 - > > lib/x86/vm.h | 21 ++ > > x86/cstart64.S | 4 + > > x86/unittests.cfg | 6 + > > x86/vmx.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++ > > x86/vmx.h | 466 ++++++++++++++++++++++++++++++++++ > > 10 files changed, 1194 insertions(+), 4 deletions(-) > > create mode 100644 x86/vmx.c > > create mode 100644 x86/vmx.h > > > > diff --git a/config-x86-common.mak b/config-x86-common.mak > > index 455032b..34a41e1 100644 > > --- a/config-x86-common.mak > > +++ b/config-x86-common.mak > > @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o > > > > $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o > > > > +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o > > + > > arch_clean: > > $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ > > $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o > > diff --git a/config-x86_64.mak b/config-x86_64.mak > > index 4e525f5..bb8ee89 100644 > > --- a/config-x86_64.mak > > +++ b/config-x86_64.mak > > @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ > > $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ > > $(TEST_DIR)/pcid.flat > > tests += $(TEST_DIR)/svm.flat > > +tests += $(TEST_DIR)/vmx.flat > > > > include config-x86-common.mak > > diff --git a/lib/x86/msr.h b/lib/x86/msr.h > > index 509a421..281255a 100644 > > --- a/lib/x86/msr.h > > +++ b/lib/x86/msr.h > > @@ -396,6 +396,11 @@ > > #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a > > #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b > > #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c > > +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d > > +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e > > +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f > > +#define MSR_IA32_VMX_TRUE_ENTRY 
0x00000490 > > + > > > > /* AMD-V MSRs */ > > > > diff --git a/lib/x86/processor.h b/lib/x86/processor.h > > index e46d8d0..f0c11cc 100644 > > --- a/lib/x86/processor.h > > +++ b/lib/x86/processor.h > > @@ -307,4 +307,19 @@ static inline void safe_halt(void) > > { > > asm volatile("sti; hlt"); > > } > > + > > +#ifdef __x86_64__ > > +static inline u64 read_rflags(void) > > +{ > > + u64 r; > > + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); > > + return r; > > +} > > + > > +static inline void write_rflags(u64 r) > > +{ > > + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); > > +} > > +#endif > > + > > #endif > > diff --git a/lib/x86/vm.c b/lib/x86/vm.c > > index 260ec45..188bf57 100644 > > --- a/lib/x86/vm.c > > +++ b/lib/x86/vm.c > > @@ -9,10 +9,6 @@ > > #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) > > #endif > > > > -#define X86_CR0_PE 0x00000001 > > -#define X86_CR0_WP 0x00010000 > > -#define X86_CR0_PG 0x80000000 > > -#define X86_CR4_PSE 0x00000010 > > static void *free = 0; > > static void *vfree_top = 0; > > > > diff --git a/lib/x86/vm.h b/lib/x86/vm.h > > index 0b5b5c7..eff6f72 100644 > > --- a/lib/x86/vm.h > > +++ b/lib/x86/vm.h > > @@ -16,6 +16,27 @@ > > #define PTE_USER (1ull << 2) > > #define PTE_ADDR (0xffffffffff000ull) > > > > +#define X86_CR0_PE 0x00000001 > > +#define X86_CR0_WP 0x00010000 > > +#define X86_CR0_PG 0x80000000 > > +#define X86_CR4_VMXE 0x00000001 > > +#define X86_CR4_PSE 0x00000010 > > +#define X86_CR4_PAE 0x00000020 > > +#define X86_CR4_PCIDE 0x00020000 > > + > > +#ifdef __x86_64__ > > +#define SEL_NULL_DESC 0x0 > > +#define SEL_KERN_CODE_64 0x8 > > +#define SEL_KERN_DATA_64 0x10 > > +#define SEL_USER_CODE_64 0x18 > > +#define SEL_USER_DATA_64 0x20 > > +#define SEL_CODE_32 0x28 > > +#define SEL_DATA_32 0x30 > > +#define SEL_CODE_16 0x38 > > +#define SEL_DATA_16 0x40 > > +#define SEL_TSS_RUN 0x48 > > +#endif > > + > > void setup_vm(); > > > > void *vmalloc(unsigned long size); > > diff --git a/x86/cstart64.S b/x86/cstart64.S > 
> index 24df5f8..0fe76da 100644 > > --- a/x86/cstart64.S > > +++ b/x86/cstart64.S > > @@ -4,6 +4,10 @@ > > .globl boot_idt > > boot_idt = 0 > > > > +.globl idt_descr > > +.globl tss_descr > > +.globl gdt64_desc > > + > > ipi_vector = 0x20 > > > > max_cpus = 64 > > diff --git a/x86/unittests.cfg b/x86/unittests.cfg > > index bc9643e..85c36aa 100644 > > --- a/x86/unittests.cfg > > +++ b/x86/unittests.cfg > > @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" > > file = pcid.flat > > extra_params = -cpu qemu64,+pcid > > arch = x86_64 > > + > > +[vmx] > > +file = vmx.flat > > +extra_params = -cpu host,+vmx > > +arch = x86_64 > > + > > diff --git a/x86/vmx.c b/x86/vmx.c > > new file mode 100644 > > index 0000000..7467927 > > --- /dev/null > > +++ b/x86/vmx.c > > @@ -0,0 +1,674 @@ > > +#include "libcflat.h" > > +#include "processor.h" > > +#include "vm.h" > > +#include "desc.h" > > +#include "vmx.h" > > +#include "msr.h" > > +#include "smp.h" > > +#include "io.h" > > + > > +int fails = 0, tests = 0; > > +u32 *vmxon_region; > > +struct vmcs *vmcs_root; > > +u32 vpid_cnt; > > +void *guest_stack, *guest_syscall_stack; > > +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; > > +ulong fix_cr0_set, fix_cr0_clr; > > +ulong fix_cr4_set, fix_cr4_clr; > > +struct regs regs; > > +struct vmx_test *current; > > +u64 hypercall_field = 0; > > +bool launched; > > + > > +extern u64 gdt64_desc[]; > > +extern u64 idt_descr[]; > > +extern u64 tss_descr[]; > > +extern void *vmx_return; > > +extern void *entry_sysenter; > > +extern void *guest_entry; > > + > > +static void report(const char *name, int result) > > +{ > > + ++tests; > > + if (result) > > + printf("PASS: %s\n", name); > > + else { > > + printf("FAIL: %s\n", name); > > + ++fails; > > + } > > +} > > + > > +static int make_vmcs_current(struct vmcs *vmcs) > > +{ > > + bool ret; > > + > > + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > > + return ret; > > +} > > + > > +/* entry_sysenter */ > 
> +asm( > > + ".align 4, 0x90\n\t" > > + ".globl entry_sysenter\n\t" > > + "entry_sysenter:\n\t" > > + SAVE_GPR > > + " and $0xf, %rax\n\t" > > + " mov %rax, %rdi\n\t" > > + " call syscall_handler\n\t" > > + LOAD_GPR > > + " vmresume\n\t" > > +); > > + > > +static void __attribute__((__used__)) syscall_handler(u64 syscall_no) > > +{ > > + current->syscall_handler(syscall_no); > > +} > > + > > +static inline int vmx_on() > > +{ > > + bool ret; > > + asm volatile ("vmxon %1; setbe %0\n\t" > > + : "=q"(ret) : "m"(vmxon_region) : "cc"); > > + return ret; > > +} > > + > > +static inline int vmx_off() > > +{ > > + bool ret; > > + asm volatile("vmxoff; setbe %0\n\t" > > + : "=q"(ret) : : "cc"); > > + return ret; > > +} > > + > > +static void print_vmexit_info() > > +{ > > + u64 guest_rip, guest_rsp; > > + ulong reason = vmcs_read(EXI_REASON) & 0xff; > > + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); > > + guest_rip = vmcs_read(GUEST_RIP); > > + guest_rsp = vmcs_read(GUEST_RSP); > > + printf("VMEXIT info:\n"); > > + printf("\tvmexit reason = %d\n", reason); > > + printf("\texit qualification = 0x%x\n", exit_qual); > > + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); > > + printf("\tguest_rip = 0x%llx\n", guest_rip); > > + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", > > + regs.rax, regs.rbx, regs.rcx, regs.rdx); > > + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", > > + guest_rsp, regs.rbp, regs.rsi, regs.rdi); > > + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", > > + regs.r8, regs.r9, regs.r10, regs.r11); > > + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", > > + regs.r12, regs.r13, regs.r14, regs.r15); > > +} > > + > > +static void test_vmclear(void) > > +{ > > + u64 rflags; > > + > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + report("test vmclear", vmcs_clear(vmcs_root) == 0); > > +} > > + > > +static void test_vmxoff(void) > > +{ > > + int ret; > 
> + u64 rflags; > > + > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + ret = vmx_off(); > > + report("test vmxoff", !ret); > > +} > > + > > +static void __attribute__((__used__)) guest_main(void) > > +{ > > + current->guest_main(); > > +} > > + > > +/* guest_entry */ > > +asm( > > + ".align 4, 0x90\n\t" > > + ".globl entry_guest\n\t" > > + "guest_entry:\n\t" > > + " call guest_main\n\t" > > + " mov $1, %edi\n\t" > > + " call hypercall\n\t" > > +); > > + > > +static void init_vmcs_ctrl(void) > > +{ > > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > > + /* 26.2.1.1 */ > > + vmcs_write(PIN_CONTROLS, ctrl_pin); > > + /* Disable VMEXIT of IO instruction */ > > + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); > > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { > > + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; > > + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); > > + } > > + vmcs_write(CR3_TARGET_COUNT, 0); > > + vmcs_write(VPID, ++vpid_cnt); > > +} > > + > > +static void init_vmcs_host(void) > > +{ > > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > > + /* 26.2.1.2 */ > > + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); > > + > > + /* 26.2.1.3 */ > > + vmcs_write(ENT_CONTROLS, ctrl_enter); > > + vmcs_write(EXI_CONTROLS, ctrl_exit); > > + > > + /* 26.2.2 */ > > + vmcs_write(HOST_CR0, read_cr0()); > > + vmcs_write(HOST_CR3, read_cr3()); > > + vmcs_write(HOST_CR4, read_cr4()); > > + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); > > + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); > > + > > + /* 26.2.3 */ > > + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); > > + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); > > + vmcs_write(HOST_BASE_TR, (u64)tss_descr); > > + 
vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); > > + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); > > + vmcs_write(HOST_BASE_FS, 0); > > + vmcs_write(HOST_BASE_GS, 0); > > + > > + /* Set other vmcs area */ > > + vmcs_write(PF_ERROR_MASK, 0); > > + vmcs_write(PF_ERROR_MATCH, 0); > > + vmcs_write(VMCS_LINK_PTR, ~0ul); > > + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); > > + vmcs_write(HOST_RIP, (u64)(&vmx_return)); > > +} > > + > > +static void init_vmcs_guest(void) > > +{ > > + /* 26.3 CHECKING AND LOADING GUEST STATE */ > > + ulong guest_cr0, guest_cr4, guest_cr3; > > + /* 26.3.1.1 */ > > + guest_cr0 = read_cr0(); > > + guest_cr4 = read_cr4(); > > + guest_cr3 = read_cr3(); > > + if (ctrl_enter & ENT_GUEST_64) { > > + guest_cr0 |= X86_CR0_PG; > > + guest_cr4 |= X86_CR4_PAE; > > + } > > + if ((ctrl_enter & ENT_GUEST_64) == 0) > > + guest_cr4 &= (~X86_CR4_PCIDE); > > + if (guest_cr0 & X86_CR0_PG) > > + guest_cr0 |= X86_CR0_PE; > > + vmcs_write(GUEST_CR0, guest_cr0); > > + vmcs_write(GUEST_CR3, guest_cr3); > > + vmcs_write(GUEST_CR4, guest_cr4); > > + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); > > + vmcs_write(GUEST_SYSENTER_ESP, > > + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); > > + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); > > + vmcs_write(GUEST_DR7, 0); > > + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); > > + > > + /* 26.3.1.2 */ > > + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); > > + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); > > + vmcs_write(GUEST_SEL_LDTR, 0); > > + > > + vmcs_write(GUEST_BASE_CS, 0); > > + vmcs_write(GUEST_BASE_ES, 0); > > + vmcs_write(GUEST_BASE_SS, 0); > > + vmcs_write(GUEST_BASE_DS, 0); > > + vmcs_write(GUEST_BASE_FS, 0); > > + vmcs_write(GUEST_BASE_GS, 0); > > + vmcs_write(GUEST_BASE_TR, 
(u64)tss_descr); > > + vmcs_write(GUEST_BASE_LDTR, 0); > > + > > + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); > > + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); > > + > > + vmcs_write(GUEST_AR_CS, 0xa09b); > > + vmcs_write(GUEST_AR_DS, 0xc093); > > + vmcs_write(GUEST_AR_ES, 0xc093); > > + vmcs_write(GUEST_AR_FS, 0xc093); > > + vmcs_write(GUEST_AR_GS, 0xc093); > > + vmcs_write(GUEST_AR_SS, 0xc093); > > + vmcs_write(GUEST_AR_LDTR, 0x82); > > + vmcs_write(GUEST_AR_TR, 0x8b); > > + > > + /* 26.3.1.3 */ > > + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); > > + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); > > + vmcs_write(GUEST_LIMIT_GDTR, > > + ((struct descr *)gdt64_desc)->limit & 0xffff); > > + vmcs_write(GUEST_LIMIT_IDTR, > > + ((struct descr *)idt_descr)->limit & 0xffff); > > + > > + /* 26.3.1.4 */ > > + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); > > + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); > > + vmcs_write(GUEST_RFLAGS, 0x2); > > + > > + /* 26.3.1.5 */ > > + vmcs_write(GUEST_ACTV_STATE, 0); > > + vmcs_write(GUEST_INTR_STATE, 0); > > +} > > + > > +static int init_vmcs(struct vmcs **vmcs) > > +{ > > + *vmcs = alloc_page(); > > + memset(*vmcs, 0, PAGE_SIZE); > > + (*vmcs)->revision_id = basic.revision; > > + /* vmclear first to init vmcs */ > > + if (vmcs_clear(*vmcs)) { > > + printf("%s : vmcs_clear error\n", __func__); > > + return 1; > > + } > > + > > + if (make_vmcs_current(*vmcs)) { > > + printf("%s : make_vmcs_current error\n", __func__); > > + return 1; > > + } > > + > > + /* All settings to pin/exit/enter/cpu > > + control fields should be placed here */ > > + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; > > + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; 
> > + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); > > + ctrl_cpu[0] |= CPU_HLT; > > + /* DIsable IO instruction VMEXIT now */ > > + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); > > + ctrl_cpu[1] = 0; > > + > > + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; > > + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; > > + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; > > + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; > > + > > + init_vmcs_ctrl(); > > + init_vmcs_host(); > > + init_vmcs_guest(); > > + return 0; > > +} > > + > > +static void init_vmx(void) > > +{ > > + vmxon_region = alloc_page(); > > + memset(vmxon_region, 0, PAGE_SIZE); > > + > > + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); > > + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); > > + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); > > + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); > > + basic.val = rdmsr(MSR_IA32_VMX_BASIC); > > + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN > > + : MSR_IA32_VMX_PINBASED_CTLS); > > + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT > > + : MSR_IA32_VMX_EXIT_CTLS); > > + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY > > + : MSR_IA32_VMX_ENTRY_CTLS); > > + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? 
MSR_IA32_VMX_TRUE_PROC > > + : MSR_IA32_VMX_PROCBASED_CTLS); > > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) > > + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); > > + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) > > + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); > > + > > + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); > > + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); > > + > > + *vmxon_region = basic.revision; > > + > > + guest_stack = alloc_page(); > > + memset(guest_stack, 0, PAGE_SIZE); > > + guest_syscall_stack = alloc_page(); > > + memset(guest_syscall_stack, 0, PAGE_SIZE); > > +} > > + > > +static int test_vmx_capability(void) > > +{ > > + struct cpuid r; > > + u64 ret1, ret2; > > + u64 ia32_feature_control; > > + r = cpuid(1); > > + ret1 = ((r.c) >> 5) & 1; > > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > > + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { > > + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); > > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > > + } > > + report("test vmx capability", ret1 & ret2); > > + return !(ret1 & ret2); > > +} > > + > > +static int test_vmxon(void) > > +{ > > + int ret; > > + u64 rflags; > > + > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + ret = vmx_on(); > > + report("test vmxon", !ret); > > + return ret; > > +} > > + > > +static void test_vmptrld(void) > > +{ > > + u64 rflags; > > + struct vmcs *vmcs; > > + > > + vmcs = alloc_page(); > > + vmcs->revision_id = basic.revision; > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + report("test vmptrld", make_vmcs_current(vmcs) == 0); > > +} > > + > > +static void test_vmptrst(void) > > +{ > > + u64 rflags; > > + int ret; > > + struct vmcs *vmcs1, *vmcs2; > > + > > + vmcs1 = 
alloc_page(); > > + memset(vmcs1, 0, PAGE_SIZE); > > + init_vmcs(&vmcs1); > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + ret = vmcs_save(&vmcs2); > > + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); > > +} > > + > > +/* This function can only be called in guest */ > > +static void __attribute__((__used__)) hypercall(u32 hypercall_no) > > +{ > > + u64 val = 0; > > + val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; > > + hypercall_field = val; > > + asm volatile("vmcall\n\t"); > > +} > > + > > +static bool is_hypercall() > > +{ > > + ulong reason, hyper_bit; > > + > > + reason = vmcs_read(EXI_REASON) & 0xff; > > + hyper_bit = hypercall_field & HYPERCALL_BIT; > > + if (reason == VMX_VMCALL && hyper_bit) > > + return true; > > + return false; > > +} > > + > > +static int handle_hypercall() > > +{ > > + ulong hypercall_no; > > + > > + hypercall_no = hypercall_field & HYPERCALL_MASK; > > + hypercall_field = 0; > > + switch (hypercall_no) { > > + case HYPERCALL_VMEXIT: > > + return VMX_TEST_VMEXIT; > > + default: > > + printf("ERROR : Invalid hypercall number : %d\n", hypercall_no); > > + } > > + return VMX_TEST_EXIT; > > +} > > + > > +static int exit_handler() > > +{ > > + int ret; > > + > > + current->exits++; > > + current->guest_regs = regs; > > + if (is_hypercall()) > > + ret = handle_hypercall(); > > + else > > + ret = current->exit_handler(); > > + regs = current->guest_regs; > > + switch (ret) { > > + case VMX_TEST_VMEXIT: > > + case VMX_TEST_RESUME: > > + return ret; > > + case VMX_TEST_EXIT: > > + break; > > + default: > > + printf("ERROR : Invalid exit_handler return val %d.\n" > > + , ret); > > + } > > + print_vmexit_info(); > > + exit(-1); > > + return 0; > > +} > > + > > +static int vmx_run() > > +{ > > + u32 ret = 0, fail = 0; > > + > > + while (1) { > > + asm volatile ( > > + "mov %%rsp, %%rsi\n\t" > > + "mov %2, %%rdi\n\t" > > + "vmwrite %%rsi, %%rdi\n\t" > > + > > + LOAD_GPR_C > > + "cmpl $0, 
%1\n\t" > > + "jne 1f\n\t" > > + LOAD_RFLAGS > > + "vmlaunch\n\t" > > + "jmp 2f\n\t" > > + "1: " > > + "vmresume\n\t" > > + "2: " > > + "setbe %0\n\t" > > + "vmx_return:\n\t" > > + SAVE_GPR_C > > + SAVE_RFLAGS > > + : "=m"(fail) > > + : "m"(launched), "i"(HOST_RSP) > > + : "rdi", "rsi", "memory", "cc" > > + > > + ); > > + if (fail) > > + ret = launched ? VMX_TEST_RESUME_ERR : > > + VMX_TEST_LAUNCH_ERR; > > + else { > > + launched = 1; > > + ret = exit_handler(); > > + } > > + if (ret != VMX_TEST_RESUME) > > + break; > > + } > > + launched = 0; > > + switch (ret) { > > + case VMX_TEST_VMEXIT: > > + return 0; > > + case VMX_TEST_LAUNCH_ERR: > > + printf("%s : vmlaunch failed.\n", __func__); > > + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) > > + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) > > + printf("\tvmlaunch set wrong flags\n"); > > + report("test vmlaunch", 0); > > + break; > > + case VMX_TEST_RESUME_ERR: > > + printf("%s : vmresume failed.\n", __func__); > > + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) > > + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) > > + printf("\tvmresume set wrong flags\n"); > > + report("test vmresume", 0); > > + break; > > + default: > > + printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret); > > + break; > > + } > > + return 1; > > +} > > + > > +static int test_run(struct vmx_test *test) > > +{ > > + if (test->name == NULL) > > + test->name = "(no name)"; > > + if (vmx_on()) { > > + printf("%s : vmxon failed.\n", __func__); > > + return 1; > > + } > > + init_vmcs(&(test->vmcs)); > > + /* Directly call test->init is ok here, init_vmcs has done > > + vmcs init, vmclear and vmptrld*/ > > + if (test->init) > > + test->init(test->vmcs); > > + test->exits = 0; > > + current = test; > > + regs = test->guest_regs; > > + vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); > > + launched = 0; > > + printf("\nTest suite : %s\n", 
test->name); > > + vmx_run(); > > + if (vmx_off()) { > > + printf("%s : vmxoff failed.\n", __func__); > > + return 1; > > + } > > + return 0; > > +} > > + > > +static void basic_init() > > +{ > > +} > > + > > +static void basic_guest_main() > > +{ > > + /* Here is null guest_main, print Hello World */ > > + printf("\tHello World, this is null_guest_main!\n"); > > +} > > + > > +static int basic_exit_handler() > > +{ > > + u64 guest_rip; > > + ulong reason; > > + > > + guest_rip = vmcs_read(GUEST_RIP); > > + reason = vmcs_read(EXI_REASON) & 0xff; > > + > > + switch (reason) { > > + case VMX_VMCALL: > > + print_vmexit_info(); > > + vmcs_write(GUEST_RIP, guest_rip + 3); > > + return VMX_TEST_RESUME; > > + default: > > + break; > > + } > > + printf("ERROR : Unhandled vmx exit.\n"); > > + print_vmexit_info(); > > + return VMX_TEST_EXIT; > > +} > > + > > +static void basic_syscall_handler(u64 syscall_no) > > +{ > > +} > > + > > +static void vmenter_main() > > +{ > > + u64 rax; > > + u64 rsp, resume_rsp; > > + > > + report("test vmlaunch", 1); > > + > > + asm volatile( > > + "mov %%rsp, %0\n\t" > > + "mov %3, %%rax\n\t" > > + "vmcall\n\t" > > + "mov %%rax, %1\n\t" > > + "mov %%rsp, %2\n\t" > > + : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) > > + : "g"(0xABCD)); > > + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); > > +} > > + > > +static int vmenter_exit_handler() > > +{ > > + u64 guest_rip; > > + ulong reason; > > + > > + guest_rip = vmcs_read(GUEST_RIP); > > + reason = vmcs_read(EXI_REASON) & 0xff; > > + switch (reason) { > > + case VMX_VMCALL: > > + if (current->guest_regs.rax != 0xABCD) { > > + report("test vmresume", 0); > > + return VMX_TEST_VMEXIT; > > + } > > + current->guest_regs.rax = 0xFFFF; > > + vmcs_write(GUEST_RIP, guest_rip + 3); > > + return VMX_TEST_RESUME; > > + default: > > + report("test vmresume", 0); > > + print_vmexit_info(); > > + } > > + return VMX_TEST_VMEXIT; > > +} > > + > > + > > +/* 
name/init/guest_main/exit_handler/syscall_handler/guest_regs > > + basic_* just implement some basic functions */ > > +static struct vmx_test vmx_tests[] = { > > + { "null", basic_init, basic_guest_main, basic_exit_handler, > > + basic_syscall_handler, {0} }, > > + { "vmenter", basic_init, vmenter_main, vmenter_exit_handler, > > + basic_syscall_handler, {0} }, > > +}; > > + > > +int main(void) > > +{ > > + int i; > > + > > + setup_vm(); > > + setup_idt(); > > + > > + if (test_vmx_capability() != 0) { > > + printf("ERROR : vmx not supported, check +vmx option\n"); > > + goto exit; > > + } > > + init_vmx(); > > + /* Set basic test ctxt the same as "null" */ > > + current = &vmx_tests[0]; > > + if (test_vmxon() != 0) > > + goto exit; > > + test_vmptrld(); > > + test_vmclear(); > > + test_vmptrst(); > > + init_vmcs(&vmcs_root); > > + if (vmx_run()) { > > + report("test vmlaunch", 0); > > + goto exit; > > + } > > + test_vmxoff(); > > + > > + for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) { > > + if (test_run(&vmx_tests[i])) > > + goto exit; > > + } > > + > > +exit: > > + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); > > + return fails ? 
1 : 0; > > +} > > diff --git a/x86/vmx.h b/x86/vmx.h > > new file mode 100644 > > index 0000000..1fb9738 > > --- /dev/null > > +++ b/x86/vmx.h > > @@ -0,0 +1,466 @@ > > +#ifndef __HYPERVISOR_H > > +#define __HYPERVISOR_H > > + > > +#include "libcflat.h" > > + > > +struct vmcs { > > + u32 revision_id; /* vmcs revision identifier */ > > + u32 abort; /* VMX-abort indicator */ > > + /* VMCS data */ > > + char data[0]; > > +}; > > + > > +struct regs { > > + u64 rax; > > + u64 rcx; > > + u64 rdx; > > + u64 rbx; > > + u64 cr2; > > + u64 rbp; > > + u64 rsi; > > + u64 rdi; > > + u64 r8; > > + u64 r9; > > + u64 r10; > > + u64 r11; > > + u64 r12; > > + u64 r13; > > + u64 r14; > > + u64 r15; > > + u64 rflags; > > +}; > > + > > +struct vmx_test { > > + const char *name; > > + void (*init)(struct vmcs *vmcs); > > + void (*guest_main)(); > > + int (*exit_handler)(); > > + void (*syscall_handler)(u64 syscall_no); > > + struct regs guest_regs; > > + struct vmcs *vmcs; > > + int exits; > > +}; > > + > > +static union vmx_basic { > > + u64 val; > > + struct { > > + u32 revision; > > + u32 size:13, > > + : 3, > > + width:1, > > + dual:1, > > + type:4, > > + insouts:1, > > + ctrl:1; > > + }; > > +} basic; > > + > > +static union vmx_ctrl_pin { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_pin_rev; > > + > > +static union vmx_ctrl_cpu { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_cpu_rev[2]; > > + > > +static union vmx_ctrl_exit { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_exit_rev; > > + > > +static union vmx_ctrl_ent { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_enter_rev; > > + > > +static union vmx_ept_vpid { > > + u64 val; > > + struct { > > + u32:16, > > + super:2, > > + : 2, > > + invept:1, > > + : 11; > > + u32 invvpid:1; > > + }; > > +} ept_vpid; > > + > > +struct descr { > > + u16 limit; > > + u64 addr; > > +}; > > + > > +enum Encoding { > > + /* 16-Bit Control Fields */ 
> > + VPID = 0x0000ul, > > + /* Posted-interrupt notification vector */ > > + PINV = 0x0002ul, > > + /* EPTP index */ > > + EPTP_IDX = 0x0004ul, > > + > > + /* 16-Bit Guest State Fields */ > > + GUEST_SEL_ES = 0x0800ul, > > + GUEST_SEL_CS = 0x0802ul, > > + GUEST_SEL_SS = 0x0804ul, > > + GUEST_SEL_DS = 0x0806ul, > > + GUEST_SEL_FS = 0x0808ul, > > + GUEST_SEL_GS = 0x080aul, > > + GUEST_SEL_LDTR = 0x080cul, > > + GUEST_SEL_TR = 0x080eul, > > + GUEST_INT_STATUS = 0x0810ul, > > + > > + /* 16-Bit Host State Fields */ > > + HOST_SEL_ES = 0x0c00ul, > > + HOST_SEL_CS = 0x0c02ul, > > + HOST_SEL_SS = 0x0c04ul, > > + HOST_SEL_DS = 0x0c06ul, > > + HOST_SEL_FS = 0x0c08ul, > > + HOST_SEL_GS = 0x0c0aul, > > + HOST_SEL_TR = 0x0c0cul, > > + > > + /* 64-Bit Control Fields */ > > + IO_BITMAP_A = 0x2000ul, > > + IO_BITMAP_B = 0x2002ul, > > + MSR_BITMAP = 0x2004ul, > > + EXIT_MSR_ST_ADDR = 0x2006ul, > > + EXIT_MSR_LD_ADDR = 0x2008ul, > > + ENTER_MSR_LD_ADDR = 0x200aul, > > + VMCS_EXEC_PTR = 0x200cul, > > + TSC_OFFSET = 0x2010ul, > > + TSC_OFFSET_HI = 0x2011ul, > > + APIC_VIRT_ADDR = 0x2012ul, > > + APIC_ACCS_ADDR = 0x2014ul, > > + EPTP = 0x201aul, > > + EPTP_HI = 0x201bul, > > + > > + /* 64-Bit Readonly Data Field */ > > + INFO_PHYS_ADDR = 0x2400ul, > > + > > + /* 64-Bit Guest State */ > > + VMCS_LINK_PTR = 0x2800ul, > > + VMCS_LINK_PTR_HI = 0x2801ul, > > + GUEST_DEBUGCTL = 0x2802ul, > > + GUEST_DEBUGCTL_HI = 0x2803ul, > > + GUEST_EFER = 0x2806ul, > > + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, > > + GUEST_PDPTE = 0x280aul, > > + > > + /* 64-Bit Host State */ > > + HOST_EFER = 0x2c02ul, > > + HOST_PERF_GLOBAL_CTRL = 0x2c04ul, > > + > > + /* 32-Bit Control Fields */ > > + PIN_CONTROLS = 0x4000ul, > > + CPU_EXEC_CTRL0 = 0x4002ul, > > + EXC_BITMAP = 0x4004ul, > > + PF_ERROR_MASK = 0x4006ul, > > + PF_ERROR_MATCH = 0x4008ul, > > + CR3_TARGET_COUNT = 0x400aul, > > + EXI_CONTROLS = 0x400cul, > > + EXI_MSR_ST_CNT = 0x400eul, > > + EXI_MSR_LD_CNT = 0x4010ul, > > + ENT_CONTROLS = 0x4012ul, > > + 
ENT_MSR_LD_CNT = 0x4014ul, > > + ENT_INTR_INFO = 0x4016ul, > > + ENT_INTR_ERROR = 0x4018ul, > > + ENT_INST_LEN = 0x401aul, > > + TPR_THRESHOLD = 0x401cul, > > + CPU_EXEC_CTRL1 = 0x401eul, > > + > > + /* 32-Bit R/O Data Fields */ > > + VMX_INST_ERROR = 0x4400ul, > > + EXI_REASON = 0x4402ul, > > + EXI_INTR_INFO = 0x4404ul, > > + EXI_INTR_ERROR = 0x4406ul, > > + IDT_VECT_INFO = 0x4408ul, > > + IDT_VECT_ERROR = 0x440aul, > > + EXI_INST_LEN = 0x440cul, > > + EXI_INST_INFO = 0x440eul, > > + > > + /* 32-Bit Guest State Fields */ > > + GUEST_LIMIT_ES = 0x4800ul, > > + GUEST_LIMIT_CS = 0x4802ul, > > + GUEST_LIMIT_SS = 0x4804ul, > > + GUEST_LIMIT_DS = 0x4806ul, > > + GUEST_LIMIT_FS = 0x4808ul, > > + GUEST_LIMIT_GS = 0x480aul, > > + GUEST_LIMIT_LDTR = 0x480cul, > > + GUEST_LIMIT_TR = 0x480eul, > > + GUEST_LIMIT_GDTR = 0x4810ul, > > + GUEST_LIMIT_IDTR = 0x4812ul, > > + GUEST_AR_ES = 0x4814ul, > > + GUEST_AR_CS = 0x4816ul, > > + GUEST_AR_SS = 0x4818ul, > > + GUEST_AR_DS = 0x481aul, > > + GUEST_AR_FS = 0x481cul, > > + GUEST_AR_GS = 0x481eul, > > + GUEST_AR_LDTR = 0x4820ul, > > + GUEST_AR_TR = 0x4822ul, > > + GUEST_INTR_STATE = 0x4824ul, > > + GUEST_ACTV_STATE = 0x4826ul, > > + GUEST_SMBASE = 0x4828ul, > > + GUEST_SYSENTER_CS = 0x482aul, > > + > > + /* 32-Bit Host State Fields */ > > + HOST_SYSENTER_CS = 0x4c00ul, > > + > > + /* Natural-Width Control Fields */ > > + CR0_MASK = 0x6000ul, > > + CR4_MASK = 0x6002ul, > > + CR0_READ_SHADOW = 0x6004ul, > > + CR4_READ_SHADOW = 0x6006ul, > > + CR3_TARGET_0 = 0x6008ul, > > + CR3_TARGET_1 = 0x600aul, > > + CR3_TARGET_2 = 0x600cul, > > + CR3_TARGET_3 = 0x600eul, > > + > > + /* Natural-Width R/O Data Fields */ > > + EXI_QUALIFICATION = 0x6400ul, > > + IO_RCX = 0x6402ul, > > + IO_RSI = 0x6404ul, > > + IO_RDI = 0x6406ul, > > + IO_RIP = 0x6408ul, > > + GUEST_LINEAR_ADDRESS = 0x640aul, > > + > > + /* Natural-Width Guest State Fields */ > > + GUEST_CR0 = 0x6800ul, > > + GUEST_CR3 = 0x6802ul, > > + GUEST_CR4 = 0x6804ul, > > + GUEST_BASE_ES = 
0x6806ul, > > + GUEST_BASE_CS = 0x6808ul, > > + GUEST_BASE_SS = 0x680aul, > > + GUEST_BASE_DS = 0x680cul, > > + GUEST_BASE_FS = 0x680eul, > > + GUEST_BASE_GS = 0x6810ul, > > + GUEST_BASE_LDTR = 0x6812ul, > > + GUEST_BASE_TR = 0x6814ul, > > + GUEST_BASE_GDTR = 0x6816ul, > > + GUEST_BASE_IDTR = 0x6818ul, > > + GUEST_DR7 = 0x681aul, > > + GUEST_RSP = 0x681cul, > > + GUEST_RIP = 0x681eul, > > + GUEST_RFLAGS = 0x6820ul, > > + GUEST_PENDING_DEBUG = 0x6822ul, > > + GUEST_SYSENTER_ESP = 0x6824ul, > > + GUEST_SYSENTER_EIP = 0x6826ul, > > + > > + /* Natural-Width Host State Fields */ > > + HOST_CR0 = 0x6c00ul, > > + HOST_CR3 = 0x6c02ul, > > + HOST_CR4 = 0x6c04ul, > > + HOST_BASE_FS = 0x6c06ul, > > + HOST_BASE_GS = 0x6c08ul, > > + HOST_BASE_TR = 0x6c0aul, > > + HOST_BASE_GDTR = 0x6c0cul, > > + HOST_BASE_IDTR = 0x6c0eul, > > + HOST_SYSENTER_ESP = 0x6c10ul, > > + HOST_SYSENTER_EIP = 0x6c12ul, > > + HOST_RSP = 0x6c14ul, > > + HOST_RIP = 0x6c16ul > > +}; > > + > > +enum Reason { > > + VMX_EXC_NMI = 0, > > + VMX_EXTINT = 1, > > + VMX_TRIPLE_FAULT = 2, > > + VMX_INIT = 3, > > + VMX_SIPI = 4, > > + VMX_SMI_IO = 5, > > + VMX_SMI_OTHER = 6, > > + VMX_INTR_WINDOW = 7, > > + VMX_NMI_WINDOW = 8, > > + VMX_TASK_SWITCH = 9, > > + VMX_CPUID = 10, > > + VMX_GETSEC = 11, > > + VMX_HLT = 12, > > + VMX_INVD = 13, > > + VMX_INVLPG = 14, > > + VMX_RDPMC = 15, > > + VMX_RDTSC = 16, > > + VMX_RSM = 17, > > + VMX_VMCALL = 18, > > + VMX_VMCLEAR = 19, > > + VMX_VMLAUNCH = 20, > > + VMX_VMPTRLD = 21, > > + VMX_VMPTRST = 22, > > + VMX_VMREAD = 23, > > + VMX_VMRESUME = 24, > > + VMX_VMWRITE = 25, > > + VMX_VMXOFF = 26, > > + VMX_VMXON = 27, > > + VMX_CR = 28, > > + VMX_DR = 29, > > + VMX_IO = 30, > > + VMX_RDMSR = 31, > > + VMX_WRMSR = 32, > > + VMX_FAIL_STATE = 33, > > + VMX_FAIL_MSR = 34, > > + VMX_MWAIT = 36, > > + VMX_MTF = 37, > > + VMX_MONITOR = 39, > > + VMX_PAUSE = 40, > > + VMX_FAIL_MCHECK = 41, > > + VMX_TPR_THRESHOLD = 43, > > + VMX_APIC_ACCESS = 44, > > + VMX_GDTR_IDTR = 46, > > + VMX_LDTR_TR 
= 47, > > + VMX_EPT_VIOLATION = 48, > > + VMX_EPT_MISCONFIG = 49, > > + VMX_INVEPT = 50, > > + VMX_PREEMPT = 52, > > + VMX_INVVPID = 53, > > + VMX_WBINVD = 54, > > + VMX_XSETBV = 55 > > +}; > > + > > +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ > > +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ > > + > > +enum Ctrl_exi { > > + EXI_HOST_64 = 1UL << 9, > > + EXI_LOAD_PERF = 1UL << 12, > > + EXI_INTA = 1UL << 15, > > + EXI_LOAD_EFER = 1UL << 21, > > +}; > > + > > +enum Ctrl_ent { > > + ENT_GUEST_64 = 1UL << 9, > > + ENT_LOAD_EFER = 1UL << 15, > > +}; > > + > > +enum Ctrl_pin { > > + PIN_EXTINT = 1ul << 0, > > + PIN_NMI = 1ul << 3, > > + PIN_VIRT_NMI = 1ul << 5, > > +}; > > + > > +enum Ctrl0 { > > + CPU_INTR_WINDOW = 1ul << 2, > > + CPU_HLT = 1ul << 7, > > + CPU_INVLPG = 1ul << 9, > > + CPU_CR3_LOAD = 1ul << 15, > > + CPU_CR3_STORE = 1ul << 16, > > + CPU_TPR_SHADOW = 1ul << 21, > > + CPU_NMI_WINDOW = 1ul << 22, > > + CPU_IO = 1ul << 24, > > + CPU_IO_BITMAP = 1ul << 25, > > + CPU_SECONDARY = 1ul << 31, > > +}; > > + > > +enum Ctrl1 { > > + CPU_EPT = 1ul << 1, > > + CPU_VPID = 1ul << 5, > > + CPU_URG = 1ul << 7, > > +}; > > + > > +#define SAVE_GPR \ > > + "xchg %rax, regs\n\t" \ > > + "xchg %rbx, regs+0x8\n\t" \ > > + "xchg %rcx, regs+0x10\n\t" \ > > + "xchg %rdx, regs+0x18\n\t" \ > > + "xchg %rbp, regs+0x28\n\t" \ > > + "xchg %rsi, regs+0x30\n\t" \ > > + "xchg %rdi, regs+0x38\n\t" \ > > + "xchg %r8, regs+0x40\n\t" \ > > + "xchg %r9, regs+0x48\n\t" \ > > + "xchg %r10, regs+0x50\n\t" \ > > + "xchg %r11, regs+0x58\n\t" \ > > + "xchg %r12, regs+0x60\n\t" \ > > + "xchg %r13, regs+0x68\n\t" \ > > + "xchg %r14, regs+0x70\n\t" \ > > + "xchg %r15, regs+0x78\n\t" > > + > > +#define LOAD_GPR SAVE_GPR > > + > > +#define SAVE_GPR_C \ > > + "xchg %%rax, regs\n\t" \ > > + "xchg %%rbx, regs+0x8\n\t" \ > > + "xchg %%rcx, regs+0x10\n\t" \ > > + "xchg %%rdx, regs+0x18\n\t" \ > > + "xchg %%rbp, regs+0x28\n\t" \ > > + "xchg %%rsi, regs+0x30\n\t" \ > > + "xchg %%rdi, regs+0x38\n\t" 
\ > > + "xchg %%r8, regs+0x40\n\t" \ > > + "xchg %%r9, regs+0x48\n\t" \ > > + "xchg %%r10, regs+0x50\n\t" \ > > + "xchg %%r11, regs+0x58\n\t" \ > > + "xchg %%r12, regs+0x60\n\t" \ > > + "xchg %%r13, regs+0x68\n\t" \ > > + "xchg %%r14, regs+0x70\n\t" \ > > + "xchg %%r15, regs+0x78\n\t" > > + > > +#define LOAD_GPR_C SAVE_GPR_C > > + > > +#define SAVE_RFLAGS \ > > + "pushf\n\t" \ > > + "pop regs+0x80\n\t" > > + > > +#define LOAD_RFLAGS \ > > + "push regs+0x80\n\t" \ > > + "popf\n\t" > > + > > +#define VMX_IO_SIZE_MASK 0x7 > > +#define _VMX_IO_BYTE 1 > > +#define _VMX_IO_WORD 2 > > +#define _VMX_IO_LONG 3 > > +#define VMX_IO_DIRECTION_MASK (1ul << 3) > > +#define VMX_IO_IN (1ul << 3) > > +#define VMX_IO_OUT 0 > > +#define VMX_IO_STRING (1ul << 4) > > +#define VMX_IO_REP (1ul << 5) > > +#define VMX_IO_OPRAND_DX (1ul << 6) > > +#define VMX_IO_PORT_MASK 0xFFFF0000 > > +#define VMX_IO_PORT_SHIFT 16 > > + > > +#define VMX_TEST_VMEXIT 1 > > +#define VMX_TEST_EXIT 2 > > +#define VMX_TEST_RESUME 3 > > +#define VMX_TEST_LAUNCH_ERR 4 > > +#define VMX_TEST_RESUME_ERR 5 > > + > > +#define HYPERCALL_BIT (1ul << 12) > > +#define HYPERCALL_MASK 0xFFF > > +#define HYPERCALL_VMEXIT 0x1 > > + > > +static inline int vmcs_clear(struct vmcs *vmcs) > > +{ > > + bool ret; > > + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > > + return ret; > > +} > > + > > +static inline u64 vmcs_read(enum Encoding enc) > > +{ > > + u64 val; > > + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); > > + return val; > > +} > > + > > +static inline int vmcs_write(enum Encoding enc, u64 val) > > +{ > > + bool ret; > > + asm volatile ("vmwrite %1, %2; setbe %0" > > + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); > > + return ret; > > +} > > + > > +static inline int vmcs_save(struct vmcs **vmcs) > > +{ > > + bool ret; > > + > > + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); > > + return ret; > > +} > > + > > +#endif > > + > > -- > > 
1.7.9.5 > > -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Arthur, I'm trying to test your patch on a SandyBridge machine. I used 'nested=1' when loading kvm (from 3.9-1-amd64) and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)). Without nested=1 I get ``unhandled excecption 13'', so I presume this is OK; with it, test_vmx_capability() fails to detect vmx. For qemu I used: $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1 Are there any knobs which I should further tune? Thanks, On Sun, Jul 28, 2013 at 04:24:34PM +0200, Arthur Chunqi Li wrote: > Hi Gleb, > > It suddenly occurred to me that this patch also fails to handle > GUEST_RFLAGS on VMRESUME. > > I have decided to remove rflags from struct regs, since rflags can be read and > set via vmcs_read/vmcs_write in the functions defined by each test suite (init > and exit_handler), while the other general registers can only be set in the > framework code. > > Then I will wait for Paolo's and Gleb's further feedback and commit the > final patch. > > Arthur > > On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote: > > This is the first version of VMX nested environment. It contains the > > basic VMX instructions test cases, including VMXON/VMXOFF/VMPTRLD/ > > VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the > > basic execution routine in VMX nested environment and lets the VM print > > "Hello World" to show that it ran successfully. > > > > The first release also includes a test suite for vmenter (vmlaunch and > > vmresume). Besides, a hypercall mechanism is included and currently it is > > used to invoke VM normal exit. > > > > New files added: > > x86/vmx.h : contains all VMX related macro declarations > > x86/vmx.c : main file for VMX nested test case > > > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > > --- > > ChangeLog: > > 1. Refine code in function vmx_run() > > 2. 
Fix bug of setting GUEST_RFLAGS > > 3. Move defines of selectors to lib/x86/vm.h > > 4. Move CR0/4 defines to lib/x86/vm.h, so as some defines in lib/x86/vm.c > > 5. Move some inline functions to lib/x86/processor.h > > 6. Move some inline functions (vmcs related) to x86/vmx.h > > --- > > config-x86-common.mak | 2 + > > config-x86_64.mak | 1 + > > lib/x86/msr.h | 5 + > > lib/x86/processor.h | 15 ++ > > lib/x86/vm.c | 4 - > > lib/x86/vm.h | 21 ++ > > x86/cstart64.S | 4 + > > x86/unittests.cfg | 6 + > > x86/vmx.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++ > > x86/vmx.h | 466 ++++++++++++++++++++++++++++++++++ > > 10 files changed, 1194 insertions(+), 4 deletions(-) > > create mode 100644 x86/vmx.c > > create mode 100644 x86/vmx.h > > > > diff --git a/config-x86-common.mak b/config-x86-common.mak > > index 455032b..34a41e1 100644 > > --- a/config-x86-common.mak > > +++ b/config-x86-common.mak > > @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o > > > > $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o > > > > +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o > > + > > arch_clean: > > $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ > > $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o > > diff --git a/config-x86_64.mak b/config-x86_64.mak > > index 4e525f5..bb8ee89 100644 > > --- a/config-x86_64.mak > > +++ b/config-x86_64.mak > > @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ > > $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ > > $(TEST_DIR)/pcid.flat > > tests += $(TEST_DIR)/svm.flat > > +tests += $(TEST_DIR)/vmx.flat > > > > include config-x86-common.mak > > diff --git a/lib/x86/msr.h b/lib/x86/msr.h > > index 509a421..281255a 100644 > > --- a/lib/x86/msr.h > > +++ b/lib/x86/msr.h > > @@ -396,6 +396,11 @@ > > #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a > > #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b > > #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c > > +#define 
MSR_IA32_VMX_TRUE_PIN 0x0000048d > > +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e > > +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f > > +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 > > + > > > > /* AMD-V MSRs */ > > > > diff --git a/lib/x86/processor.h b/lib/x86/processor.h > > index e46d8d0..f0c11cc 100644 > > --- a/lib/x86/processor.h > > +++ b/lib/x86/processor.h > > @@ -307,4 +307,19 @@ static inline void safe_halt(void) > > { > > asm volatile("sti; hlt"); > > } > > + > > +#ifdef __x86_64__ > > +static inline u64 read_rflags(void) > > +{ > > + u64 r; > > + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); > > + return r; > > +} > > + > > +static inline void write_rflags(u64 r) > > +{ > > + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); > > +} > > +#endif > > + > > #endif > > diff --git a/lib/x86/vm.c b/lib/x86/vm.c > > index 260ec45..188bf57 100644 > > --- a/lib/x86/vm.c > > +++ b/lib/x86/vm.c > > @@ -9,10 +9,6 @@ > > #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) > > #endif > > > > -#define X86_CR0_PE 0x00000001 > > -#define X86_CR0_WP 0x00010000 > > -#define X86_CR0_PG 0x80000000 > > -#define X86_CR4_PSE 0x00000010 > > static void *free = 0; > > static void *vfree_top = 0; > > > > diff --git a/lib/x86/vm.h b/lib/x86/vm.h > > index 0b5b5c7..eff6f72 100644 > > --- a/lib/x86/vm.h > > +++ b/lib/x86/vm.h > > @@ -16,6 +16,27 @@ > > #define PTE_USER (1ull << 2) > > #define PTE_ADDR (0xffffffffff000ull) > > > > +#define X86_CR0_PE 0x00000001 > > +#define X86_CR0_WP 0x00010000 > > +#define X86_CR0_PG 0x80000000 > > +#define X86_CR4_VMXE 0x00000001 > > +#define X86_CR4_PSE 0x00000010 > > +#define X86_CR4_PAE 0x00000020 > > +#define X86_CR4_PCIDE 0x00020000 > > + > > +#ifdef __x86_64__ > > +#define SEL_NULL_DESC 0x0 > > +#define SEL_KERN_CODE_64 0x8 > > +#define SEL_KERN_DATA_64 0x10 > > +#define SEL_USER_CODE_64 0x18 > > +#define SEL_USER_DATA_64 0x20 > > +#define SEL_CODE_32 0x28 > > +#define SEL_DATA_32 0x30 > > +#define SEL_CODE_16 0x38 > > +#define SEL_DATA_16 0x40 
> > +#define SEL_TSS_RUN 0x48 > > +#endif > > + > > void setup_vm(); > > > > void *vmalloc(unsigned long size); > > diff --git a/x86/cstart64.S b/x86/cstart64.S > > index 24df5f8..0fe76da 100644 > > --- a/x86/cstart64.S > > +++ b/x86/cstart64.S > > @@ -4,6 +4,10 @@ > > .globl boot_idt > > boot_idt = 0 > > > > +.globl idt_descr > > +.globl tss_descr > > +.globl gdt64_desc > > + > > ipi_vector = 0x20 > > > > max_cpus = 64 > > diff --git a/x86/unittests.cfg b/x86/unittests.cfg > > index bc9643e..85c36aa 100644 > > --- a/x86/unittests.cfg > > +++ b/x86/unittests.cfg > > @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" > > file = pcid.flat > > extra_params = -cpu qemu64,+pcid > > arch = x86_64 > > + > > +[vmx] > > +file = vmx.flat > > +extra_params = -cpu host,+vmx > > +arch = x86_64 > > + > > diff --git a/x86/vmx.c b/x86/vmx.c > > new file mode 100644 > > index 0000000..7467927 > > --- /dev/null > > +++ b/x86/vmx.c > > @@ -0,0 +1,674 @@ > > +#include "libcflat.h" > > +#include "processor.h" > > +#include "vm.h" > > +#include "desc.h" > > +#include "vmx.h" > > +#include "msr.h" > > +#include "smp.h" > > +#include "io.h" > > + > > +int fails = 0, tests = 0; > > +u32 *vmxon_region; > > +struct vmcs *vmcs_root; > > +u32 vpid_cnt; > > +void *guest_stack, *guest_syscall_stack; > > +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; > > +ulong fix_cr0_set, fix_cr0_clr; > > +ulong fix_cr4_set, fix_cr4_clr; > > +struct regs regs; > > +struct vmx_test *current; > > +u64 hypercall_field = 0; > > +bool launched; > > + > > +extern u64 gdt64_desc[]; > > +extern u64 idt_descr[]; > > +extern u64 tss_descr[]; > > +extern void *vmx_return; > > +extern void *entry_sysenter; > > +extern void *guest_entry; > > + > > +static void report(const char *name, int result) > > +{ > > + ++tests; > > + if (result) > > + printf("PASS: %s\n", name); > > + else { > > + printf("FAIL: %s\n", name); > > + ++fails; > > + } > > +} > > + > > +static int make_vmcs_current(struct vmcs *vmcs) > 
> +{ > > + bool ret; > > + > > + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > > + return ret; > > +} > > + > > +/* entry_sysenter */ > > +asm( > > + ".align 4, 0x90\n\t" > > + ".globl entry_sysenter\n\t" > > + "entry_sysenter:\n\t" > > + SAVE_GPR > > + " and $0xf, %rax\n\t" > > + " mov %rax, %rdi\n\t" > > + " call syscall_handler\n\t" > > + LOAD_GPR > > + " vmresume\n\t" > > +); > > + > > +static void __attribute__((__used__)) syscall_handler(u64 syscall_no) > > +{ > > + current->syscall_handler(syscall_no); > > +} > > + > > +static inline int vmx_on() > > +{ > > + bool ret; > > + asm volatile ("vmxon %1; setbe %0\n\t" > > + : "=q"(ret) : "m"(vmxon_region) : "cc"); > > + return ret; > > +} > > + > > +static inline int vmx_off() > > +{ > > + bool ret; > > + asm volatile("vmxoff; setbe %0\n\t" > > + : "=q"(ret) : : "cc"); > > + return ret; > > +} > > + > > +static void print_vmexit_info() > > +{ > > + u64 guest_rip, guest_rsp; > > + ulong reason = vmcs_read(EXI_REASON) & 0xff; > > + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); > > + guest_rip = vmcs_read(GUEST_RIP); > > + guest_rsp = vmcs_read(GUEST_RSP); > > + printf("VMEXIT info:\n"); > > + printf("\tvmexit reason = %d\n", reason); > > + printf("\texit qualification = 0x%x\n", exit_qual); > > + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); > > + printf("\tguest_rip = 0x%llx\n", guest_rip); > > + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", > > + regs.rax, regs.rbx, regs.rcx, regs.rdx); > > + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", > > + guest_rsp, regs.rbp, regs.rsi, regs.rdi); > > + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", > > + regs.r8, regs.r9, regs.r10, regs.r11); > > + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", > > + regs.r12, regs.r13, regs.r14, regs.r15); > > +} > > + > > +static void test_vmclear(void) > > +{ > > + u64 rflags; > > + > > + rflags = read_rflags() | X86_EFLAGS_CF | 
X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + report("test vmclear", vmcs_clear(vmcs_root) == 0); > > +} > > + > > +static void test_vmxoff(void) > > +{ > > + int ret; > > + u64 rflags; > > + > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + ret = vmx_off(); > > + report("test vmxoff", !ret); > > +} > > + > > +static void __attribute__((__used__)) guest_main(void) > > +{ > > + current->guest_main(); > > +} > > + > > +/* guest_entry */ > > +asm( > > + ".align 4, 0x90\n\t" > > + ".globl entry_guest\n\t" > > + "guest_entry:\n\t" > > + " call guest_main\n\t" > > + " mov $1, %edi\n\t" > > + " call hypercall\n\t" > > +); > > + > > +static void init_vmcs_ctrl(void) > > +{ > > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > > + /* 26.2.1.1 */ > > + vmcs_write(PIN_CONTROLS, ctrl_pin); > > + /* Disable VMEXIT of IO instruction */ > > + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); > > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { > > + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; > > + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); > > + } > > + vmcs_write(CR3_TARGET_COUNT, 0); > > + vmcs_write(VPID, ++vpid_cnt); > > +} > > + > > +static void init_vmcs_host(void) > > +{ > > + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ > > + /* 26.2.1.2 */ > > + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); > > + > > + /* 26.2.1.3 */ > > + vmcs_write(ENT_CONTROLS, ctrl_enter); > > + vmcs_write(EXI_CONTROLS, ctrl_exit); > > + > > + /* 26.2.2 */ > > + vmcs_write(HOST_CR0, read_cr0()); > > + vmcs_write(HOST_CR3, read_cr3()); > > + vmcs_write(HOST_CR4, read_cr4()); > > + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); > > + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); > > + > > + /* 26.2.3 */ > > + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); > > + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_FS, 
SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); > > + vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); > > + vmcs_write(HOST_BASE_TR, (u64)tss_descr); > > + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); > > + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); > > + vmcs_write(HOST_BASE_FS, 0); > > + vmcs_write(HOST_BASE_GS, 0); > > + > > + /* Set other vmcs area */ > > + vmcs_write(PF_ERROR_MASK, 0); > > + vmcs_write(PF_ERROR_MATCH, 0); > > + vmcs_write(VMCS_LINK_PTR, ~0ul); > > + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); > > + vmcs_write(HOST_RIP, (u64)(&vmx_return)); > > +} > > + > > +static void init_vmcs_guest(void) > > +{ > > + /* 26.3 CHECKING AND LOADING GUEST STATE */ > > + ulong guest_cr0, guest_cr4, guest_cr3; > > + /* 26.3.1.1 */ > > + guest_cr0 = read_cr0(); > > + guest_cr4 = read_cr4(); > > + guest_cr3 = read_cr3(); > > + if (ctrl_enter & ENT_GUEST_64) { > > + guest_cr0 |= X86_CR0_PG; > > + guest_cr4 |= X86_CR4_PAE; > > + } > > + if ((ctrl_enter & ENT_GUEST_64) == 0) > > + guest_cr4 &= (~X86_CR4_PCIDE); > > + if (guest_cr0 & X86_CR0_PG) > > + guest_cr0 |= X86_CR0_PE; > > + vmcs_write(GUEST_CR0, guest_cr0); > > + vmcs_write(GUEST_CR3, guest_cr3); > > + vmcs_write(GUEST_CR4, guest_cr4); > > + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); > > + vmcs_write(GUEST_SYSENTER_ESP, > > + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); > > + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); > > + vmcs_write(GUEST_DR7, 0); > > + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); > > + > > + /* 26.3.1.2 */ > > + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); > > + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); > > + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); > > + vmcs_write(GUEST_SEL_LDTR, 0); > > + > > + vmcs_write(GUEST_BASE_CS, 0); > > + vmcs_write(GUEST_BASE_ES, 0); > > + 
vmcs_write(GUEST_BASE_SS, 0); > > + vmcs_write(GUEST_BASE_DS, 0); > > + vmcs_write(GUEST_BASE_FS, 0); > > + vmcs_write(GUEST_BASE_GS, 0); > > + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); > > + vmcs_write(GUEST_BASE_LDTR, 0); > > + > > + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); > > + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); > > + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); > > + > > + vmcs_write(GUEST_AR_CS, 0xa09b); > > + vmcs_write(GUEST_AR_DS, 0xc093); > > + vmcs_write(GUEST_AR_ES, 0xc093); > > + vmcs_write(GUEST_AR_FS, 0xc093); > > + vmcs_write(GUEST_AR_GS, 0xc093); > > + vmcs_write(GUEST_AR_SS, 0xc093); > > + vmcs_write(GUEST_AR_LDTR, 0x82); > > + vmcs_write(GUEST_AR_TR, 0x8b); > > + > > + /* 26.3.1.3 */ > > + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); > > + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); > > + vmcs_write(GUEST_LIMIT_GDTR, > > + ((struct descr *)gdt64_desc)->limit & 0xffff); > > + vmcs_write(GUEST_LIMIT_IDTR, > > + ((struct descr *)idt_descr)->limit & 0xffff); > > + > > + /* 26.3.1.4 */ > > + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); > > + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); > > + vmcs_write(GUEST_RFLAGS, 0x2); > > + > > + /* 26.3.1.5 */ > > + vmcs_write(GUEST_ACTV_STATE, 0); > > + vmcs_write(GUEST_INTR_STATE, 0); > > +} > > + > > +static int init_vmcs(struct vmcs **vmcs) > > +{ > > + *vmcs = alloc_page(); > > + memset(*vmcs, 0, PAGE_SIZE); > > + (*vmcs)->revision_id = basic.revision; > > + /* vmclear first to init vmcs */ > > + if (vmcs_clear(*vmcs)) { > > + printf("%s : vmcs_clear error\n", __func__); > > + return 1; > > + } > > + > > + if (make_vmcs_current(*vmcs)) { > > + printf("%s : make_vmcs_current error\n", __func__); > > + return 1; > > + } > > + > > + /* All settings 
to pin/exit/enter/cpu > > + control fields should be placed here */ > > + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; > > + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; > > + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); > > + ctrl_cpu[0] |= CPU_HLT; > > + /* DIsable IO instruction VMEXIT now */ > > + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); > > + ctrl_cpu[1] = 0; > > + > > + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; > > + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; > > + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr; > > + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; > > + > > + init_vmcs_ctrl(); > > + init_vmcs_host(); > > + init_vmcs_guest(); > > + return 0; > > +} > > + > > +static void init_vmx(void) > > +{ > > + vmxon_region = alloc_page(); > > + memset(vmxon_region, 0, PAGE_SIZE); > > + > > + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); > > + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); > > + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); > > + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); > > + basic.val = rdmsr(MSR_IA32_VMX_BASIC); > > + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN > > + : MSR_IA32_VMX_PINBASED_CTLS); > > + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT > > + : MSR_IA32_VMX_EXIT_CTLS); > > + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY > > + : MSR_IA32_VMX_ENTRY_CTLS); > > + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? 
MSR_IA32_VMX_TRUE_PROC > > + : MSR_IA32_VMX_PROCBASED_CTLS); > > + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) > > + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); > > + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) > > + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); > > + > > + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); > > + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); > > + > > + *vmxon_region = basic.revision; > > + > > + guest_stack = alloc_page(); > > + memset(guest_stack, 0, PAGE_SIZE); > > + guest_syscall_stack = alloc_page(); > > + memset(guest_syscall_stack, 0, PAGE_SIZE); > > +} > > + > > +static int test_vmx_capability(void) > > +{ > > + struct cpuid r; > > + u64 ret1, ret2; > > + u64 ia32_feature_control; > > + r = cpuid(1); > > + ret1 = ((r.c) >> 5) & 1; > > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > > + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { > > + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); > > + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); > > + ret2 = ((ia32_feature_control & 0x5) == 0x5); > > + } > > + report("test vmx capability", ret1 & ret2); > > + return !(ret1 & ret2); > > +} > > + > > +static int test_vmxon(void) > > +{ > > + int ret; > > + u64 rflags; > > + > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + ret = vmx_on(); > > + report("test vmxon", !ret); > > + return ret; > > +} > > + > > +static void test_vmptrld(void) > > +{ > > + u64 rflags; > > + struct vmcs *vmcs; > > + > > + vmcs = alloc_page(); > > + vmcs->revision_id = basic.revision; > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + report("test vmptrld", make_vmcs_current(vmcs) == 0); > > +} > > + > > +static void test_vmptrst(void) > > +{ > > + u64 rflags; > > + int ret; > > + struct vmcs *vmcs1, *vmcs2; > > + > > + vmcs1 = 
alloc_page(); > > + memset(vmcs1, 0, PAGE_SIZE); > > + init_vmcs(&vmcs1); > > + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; > > + write_rflags(rflags); > > + ret = vmcs_save(&vmcs2); > > + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); > > +} > > + > > +/* This function can only be called in guest */ > > +static void __attribute__((__used__)) hypercall(u32 hypercall_no) > > +{ > > + u64 val = 0; > > + val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; > > + hypercall_field = val; > > + asm volatile("vmcall\n\t"); > > +} > > + > > +static bool is_hypercall() > > +{ > > + ulong reason, hyper_bit; > > + > > + reason = vmcs_read(EXI_REASON) & 0xff; > > + hyper_bit = hypercall_field & HYPERCALL_BIT; > > + if (reason == VMX_VMCALL && hyper_bit) > > + return true; > > + return false; > > +} > > + > > +static int handle_hypercall() > > +{ > > + ulong hypercall_no; > > + > > + hypercall_no = hypercall_field & HYPERCALL_MASK; > > + hypercall_field = 0; > > + switch (hypercall_no) { > > + case HYPERCALL_VMEXIT: > > + return VMX_TEST_VMEXIT; > > + default: > > + printf("ERROR : Invalid hypercall number : %d\n", hypercall_no); > > + } > > + return VMX_TEST_EXIT; > > +} > > + > > +static int exit_handler() > > +{ > > + int ret; > > + > > + current->exits++; > > + current->guest_regs = regs; > > + if (is_hypercall()) > > + ret = handle_hypercall(); > > + else > > + ret = current->exit_handler(); > > + regs = current->guest_regs; > > + switch (ret) { > > + case VMX_TEST_VMEXIT: > > + case VMX_TEST_RESUME: > > + return ret; > > + case VMX_TEST_EXIT: > > + break; > > + default: > > + printf("ERROR : Invalid exit_handler return val %d.\n" > > + , ret); > > + } > > + print_vmexit_info(); > > + exit(-1); > > + return 0; > > +} > > + > > +static int vmx_run() > > +{ > > + u32 ret = 0, fail = 0; > > + > > + while (1) { > > + asm volatile ( > > + "mov %%rsp, %%rsi\n\t" > > + "mov %2, %%rdi\n\t" > > + "vmwrite %%rsi, %%rdi\n\t" > > + > > + LOAD_GPR_C > > + "cmpl $0, 
%1\n\t" > > + "jne 1f\n\t" > > + LOAD_RFLAGS > > + "vmlaunch\n\t" > > + "jmp 2f\n\t" > > + "1: " > > + "vmresume\n\t" > > + "2: " > > + "setbe %0\n\t" > > + "vmx_return:\n\t" > > + SAVE_GPR_C > > + SAVE_RFLAGS > > + : "=m"(fail) > > + : "m"(launched), "i"(HOST_RSP) > > + : "rdi", "rsi", "memory", "cc" > > + > > + ); > > + if (fail) > > + ret = launched ? VMX_TEST_RESUME_ERR : > > + VMX_TEST_LAUNCH_ERR; > > + else { > > + launched = 1; > > + ret = exit_handler(); > > + } > > + if (ret != VMX_TEST_RESUME) > > + break; > > + } > > + launched = 0; > > + switch (ret) { > > + case VMX_TEST_VMEXIT: > > + return 0; > > + case VMX_TEST_LAUNCH_ERR: > > + printf("%s : vmlaunch failed.\n", __func__); > > + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) > > + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) > > + printf("\tvmlaunch set wrong flags\n"); > > + report("test vmlaunch", 0); > > + break; > > + case VMX_TEST_RESUME_ERR: > > + printf("%s : vmresume failed.\n", __func__); > > + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) > > + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) > > + printf("\tvmresume set wrong flags\n"); > > + report("test vmresume", 0); > > + break; > > + default: > > + printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret); > > + break; > > + } > > + return 1; > > +} > > + > > +static int test_run(struct vmx_test *test) > > +{ > > + if (test->name == NULL) > > + test->name = "(no name)"; > > + if (vmx_on()) { > > + printf("%s : vmxon failed.\n", __func__); > > + return 1; > > + } > > + init_vmcs(&(test->vmcs)); > > + /* Directly call test->init is ok here, init_vmcs has done > > + vmcs init, vmclear and vmptrld*/ > > + if (test->init) > > + test->init(test->vmcs); > > + test->exits = 0; > > + current = test; > > + regs = test->guest_regs; > > + vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); > > + launched = 0; > > + printf("\nTest suite : %s\n", 
test->name); > > + vmx_run(); > > + if (vmx_off()) { > > + printf("%s : vmxoff failed.\n", __func__); > > + return 1; > > + } > > + return 0; > > +} > > + > > +static void basic_init() > > +{ > > +} > > + > > +static void basic_guest_main() > > +{ > > + /* Here is null guest_main, print Hello World */ > > + printf("\tHello World, this is null_guest_main!\n"); > > +} > > + > > +static int basic_exit_handler() > > +{ > > + u64 guest_rip; > > + ulong reason; > > + > > + guest_rip = vmcs_read(GUEST_RIP); > > + reason = vmcs_read(EXI_REASON) & 0xff; > > + > > + switch (reason) { > > + case VMX_VMCALL: > > + print_vmexit_info(); > > + vmcs_write(GUEST_RIP, guest_rip + 3); > > + return VMX_TEST_RESUME; > > + default: > > + break; > > + } > > + printf("ERROR : Unhandled vmx exit.\n"); > > + print_vmexit_info(); > > + return VMX_TEST_EXIT; > > +} > > + > > +static void basic_syscall_handler(u64 syscall_no) > > +{ > > +} > > + > > +static void vmenter_main() > > +{ > > + u64 rax; > > + u64 rsp, resume_rsp; > > + > > + report("test vmlaunch", 1); > > + > > + asm volatile( > > + "mov %%rsp, %0\n\t" > > + "mov %3, %%rax\n\t" > > + "vmcall\n\t" > > + "mov %%rax, %1\n\t" > > + "mov %%rsp, %2\n\t" > > + : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) > > + : "g"(0xABCD)); > > + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); > > +} > > + > > +static int vmenter_exit_handler() > > +{ > > + u64 guest_rip; > > + ulong reason; > > + > > + guest_rip = vmcs_read(GUEST_RIP); > > + reason = vmcs_read(EXI_REASON) & 0xff; > > + switch (reason) { > > + case VMX_VMCALL: > > + if (current->guest_regs.rax != 0xABCD) { > > + report("test vmresume", 0); > > + return VMX_TEST_VMEXIT; > > + } > > + current->guest_regs.rax = 0xFFFF; > > + vmcs_write(GUEST_RIP, guest_rip + 3); > > + return VMX_TEST_RESUME; > > + default: > > + report("test vmresume", 0); > > + print_vmexit_info(); > > + } > > + return VMX_TEST_VMEXIT; > > +} > > + > > + > > +/* 
name/init/guest_main/exit_handler/syscall_handler/guest_regs > > + basic_* just implement some basic functions */ > > +static struct vmx_test vmx_tests[] = { > > + { "null", basic_init, basic_guest_main, basic_exit_handler, > > + basic_syscall_handler, {0} }, > > + { "vmenter", basic_init, vmenter_main, vmenter_exit_handler, > > + basic_syscall_handler, {0} }, > > +}; > > + > > +int main(void) > > +{ > > + int i; > > + > > + setup_vm(); > > + setup_idt(); > > + > > + if (test_vmx_capability() != 0) { > > + printf("ERROR : vmx not supported, check +vmx option\n"); > > + goto exit; > > + } > > + init_vmx(); > > + /* Set basic test ctxt the same as "null" */ > > + current = &vmx_tests[0]; > > + if (test_vmxon() != 0) > > + goto exit; > > + test_vmptrld(); > > + test_vmclear(); > > + test_vmptrst(); > > + init_vmcs(&vmcs_root); > > + if (vmx_run()) { > > + report("test vmlaunch", 0); > > + goto exit; > > + } > > + test_vmxoff(); > > + > > + for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) { > > + if (test_run(&vmx_tests[i])) > > + goto exit; > > + } > > + > > +exit: > > + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); > > + return fails ? 
1 : 0; > > +} > > diff --git a/x86/vmx.h b/x86/vmx.h > > new file mode 100644 > > index 0000000..1fb9738 > > --- /dev/null > > +++ b/x86/vmx.h > > @@ -0,0 +1,466 @@ > > +#ifndef __HYPERVISOR_H > > +#define __HYPERVISOR_H > > + > > +#include "libcflat.h" > > + > > +struct vmcs { > > + u32 revision_id; /* vmcs revision identifier */ > > + u32 abort; /* VMX-abort indicator */ > > + /* VMCS data */ > > + char data[0]; > > +}; > > + > > +struct regs { > > + u64 rax; > > + u64 rcx; > > + u64 rdx; > > + u64 rbx; > > + u64 cr2; > > + u64 rbp; > > + u64 rsi; > > + u64 rdi; > > + u64 r8; > > + u64 r9; > > + u64 r10; > > + u64 r11; > > + u64 r12; > > + u64 r13; > > + u64 r14; > > + u64 r15; > > + u64 rflags; > > +}; > > + > > +struct vmx_test { > > + const char *name; > > + void (*init)(struct vmcs *vmcs); > > + void (*guest_main)(); > > + int (*exit_handler)(); > > + void (*syscall_handler)(u64 syscall_no); > > + struct regs guest_regs; > > + struct vmcs *vmcs; > > + int exits; > > +}; > > + > > +static union vmx_basic { > > + u64 val; > > + struct { > > + u32 revision; > > + u32 size:13, > > + : 3, > > + width:1, > > + dual:1, > > + type:4, > > + insouts:1, > > + ctrl:1; > > + }; > > +} basic; > > + > > +static union vmx_ctrl_pin { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_pin_rev; > > + > > +static union vmx_ctrl_cpu { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_cpu_rev[2]; > > + > > +static union vmx_ctrl_exit { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_exit_rev; > > + > > +static union vmx_ctrl_ent { > > + u64 val; > > + struct { > > + u32 set, clr; > > + }; > > +} ctrl_enter_rev; > > + > > +static union vmx_ept_vpid { > > + u64 val; > > + struct { > > + u32:16, > > + super:2, > > + : 2, > > + invept:1, > > + : 11; > > + u32 invvpid:1; > > + }; > > +} ept_vpid; > > + > > +struct descr { > > + u16 limit; > > + u64 addr; > > +}; > > + > > +enum Encoding { > > + /* 16-Bit Control Fields */ 
> > + VPID = 0x0000ul, > > + /* Posted-interrupt notification vector */ > > + PINV = 0x0002ul, > > + /* EPTP index */ > > + EPTP_IDX = 0x0004ul, > > + > > + /* 16-Bit Guest State Fields */ > > + GUEST_SEL_ES = 0x0800ul, > > + GUEST_SEL_CS = 0x0802ul, > > + GUEST_SEL_SS = 0x0804ul, > > + GUEST_SEL_DS = 0x0806ul, > > + GUEST_SEL_FS = 0x0808ul, > > + GUEST_SEL_GS = 0x080aul, > > + GUEST_SEL_LDTR = 0x080cul, > > + GUEST_SEL_TR = 0x080eul, > > + GUEST_INT_STATUS = 0x0810ul, > > + > > + /* 16-Bit Host State Fields */ > > + HOST_SEL_ES = 0x0c00ul, > > + HOST_SEL_CS = 0x0c02ul, > > + HOST_SEL_SS = 0x0c04ul, > > + HOST_SEL_DS = 0x0c06ul, > > + HOST_SEL_FS = 0x0c08ul, > > + HOST_SEL_GS = 0x0c0aul, > > + HOST_SEL_TR = 0x0c0cul, > > + > > + /* 64-Bit Control Fields */ > > + IO_BITMAP_A = 0x2000ul, > > + IO_BITMAP_B = 0x2002ul, > > + MSR_BITMAP = 0x2004ul, > > + EXIT_MSR_ST_ADDR = 0x2006ul, > > + EXIT_MSR_LD_ADDR = 0x2008ul, > > + ENTER_MSR_LD_ADDR = 0x200aul, > > + VMCS_EXEC_PTR = 0x200cul, > > + TSC_OFFSET = 0x2010ul, > > + TSC_OFFSET_HI = 0x2011ul, > > + APIC_VIRT_ADDR = 0x2012ul, > > + APIC_ACCS_ADDR = 0x2014ul, > > + EPTP = 0x201aul, > > + EPTP_HI = 0x201bul, > > + > > + /* 64-Bit Readonly Data Field */ > > + INFO_PHYS_ADDR = 0x2400ul, > > + > > + /* 64-Bit Guest State */ > > + VMCS_LINK_PTR = 0x2800ul, > > + VMCS_LINK_PTR_HI = 0x2801ul, > > + GUEST_DEBUGCTL = 0x2802ul, > > + GUEST_DEBUGCTL_HI = 0x2803ul, > > + GUEST_EFER = 0x2806ul, > > + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, > > + GUEST_PDPTE = 0x280aul, > > + > > + /* 64-Bit Host State */ > > + HOST_EFER = 0x2c02ul, > > + HOST_PERF_GLOBAL_CTRL = 0x2c04ul, > > + > > + /* 32-Bit Control Fields */ > > + PIN_CONTROLS = 0x4000ul, > > + CPU_EXEC_CTRL0 = 0x4002ul, > > + EXC_BITMAP = 0x4004ul, > > + PF_ERROR_MASK = 0x4006ul, > > + PF_ERROR_MATCH = 0x4008ul, > > + CR3_TARGET_COUNT = 0x400aul, > > + EXI_CONTROLS = 0x400cul, > > + EXI_MSR_ST_CNT = 0x400eul, > > + EXI_MSR_LD_CNT = 0x4010ul, > > + ENT_CONTROLS = 0x4012ul, > > + 
ENT_MSR_LD_CNT = 0x4014ul, > > + ENT_INTR_INFO = 0x4016ul, > > + ENT_INTR_ERROR = 0x4018ul, > > + ENT_INST_LEN = 0x401aul, > > + TPR_THRESHOLD = 0x401cul, > > + CPU_EXEC_CTRL1 = 0x401eul, > > + > > + /* 32-Bit R/O Data Fields */ > > + VMX_INST_ERROR = 0x4400ul, > > + EXI_REASON = 0x4402ul, > > + EXI_INTR_INFO = 0x4404ul, > > + EXI_INTR_ERROR = 0x4406ul, > > + IDT_VECT_INFO = 0x4408ul, > > + IDT_VECT_ERROR = 0x440aul, > > + EXI_INST_LEN = 0x440cul, > > + EXI_INST_INFO = 0x440eul, > > + > > + /* 32-Bit Guest State Fields */ > > + GUEST_LIMIT_ES = 0x4800ul, > > + GUEST_LIMIT_CS = 0x4802ul, > > + GUEST_LIMIT_SS = 0x4804ul, > > + GUEST_LIMIT_DS = 0x4806ul, > > + GUEST_LIMIT_FS = 0x4808ul, > > + GUEST_LIMIT_GS = 0x480aul, > > + GUEST_LIMIT_LDTR = 0x480cul, > > + GUEST_LIMIT_TR = 0x480eul, > > + GUEST_LIMIT_GDTR = 0x4810ul, > > + GUEST_LIMIT_IDTR = 0x4812ul, > > + GUEST_AR_ES = 0x4814ul, > > + GUEST_AR_CS = 0x4816ul, > > + GUEST_AR_SS = 0x4818ul, > > + GUEST_AR_DS = 0x481aul, > > + GUEST_AR_FS = 0x481cul, > > + GUEST_AR_GS = 0x481eul, > > + GUEST_AR_LDTR = 0x4820ul, > > + GUEST_AR_TR = 0x4822ul, > > + GUEST_INTR_STATE = 0x4824ul, > > + GUEST_ACTV_STATE = 0x4826ul, > > + GUEST_SMBASE = 0x4828ul, > > + GUEST_SYSENTER_CS = 0x482aul, > > + > > + /* 32-Bit Host State Fields */ > > + HOST_SYSENTER_CS = 0x4c00ul, > > + > > + /* Natural-Width Control Fields */ > > + CR0_MASK = 0x6000ul, > > + CR4_MASK = 0x6002ul, > > + CR0_READ_SHADOW = 0x6004ul, > > + CR4_READ_SHADOW = 0x6006ul, > > + CR3_TARGET_0 = 0x6008ul, > > + CR3_TARGET_1 = 0x600aul, > > + CR3_TARGET_2 = 0x600cul, > > + CR3_TARGET_3 = 0x600eul, > > + > > + /* Natural-Width R/O Data Fields */ > > + EXI_QUALIFICATION = 0x6400ul, > > + IO_RCX = 0x6402ul, > > + IO_RSI = 0x6404ul, > > + IO_RDI = 0x6406ul, > > + IO_RIP = 0x6408ul, > > + GUEST_LINEAR_ADDRESS = 0x640aul, > > + > > + /* Natural-Width Guest State Fields */ > > + GUEST_CR0 = 0x6800ul, > > + GUEST_CR3 = 0x6802ul, > > + GUEST_CR4 = 0x6804ul, > > + GUEST_BASE_ES = 
0x6806ul, > > + GUEST_BASE_CS = 0x6808ul, > > + GUEST_BASE_SS = 0x680aul, > > + GUEST_BASE_DS = 0x680cul, > > + GUEST_BASE_FS = 0x680eul, > > + GUEST_BASE_GS = 0x6810ul, > > + GUEST_BASE_LDTR = 0x6812ul, > > + GUEST_BASE_TR = 0x6814ul, > > + GUEST_BASE_GDTR = 0x6816ul, > > + GUEST_BASE_IDTR = 0x6818ul, > > + GUEST_DR7 = 0x681aul, > > + GUEST_RSP = 0x681cul, > > + GUEST_RIP = 0x681eul, > > + GUEST_RFLAGS = 0x6820ul, > > + GUEST_PENDING_DEBUG = 0x6822ul, > > + GUEST_SYSENTER_ESP = 0x6824ul, > > + GUEST_SYSENTER_EIP = 0x6826ul, > > + > > + /* Natural-Width Host State Fields */ > > + HOST_CR0 = 0x6c00ul, > > + HOST_CR3 = 0x6c02ul, > > + HOST_CR4 = 0x6c04ul, > > + HOST_BASE_FS = 0x6c06ul, > > + HOST_BASE_GS = 0x6c08ul, > > + HOST_BASE_TR = 0x6c0aul, > > + HOST_BASE_GDTR = 0x6c0cul, > > + HOST_BASE_IDTR = 0x6c0eul, > > + HOST_SYSENTER_ESP = 0x6c10ul, > > + HOST_SYSENTER_EIP = 0x6c12ul, > > + HOST_RSP = 0x6c14ul, > > + HOST_RIP = 0x6c16ul > > +}; > > + > > +enum Reason { > > + VMX_EXC_NMI = 0, > > + VMX_EXTINT = 1, > > + VMX_TRIPLE_FAULT = 2, > > + VMX_INIT = 3, > > + VMX_SIPI = 4, > > + VMX_SMI_IO = 5, > > + VMX_SMI_OTHER = 6, > > + VMX_INTR_WINDOW = 7, > > + VMX_NMI_WINDOW = 8, > > + VMX_TASK_SWITCH = 9, > > + VMX_CPUID = 10, > > + VMX_GETSEC = 11, > > + VMX_HLT = 12, > > + VMX_INVD = 13, > > + VMX_INVLPG = 14, > > + VMX_RDPMC = 15, > > + VMX_RDTSC = 16, > > + VMX_RSM = 17, > > + VMX_VMCALL = 18, > > + VMX_VMCLEAR = 19, > > + VMX_VMLAUNCH = 20, > > + VMX_VMPTRLD = 21, > > + VMX_VMPTRST = 22, > > + VMX_VMREAD = 23, > > + VMX_VMRESUME = 24, > > + VMX_VMWRITE = 25, > > + VMX_VMXOFF = 26, > > + VMX_VMXON = 27, > > + VMX_CR = 28, > > + VMX_DR = 29, > > + VMX_IO = 30, > > + VMX_RDMSR = 31, > > + VMX_WRMSR = 32, > > + VMX_FAIL_STATE = 33, > > + VMX_FAIL_MSR = 34, > > + VMX_MWAIT = 36, > > + VMX_MTF = 37, > > + VMX_MONITOR = 39, > > + VMX_PAUSE = 40, > > + VMX_FAIL_MCHECK = 41, > > + VMX_TPR_THRESHOLD = 43, > > + VMX_APIC_ACCESS = 44, > > + VMX_GDTR_IDTR = 46, > > + VMX_LDTR_TR 
= 47, > > + VMX_EPT_VIOLATION = 48, > > + VMX_EPT_MISCONFIG = 49, > > + VMX_INVEPT = 50, > > + VMX_PREEMPT = 52, > > + VMX_INVVPID = 53, > > + VMX_WBINVD = 54, > > + VMX_XSETBV = 55 > > +}; > > + > > +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ > > +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ > > + > > +enum Ctrl_exi { > > + EXI_HOST_64 = 1UL << 9, > > + EXI_LOAD_PERF = 1UL << 12, > > + EXI_INTA = 1UL << 15, > > + EXI_LOAD_EFER = 1UL << 21, > > +}; > > + > > +enum Ctrl_ent { > > + ENT_GUEST_64 = 1UL << 9, > > + ENT_LOAD_EFER = 1UL << 15, > > +}; > > + > > +enum Ctrl_pin { > > + PIN_EXTINT = 1ul << 0, > > + PIN_NMI = 1ul << 3, > > + PIN_VIRT_NMI = 1ul << 5, > > +}; > > + > > +enum Ctrl0 { > > + CPU_INTR_WINDOW = 1ul << 2, > > + CPU_HLT = 1ul << 7, > > + CPU_INVLPG = 1ul << 9, > > + CPU_CR3_LOAD = 1ul << 15, > > + CPU_CR3_STORE = 1ul << 16, > > + CPU_TPR_SHADOW = 1ul << 21, > > + CPU_NMI_WINDOW = 1ul << 22, > > + CPU_IO = 1ul << 24, > > + CPU_IO_BITMAP = 1ul << 25, > > + CPU_SECONDARY = 1ul << 31, > > +}; > > + > > +enum Ctrl1 { > > + CPU_EPT = 1ul << 1, > > + CPU_VPID = 1ul << 5, > > + CPU_URG = 1ul << 7, > > +}; > > + > > +#define SAVE_GPR \ > > + "xchg %rax, regs\n\t" \ > > + "xchg %rbx, regs+0x8\n\t" \ > > + "xchg %rcx, regs+0x10\n\t" \ > > + "xchg %rdx, regs+0x18\n\t" \ > > + "xchg %rbp, regs+0x28\n\t" \ > > + "xchg %rsi, regs+0x30\n\t" \ > > + "xchg %rdi, regs+0x38\n\t" \ > > + "xchg %r8, regs+0x40\n\t" \ > > + "xchg %r9, regs+0x48\n\t" \ > > + "xchg %r10, regs+0x50\n\t" \ > > + "xchg %r11, regs+0x58\n\t" \ > > + "xchg %r12, regs+0x60\n\t" \ > > + "xchg %r13, regs+0x68\n\t" \ > > + "xchg %r14, regs+0x70\n\t" \ > > + "xchg %r15, regs+0x78\n\t" > > + > > +#define LOAD_GPR SAVE_GPR > > + > > +#define SAVE_GPR_C \ > > + "xchg %%rax, regs\n\t" \ > > + "xchg %%rbx, regs+0x8\n\t" \ > > + "xchg %%rcx, regs+0x10\n\t" \ > > + "xchg %%rdx, regs+0x18\n\t" \ > > + "xchg %%rbp, regs+0x28\n\t" \ > > + "xchg %%rsi, regs+0x30\n\t" \ > > + "xchg %%rdi, regs+0x38\n\t" 
\ > > + "xchg %%r8, regs+0x40\n\t" \ > > + "xchg %%r9, regs+0x48\n\t" \ > > + "xchg %%r10, regs+0x50\n\t" \ > > + "xchg %%r11, regs+0x58\n\t" \ > > + "xchg %%r12, regs+0x60\n\t" \ > > + "xchg %%r13, regs+0x68\n\t" \ > > + "xchg %%r14, regs+0x70\n\t" \ > > + "xchg %%r15, regs+0x78\n\t" > > + > > +#define LOAD_GPR_C SAVE_GPR_C > > + > > +#define SAVE_RFLAGS \ > > + "pushf\n\t" \ > > + "pop regs+0x80\n\t" > > + > > +#define LOAD_RFLAGS \ > > + "push regs+0x80\n\t" \ > > + "popf\n\t" > > + > > +#define VMX_IO_SIZE_MASK 0x7 > > +#define _VMX_IO_BYTE 1 > > +#define _VMX_IO_WORD 2 > > +#define _VMX_IO_LONG 3 > > +#define VMX_IO_DIRECTION_MASK (1ul << 3) > > +#define VMX_IO_IN (1ul << 3) > > +#define VMX_IO_OUT 0 > > +#define VMX_IO_STRING (1ul << 4) > > +#define VMX_IO_REP (1ul << 5) > > +#define VMX_IO_OPRAND_DX (1ul << 6) > > +#define VMX_IO_PORT_MASK 0xFFFF0000 > > +#define VMX_IO_PORT_SHIFT 16 > > + > > +#define VMX_TEST_VMEXIT 1 > > +#define VMX_TEST_EXIT 2 > > +#define VMX_TEST_RESUME 3 > > +#define VMX_TEST_LAUNCH_ERR 4 > > +#define VMX_TEST_RESUME_ERR 5 > > + > > +#define HYPERCALL_BIT (1ul << 12) > > +#define HYPERCALL_MASK 0xFFF > > +#define HYPERCALL_VMEXIT 0x1 > > + > > +static inline int vmcs_clear(struct vmcs *vmcs) > > +{ > > + bool ret; > > + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); > > + return ret; > > +} > > + > > +static inline u64 vmcs_read(enum Encoding enc) > > +{ > > + u64 val; > > + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); > > + return val; > > +} > > + > > +static inline int vmcs_write(enum Encoding enc, u64 val) > > +{ > > + bool ret; > > + asm volatile ("vmwrite %1, %2; setbe %0" > > + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); > > + return ret; > > +} > > + > > +static inline int vmcs_save(struct vmcs **vmcs) > > +{ > > + bool ret; > > + > > + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); > > + return ret; > > +} > > + > > +#endif > > + > > -- > > 
1.7.9.5 > > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote: > Hi Arthur, > > I'm trying to test your patch on a SandyBridge machine. > > Used 'nested=1' when loading kvm (from 3.9-1-amd64) Should be kvm-intel, but you would get an error otherwise, so I assume that this is what you did. > and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)). > > Without nested=1 I get ``unhandled excecption 13'', so I presume > this is OK, with it, the test_vmx_capability() fails at > detecting vmx. > > I've used for qemu: > $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device > isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device > pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1 > > Are there any knobs which I should further tune? > At first glance you have everything that is needed. What is the output of "cat /proc/cpuinfo" in the guest? -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Gleb, On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote: > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote: > > Hi Arthur, > > > > I'm trying to test your patch on a SandyBridge machine. > > > > Used 'nested=1' when loading kvm (from 3.9-1-amd64) > Should be kvm-intel, but you will get error otherwise, so I assume that > this is what you did. Indeed. > > > and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)). > > > > Without nested=1 I get ``unhandled excecption 13'', so I presume > > this is OK, with it, the test_vmx_capability() fails at > > detecting vmx. > > > > I've used for qemu: > > $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device > > isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device > > pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1 > > > > Are there any knobs which I should further tune? > > > On the first glance you have everything that is needed. What is the output of > "cat /proc/cpuinfo" in the guest is? 
(initramfs) cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 42 model name : Intel Xeon E312xx (Sandy Bridge) stepping : 1 microcode : 0x1 cpu MHz : 3410.012 cache size : 4096 KB fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx rdtscp lm constant_tsc arch_perfmon rep_good nopl eagerfpu pni pclmulqdq vmx ssse3 cx16 sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx hypervisor lahf_lm xsaveopt bogomips : 6820.02 clflush size : 64 cache_alignment : 64 address sizes : 40 bits physical, 48 bits virtual power management: Used: $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device\ isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device\ pci-testdev -kernel /boot/vmlinuz-3.9-1-amd64 -initrd\ /boot/initrd.img-3.9-1-amd64 -append 'init=/bin/bash console=ttyS0' -cpu\ SandyBridge,+vmx,+x2apic -smp 1 -serial stdio > > -- > Gleb. >
On Tue, Jul 30, 2013 at 09:59:47AM +0200, Marius Vlad wrote: > Hi Gleb, > > On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote: > > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote: > > > Hi Arthur, > > > > > > I'm trying to test your patch on a SandyBridge machine. > > > > > > Used 'nested=1' when loading kvm (from 3.9-1-amd64) > > Should be kvm-intel, but you will get error otherwise, so I assume that > > this is what you did. > Indeed. > > > > > and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)). > > > > > > Without nested=1 I get ``unhandled excecption 13'', so I presume > > > this is OK, with it, the test_vmx_capability() fails at > > > detecting vmx. > > > > > > I've used for qemu: > > > $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device > > > isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device > > > pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1 > > > > > > Are there any knobs which I should further tune? > > > > > On the first glance you have everything that is needed. What is the output of > > "cat /proc/cpuinfo" in the guest is? I checked what test_vmx_capability() is actually doing, and it uses the MSR_IA32_FEATURE_CONTROL MSR, which is not supported by your kernel version. You need commit b3897a49e22fc173efa77527a447c714f753f681; it is not in any released kernel version yet. You can just omit the test. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 30, 2013 at 4:07 PM, Gleb Natapov <gleb@redhat.com> wrote: > On Tue, Jul 30, 2013 at 09:59:47AM +0200, Marius Vlad wrote: >> Hi Gleb, >> >> On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote: >> > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote: >> > > Hi Arthur, >> > > >> > > I'm trying to test your patch on a SandyBridge machine. >> > > >> > > Used 'nested=1' when loading kvm (from 3.9-1-amd64) >> > Should be kvm-intel, but you will get error otherwise, so I assume that >> > this is what you did. >> Indeed. >> > >> > > and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)). >> > > >> > > Without nested=1 I get ``unhandled excecption 13'', so I presume >> > > this is OK, with it, the test_vmx_capability() fails at >> > > detecting vmx. >> > > >> > > I've used for qemu: >> > > $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device >> > > isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device >> > > pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1 >> > > >> > > Are there any knobs which I should further tune? >> > > >> > On the first glance you have everything that is needed. What is the output of >> > "cat /proc/cpuinfo" in the guest is? > I checked what test_vmx_capability() is actually doing and it uses > MSR_IA32_FEATURE_CONTROL MSR which is not supported in your version of > kernel. You need b3897a49e22fc173efa77527a447c714f753f681 commit, it is > not in any released version of kernel yet. You can just omit the test. Hi Marius, What Gleb said is generally right and I have some additional information. This patch is ongoing as well as fixing some existing bugs when developing. You can refer to [1] to get all the relevant bug fix patches. For kernel, you'd better git clone kvm kernel from [2] and checkout to "next" branch and make it. Some patches are on kvm's wait queue and are not merged into master. 
Thanks, Arthur [1] http://wiki.qemu.org/Features/KVMNestedVirtualizationTestsuite [2] https://git.kernel.org/cgit/virt/kvm/kvm.git/ > > -- > Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 30, 2013 at 11:14:38AM +0200, Arthur Chunqi Li wrote: > On Tue, Jul 30, 2013 at 4:07 PM, Gleb Natapov <gleb@redhat.com> wrote: > > On Tue, Jul 30, 2013 at 09:59:47AM +0200, Marius Vlad wrote: > >> Hi Gleb, > >> > >> On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote: > >> > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote: > >> > > Hi Arthur, > >> > > > >> > > I'm trying to test your patch on a SandyBridge machine. > >> > > > >> > > Used 'nested=1' when loading kvm (from 3.9-1-amd64) > >> > Should be kvm-intel, but you will get error otherwise, so I assume that > >> > this is what you did. > >> Indeed. > >> > > >> > > and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)). > >> > > > >> > > Without nested=1 I get ``unhandled excecption 13'', so I presume > >> > > this is OK, with it, the test_vmx_capability() fails at > >> > > detecting vmx. > >> > > > >> > > I've used for qemu: > >> > > $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device > >> > > isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device > >> > > pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1 > >> > > > >> > > Are there any knobs which I should further tune? > >> > > > >> > On the first glance you have everything that is needed. What is the output of > >> > "cat /proc/cpuinfo" in the guest is? > > I checked what test_vmx_capability() is actually doing and it uses > > MSR_IA32_FEATURE_CONTROL MSR which is not supported in your version of > > kernel. You need b3897a49e22fc173efa77527a447c714f753f681 commit, it is > > not in any released version of kernel yet. You can just omit the test. > Hi Marius, > > What Gleb said is generally right and I have some additional > information. This patch is ongoing as well as fixing some existing > bugs when developing. You can refer to [1] to get all the relevant bug > fix patches. For kernel, you'd better git clone kvm kernel from [2] > and checkout to "next" branch and make it. 
Some patches are on kvm's > wait queue and are not merged into master. I somehow hinted that I require an upstream version of kvm. Thanks for pointing out the queue branch. The tests pass now. > > Thanks, > Arthur > > [1] http://wiki.qemu.org/Features/KVMNestedVirtualizationTestsuite > [2] https://git.kernel.org/cgit/virt/kvm/kvm.git/ > > > > -- > > Gleb.
Il 28/07/2013 16:00, Arthur Chunqi Li ha scritto: > + > + while (1) { > + asm volatile ( > + "mov %%rsp, %%rsi\n\t" > + "mov %2, %%rdi\n\t" > + "vmwrite %%rsi, %%rdi\n\t" > + > + LOAD_GPR_C > + "cmpl $0, %1\n\t" > + "jne 1f\n\t" > + LOAD_RFLAGS > + "vmlaunch\n\t" > + "jmp 2f\n\t" > + "1: " > + "vmresume\n\t" > + "2: " > + "setbe %0\n\t" Perhaps here add jbe vmx_return ud2 but it can be added in a follow-up. > + "vmx_return:\n\t" > + SAVE_GPR_C > + SAVE_RFLAGS > + : "=m"(fail) > + : "m"(launched), "i"(HOST_RSP) > + : "rdi", "rsi", "memory", "cc" > + > + ); Nice! :) I pushed the patch. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 30, 2013 at 10:53 PM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 28/07/2013 16:00, Arthur Chunqi Li ha scritto: >> + >> + while (1) { >> + asm volatile ( >> + "mov %%rsp, %%rsi\n\t" >> + "mov %2, %%rdi\n\t" >> + "vmwrite %%rsi, %%rdi\n\t" >> + >> + LOAD_GPR_C >> + "cmpl $0, %1\n\t" >> + "jne 1f\n\t" >> + LOAD_RFLAGS >> + "vmlaunch\n\t" >> + "jmp 2f\n\t" >> + "1: " >> + "vmresume\n\t" >> + "2: " >> + "setbe %0\n\t" > > Perhaps here add > > jbe vmx_return > ud2 > > but it can be added in a follow-up. > >> + "vmx_return:\n\t" >> + SAVE_GPR_C >> + SAVE_RFLAGS >> + : "=m"(fail) >> + : "m"(launched), "i"(HOST_RSP) >> + : "rdi", "rsi", "memory", "cc" >> + >> + ); > > Nice! :) > > I pushed the patch. Hi Paolo, Please hold off on pushing it. There's something wrong with the host_rflags handling. I will send another version that also includes your suggestion; you could push that version. Thanks, Arthur > > Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Il 30/07/2013 16:57, Arthur Chunqi Li ha scritto: > On Tue, Jul 30, 2013 at 10:53 PM, Paolo Bonzini <pbonzini@redhat.com> wrote: >> Il 28/07/2013 16:00, Arthur Chunqi Li ha scritto: >>> + >>> + while (1) { >>> + asm volatile ( >>> + "mov %%rsp, %%rsi\n\t" >>> + "mov %2, %%rdi\n\t" >>> + "vmwrite %%rsi, %%rdi\n\t" >>> + >>> + LOAD_GPR_C >>> + "cmpl $0, %1\n\t" >>> + "jne 1f\n\t" >>> + LOAD_RFLAGS >>> + "vmlaunch\n\t" >>> + "jmp 2f\n\t" >>> + "1: " >>> + "vmresume\n\t" >>> + "2: " >>> + "setbe %0\n\t" >> >> Perhaps here add >> >> jbe vmx_return >> ud2 >> >> but it can be added in a follow-up. >> >>> + "vmx_return:\n\t" >>> + SAVE_GPR_C >>> + SAVE_RFLAGS >>> + : "=m"(fail) >>> + : "m"(launched), "i"(HOST_RSP) >>> + : "rdi", "rsi", "memory", "cc" >>> + >>> + ); >> >> Nice! :) >> >> I pushed the patch. > Hi Paolo, > Slow down pushing it. There's something wrong with host_rflags > handling. I will commit another version as well as your suggestion, > you could push that version. Well, too late... anyway I prefer follow-up patches because they make the differences clearer. With a single 1200-line patch it is a bit complicated to follow. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/config-x86-common.mak b/config-x86-common.mak index 455032b..34a41e1 100644 --- a/config-x86-common.mak +++ b/config-x86-common.mak @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o + arch_clean: $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \ $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o diff --git a/config-x86_64.mak b/config-x86_64.mak index 4e525f5..bb8ee89 100644 --- a/config-x86_64.mak +++ b/config-x86_64.mak @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \ $(TEST_DIR)/pcid.flat tests += $(TEST_DIR)/svm.flat +tests += $(TEST_DIR)/vmx.flat include config-x86-common.mak diff --git a/lib/x86/msr.h b/lib/x86/msr.h index 509a421..281255a 100644 --- a/lib/x86/msr.h +++ b/lib/x86/msr.h @@ -396,6 +396,11 @@ #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PIN 0x0000048d +#define MSR_IA32_VMX_TRUE_PROC 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY 0x00000490 + /* AMD-V MSRs */ diff --git a/lib/x86/processor.h b/lib/x86/processor.h index e46d8d0..f0c11cc 100644 --- a/lib/x86/processor.h +++ b/lib/x86/processor.h @@ -307,4 +307,19 @@ static inline void safe_halt(void) { asm volatile("sti; hlt"); } + +#ifdef __x86_64__ +static inline u64 read_rflags(void) +{ + u64 r; + asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc"); + return r; +} + +static inline void write_rflags(u64 r) +{ + asm volatile("push %0; popf\n\t" : : "q"(r) : "cc"); +} +#endif + #endif diff --git a/lib/x86/vm.c b/lib/x86/vm.c index 260ec45..188bf57 100644 --- a/lib/x86/vm.c +++ b/lib/x86/vm.c @@ -9,10 +9,6 @@ #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) #endif -#define X86_CR0_PE 0x00000001 -#define X86_CR0_WP 
0x00010000 -#define X86_CR0_PG 0x80000000 -#define X86_CR4_PSE 0x00000010 static void *free = 0; static void *vfree_top = 0; diff --git a/lib/x86/vm.h b/lib/x86/vm.h index 0b5b5c7..eff6f72 100644 --- a/lib/x86/vm.h +++ b/lib/x86/vm.h @@ -16,6 +16,27 @@ #define PTE_USER (1ull << 2) #define PTE_ADDR (0xffffffffff000ull) +#define X86_CR0_PE 0x00000001 +#define X86_CR0_WP 0x00010000 +#define X86_CR0_PG 0x80000000 +#define X86_CR4_VMXE 0x00000001 +#define X86_CR4_PSE 0x00000010 +#define X86_CR4_PAE 0x00000020 +#define X86_CR4_PCIDE 0x00020000 + +#ifdef __x86_64__ +#define SEL_NULL_DESC 0x0 +#define SEL_KERN_CODE_64 0x8 +#define SEL_KERN_DATA_64 0x10 +#define SEL_USER_CODE_64 0x18 +#define SEL_USER_DATA_64 0x20 +#define SEL_CODE_32 0x28 +#define SEL_DATA_32 0x30 +#define SEL_CODE_16 0x38 +#define SEL_DATA_16 0x40 +#define SEL_TSS_RUN 0x48 +#endif + void setup_vm(); void *vmalloc(unsigned long size); diff --git a/x86/cstart64.S b/x86/cstart64.S index 24df5f8..0fe76da 100644 --- a/x86/cstart64.S +++ b/x86/cstart64.S @@ -4,6 +4,10 @@ .globl boot_idt boot_idt = 0 +.globl idt_descr +.globl tss_descr +.globl gdt64_desc + ipi_vector = 0x20 max_cpus = 64 diff --git a/x86/unittests.cfg b/x86/unittests.cfg index bc9643e..85c36aa 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`" file = pcid.flat extra_params = -cpu qemu64,+pcid arch = x86_64 + +[vmx] +file = vmx.flat +extra_params = -cpu host,+vmx +arch = x86_64 + diff --git a/x86/vmx.c b/x86/vmx.c new file mode 100644 index 0000000..7467927 --- /dev/null +++ b/x86/vmx.c @@ -0,0 +1,674 @@ +#include "libcflat.h" +#include "processor.h" +#include "vm.h" +#include "desc.h" +#include "vmx.h" +#include "msr.h" +#include "smp.h" +#include "io.h" + +int fails = 0, tests = 0; +u32 *vmxon_region; +struct vmcs *vmcs_root; +u32 vpid_cnt; +void *guest_stack, *guest_syscall_stack; +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2]; +ulong fix_cr0_set, fix_cr0_clr; +ulong 
fix_cr4_set, fix_cr4_clr; +struct regs regs; +struct vmx_test *current; +u64 hypercall_field = 0; +bool launched; + +extern u64 gdt64_desc[]; +extern u64 idt_descr[]; +extern u64 tss_descr[]; +extern void *vmx_return; +extern void *entry_sysenter; +extern void *guest_entry; + +static void report(const char *name, int result) +{ + ++tests; + if (result) + printf("PASS: %s\n", name); + else { + printf("FAIL: %s\n", name); + ++fails; + } +} + +static int make_vmcs_current(struct vmcs *vmcs) +{ + bool ret; + + asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); + return ret; +} + +/* entry_sysenter */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_sysenter\n\t" + "entry_sysenter:\n\t" + SAVE_GPR + " and $0xf, %rax\n\t" + " mov %rax, %rdi\n\t" + " call syscall_handler\n\t" + LOAD_GPR + " vmresume\n\t" +); + +static void __attribute__((__used__)) syscall_handler(u64 syscall_no) +{ + current->syscall_handler(syscall_no); +} + +static inline int vmx_on() +{ + bool ret; + asm volatile ("vmxon %1; setbe %0\n\t" + : "=q"(ret) : "m"(vmxon_region) : "cc"); + return ret; +} + +static inline int vmx_off() +{ + bool ret; + asm volatile("vmxoff; setbe %0\n\t" + : "=q"(ret) : : "cc"); + return ret; +} + +static void print_vmexit_info() +{ + u64 guest_rip, guest_rsp; + ulong reason = vmcs_read(EXI_REASON) & 0xff; + ulong exit_qual = vmcs_read(EXI_QUALIFICATION); + guest_rip = vmcs_read(GUEST_RIP); + guest_rsp = vmcs_read(GUEST_RSP); + printf("VMEXIT info:\n"); + printf("\tvmexit reason = %d\n", reason); + printf("\texit qualification = 0x%x\n", exit_qual); + printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1); + printf("\tguest_rip = 0x%llx\n", guest_rip); + printf("\tRAX=0x%llx RBX=0x%llx RCX=0x%llx RDX=0x%llx\n", + regs.rax, regs.rbx, regs.rcx, regs.rdx); + printf("\tRSP=0x%llx RBP=0x%llx RSI=0x%llx RDI=0x%llx\n", + guest_rsp, regs.rbp, regs.rsi, regs.rdi); + printf("\tR8 =0x%llx R9 =0x%llx R10=0x%llx R11=0x%llx\n", + regs.r8, regs.r9, regs.r10, 
regs.r11); + printf("\tR12=0x%llx R13=0x%llx R14=0x%llx R15=0x%llx\n", + regs.r12, regs.r13, regs.r14, regs.r15); +} + +static void test_vmclear(void) +{ + u64 rflags; + + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + write_rflags(rflags); + report("test vmclear", vmcs_clear(vmcs_root) == 0); +} + +static void test_vmxoff(void) +{ + int ret; + u64 rflags; + + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + write_rflags(rflags); + ret = vmx_off(); + report("test vmxoff", !ret); +} + +static void __attribute__((__used__)) guest_main(void) +{ + current->guest_main(); +} + +/* guest_entry */ +asm( + ".align 4, 0x90\n\t" + ".globl entry_guest\n\t" + "guest_entry:\n\t" + " call guest_main\n\t" + " mov $1, %edi\n\t" + " call hypercall\n\t" +); + +static void init_vmcs_ctrl(void) +{ + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ + /* 26.2.1.1 */ + vmcs_write(PIN_CONTROLS, ctrl_pin); + /* Disable VMEXIT of IO instruction */ + vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]); + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) { + ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr; + vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]); + } + vmcs_write(CR3_TARGET_COUNT, 0); + vmcs_write(VPID, ++vpid_cnt); +} + +static void init_vmcs_host(void) +{ + /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */ + /* 26.2.1.2 */ + vmcs_write(HOST_EFER, rdmsr(MSR_EFER)); + + /* 26.2.1.3 */ + vmcs_write(ENT_CONTROLS, ctrl_enter); + vmcs_write(EXI_CONTROLS, ctrl_exit); + + /* 26.2.2 */ + vmcs_write(HOST_CR0, read_cr0()); + vmcs_write(HOST_CR3, read_cr3()); + vmcs_write(HOST_CR4, read_cr4()); + vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter)); + vmcs_write(HOST_SYSENTER_CS, SEL_KERN_CODE_64); + + /* 26.2.3 */ + vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64); + vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64); + vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64); + 
vmcs_write(HOST_SEL_TR, SEL_TSS_RUN); + vmcs_write(HOST_BASE_TR, (u64)tss_descr); + vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc); + vmcs_write(HOST_BASE_IDTR, (u64)idt_descr); + vmcs_write(HOST_BASE_FS, 0); + vmcs_write(HOST_BASE_GS, 0); + + /* Set other vmcs area */ + vmcs_write(PF_ERROR_MASK, 0); + vmcs_write(PF_ERROR_MATCH, 0); + vmcs_write(VMCS_LINK_PTR, ~0ul); + vmcs_write(VMCS_LINK_PTR_HI, ~0ul); + vmcs_write(HOST_RIP, (u64)(&vmx_return)); +} + +static void init_vmcs_guest(void) +{ + /* 26.3 CHECKING AND LOADING GUEST STATE */ + ulong guest_cr0, guest_cr4, guest_cr3; + /* 26.3.1.1 */ + guest_cr0 = read_cr0(); + guest_cr4 = read_cr4(); + guest_cr3 = read_cr3(); + if (ctrl_enter & ENT_GUEST_64) { + guest_cr0 |= X86_CR0_PG; + guest_cr4 |= X86_CR4_PAE; + } + if ((ctrl_enter & ENT_GUEST_64) == 0) + guest_cr4 &= (~X86_CR4_PCIDE); + if (guest_cr0 & X86_CR0_PG) + guest_cr0 |= X86_CR0_PE; + vmcs_write(GUEST_CR0, guest_cr0); + vmcs_write(GUEST_CR3, guest_cr3); + vmcs_write(GUEST_CR4, guest_cr4); + vmcs_write(GUEST_SYSENTER_CS, SEL_KERN_CODE_64); + vmcs_write(GUEST_SYSENTER_ESP, + (u64)(guest_syscall_stack + PAGE_SIZE - 1)); + vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter)); + vmcs_write(GUEST_DR7, 0); + vmcs_write(GUEST_EFER, rdmsr(MSR_EFER)); + + /* 26.3.1.2 */ + vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64); + vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64); + vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN); + vmcs_write(GUEST_SEL_LDTR, 0); + + vmcs_write(GUEST_BASE_CS, 0); + vmcs_write(GUEST_BASE_ES, 0); + vmcs_write(GUEST_BASE_SS, 0); + vmcs_write(GUEST_BASE_DS, 0); + vmcs_write(GUEST_BASE_FS, 0); + vmcs_write(GUEST_BASE_GS, 0); + vmcs_write(GUEST_BASE_TR, (u64)tss_descr); + vmcs_write(GUEST_BASE_LDTR, 0); + + vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF); + 
vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF); + vmcs_write(GUEST_LIMIT_LDTR, 0xffff); + vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit); + + vmcs_write(GUEST_AR_CS, 0xa09b); + vmcs_write(GUEST_AR_DS, 0xc093); + vmcs_write(GUEST_AR_ES, 0xc093); + vmcs_write(GUEST_AR_FS, 0xc093); + vmcs_write(GUEST_AR_GS, 0xc093); + vmcs_write(GUEST_AR_SS, 0xc093); + vmcs_write(GUEST_AR_LDTR, 0x82); + vmcs_write(GUEST_AR_TR, 0x8b); + + /* 26.3.1.3 */ + vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc); + vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr); + vmcs_write(GUEST_LIMIT_GDTR, + ((struct descr *)gdt64_desc)->limit & 0xffff); + vmcs_write(GUEST_LIMIT_IDTR, + ((struct descr *)idt_descr)->limit & 0xffff); + + /* 26.3.1.4 */ + vmcs_write(GUEST_RIP, (u64)(&guest_entry)); + vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1)); + vmcs_write(GUEST_RFLAGS, 0x2); + + /* 26.3.1.5 */ + vmcs_write(GUEST_ACTV_STATE, 0); + vmcs_write(GUEST_INTR_STATE, 0); +} + +static int init_vmcs(struct vmcs **vmcs) +{ + *vmcs = alloc_page(); + memset(*vmcs, 0, PAGE_SIZE); + (*vmcs)->revision_id = basic.revision; + /* vmclear first to init vmcs */ + if (vmcs_clear(*vmcs)) { + printf("%s : vmcs_clear error\n", __func__); + return 1; + } + + if (make_vmcs_current(*vmcs)) { + printf("%s : make_vmcs_current error\n", __func__); + return 1; + } + + /* All settings to pin/exit/enter/cpu + control fields should be placed here */ + ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI; + ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64; + ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64); + ctrl_cpu[0] |= CPU_HLT; + /* DIsable IO instruction VMEXIT now */ + ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP)); + ctrl_cpu[1] = 0; + + ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr; + ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr; + ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & 
ctrl_exit_rev.clr; + ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr; + + init_vmcs_ctrl(); + init_vmcs_host(); + init_vmcs_guest(); + return 0; +} + +static void init_vmx(void) +{ + vmxon_region = alloc_page(); + memset(vmxon_region, 0, PAGE_SIZE); + + fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0); + fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1); + fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0); + fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1); + basic.val = rdmsr(MSR_IA32_VMX_BASIC); + ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN + : MSR_IA32_VMX_PINBASED_CTLS); + ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT + : MSR_IA32_VMX_EXIT_CTLS); + ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY + : MSR_IA32_VMX_ENTRY_CTLS); + ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC + : MSR_IA32_VMX_PROCBASED_CTLS); + if (ctrl_cpu_rev[0].set & CPU_SECONDARY) + ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2); + if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID) + ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + + write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set); + write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE); + + *vmxon_region = basic.revision; + + guest_stack = alloc_page(); + memset(guest_stack, 0, PAGE_SIZE); + guest_syscall_stack = alloc_page(); + memset(guest_syscall_stack, 0, PAGE_SIZE); +} + +static int test_vmx_capability(void) +{ + struct cpuid r; + u64 ret1, ret2; + u64 ia32_feature_control; + r = cpuid(1); + ret1 = ((r.c) >> 5) & 1; + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + ret2 = ((ia32_feature_control & 0x5) == 0x5); + if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) { + wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5); + ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + ret2 = ((ia32_feature_control & 0x5) == 0x5); + } + report("test vmx capability", ret1 & ret2); + return !(ret1 & ret2); +} + +static int 
test_vmxon(void) +{ + int ret; + u64 rflags; + + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + write_rflags(rflags); + ret = vmx_on(); + report("test vmxon", !ret); + return ret; +} + +static void test_vmptrld(void) +{ + u64 rflags; + struct vmcs *vmcs; + + vmcs = alloc_page(); + vmcs->revision_id = basic.revision; + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + write_rflags(rflags); + report("test vmptrld", make_vmcs_current(vmcs) == 0); +} + +static void test_vmptrst(void) +{ + u64 rflags; + int ret; + struct vmcs *vmcs1, *vmcs2; + + vmcs1 = alloc_page(); + memset(vmcs1, 0, PAGE_SIZE); + init_vmcs(&vmcs1); + rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF; + write_rflags(rflags); + ret = vmcs_save(&vmcs2); + report("test vmptrst", (!ret) && (vmcs1 == vmcs2)); +} + +/* This function can only be called in guest */ +static void __attribute__((__used__)) hypercall(u32 hypercall_no) +{ + u64 val = 0; + val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT; + hypercall_field = val; + asm volatile("vmcall\n\t"); +} + +static bool is_hypercall() +{ + ulong reason, hyper_bit; + + reason = vmcs_read(EXI_REASON) & 0xff; + hyper_bit = hypercall_field & HYPERCALL_BIT; + if (reason == VMX_VMCALL && hyper_bit) + return true; + return false; +} + +static int handle_hypercall() +{ + ulong hypercall_no; + + hypercall_no = hypercall_field & HYPERCALL_MASK; + hypercall_field = 0; + switch (hypercall_no) { + case HYPERCALL_VMEXIT: + return VMX_TEST_VMEXIT; + default: + printf("ERROR : Invalid hypercall number : %d\n", hypercall_no); + } + return VMX_TEST_EXIT; +} + +static int exit_handler() +{ + int ret; + + current->exits++; + current->guest_regs = regs; + if (is_hypercall()) + ret = handle_hypercall(); + else + ret = current->exit_handler(); + regs = current->guest_regs; + switch (ret) { + case VMX_TEST_VMEXIT: + case VMX_TEST_RESUME: + return ret; + case VMX_TEST_EXIT: + break; + default: + printf("ERROR : Invalid exit_handler return val %d.\n" + , 
ret); + } + print_vmexit_info(); + exit(-1); + return 0; +} + +static int vmx_run() +{ + u32 ret = 0, fail = 0; + + while (1) { + asm volatile ( + "mov %%rsp, %%rsi\n\t" + "mov %2, %%rdi\n\t" + "vmwrite %%rsi, %%rdi\n\t" + + LOAD_GPR_C + "cmpl $0, %1\n\t" + "jne 1f\n\t" + LOAD_RFLAGS + "vmlaunch\n\t" + "jmp 2f\n\t" + "1: " + "vmresume\n\t" + "2: " + "setbe %0\n\t" + "vmx_return:\n\t" + SAVE_GPR_C + SAVE_RFLAGS + : "=m"(fail) + : "m"(launched), "i"(HOST_RSP) + : "rdi", "rsi", "memory", "cc" + + ); + if (fail) + ret = launched ? VMX_TEST_RESUME_ERR : + VMX_TEST_LAUNCH_ERR; + else { + launched = 1; + ret = exit_handler(); + } + if (ret != VMX_TEST_RESUME) + break; + } + launched = 0; + switch (ret) { + case VMX_TEST_VMEXIT: + return 0; + case VMX_TEST_LAUNCH_ERR: + printf("%s : vmlaunch failed.\n", __func__); + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) + printf("\tvmlaunch set wrong flags\n"); + report("test vmlaunch", 0); + break; + case VMX_TEST_RESUME_ERR: + printf("%s : vmresume failed.\n", __func__); + if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF)) + || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF))) + printf("\tvmresume set wrong flags\n"); + report("test vmresume", 0); + break; + default: + printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret); + break; + } + return 1; +} + +static int test_run(struct vmx_test *test) +{ + if (test->name == NULL) + test->name = "(no name)"; + if (vmx_on()) { + printf("%s : vmxon failed.\n", __func__); + return 1; + } + init_vmcs(&(test->vmcs)); + /* Directly call test->init is ok here, init_vmcs has done + vmcs init, vmclear and vmptrld*/ + if (test->init) + test->init(test->vmcs); + test->exits = 0; + current = test; + regs = test->guest_regs; + vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2); + launched = 0; + printf("\nTest suite : %s\n", test->name); + vmx_run(); + if 
(vmx_off()) { + printf("%s : vmxoff failed.\n", __func__); + return 1; + } + return 0; +} + +static void basic_init() +{ +} + +static void basic_guest_main() +{ + /* Here is null guest_main, print Hello World */ + printf("\tHello World, this is null_guest_main!\n"); +} + +static int basic_exit_handler() +{ + u64 guest_rip; + ulong reason; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + + switch (reason) { + case VMX_VMCALL: + print_vmexit_info(); + vmcs_write(GUEST_RIP, guest_rip + 3); + return VMX_TEST_RESUME; + default: + break; + } + printf("ERROR : Unhandled vmx exit.\n"); + print_vmexit_info(); + return VMX_TEST_EXIT; +} + +static void basic_syscall_handler(u64 syscall_no) +{ +} + +static void vmenter_main() +{ + u64 rax; + u64 rsp, resume_rsp; + + report("test vmlaunch", 1); + + asm volatile( + "mov %%rsp, %0\n\t" + "mov %3, %%rax\n\t" + "vmcall\n\t" + "mov %%rax, %1\n\t" + "mov %%rsp, %2\n\t" + : "=r"(rsp), "=r"(rax), "=r"(resume_rsp) + : "g"(0xABCD)); + report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp)); +} + +static int vmenter_exit_handler() +{ + u64 guest_rip; + ulong reason; + + guest_rip = vmcs_read(GUEST_RIP); + reason = vmcs_read(EXI_REASON) & 0xff; + switch (reason) { + case VMX_VMCALL: + if (current->guest_regs.rax != 0xABCD) { + report("test vmresume", 0); + return VMX_TEST_VMEXIT; + } + current->guest_regs.rax = 0xFFFF; + vmcs_write(GUEST_RIP, guest_rip + 3); + return VMX_TEST_RESUME; + default: + report("test vmresume", 0); + print_vmexit_info(); + } + return VMX_TEST_VMEXIT; +} + + +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs + basic_* just implement some basic functions */ +static struct vmx_test vmx_tests[] = { + { "null", basic_init, basic_guest_main, basic_exit_handler, + basic_syscall_handler, {0} }, + { "vmenter", basic_init, vmenter_main, vmenter_exit_handler, + basic_syscall_handler, {0} }, +}; + +int main(void) +{ + int i; + + setup_vm(); + setup_idt(); + + if 
(test_vmx_capability() != 0) { + printf("ERROR : vmx not supported, check +vmx option\n"); + goto exit; + } + init_vmx(); + /* Set basic test ctxt the same as "null" */ + current = &vmx_tests[0]; + if (test_vmxon() != 0) + goto exit; + test_vmptrld(); + test_vmclear(); + test_vmptrst(); + init_vmcs(&vmcs_root); + if (vmx_run()) { + report("test vmlaunch", 0); + goto exit; + } + test_vmxoff(); + + for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) { + if (test_run(&vmx_tests[i])) + goto exit; + } + +exit: + printf("\nSUMMARY: %d tests, %d failures\n", tests, fails); + return fails ? 1 : 0; +} diff --git a/x86/vmx.h b/x86/vmx.h new file mode 100644 index 0000000..1fb9738 --- /dev/null +++ b/x86/vmx.h @@ -0,0 +1,466 @@ +#ifndef __HYPERVISOR_H +#define __HYPERVISOR_H + +#include "libcflat.h" + +struct vmcs { + u32 revision_id; /* vmcs revision identifier */ + u32 abort; /* VMX-abort indicator */ + /* VMCS data */ + char data[0]; +}; + +struct regs { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 cr2; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rflags; +}; + +struct vmx_test { + const char *name; + void (*init)(struct vmcs *vmcs); + void (*guest_main)(); + int (*exit_handler)(); + void (*syscall_handler)(u64 syscall_no); + struct regs guest_regs; + struct vmcs *vmcs; + int exits; +}; + +static union vmx_basic { + u64 val; + struct { + u32 revision; + u32 size:13, + : 3, + width:1, + dual:1, + type:4, + insouts:1, + ctrl:1; + }; +} basic; + +static union vmx_ctrl_pin { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_pin_rev; + +static union vmx_ctrl_cpu { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_cpu_rev[2]; + +static union vmx_ctrl_exit { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_exit_rev; + +static union vmx_ctrl_ent { + u64 val; + struct { + u32 set, clr; + }; +} ctrl_enter_rev; + +static union vmx_ept_vpid { + u64 val; + struct { + u32:16, + super:2, + : 2, + invept:1, + 
: 11; + u32 invvpid:1; + }; +} ept_vpid; + +struct descr { + u16 limit; + u64 addr; +}; + +enum Encoding { + /* 16-Bit Control Fields */ + VPID = 0x0000ul, + /* Posted-interrupt notification vector */ + PINV = 0x0002ul, + /* EPTP index */ + EPTP_IDX = 0x0004ul, + + /* 16-Bit Guest State Fields */ + GUEST_SEL_ES = 0x0800ul, + GUEST_SEL_CS = 0x0802ul, + GUEST_SEL_SS = 0x0804ul, + GUEST_SEL_DS = 0x0806ul, + GUEST_SEL_FS = 0x0808ul, + GUEST_SEL_GS = 0x080aul, + GUEST_SEL_LDTR = 0x080cul, + GUEST_SEL_TR = 0x080eul, + GUEST_INT_STATUS = 0x0810ul, + + /* 16-Bit Host State Fields */ + HOST_SEL_ES = 0x0c00ul, + HOST_SEL_CS = 0x0c02ul, + HOST_SEL_SS = 0x0c04ul, + HOST_SEL_DS = 0x0c06ul, + HOST_SEL_FS = 0x0c08ul, + HOST_SEL_GS = 0x0c0aul, + HOST_SEL_TR = 0x0c0cul, + + /* 64-Bit Control Fields */ + IO_BITMAP_A = 0x2000ul, + IO_BITMAP_B = 0x2002ul, + MSR_BITMAP = 0x2004ul, + EXIT_MSR_ST_ADDR = 0x2006ul, + EXIT_MSR_LD_ADDR = 0x2008ul, + ENTER_MSR_LD_ADDR = 0x200aul, + VMCS_EXEC_PTR = 0x200cul, + TSC_OFFSET = 0x2010ul, + TSC_OFFSET_HI = 0x2011ul, + APIC_VIRT_ADDR = 0x2012ul, + APIC_ACCS_ADDR = 0x2014ul, + EPTP = 0x201aul, + EPTP_HI = 0x201bul, + + /* 64-Bit Readonly Data Field */ + INFO_PHYS_ADDR = 0x2400ul, + + /* 64-Bit Guest State */ + VMCS_LINK_PTR = 0x2800ul, + VMCS_LINK_PTR_HI = 0x2801ul, + GUEST_DEBUGCTL = 0x2802ul, + GUEST_DEBUGCTL_HI = 0x2803ul, + GUEST_EFER = 0x2806ul, + GUEST_PERF_GLOBAL_CTRL = 0x2808ul, + GUEST_PDPTE = 0x280aul, + + /* 64-Bit Host State */ + HOST_EFER = 0x2c02ul, + HOST_PERF_GLOBAL_CTRL = 0x2c04ul, + + /* 32-Bit Control Fields */ + PIN_CONTROLS = 0x4000ul, + CPU_EXEC_CTRL0 = 0x4002ul, + EXC_BITMAP = 0x4004ul, + PF_ERROR_MASK = 0x4006ul, + PF_ERROR_MATCH = 0x4008ul, + CR3_TARGET_COUNT = 0x400aul, + EXI_CONTROLS = 0x400cul, + EXI_MSR_ST_CNT = 0x400eul, + EXI_MSR_LD_CNT = 0x4010ul, + ENT_CONTROLS = 0x4012ul, + ENT_MSR_LD_CNT = 0x4014ul, + ENT_INTR_INFO = 0x4016ul, + ENT_INTR_ERROR = 0x4018ul, + ENT_INST_LEN = 0x401aul, + TPR_THRESHOLD = 0x401cul, + 
CPU_EXEC_CTRL1 = 0x401eul, + + /* 32-Bit R/O Data Fields */ + VMX_INST_ERROR = 0x4400ul, + EXI_REASON = 0x4402ul, + EXI_INTR_INFO = 0x4404ul, + EXI_INTR_ERROR = 0x4406ul, + IDT_VECT_INFO = 0x4408ul, + IDT_VECT_ERROR = 0x440aul, + EXI_INST_LEN = 0x440cul, + EXI_INST_INFO = 0x440eul, + + /* 32-Bit Guest State Fields */ + GUEST_LIMIT_ES = 0x4800ul, + GUEST_LIMIT_CS = 0x4802ul, + GUEST_LIMIT_SS = 0x4804ul, + GUEST_LIMIT_DS = 0x4806ul, + GUEST_LIMIT_FS = 0x4808ul, + GUEST_LIMIT_GS = 0x480aul, + GUEST_LIMIT_LDTR = 0x480cul, + GUEST_LIMIT_TR = 0x480eul, + GUEST_LIMIT_GDTR = 0x4810ul, + GUEST_LIMIT_IDTR = 0x4812ul, + GUEST_AR_ES = 0x4814ul, + GUEST_AR_CS = 0x4816ul, + GUEST_AR_SS = 0x4818ul, + GUEST_AR_DS = 0x481aul, + GUEST_AR_FS = 0x481cul, + GUEST_AR_GS = 0x481eul, + GUEST_AR_LDTR = 0x4820ul, + GUEST_AR_TR = 0x4822ul, + GUEST_INTR_STATE = 0x4824ul, + GUEST_ACTV_STATE = 0x4826ul, + GUEST_SMBASE = 0x4828ul, + GUEST_SYSENTER_CS = 0x482aul, + + /* 32-Bit Host State Fields */ + HOST_SYSENTER_CS = 0x4c00ul, + + /* Natural-Width Control Fields */ + CR0_MASK = 0x6000ul, + CR4_MASK = 0x6002ul, + CR0_READ_SHADOW = 0x6004ul, + CR4_READ_SHADOW = 0x6006ul, + CR3_TARGET_0 = 0x6008ul, + CR3_TARGET_1 = 0x600aul, + CR3_TARGET_2 = 0x600cul, + CR3_TARGET_3 = 0x600eul, + + /* Natural-Width R/O Data Fields */ + EXI_QUALIFICATION = 0x6400ul, + IO_RCX = 0x6402ul, + IO_RSI = 0x6404ul, + IO_RDI = 0x6406ul, + IO_RIP = 0x6408ul, + GUEST_LINEAR_ADDRESS = 0x640aul, + + /* Natural-Width Guest State Fields */ + GUEST_CR0 = 0x6800ul, + GUEST_CR3 = 0x6802ul, + GUEST_CR4 = 0x6804ul, + GUEST_BASE_ES = 0x6806ul, + GUEST_BASE_CS = 0x6808ul, + GUEST_BASE_SS = 0x680aul, + GUEST_BASE_DS = 0x680cul, + GUEST_BASE_FS = 0x680eul, + GUEST_BASE_GS = 0x6810ul, + GUEST_BASE_LDTR = 0x6812ul, + GUEST_BASE_TR = 0x6814ul, + GUEST_BASE_GDTR = 0x6816ul, + GUEST_BASE_IDTR = 0x6818ul, + GUEST_DR7 = 0x681aul, + GUEST_RSP = 0x681cul, + GUEST_RIP = 0x681eul, + GUEST_RFLAGS = 0x6820ul, + GUEST_PENDING_DEBUG = 0x6822ul, + 
GUEST_SYSENTER_ESP = 0x6824ul, + GUEST_SYSENTER_EIP = 0x6826ul, + + /* Natural-Width Host State Fields */ + HOST_CR0 = 0x6c00ul, + HOST_CR3 = 0x6c02ul, + HOST_CR4 = 0x6c04ul, + HOST_BASE_FS = 0x6c06ul, + HOST_BASE_GS = 0x6c08ul, + HOST_BASE_TR = 0x6c0aul, + HOST_BASE_GDTR = 0x6c0cul, + HOST_BASE_IDTR = 0x6c0eul, + HOST_SYSENTER_ESP = 0x6c10ul, + HOST_SYSENTER_EIP = 0x6c12ul, + HOST_RSP = 0x6c14ul, + HOST_RIP = 0x6c16ul +}; + +enum Reason { + VMX_EXC_NMI = 0, + VMX_EXTINT = 1, + VMX_TRIPLE_FAULT = 2, + VMX_INIT = 3, + VMX_SIPI = 4, + VMX_SMI_IO = 5, + VMX_SMI_OTHER = 6, + VMX_INTR_WINDOW = 7, + VMX_NMI_WINDOW = 8, + VMX_TASK_SWITCH = 9, + VMX_CPUID = 10, + VMX_GETSEC = 11, + VMX_HLT = 12, + VMX_INVD = 13, + VMX_INVLPG = 14, + VMX_RDPMC = 15, + VMX_RDTSC = 16, + VMX_RSM = 17, + VMX_VMCALL = 18, + VMX_VMCLEAR = 19, + VMX_VMLAUNCH = 20, + VMX_VMPTRLD = 21, + VMX_VMPTRST = 22, + VMX_VMREAD = 23, + VMX_VMRESUME = 24, + VMX_VMWRITE = 25, + VMX_VMXOFF = 26, + VMX_VMXON = 27, + VMX_CR = 28, + VMX_DR = 29, + VMX_IO = 30, + VMX_RDMSR = 31, + VMX_WRMSR = 32, + VMX_FAIL_STATE = 33, + VMX_FAIL_MSR = 34, + VMX_MWAIT = 36, + VMX_MTF = 37, + VMX_MONITOR = 39, + VMX_PAUSE = 40, + VMX_FAIL_MCHECK = 41, + VMX_TPR_THRESHOLD = 43, + VMX_APIC_ACCESS = 44, + VMX_GDTR_IDTR = 46, + VMX_LDTR_TR = 47, + VMX_EPT_VIOLATION = 48, + VMX_EPT_MISCONFIG = 49, + VMX_INVEPT = 50, + VMX_PREEMPT = 52, + VMX_INVVPID = 53, + VMX_WBINVD = 54, + VMX_XSETBV = 55 +}; + +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ + +enum Ctrl_exi { + EXI_HOST_64 = 1UL << 9, + EXI_LOAD_PERF = 1UL << 12, + EXI_INTA = 1UL << 15, + EXI_LOAD_EFER = 1UL << 21, +}; + +enum Ctrl_ent { + ENT_GUEST_64 = 1UL << 9, + ENT_LOAD_EFER = 1UL << 15, +}; + +enum Ctrl_pin { + PIN_EXTINT = 1ul << 0, + PIN_NMI = 1ul << 3, + PIN_VIRT_NMI = 1ul << 5, +}; + +enum Ctrl0 { + CPU_INTR_WINDOW = 1ul << 2, + CPU_HLT = 1ul << 7, + CPU_INVLPG = 1ul << 9, + CPU_CR3_LOAD = 1ul << 15, + CPU_CR3_STORE = 1ul 
<< 16, + CPU_TPR_SHADOW = 1ul << 21, + CPU_NMI_WINDOW = 1ul << 22, + CPU_IO = 1ul << 24, + CPU_IO_BITMAP = 1ul << 25, + CPU_SECONDARY = 1ul << 31, +}; + +enum Ctrl1 { + CPU_EPT = 1ul << 1, + CPU_VPID = 1ul << 5, + CPU_URG = 1ul << 7, +}; + +#define SAVE_GPR \ + "xchg %rax, regs\n\t" \ + "xchg %rbx, regs+0x8\n\t" \ + "xchg %rcx, regs+0x10\n\t" \ + "xchg %rdx, regs+0x18\n\t" \ + "xchg %rbp, regs+0x28\n\t" \ + "xchg %rsi, regs+0x30\n\t" \ + "xchg %rdi, regs+0x38\n\t" \ + "xchg %r8, regs+0x40\n\t" \ + "xchg %r9, regs+0x48\n\t" \ + "xchg %r10, regs+0x50\n\t" \ + "xchg %r11, regs+0x58\n\t" \ + "xchg %r12, regs+0x60\n\t" \ + "xchg %r13, regs+0x68\n\t" \ + "xchg %r14, regs+0x70\n\t" \ + "xchg %r15, regs+0x78\n\t" + +#define LOAD_GPR SAVE_GPR + +#define SAVE_GPR_C \ + "xchg %%rax, regs\n\t" \ + "xchg %%rbx, regs+0x8\n\t" \ + "xchg %%rcx, regs+0x10\n\t" \ + "xchg %%rdx, regs+0x18\n\t" \ + "xchg %%rbp, regs+0x28\n\t" \ + "xchg %%rsi, regs+0x30\n\t" \ + "xchg %%rdi, regs+0x38\n\t" \ + "xchg %%r8, regs+0x40\n\t" \ + "xchg %%r9, regs+0x48\n\t" \ + "xchg %%r10, regs+0x50\n\t" \ + "xchg %%r11, regs+0x58\n\t" \ + "xchg %%r12, regs+0x60\n\t" \ + "xchg %%r13, regs+0x68\n\t" \ + "xchg %%r14, regs+0x70\n\t" \ + "xchg %%r15, regs+0x78\n\t" + +#define LOAD_GPR_C SAVE_GPR_C + +#define SAVE_RFLAGS \ + "pushf\n\t" \ + "pop regs+0x80\n\t" + +#define LOAD_RFLAGS \ + "push regs+0x80\n\t" \ + "popf\n\t" + +#define VMX_IO_SIZE_MASK 0x7 +#define _VMX_IO_BYTE 1 +#define _VMX_IO_WORD 2 +#define _VMX_IO_LONG 3 +#define VMX_IO_DIRECTION_MASK (1ul << 3) +#define VMX_IO_IN (1ul << 3) +#define VMX_IO_OUT 0 +#define VMX_IO_STRING (1ul << 4) +#define VMX_IO_REP (1ul << 5) +#define VMX_IO_OPRAND_DX (1ul << 6) +#define VMX_IO_PORT_MASK 0xFFFF0000 +#define VMX_IO_PORT_SHIFT 16 + +#define VMX_TEST_VMEXIT 1 +#define VMX_TEST_EXIT 2 +#define VMX_TEST_RESUME 3 +#define VMX_TEST_LAUNCH_ERR 4 +#define VMX_TEST_RESUME_ERR 5 + +#define HYPERCALL_BIT (1ul << 12) +#define HYPERCALL_MASK 0xFFF +#define HYPERCALL_VMEXIT 
0x1 + +static inline int vmcs_clear(struct vmcs *vmcs) +{ + bool ret; + asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc"); + return ret; +} + +static inline u64 vmcs_read(enum Encoding enc) +{ + u64 val; + asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc"); + return val; +} + +static inline int vmcs_write(enum Encoding enc, u64 val) +{ + bool ret; + asm volatile ("vmwrite %1, %2; setbe %0" + : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc"); + return ret; +} + +static inline int vmcs_save(struct vmcs **vmcs) +{ + bool ret; + + asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc"); + return ret; +} + +#endif +
This is the first version of the VMX nested environment. It contains test cases for the basic VMX instructions, including VMXON/VMXOFF/VMPTRLD/VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the basic execution routine in the VMX nested environment and lets the VM print "Hello World" to show that it ran successfully. The first release also includes a test suite for vmenter (vmlaunch and vmresume). In addition, a hypercall mechanism is included; it is currently used to trigger a normal VM exit. New files added: x86/vmx.h : contains all VMX-related macro declarations x86/vmx.c : main file for the VMX nested test case Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> --- ChangeLog: 1. Refine the code in function vmx_run() 2. Fix a bug in setting GUEST_RFLAGS 3. Move the selector defines to lib/x86/vm.h 4. Move the CR0/4 defines to lib/x86/vm.h, as well as some defines from lib/x86/vm.c 5. Move some inline functions to lib/x86/processor.h 6. Move some inline functions (vmcs related) to x86/vmx.h --- config-x86-common.mak | 2 + config-x86_64.mak | 1 + lib/x86/msr.h | 5 + lib/x86/processor.h | 15 ++ lib/x86/vm.c | 4 - lib/x86/vm.h | 21 ++ x86/cstart64.S | 4 + x86/unittests.cfg | 6 + x86/vmx.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++ x86/vmx.h | 466 ++++++++++++++++++++++++++++++++++ 10 files changed, 1194 insertions(+), 4 deletions(-) create mode 100644 x86/vmx.c create mode 100644 x86/vmx.h