diff mbox

[v3,2/2] kvm-unit-tests : The first version of VMX nested test case

Message ID 1373990297-8214-3-git-send-email-yzt356@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arthur Chunqi Li July 16, 2013, 3:58 p.m. UTC
This is the first version of the VMX nested environment test case. It
contains test cases for the basic VMX instructions, including VMXON/
VMXOFF/VMPTRLD/VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch
also tests the basic execution path in a nested VMX environment and
lets the VM print "Hello World" to show that it ran successfully.

New files added:
x86/vmx.h : contains all VMX related macro declarations
x86/vmx.c : main file for VMX nested test case

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 config-x86-common.mak |    2 +
 config-x86_64.mak     |    1 +
 lib/x86/msr.h         |    5 +
 x86/cstart64.S        |    4 +
 x86/unittests.cfg     |    6 +
 x86/vmx.c             |  561 +++++++++++++++++++++++++++++++++++++++++++++++++
 x86/vmx.h             |  406 +++++++++++++++++++++++++++++++++++
 7 files changed, 985 insertions(+)
 create mode 100644 x86/vmx.c
 create mode 100644 x86/vmx.h
diff mbox

Patch

diff --git a/config-x86-common.mak b/config-x86-common.mak
index 455032b..34a41e1 100644
--- a/config-x86-common.mak
+++ b/config-x86-common.mak
@@ -101,6 +101,8 @@  $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
 
 $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
 
+$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
+
 arch_clean:
 	$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
 	$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
diff --git a/config-x86_64.mak b/config-x86_64.mak
index 91ffcce..5d9b22a 100644
--- a/config-x86_64.mak
+++ b/config-x86_64.mak
@@ -11,5 +11,6 @@  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 	  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
 	  $(TEST_DIR)/pcid.flat
 tests += $(TEST_DIR)/svm.flat
+tests += $(TEST_DIR)/vmx.flat
 
 include config-x86-common.mak
diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 509a421..281255a 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -396,6 +396,11 @@ 
 #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PIN		0x0000048d
+#define MSR_IA32_VMX_TRUE_PROC		0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT		0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY		0x00000490
+
 
 /* AMD-V MSRs */
 
diff --git a/x86/cstart64.S b/x86/cstart64.S
index 24df5f8..0fe76da 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -4,6 +4,10 @@ 
 .globl boot_idt
 boot_idt = 0
 
+.globl idt_descr
+.globl tss_descr
+.globl gdt64_desc
+
 ipi_vector = 0x20
 
 max_cpus = 64
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index bc9643e..85c36aa 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -149,3 +149,9 @@  extra_params = --append "10000000 `date +%s`"
 file = pcid.flat
 extra_params = -cpu qemu64,+pcid
 arch = x86_64
+
+[vmx]
+file = vmx.flat
+extra_params = -cpu host,+vmx
+arch = x86_64
+
diff --git a/x86/vmx.c b/x86/vmx.c
new file mode 100644
index 0000000..b858400
--- /dev/null
+++ b/x86/vmx.c
@@ -0,0 +1,561 @@ 
+#include "libcflat.h"
+#include "processor.h"
+#include "vm.h"
+#include "desc.h"
+#include "vmx.h"
+#include "msr.h"
+#include "smp.h"
+#include "io.h"
+#include "setjmp.h"
+
+int fails = 0, tests = 0;	/* counters maintained by report() and printed by main() */
+u32 *vmxon_region;		/* page allocated in init_vmx(); memory operand of VMXON in test_vmxon() */
+struct vmcs *vmcs_root;		/* VMCS set up by main() via init_vmcs() and then launched */
+void *io_bmp1, *io_bmp2;	/* zeroed pages written to IO_BITMAP_A / IO_BITMAP_B */
+void *msr_bmp;			/* zeroed page written to MSR_BITMAP */
+u32 vpid_ctr;			/* pre-incremented for every VPID written in init_vmcs_ctrl() */
+char *guest_stack, *host_stack;	/* one page each, allocated in init_vmx() */
+char *guest_syscall_stack, *host_syscall_stack;	/* one page each, used for the SYSENTER_ESP fields */
+u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];	/* control words computed in init_vmcs() and written to the VMCS */
+ulong fix_cr0_set, fix_cr0_clr;	/* MSR_IA32_VMX_CR0_FIXED0 / _FIXED1, read in init_vmx() */
+ulong fix_cr4_set, fix_cr4_clr;	/* MSR_IA32_VMX_CR4_FIXED0 / _FIXED1, read in init_vmx() */
+struct regs regs;		/* GPR swap area addressed by the SAVE_GPR / LOAD_GPR asm macros */
+jmp_buf env;			/* setjmp() in main(); longjmp() from vmx_handler() on a HLT exit */
+
+extern u64 gdt64_desc[];	/* symbols made global in x86/cstart64.S by this patch */
+extern u64 idt_descr[];
+extern u64 tss_descr[];
+extern void *entry_vmx;		/* asm labels defined by the top-level asm() blocks in this file */
+extern void *entry_sysenter;
+extern void *entry_guest;
+
+void report(const char *name, int result)
+{
+	tests++;		/* every call counts as one test, pass or fail */
+	if (!result) {
+		fails++;
+		printf("FAIL: %s\n", name);
+		return;
+	}
+	printf("PASS: %s\n", name);
+}
+
+static inline u64 get_rflags(void)	/* Return the current RFLAGS; static so a definition exists even when the call is not inlined (C99 inline rules). */
+{
+	u64 r;
+	asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");	/* push RFLAGS, pop it into r */
+	return r;
+}
+
+static inline void set_rflags(u64 r)	/* Load RFLAGS from r; static so a definition exists even when the call is not inlined (C99 inline rules). */
+{
+	asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");	/* push r, pop it into RFLAGS */
+}
+
+int vmcs_clear(struct vmcs *vmcs)	/* Execute VMCLEAR on the region at 'vmcs'; returns 0 on success, 1 if CF or ZF is set afterwards. */
+{
+	bool ret;	/* setbe result: 1 when CF=1 or ZF=1 */
+	asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");	/* memory operand is the pointer variable, i.e. the 64-bit address value */
+	return ret;
+}
+
+u64 vmcs_read(enum Encoding enc)	/* VMREAD field 'enc' of the current VMCS and return its value. */
+{
+	u64 val;
+	asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");	/* flags are not inspected, so a failed VMREAD is not reported */
+	return val;
+}
+
+int vmcs_write(enum Encoding enc, u64 val)	/* VMWRITE 'val' to field 'enc' of the current VMCS; returns 0 on success, 1 if CF or ZF is set. */
+{
+	bool ret;	/* setbe result: 1 when CF=1 or ZF=1 */
+	asm volatile ("vmwrite %1, %2; setbe %0"
+		: "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
+	return ret;
+}
+
+int make_vmcs_current(struct vmcs *vmcs)	/* VMPTRLD the region at 'vmcs'; returns 0 on success, 1 if CF or ZF is set. */
+{
+	bool ret;
+
+	asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");	/* memory operand is the pointer variable holding the address */
+	return ret;
+}
+
+int save_vmcs(struct vmcs **vmcs)	/* VMPTRST: store the current-VMCS pointer into *vmcs; returns 0 on success, 1 if CF or ZF is set. */
+{
+	bool ret;
+	/* VMPTRST writes its memory operand, so *vmcs must be an output, not an input. */
+	asm volatile ("vmptrst %1; setbe %0" : "=q" (ret), "=m" (*vmcs) : : "cc");
+	return ret;
+}
+
+/* entry_vmx: host entry point after a VM exit (its address is written to HOST_RIP in init_vmcs_host) */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_vmx\n\t"
+	"entry_vmx:\n\t"
+	SAVE_GPR			/* exchange the live (guest) GPRs with the contents of 'regs' */
+	"	call	vmx_handler\n\t"
+	LOAD_GPR			/* exchange back; only reached if vmx_handler() returns */
+	"	vmresume\n\t"
+);
+
+/* entry_sysenter: target written to the HOST/GUEST_SYSENTER_EIP fields; masks RAX to 4 bits and calls syscall_handler */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_sysenter\n\t"
+	"entry_sysenter:\n\t"
+	SAVE_GPR			/* exchange the GPRs with the contents of 'regs' */
+	"	and	$0xf, %rax\n\t"
+	"	push	%rax\n\t"	/* NOTE(review): value is pushed, but syscall_handler() takes its argument by the C calling convention -- confirm */
+	"	call	syscall_handler\n\t"	/* NOTE(review): nothing follows the call; execution falls through after it returns -- confirm intent */
+);
+
+void syscall_handler(u64 syscall_no)	/* Print the syscall number handed over by entry_sysenter (which masks it to 0..15). */
+{
+	printf("Here in syscall_handler, syscall_no = %d\n", (int)syscall_no);	/* cast: %d takes an int, not a u64 */
+}
+
+void vmx_run()	/* Execute VMLAUNCH on the current VMCS; the code after the asm runs only if VMLAUNCH falls through (i.e. failed). */
+{
+	bool ret;	/* setbe result: 1 when CF=1 or ZF=1 after VMLAUNCH */
+	printf("Now run vm.\n\n");
+	asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret));
+	printf("VMLAUNCH error, ret=%d\n", ret);
+}
+
+void vmx_resume()	/* Swap the saved guest GPRs back in from 'regs' and execute VMRESUME. */
+{
+	asm volatile(LOAD_GPR
+		"vmresume\n\t");
+	/* VMRESUME failed if execution reaches here */
+}
+
+void print_vmexit_info()	/* Dump exit reason, exit qualification, guest RIP/RSP and the GPR values held in 'regs'. */
+{
+	u64 guest_rip, guest_rsp;
+	ulong reason = vmcs_read(EXI_REASON) & 0xffff;	/* basic exit reason is bits 15:0 */
+	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
+	guest_rip = vmcs_read(GUEST_RIP);
+	guest_rsp = vmcs_read(GUEST_RSP);
+	printf("VMEXIT info:\n");
+	printf("\tvmexit reason = %d\n", (int)reason);	/* cast: %d takes an int */
+	printf("\texit qualification = 0x%llx\n", (unsigned long long)exit_qual);	/* natural-width field: print all 64 bits */
+	printf("\tBit 31 of reason = %x\n", (unsigned int)((vmcs_read(EXI_REASON) >> 31) & 1));
+	printf("\tguest_rip = 0x%llx\n", guest_rip);
+	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
+		regs.rax, regs.rbx, regs.rcx, regs.rdx);
+	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
+		guest_rsp, regs.rbp, regs.rsi, regs.rdi);	/* RSP comes from the VMCS, not from 'regs' */
+	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
+		regs.r8, regs.r9, regs.r10, regs.r11);
+	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
+		regs.r12, regs.r13, regs.r14, regs.r15);
+}
+
+void test_vmclear(void)	/* Report whether VMCLEAR on vmcs_root succeeds. */
+{
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;	/* pre-set CF and ZF so a pass requires the instruction to clear them */
+	set_rflags(rflags);
+	report("test vmclear", vmcs_clear(vmcs_root) == 0);
+}
+
+void test_vmxoff(void)	/* Execute VMXOFF and report success when both CF and ZF end up clear. */
+{
+	bool ret;	/* seta result: 1 when CF=0 and ZF=0 */
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;	/* pre-set CF and ZF so a pass requires the instruction to clear them */
+	set_rflags(rflags);
+	asm volatile("vmxoff; seta %0\n\t" : "=q"(ret) : : "cc");
+	report("test vmxoff", ret);
+}
+
+void vmx_handler()	/* C half of the VM-exit path, called from entry_vmx with the guest GPRs held in 'regs'; never returns. */
+{
+	u64 guest_rip;
+	ulong reason = vmcs_read(EXI_REASON) & 0xffff;	/* basic exit reason is bits 15:0 */
+
+	if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG)
+		&& !(rdmsr(MSR_EFER) & EFER_LMA))
+		printf("ERROR : PDPTEs should be checked\n");
+
+	guest_rip = vmcs_read(GUEST_RIP);
+
+	switch (reason) {
+	case VMX_VMCALL:
+		switch (regs.rax) {	/* guest passes the request code in RAX */
+		case TEST_VMRESUME:
+			regs.rax = 0xFFFF;	/* marker checked by test_vmresume() after resume */
+			break;
+		default:
+			printf("ERROR : Invalid VMCALL param : 0x%llx\n", (unsigned long long)regs.rax);	/* was %d with a u64 argument */
+		}
+		vmcs_write(GUEST_RIP, guest_rip + 3);	/* skip the 3-byte VMCALL instruction */
+		goto vmx_resume;
+	case VMX_IO:
+		print_vmexit_info();
+		break;
+	case VMX_HLT:
+		printf("\nVM exit.\n");
+		longjmp(env, 1);	/* back to the setjmp() in main() */
+		/* Should not reach here */
+		goto vmx_exit;
+	case VMX_EXC_NMI:
+	case VMX_EXTINT:
+	case VMX_INVLPG:
+	case VMX_CR:
+	case VMX_EPT_VIOLATION:
+	default:
+		break;
+	}
+	printf("ERROR : Unhandled vmx exit.\n");
+	print_vmexit_info();
+vmx_exit:
+	exit(-1);
+vmx_resume:
+	vmx_resume();
+	/* Should not reach here */
+	exit(-1);
+}
+
+void test_vmresume()	/* Guest side: VMCALL with TEST_VMRESUME, then check that RAX carries the handler's marker and RSP is unchanged. */
+{
+	u64 rax;
+	u64 rsp, resume_rsp;
+
+	rax = 0;
+	asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));	/* RSP before the exit */
+	asm volatile("mov %2, %%rax\n\t"
+		"vmcall\n\t"
+		"mov %%rax, %0\n\t"
+		"mov %%rsp, %1\n\t"
+		: "=r"(rax), "=r"(resume_rsp)
+		: "g"(TEST_VMRESUME) : "rax", "memory");	/* RAX is overwritten and the exit handler changes memory: tell the compiler */
+	report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
+}
+
+/* entry_guest: first guest instruction (its address is written to GUEST_RIP in init_vmcs_guest) */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_guest\n\t"
+	"entry_guest:\n\t"
+	"	call	guest_main\n\t"
+	"	hlt\n\t"	/* vmx_handler() treats the resulting VMX_HLT exit as the end of the guest run */
+);
+
+void guest_main(void)	/* Guest body: report the launch, print CR0/CR3/CR4 and a greeting, then run the VMRESUME test. */
+{
+	/* Reaching this point means VMLAUNCH succeeded */
+	report("test vmlaunch", 1);
+	printf("cr0 in guest = %llx\n", read_cr0());
+	printf("cr3 in guest = %llx\n", read_cr3());
+	printf("cr4 in guest = %llx\n", read_cr4());
+	printf("\nHello World!\n");
+	test_vmresume();
+}
+
+void init_vmcs_ctrl(void)	/* Write the VM-execution control fields of the current VMCS. */
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.1 */
+	vmcs_write(PIN_CONTROLS, ctrl_pin);
+	/* ctrl_cpu[0] was prepared in init_vmcs(), with I/O exiting cleared */
+	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
+	if (ctrl_cpu_rev[0].clr & CPU_SECONDARY) {	/* secondary controls exist when the bit is allowed to be 1 (.clr half), not when it is forced to 1 */
+		ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & ctrl_cpu_rev[1].clr;	/* force must-be-1 bits, drop unsupported ones */
+		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
+	}
+	vmcs_write(CR3_TARGET_COUNT, 0);
+	io_bmp1 = alloc_page();
+	io_bmp2 = alloc_page();
+	memset(io_bmp1, 0, PAGE_SIZE);	/* all-zero bitmaps */
+	memset(io_bmp2, 0, PAGE_SIZE);
+	vmcs_write(IO_BITMAP_A, (u64)io_bmp1);
+	vmcs_write(IO_BITMAP_B, (u64)io_bmp2);
+	msr_bmp = alloc_page();
+	memset(msr_bmp, 0, PAGE_SIZE);
+	vmcs_write(MSR_BITMAP, (u64)msr_bmp);
+	vmcs_write(VPID, ++vpid_ctr);	/* a fresh non-zero VPID per VMCS */
+}
+
+void init_vmcs_host(void)	/* Fill the host-state area (plus entry/exit controls and a few misc fields) of the current VMCS from the running CPU state. */
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.2 */
+	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.2.1.3 */
+	vmcs_write(ENT_CONTROLS, ctrl_enter);
+	vmcs_write(EXI_CONTROLS, ctrl_exit);
+
+	/* 26.2.2 */
+	vmcs_write(HOST_CR0, read_cr0());
+	vmcs_write(HOST_CR3, read_cr3());
+	vmcs_write(HOST_CR4, read_cr4());
+	vmcs_write(HOST_SYSENTER_ESP,
+		(u64)(host_syscall_stack + PAGE_SIZE));	/* aligned top of the page, not the odd address PAGE_SIZE - 1 */
+	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
+
+	/* 26.2.3 */
+	vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);	/* NOTE(review): address of the symbol itself; init_vmcs_guest reads a limit through the same symbol -- confirm this is the table base */
+	vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);	/* NOTE(review): same question as for the GDTR base */
+	vmcs_write(HOST_BASE_FS, 0);
+	vmcs_write(HOST_BASE_GS, 0);
+
+	/* Set other vmcs area */
+	vmcs_write(PF_ERROR_MASK, 0);
+	vmcs_write(PF_ERROR_MATCH, 0);
+	vmcs_write(VMCS_LINK_PTR, ~0ul);
+	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
+	vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE));	/* aligned stack top; entry_vmx does a 'call' straight away */
+	vmcs_write(HOST_RIP, (u64)(&entry_vmx));
+}
+
+void init_vmcs_guest(void)	/* Fill the guest-state area of the current VMCS, mostly mirroring the host's own state. */
+{
+	/* 26.3 CHECKING AND LOADING GUEST STATE */
+	ulong guest_cr0, guest_cr4, guest_cr3;
+	/* 26.3.1.1 */
+	guest_cr0 = read_cr0();
+	guest_cr4 = read_cr4();
+	guest_cr3 = read_cr3();
+	if (ctrl_enter & ENT_GUEST_64) {	/* a 64-bit guest needs paging and PAE enabled */
+		guest_cr0 |= CR0_PG;
+		guest_cr4 |= CR4_PAE;
+	}
+	if ((ctrl_enter & ENT_GUEST_64) == 0)
+		guest_cr4 &= (~CR4_PCIDE);
+	if (guest_cr0 & CR0_PG)
+		guest_cr0 |= CR0_PE;
+	vmcs_write(GUEST_CR0, guest_cr0);
+	vmcs_write(GUEST_CR3, guest_cr3);
+	vmcs_write(GUEST_CR4, guest_cr4);
+	vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SYSENTER_ESP,
+		(u64)(guest_syscall_stack + PAGE_SIZE));	/* aligned top of the page, not the odd address PAGE_SIZE - 1 */
+	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(GUEST_DR7, 0);
+	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.3.1.2 */
+	vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(GUEST_SEL_LDTR, 0);
+
+	vmcs_write(GUEST_BASE_CS, 0);
+	vmcs_write(GUEST_BASE_ES, 0);
+	vmcs_write(GUEST_BASE_SS, 0);
+	vmcs_write(GUEST_BASE_DS, 0);
+	vmcs_write(GUEST_BASE_FS, 0);
+	vmcs_write(GUEST_BASE_GS, 0);
+	vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(GUEST_BASE_LDTR, 0);
+
+	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
+	vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
+
+	vmcs_write(GUEST_AR_CS, 0xa09b);
+	vmcs_write(GUEST_AR_DS, 0xc093);
+	vmcs_write(GUEST_AR_ES, 0xc093);
+	vmcs_write(GUEST_AR_FS, 0xc093);
+	vmcs_write(GUEST_AR_GS, 0xc093);
+	vmcs_write(GUEST_AR_SS, 0xc093);
+	vmcs_write(GUEST_AR_LDTR, 0x82);
+	vmcs_write(GUEST_AR_TR, 0x8b);
+
+	/* 26.3.1.3 -- NOTE(review): bases use the symbol addresses while limits are read through them; confirm the symbol layout */
+	vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(GUEST_LIMIT_GDTR,
+		((struct descr *)gdt64_desc)->limit & 0xffff);
+	vmcs_write(GUEST_LIMIT_IDTR,
+		((struct descr *)idt_descr)->limit & 0xffff);
+
+	/* 26.3.1.4 */
+	vmcs_write(GUEST_RIP, (u64)(&entry_guest));
+	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE));	/* aligned stack top; entry_guest does a 'call' straight away */
+	vmcs_write(GUEST_RFLAGS, 0x2);
+
+	/* 26.3.1.5 */
+	vmcs_write(GUEST_ACTV_STATE, 0);
+	vmcs_write(GUEST_INTR_STATE, 0);
+}
+
+int init_vmcs(struct vmcs **vmcs)	/* Allocate a VMCS page into *vmcs, VMCLEAR + VMPTRLD it, and fill control/host/guest state; returns 0 on success, 1 on failure. */
+{
+	*vmcs = alloc_page();	/* any previous value of *vmcs is overwritten */
+	memset(*vmcs, 0, PAGE_SIZE);
+	(*vmcs)->revision_id = basic.revision;
+	/* vmclear first to init vmcs */
+	if (vmcs_clear(*vmcs)) {
+		printf("%s : vmcs_clear error\n", __func__);
+		return 1;
+	}
+
+	if (make_vmcs_current(*vmcs)) {
+		printf("%s : make_vmcs_current error\n", __func__);
+		return 1;
+	}
+
+	/* All settings to pin/exit/enter/cpu
+	   control fields should be placed here */
+	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
+	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
+	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
+	ctrl_cpu[0] |= CPU_HLT;
+	/* Disable IO instruction VMEXIT now */
+	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
+	ctrl_cpu[1] = 0;
+
+	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;	/* OR in the .set half, mask with the .clr half of the capability value */
+	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
+	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
+	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
+
+	init_vmcs_ctrl();
+	init_vmcs_host();
+	init_vmcs_guest();
+	return 0;
+}
+
+void init_vmx(void)	/* Read the VMX capability MSRs, apply the fixed CR0/CR4 bits, and allocate the VMXON region and the stacks. */
+{
+	vmxon_region = alloc_page();
+	memset(vmxon_region, 0, PAGE_SIZE);
+
+	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
+			: MSR_IA32_VMX_PINBASED_CTLS);	/* basic.ctrl selects the TRUE_* capability MSRs */
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
+			: MSR_IA32_VMX_EXIT_CTLS);
+	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
+			: MSR_IA32_VMX_ENTRY_CTLS);
+	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
+			: MSR_IA32_VMX_PROCBASED_CTLS);
+	if (ctrl_cpu_rev[0].clr & CPU_SECONDARY)	/* a feature is supported when its bit may be 1 (.clr half), not when it must be 1 */
+		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
+	if (ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID))	/* same rule for the EPT / VPID capability MSR */
+		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+
+	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
+	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE);
+
+	*vmxon_region = basic.revision;	/* first word of the VMXON region is the revision id */
+
+	guest_stack = alloc_page();
+	memset(guest_stack, 0, PAGE_SIZE);
+	guest_syscall_stack = alloc_page();
+	memset(guest_syscall_stack, 0, PAGE_SIZE);
+	host_stack = alloc_page();
+	memset(host_stack, 0, PAGE_SIZE);
+	host_syscall_stack = alloc_page();
+	memset(host_syscall_stack, 0, PAGE_SIZE);
+}
+
+int test_vmx_capability(void)
+{
+	/* bit 5 of the 'c' register returned by CPUID leaf 1 */
+	u64 cpuid_bit = (cpuid(1).c >> 5) & 1;
+	/* bits 0 and 2 of MSR_IA32_FEATURE_CONTROL must both be set */
+	u64 msr_ok = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
+	u64 usable = cpuid_bit & msr_ok;
+	report("test vmx capability", usable);
+	return !usable;		/* 0 when VMX can be used, 1 otherwise */
+}
+
+int test_vmxon(void)	/* Execute VMXON on vmxon_region and report the outcome; returns 0 on success, 1 if CF or ZF is set. */
+{
+	bool ret;	/* setbe result: 1 when CF=1 or ZF=1 */
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;	/* pre-set CF and ZF so a pass requires the instruction to clear them */
+	set_rflags(rflags);
+	asm volatile ("vmxon %1; setbe %0\n\t"
+		: "=q"(ret) : "m"(vmxon_region) : "cc");
+	report("test vmxon", !ret);
+	return ret;
+}
+
+void test_vmptrld(void)	/* Report whether VMPTRLD succeeds on a freshly allocated page that carries the right revision id. */
+{
+	u64 rflags;
+	struct vmcs *vmcs;
+
+	vmcs = alloc_page();	/* NOTE(review): not zeroed, unlike the pages in init_vmcs(); the page is not released afterwards */
+	vmcs->revision_id = basic.revision;
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;	/* pre-set CF and ZF so a pass requires the instruction to clear them */
+	set_rflags(rflags);
+	report("test vmptrld", make_vmcs_current(vmcs) == 0);
+}
+
+void test_vmptrst(void)	/* Make a new VMCS current via init_vmcs(), then check that VMPTRST returns its address. */
+{
+	u64 rflags;
+	int ret;
+	struct vmcs *vmcs1, *vmcs2;
+
+	/* init_vmcs() allocates and zeroes the page itself and stores it in vmcs1, */
+	/* so allocating one here as well would only leak it. */
+	init_vmcs(&vmcs1);
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;	/* pre-set CF and ZF so a pass requires the instruction to clear them */
+	set_rflags(rflags);
+	ret = save_vmcs(&vmcs2);
+	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
+}
+
+int main(void)	/* Run the VMX instruction tests in order, launch the guest, and print a summary; returns 1 if any test failed. */
+{
+	setup_vm();
+	setup_idt();
+
+	if (test_vmx_capability() != 0) {
+		printf("ERROR : vmx not supported, check +vmx option\n");
+		goto exit;
+	}
+	init_vmx();
+	if (test_vmxon() != 0)
+		goto exit;
+	test_vmptrld();
+	test_vmclear();
+	test_vmptrst();
+	init_vmcs(&vmcs_root);	/* leaves vmcs_root as the current VMCS for the launch below */
+
+	if (setjmp(env) == 0){	/* vmx_handler() longjmp()s back here with 1 on the guest's HLT exit */
+		vmx_run();
+		/* Should not reach here */
+		report("test vmlaunch", 0);
+	}
+	test_vmxoff();
+
+exit:
+	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
+	return fails ? 1 : 0;
+}
diff --git a/x86/vmx.h b/x86/vmx.h
new file mode 100644
index 0000000..167815d
--- /dev/null
+++ b/x86/vmx.h
@@ -0,0 +1,406 @@ 
+#ifndef __HYPERVISOR_H
+#define __HYPERVISOR_H
+
+#include "libcflat.h"
+
+struct vmcs {	/* Header of a VMCS region; vmx.c sets revision_id from basic.revision before VMCLEAR/VMPTRLD */
+	u32 revision_id; /* vmcs revision identifier */
+	u32 abort; /* VMX-abort indicator */
+	/* VMCS data */
+	char data[0];	/* zero-length trailing array: the remainder of the page */
+};
+
+struct regs {	/* GPR swap area; the SAVE_GPR asm macro addresses it by byte offset, so field order and size matter */
+	u64 rax;	/* offset 0x00 */
+	u64 rcx;	/* offset 0x08 */
+	u64 rdx;	/* offset 0x10 */
+	u64 rbx;	/* offset 0x18 */
+	u64 cr2;	/* offset 0x20; not touched by SAVE_GPR */
+	u64 rbp;	/* offset 0x28 */
+	u64 rsi;	/* offset 0x30 */
+	u64 rdi;	/* offset 0x38 */
+	u64 r8;		/* offset 0x40 */
+	u64 r9;
+	u64 r10;
+	u64 r11;
+	u64 r12;
+	u64 r13;
+	u64 r14;
+	u64 r15;	/* offset 0x78 */
+};
+
+static union vmx_basic {	/* Bit-field view of MSR_IA32_VMX_BASIC; 'val' is filled in init_vmx() */
+	u64 val;
+	struct {
+		u32 revision;	/* copied into the VMXON region and every VMCS as the revision id */
+		u32	size:13,
+			: 3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1;	/* when set, init_vmx() reads the MSR_IA32_VMX_TRUE_* capability MSRs */
+	};
+} basic;
+
+static union vmx_ctrl_pin {	/* Pin-based controls capability value, read in init_vmx() */
+	u64 val;
+	struct {
+		u32 set, clr;	/* low / high 32 bits of val; init_vmcs() computes (ctrl | set) & clr */
+	};
+} ctrl_pin_rev;
+
+static union vmx_ctrl_cpu {	/* Processor-based controls capability values: [0] primary, [1] secondary (PROCBASED_CTLS2) */
+	u64 val;
+	struct {
+		u32 set, clr;	/* low / high 32 bits of val; init_vmcs() computes (ctrl | set) & clr */
+	};
+} ctrl_cpu_rev[2];
+
+static union vmx_ctrl_exit {	/* VM-exit controls capability value, read in init_vmx() */
+	u64 val;
+	struct {
+		u32 set, clr;	/* low / high 32 bits of val; init_vmcs() computes (ctrl | set) & clr */
+	};
+} ctrl_exit_rev;
+
+static union vmx_ctrl_ent {	/* VM-entry controls capability value, read in init_vmx() */
+	u64 val;
+	struct {
+		u32 set, clr;	/* low / high 32 bits of val; init_vmcs() computes (ctrl | set) & clr */
+	};
+} ctrl_enter_rev;
+
+static union vmx_ept_vpid {	/* Bit-field view of MSR_IA32_VMX_EPT_VPID_CAP; 'val' is filled in init_vmx() */
+	u64 val;
+	struct {
+		u32:16,
+			super:2,	/* bits 17:16 of val */
+			: 2,
+			invept:1,	/* bit 20 of val */
+			: 11;
+		u32	invvpid:1;	/* bit 32 of val */
+	};
+} ept_vpid;
+
+struct descr {	/* 16-bit limit immediately followed by a 64-bit base, as in a descriptor-table register image */
+	u16 limit;
+	u64 addr;	/* must sit at offset 2, hence the packed attribute below */
+} __attribute__((packed));
+
+enum Encoding {	/* VMCS field encodings accepted by vmcs_read() / vmcs_write() */
+	/* 16-Bit Control Fields */
+	VPID			= 0x0000ul,
+	/* Posted-interrupt notification vector */
+	PINV			= 0x0002ul,
+	/* EPTP index */
+	EPTP_IDX		= 0x0004ul,
+
+	/* 16-Bit Guest State Fields */
+	GUEST_SEL_ES		= 0x0800ul,
+	GUEST_SEL_CS		= 0x0802ul,
+	GUEST_SEL_SS		= 0x0804ul,
+	GUEST_SEL_DS		= 0x0806ul,
+	GUEST_SEL_FS		= 0x0808ul,
+	GUEST_SEL_GS		= 0x080aul,
+	GUEST_SEL_LDTR		= 0x080cul,
+	GUEST_SEL_TR		= 0x080eul,
+	GUEST_INT_STATUS	= 0x0810ul,
+
+	/* 16-Bit Host State Fields */
+	HOST_SEL_ES		= 0x0c00ul,
+	HOST_SEL_CS		= 0x0c02ul,
+	HOST_SEL_SS		= 0x0c04ul,
+	HOST_SEL_DS		= 0x0c06ul,
+	HOST_SEL_FS		= 0x0c08ul,
+	HOST_SEL_GS		= 0x0c0aul,
+	HOST_SEL_TR		= 0x0c0cul,
+
+	/* 64-Bit Control Fields */
+	IO_BITMAP_A		= 0x2000ul,
+	IO_BITMAP_B		= 0x2002ul,
+	MSR_BITMAP		= 0x2004ul,
+	EXIT_MSR_ST_ADDR	= 0x2006ul,
+	EXIT_MSR_LD_ADDR	= 0x2008ul,
+	ENTER_MSR_LD_ADDR	= 0x200aul,
+	VMCS_EXEC_PTR		= 0x200cul,
+	TSC_OFFSET		= 0x2010ul,
+	TSC_OFFSET_HI		= 0x2011ul,
+	APIC_VIRT_ADDR		= 0x2012ul,
+	APIC_ACCS_ADDR		= 0x2014ul,
+	EPTP			= 0x201aul,
+	EPTP_HI			= 0x201bul,
+
+	/* 64-Bit Readonly Data Field */
+	INFO_PHYS_ADDR		= 0x2400ul,
+
+	/* 64-Bit Guest State */
+	VMCS_LINK_PTR		= 0x2800ul,
+	VMCS_LINK_PTR_HI	= 0x2801ul,
+	GUEST_DEBUGCTL		= 0x2802ul,
+	GUEST_DEBUGCTL_HI	= 0x2803ul,
+	GUEST_EFER		= 0x2806ul,
+	GUEST_PERF_GLOBAL_CTRL	= 0x2808ul,
+	GUEST_PDPTE		= 0x280aul,
+
+	/* 64-Bit Host State */
+	HOST_EFER		= 0x2c02ul,
+	HOST_PERF_GLOBAL_CTRL	= 0x2c04ul,
+
+	/* 32-Bit Control Fields */
+	PIN_CONTROLS		= 0x4000ul,
+	CPU_EXEC_CTRL0		= 0x4002ul,
+	EXC_BITMAP		= 0x4004ul,
+	PF_ERROR_MASK		= 0x4006ul,
+	PF_ERROR_MATCH		= 0x4008ul,
+	CR3_TARGET_COUNT	= 0x400aul,
+	EXI_CONTROLS		= 0x400cul,
+	EXI_MSR_ST_CNT		= 0x400eul,
+	EXI_MSR_LD_CNT		= 0x4010ul,
+	ENT_CONTROLS		= 0x4012ul,
+	ENT_MSR_LD_CNT		= 0x4014ul,
+	ENT_INTR_INFO		= 0x4016ul,
+	ENT_INTR_ERROR		= 0x4018ul,
+	ENT_INST_LEN		= 0x401aul,
+	TPR_THRESHOLD		= 0x401cul,
+	CPU_EXEC_CTRL1		= 0x401eul,
+
+	/* 32-Bit R/O Data Fields */
+	VMX_INST_ERROR		= 0x4400ul,
+	EXI_REASON		= 0x4402ul,
+	EXI_INTR_INFO		= 0x4404ul,
+	EXI_INTR_ERROR		= 0x4406ul,
+	IDT_VECT_INFO		= 0x4408ul,
+	IDT_VECT_ERROR		= 0x440aul,
+	EXI_INST_LEN		= 0x440cul,
+	EXI_INST_INFO		= 0x440eul,
+
+	/* 32-Bit Guest State Fields */
+	GUEST_LIMIT_ES		= 0x4800ul,
+	GUEST_LIMIT_CS		= 0x4802ul,
+	GUEST_LIMIT_SS		= 0x4804ul,
+	GUEST_LIMIT_DS		= 0x4806ul,
+	GUEST_LIMIT_FS		= 0x4808ul,
+	GUEST_LIMIT_GS		= 0x480aul,
+	GUEST_LIMIT_LDTR	= 0x480cul,
+	GUEST_LIMIT_TR		= 0x480eul,
+	GUEST_LIMIT_GDTR	= 0x4810ul,
+	GUEST_LIMIT_IDTR	= 0x4812ul,
+	GUEST_AR_ES		= 0x4814ul,
+	GUEST_AR_CS		= 0x4816ul,
+	GUEST_AR_SS		= 0x4818ul,
+	GUEST_AR_DS		= 0x481aul,
+	GUEST_AR_FS		= 0x481cul,
+	GUEST_AR_GS		= 0x481eul,
+	GUEST_AR_LDTR		= 0x4820ul,
+	GUEST_AR_TR		= 0x4822ul,
+	GUEST_INTR_STATE	= 0x4824ul,
+	GUEST_ACTV_STATE	= 0x4826ul,
+	GUEST_SMBASE		= 0x4828ul,
+	GUEST_SYSENTER_CS	= 0x482aul,
+
+	/* 32-Bit Host State Fields */
+	HOST_SYSENTER_CS	= 0x4c00ul,
+
+	/* Natural-Width Control Fields */
+	CR0_MASK		= 0x6000ul,
+	CR4_MASK		= 0x6002ul,
+	CR0_READ_SHADOW	= 0x6004ul,
+	CR4_READ_SHADOW	= 0x6006ul,
+	CR3_TARGET_0		= 0x6008ul,
+	CR3_TARGET_1		= 0x600aul,
+	CR3_TARGET_2		= 0x600cul,
+	CR3_TARGET_3		= 0x600eul,
+
+	/* Natural-Width R/O Data Fields */
+	EXI_QUALIFICATION	= 0x6400ul,
+	IO_RCX			= 0x6402ul,
+	IO_RSI			= 0x6404ul,
+	IO_RDI			= 0x6406ul,
+	IO_RIP			= 0x6408ul,
+	GUEST_LINEAR_ADDRESS	= 0x640aul,
+
+	/* Natural-Width Guest State Fields */
+	GUEST_CR0		= 0x6800ul,
+	GUEST_CR3		= 0x6802ul,
+	GUEST_CR4		= 0x6804ul,
+	GUEST_BASE_ES		= 0x6806ul,
+	GUEST_BASE_CS		= 0x6808ul,
+	GUEST_BASE_SS		= 0x680aul,
+	GUEST_BASE_DS		= 0x680cul,
+	GUEST_BASE_FS		= 0x680eul,
+	GUEST_BASE_GS		= 0x6810ul,
+	GUEST_BASE_LDTR		= 0x6812ul,
+	GUEST_BASE_TR		= 0x6814ul,
+	GUEST_BASE_GDTR		= 0x6816ul,
+	GUEST_BASE_IDTR		= 0x6818ul,
+	GUEST_DR7		= 0x681aul,
+	GUEST_RSP		= 0x681cul,
+	GUEST_RIP		= 0x681eul,
+	GUEST_RFLAGS		= 0x6820ul,
+	GUEST_PENDING_DEBUG	= 0x6822ul,
+	GUEST_SYSENTER_ESP	= 0x6824ul,
+	GUEST_SYSENTER_EIP	= 0x6826ul,
+
+	/* Natural-Width Host State Fields */
+	HOST_CR0		= 0x6c00ul,
+	HOST_CR3		= 0x6c02ul,
+	HOST_CR4		= 0x6c04ul,
+	HOST_BASE_FS		= 0x6c06ul,
+	HOST_BASE_GS		= 0x6c08ul,
+	HOST_BASE_TR		= 0x6c0aul,
+	HOST_BASE_GDTR		= 0x6c0cul,
+	HOST_BASE_IDTR		= 0x6c0eul,
+	HOST_SYSENTER_ESP	= 0x6c10ul,
+	HOST_SYSENTER_EIP	= 0x6c12ul,
+	HOST_RSP		= 0x6c14ul,
+	HOST_RIP		= 0x6c16ul
+};
+
+enum Reason {	/* Exit reason numbers; vmx_handler() switches on the masked EXI_REASON field using these */
+	VMX_EXC_NMI		= 0,
+	VMX_EXTINT		= 1,
+	VMX_TRIPLE_FAULT	= 2,
+	VMX_INIT		= 3,
+	VMX_SIPI		= 4,
+	VMX_SMI_IO		= 5,
+	VMX_SMI_OTHER		= 6,
+	VMX_INTR_WINDOW		= 7,
+	VMX_NMI_WINDOW		= 8,
+	VMX_TASK_SWITCH		= 9,
+	VMX_CPUID		= 10,
+	VMX_GETSEC		= 11,
+	VMX_HLT			= 12,
+	VMX_INVD		= 13,
+	VMX_INVLPG		= 14,
+	VMX_RDPMC		= 15,
+	VMX_RDTSC		= 16,
+	VMX_RSM			= 17,
+	VMX_VMCALL		= 18,
+	VMX_VMCLEAR		= 19,
+	VMX_VMLAUNCH		= 20,
+	VMX_VMPTRLD		= 21,
+	VMX_VMPTRST		= 22,
+	VMX_VMREAD		= 23,
+	VMX_VMRESUME		= 24,
+	VMX_VMWRITE		= 25,
+	VMX_VMXOFF		= 26,
+	VMX_VMXON		= 27,
+	VMX_CR			= 28,
+	VMX_DR			= 29,
+	VMX_IO			= 30,
+	VMX_RDMSR		= 31,
+	VMX_WRMSR		= 32,
+	VMX_FAIL_STATE		= 33,
+	VMX_FAIL_MSR		= 34,
+	VMX_MWAIT		= 36,
+	VMX_MTF			= 37,
+	VMX_MONITOR		= 39,
+	VMX_PAUSE		= 40,
+	VMX_FAIL_MCHECK		= 41,
+	VMX_TPR_THRESHOLD	= 43,
+	VMX_APIC_ACCESS		= 44,
+	VMX_GDTR_IDTR		= 46,
+	VMX_LDTR_TR		= 47,
+	VMX_EPT_VIOLATION	= 48,
+	VMX_EPT_MISCONFIG	= 49,
+	VMX_INVEPT		= 50,
+	VMX_PREEMPT		= 52,
+	VMX_INVVPID		= 53,
+	VMX_WBINVD		= 54,
+	VMX_XSETBV		= 55
+};
+
+#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
+
+enum Ctrl_exi {	/* Bits of the EXI_CONTROLS field; init_vmcs() sets EXI_LOAD_EFER | EXI_HOST_64 */
+	EXI_HOST_64             = 1UL << 9,
+	EXI_LOAD_PERF		= 1UL << 12,
+	EXI_INTA                = 1UL << 15,
+	EXI_LOAD_EFER           = 1UL << 21,
+};
+
+enum Ctrl_ent {	/* Bits of the ENT_CONTROLS field; init_vmcs() sets both */
+	ENT_GUEST_64            = 1UL << 9,
+	ENT_LOAD_EFER           = 1UL << 15,
+};
+
+enum Ctrl_pin {	/* Bits of the PIN_CONTROLS field; init_vmcs() requests all three */
+	PIN_EXTINT              = 1ul << 0,
+	PIN_NMI                 = 1ul << 3,
+	PIN_VIRT_NMI            = 1ul << 5,
+};
+
+enum Ctrl0 {	/* Bits of the CPU_EXEC_CTRL0 field (ctrl_cpu[0]) */
+	CPU_INTR_WINDOW		= 1ul << 2,
+	CPU_HLT			= 1ul << 7,	/* set by init_vmcs(); the guest's HLT then exits with VMX_HLT */
+	CPU_INVLPG		= 1ul << 9,
+	CPU_CR3_LOAD		= 1ul << 15,
+	CPU_CR3_STORE		= 1ul << 16,
+	CPU_TPR_SHADOW		= 1ul << 21,
+	CPU_NMI_WINDOW		= 1ul << 22,
+	CPU_IO			= 1ul << 24,	/* cleared by init_vmcs() */
+	CPU_IO_BITMAP		= 1ul << 25,	/* cleared by init_vmcs() */
+	CPU_SECONDARY		= 1ul << 31,	/* gates use of CPU_EXEC_CTRL1 / MSR_IA32_VMX_PROCBASED_CTLS2 in vmx.c */
+};
+
+enum Ctrl1 {	/* Bits of the CPU_EXEC_CTRL1 field (ctrl_cpu[1]) */
+	CPU_EPT			= 1ul << 1,
+	CPU_VPID		= 1ul << 5,
+	CPU_URG			= 1ul << 7,
+};
+
+#define SEL_NULL_DESC		0x0	/* segment selector values; presumably match the GDT layout in cstart64.S -- TODO confirm */
+#define SEL_KERN_CODE_64	0x8	/* written to the CS and SYSENTER_CS fields */
+#define SEL_KERN_DATA_64	0x10	/* written to the SS/DS/ES/FS/GS selector fields */
+#define SEL_USER_CODE_64	0x18
+#define SEL_USER_DATA_64	0x20
+#define SEL_CODE_32		0x28
+#define SEL_DATA_32		0x30
+#define SEL_CODE_16		0x38
+#define SEL_DATA_16		0x40
+#define SEL_TSS_RUN		0x48	/* written to the TR selector fields */
+
+#define SAVE_GPR	/* exchange every GPR except RSP with its slot in 'regs' */	\
+	"xchg %rax, regs\n\t"			\
+	"xchg %rcx, regs+0x8\n\t"	/* offsets follow struct regs: rax, rcx, rdx, rbx */	\
+	"xchg %rdx, regs+0x10\n\t"		\
+	"xchg %rbx, regs+0x18\n\t"		\
+	"xchg %rbp, regs+0x28\n\t"	/* 0x20 is the cr2 slot and is skipped */	\
+	"xchg %rsi, regs+0x30\n\t"		\
+	"xchg %rdi, regs+0x38\n\t"		\
+	"xchg %r8, regs+0x40\n\t"		\
+	"xchg %r9, regs+0x48\n\t"		\
+	"xchg %r10, regs+0x50\n\t"		\
+	"xchg %r11, regs+0x58\n\t"		\
+	"xchg %r12, regs+0x60\n\t"		\
+	"xchg %r13, regs+0x68\n\t"		\
+	"xchg %r14, regs+0x70\n\t"		\
+	"xchg %r15, regs+0x78\n\t"
+
+#define LOAD_GPR	SAVE_GPR	/* an exchange is its own inverse, so loading reuses the same sequence */
+
+#define CR0_PE		(1ul << 0)	/* CR0 protection enable */
+#define CR0_PG		(1ul << 31)	/* CR0 paging */
+#define CR4_VMXE	(1ul << 13)	/* CR4 VMX enable is bit 13; bit 0 is CR4.VME */
+#define CR4_PAE		(1ul << 5)	/* CR4 physical address extension */
+#define CR4_PCIDE	(1ul << 17)	/* CR4 PCID enable */
+
+#define VMX_IO_SIZE_MASK		0x7	/* exit-qualification layout for I/O-instruction exits: bits 2:0 = access size */
+#define _VMX_IO_BYTE			0	/* size field encodes length - 1: 0 = 1 byte */
+#define _VMX_IO_WORD			1	/* 1 = 2 bytes */
+#define _VMX_IO_LONG			3	/* 3 = 4 bytes */
+#define VMX_IO_DIRECTION_MASK		(1ul << 3)
+#define VMX_IO_IN			(1ul << 3)
+#define VMX_IO_OUT			0
+#define VMX_IO_STRING			(1ul << 4)
+#define VMX_IO_REP			(1ul << 5)
+#define VMX_IO_OPRAND_DX		(1ul << 6)	/* NOTE(review): the SDM describes bit 6 as 0 = DX, 1 = immediate -- confirm the intended polarity */
+#define VMX_IO_PORT_MASK		0xFFFF0000
+#define VMX_IO_PORT_SHIFT		16
+
+#define TEST_VMRESUME		0x1001
+
+#endif
+