diff mbox

[v3] kvm-unit-tests : Basic architecture of VMX nested test case

Message ID 1375020009-6225-1-git-send-email-yzt356@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arthur Chunqi Li July 28, 2013, 2 p.m. UTC
This is the first version of the VMX nested environment. It contains the
basic VMX instruction test cases, including VMXON/VMXOFF/VMPTRLD/
VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the
basic execution routine in the VMX nested environment and lets the VM
print "Hello World" to show that it ran successfully.

The first release also includes a test suite for vmenter (vmlaunch and
vmresume). In addition, a hypercall mechanism is included; currently it
is used to trigger a normal VM exit.

New files added:
x86/vmx.h : contains all VMX related macro declarations
x86/vmx.c : main file for VMX nested test case

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
ChangeLog:
1. Refine the code in function vmx_run()
2. Fix bug in setting GUEST_RFLAGS
3. Move selector defines to lib/x86/vm.h
4. Move CR0/CR4 defines to lib/x86/vm.h, including those previously in lib/x86/vm.c
5. Move some inline functions to lib/x86/processor.h
6. Move some inline functions (vmcs related) to x86/vmx.h
---
 config-x86-common.mak |    2 +
 config-x86_64.mak     |    1 +
 lib/x86/msr.h         |    5 +
 lib/x86/processor.h   |   15 ++
 lib/x86/vm.c          |    4 -
 lib/x86/vm.h          |   21 ++
 x86/cstart64.S        |    4 +
 x86/unittests.cfg     |    6 +
 x86/vmx.c             |  674 +++++++++++++++++++++++++++++++++++++++++++++++++
 x86/vmx.h             |  466 ++++++++++++++++++++++++++++++++++
 10 files changed, 1194 insertions(+), 4 deletions(-)
 create mode 100644 x86/vmx.c
 create mode 100644 x86/vmx.h

Comments

Arthur Chunqi Li July 28, 2013, 2:24 p.m. UTC | #1
Hi Gleb,

It suddenly occurred to me that this patch also fails to handle
GUEST_RFLAGS on VMRESUME.

I have decided to remove rflags from struct regs, since rflags can be
read and set via vmcs_read/vmcs_write in the functions defined by each
test suite (init and exit_handler), whereas the other general-purpose
registers can only be set in the framework code.

Then I will wait for further feedback from Paolo and Gleb before
committing the final patch.

Arthur

On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> This is the first version of VMX nested environment. It contains the
> basic VMX instructions test cases, including VMXON/VMXOFF/VMXPTRLD/
> VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patchalso tests the
> basic execution routine in VMX nested environment andlet the VM print
> "Hello World" to inform its successfully run.
>
> The first release also includes a test suite for vmenter (vmlaunch and
> vmresume). Besides, hypercall mechanism is included and currently it is
> used to invoke VM normal exit.
>
> New files added:
> x86/vmx.h : contains all VMX related macro declerations
> x86/vmx.c : main file for VMX nested test case
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
> ChangeLog:
> 1. Refine codes in function vmx_run()
> 2. Fix bug of setting GUEST_RFLAGS
> 3. Move defines of selectors to lib/x86/vm.h
> 4. Move CR0/4 defines to lib/x86/vm.h, so as some defines in lib/x86/vm.c
> 5. Move some inline functions to lib/x86/processor.h
> 6. Move some inline functions (vmcs related) to x86/vmx.h
> ---
>  config-x86-common.mak |    2 +
>  config-x86_64.mak     |    1 +
>  lib/x86/msr.h         |    5 +
>  lib/x86/processor.h   |   15 ++
>  lib/x86/vm.c          |    4 -
>  lib/x86/vm.h          |   21 ++
>  x86/cstart64.S        |    4 +
>  x86/unittests.cfg     |    6 +
>  x86/vmx.c             |  674 +++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/vmx.h             |  466 ++++++++++++++++++++++++++++++++++
>  10 files changed, 1194 insertions(+), 4 deletions(-)
>  create mode 100644 x86/vmx.c
>  create mode 100644 x86/vmx.h
>
> diff --git a/config-x86-common.mak b/config-x86-common.mak
> index 455032b..34a41e1 100644
> --- a/config-x86-common.mak
> +++ b/config-x86-common.mak
> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
>
>  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
>
> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
> +
>  arch_clean:
>         $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
>         $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
> diff --git a/config-x86_64.mak b/config-x86_64.mak
> index 4e525f5..bb8ee89 100644
> --- a/config-x86_64.mak
> +++ b/config-x86_64.mak
> @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>           $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
>           $(TEST_DIR)/pcid.flat
>  tests += $(TEST_DIR)/svm.flat
> +tests += $(TEST_DIR)/vmx.flat
>
>  include config-x86-common.mak
> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
> index 509a421..281255a 100644
> --- a/lib/x86/msr.h
> +++ b/lib/x86/msr.h
> @@ -396,6 +396,11 @@
>  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
>  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
>  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
> +#define MSR_IA32_VMX_TRUE_PIN          0x0000048d
> +#define MSR_IA32_VMX_TRUE_PROC         0x0000048e
> +#define MSR_IA32_VMX_TRUE_EXIT         0x0000048f
> +#define MSR_IA32_VMX_TRUE_ENTRY                0x00000490
> +
>
>  /* AMD-V MSRs */
>
> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> index e46d8d0..f0c11cc 100644
> --- a/lib/x86/processor.h
> +++ b/lib/x86/processor.h
> @@ -307,4 +307,19 @@ static inline void safe_halt(void)
>  {
>         asm volatile("sti; hlt");
>  }
> +
> +#ifdef __x86_64__
> +static inline u64 read_rflags(void)
> +{
> +       u64 r;
> +       asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
> +       return r;
> +}
> +
> +static inline void write_rflags(u64 r)
> +{
> +       asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
> +}
> +#endif
> +
>  #endif
> diff --git a/lib/x86/vm.c b/lib/x86/vm.c
> index 260ec45..188bf57 100644
> --- a/lib/x86/vm.c
> +++ b/lib/x86/vm.c
> @@ -9,10 +9,6 @@
>  #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE)
>  #endif
>
> -#define X86_CR0_PE      0x00000001
> -#define X86_CR0_WP      0x00010000
> -#define X86_CR0_PG      0x80000000
> -#define X86_CR4_PSE     0x00000010
>  static void *free = 0;
>  static void *vfree_top = 0;
>
> diff --git a/lib/x86/vm.h b/lib/x86/vm.h
> index 0b5b5c7..eff6f72 100644
> --- a/lib/x86/vm.h
> +++ b/lib/x86/vm.h
> @@ -16,6 +16,27 @@
>  #define PTE_USER    (1ull << 2)
>  #define PTE_ADDR    (0xffffffffff000ull)
>
> +#define X86_CR0_PE      0x00000001
> +#define X86_CR0_WP      0x00010000
> +#define X86_CR0_PG      0x80000000
> +#define X86_CR4_VMXE   0x00000001
> +#define X86_CR4_PSE     0x00000010
> +#define X86_CR4_PAE     0x00000020
> +#define X86_CR4_PCIDE  0x00020000
> +
> +#ifdef __x86_64__
> +#define SEL_NULL_DESC          0x0
> +#define SEL_KERN_CODE_64       0x8
> +#define SEL_KERN_DATA_64       0x10
> +#define SEL_USER_CODE_64       0x18
> +#define SEL_USER_DATA_64       0x20
> +#define SEL_CODE_32            0x28
> +#define SEL_DATA_32            0x30
> +#define SEL_CODE_16            0x38
> +#define SEL_DATA_16            0x40
> +#define SEL_TSS_RUN            0x48
> +#endif
> +
>  void setup_vm();
>
>  void *vmalloc(unsigned long size);
> diff --git a/x86/cstart64.S b/x86/cstart64.S
> index 24df5f8..0fe76da 100644
> --- a/x86/cstart64.S
> +++ b/x86/cstart64.S
> @@ -4,6 +4,10 @@
>  .globl boot_idt
>  boot_idt = 0
>
> +.globl idt_descr
> +.globl tss_descr
> +.globl gdt64_desc
> +
>  ipi_vector = 0x20
>
>  max_cpus = 64
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index bc9643e..85c36aa 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
>  file = pcid.flat
>  extra_params = -cpu qemu64,+pcid
>  arch = x86_64
> +
> +[vmx]
> +file = vmx.flat
> +extra_params = -cpu host,+vmx
> +arch = x86_64
> +
> diff --git a/x86/vmx.c b/x86/vmx.c
> new file mode 100644
> index 0000000..7467927
> --- /dev/null
> +++ b/x86/vmx.c
> @@ -0,0 +1,674 @@
> +#include "libcflat.h"
> +#include "processor.h"
> +#include "vm.h"
> +#include "desc.h"
> +#include "vmx.h"
> +#include "msr.h"
> +#include "smp.h"
> +#include "io.h"
> +
> +int fails = 0, tests = 0;
> +u32 *vmxon_region;
> +struct vmcs *vmcs_root;
> +u32 vpid_cnt;
> +void *guest_stack, *guest_syscall_stack;
> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
> +ulong fix_cr0_set, fix_cr0_clr;
> +ulong fix_cr4_set, fix_cr4_clr;
> +struct regs regs;
> +struct vmx_test *current;
> +u64 hypercall_field = 0;
> +bool launched;
> +
> +extern u64 gdt64_desc[];
> +extern u64 idt_descr[];
> +extern u64 tss_descr[];
> +extern void *vmx_return;
> +extern void *entry_sysenter;
> +extern void *guest_entry;
> +
> +static void report(const char *name, int result)
> +{
> +       ++tests;
> +       if (result)
> +               printf("PASS: %s\n", name);
> +       else {
> +               printf("FAIL: %s\n", name);
> +               ++fails;
> +       }
> +}
> +
> +static int make_vmcs_current(struct vmcs *vmcs)
> +{
> +       bool ret;
> +
> +       asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
> +       return ret;
> +}
> +
> +/* entry_sysenter */
> +asm(
> +       ".align 4, 0x90\n\t"
> +       ".globl entry_sysenter\n\t"
> +       "entry_sysenter:\n\t"
> +       SAVE_GPR
> +       "       and     $0xf, %rax\n\t"
> +       "       mov     %rax, %rdi\n\t"
> +       "       call    syscall_handler\n\t"
> +       LOAD_GPR
> +       "       vmresume\n\t"
> +);
> +
> +static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
> +{
> +       current->syscall_handler(syscall_no);
> +}
> +
> +static inline int vmx_on()
> +{
> +       bool ret;
> +       asm volatile ("vmxon %1; setbe %0\n\t"
> +               : "=q"(ret) : "m"(vmxon_region) : "cc");
> +       return ret;
> +}
> +
> +static inline int vmx_off()
> +{
> +       bool ret;
> +       asm volatile("vmxoff; setbe %0\n\t"
> +               : "=q"(ret) : : "cc");
> +       return ret;
> +}
> +
> +static void print_vmexit_info()
> +{
> +       u64 guest_rip, guest_rsp;
> +       ulong reason = vmcs_read(EXI_REASON) & 0xff;
> +       ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
> +       guest_rip = vmcs_read(GUEST_RIP);
> +       guest_rsp = vmcs_read(GUEST_RSP);
> +       printf("VMEXIT info:\n");
> +       printf("\tvmexit reason = %d\n", reason);
> +       printf("\texit qualification = 0x%x\n", exit_qual);
> +       printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
> +       printf("\tguest_rip = 0x%llx\n", guest_rip);
> +       printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
> +               regs.rax, regs.rbx, regs.rcx, regs.rdx);
> +       printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
> +               guest_rsp, regs.rbp, regs.rsi, regs.rdi);
> +       printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
> +               regs.r8, regs.r9, regs.r10, regs.r11);
> +       printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
> +               regs.r12, regs.r13, regs.r14, regs.r15);
> +}
> +
> +static void test_vmclear(void)
> +{
> +       u64 rflags;
> +
> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       write_rflags(rflags);
> +       report("test vmclear", vmcs_clear(vmcs_root) == 0);
> +}
> +
> +static void test_vmxoff(void)
> +{
> +       int ret;
> +       u64 rflags;
> +
> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       write_rflags(rflags);
> +       ret = vmx_off();
> +       report("test vmxoff", !ret);
> +}
> +
> +static void __attribute__((__used__)) guest_main(void)
> +{
> +       current->guest_main();
> +}
> +
> +/* guest_entry */
> +asm(
> +       ".align 4, 0x90\n\t"
> +       ".globl entry_guest\n\t"
> +       "guest_entry:\n\t"
> +       "       call guest_main\n\t"
> +       "       mov $1, %edi\n\t"
> +       "       call hypercall\n\t"
> +);
> +
> +static void init_vmcs_ctrl(void)
> +{
> +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> +       /* 26.2.1.1 */
> +       vmcs_write(PIN_CONTROLS, ctrl_pin);
> +       /* Disable VMEXIT of IO instruction */
> +       vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
> +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
> +               ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
> +               vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
> +       }
> +       vmcs_write(CR3_TARGET_COUNT, 0);
> +       vmcs_write(VPID, ++vpid_cnt);
> +}
> +
> +static void init_vmcs_host(void)
> +{
> +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> +       /* 26.2.1.2 */
> +       vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
> +
> +       /* 26.2.1.3 */
> +       vmcs_write(ENT_CONTROLS, ctrl_enter);
> +       vmcs_write(EXI_CONTROLS, ctrl_exit);
> +
> +       /* 26.2.2 */
> +       vmcs_write(HOST_CR0, read_cr0());
> +       vmcs_write(HOST_CR3, read_cr3());
> +       vmcs_write(HOST_CR4, read_cr4());
> +       vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
> +       vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
> +
> +       /* 26.2.3 */
> +       vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
> +       vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
> +       vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
> +       vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
> +       vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
> +       vmcs_write(HOST_BASE_FS, 0);
> +       vmcs_write(HOST_BASE_GS, 0);
> +
> +       /* Set other vmcs area */
> +       vmcs_write(PF_ERROR_MASK, 0);
> +       vmcs_write(PF_ERROR_MATCH, 0);
> +       vmcs_write(VMCS_LINK_PTR, ~0ul);
> +       vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
> +       vmcs_write(HOST_RIP, (u64)(&vmx_return));
> +}
> +
> +static void init_vmcs_guest(void)
> +{
> +       /* 26.3 CHECKING AND LOADING GUEST STATE */
> +       ulong guest_cr0, guest_cr4, guest_cr3;
> +       /* 26.3.1.1 */
> +       guest_cr0 = read_cr0();
> +       guest_cr4 = read_cr4();
> +       guest_cr3 = read_cr3();
> +       if (ctrl_enter & ENT_GUEST_64) {
> +               guest_cr0 |= X86_CR0_PG;
> +               guest_cr4 |= X86_CR4_PAE;
> +       }
> +       if ((ctrl_enter & ENT_GUEST_64) == 0)
> +               guest_cr4 &= (~X86_CR4_PCIDE);
> +       if (guest_cr0 & X86_CR0_PG)
> +               guest_cr0 |= X86_CR0_PE;
> +       vmcs_write(GUEST_CR0, guest_cr0);
> +       vmcs_write(GUEST_CR3, guest_cr3);
> +       vmcs_write(GUEST_CR4, guest_cr4);
> +       vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
> +       vmcs_write(GUEST_SYSENTER_ESP,
> +               (u64)(guest_syscall_stack + PAGE_SIZE - 1));
> +       vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
> +       vmcs_write(GUEST_DR7, 0);
> +       vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
> +
> +       /* 26.3.1.2 */
> +       vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
> +       vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
> +       vmcs_write(GUEST_SEL_LDTR, 0);
> +
> +       vmcs_write(GUEST_BASE_CS, 0);
> +       vmcs_write(GUEST_BASE_ES, 0);
> +       vmcs_write(GUEST_BASE_SS, 0);
> +       vmcs_write(GUEST_BASE_DS, 0);
> +       vmcs_write(GUEST_BASE_FS, 0);
> +       vmcs_write(GUEST_BASE_GS, 0);
> +       vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
> +       vmcs_write(GUEST_BASE_LDTR, 0);
> +
> +       vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
> +       vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
> +
> +       vmcs_write(GUEST_AR_CS, 0xa09b);
> +       vmcs_write(GUEST_AR_DS, 0xc093);
> +       vmcs_write(GUEST_AR_ES, 0xc093);
> +       vmcs_write(GUEST_AR_FS, 0xc093);
> +       vmcs_write(GUEST_AR_GS, 0xc093);
> +       vmcs_write(GUEST_AR_SS, 0xc093);
> +       vmcs_write(GUEST_AR_LDTR, 0x82);
> +       vmcs_write(GUEST_AR_TR, 0x8b);
> +
> +       /* 26.3.1.3 */
> +       vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
> +       vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
> +       vmcs_write(GUEST_LIMIT_GDTR,
> +               ((struct descr *)gdt64_desc)->limit & 0xffff);
> +       vmcs_write(GUEST_LIMIT_IDTR,
> +               ((struct descr *)idt_descr)->limit & 0xffff);
> +
> +       /* 26.3.1.4 */
> +       vmcs_write(GUEST_RIP, (u64)(&guest_entry));
> +       vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
> +       vmcs_write(GUEST_RFLAGS, 0x2);
> +
> +       /* 26.3.1.5 */
> +       vmcs_write(GUEST_ACTV_STATE, 0);
> +       vmcs_write(GUEST_INTR_STATE, 0);
> +}
> +
> +static int init_vmcs(struct vmcs **vmcs)
> +{
> +       *vmcs = alloc_page();
> +       memset(*vmcs, 0, PAGE_SIZE);
> +       (*vmcs)->revision_id = basic.revision;
> +       /* vmclear first to init vmcs */
> +       if (vmcs_clear(*vmcs)) {
> +               printf("%s : vmcs_clear error\n", __func__);
> +               return 1;
> +       }
> +
> +       if (make_vmcs_current(*vmcs)) {
> +               printf("%s : make_vmcs_current error\n", __func__);
> +               return 1;
> +       }
> +
> +       /* All settings to pin/exit/enter/cpu
> +          control fields should be placed here */
> +       ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
> +       ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
> +       ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
> +       ctrl_cpu[0] |= CPU_HLT;
> +       /* DIsable IO instruction VMEXIT now */
> +       ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
> +       ctrl_cpu[1] = 0;
> +
> +       ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
> +       ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
> +       ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
> +       ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
> +
> +       init_vmcs_ctrl();
> +       init_vmcs_host();
> +       init_vmcs_guest();
> +       return 0;
> +}
> +
> +static void init_vmx(void)
> +{
> +       vmxon_region = alloc_page();
> +       memset(vmxon_region, 0, PAGE_SIZE);
> +
> +       fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
> +       fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
> +       fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
> +       fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
> +       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
> +                       : MSR_IA32_VMX_PINBASED_CTLS);
> +       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
> +                       : MSR_IA32_VMX_EXIT_CTLS);
> +       ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
> +                       : MSR_IA32_VMX_ENTRY_CTLS);
> +       ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
> +                       : MSR_IA32_VMX_PROCBASED_CTLS);
> +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
> +               ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> +       if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
> +               ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
> +
> +       write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
> +       write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
> +
> +       *vmxon_region = basic.revision;
> +
> +       guest_stack = alloc_page();
> +       memset(guest_stack, 0, PAGE_SIZE);
> +       guest_syscall_stack = alloc_page();
> +       memset(guest_syscall_stack, 0, PAGE_SIZE);
> +}
> +
> +static int test_vmx_capability(void)
> +{
> +       struct cpuid r;
> +       u64 ret1, ret2;
> +       u64 ia32_feature_control;
> +       r = cpuid(1);
> +       ret1 = ((r.c) >> 5) & 1;
> +       ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
> +       ret2 = ((ia32_feature_control & 0x5) == 0x5);
> +       if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) {
> +               wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
> +               ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
> +               ret2 = ((ia32_feature_control & 0x5) == 0x5);
> +       }
> +       report("test vmx capability", ret1 & ret2);
> +       return !(ret1 & ret2);
> +}
> +
> +static int test_vmxon(void)
> +{
> +       int ret;
> +       u64 rflags;
> +
> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       write_rflags(rflags);
> +       ret = vmx_on();
> +       report("test vmxon", !ret);
> +       return ret;
> +}
> +
> +static void test_vmptrld(void)
> +{
> +       u64 rflags;
> +       struct vmcs *vmcs;
> +
> +       vmcs = alloc_page();
> +       vmcs->revision_id = basic.revision;
> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       write_rflags(rflags);
> +       report("test vmptrld", make_vmcs_current(vmcs) == 0);
> +}
> +
> +static void test_vmptrst(void)
> +{
> +       u64 rflags;
> +       int ret;
> +       struct vmcs *vmcs1, *vmcs2;
> +
> +       vmcs1 = alloc_page();
> +       memset(vmcs1, 0, PAGE_SIZE);
> +       init_vmcs(&vmcs1);
> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       write_rflags(rflags);
> +       ret = vmcs_save(&vmcs2);
> +       report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
> +}
> +
> +/* This function can only be called in guest */
> +static void __attribute__((__used__)) hypercall(u32 hypercall_no)
> +{
> +       u64 val = 0;
> +       val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
> +       hypercall_field = val;
> +       asm volatile("vmcall\n\t");
> +}
> +
> +static bool is_hypercall()
> +{
> +       ulong reason, hyper_bit;
> +
> +       reason = vmcs_read(EXI_REASON) & 0xff;
> +       hyper_bit = hypercall_field & HYPERCALL_BIT;
> +       if (reason == VMX_VMCALL && hyper_bit)
> +               return true;
> +       return false;
> +}
> +
> +static int handle_hypercall()
> +{
> +       ulong hypercall_no;
> +
> +       hypercall_no = hypercall_field & HYPERCALL_MASK;
> +       hypercall_field = 0;
> +       switch (hypercall_no) {
> +       case HYPERCALL_VMEXIT:
> +               return VMX_TEST_VMEXIT;
> +       default:
> +               printf("ERROR : Invalid hypercall number : %d\n", hypercall_no);
> +       }
> +       return VMX_TEST_EXIT;
> +}
> +
> +static int exit_handler()
> +{
> +       int ret;
> +
> +       current->exits++;
> +       current->guest_regs = regs;
> +       if (is_hypercall())
> +               ret = handle_hypercall();
> +       else
> +               ret = current->exit_handler();
> +       regs = current->guest_regs;
> +       switch (ret) {
> +       case VMX_TEST_VMEXIT:
> +       case VMX_TEST_RESUME:
> +               return ret;
> +       case VMX_TEST_EXIT:
> +               break;
> +       default:
> +               printf("ERROR : Invalid exit_handler return val %d.\n"
> +                       , ret);
> +       }
> +       print_vmexit_info();
> +       exit(-1);
> +       return 0;
> +}
> +
> +static int vmx_run()
> +{
> +       u32 ret = 0, fail = 0;
> +
> +       while (1) {
> +               asm volatile (
> +                       "mov %%rsp, %%rsi\n\t"
> +                       "mov %2, %%rdi\n\t"
> +                       "vmwrite %%rsi, %%rdi\n\t"
> +
> +                       LOAD_GPR_C
> +                       "cmpl $0, %1\n\t"
> +                       "jne 1f\n\t"
> +                       LOAD_RFLAGS
> +                       "vmlaunch\n\t"
> +                       "jmp 2f\n\t"
> +                       "1: "
> +                       "vmresume\n\t"
> +                       "2: "
> +                       "setbe %0\n\t"
> +                       "vmx_return:\n\t"
> +                       SAVE_GPR_C
> +                       SAVE_RFLAGS
> +                       : "=m"(fail)
> +                       : "m"(launched), "i"(HOST_RSP)
> +                       : "rdi", "rsi", "memory", "cc"
> +
> +               );
> +               if (fail)
> +                       ret = launched ? VMX_TEST_RESUME_ERR :
> +                               VMX_TEST_LAUNCH_ERR;
> +               else {
> +                       launched = 1;
> +                       ret = exit_handler();
> +               }
> +               if (ret != VMX_TEST_RESUME)
> +                       break;
> +       }
> +       launched = 0;
> +       switch (ret) {
> +       case VMX_TEST_VMEXIT:
> +               return 0;
> +       case VMX_TEST_LAUNCH_ERR:
> +               printf("%s : vmlaunch failed.\n", __func__);
> +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
> +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
> +                       printf("\tvmlaunch set wrong flags\n");
> +               report("test vmlaunch", 0);
> +               break;
> +       case VMX_TEST_RESUME_ERR:
> +               printf("%s : vmresume failed.\n", __func__);
> +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
> +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
> +                       printf("\tvmresume set wrong flags\n");
> +               report("test vmresume", 0);
> +               break;
> +       default:
> +               printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
> +               break;
> +       }
> +       return 1;
> +}
> +
> +static int test_run(struct vmx_test *test)
> +{
> +       if (test->name == NULL)
> +               test->name = "(no name)";
> +       if (vmx_on()) {
> +               printf("%s : vmxon failed.\n", __func__);
> +               return 1;
> +       }
> +       init_vmcs(&(test->vmcs));
> +       /* Directly call test->init is ok here, init_vmcs has done
> +          vmcs init, vmclear and vmptrld*/
> +       if (test->init)
> +               test->init(test->vmcs);
> +       test->exits = 0;
> +       current = test;
> +       regs = test->guest_regs;
> +       vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
> +       launched = 0;
> +       printf("\nTest suite : %s\n", test->name);
> +       vmx_run();
> +       if (vmx_off()) {
> +               printf("%s : vmxoff failed.\n", __func__);
> +               return 1;
> +       }
> +       return 0;
> +}
> +
> +static void basic_init()
> +{
> +}
> +
> +static void basic_guest_main()
> +{
> +       /* Here is null guest_main, print Hello World */
> +       printf("\tHello World, this is null_guest_main!\n");
> +}
> +
> +static int basic_exit_handler()
> +{
> +       u64 guest_rip;
> +       ulong reason;
> +
> +       guest_rip = vmcs_read(GUEST_RIP);
> +       reason = vmcs_read(EXI_REASON) & 0xff;
> +
> +       switch (reason) {
> +       case VMX_VMCALL:
> +               print_vmexit_info();
> +               vmcs_write(GUEST_RIP, guest_rip + 3);
> +               return VMX_TEST_RESUME;
> +       default:
> +               break;
> +       }
> +       printf("ERROR : Unhandled vmx exit.\n");
> +       print_vmexit_info();
> +       return VMX_TEST_EXIT;
> +}
> +
> +static void basic_syscall_handler(u64 syscall_no)
> +{
> +}
> +
> +static void vmenter_main()
> +{
> +       u64 rax;
> +       u64 rsp, resume_rsp;
> +
> +       report("test vmlaunch", 1);
> +
> +       asm volatile(
> +               "mov %%rsp, %0\n\t"
> +               "mov %3, %%rax\n\t"
> +               "vmcall\n\t"
> +               "mov %%rax, %1\n\t"
> +               "mov %%rsp, %2\n\t"
> +               : "=r"(rsp), "=r"(rax), "=r"(resume_rsp)
> +               : "g"(0xABCD));
> +       report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
> +}
> +
> +static int vmenter_exit_handler()
> +{
> +       u64 guest_rip;
> +       ulong reason;
> +
> +       guest_rip = vmcs_read(GUEST_RIP);
> +       reason = vmcs_read(EXI_REASON) & 0xff;
> +       switch (reason) {
> +       case VMX_VMCALL:
> +               if (current->guest_regs.rax != 0xABCD) {
> +                       report("test vmresume", 0);
> +                       return VMX_TEST_VMEXIT;
> +               }
> +               current->guest_regs.rax = 0xFFFF;
> +               vmcs_write(GUEST_RIP, guest_rip + 3);
> +               return VMX_TEST_RESUME;
> +       default:
> +               report("test vmresume", 0);
> +               print_vmexit_info();
> +       }
> +       return VMX_TEST_VMEXIT;
> +}
> +
> +
> +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs
> +   basic_* just implement some basic functions */
> +static struct vmx_test vmx_tests[] = {
> +       { "null", basic_init, basic_guest_main, basic_exit_handler,
> +               basic_syscall_handler, {0} },
> +       { "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
> +               basic_syscall_handler, {0} },
> +};
> +
> +int main(void)
> +{
> +       int i;
> +
> +       setup_vm();
> +       setup_idt();
> +
> +       if (test_vmx_capability() != 0) {
> +               printf("ERROR : vmx not supported, check +vmx option\n");
> +               goto exit;
> +       }
> +       init_vmx();
> +       /* Set basic test ctxt the same as "null" */
> +       current = &vmx_tests[0];
> +       if (test_vmxon() != 0)
> +               goto exit;
> +       test_vmptrld();
> +       test_vmclear();
> +       test_vmptrst();
> +       init_vmcs(&vmcs_root);
> +       if (vmx_run()) {
> +               report("test vmlaunch", 0);
> +               goto exit;
> +       }
> +       test_vmxoff();
> +
> +       for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) {
> +               if (test_run(&vmx_tests[i]))
> +                       goto exit;
> +       }
> +
> +exit:
> +       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> +       return fails ? 1 : 0;
> +}
> diff --git a/x86/vmx.h b/x86/vmx.h
> new file mode 100644
> index 0000000..1fb9738
> --- /dev/null
> +++ b/x86/vmx.h
> @@ -0,0 +1,466 @@
> +#ifndef __HYPERVISOR_H
> +#define __HYPERVISOR_H
> +
> +#include "libcflat.h"
> +
> +struct vmcs {
> +       u32 revision_id; /* vmcs revision identifier */
> +       u32 abort; /* VMX-abort indicator */
> +       /* VMCS data */
> +       char data[0];
> +};
> +
> +struct regs {
> +       u64 rax;
> +       u64 rcx;
> +       u64 rdx;
> +       u64 rbx;
> +       u64 cr2;
> +       u64 rbp;
> +       u64 rsi;
> +       u64 rdi;
> +       u64 r8;
> +       u64 r9;
> +       u64 r10;
> +       u64 r11;
> +       u64 r12;
> +       u64 r13;
> +       u64 r14;
> +       u64 r15;
> +       u64 rflags;
> +};
> +
> +struct vmx_test {
> +       const char *name;
> +       void (*init)(struct vmcs *vmcs);
> +       void (*guest_main)();
> +       int (*exit_handler)();
> +       void (*syscall_handler)(u64 syscall_no);
> +       struct regs guest_regs;
> +       struct vmcs *vmcs;
> +       int exits;
> +};
> +
> +static union vmx_basic {
> +       u64 val;
> +       struct {
> +               u32 revision;
> +               u32     size:13,
> +                       : 3,
> +                       width:1,
> +                       dual:1,
> +                       type:4,
> +                       insouts:1,
> +                       ctrl:1;
> +       };
> +} basic;
> +
> +static union vmx_ctrl_pin {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_pin_rev;
> +
> +static union vmx_ctrl_cpu {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_cpu_rev[2];
> +
> +static union vmx_ctrl_exit {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_exit_rev;
> +
> +static union vmx_ctrl_ent {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_enter_rev;
> +
> +static union vmx_ept_vpid {
> +       u64 val;
> +       struct {
> +               u32:16,
> +                       super:2,
> +                       : 2,
> +                       invept:1,
> +                       : 11;
> +               u32     invvpid:1;
> +       };
> +} ept_vpid;
> +
> +struct descr {
> +       u16 limit;
> +       u64 addr;
> +};
> +
> +enum Encoding {
> +       /* 16-Bit Control Fields */
> +       VPID                    = 0x0000ul,
> +       /* Posted-interrupt notification vector */
> +       PINV                    = 0x0002ul,
> +       /* EPTP index */
> +       EPTP_IDX                = 0x0004ul,
> +
> +       /* 16-Bit Guest State Fields */
> +       GUEST_SEL_ES            = 0x0800ul,
> +       GUEST_SEL_CS            = 0x0802ul,
> +       GUEST_SEL_SS            = 0x0804ul,
> +       GUEST_SEL_DS            = 0x0806ul,
> +       GUEST_SEL_FS            = 0x0808ul,
> +       GUEST_SEL_GS            = 0x080aul,
> +       GUEST_SEL_LDTR          = 0x080cul,
> +       GUEST_SEL_TR            = 0x080eul,
> +       GUEST_INT_STATUS        = 0x0810ul,
> +
> +       /* 16-Bit Host State Fields */
> +       HOST_SEL_ES             = 0x0c00ul,
> +       HOST_SEL_CS             = 0x0c02ul,
> +       HOST_SEL_SS             = 0x0c04ul,
> +       HOST_SEL_DS             = 0x0c06ul,
> +       HOST_SEL_FS             = 0x0c08ul,
> +       HOST_SEL_GS             = 0x0c0aul,
> +       HOST_SEL_TR             = 0x0c0cul,
> +
> +       /* 64-Bit Control Fields */
> +       IO_BITMAP_A             = 0x2000ul,
> +       IO_BITMAP_B             = 0x2002ul,
> +       MSR_BITMAP              = 0x2004ul,
> +       EXIT_MSR_ST_ADDR        = 0x2006ul,
> +       EXIT_MSR_LD_ADDR        = 0x2008ul,
> +       ENTER_MSR_LD_ADDR       = 0x200aul,
> +       VMCS_EXEC_PTR           = 0x200cul,
> +       TSC_OFFSET              = 0x2010ul,
> +       TSC_OFFSET_HI           = 0x2011ul,
> +       APIC_VIRT_ADDR          = 0x2012ul,
> +       APIC_ACCS_ADDR          = 0x2014ul,
> +       EPTP                    = 0x201aul,
> +       EPTP_HI                 = 0x201bul,
> +
> +       /* 64-Bit Readonly Data Field */
> +       INFO_PHYS_ADDR          = 0x2400ul,
> +
> +       /* 64-Bit Guest State */
> +       VMCS_LINK_PTR           = 0x2800ul,
> +       VMCS_LINK_PTR_HI        = 0x2801ul,
> +       GUEST_DEBUGCTL          = 0x2802ul,
> +       GUEST_DEBUGCTL_HI       = 0x2803ul,
> +       GUEST_EFER              = 0x2806ul,
> +       GUEST_PERF_GLOBAL_CTRL  = 0x2808ul,
> +       GUEST_PDPTE             = 0x280aul,
> +
> +       /* 64-Bit Host State */
> +       HOST_EFER               = 0x2c02ul,
> +       HOST_PERF_GLOBAL_CTRL   = 0x2c04ul,
> +
> +       /* 32-Bit Control Fields */
> +       PIN_CONTROLS            = 0x4000ul,
> +       CPU_EXEC_CTRL0          = 0x4002ul,
> +       EXC_BITMAP              = 0x4004ul,
> +       PF_ERROR_MASK           = 0x4006ul,
> +       PF_ERROR_MATCH          = 0x4008ul,
> +       CR3_TARGET_COUNT        = 0x400aul,
> +       EXI_CONTROLS            = 0x400cul,
> +       EXI_MSR_ST_CNT          = 0x400eul,
> +       EXI_MSR_LD_CNT          = 0x4010ul,
> +       ENT_CONTROLS            = 0x4012ul,
> +       ENT_MSR_LD_CNT          = 0x4014ul,
> +       ENT_INTR_INFO           = 0x4016ul,
> +       ENT_INTR_ERROR          = 0x4018ul,
> +       ENT_INST_LEN            = 0x401aul,
> +       TPR_THRESHOLD           = 0x401cul,
> +       CPU_EXEC_CTRL1          = 0x401eul,
> +
> +       /* 32-Bit R/O Data Fields */
> +       VMX_INST_ERROR          = 0x4400ul,
> +       EXI_REASON              = 0x4402ul,
> +       EXI_INTR_INFO           = 0x4404ul,
> +       EXI_INTR_ERROR          = 0x4406ul,
> +       IDT_VECT_INFO           = 0x4408ul,
> +       IDT_VECT_ERROR          = 0x440aul,
> +       EXI_INST_LEN            = 0x440cul,
> +       EXI_INST_INFO           = 0x440eul,
> +
> +       /* 32-Bit Guest State Fields */
> +       GUEST_LIMIT_ES          = 0x4800ul,
> +       GUEST_LIMIT_CS          = 0x4802ul,
> +       GUEST_LIMIT_SS          = 0x4804ul,
> +       GUEST_LIMIT_DS          = 0x4806ul,
> +       GUEST_LIMIT_FS          = 0x4808ul,
> +       GUEST_LIMIT_GS          = 0x480aul,
> +       GUEST_LIMIT_LDTR        = 0x480cul,
> +       GUEST_LIMIT_TR          = 0x480eul,
> +       GUEST_LIMIT_GDTR        = 0x4810ul,
> +       GUEST_LIMIT_IDTR        = 0x4812ul,
> +       GUEST_AR_ES             = 0x4814ul,
> +       GUEST_AR_CS             = 0x4816ul,
> +       GUEST_AR_SS             = 0x4818ul,
> +       GUEST_AR_DS             = 0x481aul,
> +       GUEST_AR_FS             = 0x481cul,
> +       GUEST_AR_GS             = 0x481eul,
> +       GUEST_AR_LDTR           = 0x4820ul,
> +       GUEST_AR_TR             = 0x4822ul,
> +       GUEST_INTR_STATE        = 0x4824ul,
> +       GUEST_ACTV_STATE        = 0x4826ul,
> +       GUEST_SMBASE            = 0x4828ul,
> +       GUEST_SYSENTER_CS       = 0x482aul,
> +
> +       /* 32-Bit Host State Fields */
> +       HOST_SYSENTER_CS        = 0x4c00ul,
> +
> +       /* Natural-Width Control Fields */
> +       CR0_MASK                = 0x6000ul,
> +       CR4_MASK                = 0x6002ul,
> +       CR0_READ_SHADOW = 0x6004ul,
> +       CR4_READ_SHADOW = 0x6006ul,
> +       CR3_TARGET_0            = 0x6008ul,
> +       CR3_TARGET_1            = 0x600aul,
> +       CR3_TARGET_2            = 0x600cul,
> +       CR3_TARGET_3            = 0x600eul,
> +
> +       /* Natural-Width R/O Data Fields */
> +       EXI_QUALIFICATION       = 0x6400ul,
> +       IO_RCX                  = 0x6402ul,
> +       IO_RSI                  = 0x6404ul,
> +       IO_RDI                  = 0x6406ul,
> +       IO_RIP                  = 0x6408ul,
> +       GUEST_LINEAR_ADDRESS    = 0x640aul,
> +
> +       /* Natural-Width Guest State Fields */
> +       GUEST_CR0               = 0x6800ul,
> +       GUEST_CR3               = 0x6802ul,
> +       GUEST_CR4               = 0x6804ul,
> +       GUEST_BASE_ES           = 0x6806ul,
> +       GUEST_BASE_CS           = 0x6808ul,
> +       GUEST_BASE_SS           = 0x680aul,
> +       GUEST_BASE_DS           = 0x680cul,
> +       GUEST_BASE_FS           = 0x680eul,
> +       GUEST_BASE_GS           = 0x6810ul,
> +       GUEST_BASE_LDTR         = 0x6812ul,
> +       GUEST_BASE_TR           = 0x6814ul,
> +       GUEST_BASE_GDTR         = 0x6816ul,
> +       GUEST_BASE_IDTR         = 0x6818ul,
> +       GUEST_DR7               = 0x681aul,
> +       GUEST_RSP               = 0x681cul,
> +       GUEST_RIP               = 0x681eul,
> +       GUEST_RFLAGS            = 0x6820ul,
> +       GUEST_PENDING_DEBUG     = 0x6822ul,
> +       GUEST_SYSENTER_ESP      = 0x6824ul,
> +       GUEST_SYSENTER_EIP      = 0x6826ul,
> +
> +       /* Natural-Width Host State Fields */
> +       HOST_CR0                = 0x6c00ul,
> +       HOST_CR3                = 0x6c02ul,
> +       HOST_CR4                = 0x6c04ul,
> +       HOST_BASE_FS            = 0x6c06ul,
> +       HOST_BASE_GS            = 0x6c08ul,
> +       HOST_BASE_TR            = 0x6c0aul,
> +       HOST_BASE_GDTR          = 0x6c0cul,
> +       HOST_BASE_IDTR          = 0x6c0eul,
> +       HOST_SYSENTER_ESP       = 0x6c10ul,
> +       HOST_SYSENTER_EIP       = 0x6c12ul,
> +       HOST_RSP                = 0x6c14ul,
> +       HOST_RIP                = 0x6c16ul
> +};
> +
> +enum Reason {
> +       VMX_EXC_NMI             = 0,
> +       VMX_EXTINT              = 1,
> +       VMX_TRIPLE_FAULT        = 2,
> +       VMX_INIT                = 3,
> +       VMX_SIPI                = 4,
> +       VMX_SMI_IO              = 5,
> +       VMX_SMI_OTHER           = 6,
> +       VMX_INTR_WINDOW         = 7,
> +       VMX_NMI_WINDOW          = 8,
> +       VMX_TASK_SWITCH         = 9,
> +       VMX_CPUID               = 10,
> +       VMX_GETSEC              = 11,
> +       VMX_HLT                 = 12,
> +       VMX_INVD                = 13,
> +       VMX_INVLPG              = 14,
> +       VMX_RDPMC               = 15,
> +       VMX_RDTSC               = 16,
> +       VMX_RSM                 = 17,
> +       VMX_VMCALL              = 18,
> +       VMX_VMCLEAR             = 19,
> +       VMX_VMLAUNCH            = 20,
> +       VMX_VMPTRLD             = 21,
> +       VMX_VMPTRST             = 22,
> +       VMX_VMREAD              = 23,
> +       VMX_VMRESUME            = 24,
> +       VMX_VMWRITE             = 25,
> +       VMX_VMXOFF              = 26,
> +       VMX_VMXON               = 27,
> +       VMX_CR                  = 28,
> +       VMX_DR                  = 29,
> +       VMX_IO                  = 30,
> +       VMX_RDMSR               = 31,
> +       VMX_WRMSR               = 32,
> +       VMX_FAIL_STATE          = 33,
> +       VMX_FAIL_MSR            = 34,
> +       VMX_MWAIT               = 36,
> +       VMX_MTF                 = 37,
> +       VMX_MONITOR             = 39,
> +       VMX_PAUSE               = 40,
> +       VMX_FAIL_MCHECK         = 41,
> +       VMX_TPR_THRESHOLD       = 43,
> +       VMX_APIC_ACCESS         = 44,
> +       VMX_GDTR_IDTR           = 46,
> +       VMX_LDTR_TR             = 47,
> +       VMX_EPT_VIOLATION       = 48,
> +       VMX_EPT_MISCONFIG       = 49,
> +       VMX_INVEPT              = 50,
> +       VMX_PREEMPT             = 52,
> +       VMX_INVVPID             = 53,
> +       VMX_WBINVD              = 54,
> +       VMX_XSETBV              = 55
> +};
> +
> +#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
> +#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
> +
> +enum Ctrl_exi {
> +       EXI_HOST_64             = 1UL << 9,
> +       EXI_LOAD_PERF           = 1UL << 12,
> +       EXI_INTA                = 1UL << 15,
> +       EXI_LOAD_EFER           = 1UL << 21,
> +};
> +
> +enum Ctrl_ent {
> +       ENT_GUEST_64            = 1UL << 9,
> +       ENT_LOAD_EFER           = 1UL << 15,
> +};
> +
> +enum Ctrl_pin {
> +       PIN_EXTINT              = 1ul << 0,
> +       PIN_NMI                 = 1ul << 3,
> +       PIN_VIRT_NMI            = 1ul << 5,
> +};
> +
> +enum Ctrl0 {
> +       CPU_INTR_WINDOW         = 1ul << 2,
> +       CPU_HLT                 = 1ul << 7,
> +       CPU_INVLPG              = 1ul << 9,
> +       CPU_CR3_LOAD            = 1ul << 15,
> +       CPU_CR3_STORE           = 1ul << 16,
> +       CPU_TPR_SHADOW          = 1ul << 21,
> +       CPU_NMI_WINDOW          = 1ul << 22,
> +       CPU_IO                  = 1ul << 24,
> +       CPU_IO_BITMAP           = 1ul << 25,
> +       CPU_SECONDARY           = 1ul << 31,
> +};
> +
> +enum Ctrl1 {
> +       CPU_EPT                 = 1ul << 1,
> +       CPU_VPID                = 1ul << 5,
> +       CPU_URG                 = 1ul << 7,
> +};
> +
> +#define SAVE_GPR                               \
> +       "xchg %rax, regs\n\t"                   \
> +       "xchg %rbx, regs+0x8\n\t"               \
> +       "xchg %rcx, regs+0x10\n\t"              \
> +       "xchg %rdx, regs+0x18\n\t"              \
> +       "xchg %rbp, regs+0x28\n\t"              \
> +       "xchg %rsi, regs+0x30\n\t"              \
> +       "xchg %rdi, regs+0x38\n\t"              \
> +       "xchg %r8, regs+0x40\n\t"               \
> +       "xchg %r9, regs+0x48\n\t"               \
> +       "xchg %r10, regs+0x50\n\t"              \
> +       "xchg %r11, regs+0x58\n\t"              \
> +       "xchg %r12, regs+0x60\n\t"              \
> +       "xchg %r13, regs+0x68\n\t"              \
> +       "xchg %r14, regs+0x70\n\t"              \
> +       "xchg %r15, regs+0x78\n\t"
> +
> +#define LOAD_GPR       SAVE_GPR
> +
> +#define SAVE_GPR_C                             \
> +       "xchg %%rax, regs\n\t"                  \
> +       "xchg %%rbx, regs+0x8\n\t"              \
> +       "xchg %%rcx, regs+0x10\n\t"             \
> +       "xchg %%rdx, regs+0x18\n\t"             \
> +       "xchg %%rbp, regs+0x28\n\t"             \
> +       "xchg %%rsi, regs+0x30\n\t"             \
> +       "xchg %%rdi, regs+0x38\n\t"             \
> +       "xchg %%r8, regs+0x40\n\t"              \
> +       "xchg %%r9, regs+0x48\n\t"              \
> +       "xchg %%r10, regs+0x50\n\t"             \
> +       "xchg %%r11, regs+0x58\n\t"             \
> +       "xchg %%r12, regs+0x60\n\t"             \
> +       "xchg %%r13, regs+0x68\n\t"             \
> +       "xchg %%r14, regs+0x70\n\t"             \
> +       "xchg %%r15, regs+0x78\n\t"
> +
> +#define LOAD_GPR_C     SAVE_GPR_C
> +
> +#define SAVE_RFLAGS            \
> +       "pushf\n\t"                     \
> +       "pop regs+0x80\n\t"
> +
> +#define LOAD_RFLAGS            \
> +       "push regs+0x80\n\t"    \
> +       "popf\n\t"
> +
> +#define VMX_IO_SIZE_MASK               0x7
> +#define _VMX_IO_BYTE                   1
> +#define _VMX_IO_WORD                   2
> +#define _VMX_IO_LONG                   3
> +#define VMX_IO_DIRECTION_MASK          (1ul << 3)
> +#define VMX_IO_IN                      (1ul << 3)
> +#define VMX_IO_OUT                     0
> +#define VMX_IO_STRING                  (1ul << 4)
> +#define VMX_IO_REP                     (1ul << 5)
> +#define VMX_IO_OPRAND_DX               (1ul << 6)
> +#define VMX_IO_PORT_MASK               0xFFFF0000
> +#define VMX_IO_PORT_SHIFT              16
> +
> +#define VMX_TEST_VMEXIT                        1
> +#define VMX_TEST_EXIT                  2
> +#define VMX_TEST_RESUME                        3
> +#define VMX_TEST_LAUNCH_ERR            4
> +#define VMX_TEST_RESUME_ERR            5
> +
> +#define HYPERCALL_BIT          (1ul << 12)
> +#define HYPERCALL_MASK         0xFFF
> +#define HYPERCALL_VMEXIT       0x1
> +
> +static inline int vmcs_clear(struct vmcs *vmcs)
> +{
> +       bool ret;
> +       asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
> +       return ret;
> +}
> +
> +static inline u64 vmcs_read(enum Encoding enc)
> +{
> +       u64 val;
> +       asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
> +       return val;
> +}
> +
> +static inline int vmcs_write(enum Encoding enc, u64 val)
> +{
> +       bool ret;
> +       asm volatile ("vmwrite %1, %2; setbe %0"
> +               : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
> +       return ret;
> +}
> +
> +static inline int vmcs_save(struct vmcs **vmcs)
> +{
> +       bool ret;
> +
> +       asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc");
> +       return ret;
> +}
> +
> +#endif
> +
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arthur Chunqi Li July 28, 2013, 2:32 p.m. UTC | #2
On Sun, Jul 28, 2013 at 10:24 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> Hi Gleb,
>
> It suddenly occurred to me that this patch also fails to handle
> GUEST_RFLAGS on VMRESUME.
>
> I have decided to remove rflags from struct regs, since rflags can be
> read and set via vmcs_read/vmcs_write in the functions that each test
> suite defines (init and exit_handler), whereas the other general
> registers can only be set in the framework code.
Besides, I previously used regs.rflags as the host rflags (in vmx_run()),
so I changed it to regs.host_rflags to avoid confusion. In the
previous version, regs.rflags was also not used in SAVE_GPR and
LOAD_GPR, so it is reasonable to leave it for the user to set up.

Arthur
>
> Then I will wait for Paolo and Gleb's further feedback and commit the
> final patch.
>
> Arthur
>
> On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
>> This is the first version of the VMX nested environment. It contains
>> basic VMX instruction test cases, including VMXON/VMXOFF/VMPTRLD/
>> VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the
>> basic execution routine in the VMX nested environment and lets the VM
>> print "Hello World" to indicate that it ran successfully.
>>
>> The first release also includes a test suite for vmenter (vmlaunch and
>> vmresume). Besides, a hypercall mechanism is included; currently it is
>> used to trigger a normal VM exit.
>>
>> New files added:
>> x86/vmx.h : contains all VMX related macro declarations
>> x86/vmx.c : main file for VMX nested test case
>>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>> ChangeLog:
>> 1. Refine codes in function vmx_run()
>> 2. Fix bug of setting GUEST_RFLAGS
>> 3. Move defines of selectors to lib/x86/vm.h
>> 4. Move CR0/4 defines to lib/x86/vm.h, as well as some defines from lib/x86/vm.c
>> 5. Move some inline functions to lib/x86/processor.h
>> 6. Move some inline functions (vmcs related) to x86/vmx.h
>> ---
>>  config-x86-common.mak |    2 +
>>  config-x86_64.mak     |    1 +
>>  lib/x86/msr.h         |    5 +
>>  lib/x86/processor.h   |   15 ++
>>  lib/x86/vm.c          |    4 -
>>  lib/x86/vm.h          |   21 ++
>>  x86/cstart64.S        |    4 +
>>  x86/unittests.cfg     |    6 +
>>  x86/vmx.c             |  674 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  x86/vmx.h             |  466 ++++++++++++++++++++++++++++++++++
>>  10 files changed, 1194 insertions(+), 4 deletions(-)
>>  create mode 100644 x86/vmx.c
>>  create mode 100644 x86/vmx.h
>>
>> diff --git a/config-x86-common.mak b/config-x86-common.mak
>> index 455032b..34a41e1 100644
>> --- a/config-x86-common.mak
>> +++ b/config-x86-common.mak
>> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
>>
>>  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
>>
>> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
>> +
>>  arch_clean:
>>         $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
>>         $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
>> diff --git a/config-x86_64.mak b/config-x86_64.mak
>> index 4e525f5..bb8ee89 100644
>> --- a/config-x86_64.mak
>> +++ b/config-x86_64.mak
>> @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>>           $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
>>           $(TEST_DIR)/pcid.flat
>>  tests += $(TEST_DIR)/svm.flat
>> +tests += $(TEST_DIR)/vmx.flat
>>
>>  include config-x86-common.mak
>> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
>> index 509a421..281255a 100644
>> --- a/lib/x86/msr.h
>> +++ b/lib/x86/msr.h
>> @@ -396,6 +396,11 @@
>>  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
>>  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
>>  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
>> +#define MSR_IA32_VMX_TRUE_PIN          0x0000048d
>> +#define MSR_IA32_VMX_TRUE_PROC         0x0000048e
>> +#define MSR_IA32_VMX_TRUE_EXIT         0x0000048f
>> +#define MSR_IA32_VMX_TRUE_ENTRY                0x00000490
>> +
>>
>>  /* AMD-V MSRs */
>>
>> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
>> index e46d8d0..f0c11cc 100644
>> --- a/lib/x86/processor.h
>> +++ b/lib/x86/processor.h
>> @@ -307,4 +307,19 @@ static inline void safe_halt(void)
>>  {
>>         asm volatile("sti; hlt");
>>  }
>> +
>> +#ifdef __x86_64__
>> +static inline u64 read_rflags(void)
>> +{
>> +       u64 r;
>> +       asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
>> +       return r;
>> +}
>> +
>> +static inline void write_rflags(u64 r)
>> +{
>> +       asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
>> +}
>> +#endif
>> +
>>  #endif
>> diff --git a/lib/x86/vm.c b/lib/x86/vm.c
>> index 260ec45..188bf57 100644
>> --- a/lib/x86/vm.c
>> +++ b/lib/x86/vm.c
>> @@ -9,10 +9,6 @@
>>  #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE)
>>  #endif
>>
>> -#define X86_CR0_PE      0x00000001
>> -#define X86_CR0_WP      0x00010000
>> -#define X86_CR0_PG      0x80000000
>> -#define X86_CR4_PSE     0x00000010
>>  static void *free = 0;
>>  static void *vfree_top = 0;
>>
>> diff --git a/lib/x86/vm.h b/lib/x86/vm.h
>> index 0b5b5c7..eff6f72 100644
>> --- a/lib/x86/vm.h
>> +++ b/lib/x86/vm.h
>> @@ -16,6 +16,27 @@
>>  #define PTE_USER    (1ull << 2)
>>  #define PTE_ADDR    (0xffffffffff000ull)
>>
>> +#define X86_CR0_PE      0x00000001
>> +#define X86_CR0_WP      0x00010000
>> +#define X86_CR0_PG      0x80000000
>> +#define X86_CR4_VMXE   0x00000001
>> +#define X86_CR4_PSE     0x00000010
>> +#define X86_CR4_PAE     0x00000020
>> +#define X86_CR4_PCIDE  0x00020000
>> +
>> +#ifdef __x86_64__
>> +#define SEL_NULL_DESC          0x0
>> +#define SEL_KERN_CODE_64       0x8
>> +#define SEL_KERN_DATA_64       0x10
>> +#define SEL_USER_CODE_64       0x18
>> +#define SEL_USER_DATA_64       0x20
>> +#define SEL_CODE_32            0x28
>> +#define SEL_DATA_32            0x30
>> +#define SEL_CODE_16            0x38
>> +#define SEL_DATA_16            0x40
>> +#define SEL_TSS_RUN            0x48
>> +#endif
>> +
>>  void setup_vm();
>>
>>  void *vmalloc(unsigned long size);
>> diff --git a/x86/cstart64.S b/x86/cstart64.S
>> index 24df5f8..0fe76da 100644
>> --- a/x86/cstart64.S
>> +++ b/x86/cstart64.S
>> @@ -4,6 +4,10 @@
>>  .globl boot_idt
>>  boot_idt = 0
>>
>> +.globl idt_descr
>> +.globl tss_descr
>> +.globl gdt64_desc
>> +
>>  ipi_vector = 0x20
>>
>>  max_cpus = 64
>> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
>> index bc9643e..85c36aa 100644
>> --- a/x86/unittests.cfg
>> +++ b/x86/unittests.cfg
>> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
>>  file = pcid.flat
>>  extra_params = -cpu qemu64,+pcid
>>  arch = x86_64
>> +
>> +[vmx]
>> +file = vmx.flat
>> +extra_params = -cpu host,+vmx
>> +arch = x86_64
>> +
>> diff --git a/x86/vmx.c b/x86/vmx.c
>> new file mode 100644
>> index 0000000..7467927
>> --- /dev/null
>> +++ b/x86/vmx.c
>> @@ -0,0 +1,674 @@
>> +#include "libcflat.h"
>> +#include "processor.h"
>> +#include "vm.h"
>> +#include "desc.h"
>> +#include "vmx.h"
>> +#include "msr.h"
>> +#include "smp.h"
>> +#include "io.h"
>> +
>> +int fails = 0, tests = 0;
>> +u32 *vmxon_region;
>> +struct vmcs *vmcs_root;
>> +u32 vpid_cnt;
>> +void *guest_stack, *guest_syscall_stack;
>> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
>> +ulong fix_cr0_set, fix_cr0_clr;
>> +ulong fix_cr4_set, fix_cr4_clr;
>> +struct regs regs;
>> +struct vmx_test *current;
>> +u64 hypercall_field = 0;
>> +bool launched;
>> +
>> +extern u64 gdt64_desc[];
>> +extern u64 idt_descr[];
>> +extern u64 tss_descr[];
>> +extern void *vmx_return;
>> +extern void *entry_sysenter;
>> +extern void *guest_entry;
>> +
>> +static void report(const char *name, int result)
>> +{
>> +       ++tests;
>> +       if (result)
>> +               printf("PASS: %s\n", name);
>> +       else {
>> +               printf("FAIL: %s\n", name);
>> +               ++fails;
>> +       }
>> +}
>> +
>> +static int make_vmcs_current(struct vmcs *vmcs)
>> +{
>> +       bool ret;
>> +
>> +       asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
>> +       return ret;
>> +}
>> +
>> +/* entry_sysenter */
>> +asm(
>> +       ".align 4, 0x90\n\t"
>> +       ".globl entry_sysenter\n\t"
>> +       "entry_sysenter:\n\t"
>> +       SAVE_GPR
>> +       "       and     $0xf, %rax\n\t"
>> +       "       mov     %rax, %rdi\n\t"
>> +       "       call    syscall_handler\n\t"
>> +       LOAD_GPR
>> +       "       vmresume\n\t"
>> +);
>> +
>> +static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
>> +{
>> +       current->syscall_handler(syscall_no);
>> +}
>> +
>> +static inline int vmx_on()
>> +{
>> +       bool ret;
>> +       asm volatile ("vmxon %1; setbe %0\n\t"
>> +               : "=q"(ret) : "m"(vmxon_region) : "cc");
>> +       return ret;
>> +}
>> +
>> +static inline int vmx_off()
>> +{
>> +       bool ret;
>> +       asm volatile("vmxoff; setbe %0\n\t"
>> +               : "=q"(ret) : : "cc");
>> +       return ret;
>> +}
>> +
>> +static void print_vmexit_info()
>> +{
>> +       u64 guest_rip, guest_rsp;
>> +       ulong reason = vmcs_read(EXI_REASON) & 0xff;
>> +       ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
>> +       guest_rip = vmcs_read(GUEST_RIP);
>> +       guest_rsp = vmcs_read(GUEST_RSP);
>> +       printf("VMEXIT info:\n");
>> +       printf("\tvmexit reason = %d\n", reason);
>> +       printf("\texit qualification = 0x%x\n", exit_qual);
>> +       printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
>> +       printf("\tguest_rip = 0x%llx\n", guest_rip);
>> +       printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
>> +               regs.rax, regs.rbx, regs.rcx, regs.rdx);
>> +       printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
>> +               guest_rsp, regs.rbp, regs.rsi, regs.rdi);
>> +       printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
>> +               regs.r8, regs.r9, regs.r10, regs.r11);
>> +       printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
>> +               regs.r12, regs.r13, regs.r14, regs.r15);
>> +}
>> +
>> +static void test_vmclear(void)
>> +{
>> +       u64 rflags;
>> +
>> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +       write_rflags(rflags);
>> +       report("test vmclear", vmcs_clear(vmcs_root) == 0);
>> +}
>> +
>> +static void test_vmxoff(void)
>> +{
>> +       int ret;
>> +       u64 rflags;
>> +
>> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +       write_rflags(rflags);
>> +       ret = vmx_off();
>> +       report("test vmxoff", !ret);
>> +}
>> +
>> +static void __attribute__((__used__)) guest_main(void)
>> +{
>> +       current->guest_main();
>> +}
>> +
>> +/* guest_entry */
>> +asm(
>> +       ".align 4, 0x90\n\t"
>> +       ".globl entry_guest\n\t"
>> +       "guest_entry:\n\t"
>> +       "       call guest_main\n\t"
>> +       "       mov $1, %edi\n\t"
>> +       "       call hypercall\n\t"
>> +);
>> +
>> +static void init_vmcs_ctrl(void)
>> +{
>> +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
>> +       /* 26.2.1.1 */
>> +       vmcs_write(PIN_CONTROLS, ctrl_pin);
>> +       /* Disable VMEXIT of IO instruction */
>> +       vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
>> +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
>> +               ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
>> +               vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
>> +       }
>> +       vmcs_write(CR3_TARGET_COUNT, 0);
>> +       vmcs_write(VPID, ++vpid_cnt);
>> +}
>> +
>> +static void init_vmcs_host(void)
>> +{
>> +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
>> +       /* 26.2.1.2 */
>> +       vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
>> +
>> +       /* 26.2.1.3 */
>> +       vmcs_write(ENT_CONTROLS, ctrl_enter);
>> +       vmcs_write(EXI_CONTROLS, ctrl_exit);
>> +
>> +       /* 26.2.2 */
>> +       vmcs_write(HOST_CR0, read_cr0());
>> +       vmcs_write(HOST_CR3, read_cr3());
>> +       vmcs_write(HOST_CR4, read_cr4());
>> +       vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
>> +       vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
>> +
>> +       /* 26.2.3 */
>> +       vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
>> +       vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
>> +       vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
>> +       vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
>> +       vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
>> +       vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
>> +       vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
>> +       vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
>> +       vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
>> +       vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
>> +       vmcs_write(HOST_BASE_FS, 0);
>> +       vmcs_write(HOST_BASE_GS, 0);
>> +
>> +       /* Set other vmcs area */
>> +       vmcs_write(PF_ERROR_MASK, 0);
>> +       vmcs_write(PF_ERROR_MATCH, 0);
>> +       vmcs_write(VMCS_LINK_PTR, ~0ul);
>> +       vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
>> +       vmcs_write(HOST_RIP, (u64)(&vmx_return));
>> +}
>> +
>> +static void init_vmcs_guest(void)
>> +{
>> +       /* 26.3 CHECKING AND LOADING GUEST STATE */
>> +       ulong guest_cr0, guest_cr4, guest_cr3;
>> +       /* 26.3.1.1 */
>> +       guest_cr0 = read_cr0();
>> +       guest_cr4 = read_cr4();
>> +       guest_cr3 = read_cr3();
>> +       if (ctrl_enter & ENT_GUEST_64) {
>> +               guest_cr0 |= X86_CR0_PG;
>> +               guest_cr4 |= X86_CR4_PAE;
>> +       }
>> +       if ((ctrl_enter & ENT_GUEST_64) == 0)
>> +               guest_cr4 &= (~X86_CR4_PCIDE);
>> +       if (guest_cr0 & X86_CR0_PG)
>> +               guest_cr0 |= X86_CR0_PE;
>> +       vmcs_write(GUEST_CR0, guest_cr0);
>> +       vmcs_write(GUEST_CR3, guest_cr3);
>> +       vmcs_write(GUEST_CR4, guest_cr4);
>> +       vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
>> +       vmcs_write(GUEST_SYSENTER_ESP,
>> +               (u64)(guest_syscall_stack + PAGE_SIZE - 1));
>> +       vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
>> +       vmcs_write(GUEST_DR7, 0);
>> +       vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
>> +
>> +       /* 26.3.1.2 */
>> +       vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
>> +       vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
>> +       vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
>> +       vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
>> +       vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
>> +       vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
>> +       vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
>> +       vmcs_write(GUEST_SEL_LDTR, 0);
>> +
>> +       vmcs_write(GUEST_BASE_CS, 0);
>> +       vmcs_write(GUEST_BASE_ES, 0);
>> +       vmcs_write(GUEST_BASE_SS, 0);
>> +       vmcs_write(GUEST_BASE_DS, 0);
>> +       vmcs_write(GUEST_BASE_FS, 0);
>> +       vmcs_write(GUEST_BASE_GS, 0);
>> +       vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
>> +       vmcs_write(GUEST_BASE_LDTR, 0);
>> +
>> +       vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
>> +       vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
>> +       vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
>> +       vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
>> +       vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
>> +       vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
>> +       vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
>> +       vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
>> +
>> +       vmcs_write(GUEST_AR_CS, 0xa09b);
>> +       vmcs_write(GUEST_AR_DS, 0xc093);
>> +       vmcs_write(GUEST_AR_ES, 0xc093);
>> +       vmcs_write(GUEST_AR_FS, 0xc093);
>> +       vmcs_write(GUEST_AR_GS, 0xc093);
>> +       vmcs_write(GUEST_AR_SS, 0xc093);
>> +       vmcs_write(GUEST_AR_LDTR, 0x82);
>> +       vmcs_write(GUEST_AR_TR, 0x8b);
>> +
>> +       /* 26.3.1.3 */
>> +       vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
>> +       vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
>> +       vmcs_write(GUEST_LIMIT_GDTR,
>> +               ((struct descr *)gdt64_desc)->limit & 0xffff);
>> +       vmcs_write(GUEST_LIMIT_IDTR,
>> +               ((struct descr *)idt_descr)->limit & 0xffff);
>> +
>> +       /* 26.3.1.4 */
>> +       vmcs_write(GUEST_RIP, (u64)(&guest_entry));
>> +       vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
>> +       vmcs_write(GUEST_RFLAGS, 0x2);
>> +
>> +       /* 26.3.1.5 */
>> +       vmcs_write(GUEST_ACTV_STATE, 0);
>> +       vmcs_write(GUEST_INTR_STATE, 0);
>> +}
>> +
>> +static int init_vmcs(struct vmcs **vmcs)
>> +{
>> +       *vmcs = alloc_page();
>> +       memset(*vmcs, 0, PAGE_SIZE);
>> +       (*vmcs)->revision_id = basic.revision;
>> +       /* vmclear first to init vmcs */
>> +       if (vmcs_clear(*vmcs)) {
>> +               printf("%s : vmcs_clear error\n", __func__);
>> +               return 1;
>> +       }
>> +
>> +       if (make_vmcs_current(*vmcs)) {
>> +               printf("%s : make_vmcs_current error\n", __func__);
>> +               return 1;
>> +       }
>> +
>> +       /* All settings to pin/exit/enter/cpu
>> +          control fields should be placed here */
>> +       ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
>> +       ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
>> +       ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
>> +       ctrl_cpu[0] |= CPU_HLT;
>> +       /* Disable IO instruction VMEXIT for now */
>> +       ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
>> +       ctrl_cpu[1] = 0;
>> +
>> +       ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
>> +       ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
>> +       ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
>> +       ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
>> +
>> +       init_vmcs_ctrl();
>> +       init_vmcs_host();
>> +       init_vmcs_guest();
>> +       return 0;
>> +}
>> +
>> +static void init_vmx(void)
>> +{
>> +       vmxon_region = alloc_page();
>> +       memset(vmxon_region, 0, PAGE_SIZE);
>> +
>> +       fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
>> +       fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
>> +       fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
>> +       fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
>> +       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
>> +       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
>> +                       : MSR_IA32_VMX_PINBASED_CTLS);
>> +       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
>> +                       : MSR_IA32_VMX_EXIT_CTLS);
>> +       ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
>> +                       : MSR_IA32_VMX_ENTRY_CTLS);
>> +       ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
>> +                       : MSR_IA32_VMX_PROCBASED_CTLS);
>> +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
>> +               ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
>> +       if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
>> +               ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
>> +
>> +       write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
>> +       write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
>> +
>> +       *vmxon_region = basic.revision;
>> +
>> +       guest_stack = alloc_page();
>> +       memset(guest_stack, 0, PAGE_SIZE);
>> +       guest_syscall_stack = alloc_page();
>> +       memset(guest_syscall_stack, 0, PAGE_SIZE);
>> +}
>> +
>> +static int test_vmx_capability(void)
>> +{
>> +       struct cpuid r;
>> +       u64 ret1, ret2;
>> +       u64 ia32_feature_control;
>> +       r = cpuid(1);
>> +       ret1 = ((r.c) >> 5) & 1;
>> +       ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
>> +       ret2 = ((ia32_feature_control & 0x5) == 0x5);
>> +       if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) {
>> +               wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
>> +               ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
>> +               ret2 = ((ia32_feature_control & 0x5) == 0x5);
>> +       }
>> +       report("test vmx capability", ret1 & ret2);
>> +       return !(ret1 & ret2);
>> +}
>> +
>> +static int test_vmxon(void)
>> +{
>> +       int ret;
>> +       u64 rflags;
>> +
>> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +       write_rflags(rflags);
>> +       ret = vmx_on();
>> +       report("test vmxon", !ret);
>> +       return ret;
>> +}
>> +
>> +static void test_vmptrld(void)
>> +{
>> +       u64 rflags;
>> +       struct vmcs *vmcs;
>> +
>> +       vmcs = alloc_page();
>> +       vmcs->revision_id = basic.revision;
>> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +       write_rflags(rflags);
>> +       report("test vmptrld", make_vmcs_current(vmcs) == 0);
>> +}
>> +
>> +static void test_vmptrst(void)
>> +{
>> +       u64 rflags;
>> +       int ret;
>> +       struct vmcs *vmcs1, *vmcs2;
>> +
>> +       vmcs1 = alloc_page();
>> +       memset(vmcs1, 0, PAGE_SIZE);
>> +       init_vmcs(&vmcs1);
>> +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +       write_rflags(rflags);
>> +       ret = vmcs_save(&vmcs2);
>> +       report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
>> +}
>> +
>> +/* This function can only be called in guest */
>> +static void __attribute__((__used__)) hypercall(u32 hypercall_no)
>> +{
>> +       u64 val = 0;
>> +       val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
>> +       hypercall_field = val;
>> +       asm volatile("vmcall\n\t");
>> +}
>> +
>> +static bool is_hypercall()
>> +{
>> +       ulong reason, hyper_bit;
>> +
>> +       reason = vmcs_read(EXI_REASON) & 0xff;
>> +       hyper_bit = hypercall_field & HYPERCALL_BIT;
>> +       if (reason == VMX_VMCALL && hyper_bit)
>> +               return true;
>> +       return false;
>> +}
>> +
>> +static int handle_hypercall()
>> +{
>> +       ulong hypercall_no;
>> +
>> +       hypercall_no = hypercall_field & HYPERCALL_MASK;
>> +       hypercall_field = 0;
>> +       switch (hypercall_no) {
>> +       case HYPERCALL_VMEXIT:
>> +               return VMX_TEST_VMEXIT;
>> +       default:
>> +               printf("ERROR : Invalid hypercall number : %d\n", hypercall_no);
>> +       }
>> +       return VMX_TEST_EXIT;
>> +}
>> +
>> +static int exit_handler()
>> +{
>> +       int ret;
>> +
>> +       current->exits++;
>> +       current->guest_regs = regs;
>> +       if (is_hypercall())
>> +               ret = handle_hypercall();
>> +       else
>> +               ret = current->exit_handler();
>> +       regs = current->guest_regs;
>> +       switch (ret) {
>> +       case VMX_TEST_VMEXIT:
>> +       case VMX_TEST_RESUME:
>> +               return ret;
>> +       case VMX_TEST_EXIT:
>> +               break;
>> +       default:
>> +               printf("ERROR : Invalid exit_handler return val %d.\n"
>> +                       , ret);
>> +       }
>> +       print_vmexit_info();
>> +       exit(-1);
>> +       return 0;
>> +}
>> +
>> +static int vmx_run()
>> +{
>> +       u32 ret = 0, fail = 0;
>> +
>> +       while (1) {
>> +               asm volatile (
>> +                       "mov %%rsp, %%rsi\n\t"
>> +                       "mov %2, %%rdi\n\t"
>> +                       "vmwrite %%rsi, %%rdi\n\t"
>> +
>> +                       LOAD_GPR_C
>> +                       "cmpl $0, %1\n\t"
>> +                       "jne 1f\n\t"
>> +                       LOAD_RFLAGS
>> +                       "vmlaunch\n\t"
>> +                       "jmp 2f\n\t"
>> +                       "1: "
>> +                       "vmresume\n\t"
>> +                       "2: "
>> +                       "setbe %0\n\t"
>> +                       "vmx_return:\n\t"
>> +                       SAVE_GPR_C
>> +                       SAVE_RFLAGS
>> +                       : "=m"(fail)
>> +                       : "m"(launched), "i"(HOST_RSP)
>> +                       : "rdi", "rsi", "memory", "cc"
>> +
>> +               );
>> +               if (fail)
>> +                       ret = launched ? VMX_TEST_RESUME_ERR :
>> +                               VMX_TEST_LAUNCH_ERR;
>> +               else {
>> +                       launched = 1;
>> +                       ret = exit_handler();
>> +               }
>> +               if (ret != VMX_TEST_RESUME)
>> +                       break;
>> +       }
>> +       launched = 0;
>> +       switch (ret) {
>> +       case VMX_TEST_VMEXIT:
>> +               return 0;
>> +       case VMX_TEST_LAUNCH_ERR:
>> +               printf("%s : vmlaunch failed.\n", __func__);
>> +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
>> +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
>> +                       printf("\tvmlaunch set wrong flags\n");
>> +               report("test vmlaunch", 0);
>> +               break;
>> +       case VMX_TEST_RESUME_ERR:
>> +               printf("%s : vmresume failed.\n", __func__);
>> +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
>> +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
>> +                       printf("\tvmresume set wrong flags\n");
>> +               report("test vmresume", 0);
>> +               break;
>> +       default:
>> +               printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
>> +               break;
>> +       }
>> +       return 1;
>> +}
>> +
>> +static int test_run(struct vmx_test *test)
>> +{
>> +       if (test->name == NULL)
>> +               test->name = "(no name)";
>> +       if (vmx_on()) {
>> +               printf("%s : vmxon failed.\n", __func__);
>> +               return 1;
>> +       }
>> +       init_vmcs(&(test->vmcs));
>> +       /* Calling test->init directly is OK here: init_vmcs has already
>> +          done the vmcs init, vmclear and vmptrld */
>> +       if (test->init)
>> +               test->init(test->vmcs);
>> +       test->exits = 0;
>> +       current = test;
>> +       regs = test->guest_regs;
>> +       vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
>> +       launched = 0;
>> +       printf("\nTest suite : %s\n", test->name);
>> +       vmx_run();
>> +       if (vmx_off()) {
>> +               printf("%s : vmxoff failed.\n", __func__);
>> +               return 1;
>> +       }
>> +       return 0;
>> +}
>> +
>> +static void basic_init()
>> +{
>> +}
>> +
>> +static void basic_guest_main()
>> +{
>> +       /* Here is null guest_main, print Hello World */
>> +       printf("\tHello World, this is null_guest_main!\n");
>> +}
>> +
>> +static int basic_exit_handler()
>> +{
>> +       u64 guest_rip;
>> +       ulong reason;
>> +
>> +       guest_rip = vmcs_read(GUEST_RIP);
>> +       reason = vmcs_read(EXI_REASON) & 0xff;
>> +
>> +       switch (reason) {
>> +       case VMX_VMCALL:
>> +               print_vmexit_info();
>> +               vmcs_write(GUEST_RIP, guest_rip + 3);
>> +               return VMX_TEST_RESUME;
>> +       default:
>> +               break;
>> +       }
>> +       printf("ERROR : Unhandled vmx exit.\n");
>> +       print_vmexit_info();
>> +       return VMX_TEST_EXIT;
>> +}
>> +
>> +static void basic_syscall_handler(u64 syscall_no)
>> +{
>> +}
>> +
>> +static void vmenter_main()
>> +{
>> +       u64 rax;
>> +       u64 rsp, resume_rsp;
>> +
>> +       report("test vmlaunch", 1);
>> +
>> +       asm volatile(
>> +               "mov %%rsp, %0\n\t"
>> +               "mov %3, %%rax\n\t"
>> +               "vmcall\n\t"
>> +               "mov %%rax, %1\n\t"
>> +               "mov %%rsp, %2\n\t"
>> +               : "=r"(rsp), "=r"(rax), "=r"(resume_rsp)
>> +               : "g"(0xABCD));
>> +       report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
>> +}
>> +
>> +static int vmenter_exit_handler()
>> +{
>> +       u64 guest_rip;
>> +       ulong reason;
>> +
>> +       guest_rip = vmcs_read(GUEST_RIP);
>> +       reason = vmcs_read(EXI_REASON) & 0xff;
>> +       switch (reason) {
>> +       case VMX_VMCALL:
>> +               if (current->guest_regs.rax != 0xABCD) {
>> +                       report("test vmresume", 0);
>> +                       return VMX_TEST_VMEXIT;
>> +               }
>> +               current->guest_regs.rax = 0xFFFF;
>> +               vmcs_write(GUEST_RIP, guest_rip + 3);
>> +               return VMX_TEST_RESUME;
>> +       default:
>> +               report("test vmresume", 0);
>> +               print_vmexit_info();
>> +       }
>> +       return VMX_TEST_VMEXIT;
>> +}
>> +
>> +
>> +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs
>> +   basic_* just implement some basic functions */
>> +static struct vmx_test vmx_tests[] = {
>> +       { "null", basic_init, basic_guest_main, basic_exit_handler,
>> +               basic_syscall_handler, {0} },
>> +       { "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
>> +               basic_syscall_handler, {0} },
>> +};
>> +
>> +int main(void)
>> +{
>> +       int i;
>> +
>> +       setup_vm();
>> +       setup_idt();
>> +
>> +       if (test_vmx_capability() != 0) {
>> +               printf("ERROR : vmx not supported, check +vmx option\n");
>> +               goto exit;
>> +       }
>> +       init_vmx();
>> +       /* Set basic test ctxt the same as "null" */
>> +       current = &vmx_tests[0];
>> +       if (test_vmxon() != 0)
>> +               goto exit;
>> +       test_vmptrld();
>> +       test_vmclear();
>> +       test_vmptrst();
>> +       init_vmcs(&vmcs_root);
>> +       if (vmx_run()) {
>> +               report("test vmlaunch", 0);
>> +               goto exit;
>> +       }
>> +       test_vmxoff();
>> +
>> +       for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) {
>> +               if (test_run(&vmx_tests[i]))
>> +                       goto exit;
>> +       }
>> +
>> +exit:
>> +       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>> +       return fails ? 1 : 0;
>> +}
>> diff --git a/x86/vmx.h b/x86/vmx.h
>> new file mode 100644
>> index 0000000..1fb9738
>> --- /dev/null
>> +++ b/x86/vmx.h
>> @@ -0,0 +1,466 @@
>> +#ifndef __HYPERVISOR_H
>> +#define __HYPERVISOR_H
>> +
>> +#include "libcflat.h"
>> +
>> +struct vmcs {
>> +       u32 revision_id; /* vmcs revision identifier */
>> +       u32 abort; /* VMX-abort indicator */
>> +       /* VMCS data */
>> +       char data[0];
>> +};
>> +
>> +struct regs {
>> +       u64 rax;
>> +       u64 rcx;
>> +       u64 rdx;
>> +       u64 rbx;
>> +       u64 cr2;
>> +       u64 rbp;
>> +       u64 rsi;
>> +       u64 rdi;
>> +       u64 r8;
>> +       u64 r9;
>> +       u64 r10;
>> +       u64 r11;
>> +       u64 r12;
>> +       u64 r13;
>> +       u64 r14;
>> +       u64 r15;
>> +       u64 rflags;
>> +};
>> +
>> +struct vmx_test {
>> +       const char *name;
>> +       void (*init)(struct vmcs *vmcs);
>> +       void (*guest_main)();
>> +       int (*exit_handler)();
>> +       void (*syscall_handler)(u64 syscall_no);
>> +       struct regs guest_regs;
>> +       struct vmcs *vmcs;
>> +       int exits;
>> +};
>> +
>> +static union vmx_basic {
>> +       u64 val;
>> +       struct {
>> +               u32 revision;
>> +               u32     size:13,
>> +                       : 3,
>> +                       width:1,
>> +                       dual:1,
>> +                       type:4,
>> +                       insouts:1,
>> +                       ctrl:1;
>> +       };
>> +} basic;
>> +
>> +static union vmx_ctrl_pin {
>> +       u64 val;
>> +       struct {
>> +               u32 set, clr;
>> +       };
>> +} ctrl_pin_rev;
>> +
>> +static union vmx_ctrl_cpu {
>> +       u64 val;
>> +       struct {
>> +               u32 set, clr;
>> +       };
>> +} ctrl_cpu_rev[2];
>> +
>> +static union vmx_ctrl_exit {
>> +       u64 val;
>> +       struct {
>> +               u32 set, clr;
>> +       };
>> +} ctrl_exit_rev;
>> +
>> +static union vmx_ctrl_ent {
>> +       u64 val;
>> +       struct {
>> +               u32 set, clr;
>> +       };
>> +} ctrl_enter_rev;
>> +
>> +static union vmx_ept_vpid {
>> +       u64 val;
>> +       struct {
>> +               u32:16,
>> +                       super:2,
>> +                       : 2,
>> +                       invept:1,
>> +                       : 11;
>> +               u32     invvpid:1;
>> +       };
>> +} ept_vpid;
>> +
>> +struct descr {
>> +       u16 limit;
>> +       u64 addr;
>> +};
>> +
>> +enum Encoding {
>> +       /* 16-Bit Control Fields */
>> +       VPID                    = 0x0000ul,
>> +       /* Posted-interrupt notification vector */
>> +       PINV                    = 0x0002ul,
>> +       /* EPTP index */
>> +       EPTP_IDX                = 0x0004ul,
>> +
>> +       /* 16-Bit Guest State Fields */
>> +       GUEST_SEL_ES            = 0x0800ul,
>> +       GUEST_SEL_CS            = 0x0802ul,
>> +       GUEST_SEL_SS            = 0x0804ul,
>> +       GUEST_SEL_DS            = 0x0806ul,
>> +       GUEST_SEL_FS            = 0x0808ul,
>> +       GUEST_SEL_GS            = 0x080aul,
>> +       GUEST_SEL_LDTR          = 0x080cul,
>> +       GUEST_SEL_TR            = 0x080eul,
>> +       GUEST_INT_STATUS        = 0x0810ul,
>> +
>> +       /* 16-Bit Host State Fields */
>> +       HOST_SEL_ES             = 0x0c00ul,
>> +       HOST_SEL_CS             = 0x0c02ul,
>> +       HOST_SEL_SS             = 0x0c04ul,
>> +       HOST_SEL_DS             = 0x0c06ul,
>> +       HOST_SEL_FS             = 0x0c08ul,
>> +       HOST_SEL_GS             = 0x0c0aul,
>> +       HOST_SEL_TR             = 0x0c0cul,
>> +
>> +       /* 64-Bit Control Fields */
>> +       IO_BITMAP_A             = 0x2000ul,
>> +       IO_BITMAP_B             = 0x2002ul,
>> +       MSR_BITMAP              = 0x2004ul,
>> +       EXIT_MSR_ST_ADDR        = 0x2006ul,
>> +       EXIT_MSR_LD_ADDR        = 0x2008ul,
>> +       ENTER_MSR_LD_ADDR       = 0x200aul,
>> +       VMCS_EXEC_PTR           = 0x200cul,
>> +       TSC_OFFSET              = 0x2010ul,
>> +       TSC_OFFSET_HI           = 0x2011ul,
>> +       APIC_VIRT_ADDR          = 0x2012ul,
>> +       APIC_ACCS_ADDR          = 0x2014ul,
>> +       EPTP                    = 0x201aul,
>> +       EPTP_HI                 = 0x201bul,
>> +
>> +       /* 64-Bit Readonly Data Field */
>> +       INFO_PHYS_ADDR          = 0x2400ul,
>> +
>> +       /* 64-Bit Guest State */
>> +       VMCS_LINK_PTR           = 0x2800ul,
>> +       VMCS_LINK_PTR_HI        = 0x2801ul,
>> +       GUEST_DEBUGCTL          = 0x2802ul,
>> +       GUEST_DEBUGCTL_HI       = 0x2803ul,
>> +       GUEST_EFER              = 0x2806ul,
>> +       GUEST_PERF_GLOBAL_CTRL  = 0x2808ul,
>> +       GUEST_PDPTE             = 0x280aul,
>> +
>> +       /* 64-Bit Host State */
>> +       HOST_EFER               = 0x2c02ul,
>> +       HOST_PERF_GLOBAL_CTRL   = 0x2c04ul,
>> +
>> +       /* 32-Bit Control Fields */
>> +       PIN_CONTROLS            = 0x4000ul,
>> +       CPU_EXEC_CTRL0          = 0x4002ul,
>> +       EXC_BITMAP              = 0x4004ul,
>> +       PF_ERROR_MASK           = 0x4006ul,
>> +       PF_ERROR_MATCH          = 0x4008ul,
>> +       CR3_TARGET_COUNT        = 0x400aul,
>> +       EXI_CONTROLS            = 0x400cul,
>> +       EXI_MSR_ST_CNT          = 0x400eul,
>> +       EXI_MSR_LD_CNT          = 0x4010ul,
>> +       ENT_CONTROLS            = 0x4012ul,
>> +       ENT_MSR_LD_CNT          = 0x4014ul,
>> +       ENT_INTR_INFO           = 0x4016ul,
>> +       ENT_INTR_ERROR          = 0x4018ul,
>> +       ENT_INST_LEN            = 0x401aul,
>> +       TPR_THRESHOLD           = 0x401cul,
>> +       CPU_EXEC_CTRL1          = 0x401eul,
>> +
>> +       /* 32-Bit R/O Data Fields */
>> +       VMX_INST_ERROR          = 0x4400ul,
>> +       EXI_REASON              = 0x4402ul,
>> +       EXI_INTR_INFO           = 0x4404ul,
>> +       EXI_INTR_ERROR          = 0x4406ul,
>> +       IDT_VECT_INFO           = 0x4408ul,
>> +       IDT_VECT_ERROR          = 0x440aul,
>> +       EXI_INST_LEN            = 0x440cul,
>> +       EXI_INST_INFO           = 0x440eul,
>> +
>> +       /* 32-Bit Guest State Fields */
>> +       GUEST_LIMIT_ES          = 0x4800ul,
>> +       GUEST_LIMIT_CS          = 0x4802ul,
>> +       GUEST_LIMIT_SS          = 0x4804ul,
>> +       GUEST_LIMIT_DS          = 0x4806ul,
>> +       GUEST_LIMIT_FS          = 0x4808ul,
>> +       GUEST_LIMIT_GS          = 0x480aul,
>> +       GUEST_LIMIT_LDTR        = 0x480cul,
>> +       GUEST_LIMIT_TR          = 0x480eul,
>> +       GUEST_LIMIT_GDTR        = 0x4810ul,
>> +       GUEST_LIMIT_IDTR        = 0x4812ul,
>> +       GUEST_AR_ES             = 0x4814ul,
>> +       GUEST_AR_CS             = 0x4816ul,
>> +       GUEST_AR_SS             = 0x4818ul,
>> +       GUEST_AR_DS             = 0x481aul,
>> +       GUEST_AR_FS             = 0x481cul,
>> +       GUEST_AR_GS             = 0x481eul,
>> +       GUEST_AR_LDTR           = 0x4820ul,
>> +       GUEST_AR_TR             = 0x4822ul,
>> +       GUEST_INTR_STATE        = 0x4824ul,
>> +       GUEST_ACTV_STATE        = 0x4826ul,
>> +       GUEST_SMBASE            = 0x4828ul,
>> +       GUEST_SYSENTER_CS       = 0x482aul,
>> +
>> +       /* 32-Bit Host State Fields */
>> +       HOST_SYSENTER_CS        = 0x4c00ul,
>> +
>> +       /* Natural-Width Control Fields */
>> +       CR0_MASK                = 0x6000ul,
>> +       CR4_MASK                = 0x6002ul,
>> +       CR0_READ_SHADOW = 0x6004ul,
>> +       CR4_READ_SHADOW = 0x6006ul,
>> +       CR3_TARGET_0            = 0x6008ul,
>> +       CR3_TARGET_1            = 0x600aul,
>> +       CR3_TARGET_2            = 0x600cul,
>> +       CR3_TARGET_3            = 0x600eul,
>> +
>> +       /* Natural-Width R/O Data Fields */
>> +       EXI_QUALIFICATION       = 0x6400ul,
>> +       IO_RCX                  = 0x6402ul,
>> +       IO_RSI                  = 0x6404ul,
>> +       IO_RDI                  = 0x6406ul,
>> +       IO_RIP                  = 0x6408ul,
>> +       GUEST_LINEAR_ADDRESS    = 0x640aul,
>> +
>> +       /* Natural-Width Guest State Fields */
>> +       GUEST_CR0               = 0x6800ul,
>> +       GUEST_CR3               = 0x6802ul,
>> +       GUEST_CR4               = 0x6804ul,
>> +       GUEST_BASE_ES           = 0x6806ul,
>> +       GUEST_BASE_CS           = 0x6808ul,
>> +       GUEST_BASE_SS           = 0x680aul,
>> +       GUEST_BASE_DS           = 0x680cul,
>> +       GUEST_BASE_FS           = 0x680eul,
>> +       GUEST_BASE_GS           = 0x6810ul,
>> +       GUEST_BASE_LDTR         = 0x6812ul,
>> +       GUEST_BASE_TR           = 0x6814ul,
>> +       GUEST_BASE_GDTR         = 0x6816ul,
>> +       GUEST_BASE_IDTR         = 0x6818ul,
>> +       GUEST_DR7               = 0x681aul,
>> +       GUEST_RSP               = 0x681cul,
>> +       GUEST_RIP               = 0x681eul,
>> +       GUEST_RFLAGS            = 0x6820ul,
>> +       GUEST_PENDING_DEBUG     = 0x6822ul,
>> +       GUEST_SYSENTER_ESP      = 0x6824ul,
>> +       GUEST_SYSENTER_EIP      = 0x6826ul,
>> +
>> +       /* Natural-Width Host State Fields */
>> +       HOST_CR0                = 0x6c00ul,
>> +       HOST_CR3                = 0x6c02ul,
>> +       HOST_CR4                = 0x6c04ul,
>> +       HOST_BASE_FS            = 0x6c06ul,
>> +       HOST_BASE_GS            = 0x6c08ul,
>> +       HOST_BASE_TR            = 0x6c0aul,
>> +       HOST_BASE_GDTR          = 0x6c0cul,
>> +       HOST_BASE_IDTR          = 0x6c0eul,
>> +       HOST_SYSENTER_ESP       = 0x6c10ul,
>> +       HOST_SYSENTER_EIP       = 0x6c12ul,
>> +       HOST_RSP                = 0x6c14ul,
>> +       HOST_RIP                = 0x6c16ul
>> +};
>> +
>> +enum Reason {
>> +       VMX_EXC_NMI             = 0,
>> +       VMX_EXTINT              = 1,
>> +       VMX_TRIPLE_FAULT        = 2,
>> +       VMX_INIT                = 3,
>> +       VMX_SIPI                = 4,
>> +       VMX_SMI_IO              = 5,
>> +       VMX_SMI_OTHER           = 6,
>> +       VMX_INTR_WINDOW         = 7,
>> +       VMX_NMI_WINDOW          = 8,
>> +       VMX_TASK_SWITCH         = 9,
>> +       VMX_CPUID               = 10,
>> +       VMX_GETSEC              = 11,
>> +       VMX_HLT                 = 12,
>> +       VMX_INVD                = 13,
>> +       VMX_INVLPG              = 14,
>> +       VMX_RDPMC               = 15,
>> +       VMX_RDTSC               = 16,
>> +       VMX_RSM                 = 17,
>> +       VMX_VMCALL              = 18,
>> +       VMX_VMCLEAR             = 19,
>> +       VMX_VMLAUNCH            = 20,
>> +       VMX_VMPTRLD             = 21,
>> +       VMX_VMPTRST             = 22,
>> +       VMX_VMREAD              = 23,
>> +       VMX_VMRESUME            = 24,
>> +       VMX_VMWRITE             = 25,
>> +       VMX_VMXOFF              = 26,
>> +       VMX_VMXON               = 27,
>> +       VMX_CR                  = 28,
>> +       VMX_DR                  = 29,
>> +       VMX_IO                  = 30,
>> +       VMX_RDMSR               = 31,
>> +       VMX_WRMSR               = 32,
>> +       VMX_FAIL_STATE          = 33,
>> +       VMX_FAIL_MSR            = 34,
>> +       VMX_MWAIT               = 36,
>> +       VMX_MTF                 = 37,
>> +       VMX_MONITOR             = 39,
>> +       VMX_PAUSE               = 40,
>> +       VMX_FAIL_MCHECK         = 41,
>> +       VMX_TPR_THRESHOLD       = 43,
>> +       VMX_APIC_ACCESS         = 44,
>> +       VMX_GDTR_IDTR           = 46,
>> +       VMX_LDTR_TR             = 47,
>> +       VMX_EPT_VIOLATION       = 48,
>> +       VMX_EPT_MISCONFIG       = 49,
>> +       VMX_INVEPT              = 50,
>> +       VMX_PREEMPT             = 52,
>> +       VMX_INVVPID             = 53,
>> +       VMX_WBINVD              = 54,
>> +       VMX_XSETBV              = 55
>> +};
>> +
>> +#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
>> +#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
>> +
>> +enum Ctrl_exi {
>> +       EXI_HOST_64             = 1UL << 9,
>> +       EXI_LOAD_PERF           = 1UL << 12,
>> +       EXI_INTA                = 1UL << 15,
>> +       EXI_LOAD_EFER           = 1UL << 21,
>> +};
>> +
>> +enum Ctrl_ent {
>> +       ENT_GUEST_64            = 1UL << 9,
>> +       ENT_LOAD_EFER           = 1UL << 15,
>> +};
>> +
>> +enum Ctrl_pin {
>> +       PIN_EXTINT              = 1ul << 0,
>> +       PIN_NMI                 = 1ul << 3,
>> +       PIN_VIRT_NMI            = 1ul << 5,
>> +};
>> +
>> +enum Ctrl0 {
>> +       CPU_INTR_WINDOW         = 1ul << 2,
>> +       CPU_HLT                 = 1ul << 7,
>> +       CPU_INVLPG              = 1ul << 9,
>> +       CPU_CR3_LOAD            = 1ul << 15,
>> +       CPU_CR3_STORE           = 1ul << 16,
>> +       CPU_TPR_SHADOW          = 1ul << 21,
>> +       CPU_NMI_WINDOW          = 1ul << 22,
>> +       CPU_IO                  = 1ul << 24,
>> +       CPU_IO_BITMAP           = 1ul << 25,
>> +       CPU_SECONDARY           = 1ul << 31,
>> +};
>> +
>> +enum Ctrl1 {
>> +       CPU_EPT                 = 1ul << 1,
>> +       CPU_VPID                = 1ul << 5,
>> +       CPU_URG                 = 1ul << 7,
>> +};
>> +
>> +#define SAVE_GPR                               \
>> +       "xchg %rax, regs\n\t"                   \
>> +       "xchg %rbx, regs+0x8\n\t"               \
>> +       "xchg %rcx, regs+0x10\n\t"              \
>> +       "xchg %rdx, regs+0x18\n\t"              \
>> +       "xchg %rbp, regs+0x28\n\t"              \
>> +       "xchg %rsi, regs+0x30\n\t"              \
>> +       "xchg %rdi, regs+0x38\n\t"              \
>> +       "xchg %r8, regs+0x40\n\t"               \
>> +       "xchg %r9, regs+0x48\n\t"               \
>> +       "xchg %r10, regs+0x50\n\t"              \
>> +       "xchg %r11, regs+0x58\n\t"              \
>> +       "xchg %r12, regs+0x60\n\t"              \
>> +       "xchg %r13, regs+0x68\n\t"              \
>> +       "xchg %r14, regs+0x70\n\t"              \
>> +       "xchg %r15, regs+0x78\n\t"
>> +
>> +#define LOAD_GPR       SAVE_GPR
>> +
>> +#define SAVE_GPR_C                             \
>> +       "xchg %%rax, regs\n\t"                  \
>> +       "xchg %%rbx, regs+0x8\n\t"              \
>> +       "xchg %%rcx, regs+0x10\n\t"             \
>> +       "xchg %%rdx, regs+0x18\n\t"             \
>> +       "xchg %%rbp, regs+0x28\n\t"             \
>> +       "xchg %%rsi, regs+0x30\n\t"             \
>> +       "xchg %%rdi, regs+0x38\n\t"             \
>> +       "xchg %%r8, regs+0x40\n\t"              \
>> +       "xchg %%r9, regs+0x48\n\t"              \
>> +       "xchg %%r10, regs+0x50\n\t"             \
>> +       "xchg %%r11, regs+0x58\n\t"             \
>> +       "xchg %%r12, regs+0x60\n\t"             \
>> +       "xchg %%r13, regs+0x68\n\t"             \
>> +       "xchg %%r14, regs+0x70\n\t"             \
>> +       "xchg %%r15, regs+0x78\n\t"
>> +
>> +#define LOAD_GPR_C     SAVE_GPR_C
>> +
>> +#define SAVE_RFLAGS            \
>> +       "pushf\n\t"                     \
>> +       "pop regs+0x80\n\t"
>> +
>> +#define LOAD_RFLAGS            \
>> +       "push regs+0x80\n\t"    \
>> +       "popf\n\t"
>> +
>> +#define VMX_IO_SIZE_MASK               0x7
>> +#define _VMX_IO_BYTE                   1
>> +#define _VMX_IO_WORD                   2
>> +#define _VMX_IO_LONG                   3
>> +#define VMX_IO_DIRECTION_MASK          (1ul << 3)
>> +#define VMX_IO_IN                      (1ul << 3)
>> +#define VMX_IO_OUT                     0
>> +#define VMX_IO_STRING                  (1ul << 4)
>> +#define VMX_IO_REP                     (1ul << 5)
>> +#define VMX_IO_OPRAND_DX               (1ul << 6)
>> +#define VMX_IO_PORT_MASK               0xFFFF0000
>> +#define VMX_IO_PORT_SHIFT              16
>> +
>> +#define VMX_TEST_VMEXIT                        1
>> +#define VMX_TEST_EXIT                  2
>> +#define VMX_TEST_RESUME                        3
>> +#define VMX_TEST_LAUNCH_ERR            4
>> +#define VMX_TEST_RESUME_ERR            5
>> +
>> +#define HYPERCALL_BIT          (1ul << 12)
>> +#define HYPERCALL_MASK         0xFFF
>> +#define HYPERCALL_VMEXIT       0x1
>> +
>> +static inline int vmcs_clear(struct vmcs *vmcs)
>> +{
>> +       bool ret;
>> +       asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
>> +       return ret;
>> +}
>> +
>> +static inline u64 vmcs_read(enum Encoding enc)
>> +{
>> +       u64 val;
>> +       asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
>> +       return val;
>> +}
>> +
>> +static inline int vmcs_write(enum Encoding enc, u64 val)
>> +{
>> +       bool ret;
>> +       asm volatile ("vmwrite %1, %2; setbe %0"
>> +               : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
>> +       return ret;
>> +}
>> +
>> +static inline int vmcs_save(struct vmcs **vmcs)
>> +{
>> +       bool ret;
>> +
>> +       asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc");
>> +       return ret;
>> +}
>> +
>> +#endif
>> +
>> --
>> 1.7.9.5
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 28, 2013, 2:34 p.m. UTC | #3
On Sun, Jul 28, 2013 at 10:24:34PM +0800, Arthur Chunqi Li wrote:
> Hi Gleb,
> 
> It suddenly occurred to me that this patch also fails to handle
> GUEST_RFLAGS on VMRESUME.
> 
As you say below, if a test wants to change rflags in the middle of
a run it can use vmcs_write, so this is not a big deal.

> I have decided to remove rflags from struct regs, since rflags can be
> read and set via vmcs_read/vmcs_write in the functions defined by a test
> suite (init and exit_handler), and the other general registers can only
> be set in the framework code.
> 
The code that prints the vmlaunch/vmresume error in vmx_run() relies on
rflags being saved by the assembly code, so be careful.

> Then I will wait for Paolo's and Gleb's further feedback and commit the
> final patch.
> 
Yes, please wait for Paolo's comments. I want to hear his opinion on the
assembly code. I will not be surprised if he finds a reason it
cannot work :)

> Arthur
> 
> On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> > This is the first version of the VMX nested environment. It contains
> > basic VMX instruction test cases, including VMXON/VMXOFF/VMPTRLD/
> > VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the
> > basic execution routine in the VMX nested environment and lets the VM
> > print "Hello World" to show that it ran successfully.
> >
> > The first release also includes a test suite for vmenter (vmlaunch and
> > vmresume). In addition, a hypercall mechanism is included; currently it
> > is used to trigger a normal VM exit.
> >
> > New files added:
> > x86/vmx.h : contains all VMX related macro declarations
> > x86/vmx.c : main file for VMX nested test case
> >
> > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> > ---
> > ChangeLog:
> > 1. Refine code in function vmx_run()
> > 2. Fix bug of setting GUEST_RFLAGS
> > 3. Move defines of selectors to lib/x86/vm.h
> > 4. Move CR0/4 defines to lib/x86/vm.h, as well as some defines from lib/x86/vm.c
> > 5. Move some inline functions to lib/x86/processor.h
> > 6. Move some inline functions (vmcs related) to x86/vmx.h
> > ---
> >  config-x86-common.mak |    2 +
> >  config-x86_64.mak     |    1 +
> >  lib/x86/msr.h         |    5 +
> >  lib/x86/processor.h   |   15 ++
> >  lib/x86/vm.c          |    4 -
> >  lib/x86/vm.h          |   21 ++
> >  x86/cstart64.S        |    4 +
> >  x86/unittests.cfg     |    6 +
> >  x86/vmx.c             |  674 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  x86/vmx.h             |  466 ++++++++++++++++++++++++++++++++++
> >  10 files changed, 1194 insertions(+), 4 deletions(-)
> >  create mode 100644 x86/vmx.c
> >  create mode 100644 x86/vmx.h
> >
> > diff --git a/config-x86-common.mak b/config-x86-common.mak
> > index 455032b..34a41e1 100644
> > --- a/config-x86-common.mak
> > +++ b/config-x86-common.mak
> > @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
> >
> >  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
> >
> > +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
> > +
> >  arch_clean:
> >         $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
> >         $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
> > diff --git a/config-x86_64.mak b/config-x86_64.mak
> > index 4e525f5..bb8ee89 100644
> > --- a/config-x86_64.mak
> > +++ b/config-x86_64.mak
> > @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
> >           $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
> >           $(TEST_DIR)/pcid.flat
> >  tests += $(TEST_DIR)/svm.flat
> > +tests += $(TEST_DIR)/vmx.flat
> >
> >  include config-x86-common.mak
> > diff --git a/lib/x86/msr.h b/lib/x86/msr.h
> > index 509a421..281255a 100644
> > --- a/lib/x86/msr.h
> > +++ b/lib/x86/msr.h
> > @@ -396,6 +396,11 @@
> >  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
> >  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
> >  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
> > +#define MSR_IA32_VMX_TRUE_PIN          0x0000048d
> > +#define MSR_IA32_VMX_TRUE_PROC         0x0000048e
> > +#define MSR_IA32_VMX_TRUE_EXIT         0x0000048f
> > +#define MSR_IA32_VMX_TRUE_ENTRY                0x00000490
> > +
> >
> >  /* AMD-V MSRs */
> >
> > diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> > index e46d8d0..f0c11cc 100644
> > --- a/lib/x86/processor.h
> > +++ b/lib/x86/processor.h
> > @@ -307,4 +307,19 @@ static inline void safe_halt(void)
> >  {
> >         asm volatile("sti; hlt");
> >  }
> > +
> > +#ifdef __x86_64__
> > +static inline u64 read_rflags(void)
> > +{
> > +       u64 r;
> > +       asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
> > +       return r;
> > +}
> > +
> > +static inline void write_rflags(u64 r)
> > +{
> > +       asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
> > +}
> > +#endif
> > +
> >  #endif
> > diff --git a/lib/x86/vm.c b/lib/x86/vm.c
> > index 260ec45..188bf57 100644
> > --- a/lib/x86/vm.c
> > +++ b/lib/x86/vm.c
> > @@ -9,10 +9,6 @@
> >  #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE)
> >  #endif
> >
> > -#define X86_CR0_PE      0x00000001
> > -#define X86_CR0_WP      0x00010000
> > -#define X86_CR0_PG      0x80000000
> > -#define X86_CR4_PSE     0x00000010
> >  static void *free = 0;
> >  static void *vfree_top = 0;
> >
> > diff --git a/lib/x86/vm.h b/lib/x86/vm.h
> > index 0b5b5c7..eff6f72 100644
> > --- a/lib/x86/vm.h
> > +++ b/lib/x86/vm.h
> > @@ -16,6 +16,27 @@
> >  #define PTE_USER    (1ull << 2)
> >  #define PTE_ADDR    (0xffffffffff000ull)
> >
> > +#define X86_CR0_PE      0x00000001
> > +#define X86_CR0_WP      0x00010000
> > +#define X86_CR0_PG      0x80000000
> > +#define X86_CR4_VMXE   0x00000001
> > +#define X86_CR4_PSE     0x00000010
> > +#define X86_CR4_PAE     0x00000020
> > +#define X86_CR4_PCIDE  0x00020000
> > +
> > +#ifdef __x86_64__
> > +#define SEL_NULL_DESC          0x0
> > +#define SEL_KERN_CODE_64       0x8
> > +#define SEL_KERN_DATA_64       0x10
> > +#define SEL_USER_CODE_64       0x18
> > +#define SEL_USER_DATA_64       0x20
> > +#define SEL_CODE_32            0x28
> > +#define SEL_DATA_32            0x30
> > +#define SEL_CODE_16            0x38
> > +#define SEL_DATA_16            0x40
> > +#define SEL_TSS_RUN            0x48
> > +#endif
> > +
> >  void setup_vm();
> >
> >  void *vmalloc(unsigned long size);
> > diff --git a/x86/cstart64.S b/x86/cstart64.S
> > index 24df5f8..0fe76da 100644
> > --- a/x86/cstart64.S
> > +++ b/x86/cstart64.S
> > @@ -4,6 +4,10 @@
> >  .globl boot_idt
> >  boot_idt = 0
> >
> > +.globl idt_descr
> > +.globl tss_descr
> > +.globl gdt64_desc
> > +
> >  ipi_vector = 0x20
> >
> >  max_cpus = 64
> > diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> > index bc9643e..85c36aa 100644
> > --- a/x86/unittests.cfg
> > +++ b/x86/unittests.cfg
> > @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
> >  file = pcid.flat
> >  extra_params = -cpu qemu64,+pcid
> >  arch = x86_64
> > +
> > +[vmx]
> > +file = vmx.flat
> > +extra_params = -cpu host,+vmx
> > +arch = x86_64
> > +
> > diff --git a/x86/vmx.c b/x86/vmx.c
> > new file mode 100644
> > index 0000000..7467927
> > --- /dev/null
> > +++ b/x86/vmx.c
> > @@ -0,0 +1,674 @@
> > +#include "libcflat.h"
> > +#include "processor.h"
> > +#include "vm.h"
> > +#include "desc.h"
> > +#include "vmx.h"
> > +#include "msr.h"
> > +#include "smp.h"
> > +#include "io.h"
> > +
> > +int fails = 0, tests = 0;
> > +u32 *vmxon_region;
> > +struct vmcs *vmcs_root;
> > +u32 vpid_cnt;
> > +void *guest_stack, *guest_syscall_stack;
> > +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
> > +ulong fix_cr0_set, fix_cr0_clr;
> > +ulong fix_cr4_set, fix_cr4_clr;
> > +struct regs regs;
> > +struct vmx_test *current;
> > +u64 hypercall_field = 0;
> > +bool launched;
> > +
> > +extern u64 gdt64_desc[];
> > +extern u64 idt_descr[];
> > +extern u64 tss_descr[];
> > +extern void *vmx_return;
> > +extern void *entry_sysenter;
> > +extern void *guest_entry;
> > +
> > +static void report(const char *name, int result)
> > +{
> > +       ++tests;
> > +       if (result)
> > +               printf("PASS: %s\n", name);
> > +       else {
> > +               printf("FAIL: %s\n", name);
> > +               ++fails;
> > +       }
> > +}
> > +
> > +static int make_vmcs_current(struct vmcs *vmcs)
> > +{
> > +       bool ret;
> > +
> > +       asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
> > +       return ret;
> > +}
> > +
> > +/* entry_sysenter */
> > +asm(
> > +       ".align 4, 0x90\n\t"
> > +       ".globl entry_sysenter\n\t"
> > +       "entry_sysenter:\n\t"
> > +       SAVE_GPR
> > +       "       and     $0xf, %rax\n\t"
> > +       "       mov     %rax, %rdi\n\t"
> > +       "       call    syscall_handler\n\t"
> > +       LOAD_GPR
> > +       "       vmresume\n\t"
> > +);
> > +
> > +static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
> > +{
> > +       current->syscall_handler(syscall_no);
> > +}
> > +
> > +static inline int vmx_on()
> > +{
> > +       bool ret;
> > +       asm volatile ("vmxon %1; setbe %0\n\t"
> > +               : "=q"(ret) : "m"(vmxon_region) : "cc");
> > +       return ret;
> > +}
> > +
> > +static inline int vmx_off()
> > +{
> > +       bool ret;
> > +       asm volatile("vmxoff; setbe %0\n\t"
> > +               : "=q"(ret) : : "cc");
> > +       return ret;
> > +}
> > +
> > +static void print_vmexit_info()
> > +{
> > +       u64 guest_rip, guest_rsp;
> > +       ulong reason = vmcs_read(EXI_REASON) & 0xff;
> > +       ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
> > +       guest_rip = vmcs_read(GUEST_RIP);
> > +       guest_rsp = vmcs_read(GUEST_RSP);
> > +       printf("VMEXIT info:\n");
> > +       printf("\tvmexit reason = %d\n", reason);
> > +       printf("\texit qualification = 0x%x\n", exit_qual);
> > +       printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
> > +       printf("\tguest_rip = 0x%llx\n", guest_rip);
> > +       printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
> > +               regs.rax, regs.rbx, regs.rcx, regs.rdx);
> > +       printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
> > +               guest_rsp, regs.rbp, regs.rsi, regs.rdi);
> > +       printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
> > +               regs.r8, regs.r9, regs.r10, regs.r11);
> > +       printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
> > +               regs.r12, regs.r13, regs.r14, regs.r15);
> > +}
> > +
> > +static void test_vmclear(void)
> > +{
> > +       u64 rflags;
> > +
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       report("test vmclear", vmcs_clear(vmcs_root) == 0);
> > +}
> > +
> > +static void test_vmxoff(void)
> > +{
> > +       int ret;
> > +       u64 rflags;
> > +
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       ret = vmx_off();
> > +       report("test vmxoff", !ret);
> > +}
> > +
> > +static void __attribute__((__used__)) guest_main(void)
> > +{
> > +       current->guest_main();
> > +}
> > +
> > +/* guest_entry */
> > +asm(
> > +       ".align 4, 0x90\n\t"
> > +       ".globl entry_guest\n\t"
> > +       "guest_entry:\n\t"
> > +       "       call guest_main\n\t"
> > +       "       mov $1, %edi\n\t"
> > +       "       call hypercall\n\t"
> > +);
> > +
> > +static void init_vmcs_ctrl(void)
> > +{
> > +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> > +       /* 26.2.1.1 */
> > +       vmcs_write(PIN_CONTROLS, ctrl_pin);
> > +       /* Disable VMEXIT of IO instruction */
> > +       vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
> > +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
> > +               ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
> > +               vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
> > +       }
> > +       vmcs_write(CR3_TARGET_COUNT, 0);
> > +       vmcs_write(VPID, ++vpid_cnt);
> > +}
> > +
> > +static void init_vmcs_host(void)
> > +{
> > +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> > +       /* 26.2.1.2 */
> > +       vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
> > +
> > +       /* 26.2.1.3 */
> > +       vmcs_write(ENT_CONTROLS, ctrl_enter);
> > +       vmcs_write(EXI_CONTROLS, ctrl_exit);
> > +
> > +       /* 26.2.2 */
> > +       vmcs_write(HOST_CR0, read_cr0());
> > +       vmcs_write(HOST_CR3, read_cr3());
> > +       vmcs_write(HOST_CR4, read_cr4());
> > +       vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
> > +       vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
> > +
> > +       /* 26.2.3 */
> > +       vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
> > +       vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
> > +       vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
> > +       vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
> > +       vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
> > +       vmcs_write(HOST_BASE_FS, 0);
> > +       vmcs_write(HOST_BASE_GS, 0);
> > +
> > +       /* Set other vmcs area */
> > +       vmcs_write(PF_ERROR_MASK, 0);
> > +       vmcs_write(PF_ERROR_MATCH, 0);
> > +       vmcs_write(VMCS_LINK_PTR, ~0ul);
> > +       vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
> > +       vmcs_write(HOST_RIP, (u64)(&vmx_return));
> > +}
> > +
> > +static void init_vmcs_guest(void)
> > +{
> > +       /* 26.3 CHECKING AND LOADING GUEST STATE */
> > +       ulong guest_cr0, guest_cr4, guest_cr3;
> > +       /* 26.3.1.1 */
> > +       guest_cr0 = read_cr0();
> > +       guest_cr4 = read_cr4();
> > +       guest_cr3 = read_cr3();
> > +       if (ctrl_enter & ENT_GUEST_64) {
> > +               guest_cr0 |= X86_CR0_PG;
> > +               guest_cr4 |= X86_CR4_PAE;
> > +       }
> > +       if ((ctrl_enter & ENT_GUEST_64) == 0)
> > +               guest_cr4 &= (~X86_CR4_PCIDE);
> > +       if (guest_cr0 & X86_CR0_PG)
> > +               guest_cr0 |= X86_CR0_PE;
> > +       vmcs_write(GUEST_CR0, guest_cr0);
> > +       vmcs_write(GUEST_CR3, guest_cr3);
> > +       vmcs_write(GUEST_CR4, guest_cr4);
> > +       vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
> > +       vmcs_write(GUEST_SYSENTER_ESP,
> > +               (u64)(guest_syscall_stack + PAGE_SIZE - 1));
> > +       vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
> > +       vmcs_write(GUEST_DR7, 0);
> > +       vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
> > +
> > +       /* 26.3.1.2 */
> > +       vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
> > +       vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
> > +       vmcs_write(GUEST_SEL_LDTR, 0);
> > +
> > +       vmcs_write(GUEST_BASE_CS, 0);
> > +       vmcs_write(GUEST_BASE_ES, 0);
> > +       vmcs_write(GUEST_BASE_SS, 0);
> > +       vmcs_write(GUEST_BASE_DS, 0);
> > +       vmcs_write(GUEST_BASE_FS, 0);
> > +       vmcs_write(GUEST_BASE_GS, 0);
> > +       vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
> > +       vmcs_write(GUEST_BASE_LDTR, 0);
> > +
> > +       vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
> > +       vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
> > +
> > +       vmcs_write(GUEST_AR_CS, 0xa09b);
> > +       vmcs_write(GUEST_AR_DS, 0xc093);
> > +       vmcs_write(GUEST_AR_ES, 0xc093);
> > +       vmcs_write(GUEST_AR_FS, 0xc093);
> > +       vmcs_write(GUEST_AR_GS, 0xc093);
> > +       vmcs_write(GUEST_AR_SS, 0xc093);
> > +       vmcs_write(GUEST_AR_LDTR, 0x82);
> > +       vmcs_write(GUEST_AR_TR, 0x8b);
> > +
> > +       /* 26.3.1.3 */
> > +       vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
> > +       vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
> > +       vmcs_write(GUEST_LIMIT_GDTR,
> > +               ((struct descr *)gdt64_desc)->limit & 0xffff);
> > +       vmcs_write(GUEST_LIMIT_IDTR,
> > +               ((struct descr *)idt_descr)->limit & 0xffff);
> > +
> > +       /* 26.3.1.4 */
> > +       vmcs_write(GUEST_RIP, (u64)(&guest_entry));
> > +       vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
> > +       vmcs_write(GUEST_RFLAGS, 0x2);
> > +
> > +       /* 26.3.1.5 */
> > +       vmcs_write(GUEST_ACTV_STATE, 0);
> > +       vmcs_write(GUEST_INTR_STATE, 0);
> > +}
> > +
> > +static int init_vmcs(struct vmcs **vmcs)
> > +{
> > +       *vmcs = alloc_page();
> > +       memset(*vmcs, 0, PAGE_SIZE);
> > +       (*vmcs)->revision_id = basic.revision;
> > +       /* vmclear first to init vmcs */
> > +       if (vmcs_clear(*vmcs)) {
> > +               printf("%s : vmcs_clear error\n", __func__);
> > +               return 1;
> > +       }
> > +
> > +       if (make_vmcs_current(*vmcs)) {
> > +               printf("%s : make_vmcs_current error\n", __func__);
> > +               return 1;
> > +       }
> > +
> > +       /* All settings to pin/exit/enter/cpu
> > +          control fields should be placed here */
> > +       ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
> > +       ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
> > +       ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
> > +       ctrl_cpu[0] |= CPU_HLT;
> > +       /* DIsable IO instruction VMEXIT now */
> > +       ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
> > +       ctrl_cpu[1] = 0;
> > +
> > +       ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
> > +       ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
> > +       ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
> > +       ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
> > +
> > +       init_vmcs_ctrl();
> > +       init_vmcs_host();
> > +       init_vmcs_guest();
> > +       return 0;
> > +}
> > +
> > +static void init_vmx(void)
> > +{
> > +       vmxon_region = alloc_page();
> > +       memset(vmxon_region, 0, PAGE_SIZE);
> > +
> > +       fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
> > +       fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
> > +       fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
> > +       fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
> > +       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> > +       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
> > +                       : MSR_IA32_VMX_PINBASED_CTLS);
> > +       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
> > +                       : MSR_IA32_VMX_EXIT_CTLS);
> > +       ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
> > +                       : MSR_IA32_VMX_ENTRY_CTLS);
> > +       ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
> > +                       : MSR_IA32_VMX_PROCBASED_CTLS);
> > +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
> > +               ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> > +       if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
> > +               ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
> > +
> > +       write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
> > +       write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
> > +
> > +       *vmxon_region = basic.revision;
> > +
> > +       guest_stack = alloc_page();
> > +       memset(guest_stack, 0, PAGE_SIZE);
> > +       guest_syscall_stack = alloc_page();
> > +       memset(guest_syscall_stack, 0, PAGE_SIZE);
> > +}
> > +
> > +static int test_vmx_capability(void)
> > +{
> > +       struct cpuid r;
> > +       u64 ret1, ret2;
> > +       u64 ia32_feature_control;
> > +       r = cpuid(1);
> > +       ret1 = ((r.c) >> 5) & 1;
> > +       ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
> > +       ret2 = ((ia32_feature_control & 0x5) == 0x5);
> > +       if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) {
> > +               wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
> > +               ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
> > +               ret2 = ((ia32_feature_control & 0x5) == 0x5);
> > +       }
> > +       report("test vmx capability", ret1 & ret2);
> > +       return !(ret1 & ret2);
> > +}
> > +
> > +static int test_vmxon(void)
> > +{
> > +       int ret;
> > +       u64 rflags;
> > +
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       ret = vmx_on();
> > +       report("test vmxon", !ret);
> > +       return ret;
> > +}
> > +
> > +static void test_vmptrld(void)
> > +{
> > +       u64 rflags;
> > +       struct vmcs *vmcs;
> > +
> > +       vmcs = alloc_page();
> > +       vmcs->revision_id = basic.revision;
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       report("test vmptrld", make_vmcs_current(vmcs) == 0);
> > +}
> > +
> > +static void test_vmptrst(void)
> > +{
> > +       u64 rflags;
> > +       int ret;
> > +       struct vmcs *vmcs1, *vmcs2;
> > +
> > +       vmcs1 = alloc_page();
> > +       memset(vmcs1, 0, PAGE_SIZE);
> > +       init_vmcs(&vmcs1);
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       ret = vmcs_save(&vmcs2);
> > +       report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
> > +}
> > +
> > +/* This function can only be called in guest */
> > +static void __attribute__((__used__)) hypercall(u32 hypercall_no)
> > +{
> > +       u64 val = 0;
> > +       val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
> > +       hypercall_field = val;
> > +       asm volatile("vmcall\n\t");
> > +}
> > +
> > +static bool is_hypercall()
> > +{
> > +       ulong reason, hyper_bit;
> > +
> > +       reason = vmcs_read(EXI_REASON) & 0xff;
> > +       hyper_bit = hypercall_field & HYPERCALL_BIT;
> > +       if (reason == VMX_VMCALL && hyper_bit)
> > +               return true;
> > +       return false;
> > +}
> > +
> > +static int handle_hypercall()
> > +{
> > +       ulong hypercall_no;
> > +
> > +       hypercall_no = hypercall_field & HYPERCALL_MASK;
> > +       hypercall_field = 0;
> > +       switch (hypercall_no) {
> > +       case HYPERCALL_VMEXIT:
> > +               return VMX_TEST_VMEXIT;
> > +       default:
> > +               printf("ERROR : Invalid hypercall number : %d\n", hypercall_no);
> > +       }
> > +       return VMX_TEST_EXIT;
> > +}
> > +
> > +static int exit_handler()
> > +{
> > +       int ret;
> > +
> > +       current->exits++;
> > +       current->guest_regs = regs;
> > +       if (is_hypercall())
> > +               ret = handle_hypercall();
> > +       else
> > +               ret = current->exit_handler();
> > +       regs = current->guest_regs;
> > +       switch (ret) {
> > +       case VMX_TEST_VMEXIT:
> > +       case VMX_TEST_RESUME:
> > +               return ret;
> > +       case VMX_TEST_EXIT:
> > +               break;
> > +       default:
> > +               printf("ERROR : Invalid exit_handler return val %d.\n"
> > +                       , ret);
> > +       }
> > +       print_vmexit_info();
> > +       exit(-1);
> > +       return 0;
> > +}
> > +
> > +static int vmx_run()
> > +{
> > +       u32 ret = 0, fail = 0;
> > +
> > +       while (1) {
> > +               asm volatile (
> > +                       "mov %%rsp, %%rsi\n\t"
> > +                       "mov %2, %%rdi\n\t"
> > +                       "vmwrite %%rsi, %%rdi\n\t"
> > +
> > +                       LOAD_GPR_C
> > +                       "cmpl $0, %1\n\t"
> > +                       "jne 1f\n\t"
> > +                       LOAD_RFLAGS
> > +                       "vmlaunch\n\t"
> > +                       "jmp 2f\n\t"
> > +                       "1: "
> > +                       "vmresume\n\t"
> > +                       "2: "
> > +                       "setbe %0\n\t"
> > +                       "vmx_return:\n\t"
> > +                       SAVE_GPR_C
> > +                       SAVE_RFLAGS
> > +                       : "=m"(fail)
> > +                       : "m"(launched), "i"(HOST_RSP)
> > +                       : "rdi", "rsi", "memory", "cc"
> > +
> > +               );
> > +               if (fail)
> > +                       ret = launched ? VMX_TEST_RESUME_ERR :
> > +                               VMX_TEST_LAUNCH_ERR;
> > +               else {
> > +                       launched = 1;
> > +                       ret = exit_handler();
> > +               }
> > +               if (ret != VMX_TEST_RESUME)
> > +                       break;
> > +       }
> > +       launched = 0;
> > +       switch (ret) {
> > +       case VMX_TEST_VMEXIT:
> > +               return 0;
> > +       case VMX_TEST_LAUNCH_ERR:
> > +               printf("%s : vmlaunch failed.\n", __func__);
> > +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
> > +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
> > +                       printf("\tvmlaunch set wrong flags\n");
> > +               report("test vmlaunch", 0);
> > +               break;
> > +       case VMX_TEST_RESUME_ERR:
> > +               printf("%s : vmresume failed.\n", __func__);
> > +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
> > +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
> > +                       printf("\tvmresume set wrong flags\n");
> > +               report("test vmresume", 0);
> > +               break;
> > +       default:
> > +               printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
> > +               break;
> > +       }
> > +       return 1;
> > +}
> > +
> > +static int test_run(struct vmx_test *test)
> > +{
> > +       if (test->name == NULL)
> > +               test->name = "(no name)";
> > +       if (vmx_on()) {
> > +               printf("%s : vmxon failed.\n", __func__);
> > +               return 1;
> > +       }
> > +       init_vmcs(&(test->vmcs));
> > +       /* Directly call test->init is ok here, init_vmcs has done
> > +          vmcs init, vmclear and vmptrld*/
> > +       if (test->init)
> > +               test->init(test->vmcs);
> > +       test->exits = 0;
> > +       current = test;
> > +       regs = test->guest_regs;
> > +       vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
> > +       launched = 0;
> > +       printf("\nTest suite : %s\n", test->name);
> > +       vmx_run();
> > +       if (vmx_off()) {
> > +               printf("%s : vmxoff failed.\n", __func__);
> > +               return 1;
> > +       }
> > +       return 0;
> > +}
> > +
> > +static void basic_init()
> > +{
> > +}
> > +
> > +static void basic_guest_main()
> > +{
> > +       /* Here is null guest_main, print Hello World */
> > +       printf("\tHello World, this is null_guest_main!\n");
> > +}
> > +
> > +static int basic_exit_handler()
> > +{
> > +       u64 guest_rip;
> > +       ulong reason;
> > +
> > +       guest_rip = vmcs_read(GUEST_RIP);
> > +       reason = vmcs_read(EXI_REASON) & 0xff;
> > +
> > +       switch (reason) {
> > +       case VMX_VMCALL:
> > +               print_vmexit_info();
> > +               vmcs_write(GUEST_RIP, guest_rip + 3);
> > +               return VMX_TEST_RESUME;
> > +       default:
> > +               break;
> > +       }
> > +       printf("ERROR : Unhandled vmx exit.\n");
> > +       print_vmexit_info();
> > +       return VMX_TEST_EXIT;
> > +}
> > +
> > +static void basic_syscall_handler(u64 syscall_no)
> > +{
> > +}
> > +
> > +static void vmenter_main()
> > +{
> > +       u64 rax;
> > +       u64 rsp, resume_rsp;
> > +
> > +       report("test vmlaunch", 1);
> > +
> > +       asm volatile(
> > +               "mov %%rsp, %0\n\t"
> > +               "mov %3, %%rax\n\t"
> > +               "vmcall\n\t"
> > +               "mov %%rax, %1\n\t"
> > +               "mov %%rsp, %2\n\t"
> > +               : "=r"(rsp), "=r"(rax), "=r"(resume_rsp)
> > +               : "g"(0xABCD));
> > +       report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
> > +}
> > +
> > +static int vmenter_exit_handler()
> > +{
> > +       u64 guest_rip;
> > +       ulong reason;
> > +
> > +       guest_rip = vmcs_read(GUEST_RIP);
> > +       reason = vmcs_read(EXI_REASON) & 0xff;
> > +       switch (reason) {
> > +       case VMX_VMCALL:
> > +               if (current->guest_regs.rax != 0xABCD) {
> > +                       report("test vmresume", 0);
> > +                       return VMX_TEST_VMEXIT;
> > +               }
> > +               current->guest_regs.rax = 0xFFFF;
> > +               vmcs_write(GUEST_RIP, guest_rip + 3);
> > +               return VMX_TEST_RESUME;
> > +       default:
> > +               report("test vmresume", 0);
> > +               print_vmexit_info();
> > +       }
> > +       return VMX_TEST_VMEXIT;
> > +}
> > +
> > +
> > +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs
> > +   basic_* just implement some basic functions */
> > +static struct vmx_test vmx_tests[] = {
> > +       { "null", basic_init, basic_guest_main, basic_exit_handler,
> > +               basic_syscall_handler, {0} },
> > +       { "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
> > +               basic_syscall_handler, {0} },
> > +};
> > +
> > +int main(void)
> > +{
> > +       int i;
> > +
> > +       setup_vm();
> > +       setup_idt();
> > +
> > +       if (test_vmx_capability() != 0) {
> > +               printf("ERROR : vmx not supported, check +vmx option\n");
> > +               goto exit;
> > +       }
> > +       init_vmx();
> > +       /* Set basic test ctxt the same as "null" */
> > +       current = &vmx_tests[0];
> > +       if (test_vmxon() != 0)
> > +               goto exit;
> > +       test_vmptrld();
> > +       test_vmclear();
> > +       test_vmptrst();
> > +       init_vmcs(&vmcs_root);
> > +       if (vmx_run()) {
> > +               report("test vmlaunch", 0);
> > +               goto exit;
> > +       }
> > +       test_vmxoff();
> > +
> > +       for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) {
> > +               if (test_run(&vmx_tests[i]))
> > +                       goto exit;
> > +       }
> > +
> > +exit:
> > +       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> > +       return fails ? 1 : 0;
> > +}
> > diff --git a/x86/vmx.h b/x86/vmx.h
> > new file mode 100644
> > index 0000000..1fb9738
> > --- /dev/null
> > +++ b/x86/vmx.h
> > @@ -0,0 +1,466 @@
> > +#ifndef __HYPERVISOR_H
> > +#define __HYPERVISOR_H
> > +
> > +#include "libcflat.h"
> > +
> > +struct vmcs {
> > +       u32 revision_id; /* vmcs revision identifier */
> > +       u32 abort; /* VMX-abort indicator */
> > +       /* VMCS data */
> > +       char data[0];
> > +};
> > +
> > +struct regs {
> > +       u64 rax;
> > +       u64 rcx;
> > +       u64 rdx;
> > +       u64 rbx;
> > +       u64 cr2;
> > +       u64 rbp;
> > +       u64 rsi;
> > +       u64 rdi;
> > +       u64 r8;
> > +       u64 r9;
> > +       u64 r10;
> > +       u64 r11;
> > +       u64 r12;
> > +       u64 r13;
> > +       u64 r14;
> > +       u64 r15;
> > +       u64 rflags;
> > +};
> > +
> > +struct vmx_test {
> > +       const char *name;
> > +       void (*init)(struct vmcs *vmcs);
> > +       void (*guest_main)();
> > +       int (*exit_handler)();
> > +       void (*syscall_handler)(u64 syscall_no);
> > +       struct regs guest_regs;
> > +       struct vmcs *vmcs;
> > +       int exits;
> > +};
> > +
> > +static union vmx_basic {
> > +       u64 val;
> > +       struct {
> > +               u32 revision;
> > +               u32     size:13,
> > +                       : 3,
> > +                       width:1,
> > +                       dual:1,
> > +                       type:4,
> > +                       insouts:1,
> > +                       ctrl:1;
> > +       };
> > +} basic;
> > +
> > +static union vmx_ctrl_pin {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_pin_rev;
> > +
> > +static union vmx_ctrl_cpu {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_cpu_rev[2];
> > +
> > +static union vmx_ctrl_exit {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_exit_rev;
> > +
> > +static union vmx_ctrl_ent {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_enter_rev;
> > +
> > +static union vmx_ept_vpid {
> > +       u64 val;
> > +       struct {
> > +               u32:16,
> > +                       super:2,
> > +                       : 2,
> > +                       invept:1,
> > +                       : 11;
> > +               u32     invvpid:1;
> > +       };
> > +} ept_vpid;
> > +
> > +struct descr {
> > +       u16 limit;
> > +       u64 addr;
> > +};
> > +
> > +enum Encoding {
> > +       /* 16-Bit Control Fields */
> > +       VPID                    = 0x0000ul,
> > +       /* Posted-interrupt notification vector */
> > +       PINV                    = 0x0002ul,
> > +       /* EPTP index */
> > +       EPTP_IDX                = 0x0004ul,
> > +
> > +       /* 16-Bit Guest State Fields */
> > +       GUEST_SEL_ES            = 0x0800ul,
> > +       GUEST_SEL_CS            = 0x0802ul,
> > +       GUEST_SEL_SS            = 0x0804ul,
> > +       GUEST_SEL_DS            = 0x0806ul,
> > +       GUEST_SEL_FS            = 0x0808ul,
> > +       GUEST_SEL_GS            = 0x080aul,
> > +       GUEST_SEL_LDTR          = 0x080cul,
> > +       GUEST_SEL_TR            = 0x080eul,
> > +       GUEST_INT_STATUS        = 0x0810ul,
> > +
> > +       /* 16-Bit Host State Fields */
> > +       HOST_SEL_ES             = 0x0c00ul,
> > +       HOST_SEL_CS             = 0x0c02ul,
> > +       HOST_SEL_SS             = 0x0c04ul,
> > +       HOST_SEL_DS             = 0x0c06ul,
> > +       HOST_SEL_FS             = 0x0c08ul,
> > +       HOST_SEL_GS             = 0x0c0aul,
> > +       HOST_SEL_TR             = 0x0c0cul,
> > +
> > +       /* 64-Bit Control Fields */
> > +       IO_BITMAP_A             = 0x2000ul,
> > +       IO_BITMAP_B             = 0x2002ul,
> > +       MSR_BITMAP              = 0x2004ul,
> > +       EXIT_MSR_ST_ADDR        = 0x2006ul,
> > +       EXIT_MSR_LD_ADDR        = 0x2008ul,
> > +       ENTER_MSR_LD_ADDR       = 0x200aul,
> > +       VMCS_EXEC_PTR           = 0x200cul,
> > +       TSC_OFFSET              = 0x2010ul,
> > +       TSC_OFFSET_HI           = 0x2011ul,
> > +       APIC_VIRT_ADDR          = 0x2012ul,
> > +       APIC_ACCS_ADDR          = 0x2014ul,
> > +       EPTP                    = 0x201aul,
> > +       EPTP_HI                 = 0x201bul,
> > +
> > +       /* 64-Bit Readonly Data Field */
> > +       INFO_PHYS_ADDR          = 0x2400ul,
> > +
> > +       /* 64-Bit Guest State */
> > +       VMCS_LINK_PTR           = 0x2800ul,
> > +       VMCS_LINK_PTR_HI        = 0x2801ul,
> > +       GUEST_DEBUGCTL          = 0x2802ul,
> > +       GUEST_DEBUGCTL_HI       = 0x2803ul,
> > +       GUEST_EFER              = 0x2806ul,
> > +       GUEST_PERF_GLOBAL_CTRL  = 0x2808ul,
> > +       GUEST_PDPTE             = 0x280aul,
> > +
> > +       /* 64-Bit Host State */
> > +       HOST_EFER               = 0x2c02ul,
> > +       HOST_PERF_GLOBAL_CTRL   = 0x2c04ul,
> > +
> > +       /* 32-Bit Control Fields */
> > +       PIN_CONTROLS            = 0x4000ul,
> > +       CPU_EXEC_CTRL0          = 0x4002ul,
> > +       EXC_BITMAP              = 0x4004ul,
> > +       PF_ERROR_MASK           = 0x4006ul,
> > +       PF_ERROR_MATCH          = 0x4008ul,
> > +       CR3_TARGET_COUNT        = 0x400aul,
> > +       EXI_CONTROLS            = 0x400cul,
> > +       EXI_MSR_ST_CNT          = 0x400eul,
> > +       EXI_MSR_LD_CNT          = 0x4010ul,
> > +       ENT_CONTROLS            = 0x4012ul,
> > +       ENT_MSR_LD_CNT          = 0x4014ul,
> > +       ENT_INTR_INFO           = 0x4016ul,
> > +       ENT_INTR_ERROR          = 0x4018ul,
> > +       ENT_INST_LEN            = 0x401aul,
> > +       TPR_THRESHOLD           = 0x401cul,
> > +       CPU_EXEC_CTRL1          = 0x401eul,
> > +
> > +       /* 32-Bit R/O Data Fields */
> > +       VMX_INST_ERROR          = 0x4400ul,
> > +       EXI_REASON              = 0x4402ul,
> > +       EXI_INTR_INFO           = 0x4404ul,
> > +       EXI_INTR_ERROR          = 0x4406ul,
> > +       IDT_VECT_INFO           = 0x4408ul,
> > +       IDT_VECT_ERROR          = 0x440aul,
> > +       EXI_INST_LEN            = 0x440cul,
> > +       EXI_INST_INFO           = 0x440eul,
> > +
> > +       /* 32-Bit Guest State Fields */
> > +       GUEST_LIMIT_ES          = 0x4800ul,
> > +       GUEST_LIMIT_CS          = 0x4802ul,
> > +       GUEST_LIMIT_SS          = 0x4804ul,
> > +       GUEST_LIMIT_DS          = 0x4806ul,
> > +       GUEST_LIMIT_FS          = 0x4808ul,
> > +       GUEST_LIMIT_GS          = 0x480aul,
> > +       GUEST_LIMIT_LDTR        = 0x480cul,
> > +       GUEST_LIMIT_TR          = 0x480eul,
> > +       GUEST_LIMIT_GDTR        = 0x4810ul,
> > +       GUEST_LIMIT_IDTR        = 0x4812ul,
> > +       GUEST_AR_ES             = 0x4814ul,
> > +       GUEST_AR_CS             = 0x4816ul,
> > +       GUEST_AR_SS             = 0x4818ul,
> > +       GUEST_AR_DS             = 0x481aul,
> > +       GUEST_AR_FS             = 0x481cul,
> > +       GUEST_AR_GS             = 0x481eul,
> > +       GUEST_AR_LDTR           = 0x4820ul,
> > +       GUEST_AR_TR             = 0x4822ul,
> > +       GUEST_INTR_STATE        = 0x4824ul,
> > +       GUEST_ACTV_STATE        = 0x4826ul,
> > +       GUEST_SMBASE            = 0x4828ul,
> > +       GUEST_SYSENTER_CS       = 0x482aul,
> > +
> > +       /* 32-Bit Host State Fields */
> > +       HOST_SYSENTER_CS        = 0x4c00ul,
> > +
> > +       /* Natural-Width Control Fields */
> > +       CR0_MASK                = 0x6000ul,
> > +       CR4_MASK                = 0x6002ul,
> > +       CR0_READ_SHADOW = 0x6004ul,
> > +       CR4_READ_SHADOW = 0x6006ul,
> > +       CR3_TARGET_0            = 0x6008ul,
> > +       CR3_TARGET_1            = 0x600aul,
> > +       CR3_TARGET_2            = 0x600cul,
> > +       CR3_TARGET_3            = 0x600eul,
> > +
> > +       /* Natural-Width R/O Data Fields */
> > +       EXI_QUALIFICATION       = 0x6400ul,
> > +       IO_RCX                  = 0x6402ul,
> > +       IO_RSI                  = 0x6404ul,
> > +       IO_RDI                  = 0x6406ul,
> > +       IO_RIP                  = 0x6408ul,
> > +       GUEST_LINEAR_ADDRESS    = 0x640aul,
> > +
> > +       /* Natural-Width Guest State Fields */
> > +       GUEST_CR0               = 0x6800ul,
> > +       GUEST_CR3               = 0x6802ul,
> > +       GUEST_CR4               = 0x6804ul,
> > +       GUEST_BASE_ES           = 0x6806ul,
> > +       GUEST_BASE_CS           = 0x6808ul,
> > +       GUEST_BASE_SS           = 0x680aul,
> > +       GUEST_BASE_DS           = 0x680cul,
> > +       GUEST_BASE_FS           = 0x680eul,
> > +       GUEST_BASE_GS           = 0x6810ul,
> > +       GUEST_BASE_LDTR         = 0x6812ul,
> > +       GUEST_BASE_TR           = 0x6814ul,
> > +       GUEST_BASE_GDTR         = 0x6816ul,
> > +       GUEST_BASE_IDTR         = 0x6818ul,
> > +       GUEST_DR7               = 0x681aul,
> > +       GUEST_RSP               = 0x681cul,
> > +       GUEST_RIP               = 0x681eul,
> > +       GUEST_RFLAGS            = 0x6820ul,
> > +       GUEST_PENDING_DEBUG     = 0x6822ul,
> > +       GUEST_SYSENTER_ESP      = 0x6824ul,
> > +       GUEST_SYSENTER_EIP      = 0x6826ul,
> > +
> > +       /* Natural-Width Host State Fields */
> > +       HOST_CR0                = 0x6c00ul,
> > +       HOST_CR3                = 0x6c02ul,
> > +       HOST_CR4                = 0x6c04ul,
> > +       HOST_BASE_FS            = 0x6c06ul,
> > +       HOST_BASE_GS            = 0x6c08ul,
> > +       HOST_BASE_TR            = 0x6c0aul,
> > +       HOST_BASE_GDTR          = 0x6c0cul,
> > +       HOST_BASE_IDTR          = 0x6c0eul,
> > +       HOST_SYSENTER_ESP       = 0x6c10ul,
> > +       HOST_SYSENTER_EIP       = 0x6c12ul,
> > +       HOST_RSP                = 0x6c14ul,
> > +       HOST_RIP                = 0x6c16ul
> > +};
> > +
> > +enum Reason {
> > +       VMX_EXC_NMI             = 0,
> > +       VMX_EXTINT              = 1,
> > +       VMX_TRIPLE_FAULT        = 2,
> > +       VMX_INIT                = 3,
> > +       VMX_SIPI                = 4,
> > +       VMX_SMI_IO              = 5,
> > +       VMX_SMI_OTHER           = 6,
> > +       VMX_INTR_WINDOW         = 7,
> > +       VMX_NMI_WINDOW          = 8,
> > +       VMX_TASK_SWITCH         = 9,
> > +       VMX_CPUID               = 10,
> > +       VMX_GETSEC              = 11,
> > +       VMX_HLT                 = 12,
> > +       VMX_INVD                = 13,
> > +       VMX_INVLPG              = 14,
> > +       VMX_RDPMC               = 15,
> > +       VMX_RDTSC               = 16,
> > +       VMX_RSM                 = 17,
> > +       VMX_VMCALL              = 18,
> > +       VMX_VMCLEAR             = 19,
> > +       VMX_VMLAUNCH            = 20,
> > +       VMX_VMPTRLD             = 21,
> > +       VMX_VMPTRST             = 22,
> > +       VMX_VMREAD              = 23,
> > +       VMX_VMRESUME            = 24,
> > +       VMX_VMWRITE             = 25,
> > +       VMX_VMXOFF              = 26,
> > +       VMX_VMXON               = 27,
> > +       VMX_CR                  = 28,
> > +       VMX_DR                  = 29,
> > +       VMX_IO                  = 30,
> > +       VMX_RDMSR               = 31,
> > +       VMX_WRMSR               = 32,
> > +       VMX_FAIL_STATE          = 33,
> > +       VMX_FAIL_MSR            = 34,
> > +       VMX_MWAIT               = 36,
> > +       VMX_MTF                 = 37,
> > +       VMX_MONITOR             = 39,
> > +       VMX_PAUSE               = 40,
> > +       VMX_FAIL_MCHECK         = 41,
> > +       VMX_TPR_THRESHOLD       = 43,
> > +       VMX_APIC_ACCESS         = 44,
> > +       VMX_GDTR_IDTR           = 46,
> > +       VMX_LDTR_TR             = 47,
> > +       VMX_EPT_VIOLATION       = 48,
> > +       VMX_EPT_MISCONFIG       = 49,
> > +       VMX_INVEPT              = 50,
> > +       VMX_PREEMPT             = 52,
> > +       VMX_INVVPID             = 53,
> > +       VMX_WBINVD              = 54,
> > +       VMX_XSETBV              = 55
> > +};
> > +
> > +#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
> > +#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
> > +
> > +enum Ctrl_exi {
> > +       EXI_HOST_64             = 1UL << 9,
> > +       EXI_LOAD_PERF           = 1UL << 12,
> > +       EXI_INTA                = 1UL << 15,
> > +       EXI_LOAD_EFER           = 1UL << 21,
> > +};
> > +
> > +enum Ctrl_ent {
> > +       ENT_GUEST_64            = 1UL << 9,
> > +       ENT_LOAD_EFER           = 1UL << 15,
> > +};
> > +
> > +enum Ctrl_pin {
> > +       PIN_EXTINT              = 1ul << 0,
> > +       PIN_NMI                 = 1ul << 3,
> > +       PIN_VIRT_NMI            = 1ul << 5,
> > +};
> > +
> > +enum Ctrl0 {
> > +       CPU_INTR_WINDOW         = 1ul << 2,
> > +       CPU_HLT                 = 1ul << 7,
> > +       CPU_INVLPG              = 1ul << 9,
> > +       CPU_CR3_LOAD            = 1ul << 15,
> > +       CPU_CR3_STORE           = 1ul << 16,
> > +       CPU_TPR_SHADOW          = 1ul << 21,
> > +       CPU_NMI_WINDOW          = 1ul << 22,
> > +       CPU_IO                  = 1ul << 24,
> > +       CPU_IO_BITMAP           = 1ul << 25,
> > +       CPU_SECONDARY           = 1ul << 31,
> > +};
> > +
> > +enum Ctrl1 {
> > +       CPU_EPT                 = 1ul << 1,
> > +       CPU_VPID                = 1ul << 5,
> > +       CPU_URG                 = 1ul << 7,
> > +};
> > +
> > +#define SAVE_GPR                               \
> > +       "xchg %rax, regs\n\t"                   \
> > +       "xchg %rbx, regs+0x8\n\t"               \
> > +       "xchg %rcx, regs+0x10\n\t"              \
> > +       "xchg %rdx, regs+0x18\n\t"              \
> > +       "xchg %rbp, regs+0x28\n\t"              \
> > +       "xchg %rsi, regs+0x30\n\t"              \
> > +       "xchg %rdi, regs+0x38\n\t"              \
> > +       "xchg %r8, regs+0x40\n\t"               \
> > +       "xchg %r9, regs+0x48\n\t"               \
> > +       "xchg %r10, regs+0x50\n\t"              \
> > +       "xchg %r11, regs+0x58\n\t"              \
> > +       "xchg %r12, regs+0x60\n\t"              \
> > +       "xchg %r13, regs+0x68\n\t"              \
> > +       "xchg %r14, regs+0x70\n\t"              \
> > +       "xchg %r15, regs+0x78\n\t"
> > +
> > +#define LOAD_GPR       SAVE_GPR
> > +
> > +#define SAVE_GPR_C                             \
> > +       "xchg %%rax, regs\n\t"                  \
> > +       "xchg %%rbx, regs+0x8\n\t"              \
> > +       "xchg %%rcx, regs+0x10\n\t"             \
> > +       "xchg %%rdx, regs+0x18\n\t"             \
> > +       "xchg %%rbp, regs+0x28\n\t"             \
> > +       "xchg %%rsi, regs+0x30\n\t"             \
> > +       "xchg %%rdi, regs+0x38\n\t"             \
> > +       "xchg %%r8, regs+0x40\n\t"              \
> > +       "xchg %%r9, regs+0x48\n\t"              \
> > +       "xchg %%r10, regs+0x50\n\t"             \
> > +       "xchg %%r11, regs+0x58\n\t"             \
> > +       "xchg %%r12, regs+0x60\n\t"             \
> > +       "xchg %%r13, regs+0x68\n\t"             \
> > +       "xchg %%r14, regs+0x70\n\t"             \
> > +       "xchg %%r15, regs+0x78\n\t"
> > +
> > +#define LOAD_GPR_C     SAVE_GPR_C
> > +
> > +#define SAVE_RFLAGS            \
> > +       "pushf\n\t"                     \
> > +       "pop regs+0x80\n\t"
> > +
> > +#define LOAD_RFLAGS            \
> > +       "push regs+0x80\n\t"    \
> > +       "popf\n\t"
> > +
> > +#define VMX_IO_SIZE_MASK               0x7
> > +#define _VMX_IO_BYTE                   1
> > +#define _VMX_IO_WORD                   2
> > +#define _VMX_IO_LONG                   3
> > +#define VMX_IO_DIRECTION_MASK          (1ul << 3)
> > +#define VMX_IO_IN                      (1ul << 3)
> > +#define VMX_IO_OUT                     0
> > +#define VMX_IO_STRING                  (1ul << 4)
> > +#define VMX_IO_REP                     (1ul << 5)
> > +#define VMX_IO_OPRAND_DX               (1ul << 6)
> > +#define VMX_IO_PORT_MASK               0xFFFF0000
> > +#define VMX_IO_PORT_SHIFT              16
> > +
> > +#define VMX_TEST_VMEXIT                        1
> > +#define VMX_TEST_EXIT                  2
> > +#define VMX_TEST_RESUME                        3
> > +#define VMX_TEST_LAUNCH_ERR            4
> > +#define VMX_TEST_RESUME_ERR            5
> > +
> > +#define HYPERCALL_BIT          (1ul << 12)
> > +#define HYPERCALL_MASK         0xFFF
> > +#define HYPERCALL_VMEXIT       0x1
> > +
> > +static inline int vmcs_clear(struct vmcs *vmcs)
> > +{
> > +       bool ret;
> > +       asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
> > +       return ret;
> > +}
> > +
> > +static inline u64 vmcs_read(enum Encoding enc)
> > +{
> > +       u64 val;
> > +       asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
> > +       return val;
> > +}
> > +
> > +static inline int vmcs_write(enum Encoding enc, u64 val)
> > +{
> > +       bool ret;
> > +       asm volatile ("vmwrite %1, %2; setbe %0"
> > +               : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
> > +       return ret;
> > +}
> > +
> > +static inline int vmcs_save(struct vmcs **vmcs)
> > +{
> > +       bool ret;
> > +
> > +       asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc");
> > +       return ret;
> > +}
> > +
> > +#endif
> > +
> > --
> > 1.7.9.5
> >

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marius Vlad July 30, 2013, 7:31 a.m. UTC | #4
Hi Arthur, 

   I'm trying to test your patch on a SandyBridge machine.

   Used 'nested=1' when loading kvm (from 3.9-1-amd64)
   and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)).

   Without nested=1 I get ``unhandled excecption 13'', which I presume
   is expected; with nested=1, test_vmx_capability() fails to
   detect VMX.

   I've used for qemu:
   $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device
   isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device
   pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1

   Are there any knobs which I should further tune?

   Thanks,


On Sun, Jul 28, 2013 at 04:24:34PM +0200, Arthur Chunqi Li wrote:
> Hi Gleb,
> 
> It suddenly occurred to me that this patch also fails to handle
> GUEST_RFLAGS on VMRESUME.
> 
> I have decided to remove rflags from struct regs, since rflags can be
> read and set via vmcs_read/vmcs_write in the functions each test suite
> defines (init and exit_handler), whereas the general-purpose registers
> can only be set in the framework code.
> 
> Then I will wait for Paolo and Gleb's further feedback and commit the
> final patch.
> 
> Arthur
> 
> On Sun, Jul 28, 2013 at 10:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> > This is the first version of the nested VMX test environment. It contains
> > test cases for the basic VMX instructions, including VMXON/VMXOFF/VMPTRLD/
> > VMPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch also tests the
> > basic execution routine in the nested VMX environment and lets the VM print
> > "Hello World" to show that it ran successfully.
> >
> > The first release also includes a test suite for vmenter (vmlaunch and
> > vmresume). In addition, a hypercall mechanism is included; it is
> > currently used to trigger a normal VM exit.
> >
> > New files added:
> > x86/vmx.h : contains all VMX-related macro declarations
> > x86/vmx.c : main file for VMX nested test case
> >
> > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> > ---
> > ChangeLog:
> > 1. Refine the code in function vmx_run()
> > 2. Fix bug of setting GUEST_RFLAGS
> > 3. Move defines of selectors to lib/x86/vm.h
> > 4. Move CR0/4 defines to lib/x86/vm.h, as well as some defines from lib/x86/vm.c
> > 5. Move some inline functions to lib/x86/processor.h
> > 6. Move some inline functions (vmcs related) to x86/vmx.h
> > ---
> >  config-x86-common.mak |    2 +
> >  config-x86_64.mak     |    1 +
> >  lib/x86/msr.h         |    5 +
> >  lib/x86/processor.h   |   15 ++
> >  lib/x86/vm.c          |    4 -
> >  lib/x86/vm.h          |   21 ++
> >  x86/cstart64.S        |    4 +
> >  x86/unittests.cfg     |    6 +
> >  x86/vmx.c             |  674 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  x86/vmx.h             |  466 ++++++++++++++++++++++++++++++++++
> >  10 files changed, 1194 insertions(+), 4 deletions(-)
> >  create mode 100644 x86/vmx.c
> >  create mode 100644 x86/vmx.h
> >
> > diff --git a/config-x86-common.mak b/config-x86-common.mak
> > index 455032b..34a41e1 100644
> > --- a/config-x86-common.mak
> > +++ b/config-x86-common.mak
> > @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
> >
> >  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
> >
> > +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
> > +
> >  arch_clean:
> >         $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
> >         $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
> > diff --git a/config-x86_64.mak b/config-x86_64.mak
> > index 4e525f5..bb8ee89 100644
> > --- a/config-x86_64.mak
> > +++ b/config-x86_64.mak
> > @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
> >           $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
> >           $(TEST_DIR)/pcid.flat
> >  tests += $(TEST_DIR)/svm.flat
> > +tests += $(TEST_DIR)/vmx.flat
> >
> >  include config-x86-common.mak
> > diff --git a/lib/x86/msr.h b/lib/x86/msr.h
> > index 509a421..281255a 100644
> > --- a/lib/x86/msr.h
> > +++ b/lib/x86/msr.h
> > @@ -396,6 +396,11 @@
> >  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
> >  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
> >  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
> > +#define MSR_IA32_VMX_TRUE_PIN          0x0000048d
> > +#define MSR_IA32_VMX_TRUE_PROC         0x0000048e
> > +#define MSR_IA32_VMX_TRUE_EXIT         0x0000048f
> > +#define MSR_IA32_VMX_TRUE_ENTRY                0x00000490
> > +
> >
> >  /* AMD-V MSRs */
> >
> > diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> > index e46d8d0..f0c11cc 100644
> > --- a/lib/x86/processor.h
> > +++ b/lib/x86/processor.h
> > @@ -307,4 +307,19 @@ static inline void safe_halt(void)
> >  {
> >         asm volatile("sti; hlt");
> >  }
> > +
> > +#ifdef __x86_64__
> > +static inline u64 read_rflags(void)
> > +{
> > +       u64 r;
> > +       asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
> > +       return r;
> > +}
> > +
> > +static inline void write_rflags(u64 r)
> > +{
> > +       asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
> > +}
> > +#endif
> > +
> >  #endif
> > diff --git a/lib/x86/vm.c b/lib/x86/vm.c
> > index 260ec45..188bf57 100644
> > --- a/lib/x86/vm.c
> > +++ b/lib/x86/vm.c
> > @@ -9,10 +9,6 @@
> >  #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE)
> >  #endif
> >
> > -#define X86_CR0_PE      0x00000001
> > -#define X86_CR0_WP      0x00010000
> > -#define X86_CR0_PG      0x80000000
> > -#define X86_CR4_PSE     0x00000010
> >  static void *free = 0;
> >  static void *vfree_top = 0;
> >
> > diff --git a/lib/x86/vm.h b/lib/x86/vm.h
> > index 0b5b5c7..eff6f72 100644
> > --- a/lib/x86/vm.h
> > +++ b/lib/x86/vm.h
> > @@ -16,6 +16,27 @@
> >  #define PTE_USER    (1ull << 2)
> >  #define PTE_ADDR    (0xffffffffff000ull)
> >
> > +#define X86_CR0_PE      0x00000001
> > +#define X86_CR0_WP      0x00010000
> > +#define X86_CR0_PG      0x80000000
> > +#define X86_CR4_VMXE   0x00000001
> > +#define X86_CR4_PSE     0x00000010
> > +#define X86_CR4_PAE     0x00000020
> > +#define X86_CR4_PCIDE  0x00020000
> > +
> > +#ifdef __x86_64__
> > +#define SEL_NULL_DESC          0x0
> > +#define SEL_KERN_CODE_64       0x8
> > +#define SEL_KERN_DATA_64       0x10
> > +#define SEL_USER_CODE_64       0x18
> > +#define SEL_USER_DATA_64       0x20
> > +#define SEL_CODE_32            0x28
> > +#define SEL_DATA_32            0x30
> > +#define SEL_CODE_16            0x38
> > +#define SEL_DATA_16            0x40
> > +#define SEL_TSS_RUN            0x48
> > +#endif
> > +
> >  void setup_vm();
> >
> >  void *vmalloc(unsigned long size);
> > diff --git a/x86/cstart64.S b/x86/cstart64.S
> > index 24df5f8..0fe76da 100644
> > --- a/x86/cstart64.S
> > +++ b/x86/cstart64.S
> > @@ -4,6 +4,10 @@
> >  .globl boot_idt
> >  boot_idt = 0
> >
> > +.globl idt_descr
> > +.globl tss_descr
> > +.globl gdt64_desc
> > +
> >  ipi_vector = 0x20
> >
> >  max_cpus = 64
> > diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> > index bc9643e..85c36aa 100644
> > --- a/x86/unittests.cfg
> > +++ b/x86/unittests.cfg
> > @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
> >  file = pcid.flat
> >  extra_params = -cpu qemu64,+pcid
> >  arch = x86_64
> > +
> > +[vmx]
> > +file = vmx.flat
> > +extra_params = -cpu host,+vmx
> > +arch = x86_64
> > +
> > diff --git a/x86/vmx.c b/x86/vmx.c
> > new file mode 100644
> > index 0000000..7467927
> > --- /dev/null
> > +++ b/x86/vmx.c
> > @@ -0,0 +1,674 @@
> > +#include "libcflat.h"
> > +#include "processor.h"
> > +#include "vm.h"
> > +#include "desc.h"
> > +#include "vmx.h"
> > +#include "msr.h"
> > +#include "smp.h"
> > +#include "io.h"
> > +
> > +int fails = 0, tests = 0;
> > +u32 *vmxon_region;
> > +struct vmcs *vmcs_root;
> > +u32 vpid_cnt;
> > +void *guest_stack, *guest_syscall_stack;
> > +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
> > +ulong fix_cr0_set, fix_cr0_clr;
> > +ulong fix_cr4_set, fix_cr4_clr;
> > +struct regs regs;
> > +struct vmx_test *current;
> > +u64 hypercall_field = 0;
> > +bool launched;
> > +
> > +extern u64 gdt64_desc[];
> > +extern u64 idt_descr[];
> > +extern u64 tss_descr[];
> > +extern void *vmx_return;
> > +extern void *entry_sysenter;
> > +extern void *guest_entry;
> > +
> > +static void report(const char *name, int result)
> > +{
> > +       ++tests;
> > +       if (result)
> > +               printf("PASS: %s\n", name);
> > +       else {
> > +               printf("FAIL: %s\n", name);
> > +               ++fails;
> > +       }
> > +}
> > +
> > +static int make_vmcs_current(struct vmcs *vmcs)
> > +{
> > +       bool ret;
> > +
> > +       asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
> > +       return ret;
> > +}
> > +
> > +/* entry_sysenter */
> > +asm(
> > +       ".align 4, 0x90\n\t"
> > +       ".globl entry_sysenter\n\t"
> > +       "entry_sysenter:\n\t"
> > +       SAVE_GPR
> > +       "       and     $0xf, %rax\n\t"
> > +       "       mov     %rax, %rdi\n\t"
> > +       "       call    syscall_handler\n\t"
> > +       LOAD_GPR
> > +       "       vmresume\n\t"
> > +);
> > +
> > +static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
> > +{
> > +       current->syscall_handler(syscall_no);
> > +}
> > +
> > +static inline int vmx_on()
> > +{
> > +       bool ret;
> > +       asm volatile ("vmxon %1; setbe %0\n\t"
> > +               : "=q"(ret) : "m"(vmxon_region) : "cc");
> > +       return ret;
> > +}
> > +
> > +static inline int vmx_off()
> > +{
> > +       bool ret;
> > +       asm volatile("vmxoff; setbe %0\n\t"
> > +               : "=q"(ret) : : "cc");
> > +       return ret;
> > +}
> > +
> > +static void print_vmexit_info()
> > +{
> > +       u64 guest_rip, guest_rsp;
> > +       ulong reason = vmcs_read(EXI_REASON) & 0xff;
> > +       ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
> > +       guest_rip = vmcs_read(GUEST_RIP);
> > +       guest_rsp = vmcs_read(GUEST_RSP);
> > +       printf("VMEXIT info:\n");
> > +       printf("\tvmexit reason = %d\n", reason);
> > +       printf("\texit qualification = 0x%x\n", exit_qual);
> > +       printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
> > +       printf("\tguest_rip = 0x%llx\n", guest_rip);
> > +       printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
> > +               regs.rax, regs.rbx, regs.rcx, regs.rdx);
> > +       printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
> > +               guest_rsp, regs.rbp, regs.rsi, regs.rdi);
> > +       printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
> > +               regs.r8, regs.r9, regs.r10, regs.r11);
> > +       printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
> > +               regs.r12, regs.r13, regs.r14, regs.r15);
> > +}
> > +
> > +static void test_vmclear(void)
> > +{
> > +       u64 rflags;
> > +
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       report("test vmclear", vmcs_clear(vmcs_root) == 0);
> > +}
> > +
> > +static void test_vmxoff(void)
> > +{
> > +       int ret;
> > +       u64 rflags;
> > +
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       ret = vmx_off();
> > +       report("test vmxoff", !ret);
> > +}
> > +
> > +static void __attribute__((__used__)) guest_main(void)
> > +{
> > +       current->guest_main();
> > +}
> > +
> > +/* guest_entry */
> > +asm(
> > +       ".align 4, 0x90\n\t"
> > +       ".globl entry_guest\n\t"
> > +       "guest_entry:\n\t"
> > +       "       call guest_main\n\t"
> > +       "       mov $1, %edi\n\t"
> > +       "       call hypercall\n\t"
> > +);
> > +
> > +static void init_vmcs_ctrl(void)
> > +{
> > +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> > +       /* 26.2.1.1 */
> > +       vmcs_write(PIN_CONTROLS, ctrl_pin);
> > +       /* Disable VMEXIT of IO instruction */
> > +       vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
> > +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
> > +               ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
> > +               vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
> > +       }
> > +       vmcs_write(CR3_TARGET_COUNT, 0);
> > +       vmcs_write(VPID, ++vpid_cnt);
> > +}
> > +
> > +static void init_vmcs_host(void)
> > +{
> > +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> > +       /* 26.2.1.2 */
> > +       vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
> > +
> > +       /* 26.2.1.3 */
> > +       vmcs_write(ENT_CONTROLS, ctrl_enter);
> > +       vmcs_write(EXI_CONTROLS, ctrl_exit);
> > +
> > +       /* 26.2.2 */
> > +       vmcs_write(HOST_CR0, read_cr0());
> > +       vmcs_write(HOST_CR3, read_cr3());
> > +       vmcs_write(HOST_CR4, read_cr4());
> > +       vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
> > +       vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
> > +
> > +       /* 26.2.3 */
> > +       vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
> > +       vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
> > +       vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
> > +       vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
> > +       vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
> > +       vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
> > +       vmcs_write(HOST_BASE_FS, 0);
> > +       vmcs_write(HOST_BASE_GS, 0);
> > +
> > +       /* Set other vmcs area */
> > +       vmcs_write(PF_ERROR_MASK, 0);
> > +       vmcs_write(PF_ERROR_MATCH, 0);
> > +       vmcs_write(VMCS_LINK_PTR, ~0ul);
> > +       vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
> > +       vmcs_write(HOST_RIP, (u64)(&vmx_return));
> > +}
> > +
> > +static void init_vmcs_guest(void)
> > +{
> > +       /* 26.3 CHECKING AND LOADING GUEST STATE */
> > +       ulong guest_cr0, guest_cr4, guest_cr3;
> > +       /* 26.3.1.1 */
> > +       guest_cr0 = read_cr0();
> > +       guest_cr4 = read_cr4();
> > +       guest_cr3 = read_cr3();
> > +       if (ctrl_enter & ENT_GUEST_64) {
> > +               guest_cr0 |= X86_CR0_PG;
> > +               guest_cr4 |= X86_CR4_PAE;
> > +       }
> > +       if ((ctrl_enter & ENT_GUEST_64) == 0)
> > +               guest_cr4 &= (~X86_CR4_PCIDE);
> > +       if (guest_cr0 & X86_CR0_PG)
> > +               guest_cr0 |= X86_CR0_PE;
> > +       vmcs_write(GUEST_CR0, guest_cr0);
> > +       vmcs_write(GUEST_CR3, guest_cr3);
> > +       vmcs_write(GUEST_CR4, guest_cr4);
> > +       vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
> > +       vmcs_write(GUEST_SYSENTER_ESP,
> > +               (u64)(guest_syscall_stack + PAGE_SIZE - 1));
> > +       vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
> > +       vmcs_write(GUEST_DR7, 0);
> > +       vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
> > +
> > +       /* 26.3.1.2 */
> > +       vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
> > +       vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
> > +       vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
> > +       vmcs_write(GUEST_SEL_LDTR, 0);
> > +
> > +       vmcs_write(GUEST_BASE_CS, 0);
> > +       vmcs_write(GUEST_BASE_ES, 0);
> > +       vmcs_write(GUEST_BASE_SS, 0);
> > +       vmcs_write(GUEST_BASE_DS, 0);
> > +       vmcs_write(GUEST_BASE_FS, 0);
> > +       vmcs_write(GUEST_BASE_GS, 0);
> > +       vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
> > +       vmcs_write(GUEST_BASE_LDTR, 0);
> > +
> > +       vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
> > +       vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
> > +       vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
> > +
> > +       vmcs_write(GUEST_AR_CS, 0xa09b);
> > +       vmcs_write(GUEST_AR_DS, 0xc093);
> > +       vmcs_write(GUEST_AR_ES, 0xc093);
> > +       vmcs_write(GUEST_AR_FS, 0xc093);
> > +       vmcs_write(GUEST_AR_GS, 0xc093);
> > +       vmcs_write(GUEST_AR_SS, 0xc093);
> > +       vmcs_write(GUEST_AR_LDTR, 0x82);
> > +       vmcs_write(GUEST_AR_TR, 0x8b);
> > +
> > +       /* 26.3.1.3 */
> > +       vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
> > +       vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
> > +       vmcs_write(GUEST_LIMIT_GDTR,
> > +               ((struct descr *)gdt64_desc)->limit & 0xffff);
> > +       vmcs_write(GUEST_LIMIT_IDTR,
> > +               ((struct descr *)idt_descr)->limit & 0xffff);
> > +
> > +       /* 26.3.1.4 */
> > +       vmcs_write(GUEST_RIP, (u64)(&guest_entry));
> > +       vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
> > +       vmcs_write(GUEST_RFLAGS, 0x2);
> > +
> > +       /* 26.3.1.5 */
> > +       vmcs_write(GUEST_ACTV_STATE, 0);
> > +       vmcs_write(GUEST_INTR_STATE, 0);
> > +}
> > +
> > +static int init_vmcs(struct vmcs **vmcs)
> > +{
> > +       *vmcs = alloc_page();
> > +       memset(*vmcs, 0, PAGE_SIZE);
> > +       (*vmcs)->revision_id = basic.revision;
> > +       /* vmclear first to init vmcs */
> > +       if (vmcs_clear(*vmcs)) {
> > +               printf("%s : vmcs_clear error\n", __func__);
> > +               return 1;
> > +       }
> > +
> > +       if (make_vmcs_current(*vmcs)) {
> > +               printf("%s : make_vmcs_current error\n", __func__);
> > +               return 1;
> > +       }
> > +
> > +       /* All settings to pin/exit/enter/cpu
> > +          control fields should be placed here */
> > +       ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
> > +       ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
> > +       ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
> > +       ctrl_cpu[0] |= CPU_HLT;
> > +       /* Disable IO instruction VMEXIT now */
> > +       ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
> > +       ctrl_cpu[1] = 0;
> > +
> > +       ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
> > +       ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
> > +       ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
> > +       ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
> > +
> > +       init_vmcs_ctrl();
> > +       init_vmcs_host();
> > +       init_vmcs_guest();
> > +       return 0;
> > +}
> > +
> > +static void init_vmx(void)
> > +{
> > +       vmxon_region = alloc_page();
> > +       memset(vmxon_region, 0, PAGE_SIZE);
> > +
> > +       fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
> > +       fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
> > +       fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
> > +       fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
> > +       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> > +       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
> > +                       : MSR_IA32_VMX_PINBASED_CTLS);
> > +       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
> > +                       : MSR_IA32_VMX_EXIT_CTLS);
> > +       ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
> > +                       : MSR_IA32_VMX_ENTRY_CTLS);
> > +       ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
> > +                       : MSR_IA32_VMX_PROCBASED_CTLS);
> > +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
> > +               ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> > +       if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
> > +               ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
> > +
> > +       write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
> > +       write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
> > +
> > +       *vmxon_region = basic.revision;
> > +
> > +       guest_stack = alloc_page();
> > +       memset(guest_stack, 0, PAGE_SIZE);
> > +       guest_syscall_stack = alloc_page();
> > +       memset(guest_syscall_stack, 0, PAGE_SIZE);
> > +}
> > +
> > +static int test_vmx_capability(void)
> > +{
> > +       struct cpuid r;
> > +       u64 ret1, ret2;
> > +       u64 ia32_feature_control;
> > +       r = cpuid(1);
> > +       ret1 = ((r.c) >> 5) & 1;
> > +       ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
> > +       ret2 = ((ia32_feature_control & 0x5) == 0x5);
> > +       if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) {
> > +               wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
> > +               ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
> > +               ret2 = ((ia32_feature_control & 0x5) == 0x5);
> > +       }
> > +       report("test vmx capability", ret1 & ret2);
> > +       return !(ret1 & ret2);
> > +}
> > +
> > +static int test_vmxon(void)
> > +{
> > +       int ret;
> > +       u64 rflags;
> > +
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       ret = vmx_on();
> > +       report("test vmxon", !ret);
> > +       return ret;
> > +}
> > +
> > +static void test_vmptrld(void)
> > +{
> > +       u64 rflags;
> > +       struct vmcs *vmcs;
> > +
> > +       vmcs = alloc_page();
> > +       vmcs->revision_id = basic.revision;
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       report("test vmptrld", make_vmcs_current(vmcs) == 0);
> > +}
> > +
> > +static void test_vmptrst(void)
> > +{
> > +       u64 rflags;
> > +       int ret;
> > +       struct vmcs *vmcs1, *vmcs2;
> > +
> > +       vmcs1 = alloc_page();
> > +       memset(vmcs1, 0, PAGE_SIZE);
> > +       init_vmcs(&vmcs1);
> > +       rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> > +       write_rflags(rflags);
> > +       ret = vmcs_save(&vmcs2);
> > +       report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
> > +}
> > +
> > +/* This function can only be called in guest */
> > +static void __attribute__((__used__)) hypercall(u32 hypercall_no)
> > +{
> > +       u64 val = 0;
> > +       val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
> > +       hypercall_field = val;
> > +       asm volatile("vmcall\n\t");
> > +}
> > +
> > +static bool is_hypercall()
> > +{
> > +       ulong reason, hyper_bit;
> > +
> > +       reason = vmcs_read(EXI_REASON) & 0xff;
> > +       hyper_bit = hypercall_field & HYPERCALL_BIT;
> > +       if (reason == VMX_VMCALL && hyper_bit)
> > +               return true;
> > +       return false;
> > +}
> > +
> > +static int handle_hypercall()
> > +{
> > +       ulong hypercall_no;
> > +
> > +       hypercall_no = hypercall_field & HYPERCALL_MASK;
> > +       hypercall_field = 0;
> > +       switch (hypercall_no) {
> > +       case HYPERCALL_VMEXIT:
> > +               return VMX_TEST_VMEXIT;
> > +       default:
> > +               printf("ERROR : Invalid hypercall number : %d\n", hypercall_no);
> > +       }
> > +       return VMX_TEST_EXIT;
> > +}
> > +
> > +static int exit_handler()
> > +{
> > +       int ret;
> > +
> > +       current->exits++;
> > +       current->guest_regs = regs;
> > +       if (is_hypercall())
> > +               ret = handle_hypercall();
> > +       else
> > +               ret = current->exit_handler();
> > +       regs = current->guest_regs;
> > +       switch (ret) {
> > +       case VMX_TEST_VMEXIT:
> > +       case VMX_TEST_RESUME:
> > +               return ret;
> > +       case VMX_TEST_EXIT:
> > +               break;
> > +       default:
> > +               printf("ERROR : Invalid exit_handler return val %d.\n"
> > +                       , ret);
> > +       }
> > +       print_vmexit_info();
> > +       exit(-1);
> > +       return 0;
> > +}
> > +
> > +static int vmx_run()
> > +{
> > +       u32 ret = 0, fail = 0;
> > +
> > +       while (1) {
> > +               asm volatile (
> > +                       "mov %%rsp, %%rsi\n\t"
> > +                       "mov %2, %%rdi\n\t"
> > +                       "vmwrite %%rsi, %%rdi\n\t"
> > +
> > +                       LOAD_GPR_C
> > +                       "cmpl $0, %1\n\t"
> > +                       "jne 1f\n\t"
> > +                       LOAD_RFLAGS
> > +                       "vmlaunch\n\t"
> > +                       "jmp 2f\n\t"
> > +                       "1: "
> > +                       "vmresume\n\t"
> > +                       "2: "
> > +                       "setbe %0\n\t"
> > +                       "vmx_return:\n\t"
> > +                       SAVE_GPR_C
> > +                       SAVE_RFLAGS
> > +                       : "=m"(fail)
> > +                       : "m"(launched), "i"(HOST_RSP)
> > +                       : "rdi", "rsi", "memory", "cc"
> > +
> > +               );
> > +               if (fail)
> > +                       ret = launched ? VMX_TEST_RESUME_ERR :
> > +                               VMX_TEST_LAUNCH_ERR;
> > +               else {
> > +                       launched = 1;
> > +                       ret = exit_handler();
> > +               }
> > +               if (ret != VMX_TEST_RESUME)
> > +                       break;
> > +       }
> > +       launched = 0;
> > +       switch (ret) {
> > +       case VMX_TEST_VMEXIT:
> > +               return 0;
> > +       case VMX_TEST_LAUNCH_ERR:
> > +               printf("%s : vmlaunch failed.\n", __func__);
> > +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
> > +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
> > +                       printf("\tvmlaunch set wrong flags\n");
> > +               report("test vmlaunch", 0);
> > +               break;
> > +       case VMX_TEST_RESUME_ERR:
> > +               printf("%s : vmresume failed.\n", __func__);
> > +               if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
> > +                       || ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
> > +                       printf("\tvmresume set wrong flags\n");
> > +               report("test vmresume", 0);
> > +               break;
> > +       default:
> > +               printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
> > +               break;
> > +       }
> > +       return 1;
> > +}
> > +
> > +static int test_run(struct vmx_test *test)
> > +{
> > +       if (test->name == NULL)
> > +               test->name = "(no name)";
> > +       if (vmx_on()) {
> > +               printf("%s : vmxon failed.\n", __func__);
> > +               return 1;
> > +       }
> > +       init_vmcs(&(test->vmcs));
> > +       /* Directly call test->init is ok here, init_vmcs has done
> > +          vmcs init, vmclear and vmptrld*/
> > +       if (test->init)
> > +               test->init(test->vmcs);
> > +       test->exits = 0;
> > +       current = test;
> > +       regs = test->guest_regs;
> > +       vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
> > +       launched = 0;
> > +       printf("\nTest suite : %s\n", test->name);
> > +       vmx_run();
> > +       if (vmx_off()) {
> > +               printf("%s : vmxoff failed.\n", __func__);
> > +               return 1;
> > +       }
> > +       return 0;
> > +}
> > +
> > +static void basic_init()
> > +{
> > +}
> > +
> > +static void basic_guest_main()
> > +{
> > +       /* Here is null guest_main, print Hello World */
> > +       printf("\tHello World, this is null_guest_main!\n");
> > +}
> > +
> > +static int basic_exit_handler()
> > +{
> > +       u64 guest_rip;
> > +       ulong reason;
> > +
> > +       guest_rip = vmcs_read(GUEST_RIP);
> > +       reason = vmcs_read(EXI_REASON) & 0xff;
> > +
> > +       switch (reason) {
> > +       case VMX_VMCALL:
> > +               print_vmexit_info();
> > +               vmcs_write(GUEST_RIP, guest_rip + 3);
> > +               return VMX_TEST_RESUME;
> > +       default:
> > +               break;
> > +       }
> > +       printf("ERROR : Unhandled vmx exit.\n");
> > +       print_vmexit_info();
> > +       return VMX_TEST_EXIT;
> > +}
> > +
> > +static void basic_syscall_handler(u64 syscall_no)
> > +{
> > +}
> > +
> > +static void vmenter_main()
> > +{
> > +       u64 rax;
> > +       u64 rsp, resume_rsp;
> > +
> > +       report("test vmlaunch", 1);
> > +
> > +       asm volatile(
> > +               "mov %%rsp, %0\n\t"
> > +               "mov %3, %%rax\n\t"
> > +               "vmcall\n\t"
> > +               "mov %%rax, %1\n\t"
> > +               "mov %%rsp, %2\n\t"
> > +               : "=r"(rsp), "=r"(rax), "=r"(resume_rsp)
> > +               : "g"(0xABCD));
> > +       report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
> > +}
> > +
> > +static int vmenter_exit_handler()
> > +{
> > +       u64 guest_rip;
> > +       ulong reason;
> > +
> > +       guest_rip = vmcs_read(GUEST_RIP);
> > +       reason = vmcs_read(EXI_REASON) & 0xff;
> > +       switch (reason) {
> > +       case VMX_VMCALL:
> > +               if (current->guest_regs.rax != 0xABCD) {
> > +                       report("test vmresume", 0);
> > +                       return VMX_TEST_VMEXIT;
> > +               }
> > +               current->guest_regs.rax = 0xFFFF;
> > +               vmcs_write(GUEST_RIP, guest_rip + 3);
> > +               return VMX_TEST_RESUME;
> > +       default:
> > +               report("test vmresume", 0);
> > +               print_vmexit_info();
> > +       }
> > +       return VMX_TEST_VMEXIT;
> > +}
> > +
> > +
> > +/* name/init/guest_main/exit_handler/syscall_handler/guest_regs
> > +   basic_* just implement some basic functions */
> > +static struct vmx_test vmx_tests[] = {
> > +       { "null", basic_init, basic_guest_main, basic_exit_handler,
> > +               basic_syscall_handler, {0} },
> > +       { "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
> > +               basic_syscall_handler, {0} },
> > +};
> > +
> > +int main(void)
> > +{
> > +       int i;
> > +
> > +       setup_vm();
> > +       setup_idt();
> > +
> > +       if (test_vmx_capability() != 0) {
> > +               printf("ERROR : vmx not supported, check +vmx option\n");
> > +               goto exit;
> > +       }
> > +       init_vmx();
> > +       /* Set basic test ctxt the same as "null" */
> > +       current = &vmx_tests[0];
> > +       if (test_vmxon() != 0)
> > +               goto exit;
> > +       test_vmptrld();
> > +       test_vmclear();
> > +       test_vmptrst();
> > +       init_vmcs(&vmcs_root);
> > +       if (vmx_run()) {
> > +               report("test vmlaunch", 0);
> > +               goto exit;
> > +       }
> > +       test_vmxoff();
> > +
> > +       for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) {
> > +               if (test_run(&vmx_tests[i]))
> > +                       goto exit;
> > +       }
> > +
> > +exit:
> > +       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> > +       return fails ? 1 : 0;
> > +}
> > diff --git a/x86/vmx.h b/x86/vmx.h
> > new file mode 100644
> > index 0000000..1fb9738
> > --- /dev/null
> > +++ b/x86/vmx.h
> > @@ -0,0 +1,466 @@
> > +#ifndef __HYPERVISOR_H
> > +#define __HYPERVISOR_H
> > +
> > +#include "libcflat.h"
> > +
> > +struct vmcs {
> > +       u32 revision_id; /* vmcs revision identifier */
> > +       u32 abort; /* VMX-abort indicator */
> > +       /* VMCS data */
> > +       char data[0];
> > +};
> > +
> > +struct regs {
> > +       u64 rax;
> > +       u64 rcx;
> > +       u64 rdx;
> > +       u64 rbx;
> > +       u64 cr2;
> > +       u64 rbp;
> > +       u64 rsi;
> > +       u64 rdi;
> > +       u64 r8;
> > +       u64 r9;
> > +       u64 r10;
> > +       u64 r11;
> > +       u64 r12;
> > +       u64 r13;
> > +       u64 r14;
> > +       u64 r15;
> > +       u64 rflags;
> > +};
> > +
> > +struct vmx_test {
> > +       const char *name;
> > +       void (*init)(struct vmcs *vmcs);
> > +       void (*guest_main)();
> > +       int (*exit_handler)();
> > +       void (*syscall_handler)(u64 syscall_no);
> > +       struct regs guest_regs;
> > +       struct vmcs *vmcs;
> > +       int exits;
> > +};
> > +
> > +static union vmx_basic {
> > +       u64 val;
> > +       struct {
> > +               u32 revision;
> > +               u32     size:13,
> > +                       : 3,
> > +                       width:1,
> > +                       dual:1,
> > +                       type:4,
> > +                       insouts:1,
> > +                       ctrl:1;
> > +       };
> > +} basic;
> > +
> > +static union vmx_ctrl_pin {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_pin_rev;
> > +
> > +static union vmx_ctrl_cpu {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_cpu_rev[2];
> > +
> > +static union vmx_ctrl_exit {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_exit_rev;
> > +
> > +static union vmx_ctrl_ent {
> > +       u64 val;
> > +       struct {
> > +               u32 set, clr;
> > +       };
> > +} ctrl_enter_rev;
> > +
> > +static union vmx_ept_vpid {
> > +       u64 val;
> > +       struct {
> > +               u32:16,
> > +                       super:2,
> > +                       : 2,
> > +                       invept:1,
> > +                       : 11;
> > +               u32     invvpid:1;
> > +       };
> > +} ept_vpid;
> > +
> > +struct descr {
> > +       u16 limit;
> > +       u64 addr;
> > +};
> > +
> > +enum Encoding {
> > +       /* 16-Bit Control Fields */
> > +       VPID                    = 0x0000ul,
> > +       /* Posted-interrupt notification vector */
> > +       PINV                    = 0x0002ul,
> > +       /* EPTP index */
> > +       EPTP_IDX                = 0x0004ul,
> > +
> > +       /* 16-Bit Guest State Fields */
> > +       GUEST_SEL_ES            = 0x0800ul,
> > +       GUEST_SEL_CS            = 0x0802ul,
> > +       GUEST_SEL_SS            = 0x0804ul,
> > +       GUEST_SEL_DS            = 0x0806ul,
> > +       GUEST_SEL_FS            = 0x0808ul,
> > +       GUEST_SEL_GS            = 0x080aul,
> > +       GUEST_SEL_LDTR          = 0x080cul,
> > +       GUEST_SEL_TR            = 0x080eul,
> > +       GUEST_INT_STATUS        = 0x0810ul,
> > +
> > +       /* 16-Bit Host State Fields */
> > +       HOST_SEL_ES             = 0x0c00ul,
> > +       HOST_SEL_CS             = 0x0c02ul,
> > +       HOST_SEL_SS             = 0x0c04ul,
> > +       HOST_SEL_DS             = 0x0c06ul,
> > +       HOST_SEL_FS             = 0x0c08ul,
> > +       HOST_SEL_GS             = 0x0c0aul,
> > +       HOST_SEL_TR             = 0x0c0cul,
> > +
> > +       /* 64-Bit Control Fields */
> > +       IO_BITMAP_A             = 0x2000ul,
> > +       IO_BITMAP_B             = 0x2002ul,
> > +       MSR_BITMAP              = 0x2004ul,
> > +       EXIT_MSR_ST_ADDR        = 0x2006ul,
> > +       EXIT_MSR_LD_ADDR        = 0x2008ul,
> > +       ENTER_MSR_LD_ADDR       = 0x200aul,
> > +       VMCS_EXEC_PTR           = 0x200cul,
> > +       TSC_OFFSET              = 0x2010ul,
> > +       TSC_OFFSET_HI           = 0x2011ul,
> > +       APIC_VIRT_ADDR          = 0x2012ul,
> > +       APIC_ACCS_ADDR          = 0x2014ul,
> > +       EPTP                    = 0x201aul,
> > +       EPTP_HI                 = 0x201bul,
> > +
> > +       /* 64-Bit Readonly Data Field */
> > +       INFO_PHYS_ADDR          = 0x2400ul,
> > +
> > +       /* 64-Bit Guest State */
> > +       VMCS_LINK_PTR           = 0x2800ul,
> > +       VMCS_LINK_PTR_HI        = 0x2801ul,
> > +       GUEST_DEBUGCTL          = 0x2802ul,
> > +       GUEST_DEBUGCTL_HI       = 0x2803ul,
> > +       GUEST_EFER              = 0x2806ul,
> > +       GUEST_PERF_GLOBAL_CTRL  = 0x2808ul,
> > +       GUEST_PDPTE             = 0x280aul,
> > +
> > +       /* 64-Bit Host State */
> > +       HOST_EFER               = 0x2c02ul,
> > +       HOST_PERF_GLOBAL_CTRL   = 0x2c04ul,
> > +
> > +       /* 32-Bit Control Fields */
> > +       PIN_CONTROLS            = 0x4000ul,
> > +       CPU_EXEC_CTRL0          = 0x4002ul,
> > +       EXC_BITMAP              = 0x4004ul,
> > +       PF_ERROR_MASK           = 0x4006ul,
> > +       PF_ERROR_MATCH          = 0x4008ul,
> > +       CR3_TARGET_COUNT        = 0x400aul,
> > +       EXI_CONTROLS            = 0x400cul,
> > +       EXI_MSR_ST_CNT          = 0x400eul,
> > +       EXI_MSR_LD_CNT          = 0x4010ul,
> > +       ENT_CONTROLS            = 0x4012ul,
> > +       ENT_MSR_LD_CNT          = 0x4014ul,
> > +       ENT_INTR_INFO           = 0x4016ul,
> > +       ENT_INTR_ERROR          = 0x4018ul,
> > +       ENT_INST_LEN            = 0x401aul,
> > +       TPR_THRESHOLD           = 0x401cul,
> > +       CPU_EXEC_CTRL1          = 0x401eul,
> > +
> > +       /* 32-Bit R/O Data Fields */
> > +       VMX_INST_ERROR          = 0x4400ul,
> > +       EXI_REASON              = 0x4402ul,
> > +       EXI_INTR_INFO           = 0x4404ul,
> > +       EXI_INTR_ERROR          = 0x4406ul,
> > +       IDT_VECT_INFO           = 0x4408ul,
> > +       IDT_VECT_ERROR          = 0x440aul,
> > +       EXI_INST_LEN            = 0x440cul,
> > +       EXI_INST_INFO           = 0x440eul,
> > +
> > +       /* 32-Bit Guest State Fields */
> > +       GUEST_LIMIT_ES          = 0x4800ul,
> > +       GUEST_LIMIT_CS          = 0x4802ul,
> > +       GUEST_LIMIT_SS          = 0x4804ul,
> > +       GUEST_LIMIT_DS          = 0x4806ul,
> > +       GUEST_LIMIT_FS          = 0x4808ul,
> > +       GUEST_LIMIT_GS          = 0x480aul,
> > +       GUEST_LIMIT_LDTR        = 0x480cul,
> > +       GUEST_LIMIT_TR          = 0x480eul,
> > +       GUEST_LIMIT_GDTR        = 0x4810ul,
> > +       GUEST_LIMIT_IDTR        = 0x4812ul,
> > +       GUEST_AR_ES             = 0x4814ul,
> > +       GUEST_AR_CS             = 0x4816ul,
> > +       GUEST_AR_SS             = 0x4818ul,
> > +       GUEST_AR_DS             = 0x481aul,
> > +       GUEST_AR_FS             = 0x481cul,
> > +       GUEST_AR_GS             = 0x481eul,
> > +       GUEST_AR_LDTR           = 0x4820ul,
> > +       GUEST_AR_TR             = 0x4822ul,
> > +       GUEST_INTR_STATE        = 0x4824ul,
> > +       GUEST_ACTV_STATE        = 0x4826ul,
> > +       GUEST_SMBASE            = 0x4828ul,
> > +       GUEST_SYSENTER_CS       = 0x482aul,
> > +
> > +       /* 32-Bit Host State Fields */
> > +       HOST_SYSENTER_CS        = 0x4c00ul,
> > +
> > +       /* Natural-Width Control Fields */
> > +       CR0_MASK                = 0x6000ul,
> > +       CR4_MASK                = 0x6002ul,
> > +       CR0_READ_SHADOW = 0x6004ul,
> > +       CR4_READ_SHADOW = 0x6006ul,
> > +       CR3_TARGET_0            = 0x6008ul,
> > +       CR3_TARGET_1            = 0x600aul,
> > +       CR3_TARGET_2            = 0x600cul,
> > +       CR3_TARGET_3            = 0x600eul,
> > +
> > +       /* Natural-Width R/O Data Fields */
> > +       EXI_QUALIFICATION       = 0x6400ul,
> > +       IO_RCX                  = 0x6402ul,
> > +       IO_RSI                  = 0x6404ul,
> > +       IO_RDI                  = 0x6406ul,
> > +       IO_RIP                  = 0x6408ul,
> > +       GUEST_LINEAR_ADDRESS    = 0x640aul,
> > +
> > +       /* Natural-Width Guest State Fields */
> > +       GUEST_CR0               = 0x6800ul,
> > +       GUEST_CR3               = 0x6802ul,
> > +       GUEST_CR4               = 0x6804ul,
> > +       GUEST_BASE_ES           = 0x6806ul,
> > +       GUEST_BASE_CS           = 0x6808ul,
> > +       GUEST_BASE_SS           = 0x680aul,
> > +       GUEST_BASE_DS           = 0x680cul,
> > +       GUEST_BASE_FS           = 0x680eul,
> > +       GUEST_BASE_GS           = 0x6810ul,
> > +       GUEST_BASE_LDTR         = 0x6812ul,
> > +       GUEST_BASE_TR           = 0x6814ul,
> > +       GUEST_BASE_GDTR         = 0x6816ul,
> > +       GUEST_BASE_IDTR         = 0x6818ul,
> > +       GUEST_DR7               = 0x681aul,
> > +       GUEST_RSP               = 0x681cul,
> > +       GUEST_RIP               = 0x681eul,
> > +       GUEST_RFLAGS            = 0x6820ul,
> > +       GUEST_PENDING_DEBUG     = 0x6822ul,
> > +       GUEST_SYSENTER_ESP      = 0x6824ul,
> > +       GUEST_SYSENTER_EIP      = 0x6826ul,
> > +
> > +       /* Natural-Width Host State Fields */
> > +       HOST_CR0                = 0x6c00ul,
> > +       HOST_CR3                = 0x6c02ul,
> > +       HOST_CR4                = 0x6c04ul,
> > +       HOST_BASE_FS            = 0x6c06ul,
> > +       HOST_BASE_GS            = 0x6c08ul,
> > +       HOST_BASE_TR            = 0x6c0aul,
> > +       HOST_BASE_GDTR          = 0x6c0cul,
> > +       HOST_BASE_IDTR          = 0x6c0eul,
> > +       HOST_SYSENTER_ESP       = 0x6c10ul,
> > +       HOST_SYSENTER_EIP       = 0x6c12ul,
> > +       HOST_RSP                = 0x6c14ul,
> > +       HOST_RIP                = 0x6c16ul
> > +};
> > +
> > +enum Reason {
> > +       VMX_EXC_NMI             = 0,
> > +       VMX_EXTINT              = 1,
> > +       VMX_TRIPLE_FAULT        = 2,
> > +       VMX_INIT                = 3,
> > +       VMX_SIPI                = 4,
> > +       VMX_SMI_IO              = 5,
> > +       VMX_SMI_OTHER           = 6,
> > +       VMX_INTR_WINDOW         = 7,
> > +       VMX_NMI_WINDOW          = 8,
> > +       VMX_TASK_SWITCH         = 9,
> > +       VMX_CPUID               = 10,
> > +       VMX_GETSEC              = 11,
> > +       VMX_HLT                 = 12,
> > +       VMX_INVD                = 13,
> > +       VMX_INVLPG              = 14,
> > +       VMX_RDPMC               = 15,
> > +       VMX_RDTSC               = 16,
> > +       VMX_RSM                 = 17,
> > +       VMX_VMCALL              = 18,
> > +       VMX_VMCLEAR             = 19,
> > +       VMX_VMLAUNCH            = 20,
> > +       VMX_VMPTRLD             = 21,
> > +       VMX_VMPTRST             = 22,
> > +       VMX_VMREAD              = 23,
> > +       VMX_VMRESUME            = 24,
> > +       VMX_VMWRITE             = 25,
> > +       VMX_VMXOFF              = 26,
> > +       VMX_VMXON               = 27,
> > +       VMX_CR                  = 28,
> > +       VMX_DR                  = 29,
> > +       VMX_IO                  = 30,
> > +       VMX_RDMSR               = 31,
> > +       VMX_WRMSR               = 32,
> > +       VMX_FAIL_STATE          = 33,
> > +       VMX_FAIL_MSR            = 34,
> > +       VMX_MWAIT               = 36,
> > +       VMX_MTF                 = 37,
> > +       VMX_MONITOR             = 39,
> > +       VMX_PAUSE               = 40,
> > +       VMX_FAIL_MCHECK         = 41,
> > +       VMX_TPR_THRESHOLD       = 43,
> > +       VMX_APIC_ACCESS         = 44,
> > +       VMX_GDTR_IDTR           = 46,
> > +       VMX_LDTR_TR             = 47,
> > +       VMX_EPT_VIOLATION       = 48,
> > +       VMX_EPT_MISCONFIG       = 49,
> > +       VMX_INVEPT              = 50,
> > +       VMX_PREEMPT             = 52,
> > +       VMX_INVVPID             = 53,
> > +       VMX_WBINVD              = 54,
> > +       VMX_XSETBV              = 55
> > +};
> > +
> > +#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
> > +#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
> > +
> > +enum Ctrl_exi {
> > +       EXI_HOST_64             = 1UL << 9,
> > +       EXI_LOAD_PERF           = 1UL << 12,
> > +       EXI_INTA                = 1UL << 15,
> > +       EXI_LOAD_EFER           = 1UL << 21,
> > +};
> > +
> > +enum Ctrl_ent {
> > +       ENT_GUEST_64            = 1UL << 9,
> > +       ENT_LOAD_EFER           = 1UL << 15,
> > +};
> > +
> > +enum Ctrl_pin {
> > +       PIN_EXTINT              = 1ul << 0,
> > +       PIN_NMI                 = 1ul << 3,
> > +       PIN_VIRT_NMI            = 1ul << 5,
> > +};
> > +
> > +enum Ctrl0 {
> > +       CPU_INTR_WINDOW         = 1ul << 2,
> > +       CPU_HLT                 = 1ul << 7,
> > +       CPU_INVLPG              = 1ul << 9,
> > +       CPU_CR3_LOAD            = 1ul << 15,
> > +       CPU_CR3_STORE           = 1ul << 16,
> > +       CPU_TPR_SHADOW          = 1ul << 21,
> > +       CPU_NMI_WINDOW          = 1ul << 22,
> > +       CPU_IO                  = 1ul << 24,
> > +       CPU_IO_BITMAP           = 1ul << 25,
> > +       CPU_SECONDARY           = 1ul << 31,
> > +};
> > +
> > +enum Ctrl1 {
> > +       CPU_EPT                 = 1ul << 1,
> > +       CPU_VPID                = 1ul << 5,
> > +       CPU_URG                 = 1ul << 7,
> > +};
> > +
> > +#define SAVE_GPR                               \
> > +       "xchg %rax, regs\n\t"                   \
> > +       "xchg %rbx, regs+0x8\n\t"               \
> > +       "xchg %rcx, regs+0x10\n\t"              \
> > +       "xchg %rdx, regs+0x18\n\t"              \
> > +       "xchg %rbp, regs+0x28\n\t"              \
> > +       "xchg %rsi, regs+0x30\n\t"              \
> > +       "xchg %rdi, regs+0x38\n\t"              \
> > +       "xchg %r8, regs+0x40\n\t"               \
> > +       "xchg %r9, regs+0x48\n\t"               \
> > +       "xchg %r10, regs+0x50\n\t"              \
> > +       "xchg %r11, regs+0x58\n\t"              \
> > +       "xchg %r12, regs+0x60\n\t"              \
> > +       "xchg %r13, regs+0x68\n\t"              \
> > +       "xchg %r14, regs+0x70\n\t"              \
> > +       "xchg %r15, regs+0x78\n\t"
> > +
> > +#define LOAD_GPR       SAVE_GPR
> > +
> > +#define SAVE_GPR_C                             \
> > +       "xchg %%rax, regs\n\t"                  \
> > +       "xchg %%rbx, regs+0x8\n\t"              \
> > +       "xchg %%rcx, regs+0x10\n\t"             \
> > +       "xchg %%rdx, regs+0x18\n\t"             \
> > +       "xchg %%rbp, regs+0x28\n\t"             \
> > +       "xchg %%rsi, regs+0x30\n\t"             \
> > +       "xchg %%rdi, regs+0x38\n\t"             \
> > +       "xchg %%r8, regs+0x40\n\t"              \
> > +       "xchg %%r9, regs+0x48\n\t"              \
> > +       "xchg %%r10, regs+0x50\n\t"             \
> > +       "xchg %%r11, regs+0x58\n\t"             \
> > +       "xchg %%r12, regs+0x60\n\t"             \
> > +       "xchg %%r13, regs+0x68\n\t"             \
> > +       "xchg %%r14, regs+0x70\n\t"             \
> > +       "xchg %%r15, regs+0x78\n\t"
> > +
> > +#define LOAD_GPR_C     SAVE_GPR_C
> > +
> > +#define SAVE_RFLAGS            \
> > +       "pushf\n\t"                     \
> > +       "pop regs+0x80\n\t"
> > +
> > +#define LOAD_RFLAGS            \
> > +       "push regs+0x80\n\t"    \
> > +       "popf\n\t"
> > +
> > +#define VMX_IO_SIZE_MASK               0x7
> > +#define _VMX_IO_BYTE                   1
> > +#define _VMX_IO_WORD                   2
> > +#define _VMX_IO_LONG                   3
> > +#define VMX_IO_DIRECTION_MASK          (1ul << 3)
> > +#define VMX_IO_IN                      (1ul << 3)
> > +#define VMX_IO_OUT                     0
> > +#define VMX_IO_STRING                  (1ul << 4)
> > +#define VMX_IO_REP                     (1ul << 5)
> > +#define VMX_IO_OPRAND_DX               (1ul << 6)
> > +#define VMX_IO_PORT_MASK               0xFFFF0000
> > +#define VMX_IO_PORT_SHIFT              16
> > +
> > +#define VMX_TEST_VMEXIT                        1
> > +#define VMX_TEST_EXIT                  2
> > +#define VMX_TEST_RESUME                        3
> > +#define VMX_TEST_LAUNCH_ERR            4
> > +#define VMX_TEST_RESUME_ERR            5
> > +
> > +#define HYPERCALL_BIT          (1ul << 12)
> > +#define HYPERCALL_MASK         0xFFF
> > +#define HYPERCALL_VMEXIT       0x1
> > +
> > +static inline int vmcs_clear(struct vmcs *vmcs)
> > +{
> > +       bool ret;
> > +       asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
> > +       return ret;
> > +}
> > +
> > +static inline u64 vmcs_read(enum Encoding enc)
> > +{
> > +       u64 val;
> > +       asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
> > +       return val;
> > +}
> > +
> > +static inline int vmcs_write(enum Encoding enc, u64 val)
> > +{
> > +       bool ret;
> > +       asm volatile ("vmwrite %1, %2; setbe %0"
> > +               : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
> > +       return ret;
> > +}
> > +
> > +static inline int vmcs_save(struct vmcs **vmcs)
> > +{
> > +       bool ret;
> > +
> > +       asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc");
> > +       return ret;
> > +}
> > +
> > +#endif
> > +
> > --
> > 1.7.9.5
> >
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 30, 2013, 7:42 a.m. UTC | #5
On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote:
> Hi Arthur, 
> 
>    I'm trying to test your patch on a SandyBridge machine.
> 
>    Used 'nested=1' when loading kvm (from 3.9-1-amd64)
Should be kvm-intel, but you would get an error otherwise, so I assume that
this is what you did.

>    and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)).
> 
>    Without nested=1 I get ``unhandled excecption 13'', so I presume
>    this is OK, with it, the test_vmx_capability() fails at
>    detecting vmx.
> 
>    I've used for qemu:
>    $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device
>    isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device
>    pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1
> 
>    Are there any knobs which I should further tune?
> 
At first glance you have everything that is needed. What is the output of
"cat /proc/cpuinfo" in the guest?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marius Vlad July 30, 2013, 7:59 a.m. UTC | #6
Hi Gleb,

On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote:
> On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote:
> > Hi Arthur, 
> > 
> >    I'm trying to test your patch on a SandyBridge machine.
> > 
> >    Used 'nested=1' when loading kvm (from 3.9-1-amd64)
> Should be kvm-intel, but you will get error otherwise, so I assume that
> this is what you did.
Indeed.
> 
> >    and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)).
> > 
> >    Without nested=1 I get ``unhandled excecption 13'', so I presume
> >    this is OK, with it, the test_vmx_capability() fails at
> >    detecting vmx.
> > 
> >    I've used for qemu:
> >    $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device
> >    isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device
> >    pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1
> > 
> >    Are there any knobs which I should further tune?
> > 
> On the first glance you have everything that is needed. What is the output of
> "cat /proc/cpuinfo" in the guest is?
(initramfs) cat /proc/cpuinfo 
processor       : 0
vendor_id       : GenuineIntel
cpu family      : 6
model           : 42
model name      : Intel Xeon E312xx (Sandy Bridge)
stepping        : 1
microcode       : 0x1
cpu MHz         : 3410.012
cache size      : 4096 KB
fpu             : yes
fpu_exception   : yes
cpuid level     : 13
wp              : yes
flags           : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx rdtscp lm constant_tsc arch_perfmon rep_good nopl eagerfpu pni pclmulqdq vmx ssse3 cx16 sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx hypervisor lahf_lm xsaveopt
bogomips        : 6820.02
clflush size    : 64
cache_alignment : 64
address sizes   : 40 bits physical, 48 bits virtual
power management:

Used: 
$ qemu-system-x86_64 -enable-kvm -device pc-testdev -device\
isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device\
pci-testdev -kernel /boot/vmlinuz-3.9-1-amd64 -initrd\
/boot/initrd.img-3.9-1-amd64 -append 'init=/bin/bash console=ttyS0' -cpu\
SandyBridge,+vmx,+x2apic -smp 1 -serial stdio
> 
> --
> 			Gleb.
>
Gleb Natapov July 30, 2013, 8:07 a.m. UTC | #7
On Tue, Jul 30, 2013 at 09:59:47AM +0200, Marius Vlad wrote:
> Hi Gleb,
> 
> On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote:
> > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote:
> > > Hi Arthur, 
> > > 
> > >    I'm trying to test your patch on a SandyBridge machine.
> > > 
> > >    Used 'nested=1' when loading kvm (from 3.9-1-amd64)
> > Should be kvm-intel, but you will get error otherwise, so I assume that
> > this is what you did.
> Indeed.
> > 
> > >    and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)).
> > > 
> > >    Without nested=1 I get ``unhandled excecption 13'', so I presume
> > >    this is OK, with it, the test_vmx_capability() fails at
> > >    detecting vmx.
> > > 
> > >    I've used for qemu:
> > >    $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device
> > >    isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device
> > >    pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1
> > > 
> > >    Are there any knobs which I should further tune?
> > > 
> > On the first glance you have everything that is needed. What is the output of
> > "cat /proc/cpuinfo" in the guest is?
I checked what test_vmx_capability() is actually doing: it uses the
MSR_IA32_FEATURE_CONTROL MSR, which is not supported by your version of the
kernel. You need commit b3897a49e22fc173efa77527a447c714f753f681, which is
not in any released kernel version yet. You can just omit the test.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arthur Chunqi Li July 30, 2013, 9:14 a.m. UTC | #8
On Tue, Jul 30, 2013 at 4:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jul 30, 2013 at 09:59:47AM +0200, Marius Vlad wrote:
>> Hi Gleb,
>>
>> On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote:
>> > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote:
>> > > Hi Arthur,
>> > >
>> > >    I'm trying to test your patch on a SandyBridge machine.
>> > >
>> > >    Used 'nested=1' when loading kvm (from 3.9-1-amd64)
>> > Should be kvm-intel, but you will get error otherwise, so I assume that
>> > this is what you did.
>> Indeed.
>> >
>> > >    and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)).
>> > >
>> > >    Without nested=1 I get ``unhandled excecption 13'', so I presume
>> > >    this is OK, with it, the test_vmx_capability() fails at
>> > >    detecting vmx.
>> > >
>> > >    I've used for qemu:
>> > >    $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device
>> > >    isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device
>> > >    pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1
>> > >
>> > >    Are there any knobs which I should further tune?
>> > >
>> > On the first glance you have everything that is needed. What is the output of
>> > "cat /proc/cpuinfo" in the guest is?
> I checked what test_vmx_capability() is actually doing and it uses
> MSR_IA32_FEATURE_CONTROL MSR which is not supported in your version of
> kernel. You need b3897a49e22fc173efa77527a447c714f753f681 commit, it is
> not in any released version of kernel yet. You can just omit the test.
Hi Marius,

What Gleb said is generally right, and I have some additional
information. This patch is still in progress, and I am fixing some existing
bugs as I develop it. You can refer to [1] for all the relevant bug
fix patches. For the kernel, you had better git clone the kvm kernel from [2],
check out the "next" branch and build it. Some patches are in kvm's
queue and are not merged into master yet.

Thanks,
Arthur

[1] http://wiki.qemu.org/Features/KVMNestedVirtualizationTestsuite
[2] https://git.kernel.org/cgit/virt/kvm/kvm.git/
>
> --
>                         Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marius Vlad July 30, 2013, 11:51 a.m. UTC | #9
On Tue, Jul 30, 2013 at 11:14:38AM +0200, Arthur Chunqi Li wrote:
> On Tue, Jul 30, 2013 at 4:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jul 30, 2013 at 09:59:47AM +0200, Marius Vlad wrote:
> >> Hi Gleb,
> >>
> >> On Tue, Jul 30, 2013 at 09:42:12AM +0200, Gleb Natapov wrote:
> >> > On Tue, Jul 30, 2013 at 09:31:17AM +0200, Marius Vlad wrote:
> >> > > Hi Arthur,
> >> > >
> >> > >    I'm trying to test your patch on a SandyBridge machine.
> >> > >
> >> > >    Used 'nested=1' when loading kvm (from 3.9-1-amd64)
> >> > Should be kvm-intel, but you will get error otherwise, so I assume that
> >> > this is what you did.
> >> Indeed.
> >> >
> >> > >    and qemu (emulator version 1.5.1 (Debian 1.5.0+dfsg-5)).
> >> > >
> >> > >    Without nested=1 I get ``unhandled excecption 13'', so I presume
> >> > >    this is OK, with it, the test_vmx_capability() fails at
> >> > >    detecting vmx.
> >> > >
> >> > >    I've used for qemu:
> >> > >    $ qemu-system-x86_64 -enable-kvm -device pc-testdev -device
> >> > >    isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device
> >> > >    pci-testdev -kernel x86/vmx.flat -cpu SandyBridge,+vmx,+x2apic -smp 1
> >> > >
> >> > >    Are there any knobs which I should further tune?
> >> > >
> >> > On the first glance you have everything that is needed. What is the output of
> >> > "cat /proc/cpuinfo" in the guest is?
> > I checked what test_vmx_capability() is actually doing and it uses
> > MSR_IA32_FEATURE_CONTROL MSR which is not supported in your version of
> > kernel. You need b3897a49e22fc173efa77527a447c714f753f681 commit, it is
> > not in any released version of kernel yet. You can just omit the test.
> Hi Marius,
> 
> What Gleb said is generally right and I have some additional
> information. This patch is ongoing as well as fixing some existing
> bugs when developing. You can refer to [1] to get all the relevant bug
> fix patches. For kernel, you'd better git clone kvm kernel from [2]
> and checkout to "next" branch and make it. Some patches are on kvm's
> wait queue and are not merged into master.

        I had somehow guessed that I would need an upstream version of kvm.
        Thanks for pointing out the queue branch. The tests pass now.

> 
> Thanks,
> Arthur
> 
> [1] http://wiki.qemu.org/Features/KVMNestedVirtualizationTestsuite
> [2] https://git.kernel.org/cgit/virt/kvm/kvm.git/
> >
> > --
> >                         Gleb.
Paolo Bonzini July 30, 2013, 2:53 p.m. UTC | #10
Il 28/07/2013 16:00, Arthur Chunqi Li ha scritto:
> +
> +	while (1) {
> +		asm volatile (
> +			"mov %%rsp, %%rsi\n\t"
> +			"mov %2, %%rdi\n\t"
> +			"vmwrite %%rsi, %%rdi\n\t"
> +
> +			LOAD_GPR_C
> +			"cmpl $0, %1\n\t"
> +			"jne 1f\n\t"
> +			LOAD_RFLAGS
> +			"vmlaunch\n\t"
> +			"jmp 2f\n\t"
> +			"1: "
> +			"vmresume\n\t"
> +			"2: "
> +			"setbe %0\n\t"

Perhaps here add

			jbe vmx_return
			ud2

but it can be added in a follow-up.

> +			"vmx_return:\n\t"
> +			SAVE_GPR_C
> +			SAVE_RFLAGS
> +			: "=m"(fail)
> +			: "m"(launched), "i"(HOST_RSP)
> +			: "rdi", "rsi", "memory", "cc"
> +
> +		);

Nice! :)

I pushed the patch.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arthur Chunqi Li July 30, 2013, 2:57 p.m. UTC | #11
On Tue, Jul 30, 2013 at 10:53 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 28/07/2013 16:00, Arthur Chunqi Li ha scritto:
>> +
>> +     while (1) {
>> +             asm volatile (
>> +                     "mov %%rsp, %%rsi\n\t"
>> +                     "mov %2, %%rdi\n\t"
>> +                     "vmwrite %%rsi, %%rdi\n\t"
>> +
>> +                     LOAD_GPR_C
>> +                     "cmpl $0, %1\n\t"
>> +                     "jne 1f\n\t"
>> +                     LOAD_RFLAGS
>> +                     "vmlaunch\n\t"
>> +                     "jmp 2f\n\t"
>> +                     "1: "
>> +                     "vmresume\n\t"
>> +                     "2: "
>> +                     "setbe %0\n\t"
>
> Perhaps here add
>
>                         jbe vmx_return
>                         ud2
>
> but it can be added in a follow-up.
>
>> +                     "vmx_return:\n\t"
>> +                     SAVE_GPR_C
>> +                     SAVE_RFLAGS
>> +                     : "=m"(fail)
>> +                     : "m"(launched), "i"(HOST_RSP)
>> +                     : "rdi", "rsi", "memory", "cc"
>> +
>> +             );
>
> Nice! :)
>
> I pushed the patch.
Hi Paolo,
Please hold off on pushing it. There's something wrong with the host_rflags
handling. I will send another version that also includes your suggestion;
you could push that version.

Thanks,
Arthur
>
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 30, 2013, 2:59 p.m. UTC | #12
Il 30/07/2013 16:57, Arthur Chunqi Li ha scritto:
> On Tue, Jul 30, 2013 at 10:53 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 28/07/2013 16:00, Arthur Chunqi Li ha scritto:
>>> +
>>> +     while (1) {
>>> +             asm volatile (
>>> +                     "mov %%rsp, %%rsi\n\t"
>>> +                     "mov %2, %%rdi\n\t"
>>> +                     "vmwrite %%rsi, %%rdi\n\t"
>>> +
>>> +                     LOAD_GPR_C
>>> +                     "cmpl $0, %1\n\t"
>>> +                     "jne 1f\n\t"
>>> +                     LOAD_RFLAGS
>>> +                     "vmlaunch\n\t"
>>> +                     "jmp 2f\n\t"
>>> +                     "1: "
>>> +                     "vmresume\n\t"
>>> +                     "2: "
>>> +                     "setbe %0\n\t"
>>
>> Perhaps here add
>>
>>                         jbe vmx_return
>>                         ud2
>>
>> but it can be added in a follow-up.
>>
>>> +                     "vmx_return:\n\t"
>>> +                     SAVE_GPR_C
>>> +                     SAVE_RFLAGS
>>> +                     : "=m"(fail)
>>> +                     : "m"(launched), "i"(HOST_RSP)
>>> +                     : "rdi", "rsi", "memory", "cc"
>>> +
>>> +             );
>>
>> Nice! :)
>>
>> I pushed the patch.
> Hi Paolo,
> Slow down pushing it. There's something wrong with host_rflags
> handling. I will commit another version as well as your suggestion,
> you could push that version.

Well, too late...

anyway I prefer follow-up patches because they make the differences
clearer.  With a single 1200-line patch it is a bit complicated to follow.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/config-x86-common.mak b/config-x86-common.mak
index 455032b..34a41e1 100644
--- a/config-x86-common.mak
+++ b/config-x86-common.mak
@@ -101,6 +101,8 @@  $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
 
 $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
 
+$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
+
 arch_clean:
 	$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
 	$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
diff --git a/config-x86_64.mak b/config-x86_64.mak
index 4e525f5..bb8ee89 100644
--- a/config-x86_64.mak
+++ b/config-x86_64.mak
@@ -9,5 +9,6 @@  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 	  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
 	  $(TEST_DIR)/pcid.flat
 tests += $(TEST_DIR)/svm.flat
+tests += $(TEST_DIR)/vmx.flat
 
 include config-x86-common.mak
diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 509a421..281255a 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -396,6 +396,11 @@ 
 #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PIN		0x0000048d
+#define MSR_IA32_VMX_TRUE_PROC		0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT		0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY		0x00000490
+
 
 /* AMD-V MSRs */
 
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index e46d8d0..f0c11cc 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -307,4 +307,19 @@  static inline void safe_halt(void)
 {
 	asm volatile("sti; hlt");
 }
+
+#ifdef __x86_64__
+static inline u64 read_rflags(void)
+{
+	u64 r;
+	asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
+	return r;
+}
+
+static inline void write_rflags(u64 r)
+{
+	asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
+}
+#endif
+
 #endif
diff --git a/lib/x86/vm.c b/lib/x86/vm.c
index 260ec45..188bf57 100644
--- a/lib/x86/vm.c
+++ b/lib/x86/vm.c
@@ -9,10 +9,6 @@ 
 #define LARGE_PAGE_SIZE (1024 * PAGE_SIZE)
 #endif
 
-#define X86_CR0_PE      0x00000001
-#define X86_CR0_WP      0x00010000
-#define X86_CR0_PG      0x80000000
-#define X86_CR4_PSE     0x00000010
 static void *free = 0;
 static void *vfree_top = 0;
 
diff --git a/lib/x86/vm.h b/lib/x86/vm.h
index 0b5b5c7..eff6f72 100644
--- a/lib/x86/vm.h
+++ b/lib/x86/vm.h
@@ -16,6 +16,27 @@ 
 #define PTE_USER    (1ull << 2)
 #define PTE_ADDR    (0xffffffffff000ull)
 
+#define X86_CR0_PE      0x00000001	/* CR0 bit 0: protected mode enable */
+#define X86_CR0_WP      0x00010000	/* CR0 bit 16: write protect */
+#define X86_CR0_PG      0x80000000	/* CR0 bit 31: paging */
+#define X86_CR4_VMXE    0x00002000	/* CR4 bit 13: VMX enable (bit 0 is VME) */
+#define X86_CR4_PSE     0x00000010	/* CR4 bit 4: page size extensions */
+#define X86_CR4_PAE     0x00000020	/* CR4 bit 5: physical address extension */
+#define X86_CR4_PCIDE   0x00020000	/* CR4 bit 17: PCID enable */
+
+#ifdef __x86_64__
+#define SEL_NULL_DESC		0x0
+#define SEL_KERN_CODE_64	0x8
+#define SEL_KERN_DATA_64	0x10
+#define SEL_USER_CODE_64	0x18
+#define SEL_USER_DATA_64	0x20
+#define SEL_CODE_32		0x28
+#define SEL_DATA_32		0x30
+#define SEL_CODE_16		0x38
+#define SEL_DATA_16		0x40
+#define SEL_TSS_RUN		0x48
+#endif
+
 void setup_vm();
 
 void *vmalloc(unsigned long size);
diff --git a/x86/cstart64.S b/x86/cstart64.S
index 24df5f8..0fe76da 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -4,6 +4,10 @@ 
 .globl boot_idt
 boot_idt = 0
 
+.globl idt_descr
+.globl tss_descr
+.globl gdt64_desc
+
 ipi_vector = 0x20
 
 max_cpus = 64
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index bc9643e..85c36aa 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -149,3 +149,9 @@  extra_params = --append "10000000 `date +%s`"
 file = pcid.flat
 extra_params = -cpu qemu64,+pcid
 arch = x86_64
+
+[vmx]
+file = vmx.flat
+extra_params = -cpu host,+vmx
+arch = x86_64
+
diff --git a/x86/vmx.c b/x86/vmx.c
new file mode 100644
index 0000000..7467927
--- /dev/null
+++ b/x86/vmx.c
@@ -0,0 +1,674 @@ 
+#include "libcflat.h"
+#include "processor.h"
+#include "vm.h"
+#include "desc.h"
+#include "vmx.h"
+#include "msr.h"
+#include "smp.h"
+#include "io.h"
+
+int fails = 0, tests = 0;
+u32 *vmxon_region;
+struct vmcs *vmcs_root;
+u32 vpid_cnt;
+void *guest_stack, *guest_syscall_stack;
+u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
+ulong fix_cr0_set, fix_cr0_clr;
+ulong fix_cr4_set, fix_cr4_clr;
+struct regs regs;
+struct vmx_test *current;
+u64 hypercall_field = 0;
+bool launched;
+
+extern u64 gdt64_desc[];
+extern u64 idt_descr[];
+extern u64 tss_descr[];
+extern void *vmx_return;
+extern void *entry_sysenter;
+extern void *guest_entry;
+
+static void report(const char *name, int result)
+{
+	++tests;
+	if (result)
+		printf("PASS: %s\n", name);
+	else {
+		printf("FAIL: %s\n", name);
+		++fails;
+	}
+}
+
+static int make_vmcs_current(struct vmcs *vmcs)
+{
+	bool ret;
+
+	asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
+	return ret;
+}
+
+/* entry_sysenter */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_sysenter\n\t"
+	"entry_sysenter:\n\t"
+	SAVE_GPR
+	"	and	$0xf, %rax\n\t"
+	"	mov	%rax, %rdi\n\t"
+	"	call	syscall_handler\n\t"
+	LOAD_GPR
+	"	vmresume\n\t"
+);
+
+static void __attribute__((__used__)) syscall_handler(u64 syscall_no)
+{
+	current->syscall_handler(syscall_no);
+}
+
+static inline int vmx_on()
+{
+	bool ret;
+	asm volatile ("vmxon %1; setbe %0\n\t"
+		: "=q"(ret) : "m"(vmxon_region) : "cc");
+	return ret;
+}
+
+static inline int vmx_off()
+{
+	bool ret;
+	asm volatile("vmxoff; setbe %0\n\t"
+		: "=q"(ret) : : "cc");
+	return ret;
+}
+
+/* Print the exit reason, exit qualification and guest register state read from the current VMCS and `regs`. */
+static void print_vmexit_info()
+{
+	u64 guest_rip = vmcs_read(GUEST_RIP);
+	u64 guest_rsp = vmcs_read(GUEST_RSP);
+	ulong reason = vmcs_read(EXI_REASON) & 0xff;
+	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
+	printf("VMEXIT info:\n");
+	printf("\tvmexit reason = %ld\n", reason);
+	printf("\texit qualification = 0x%lx\n", exit_qual);
+	printf("\tBit 31 of reason = %llx\n", (vmcs_read(EXI_REASON) >> 31) & 1);
+	printf("\tguest_rip = 0x%llx\n", guest_rip);
+	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
+		regs.rax, regs.rbx, regs.rcx, regs.rdx);
+	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
+		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
+	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
+		regs.r8, regs.r9, regs.r10, regs.r11);
+	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
+		regs.r12, regs.r13, regs.r14, regs.r15);
+}
+
+static void test_vmclear(void)
+{
+	u64 rflags;
+
+	rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	write_rflags(rflags);
+	report("test vmclear", vmcs_clear(vmcs_root) == 0);
+}
+
+static void test_vmxoff(void)
+{
+	int ret;
+	u64 rflags;
+
+	rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	write_rflags(rflags);
+	ret = vmx_off();
+	report("test vmxoff", !ret);
+}
+
+static void __attribute__((__used__)) guest_main(void)
+{
+	current->guest_main();
+}
+
+/* guest_entry: initial guest RIP; runs guest_main(), then issues hypercall(1) (HYPERCALL_VMEXIT) */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	guest_entry\n\t"
+	"guest_entry:\n\t"
+	"	call guest_main\n\t"
+	"	mov $1, %edi\n\t"
+	"	call hypercall\n\t"
+);
+
+/* Write the VM-execution control fields of the current VMCS from the ctrl_* globals. */
+static void init_vmcs_ctrl(void)
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA, 26.2.1.1 */
+	vmcs_write(PIN_CONTROLS, ctrl_pin);
+	/* Disable VMEXIT of IO instruction */
+	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
+	if (ctrl_cpu_rev[0].clr & CPU_SECONDARY) {	/* availability is an allowed-1 bit */
+		ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
+		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
+	}
+	vmcs_write(CR3_TARGET_COUNT, 0);
+	vmcs_write(VPID, ++vpid_cnt);
+}
+
+static void init_vmcs_host(void)
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.2 */
+	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.2.1.3 */
+	vmcs_write(ENT_CONTROLS, ctrl_enter);
+	vmcs_write(EXI_CONTROLS, ctrl_exit);
+
+	/* 26.2.2 */
+	vmcs_write(HOST_CR0, read_cr0());
+	vmcs_write(HOST_CR3, read_cr3());
+	vmcs_write(HOST_CR4, read_cr4());
+	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
+
+	/* 26.2.3 */
+	vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(HOST_BASE_FS, 0);
+	vmcs_write(HOST_BASE_GS, 0);
+
+	/* Set other vmcs area */
+	vmcs_write(PF_ERROR_MASK, 0);
+	vmcs_write(PF_ERROR_MATCH, 0);
+	vmcs_write(VMCS_LINK_PTR, ~0ul);
+	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
+	vmcs_write(HOST_RIP, (u64)(&vmx_return));
+}
+
+static void init_vmcs_guest(void)
+{
+	/* 26.3 CHECKING AND LOADING GUEST STATE */
+	ulong guest_cr0, guest_cr4, guest_cr3;
+	/* 26.3.1.1 */
+	guest_cr0 = read_cr0();
+	guest_cr4 = read_cr4();
+	guest_cr3 = read_cr3();
+	if (ctrl_enter & ENT_GUEST_64) {
+		guest_cr0 |= X86_CR0_PG;
+		guest_cr4 |= X86_CR4_PAE;
+	}
+	if ((ctrl_enter & ENT_GUEST_64) == 0)
+		guest_cr4 &= (~X86_CR4_PCIDE);
+	if (guest_cr0 & X86_CR0_PG)
+		guest_cr0 |= X86_CR0_PE;
+	vmcs_write(GUEST_CR0, guest_cr0);
+	vmcs_write(GUEST_CR3, guest_cr3);
+	vmcs_write(GUEST_CR4, guest_cr4);
+	vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SYSENTER_ESP,
+		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
+	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(GUEST_DR7, 0);
+	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.3.1.2 */
+	vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(GUEST_SEL_LDTR, 0);
+
+	vmcs_write(GUEST_BASE_CS, 0);
+	vmcs_write(GUEST_BASE_ES, 0);
+	vmcs_write(GUEST_BASE_SS, 0);
+	vmcs_write(GUEST_BASE_DS, 0);
+	vmcs_write(GUEST_BASE_FS, 0);
+	vmcs_write(GUEST_BASE_GS, 0);
+	vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(GUEST_BASE_LDTR, 0);
+
+	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
+	vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
+
+	vmcs_write(GUEST_AR_CS, 0xa09b);
+	vmcs_write(GUEST_AR_DS, 0xc093);
+	vmcs_write(GUEST_AR_ES, 0xc093);
+	vmcs_write(GUEST_AR_FS, 0xc093);
+	vmcs_write(GUEST_AR_GS, 0xc093);
+	vmcs_write(GUEST_AR_SS, 0xc093);
+	vmcs_write(GUEST_AR_LDTR, 0x82);
+	vmcs_write(GUEST_AR_TR, 0x8b);
+
+	/* 26.3.1.3 */
+	vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(GUEST_LIMIT_GDTR,
+		((struct descr *)gdt64_desc)->limit & 0xffff);
+	vmcs_write(GUEST_LIMIT_IDTR,
+		((struct descr *)idt_descr)->limit & 0xffff);
+
+	/* 26.3.1.4 */
+	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
+	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
+	vmcs_write(GUEST_RFLAGS, 0x2);
+
+	/* 26.3.1.5 */
+	vmcs_write(GUEST_ACTV_STATE, 0);
+	vmcs_write(GUEST_INTR_STATE, 0);
+}
+
+static int init_vmcs(struct vmcs **vmcs)
+{
+	*vmcs = alloc_page();
+	memset(*vmcs, 0, PAGE_SIZE);
+	(*vmcs)->revision_id = basic.revision;
+	/* VMCLEAR the fresh VMCS first, then make it current with VMPTRLD */
+	if (vmcs_clear(*vmcs)) {
+		printf("%s : vmcs_clear error\n", __func__);
+		return 1;
+	}
+
+	if (make_vmcs_current(*vmcs)) {
+		printf("%s : make_vmcs_current error\n", __func__);
+		return 1;
+	}
+
+	/* All settings of the pin/exit/enter/cpu control fields go here;
+	   the *_rev masks are applied below (OR in .set, AND with .clr) */
+	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
+	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
+	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
+	ctrl_cpu[0] |= CPU_HLT;
+	/* Disable VM exits on I/O instructions for now */
+	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
+	ctrl_cpu[1] = 0;
+
+	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
+	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
+	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
+	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
+
+	init_vmcs_ctrl();
+	init_vmcs_host();
+	init_vmcs_guest();
+	return 0;
+}
+
+/* Read the VMX capability MSRs, fix up CR0/CR4 for VMX and allocate the VMXON region and guest stacks. */
+static void init_vmx(void)
+{
+	vmxon_region = alloc_page();
+	memset(vmxon_region, 0, PAGE_SIZE);
+	fix_cr0_set = rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+	fix_cr0_clr = rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+	fix_cr4_set = rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
+			: MSR_IA32_VMX_EXIT_CTLS);
+	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
+			: MSR_IA32_VMX_ENTRY_CTLS);
+	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
+			: MSR_IA32_VMX_PROCBASED_CTLS);
+	if (ctrl_cpu_rev[0].clr & CPU_SECONDARY)	/* optional features: allowed-1 bits */
+		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
+	if (ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID))
+		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+
+	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
+	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
+
+	*vmxon_region = basic.revision;
+
+	guest_stack = alloc_page();
+	memset(guest_stack, 0, PAGE_SIZE);
+	guest_syscall_stack = alloc_page();
+	memset(guest_syscall_stack, 0, PAGE_SIZE);
+}
+
+static int test_vmx_capability(void)
+{
+	struct cpuid r;
+	u64 ret1, ret2;
+	u64 ia32_feature_control;
+	r = cpuid(1);
+	ret1 = ((r.c) >> 5) & 1;
+	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+	ret2 = ((ia32_feature_control & 0x5) == 0x5);
+	if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) {
+		wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
+		ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+		ret2 = ((ia32_feature_control & 0x5) == 0x5);
+	}
+	report("test vmx capability", ret1 & ret2);
+	return !(ret1 & ret2);
+}
+
+static int test_vmxon(void)
+{
+	int ret;
+	u64 rflags;
+
+	rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	write_rflags(rflags);
+	ret = vmx_on();
+	report("test vmxon", !ret);
+	return ret;
+}
+
+static void test_vmptrld(void)
+{
+	u64 rflags;
+	struct vmcs *vmcs;
+
+	vmcs = alloc_page();
+	vmcs->revision_id = basic.revision;
+	rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	write_rflags(rflags);
+	report("test vmptrld", make_vmcs_current(vmcs) == 0);
+}
+
+/* VMPTRST must store the pointer of the VMCS that init_vmcs() made current. */
+static void test_vmptrst(void)
+{
+	u64 rflags;
+	int ret, init_failed;
+	struct vmcs *vmcs1, *vmcs2 = NULL;
+
+	/* init_vmcs() allocates and zeroes the VMCS page itself. */
+	init_failed = init_vmcs(&vmcs1);
+	rflags = read_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	write_rflags(rflags);
+	ret = vmcs_save(&vmcs2);
+	report("test vmptrst", !init_failed && !ret && (vmcs1 == vmcs2));
+}
+
+/* This function can only be called in guest */
+static void __attribute__((__used__)) hypercall(u32 hypercall_no)
+{
+	u64 val = 0;
+	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
+	hypercall_field = val;
+	asm volatile("vmcall\n\t");
+}
+
+static bool is_hypercall()
+{
+	ulong reason, hyper_bit;
+
+	reason = vmcs_read(EXI_REASON) & 0xff;
+	hyper_bit = hypercall_field & HYPERCALL_BIT;
+	if (reason == VMX_VMCALL && hyper_bit)
+		return true;
+	return false;
+}
+
+/* Consume the pending hypercall request and map it to a VMX_TEST_* action. */
+static int handle_hypercall()
+{
+	ulong hypercall_no = hypercall_field & HYPERCALL_MASK;
+
+	hypercall_field = 0;
+	switch (hypercall_no) {
+	case HYPERCALL_VMEXIT:
+		return VMX_TEST_VMEXIT;
+	default:
+		printf("ERROR : Invalid hypercall number : %ld\n", hypercall_no);
+	}
+	return VMX_TEST_EXIT;
+}
+
+static int exit_handler()
+{
+	int ret;
+
+	current->exits++;
+	current->guest_regs = regs;
+	if (is_hypercall())
+		ret = handle_hypercall();
+	else
+		ret = current->exit_handler();
+	regs = current->guest_regs;
+	switch (ret) {
+	case VMX_TEST_VMEXIT:
+	case VMX_TEST_RESUME:
+		return ret;
+	case VMX_TEST_EXIT:
+		break;
+	default:
+		printf("ERROR : Invalid exit_handler return val %d.\n"
+			, ret);
+	}
+	print_vmexit_info();
+	exit(-1);
+	return 0;
+}
+
+static int vmx_run()
+{
+	u32 ret = 0, fail = 0;
+
+	while (1) {
+		asm volatile (
+			"mov %%rsp, %%rsi\n\t"
+			"mov %2, %%rdi\n\t"
+			"vmwrite %%rsi, %%rdi\n\t"
+
+			LOAD_GPR_C
+			"cmpl $0, %1\n\t"
+			"jne 1f\n\t"
+			LOAD_RFLAGS
+			"vmlaunch\n\t"
+			"jmp 2f\n\t"
+			"1: "
+			"vmresume\n\t"
+			"2: "
+			"setbe %0\n\t"
+			"vmx_return:\n\t"
+			SAVE_GPR_C
+			SAVE_RFLAGS
+			: "=m"(fail)
+			: "m"(launched), "i"(HOST_RSP)
+			: "rdi", "rsi", "memory", "cc"
+
+		);
+		if (fail)
+			ret = launched ? VMX_TEST_RESUME_ERR :
+				VMX_TEST_LAUNCH_ERR;
+		else {
+			launched = 1;
+			ret = exit_handler();
+		}
+		if (ret != VMX_TEST_RESUME)
+			break;
+	}
+	launched = 0;
+	switch (ret) {
+	case VMX_TEST_VMEXIT:
+		return 0;
+	case VMX_TEST_LAUNCH_ERR:
+		printf("%s : vmlaunch failed.\n", __func__);
+		if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
+			|| ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
+			printf("\tvmlaunch set wrong flags\n");
+		report("test vmlaunch", 0);
+		break;
+	case VMX_TEST_RESUME_ERR:
+		printf("%s : vmresume failed.\n", __func__);
+		if ((!(regs.rflags & X86_EFLAGS_CF) && !(regs.rflags & X86_EFLAGS_ZF))
+			|| ((regs.rflags & X86_EFLAGS_CF) && (regs.rflags & X86_EFLAGS_ZF)))
+			printf("\tvmresume set wrong flags\n");
+		report("test vmresume", 0);
+		break;
+	default:
+		printf("%s : unhandled ret from exit_handler, ret=%d.\n", __func__, ret);
+		break;
+	}
+	return 1;
+}
+
+static int test_run(struct vmx_test *test)
+{
+	if (test->name == NULL)
+		test->name = "(no name)";
+	if (vmx_on()) {
+		printf("%s : vmxon failed.\n", __func__);
+		return 1;
+	}
+	init_vmcs(&(test->vmcs));
+	/* Directly call test->init is ok here, init_vmcs has done
+	   vmcs init, vmclear and vmptrld*/
+	if (test->init)
+		test->init(test->vmcs);
+	test->exits = 0;
+	current = test;
+	regs = test->guest_regs;
+	vmcs_write(GUEST_RFLAGS, regs.rflags | 0x2);
+	launched = 0;
+	printf("\nTest suite : %s\n", test->name);
+	vmx_run();
+	if (vmx_off()) {
+		printf("%s : vmxoff failed.\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+static void basic_init()
+{
+}
+
+static void basic_guest_main()
+{
+	/* Here is null guest_main, print Hello World */
+	printf("\tHello World, this is null_guest_main!\n");
+}
+
+static int basic_exit_handler()
+{
+	u64 guest_rip;
+	ulong reason;
+
+	guest_rip = vmcs_read(GUEST_RIP);
+	reason = vmcs_read(EXI_REASON) & 0xff;
+
+	switch (reason) {
+	case VMX_VMCALL:
+		print_vmexit_info();
+		vmcs_write(GUEST_RIP, guest_rip + 3);
+		return VMX_TEST_RESUME;
+	default:
+		break;
+	}
+	printf("ERROR : Unhandled vmx exit.\n");
+	print_vmexit_info();
+	return VMX_TEST_EXIT;
+}
+
+static void basic_syscall_handler(u64 syscall_no)
+{
+}
+
+static void vmenter_main()
+{
+	u64 rax;
+	u64 rsp, resume_rsp;
+
+	report("test vmlaunch", 1);
+
+	asm volatile(
+		"mov %%rsp, %0\n\t"
+		"mov %3, %%rax\n\t"
+		"vmcall\n\t"
+		"mov %%rax, %1\n\t"
+		"mov %%rsp, %2\n\t"
+		: "=r"(rsp), "=r"(rax), "=r"(resume_rsp)
+		: "g"(0xABCD));
+	report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
+}
+
+static int vmenter_exit_handler()
+{
+	u64 guest_rip;
+	ulong reason;
+
+	guest_rip = vmcs_read(GUEST_RIP);
+	reason = vmcs_read(EXI_REASON) & 0xff;
+	switch (reason) {
+	case VMX_VMCALL:
+		if (current->guest_regs.rax != 0xABCD) {
+			report("test vmresume", 0);
+			return VMX_TEST_VMEXIT;
+		}
+		current->guest_regs.rax = 0xFFFF;
+		vmcs_write(GUEST_RIP, guest_rip + 3);
+		return VMX_TEST_RESUME;
+	default:
+		report("test vmresume", 0);
+		print_vmexit_info();
+	}
+	return VMX_TEST_VMEXIT;
+}
+
+
+/* name/init/guest_main/exit_handler/syscall_handler/guest_regs
+   basic_* just implement some basic functions */
+static struct vmx_test vmx_tests[] = {
+	{ "null", basic_init, basic_guest_main, basic_exit_handler,
+		basic_syscall_handler, {0} },
+	{ "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
+		basic_syscall_handler, {0} },
+};
+
+int main(void)
+{
+	int i;
+
+	setup_vm();
+	setup_idt();
+
+	if (test_vmx_capability() != 0) {
+		printf("ERROR : vmx not supported, check +vmx option\n");
+		goto exit;
+	}
+	init_vmx();
+	/* Set basic test ctxt the same as "null" */
+	current = &vmx_tests[0];
+	if (test_vmxon() != 0)
+		goto exit;
+	test_vmptrld();
+	test_vmclear();
+	test_vmptrst();
+	init_vmcs(&vmcs_root);
+	if (vmx_run()) {
+		report("test vmlaunch", 0);
+		goto exit;
+	}
+	test_vmxoff();
+
+	for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) {
+		if (test_run(&vmx_tests[i]))
+			goto exit;
+	}
+
+exit:
+	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
+	return fails ? 1 : 0;
+}
diff --git a/x86/vmx.h b/x86/vmx.h
new file mode 100644
index 0000000..1fb9738
--- /dev/null
+++ b/x86/vmx.h
@@ -0,0 +1,466 @@ 
+#ifndef __HYPERVISOR_H
+#define __HYPERVISOR_H
+
+#include "libcflat.h"
+/*
+ * Header of a VMCS region as seen by software: two 32-bit words followed by
+ * data that is only reached through `data`.  The zero-length array adds
+ * nothing to sizeof(struct vmcs); the real size of the region is decided by
+ * whoever allocates it.
+ */
+struct vmcs {
+	u32 revision_id; /* vmcs revision identifier */
+	u32 abort; /* VMX-abort indicator */
+	/* VMCS data */
+	char data[0];
+};
+/*
+ * Register save area, one 64-bit slot per entry (rflags ends up at offset
+ * 0x80).  The byte offsets of these fields are hard-coded in the SAVE_GPR*,
+ * SAVE_RFLAGS and LOAD_RFLAGS asm macros in this header, so any change to
+ * the field order must be mirrored there.
+ */
+struct regs {
+	u64 rax;
+	u64 rcx;
+	u64 rdx;
+	u64 rbx;
+	u64 cr2;	/* offset 0x20; not touched by the SAVE_GPR* macros */
+	u64 rbp;
+	u64 rsi;
+	u64 rdi;
+	u64 r8;
+	u64 r9;
+	u64 r10;
+	u64 r11;
+	u64 r12;
+	u64 r13;
+	u64 r14;
+	u64 r15;
+	u64 rflags;	/* offset 0x80, used by SAVE_RFLAGS/LOAD_RFLAGS */
+};
+/*
+ * Description of one test suite; instances live in the vmx_tests[] table in
+ * vmx.c.  The table initialises the first six members positionally, so their
+ * order is part of the contract.
+ */
+struct vmx_test {
+	const char *name;			/* suite name, e.g. "vmenter" */
+	void (*init)(struct vmcs *vmcs);	/* presumably per-test VMCS setup - confirm in vmx.c */
+	void (*guest_main)();			/* code executed as the guest */
+	int (*exit_handler)();			/* returns one of the VMX_TEST_* codes */
+	void (*syscall_handler)(u64 syscall_no);
+	struct regs guest_regs;			/* guest register values seen by exit_handler */
+	struct vmcs *vmcs;			/* not set by the table; TODO confirm who assigns it */
+	int exits;				/* presumably a VM-exit counter - confirm in vmx.c */
+};
+/*
+ * Bit-field view of a 64-bit value: `revision` is the low 32 bits, the
+ * remaining fields split up the high 32 bits starting from its bit 0.
+ * Presumably filled from the IA32_VMX_BASIC MSR - confirm where basic.val is
+ * assigned.
+ * NOTE(review): `basic` is a static object defined in a header, so every
+ * translation unit that includes vmx.h gets its own private copy.
+ */
+static union vmx_basic {
+	u64 val;
+	struct {
+		u32 revision;
+		u32	size:13,
+			: 3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1;
+	};
+} basic;
+/*
+ * 64-bit value split into two 32-bit halves: `set` is the low half, `clr`
+ * the high half.  Presumably holds the capability MSR for the pin-based
+ * controls - confirm where ctrl_pin_rev.val is loaded.
+ * NOTE(review): static object defined in a header; one copy per includer.
+ */
+static union vmx_ctrl_pin {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_pin_rev;
+/*
+ * Two 64-bit values, each split into a low (`set`) and high (`clr`) 32-bit
+ * half.  Presumably index 0 and 1 correspond to CPU_EXEC_CTRL0 and
+ * CPU_EXEC_CTRL1 - confirm where ctrl_cpu_rev[] is loaded.
+ * NOTE(review): static object defined in a header; one copy per includer.
+ */
+static union vmx_ctrl_cpu {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_cpu_rev[2];
+/*
+ * 64-bit value split into two 32-bit halves: `set` is the low half, `clr`
+ * the high half.  Presumably holds the capability MSR for the VM-exit
+ * controls - confirm where ctrl_exit_rev.val is loaded.
+ * NOTE(review): static object defined in a header; one copy per includer.
+ */
+static union vmx_ctrl_exit {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_exit_rev;
+/*
+ * 64-bit value split into two 32-bit halves: `set` is the low half, `clr`
+ * the high half.  Presumably holds the capability MSR for the VM-entry
+ * controls - confirm where ctrl_enter_rev.val is loaded.
+ * NOTE(review): static object defined in a header; one copy per includer.
+ */
+static union vmx_ctrl_ent {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_enter_rev;
+/*
+ * Bit-field view of a 64-bit value.  In the low 32 bits: 16 unnamed bits,
+ * `super` (2 bits), 2 unnamed bits, `invept` (1 bit), 11 unnamed bits; the
+ * high 32 bits start with `invvpid` (1 bit).  Presumably the EPT/VPID
+ * capability MSR - confirm where ept_vpid.val is loaded.
+ * NOTE(review): static object defined in a header; one copy per includer.
+ */
+static union vmx_ept_vpid {
+	u64 val;
+	struct {
+		u32:16,
+			super:2,
+			: 2,
+			invept:1,
+			: 11;
+		u32	invvpid:1;
+	};
+} ept_vpid;
+/*
+ * A 16-bit limit paired with a 64-bit address.
+ * NOTE(review): the struct is not packed, so `addr` sits at offset 8, not 2.
+ * If this is overlaid on the 10-byte image used by lgdt/sgdt/lidt/sidt
+ * (limit immediately followed by the base) it needs
+ * __attribute__((packed)) - confirm against the users in vmx.c.
+ */
+struct descr {
+	u16 limit;
+	u64 addr;
+};
+/*
+ * VMCS field encodings, i.e. the values handed to vmcs_read() and
+ * vmcs_write() to select a field.  The entries are grouped by field width
+ * and kind as named in the section comments below.  The *_HI entries are the
+ * neighbouring encoding + 1; presumably they address the upper 32 bits of
+ * the corresponding 64-bit field - confirm against the Intel SDM.
+ */
+enum Encoding {
+	/* 16-Bit Control Fields */
+	VPID			= 0x0000ul,
+	/* Posted-interrupt notification vector */
+	PINV			= 0x0002ul,
+	/* EPTP index */
+	EPTP_IDX		= 0x0004ul,
+
+	/* 16-Bit Guest State Fields */
+	GUEST_SEL_ES		= 0x0800ul,
+	GUEST_SEL_CS		= 0x0802ul,
+	GUEST_SEL_SS		= 0x0804ul,
+	GUEST_SEL_DS		= 0x0806ul,
+	GUEST_SEL_FS		= 0x0808ul,
+	GUEST_SEL_GS		= 0x080aul,
+	GUEST_SEL_LDTR		= 0x080cul,
+	GUEST_SEL_TR		= 0x080eul,
+	GUEST_INT_STATUS	= 0x0810ul,
+
+	/* 16-Bit Host State Fields */
+	HOST_SEL_ES		= 0x0c00ul,
+	HOST_SEL_CS		= 0x0c02ul,
+	HOST_SEL_SS		= 0x0c04ul,
+	HOST_SEL_DS		= 0x0c06ul,
+	HOST_SEL_FS		= 0x0c08ul,
+	HOST_SEL_GS		= 0x0c0aul,
+	HOST_SEL_TR		= 0x0c0cul,
+
+	/* 64-Bit Control Fields */
+	IO_BITMAP_A		= 0x2000ul,
+	IO_BITMAP_B		= 0x2002ul,
+	MSR_BITMAP		= 0x2004ul,
+	EXIT_MSR_ST_ADDR	= 0x2006ul,
+	EXIT_MSR_LD_ADDR	= 0x2008ul,
+	ENTER_MSR_LD_ADDR	= 0x200aul,
+	VMCS_EXEC_PTR		= 0x200cul,
+	TSC_OFFSET		= 0x2010ul,
+	TSC_OFFSET_HI		= 0x2011ul,
+	APIC_VIRT_ADDR		= 0x2012ul,
+	APIC_ACCS_ADDR		= 0x2014ul,
+	EPTP			= 0x201aul,
+	EPTP_HI			= 0x201bul,
+
+	/* 64-Bit Readonly Data Field */
+	INFO_PHYS_ADDR		= 0x2400ul,
+
+	/* 64-Bit Guest State */
+	VMCS_LINK_PTR		= 0x2800ul,
+	VMCS_LINK_PTR_HI	= 0x2801ul,
+	GUEST_DEBUGCTL		= 0x2802ul,
+	GUEST_DEBUGCTL_HI	= 0x2803ul,
+	GUEST_EFER		= 0x2806ul,
+	GUEST_PERF_GLOBAL_CTRL	= 0x2808ul,
+	GUEST_PDPTE		= 0x280aul,
+
+	/* 64-Bit Host State */
+	HOST_EFER		= 0x2c02ul,
+	HOST_PERF_GLOBAL_CTRL	= 0x2c04ul,
+
+	/* 32-Bit Control Fields */
+	PIN_CONTROLS		= 0x4000ul,
+	CPU_EXEC_CTRL0		= 0x4002ul,
+	EXC_BITMAP		= 0x4004ul,
+	PF_ERROR_MASK		= 0x4006ul,
+	PF_ERROR_MATCH		= 0x4008ul,
+	CR3_TARGET_COUNT	= 0x400aul,
+	EXI_CONTROLS		= 0x400cul,
+	EXI_MSR_ST_CNT		= 0x400eul,
+	EXI_MSR_LD_CNT		= 0x4010ul,
+	ENT_CONTROLS		= 0x4012ul,
+	ENT_MSR_LD_CNT		= 0x4014ul,
+	ENT_INTR_INFO		= 0x4016ul,
+	ENT_INTR_ERROR		= 0x4018ul,
+	ENT_INST_LEN		= 0x401aul,
+	TPR_THRESHOLD		= 0x401cul,
+	CPU_EXEC_CTRL1		= 0x401eul,
+
+	/* 32-Bit R/O Data Fields */
+	VMX_INST_ERROR		= 0x4400ul,
+	EXI_REASON		= 0x4402ul,
+	EXI_INTR_INFO		= 0x4404ul,
+	EXI_INTR_ERROR		= 0x4406ul,
+	IDT_VECT_INFO		= 0x4408ul,
+	IDT_VECT_ERROR		= 0x440aul,
+	EXI_INST_LEN		= 0x440cul,
+	EXI_INST_INFO		= 0x440eul,
+
+	/* 32-Bit Guest State Fields */
+	GUEST_LIMIT_ES		= 0x4800ul,
+	GUEST_LIMIT_CS		= 0x4802ul,
+	GUEST_LIMIT_SS		= 0x4804ul,
+	GUEST_LIMIT_DS		= 0x4806ul,
+	GUEST_LIMIT_FS		= 0x4808ul,
+	GUEST_LIMIT_GS		= 0x480aul,
+	GUEST_LIMIT_LDTR	= 0x480cul,
+	GUEST_LIMIT_TR		= 0x480eul,
+	GUEST_LIMIT_GDTR	= 0x4810ul,
+	GUEST_LIMIT_IDTR	= 0x4812ul,
+	GUEST_AR_ES		= 0x4814ul,
+	GUEST_AR_CS		= 0x4816ul,
+	GUEST_AR_SS		= 0x4818ul,
+	GUEST_AR_DS		= 0x481aul,
+	GUEST_AR_FS		= 0x481cul,
+	GUEST_AR_GS		= 0x481eul,
+	GUEST_AR_LDTR		= 0x4820ul,
+	GUEST_AR_TR		= 0x4822ul,
+	GUEST_INTR_STATE	= 0x4824ul,
+	GUEST_ACTV_STATE	= 0x4826ul,
+	GUEST_SMBASE		= 0x4828ul,
+	GUEST_SYSENTER_CS	= 0x482aul,
+
+	/* 32-Bit Host State Fields */
+	HOST_SYSENTER_CS	= 0x4c00ul,
+
+	/* Natural-Width Control Fields */
+	CR0_MASK		= 0x6000ul,
+	CR4_MASK		= 0x6002ul,
+	CR0_READ_SHADOW	= 0x6004ul,
+	CR4_READ_SHADOW	= 0x6006ul,
+	CR3_TARGET_0		= 0x6008ul,
+	CR3_TARGET_1		= 0x600aul,
+	CR3_TARGET_2		= 0x600cul,
+	CR3_TARGET_3		= 0x600eul,
+
+	/* Natural-Width R/O Data Fields */
+	EXI_QUALIFICATION	= 0x6400ul,
+	IO_RCX			= 0x6402ul,
+	IO_RSI			= 0x6404ul,
+	IO_RDI			= 0x6406ul,
+	IO_RIP			= 0x6408ul,
+	GUEST_LINEAR_ADDRESS	= 0x640aul,
+
+	/* Natural-Width Guest State Fields */
+	GUEST_CR0		= 0x6800ul,
+	GUEST_CR3		= 0x6802ul,
+	GUEST_CR4		= 0x6804ul,
+	GUEST_BASE_ES		= 0x6806ul,
+	GUEST_BASE_CS		= 0x6808ul,
+	GUEST_BASE_SS		= 0x680aul,
+	GUEST_BASE_DS		= 0x680cul,
+	GUEST_BASE_FS		= 0x680eul,
+	GUEST_BASE_GS		= 0x6810ul,
+	GUEST_BASE_LDTR		= 0x6812ul,
+	GUEST_BASE_TR		= 0x6814ul,
+	GUEST_BASE_GDTR		= 0x6816ul,
+	GUEST_BASE_IDTR		= 0x6818ul,
+	GUEST_DR7		= 0x681aul,
+	GUEST_RSP		= 0x681cul,
+	GUEST_RIP		= 0x681eul,
+	GUEST_RFLAGS		= 0x6820ul,
+	GUEST_PENDING_DEBUG	= 0x6822ul,
+	GUEST_SYSENTER_ESP	= 0x6824ul,
+	GUEST_SYSENTER_EIP	= 0x6826ul,
+
+	/* Natural-Width Host State Fields */
+	HOST_CR0		= 0x6c00ul,
+	HOST_CR3		= 0x6c02ul,
+	HOST_CR4		= 0x6c04ul,
+	HOST_BASE_FS		= 0x6c06ul,
+	HOST_BASE_GS		= 0x6c08ul,
+	HOST_BASE_TR		= 0x6c0aul,
+	HOST_BASE_GDTR		= 0x6c0cul,
+	HOST_BASE_IDTR		= 0x6c0eul,
+	HOST_SYSENTER_ESP	= 0x6c10ul,
+	HOST_SYSENTER_EIP	= 0x6c12ul,
+	HOST_RSP		= 0x6c14ul,
+	HOST_RIP		= 0x6c16ul
+};
+/*
+ * VM-exit reason numbers.  Exit handlers compare these against the low bits
+ * of the EXI_REASON field (vmenter_exit_handler() in vmx.c masks the field
+ * with 0xff first).  The numbering has gaps: 35, 38, 42, 45 and 51 have no
+ * name here.
+ */
+enum Reason {
+	VMX_EXC_NMI		= 0,
+	VMX_EXTINT		= 1,
+	VMX_TRIPLE_FAULT	= 2,
+	VMX_INIT		= 3,
+	VMX_SIPI		= 4,
+	VMX_SMI_IO		= 5,
+	VMX_SMI_OTHER		= 6,
+	VMX_INTR_WINDOW		= 7,
+	VMX_NMI_WINDOW		= 8,
+	VMX_TASK_SWITCH		= 9,
+	VMX_CPUID		= 10,
+	VMX_GETSEC		= 11,
+	VMX_HLT			= 12,
+	VMX_INVD		= 13,
+	VMX_INVLPG		= 14,
+	VMX_RDPMC		= 15,
+	VMX_RDTSC		= 16,
+	VMX_RSM			= 17,
+	VMX_VMCALL		= 18,
+	VMX_VMCLEAR		= 19,
+	VMX_VMLAUNCH		= 20,
+	VMX_VMPTRLD		= 21,
+	VMX_VMPTRST		= 22,
+	VMX_VMREAD		= 23,
+	VMX_VMRESUME		= 24,
+	VMX_VMWRITE		= 25,
+	VMX_VMXOFF		= 26,
+	VMX_VMXON		= 27,
+	VMX_CR			= 28,
+	VMX_DR			= 29,
+	VMX_IO			= 30,
+	VMX_RDMSR		= 31,
+	VMX_WRMSR		= 32,
+	VMX_FAIL_STATE		= 33,
+	VMX_FAIL_MSR		= 34,
+	VMX_MWAIT		= 36,
+	VMX_MTF			= 37,
+	VMX_MONITOR		= 39,
+	VMX_PAUSE		= 40,
+	VMX_FAIL_MCHECK		= 41,
+	VMX_TPR_THRESHOLD	= 43,
+	VMX_APIC_ACCESS		= 44,
+	VMX_GDTR_IDTR		= 46,
+	VMX_LDTR_TR		= 47,
+	VMX_EPT_VIOLATION	= 48,
+	VMX_EPT_MISCONFIG	= 49,
+	VMX_INVEPT		= 50,
+	VMX_PREEMPT		= 52,
+	VMX_INVVPID		= 53,
+	VMX_WBINVD		= 54,
+	VMX_XSETBV		= 55
+};
+/*
+ * RFLAGS bit masks.  CF and ZF are the two flags the "setbe" in the vmcs_*
+ * helpers of this header folds into its result.
+ */
+#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
+/*
+ * Single-bit masks; judging by the EXI_ prefix they presumably apply to the
+ * EXI_CONTROLS VMCS field - confirm against the Intel SDM.
+ */
+enum Ctrl_exi {
+	EXI_HOST_64             = 1UL << 9,
+	EXI_LOAD_PERF		= 1UL << 12,
+	EXI_INTA                = 1UL << 15,
+	EXI_LOAD_EFER           = 1UL << 21,
+};
+/*
+ * Single-bit masks; judging by the ENT_ prefix they presumably apply to the
+ * ENT_CONTROLS VMCS field - confirm against the Intel SDM.
+ */
+enum Ctrl_ent {
+	ENT_GUEST_64            = 1UL << 9,
+	ENT_LOAD_EFER           = 1UL << 15,
+};
+/*
+ * Single-bit masks; judging by the PIN_ prefix they presumably apply to the
+ * PIN_CONTROLS VMCS field - confirm against the Intel SDM.
+ */
+enum Ctrl_pin {
+	PIN_EXTINT              = 1ul << 0,
+	PIN_NMI                 = 1ul << 3,
+	PIN_VIRT_NMI            = 1ul << 5,
+};
+/*
+ * Single-bit masks; presumably for the CPU_EXEC_CTRL0 VMCS field - confirm
+ * against the Intel SDM.  The highest bit used is bit 31 (CPU_SECONDARY),
+ * which does not fit a signed 32-bit int.
+ */
+enum Ctrl0 {
+	CPU_INTR_WINDOW		= 1ul << 2,
+	CPU_HLT			= 1ul << 7,
+	CPU_INVLPG		= 1ul << 9,
+	CPU_CR3_LOAD		= 1ul << 15,
+	CPU_CR3_STORE		= 1ul << 16,
+	CPU_TPR_SHADOW		= 1ul << 21,
+	CPU_NMI_WINDOW		= 1ul << 22,
+	CPU_IO			= 1ul << 24,
+	CPU_IO_BITMAP		= 1ul << 25,
+	CPU_SECONDARY		= 1ul << 31,
+};
+/*
+ * Single-bit masks; presumably for the CPU_EXEC_CTRL1 VMCS field - confirm
+ * against the Intel SDM.
+ */
+enum Ctrl1 {
+	CPU_EPT			= 1ul << 1,
+	CPU_VPID		= 1ul << 5,
+	CPU_URG			= 1ul << 7,
+};
+
+/*
+ * Swap every general-purpose register except %rsp with its slot in the
+ * object named `regs` (presumably the global struct regs defined in vmx.c).
+ * The offsets follow the field order of struct regs:
+ *   rax 0x00, rcx 0x08, rdx 0x10, rbx 0x18, (cr2 0x20 - skipped),
+ *   rbp 0x28, rsi 0x30, rdi 0x38, r8..r15 0x40..0x78.
+ * Because xchg is its own inverse, loading is the same sequence as saving.
+ *
+ * SAVE_GPR/LOAD_GPR are for basic asm statements and .S-style strings,
+ * where a register is written with a single '%'.
+ */
+#define SAVE_GPR				\
+	"xchg %rax, regs\n\t"			\
+	"xchg %rcx, regs+0x8\n\t"		\
+	"xchg %rdx, regs+0x10\n\t"		\
+	"xchg %rbx, regs+0x18\n\t"		\
+	"xchg %rbp, regs+0x28\n\t"		\
+	"xchg %rsi, regs+0x30\n\t"		\
+	"xchg %rdi, regs+0x38\n\t"		\
+	"xchg %r8, regs+0x40\n\t"		\
+	"xchg %r9, regs+0x48\n\t"		\
+	"xchg %r10, regs+0x50\n\t"		\
+	"xchg %r11, regs+0x58\n\t"		\
+	"xchg %r12, regs+0x60\n\t"		\
+	"xchg %r13, regs+0x68\n\t"		\
+	"xchg %r14, regs+0x70\n\t"		\
+	"xchg %r15, regs+0x78\n\t"
+
+#define LOAD_GPR	SAVE_GPR
+
+/*
+ * Same sequence for extended asm statements (with operands), where a
+ * literal register needs the doubled '%%'.
+ */
+#define SAVE_GPR_C				\
+	"xchg %%rax, regs\n\t"			\
+	"xchg %%rcx, regs+0x8\n\t"		\
+	"xchg %%rdx, regs+0x10\n\t"		\
+	"xchg %%rbx, regs+0x18\n\t"		\
+	"xchg %%rbp, regs+0x28\n\t"		\
+	"xchg %%rsi, regs+0x30\n\t"		\
+	"xchg %%rdi, regs+0x38\n\t"		\
+	"xchg %%r8, regs+0x40\n\t"		\
+	"xchg %%r9, regs+0x48\n\t"		\
+	"xchg %%r10, regs+0x50\n\t"		\
+	"xchg %%r11, regs+0x58\n\t"		\
+	"xchg %%r12, regs+0x60\n\t"		\
+	"xchg %%r13, regs+0x68\n\t"		\
+	"xchg %%r14, regs+0x70\n\t"		\
+	"xchg %%r15, regs+0x78\n\t"
+
+#define LOAD_GPR_C	SAVE_GPR_C
+/*
+ * Copy RFLAGS to / from offset 0x80 of the object named `regs`, which is
+ * where struct regs places its `rflags` member.  Both sequences go through
+ * the stack (pushf/pop and push/popf), so they need a usable %rsp.
+ */
+#define SAVE_RFLAGS		\
+	"pushf\n\t"			\
+	"pop regs+0x80\n\t"
+
+#define LOAD_RFLAGS		\
+	"push regs+0x80\n\t"	\
+	"popf\n\t"
+
+/*
+ * Layout of the exit qualification reported for I/O-instruction VM exits
+ * (Intel SDM, "Exit Qualification for I/O Instructions"):
+ *   bits 2:0   access size, encoded as 0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes
+ *   bit  3     direction (1 = in, 0 = out)
+ *   bit  4     string instruction
+ *   bit  5     REP prefixed
+ *   bit  6     operand encoding
+ *   bits 31:16 port number
+ * The _VMX_IO_* size codes are the raw field values, to be compared with
+ * (qualification & VMX_IO_SIZE_MASK).
+ */
+#define VMX_IO_SIZE_MASK		0x7
+#define _VMX_IO_BYTE			0
+#define _VMX_IO_WORD			1
+#define _VMX_IO_LONG			3
+#define VMX_IO_DIRECTION_MASK		(1ul << 3)
+#define VMX_IO_IN			(1ul << 3)
+#define VMX_IO_OUT			0
+#define VMX_IO_STRING			(1ul << 4)
+#define VMX_IO_REP			(1ul << 5)
+/*
+ * NOTE(review): per the SDM bit 6 is 0 for a DX operand and 1 for an
+ * immediate one, so a set bit means "immediate" despite this name - confirm
+ * before relying on it.
+ */
+#define VMX_IO_OPRAND_DX		(1ul << 6)
+#define VMX_IO_PORT_MASK		0xFFFF0000
+#define VMX_IO_PORT_SHIFT		16
+/*
+ * Result codes of the test framework.  Exit handlers in vmx.c return
+ * VMX_TEST_RESUME to re-enter the guest and VMX_TEST_VMEXIT otherwise; the
+ * remaining codes are presumably produced by the run loop itself - confirm
+ * in vmx.c.
+ */
+#define VMX_TEST_VMEXIT			1
+#define VMX_TEST_EXIT			2
+#define VMX_TEST_RESUME			3
+#define VMX_TEST_LAUNCH_ERR		4
+#define VMX_TEST_RESUME_ERR		5
+/*
+ * Hypercall encoding: HYPERCALL_BIT (bit 12) flags a value as a hypercall
+ * and HYPERCALL_MASK selects the low 12 bits, presumably the hypercall
+ * number - confirm against the hypercall code in vmx.c.
+ * HYPERCALL_VMEXIT is the only number defined here.
+ */
+#define HYPERCALL_BIT		(1ul << 12)
+#define HYPERCALL_MASK		0xFFF
+#define HYPERCALL_VMEXIT	0x1
+
+/*
+ * Execute vmclear on the VMCS whose address is stored in `vmcs`.
+ *
+ * The instruction takes a memory operand that contains the address, which
+ * is why the pointer variable itself is passed with an "m" constraint.
+ * Returns non-zero if the instruction left CF or ZF set (that is what setbe
+ * tests), zero otherwise.
+ *
+ * vmclear writes to the VMCS region, which the compiler cannot see, so
+ * "memory" is clobbered to keep ordinary loads and stores to the region
+ * from being moved across the instruction.
+ */
+static inline int vmcs_clear(struct vmcs *vmcs)
+{
+	bool ret;
+	asm volatile ("vmclear %1; setbe %0"
+		      : "=q" (ret) : "m" (vmcs) : "cc", "memory");
+	return ret;
+}
+
+/*
+ * Read one field of the current VMCS with vmread.
+ * @enc: encoding of the field to read.
+ * Returns the value produced by vmread; the instruction's flags are not
+ * inspected, so a failed vmread is not detected here.
+ */
+static inline u64 vmcs_read(enum Encoding enc)
+{
+	u64 field = (u64)enc;
+	u64 value;
+
+	asm volatile ("vmread %1, %0" : "=rm" (value) : "r" (field) : "cc");
+	return value;
+}
+
+/*
+ * Write one field of the current VMCS with vmwrite.
+ * @enc: encoding of the field to write.
+ * @val: value to store.
+ * Returns non-zero if vmwrite left CF or ZF set (the condition setbe
+ * tests), zero otherwise.
+ */
+static inline int vmcs_write(enum Encoding enc, u64 val)
+{
+	u64 field = (u64)enc;
+	bool failed;
+
+	asm volatile ("vmwrite %1, %2; setbe %0"
+		: "=q"(failed) : "rm" (val), "r" (field) : "cc");
+	return failed;
+}
+
+/*
+ * Store the current-VMCS pointer into *vmcs with vmptrst.
+ * @vmcs: location that receives the pointer.
+ * Returns non-zero if the instruction left CF or ZF set (the condition
+ * setbe tests), zero otherwise.
+ *
+ * vmptrst writes its memory operand, so *vmcs is declared as an output
+ * ("=m"); as an input the compiler would be free to assume it unchanged.
+ * The operand numbers are the same as before: %0 is ret, %1 is *vmcs.
+ */
+static inline int vmcs_save(struct vmcs **vmcs)
+{
+	bool ret;
+
+	asm volatile ("vmptrst %1; setbe %0"
+		      : "=q" (ret), "=m" (*vmcs) : : "cc");
+	return ret;
+}
+
+#endif
+