diff mbox

[1/2] kvm-unit-tests: VMX: The framework of EPT for nested VMX testing

Message ID 1378702644-23655-2-git-send-email-yzt356@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Arthur Chunqi Li Sept. 9, 2013, 4:57 a.m. UTC
The framework of EPT for nested VMX, including functions to build up
EPT paging structures, read/set EPT PTEs, and set up a range of
1:1-mapped EPT.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/vmx.c |  159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 x86/vmx.h |   76 +++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Sept. 9, 2013, 1:45 p.m. UTC | #1
Il 09/09/2013 06:57, Arthur Chunqi Li ha scritto:
> The framework of EPT for nested VMX, including functions to build up
> EPT paging structures, read/set EPT PTEs and setup a range of 1:1 map
> EPT.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/vmx.c |  159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  x86/vmx.h |   76 +++++++++++++++++++++++++++++
>  2 files changed, 231 insertions(+), 4 deletions(-)
> 
> @@ -336,10 +489,8 @@ static void init_vmx(void)
>  			: MSR_IA32_VMX_ENTRY_CTLS);
>  	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
>  			: MSR_IA32_VMX_PROCBASED_CTLS);
> -	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
> -		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> -	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
> -		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
> +	ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> +	ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);

This is because these MSRs are confusing.

Your definitions are:

union vmx_ctrl_cpu {
        u64 val;
        struct { 
                u32 set, clr; 
        };
};

so "set" is the low 32 bits and "clr" is the high 32 bits.

This is how the SDM's description should be read:

set    clr
0      0          if bit is 0, ok. if bit is 1, fail
			=> reserved, must be 0
0      1          if bit is 0, ok. if bit is 1, ok
			=> supported by processor
1      0          if bit is 0, fail. if bit is 1, fail
			=> impossible
1      1	  if bit is 0, fail. if bit is 1, ok
			=> reserved, must be 1
So the right fix is:

        if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
                ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
        if ((ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID)) != 0)
                ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);

While looking at this I found another related bug.  This line:

	ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;

should be

	ctrl_cpu[1] = (ctrl_cpu[1] | ctrl_cpu_rev[1].set) & ctrl_cpu_rev[1].clr;

which is the same as other lines using the MSRs.

Paolo

>  	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
>  	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
> diff --git a/x86/vmx.h b/x86/vmx.h
> index 28595d8..742c2b2 100644
> --- a/x86/vmx.h
> +++ b/x86/vmx.h
> @@ -432,6 +432,59 @@ enum Ctrl1 {
>  #define HYPERCALL_MASK		0xFFF
>  #define HYPERCALL_VMEXIT	0x1
>  
> +#define EPTP_PG_WALK_LEN_SHIFT	3ul
> +#define EPTP_AD_FLAG			(1ul << 6)
> +
> +#define EPT_MEM_TYPE_UC	0ul
> +#define EPT_MEM_TYPE_WC	1ul
> +#define EPT_MEM_TYPE_WT	4ul
> +#define EPT_MEM_TYPE_WP	5ul
> +#define EPT_MEM_TYPE_WB	6ul
> +
> +#define EPT_RA			1ul
> +#define EPT_WA			2ul
> +#define EPT_EA			4ul
> +#define EPT_PRESENT		(EPT_RA | EPT_WA | EPT_EA)	
> +#define EPT_ACCESS_FLAG	(1ul << 8)
> +#define EPT_DIRTY_FLAG		(1ul << 9)
> +#define EPT_LARGE_PAGE		(1ul << 7)
> +#define EPT_MEM_TYPE_SHIFT	3ul
> +#define EPT_IGNORE_PAT		(1ul << 6)
> +#define EPT_SUPPRESS_VE	(1ull << 63)
> +
> +#define EPT_CAP_WT		1ull
> +#define EPT_CAP_PWL4		(1ull << 6)
> +#define EPT_CAP_UC		(1ull << 8)
> +#define EPT_CAP_WB		(1ull << 14)
> +#define EPT_CAP_2M_PAGE	(1ull << 16)
> +#define EPT_CAP_1G_PAGE	(1ull << 17)
> +#define EPT_CAP_INVEPT		(1ull << 20)
> +#define EPT_CAP_INVEPT_SINGLE	(1ull << 25)
> +#define EPT_CAP_INVEPT_ALL	(1ull << 26)
> +#define EPT_CAP_AD_FLAG	(1ull << 21)
> +
> +#define PAGE_SIZE_2M		(512 * PAGE_SIZE)
> +#define PAGE_SIZE_1G		(512 * PAGE_SIZE_2M)
> +#define	EPT_PAGE_LEVEL	4
> +#define	EPT_PGDIR_WIDTH	9
> +#define	EPT_PGDIR_MASK	511
> +#define PAGE_MASK (~(PAGE_SIZE-1))
> +
> +#define EPT_VLT_RD		1
> +#define EPT_VLT_WR		(1 << 1)
> +#define EPT_VLT_FETCH		(1 << 2)
> +#define EPT_VLT_PERM_RD	(1 << 3)
> +#define EPT_VLT_PERM_WR	(1 << 4)
> +#define EPT_VLT_PERM_EX	(1 << 5)
> +#define EPT_VLT_LADDR_VLD	(1 << 7)
> +#define EPT_VLT_PADDR		(1 << 8)
> +
> +#define MAGIC_VAL_1		0x12345678ul
> +#define MAGIC_VAL_2		0x87654321ul
> +#define MAGIC_VAL_3		0xfffffffful
> +
> +#define INVEPT_SINGLE		1
> +#define INVEPT_GLOBAL		2
>  
>  extern struct regs regs;
>  
> @@ -472,8 +525,31 @@ static inline int vmcs_save(struct vmcs **vmcs)
>  	return ret;
>  }
>  
> +static inline void invept(unsigned long type, u64 eptp)
> +{
> +	struct {
> +		u64 eptp, gpa;
> +	} operand = {eptp, 0};
> +	asm volatile("invept %0, %1\n" ::"m"(operand),"r"(type));
> +}
> +
>  void report(const char *name, int result);
>  void print_vmexit_info();
> +void install_ept_entry(unsigned long *pml4, int pte_level,
> +		unsigned long guest_addr, unsigned long pte,
> +		unsigned long *pt_page);
> +void install_1g_ept(unsigned long *pml4, unsigned long phys,
> +		unsigned long guest_addr, u64 perm);
> +void install_2m_ept(unsigned long *pml4, unsigned long phys,
> +		unsigned long guest_addr, u64 perm);
> +void install_ept(unsigned long *pml4, unsigned long phys,
> +		unsigned long guest_addr, u64 perm);
> +int setup_ept_range(unsigned long *pml4, unsigned long start,
> +		unsigned long len, int map_1g, int map_2m, u64 perm);
> +unsigned long get_ept_pte(unsigned long *pml4,
> +		unsigned long guest_addr, int level);
> +int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
> +		int level, u64 pte_val);
>  
>  #endif
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/x86/vmx.c b/x86/vmx.c
index ca36d35..87d1d55 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -143,6 +143,159 @@  asm(
 	"	call hypercall\n\t"
 );
 
+/* EPT paging structure related functions */
+/* install_ept_entry : Install an EPT entry at a given level
+		@pml4 : addr of pml4 table
+		@pte_level : level of PTE to set
+		@guest_addr : physical address of guest
+		@pte : pte value to set
+		@pt_page : address of page table, NULL for a new page
+ */
+void install_ept_entry(unsigned long *pml4,
+		int pte_level,
+		unsigned long guest_addr,
+		unsigned long pte,
+		unsigned long *pt_page)
+{
+	int level;
+	unsigned long *pt = pml4;
+	unsigned offset;
+
+	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
+		offset = (guest_addr >> ((level-1) * EPT_PGDIR_WIDTH + 12))
+				& EPT_PGDIR_MASK;
+		if (!(pt[offset] & (EPT_PRESENT))) {
+			unsigned long *new_pt = pt_page;
+			if (!new_pt)
+				new_pt = alloc_page();
+			else
+				pt_page = 0;
+			memset(new_pt, 0, PAGE_SIZE);
+			pt[offset] = virt_to_phys(new_pt)
+					| EPT_RA | EPT_WA | EPT_EA;
+		}
+		pt = phys_to_virt(pt[offset] & 0xffffffffff000ull);
+	}
+	offset = ((unsigned long)guest_addr >> ((level-1) *
+			EPT_PGDIR_WIDTH + 12)) & EPT_PGDIR_MASK;
+	pt[offset] = pte;
+}
+
+/* Map a page, @perm is the permission of the page */
+void install_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
+}
+
+/* Map a 1G-size page */
+void install_1g_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 3, guest_addr,
+			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
+}
+
+/* Map a 2M-size page */
+void install_2m_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 2, guest_addr,
+			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
+}
+
+/* setup_ept_range : Set up a range of 1:1 mapped pages in the EPT paging structure.
+		@start : start address of guest page
+		@len : length of address to be mapped
+		@map_1g : whether 1G page map is used
+		@map_2m : whether 2M page map is used
+		@perm : permission for every page
+ */
+int setup_ept_range(unsigned long *pml4, unsigned long start,
+		unsigned long len, int map_1g, int map_2m, u64 perm)
+{
+	u64 phys = start;
+	u64 max = (u64)len + (u64)start;
+
+	if (map_1g) {
+		while (phys + PAGE_SIZE_1G <= max) {
+			install_1g_ept(pml4, phys, phys, perm);
+			phys += PAGE_SIZE_1G;
+		}
+	}
+	if (map_2m) {
+		while (phys + PAGE_SIZE_2M <= max) {
+			install_2m_ept(pml4, phys, phys, perm);
+			phys += PAGE_SIZE_2M;
+		}
+	}
+	while (phys + PAGE_SIZE <= max) {
+		install_ept(pml4, phys, phys, perm);
+		phys += PAGE_SIZE;
+	}
+	return 0;
+}
+
+/* get_ept_pte : Get the PTE of a given level in EPT,
+    @level == 1 means get the last (lowest) level */
+unsigned long get_ept_pte(unsigned long *pml4,
+		unsigned long guest_addr, int level)
+{
+	int l;
+	unsigned long *pt = pml4, pte;
+	unsigned offset;
+
+	for (l = EPT_PAGE_LEVEL; l > 1; --l) {
+		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+				& EPT_PGDIR_MASK;
+		pte = pt[offset];
+		if (!(pte & (EPT_PRESENT)))
+			return 0;
+		if (l == level)
+			return pte;
+		if (l < 4 && (pte & EPT_LARGE_PAGE))
+			return pte;
+		pt = (unsigned long *)(pte & 0xffffffffff000ull);
+	}
+	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+			& EPT_PGDIR_MASK;
+	pte = pt[offset];
+	return pte;
+}
+
+int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
+		int level, u64 pte_val)
+{
+	int l;
+	unsigned long *pt = pml4;
+	unsigned offset;
+
+	if (level < 1 || level > 3)
+		return -1;
+	for (l = EPT_PAGE_LEVEL; l > 1; --l) {
+		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+				& EPT_PGDIR_MASK;
+		if (l == level) {
+			pt[offset] = pte_val;
+			return 0;
+		}
+		if (!(pt[offset] & (EPT_PRESENT)))
+			return -1;
+		pt = (unsigned long *)(pt[offset] & 0xffffffffff000ull);
+	}
+	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+			& EPT_PGDIR_MASK;
+	pt[offset] = pte_val;
+	return 0;
+}
+
+
 static void init_vmcs_ctrl(void)
 {
 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
@@ -336,10 +489,8 @@  static void init_vmx(void)
 			: MSR_IA32_VMX_ENTRY_CTLS);
 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
 			: MSR_IA32_VMX_PROCBASED_CTLS);
-	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
-		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
-	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
-		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+	ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
+	ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
 
 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
diff --git a/x86/vmx.h b/x86/vmx.h
index 28595d8..742c2b2 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -432,6 +432,59 @@  enum Ctrl1 {
 #define HYPERCALL_MASK		0xFFF
 #define HYPERCALL_VMEXIT	0x1
 
+#define EPTP_PG_WALK_LEN_SHIFT	3ul
+#define EPTP_AD_FLAG			(1ul << 6)
+
+#define EPT_MEM_TYPE_UC	0ul
+#define EPT_MEM_TYPE_WC	1ul
+#define EPT_MEM_TYPE_WT	4ul
+#define EPT_MEM_TYPE_WP	5ul
+#define EPT_MEM_TYPE_WB	6ul
+
+#define EPT_RA			1ul
+#define EPT_WA			2ul
+#define EPT_EA			4ul
+#define EPT_PRESENT		(EPT_RA | EPT_WA | EPT_EA)	
+#define EPT_ACCESS_FLAG	(1ul << 8)
+#define EPT_DIRTY_FLAG		(1ul << 9)
+#define EPT_LARGE_PAGE		(1ul << 7)
+#define EPT_MEM_TYPE_SHIFT	3ul
+#define EPT_IGNORE_PAT		(1ul << 6)
+#define EPT_SUPPRESS_VE	(1ull << 63)
+
+#define EPT_CAP_WT		1ull
+#define EPT_CAP_PWL4		(1ull << 6)
+#define EPT_CAP_UC		(1ull << 8)
+#define EPT_CAP_WB		(1ull << 14)
+#define EPT_CAP_2M_PAGE	(1ull << 16)
+#define EPT_CAP_1G_PAGE	(1ull << 17)
+#define EPT_CAP_INVEPT		(1ull << 20)
+#define EPT_CAP_INVEPT_SINGLE	(1ull << 25)
+#define EPT_CAP_INVEPT_ALL	(1ull << 26)
+#define EPT_CAP_AD_FLAG	(1ull << 21)
+
+#define PAGE_SIZE_2M		(512 * PAGE_SIZE)
+#define PAGE_SIZE_1G		(512 * PAGE_SIZE_2M)
+#define	EPT_PAGE_LEVEL	4
+#define	EPT_PGDIR_WIDTH	9
+#define	EPT_PGDIR_MASK	511
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define EPT_VLT_RD		1
+#define EPT_VLT_WR		(1 << 1)
+#define EPT_VLT_FETCH		(1 << 2)
+#define EPT_VLT_PERM_RD	(1 << 3)
+#define EPT_VLT_PERM_WR	(1 << 4)
+#define EPT_VLT_PERM_EX	(1 << 5)
+#define EPT_VLT_LADDR_VLD	(1 << 7)
+#define EPT_VLT_PADDR		(1 << 8)
+
+#define MAGIC_VAL_1		0x12345678ul
+#define MAGIC_VAL_2		0x87654321ul
+#define MAGIC_VAL_3		0xfffffffful
+
+#define INVEPT_SINGLE		1
+#define INVEPT_GLOBAL		2
 
 extern struct regs regs;
 
@@ -472,8 +525,31 @@  static inline int vmcs_save(struct vmcs **vmcs)
 	return ret;
 }
 
+static inline void invept(unsigned long type, u64 eptp)
+{
+	struct {
+		u64 eptp, gpa;
+	} operand = {eptp, 0};
+	asm volatile("invept %0, %1\n" ::"m"(operand),"r"(type));
+}
+
 void report(const char *name, int result);
 void print_vmexit_info();
+void install_ept_entry(unsigned long *pml4, int pte_level,
+		unsigned long guest_addr, unsigned long pte,
+		unsigned long *pt_page);
+void install_1g_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+void install_2m_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+void install_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+int setup_ept_range(unsigned long *pml4, unsigned long start,
+		unsigned long len, int map_1g, int map_2m, u64 perm);
+unsigned long get_ept_pte(unsigned long *pml4,
+		unsigned long guest_addr, int level);
+int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
+		int level, u64 pte_val);
 
 #endif