
[kvm-unit-tests,v4,4/8] x86: Improve set_mmu_range() to implement npt

Message ID 20220428070851.21985-5-manali.shukla@amd.com (mailing list archive)
State New, archived
Series Move npt test cases and NPT code improvements

Commit Message

Manali Shukla April 28, 2022, 7:08 a.m. UTC
If the U/S bit is "0" for all page table entries, all of these pages are
considered supervisor pages. By default, pte_opt_mask is set to "0"
for all npt test cases, which sets the U/S bit in all PTEs to "0".

Any nested page table accesses performed by the MMU are treated as user
accesses, so when building a nested page table dynamically, PT_USER_MASK
needs to be enabled for all npt entries.

The setup_mmu_range() function is improved based on the above analysis.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Manali Shukla <manali.shukla@amd.com>
---
 lib/x86/vm.c | 37 +++++++++++++++++++++++++++----------
 lib/x86/vm.h |  3 +++
 2 files changed, 30 insertions(+), 10 deletions(-)

Comments

Sean Christopherson June 15, 2022, 11:58 p.m. UTC | #1
On Thu, Apr 28, 2022, Manali Shukla wrote:
> If U/S bit is "0" for all page table entries, all these pages are
> considered as supervisor pages. By default, pte_opt_mask is set to "0"
> for all npt test cases, which sets U/S bit in all PTEs to "0".
> 
> Any nested page table accesses performed by the MMU are treated as user
> acesses. So while implementing a nested page table dynamically, PT_USER_MASK
> needs to be enabled for all npt entries.

Bits in PTEs aren't really "enabled".

> set_mmu_range() function is improved based on above analysis.

Same comments.  Don't provide analysis and then say "do that", just state what
the patch does.  Background details are great, but first and foremost the reader
needs to know what the patch actually does.

> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Manali Shukla <manali.shukla@amd.com>
> ---
>  lib/x86/vm.c | 37 +++++++++++++++++++++++++++----------
>  lib/x86/vm.h |  3 +++
>  2 files changed, 30 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/x86/vm.c b/lib/x86/vm.c
> index 25a4f5f..b555d5b 100644
> --- a/lib/x86/vm.c
> +++ b/lib/x86/vm.c
> @@ -4,7 +4,7 @@
>  #include "alloc_page.h"
>  #include "smp.h"
>  
> -static pteval_t pte_opt_mask;
> +static pteval_t pte_opt_mask, prev_pte_opt_mask;
>  
>  pteval_t *install_pte(pgd_t *cr3,
>  		      int pte_level,
> @@ -140,16 +140,33 @@ bool any_present_pages(pgd_t *cr3, void *virt, size_t len)
>  	return false;
>  }
>  
> -static void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len)
> +void set_pte_opt_mask()
> +{
> +        prev_pte_opt_mask = pte_opt_mask;
> +        pte_opt_mask = PT_USER_MASK;
> +}
> +
> +void reset_pte_opt_mask()

These should have "void" parameters.  I'm surprised gcc doesn't complain.  But
that's a moot point, because these don't need to exist; just do the save/mod/restore
on the stack in setup_mmu_range().
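
(For illustration only, a minimal sketch of that save/mod/restore idea, reusing the
existing pte_opt_mask and install_pages(); the concrete proposal follows in the next
reply:)

	pteval_t orig_opt_mask = pte_opt_mask;

	pte_opt_mask = PT_USER_MASK;
	install_pages(cr3, phys, len, (void *)(ulong)phys);
	pte_opt_mask = orig_opt_mask;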

> +{
> +        pte_opt_mask = prev_pte_opt_mask;
> +}
> +
> +void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu)
>  {
>  	u64 max = (u64)len + (u64)start;
>  	u64 phys = start;
>  
> -	while (phys + LARGE_PAGE_SIZE <= max) {
> -		install_large_page(cr3, phys, (void *)(ulong)phys);
> -		phys += LARGE_PAGE_SIZE;
> -	}
> -	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> +        if (nested_mmu == false) {
> +                while (phys + LARGE_PAGE_SIZE <= max) {
> +                        install_large_page(cr3, phys, (void *)(ulong)phys);
> +		        phys += LARGE_PAGE_SIZE;
> +	        }
> +	        install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> +        } else {
> +                set_pte_opt_mask();
> +                install_pages(cr3, phys, len, (void *)(ulong)phys);
> +                reset_pte_opt_mask();
> +        }

Why can't a nested_mmu use large pages?

Sean Christopherson June 16, 2022, 12:04 a.m. UTC | #2
On Wed, Jun 15, 2022, Sean Christopherson wrote:
> On Thu, Apr 28, 2022, Manali Shukla wrote:
> > +void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu)
> >  {
> >  	u64 max = (u64)len + (u64)start;
> >  	u64 phys = start;
> >  
> > -	while (phys + LARGE_PAGE_SIZE <= max) {
> > -		install_large_page(cr3, phys, (void *)(ulong)phys);
> > -		phys += LARGE_PAGE_SIZE;
> > -	}
> > -	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> > +        if (nested_mmu == false) {
> > +                while (phys + LARGE_PAGE_SIZE <= max) {
> > +                        install_large_page(cr3, phys, (void *)(ulong)phys);
> > +		        phys += LARGE_PAGE_SIZE;
> > +	        }
> > +	        install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> > +        } else {
> > +                set_pte_opt_mask();
> > +                install_pages(cr3, phys, len, (void *)(ulong)phys);
> > +                reset_pte_opt_mask();
> > +        }
> 
> Why can't a nested_mmu use large pages?

Oh, duh, you're just preserving the existing functionality.

I dislike bool params, but I also don't see a better option at this time.  To make
it slightly less evil, add a wrapper so that the use and bool are closer together.
And then the callers don't need to be updated.

void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool use_hugepages);

static inline void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len)
{
	__setup_mmu_range(cr3, start, len, true);
}


And if you name it use_hugepages, then you can do:

void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool use_hugepages)
{
        u64 orig_opt_mask = pte_opt_mask;
	u64 max = (u64)len + (u64)start;
	u64 phys = start;

	/* comment goes here. */
	pte_opt_mask |= PT_USER_MASK;

        if (use_hugepages) {
                while (phys + LARGE_PAGE_SIZE <= max) {
                        install_large_page(cr3, phys, (void *)(ulong)phys);
		        phys += LARGE_PAGE_SIZE;
	        }
	}
	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);

	pte_opt_mask = orig_opt_mask;
}

Shukla, Manali June 22, 2022, 3:32 p.m. UTC | #3
On 6/16/2022 5:34 AM, Sean Christopherson wrote:
> On Wed, Jun 15, 2022, Sean Christopherson wrote:
>> On Thu, Apr 28, 2022, Manali Shukla wrote:
>>> +void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu)
>>>  {
>>>  	u64 max = (u64)len + (u64)start;
>>>  	u64 phys = start;
>>>  
>>> -	while (phys + LARGE_PAGE_SIZE <= max) {
>>> -		install_large_page(cr3, phys, (void *)(ulong)phys);
>>> -		phys += LARGE_PAGE_SIZE;
>>> -	}
>>> -	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
>>> +        if (nested_mmu == false) {
>>> +                while (phys + LARGE_PAGE_SIZE <= max) {
>>> +                        install_large_page(cr3, phys, (void *)(ulong)phys);
>>> +		        phys += LARGE_PAGE_SIZE;
>>> +	        }
>>> +	        install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
>>> +        } else {
>>> +                set_pte_opt_mask();
>>> +                install_pages(cr3, phys, len, (void *)(ulong)phys);
>>> +                reset_pte_opt_mask();
>>> +        }
>>
>> Why can't a nested_mmu use large pages?
> 
> Oh, duh, you're just preserving the existing functionality.
> 
> I dislike bool params, but I also don't see a better option at this time.  To make
> it slightly less evil, add a wrapper so that the use and bool are closer together.
> And then the callers don't need to be updated.
> 
> void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool use_hugepages);
> 
> static inline void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len)
> {
> 	__setup_mmu_range(cr3, start, len, true);
> }
> 
> 
> And if you name it use_hugepages, then you can do:
> 
> void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu)
> {
>         u64 orig_opt_mask = pte_opt_mask;
> 	u64 max = (u64)len + (u64)start;
> 	u64 phys = start;
> 
> 	/* comment goes here. */
> 	pte_opt_mask |= PT_USER_MASK;
> 
>         if (use_hugepages) {
>                 while (phys + LARGE_PAGE_SIZE <= max) {
>                         install_large_page(cr3, phys, (void *)(ulong)phys);
> 		        phys += LARGE_PAGE_SIZE;
> 	        }
> 	}
> 	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> 
> 	pte_opt_mask = orig_opt_mask;
> }

Hi Sean,

Thank you so much for reviewing my changes.

The RSVD bit test case will start failing with the above implementation, as we will be setting the PT_USER_MASK bit for all host PTEs (in order to toggle CR4.SMEP), which will defeat one of the purposes of this patch.

Right now, the pte_opt_mask value set from setup_vm() is overwritten in setup_mmu_range() under all conditions.
How about setting PT_USER_MASK only for the nested MMU in setup_mmu_range()?  That retains the pte_opt_mask value set from setup_vm() in all other cases.


#define IS_NESTED_MMU 1ULL
#define USE_HUGEPAGES 2ULL

void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, unsigned long mmu_flags) {
        u64 orig_opt_mask = pte_opt_mask;
        u64 max = (u64)len + (u64)start;
        u64 phys = start;

        /* Allocate 4k pages only for nested page table, PT_USER_MASK needs to
         * be enabled for nested page.
         */
        if (mmu_flags & IS_NESTED_MMU)
                pte_opt_mask |= PT_USER_MASK;

        if (mmu_flags & USE_HUGEPAGES) {
                while (phys + LARGE_PAGE_SIZE <= max) {
                        install_large_page(cr3, phys, (void *)(ulong)phys);
                        phys += LARGE_PAGE_SIZE;
                }
        }
        install_pages(cr3, phys, max - phys, (void *)(ulong)phys);

        pte_opt_mask = orig_opt_mask;
}

static inline void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len) {
        __setup_mmu_range(cr3, start, len, USE_HUGEPAGES);
}
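
For illustration, a nested page table setup path could then request user-accessible
4k-only mappings with a call along these lines (npt_pml4 and npt_size are placeholder
names, not identifiers from this series):

        __setup_mmu_range(npt_pml4, 0, npt_size, IS_NESTED_MMU);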

Thank you,
Manali

Sean Christopherson June 24, 2022, 12:46 a.m. UTC | #4
On Wed, Jun 22, 2022, Shukla, Manali wrote:
> 
> On 6/16/2022 5:34 AM, Sean Christopherson wrote:
> > void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu)
> > {
> >         u64 orig_opt_mask = pte_opt_mask;
> > 	u64 max = (u64)len + (u64)start;
> > 	u64 phys = start;
> > 
> > 	/* comment goes here. */
> > 	pte_opt_mask |= PT_USER_MASK;
> > 
> >         if (use_hugepages) {
> >                 while (phys + LARGE_PAGE_SIZE <= max) {
> >                         install_large_page(cr3, phys, (void *)(ulong)phys);
> > 		        phys += LARGE_PAGE_SIZE;
> > 	        }
> > 	}
> > 	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> > 
> > 	pte_opt_mask = orig_opt_mask;
> > }
> 
> Hi Sean,
> 
> Thank you so much for reviewing my changes.
> 
> RSVD bit test case will start failing with above implementation as we will be
> setting PT_USER_MASK bit for all host PTEs (in order to toggle CR4.SMEP)
> which will defeat one of the purpose of this patch. 

/facepalm

> Right now, pte_opt_mask value which is set from setup_vm(), is overwritten in
> setup_mmu_range() for all the conditions.  How about setting PT_USER_MASK
> only for nested mmu in setup_mmu_range()?  It will retain the same value of
> pte_opt_mask which is set from setup_vm() in all the other cases.

Ya, that should work.

> #define IS_NESTED_MMU 1ULL
> #define USE_HUGEPAGES 2ULL

Use BIT().  And not the ULL, since the param is an unsigned long, not a u64.
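
For instance (sketch only, using the BIT() helper):

	#define IS_NESTED_MMU	BIT(0)
	#define USE_HUGEPAGES	BIT(1)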

> void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, unsigned long mmu_flags) {

Brace goes on its own line.

>         u64 orig_opt_mask = pte_opt_mask;
>         u64 max = (u64)len + (u64)start;
>         u64 phys = start;
> 
>         /* Allocate 4k pages only for nested page table, PT_USER_MASK needs to
	
	/*
	 * Multi-line comments look like this.
	 * Line 2.
	 */

>          * be enabled for nested page.
>          */
>         if (mmu_flags & IS_NESTED_MMU)
>                 pte_opt_mask |= PT_USER_MASK;
> 
>         if (mmu_flags & USE_HUGEPAGES) {
>                 while (phys + LARGE_PAGE_SIZE <= max) {
>                         install_large_page(cr3, phys, (void *)(ulong)phys);
>                         phys += LARGE_PAGE_SIZE;
>                 }
>         }
>         install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
> 
>         pte_opt_mask = orig_opt_mask;
> }
> 
> static inline void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len) {
>         __setup_mmu_range(cr3, start, len, USE_HUGEPAGES);
> }
> 
> Thank you,
> Manali
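
Putting the style feedback together (the BIT()-based flag defines sketched above,
brace on its own line, kernel-style multi-line comment), the helper might end up
looking roughly like this; a sketch only, not necessarily the final committed form:

	void __setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len,
			       unsigned long mmu_flags)
	{
		u64 orig_opt_mask = pte_opt_mask;
		u64 max = (u64)len + (u64)start;
		u64 phys = start;

		/*
		 * Nested (NPT) accesses are treated as user accesses by the
		 * MMU, so the nested PTEs need PT_USER_MASK set.
		 */
		if (mmu_flags & IS_NESTED_MMU)
			pte_opt_mask |= PT_USER_MASK;

		if (mmu_flags & USE_HUGEPAGES) {
			while (phys + LARGE_PAGE_SIZE <= max) {
				install_large_page(cr3, phys, (void *)(ulong)phys);
				phys += LARGE_PAGE_SIZE;
			}
		}
		install_pages(cr3, phys, max - phys, (void *)(ulong)phys);

		pte_opt_mask = orig_opt_mask;
	}

	static inline void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len)
	{
		__setup_mmu_range(cr3, start, len, USE_HUGEPAGES);
	}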

Patch

diff --git a/lib/x86/vm.c b/lib/x86/vm.c
index 25a4f5f..b555d5b 100644
--- a/lib/x86/vm.c
+++ b/lib/x86/vm.c
@@ -4,7 +4,7 @@ 
 #include "alloc_page.h"
 #include "smp.h"
 
-static pteval_t pte_opt_mask;
+static pteval_t pte_opt_mask, prev_pte_opt_mask;
 
 pteval_t *install_pte(pgd_t *cr3,
 		      int pte_level,
@@ -140,16 +140,33 @@  bool any_present_pages(pgd_t *cr3, void *virt, size_t len)
 	return false;
 }
 
-static void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len)
+void set_pte_opt_mask()
+{
+        prev_pte_opt_mask = pte_opt_mask;
+        pte_opt_mask = PT_USER_MASK;
+}
+
+void reset_pte_opt_mask()
+{
+        pte_opt_mask = prev_pte_opt_mask;
+}
+
+void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu)
 {
 	u64 max = (u64)len + (u64)start;
 	u64 phys = start;
 
-	while (phys + LARGE_PAGE_SIZE <= max) {
-		install_large_page(cr3, phys, (void *)(ulong)phys);
-		phys += LARGE_PAGE_SIZE;
-	}
-	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
+        if (nested_mmu == false) {
+                while (phys + LARGE_PAGE_SIZE <= max) {
+                        install_large_page(cr3, phys, (void *)(ulong)phys);
+		        phys += LARGE_PAGE_SIZE;
+	        }
+	        install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
+        } else {
+                set_pte_opt_mask();
+                install_pages(cr3, phys, len, (void *)(ulong)phys);
+                reset_pte_opt_mask();
+        }
 }
 
 static void set_additional_vcpu_vmregs(struct vm_vcpu_info *info)
@@ -176,10 +193,10 @@  void *setup_mmu(phys_addr_t end_of_memory, void *opt_mask)
     if (end_of_memory < (1ul << 32))
         end_of_memory = (1ul << 32);  /* map mmio 1:1 */
 
-    setup_mmu_range(cr3, 0, end_of_memory);
+    setup_mmu_range(cr3, 0, end_of_memory, false);
 #else
-    setup_mmu_range(cr3, 0, (2ul << 30));
-    setup_mmu_range(cr3, 3ul << 30, (1ul << 30));
+    setup_mmu_range(cr3, 0, (2ul << 30), false);
+    setup_mmu_range(cr3, 3ul << 30, (1ul << 30), false);
     init_alloc_vpage((void*)(3ul << 30));
 #endif
 
diff --git a/lib/x86/vm.h b/lib/x86/vm.h
index 4c6dff9..fbb657f 100644
--- a/lib/x86/vm.h
+++ b/lib/x86/vm.h
@@ -37,6 +37,9 @@  pteval_t *install_pte(pgd_t *cr3,
 pteval_t *install_large_page(pgd_t *cr3, phys_addr_t phys, void *virt);
 void install_pages(pgd_t *cr3, phys_addr_t phys, size_t len, void *virt);
 bool any_present_pages(pgd_t *cr3, void *virt, size_t len);
+void set_pte_opt_mask(void);
+void reset_pte_opt_mask(void);
+void setup_mmu_range(pgd_t *cr3, phys_addr_t start, size_t len, bool nested_mmu);
 
 static inline void *current_page_table(void)
 {