diff mbox series

[kvm-unit-tests,v2,2/4] x86/access: CR0.WP toggling write to r/o data test

Message ID 20230331135709.132713-3-minipli@grsecurity.net (mailing list archive)
State New, archived
Headers show
Series Tests for CR0.WP=0/1 r/o write access | expand

Commit Message

Mathias Krause March 31, 2023, 1:57 p.m. UTC
We already have tests that verify a write access to an r/o page is
successful when CR0.WP=0, but we lack a test that explicitly verifies
that the same access will fail after we set CR0.WP=1 without flushing
any associated TLB entries either explicitly (INVLPG) or implicitly
(write to CR3). Add such a test.

Signed-off-by: Mathias Krause <minipli@grsecurity.net>
---
 x86/access.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

Comments

Sean Christopherson March 31, 2023, 4:20 p.m. UTC | #1
On Fri, Mar 31, 2023, Mathias Krause wrote:
> We already have tests that verify a write access to an r/o page is

"supervisor write access"

> successful when CR0.WP=0, but we lack a test that explicitly verifies
> that the same access will fail after we set CR0.WP=1 without flushing

s/fail/fault to be more precise about the expected behavior.

> any associated TLB entries either explicitly (INVLPG) or implicitly
> (write to CR3). Add such a test.

Without pronouns:

    KUT has tests that verify a supervisor write access to an r/o page is
    successful when CR0.WP=0, but lacks a test that explicitly verifies that
    the same access faults after setting CR0.WP=1 without flushing any
    associated TLB entries, either explicitly (INVLPG) or implicitly (write
    to CR3). Add such a test.

> 
> Signed-off-by: Mathias Krause <minipli@grsecurity.net>
> ---
>  x86/access.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 54 insertions(+), 2 deletions(-)
> 
> diff --git a/x86/access.c b/x86/access.c
> index 203353a3f74f..d1ec99b4fa73 100644
> --- a/x86/access.c
> +++ b/x86/access.c
> @@ -575,9 +575,10 @@ fault:
>  		at->expected_error &= ~PFERR_FETCH_MASK;
>  }
>  
> -static void ac_set_expected_status(ac_test_t *at)
> +static void __ac_set_expected_status(ac_test_t *at, bool flush)
>  {
> -	invlpg(at->virt);
> +	if (flush)
> +		invlpg(at->virt);
>  
>  	if (at->ptep)
>  		at->expected_pte = *at->ptep;
> @@ -599,6 +600,11 @@ static void ac_set_expected_status(ac_test_t *at)
>  	ac_emulate_access(at, at->flags);
>  }
>  
> +static void ac_set_expected_status(ac_test_t *at)
> +{
> +	__ac_set_expected_status(at, true);
> +}
> +
>  static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep)
>  {
>  	pt_element_t pte;
> @@ -1061,6 +1067,51 @@ err:
>  	return 0;
>  }
>  
> +static int check_write_cr0wp(ac_pt_env_t *pt_env)

How about check_toggle_cr0_wp() so that it's (hopefully) obvious that the testcase
does more than just check writes to CR0.WP?  Ah, or maybe the "write" is 

> +{
> +	ac_test_t at;
> +	int err = 0;
> +
> +	ac_test_init(&at, 0xffff923042007000ul, pt_env);
> +	at.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
> +		   AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
> +		   AC_ACCESS_WRITE_MASK;
> +	ac_test_setup_ptes(&at);
> +
> +	/*
> +	 * Under VMX the guest might own the CR0.WP bit, requiring KVM to
> +	 * manually keep track of its state where needed, e.g. in the guest
> +	 * page table walker.
> +	 *
> +	 * We load CR0.WP with the inverse value of what would be used during

Avoid pronouns in comments too.  If the immediate code is doing something, phrase
the comment as a command (same "rule" as changelogs), e.g.

	/*
	 * Load CR0.WP with the inverse value of what will be used during the
	 * access test, and toggle EFER.NX to coerce KVM into rebuilding the
	 * current MMU context based on the soon-to-be-stale CR0.WP.
	 */

> +	 * the access test and toggle EFER.NX to flush and rebuild the current
> +	 * MMU context based on that value.
> +	 */
> +
> +	set_cr0_wp(1);
> +	set_efer_nx(1);
> +	set_efer_nx(0);

Rather than copy+paste and end up with a superfluous for-loop, through the guts
of the test into a separate inner function, e.g.

  static int __check_toggle_cr0_wp(ac_test_t *at, bool cr0_wp_initially_set)

and then use @cr0_wp_initially_set to set/clear AC_CPU_CR0_WP_MASK.  And for the
printf(), check "at.flags & AC_CPU_CR0_WP_MASK" to determine whether the access
was expected to fault or succeed.  That should make it easy to test all the
combinations.

And then when FEP comes along, add that as a param too.  FEP is probably better
off passing the flag instead of a bool, e.g.

  static int __check_toggle_cr0_wp(ac_test_t *at, bool cr0_wp_initially_set,
				   int fep_flag)

Ah, a better approach would be to capture the flags in a global macro:

  #define TOGGLE_CR0_WP_BASE_FLAGS (base flags without CR0_WP_MASK or FEP_MASK)

and then take the "extra" flags as a param

  static int __check_toggle_cr0_wp(ac_test_t *at, int flags)

which will yield simple code in the helper

  ac->flags = TOGGLE_CR0_WP_BASE_FLAGS | flags;

and somewhat self-documenting code in the caller:

  ret = __check_toggle_cr0_wp(&at, AC_CPU_CR0_WP_MASK);

  ret = __check_toggle_cr0_wp(&at, 0);

  ret = __check_toggle_cr0_wp(&at, AC_CPU_CR0_WP_MASK | FEP_MASK);

  ...

> +
> +	if (!ac_test_do_access(&at)) {
> +		printf("%s: CR0.WP=0 r/o write fail\n", __FUNCTION__);

"fail" is ambiguous.  Did the access fault, or did the test fail?  Better would
be something like (in the inner helper):

		printf("%s: supervisor write with CR0.WP=%d did not %S as expected\n",
		       __FUNCTION__, !!(at->flags & AC_CPU_CR0_WP_MASK),
		       (at->flags & AC_CPU_CR0_WP_MASK) ? "FAULT" : "SUCCEED");
Mathias Krause April 3, 2023, 9:01 a.m. UTC | #2
On 31.03.23 18:20, Sean Christopherson wrote:
> On Fri, Mar 31, 2023, Mathias Krause wrote:
>> We already have tests that verify a write access to an r/o page is
> 
> "supervisor write access"
> 
>> successful when CR0.WP=0, but we lack a test that explicitly verifies
>> that the same access will fail after we set CR0.WP=1 without flushing
> 
> s/fail/fault to be more precise about the expected behavior.
> 
>> any associated TLB entries either explicitly (INVLPG) or implicitly
>> (write to CR3). Add such a test.
> 
> Without pronouns:
> 
>     KUT has tests that verify a supervisor write access to an r/o page is
>     successful when CR0.WP=0, but lacks a test that explicitly verifies that
>     the same access faults after setting CR0.WP=1 without flushing any
>     associated TLB entries, either explicitly (INVLPG) or implicitly (write
>     to CR3). Add such a test.

Ok.

>>
>> Signed-off-by: Mathias Krause <minipli@grsecurity.net>
>> ---
>>  x86/access.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 54 insertions(+), 2 deletions(-)
>>
>> diff --git a/x86/access.c b/x86/access.c
>> index 203353a3f74f..d1ec99b4fa73 100644
>> --- a/x86/access.c
>> +++ b/x86/access.c
>> @@ -575,9 +575,10 @@ fault:
>>  		at->expected_error &= ~PFERR_FETCH_MASK;
>>  }
>>  
>> -static void ac_set_expected_status(ac_test_t *at)
>> +static void __ac_set_expected_status(ac_test_t *at, bool flush)
>>  {
>> -	invlpg(at->virt);
>> +	if (flush)
>> +		invlpg(at->virt);
>>  
>>  	if (at->ptep)
>>  		at->expected_pte = *at->ptep;
>> @@ -599,6 +600,11 @@ static void ac_set_expected_status(ac_test_t *at)
>>  	ac_emulate_access(at, at->flags);
>>  }
>>  
>> +static void ac_set_expected_status(ac_test_t *at)
>> +{
>> +	__ac_set_expected_status(at, true);
>> +}
>> +
>>  static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep)
>>  {
>>  	pt_element_t pte;
>> @@ -1061,6 +1067,51 @@ err:
>>  	return 0;
>>  }
>>  
>> +static int check_write_cr0wp(ac_pt_env_t *pt_env)
> 
> How about check_toggle_cr0_wp() so that it's (hopefully) obvious that the testcase
> does more than just check writes to CR0.WP?  Ah, or maybe the "write" is 

That last sentence lacks a few words. But yeah, the test is about
verifying access permissions of a write operation to a read-only page
wrt toggling CR0.WP.

> 
>> +{
>> +	ac_test_t at;
>> +	int err = 0;
>> +
>> +	ac_test_init(&at, 0xffff923042007000ul, pt_env);
>> +	at.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
>> +		   AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
>> +		   AC_ACCESS_WRITE_MASK;
>> +	ac_test_setup_ptes(&at);
>> +
>> +	/*
>> +	 * Under VMX the guest might own the CR0.WP bit, requiring KVM to
>> +	 * manually keep track of its state where needed, e.g. in the guest
>> +	 * page table walker.
>> +	 *
>> +	 * We load CR0.WP with the inverse value of what would be used during
> 
> Avoid pronouns in comments too.  If the immediate code is doing something, phrase
> the comment as a command (same "rule" as changelogs), e.g.

Ok, will try to pay more attention to the wording in the future!

> 
> 	/*
> 	 * Load CR0.WP with the inverse value of what will be used during the
> 	 * access test, and toggle EFER.NX to coerce KVM into rebuilding the
> 	 * current MMU context based on the soon-to-be-stale CR0.WP.
> 	 */
> 
>> +	 * the access test and toggle EFER.NX to flush and rebuild the current
>> +	 * MMU context based on that value.
>> +	 */
>> +
>> +	set_cr0_wp(1);
>> +	set_efer_nx(1);
>> +	set_efer_nx(0);
> 
> Rather than copy+paste and end up with a superfluous for-loop, through the guts
> of the test into a separate inner function, e.g.
> 
>   static int __check_toggle_cr0_wp(ac_test_t *at, bool cr0_wp_initially_set)
> 
> and then use @cr0_wp_initially_set to set/clear AC_CPU_CR0_WP_MASK.  And for the
> printf(), check "at.flags & AC_CPU_CR0_WP_MASK" to determine whether the access
> was expected to fault or succeed.  That should make it easy to test all the
> combinations.

Well, I thought of a helper function too and folding the value of CR0.WP
into the error message. But looking at other tests I got the impression,
it's more important to make the test conditions more obvious than it is
to write compact code. This not only makes it easier to see what gets
tested but also shows what the test was intended to do. If, instead, the
error message is based on the value of AC_CPU_CR0_WP_MASK, one not only
has to check what value it was set to but also look up what its meaning
really is, like if set, does it mean CR0.WP=0 or 1?

That said, your below changes make it more obvious, so the helper might
not be such a bad idea in the end.

> 
> And then when FEP comes along, add that as a param too.  FEP is probably better
> off passing the flag instead of a bool, e.g.
> 
>   static int __check_toggle_cr0_wp(ac_test_t *at, bool cr0_wp_initially_set,
> 				   int fep_flag)
> 
> Ah, a better approach would be to capture the flags in a global macro:
> 
>   #define TOGGLE_CR0_WP_BASE_FLAGS (base flags without CR0_WP_MASK or FEP_MASK)
> 
> and then take the "extra" flags as a param
> 
>   static int __check_toggle_cr0_wp(ac_test_t *at, int flags)
> 
> which will yield simple code in the helper
> 
>   ac->flags = TOGGLE_CR0_WP_BASE_FLAGS | flags;
> 
> and somewhat self-documenting code in the caller:
> 
>   ret = __check_toggle_cr0_wp(&at, AC_CPU_CR0_WP_MASK);
> 
>   ret = __check_toggle_cr0_wp(&at, 0);
> 
>   ret = __check_toggle_cr0_wp(&at, AC_CPU_CR0_WP_MASK | FEP_MASK);
> 
>   ...

Yeah, looks good indeed. Will change!

> 
>> +
>> +	if (!ac_test_do_access(&at)) {
>> +		printf("%s: CR0.WP=0 r/o write fail\n", __FUNCTION__);
> 
> "fail" is ambiguous.  Did the access fault, or did the test fail?  Better would
> be something like (in the inner helper):
> 
> 		printf("%s: supervisor write with CR0.WP=%d did not %S as expected\n",
> 		       __FUNCTION__, !!(at->flags & AC_CPU_CR0_WP_MASK),
> 		       (at->flags & AC_CPU_CR0_WP_MASK) ? "FAULT" : "SUCCEED");

Thanks,
Mathias
Sean Christopherson April 3, 2023, 5:09 p.m. UTC | #3
On Mon, Apr 03, 2023, Mathias Krause wrote:
> On 31.03.23 18:20, Sean Christopherson wrote:
> > On Fri, Mar 31, 2023, Mathias Krause wrote:
> >> +	 * the access test and toggle EFER.NX to flush and rebuild the current
> >> +	 * MMU context based on that value.
> >> +	 */
> >> +
> >> +	set_cr0_wp(1);
> >> +	set_efer_nx(1);
> >> +	set_efer_nx(0);
> > 
> > Rather than copy+paste and end up with a superfluous for-loop, through the guts
> > of the test into a separate inner function, e.g.
> > 
> >   static int __check_toggle_cr0_wp(ac_test_t *at, bool cr0_wp_initially_set)
> > 
> > and then use @cr0_wp_initially_set to set/clear AC_CPU_CR0_WP_MASK.  And for the
> > printf(), check "at.flags & AC_CPU_CR0_WP_MASK" to determine whether the access
> > was expected to fault or succeed.  That should make it easy to test all the
> > combinations.
> 
> Well, I thought of a helper function too and folding the value of CR0.WP
> into the error message. But looking at other tests I got the impression,
> it's more important to make the test conditions more obvious than it is
> to write compact code. 

LOL, you're looking for reasoning/logic where there is none.  The sad truth is
that the vast majority of tests were "written" by copy+paste+tweak.

> This not only makes it easier to see what gets tested but also shows what the
> test was intended to do. If, instead, the error message is based on the value
> of AC_CPU_CR0_WP_MASK, one not only has to check what value it was set to but
> also look up what its meaning really is, like if set, does it mean CR0.WP=0
> or 1?

I generally agree with having self-documenting code and/or assertions, but that
only works to a certain point in tests, especially for tests that are verifying
architectural behavior.  Oftentimes I know what KUT code is doing, but it takes
far too much digging to understand what is actually expected to happen and why.
The architectural behavior cases are especially problematic because there are so
many details that aren't really captured in the test itself.

IMO, given how KUT is structured and what it is testing, documenting the exact
testscase is the role of the error message.  E.g. if the error message explicitly
states the test case, then it documents the code _and_ provides a helpful message
to allow users to quickly triage failures.  Of course, the vast majority of error
messages in KUT are awful IMO, and are a constant source of frustration, but one
can dream :-)
diff mbox series

Patch

diff --git a/x86/access.c b/x86/access.c
index 203353a3f74f..d1ec99b4fa73 100644
--- a/x86/access.c
+++ b/x86/access.c
@@ -575,9 +575,10 @@  fault:
 		at->expected_error &= ~PFERR_FETCH_MASK;
 }
 
-static void ac_set_expected_status(ac_test_t *at)
+static void __ac_set_expected_status(ac_test_t *at, bool flush)
 {
-	invlpg(at->virt);
+	if (flush)
+		invlpg(at->virt);
 
 	if (at->ptep)
 		at->expected_pte = *at->ptep;
@@ -599,6 +600,11 @@  static void ac_set_expected_status(ac_test_t *at)
 	ac_emulate_access(at, at->flags);
 }
 
+static void ac_set_expected_status(ac_test_t *at)
+{
+	__ac_set_expected_status(at, true);
+}
+
 static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep)
 {
 	pt_element_t pte;
@@ -1061,6 +1067,51 @@  err:
 	return 0;
 }
 
+static int check_write_cr0wp(ac_pt_env_t *pt_env)
+{
+	ac_test_t at;
+	int err = 0;
+
+	ac_test_init(&at, 0xffff923042007000ul, pt_env);
+	at.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
+		   AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
+		   AC_ACCESS_WRITE_MASK;
+	ac_test_setup_ptes(&at);
+
+	/*
+	 * Under VMX the guest might own the CR0.WP bit, requiring KVM to
+	 * manually keep track of its state where needed, e.g. in the guest
+	 * page table walker.
+	 *
+	 * We load CR0.WP with the inverse value of what would be used during
+	 * the access test and toggle EFER.NX to flush and rebuild the current
+	 * MMU context based on that value.
+	 */
+
+	set_cr0_wp(1);
+	set_efer_nx(1);
+	set_efer_nx(0);
+
+	if (!ac_test_do_access(&at)) {
+		printf("%s: CR0.WP=0 r/o write fail\n", __FUNCTION__);
+		err++;
+	}
+
+	at.flags |= AC_CPU_CR0_WP_MASK;
+	__ac_set_expected_status(&at, false);
+
+	set_cr0_wp(0);
+	set_efer_nx(1);
+	set_efer_nx(0);
+
+	if (!ac_test_do_access(&at)) {
+		printf("%s: CR0.WP=1 r/o write deny fail\n", __FUNCTION__);
+		err++;
+	}
+
+	return err == 0;
+}
+
 static int check_effective_sp_permissions(ac_pt_env_t *pt_env)
 {
 	unsigned long ptr1 = 0xffff923480000000;
@@ -1150,6 +1201,7 @@  const ac_test_fn ac_test_cases[] =
 	check_pfec_on_prefetch_pte,
 	check_large_pte_dirty_for_nowp,
 	check_smep_andnot_wp,
+	check_write_cr0wp,
 	check_effective_sp_permissions,
 };