diff mbox series

[v5,10/10] KVM: selftests: Test disabling NX hugepages on a VM

Message ID 20220413175944.71705-11-bgardon@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Add a cap to disable NX hugepages on a VM | expand

Commit Message

Ben Gardon April 13, 2022, 5:59 p.m. UTC
Add an argument to the NX huge pages test to test disabling the feature
on a VM using the new capability.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 .../selftests/kvm/include/kvm_util_base.h     |  2 +
 tools/testing/selftests/kvm/lib/kvm_util.c    | 16 ++++-
 .../selftests/kvm/x86_64/nx_huge_pages_test.c | 62 +++++++++++++++----
 3 files changed, 68 insertions(+), 12 deletions(-)

Comments

Sean Christopherson April 13, 2022, 10:48 p.m. UTC | #1
On Wed, Apr 13, 2022, Ben Gardon wrote:
> diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
> index 7f80e48781fd..21c31e1d567e 100644
> --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
> @@ -13,6 +13,8 @@
>  #include <fcntl.h>
>  #include <stdint.h>
>  #include <time.h>
> +#include <linux/reboot.h>
> +#include <sys/syscall.h>
>  
>  #include <test_util.h>
>  #include "kvm_util.h"
> @@ -80,13 +82,45 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
>  		    expected_splits, actual_splits);
>  }
>  
> -int main(int argc, char **argv)
> +void run_test(bool disable_nx)

Probably worth naming this disable_nx_workaround or disable_nx_mitigation, it's
quite easy to think this means "disable EFER.NX".

>  {
>  	struct kvm_vm *vm;
>  	struct timespec ts;
> +	uint64_t pages;
>  	void *hva;
> -
> -	vm = vm_create_default(0, 0, guest_code);
> +	int r;
> +
> +	pages = vm_pages_needed(VM_MODE_DEFAULT, 1, DEFAULT_GUEST_PHY_PAGES,
> +				0, 0);
> +	vm = vm_create_without_vcpus(VM_MODE_DEFAULT, pages);
> +
> +	if (disable_nx) {
> +		kvm_check_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES);
> +
> +		/*
> +		 * Check if this process has the reboot permissions needed to
> +		 * disable NX huge pages on a VM.
> +		 *
> +		 * The reboot call below will never have any effect because
> +		 * the magic values are not set correctly, however the
> +		 * permission check is done before the magic value check.
> +		 */
> +		r = syscall(SYS_reboot, 0, 0, 0, NULL);
> +		if (r && errno == EPERM) {
> +			r = vm_disable_nx_huge_pages(vm);
> +			TEST_ASSERT(r == EPERM,
> +				    "This process should not have permission to disable NX huge pages");

First off, huge kudos for negative testing!  But, it's going to provide poor coverage
if we teach everyone to use the runner script, because that'll likely require root on
most hosts, e.g. to futz with the module param.

Aha!  Idea.  And it should eliminate the SYS_reboot shenanigans, which, while hilarious,
are mildly scary.

In the runner script, wrap all the modification of sysfs knobs with sudo, and then
(again with sudo) do:

	setcap cap_sys_boot+ep path/to/nx_huge_pages_test
	path/to/nx_huge_pages_test MAGIC_NUMBER -b

where "-b" means "has CAP_SYS_BOOT".  And then 

	setcap cap_sys_boot-ep path/to/nx_huge_pages_test
	path/to/nx_huge_pages_test MAGIC_NUMBER

Hmm, and I guess if the script is run as root, just skip the second invocation.

> +			return;
> +		}
> +
> +		TEST_ASSERT(r && errno == EINVAL,
> +			    "Reboot syscall should fail with -EINVAL");
> +
> +		r = vm_disable_nx_huge_pages(vm);
> +		TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
> +	}
> +
> +	vm_vcpu_add_default(vm, 0, guest_code);
>  
>  	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
>  				    HPAGE_GPA, HPAGE_SLOT,
> @@ -121,21 +155,21 @@ int main(int argc, char **argv)
>  	 * to be remapped at 4k.
>  	 */
>  	vcpu_run(vm, 0);
> -	check_2m_page_count(vm, 1);
> -	check_split_count(vm, 1);
> +	check_2m_page_count(vm, disable_nx ? 2 : 1);
> +	check_split_count(vm, disable_nx ? 0 : 1);

Can you update the comments to explain why these magic numbers of pages are
expected for NX enabled/disabled?  As Jim has pointed out, just because KVM and
selftests might agree that 1==2, doesn't mean that their math is correct :-)
Ben Gardon April 14, 2022, 9:14 p.m. UTC | #2
On Wed, Apr 13, 2022 at 3:48 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Wed, Apr 13, 2022, Ben Gardon wrote:
> > diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
> > index 7f80e48781fd..21c31e1d567e 100644
> > --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
> > +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
> > @@ -13,6 +13,8 @@
> >  #include <fcntl.h>
> >  #include <stdint.h>
> >  #include <time.h>
> > +#include <linux/reboot.h>
> > +#include <sys/syscall.h>
> >
> >  #include <test_util.h>
> >  #include "kvm_util.h"
> > @@ -80,13 +82,45 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
> >                   expected_splits, actual_splits);
> >  }
> >
> > -int main(int argc, char **argv)
> > +void run_test(bool disable_nx)
>
> Probably worth naming this disable_nx_workaround or disable_nx_mitigation, it's
> quite easy to think this means "disable EFER.NX".
>
> >  {
> >       struct kvm_vm *vm;
> >       struct timespec ts;
> > +     uint64_t pages;
> >       void *hva;
> > -
> > -     vm = vm_create_default(0, 0, guest_code);
> > +     int r;
> > +
> > +     pages = vm_pages_needed(VM_MODE_DEFAULT, 1, DEFAULT_GUEST_PHY_PAGES,
> > +                             0, 0);
> > +     vm = vm_create_without_vcpus(VM_MODE_DEFAULT, pages);
> > +
> > +     if (disable_nx) {
> > +             kvm_check_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES);
> > +
> > +             /*
> > +              * Check if this process has the reboot permissions needed to
> > +              * disable NX huge pages on a VM.
> > +              *
> > +              * The reboot call below will never have any effect because
> > +              * the magic values are not set correctly, however the
> > +              * permission check is done before the magic value check.
> > +              */
> > +             r = syscall(SYS_reboot, 0, 0, 0, NULL);
> > +             if (r && errno == EPERM) {
> > +                     r = vm_disable_nx_huge_pages(vm);
> > +                     TEST_ASSERT(r == EPERM,
> > +                                 "This process should not have permission to disable NX huge pages");
>
> First off, huge kudos for negative testing!  But, it's going to provide poor coverage
> if we teach everyone to use the runner script, because that'll likely require root on
> most hosts, e.g. to futz with the module param.
>
> Aha!  Idea.  And it should eliminate the SYS_reboot shenanigans, which, while hilarious,
> are mildly scary.
>
> In the runner script, wrap all the modification of sysfs knobs with sudo, and then
> (again with sudo) do:
>
>         setcap cap_sys_boot+ep path/to/nx_huge_pages_test
>         path/to/nx_huge_pages_test MAGIC_NUMBER -b
>
> where "-b" means "has CAP_SYS_BOOT".  And then
>
>         setcap cap_sys_boot-ep path/to/nx_huge_pages_test
>         path/to/nx_huge_pages_test MAGIC_NUMBER
>
> Hmm, and I guess if the script is run as root, just skip the second invocation.

Wouldn't it be easier to just run the test binary twice and just have
the second time run without root permissions? I don't know if there's
an easy way to do that.

>
> > +                     return;
> > +             }
> > +
> > +             TEST_ASSERT(r && errno == EINVAL,
> > +                         "Reboot syscall should fail with -EINVAL");
> > +
> > +             r = vm_disable_nx_huge_pages(vm);
> > +             TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
> > +     }
> > +
> > +     vm_vcpu_add_default(vm, 0, guest_code);
> >
> >       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
> >                                   HPAGE_GPA, HPAGE_SLOT,
> > @@ -121,21 +155,21 @@ int main(int argc, char **argv)
> >        * to be remapped at 4k.
> >        */
> >       vcpu_run(vm, 0);
> > -     check_2m_page_count(vm, 1);
> > -     check_split_count(vm, 1);
> > +     check_2m_page_count(vm, disable_nx ? 2 : 1);
> > +     check_split_count(vm, disable_nx ? 0 : 1);
>
> Can you update the comments to explain why these magic numbers of pages are
> expected for NX enabled/disabled?  As Jim has pointed out, just because KVM and
> selftests might agree that 1==2, doesn't mean that their math is correct :-)
Sean Christopherson April 14, 2022, 10:29 p.m. UTC | #3
On Thu, Apr 14, 2022, Ben Gardon wrote:
> On Wed, Apr 13, 2022 at 3:48 PM Sean Christopherson <seanjc@google.com> wrote:
> > First off, huge kudos for negative testing!  But, it's going to provide poor coverage
> > if we teach everyone to use the runner script, because that'll likely require root on
> > most hosts, e.g. to futz with the module param.
> >
> > Aha!  Idea.  And it should eliminate the SYS_reboot shenanigans, which, while hilarious,
> > are mildly scary.
> >
> > In the runner script, wrap all the modification of sysfs knobs with sudo, and then
> > (again with sudo) do:
> >
> >         setcap cap_sys_boot+ep path/to/nx_huge_pages_test
> >         path/to/nx_huge_pages_test MAGIC_NUMBER -b
> >
> > where "-b" means "has CAP_SYS_BOOT".  And then
> >
> >         setcap cap_sys_boot-ep path/to/nx_huge_pages_test
> >         path/to/nx_huge_pages_test MAGIC_NUMBER
> >
> > Hmm, and I guess if the script is run as root, just skip the second invocation.
> 
> Wouldn't it be easier to just run the test binary twice and just have
> the second time run without root permissions? I don't know if there's
> an easy way to do that.

I don't think so, e.g. what if there is no other user account to switch to?  On
the other hand, I doubt I'm the only person that typically runs selftests with a
user account.

Using setcap isn't hard, e.g.

	# If the test isn't running as root, verify KVM correctly rejects the
	# per-VM override if the process doesn't have CAP_SYS_BOOT.
	if [[ $(id -u) -ne 0 ]]; then
		sudo setcap cap_sys_boot-ep path/to/nx_huge_pages_test
		path/to/nx_huge_pages_test MAGIC_NUMBER

		sudo setcap cap_sys_boot+ep path/to/nx_huge_pages_test
	fi

	# The test now has CAP_SYS_BOOT, or is running as root.
	path/to/nx_huge_pages_test MAGIC_NUMBER -b

Bonus points if you want to save/restore the capability.
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 1dac3c6607f1..8f6aad253392 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -414,4 +414,6 @@  uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name);
 
 uint32_t guest_get_vcpuid(void);
 
+int vm_disable_nx_huge_pages(struct kvm_vm *vm);
+
 #endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5ffed44ab328..ef01858745e9 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -112,6 +112,11 @@  int vm_check_cap(struct kvm_vm *vm, long cap)
 	return ret;
 }
 
+static int __vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+	return ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+}
+
 /* VM Enable Capability
  *
  * Input Args:
@@ -128,7 +133,7 @@  int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
 {
 	int ret;
 
-	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+	ret = __vm_enable_cap(vm, cap);
 	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
 		"  rc: %i errno: %i", ret, errno);
 
@@ -2740,3 +2745,12 @@  uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
 		    stat_name, ret);
 	return data;
 }
+
+int vm_disable_nx_huge_pages(struct kvm_vm *vm)
+{
+	struct kvm_enable_cap cap = { 0 };
+
+	cap.cap = KVM_CAP_VM_DISABLE_NX_HUGE_PAGES;
+	cap.args[0] = 0;
+	return __vm_enable_cap(vm, &cap);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 7f80e48781fd..21c31e1d567e 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -13,6 +13,8 @@ 
 #include <fcntl.h>
 #include <stdint.h>
 #include <time.h>
+#include <linux/reboot.h>
+#include <sys/syscall.h>
 
 #include <test_util.h>
 #include "kvm_util.h"
@@ -80,13 +82,45 @@  static void check_split_count(struct kvm_vm *vm, int expected_splits)
 		    expected_splits, actual_splits);
 }
 
-int main(int argc, char **argv)
+void run_test(bool disable_nx)
 {
 	struct kvm_vm *vm;
 	struct timespec ts;
+	uint64_t pages;
 	void *hva;
-
-	vm = vm_create_default(0, 0, guest_code);
+	int r;
+
+	pages = vm_pages_needed(VM_MODE_DEFAULT, 1, DEFAULT_GUEST_PHY_PAGES,
+				0, 0);
+	vm = vm_create_without_vcpus(VM_MODE_DEFAULT, pages);
+
+	if (disable_nx) {
+		kvm_check_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES);
+
+		/*
+		 * Check if this process has the reboot permissions needed to
+		 * disable NX huge pages on a VM.
+		 *
+		 * The reboot call below will never have any effect because
+		 * the magic values are not set correctly, however the
+		 * permission check is done before the magic value check.
+		 */
+		r = syscall(SYS_reboot, 0, 0, 0, NULL);
+		if (r && errno == EPERM) {
+			r = vm_disable_nx_huge_pages(vm);
+			TEST_ASSERT(r == EPERM,
+				    "This process should not have permission to disable NX huge pages");
+			return;
+		}
+
+		TEST_ASSERT(r && errno == EINVAL,
+			    "Reboot syscall should fail with -EINVAL");
+
+		r = vm_disable_nx_huge_pages(vm);
+		TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+	}
+
+	vm_vcpu_add_default(vm, 0, guest_code);
 
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
 				    HPAGE_GPA, HPAGE_SLOT,
@@ -121,21 +155,21 @@  int main(int argc, char **argv)
 	 * to be remapped at 4k.
 	 */
 	vcpu_run(vm, 0);
-	check_2m_page_count(vm, 1);
-	check_split_count(vm, 1);
+	check_2m_page_count(vm, disable_nx ? 2 : 1);
+	check_split_count(vm, disable_nx ? 0 : 1);
 
 	/*
 	 * Executing from the third huge page (previously unaccessed) will
 	 * cause part to be mapped at 4k.
 	 */
 	vcpu_run(vm, 0);
-	check_2m_page_count(vm, 1);
-	check_split_count(vm, 2);
+	check_2m_page_count(vm, disable_nx ? 3 : 1);
+	check_split_count(vm, disable_nx ? 0 : 2);
 
 	/* Reading from the first huge page again should have no effect. */
 	vcpu_run(vm, 0);
-	check_2m_page_count(vm, 1);
-	check_split_count(vm, 2);
+	check_2m_page_count(vm, disable_nx ? 3 : 1);
+	check_split_count(vm, disable_nx ? 0 : 2);
 
 	/*
 	 * Give recovery thread time to run. The wrapper script sets
@@ -148,7 +182,7 @@  int main(int argc, char **argv)
 	/*
 	 * Now that the reclaimer has run, all the split pages should be gone.
 	 */
-	check_2m_page_count(vm, 1);
+	check_2m_page_count(vm, disable_nx ? 3 : 1);
 	check_split_count(vm, 0);
 
 	/*
@@ -156,10 +190,16 @@  int main(int argc, char **argv)
 	 * reading from it causes a huge page mapping to be installed.
 	 */
 	vcpu_run(vm, 0);
-	check_2m_page_count(vm, 2);
+	check_2m_page_count(vm, disable_nx ? 3 : 2);
 	check_split_count(vm, 0);
 
 	kvm_vm_free(vm);
+}
+
+int main(int argc, char **argv)
+{
+	run_test(false);
+	run_test(true);
 
 	return 0;
 }