
[kvm-unit-tests] x86: nvmx: test max atomic switch MSRs

Message ID 20190912180928.123660-1-marcorr@google.com
State New, archived
Series [kvm-unit-tests] x86: nvmx: test max atomic switch MSRs

Commit Message

Marc Orr Sept. 12, 2019, 6:09 p.m. UTC
Exercise nested VMX's atomic MSR switch code (e.g., VM-entry MSR-load
list) at the maximum number of MSRs supported, as described in the SDM,
in the appendix chapter titled "MISCELLANEOUS DATA".

Suggested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Marc Orr <marcorr@google.com>
---
 x86/vmx_tests.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

Comments

Sean Christopherson Sept. 13, 2019, 3:24 p.m. UTC | #1
On Thu, Sep 12, 2019 at 11:09:28AM -0700, Marc Orr wrote:
> Exercise nested VMX's atomic MSR switch code (e.g., VM-entry MSR-load
> list) at the maximum number of MSRs supported, as described in the SDM,
> in the appendix chapter titled "MISCELLANEOUS DATA".
> 
> Suggested-by: Jim Mattson <jmattson@google.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Signed-off-by: Marc Orr <marcorr@google.com>
> ---
>  x86/vmx_tests.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 139 insertions(+)
> 
> diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
> index f035f24a771a..b3b4d5f7cc8f 100644
> --- a/x86/vmx_tests.c
> +++ b/x86/vmx_tests.c
> @@ -2718,6 +2718,11 @@ static void ept_reserved_bit(int bit)
>  #define PAGE_2M_ORDER 9
>  #define PAGE_1G_ORDER 18
>  
> +static void *alloc_2m_page(void)
> +{
> +	return alloc_pages(PAGE_2M_ORDER);
> +}

Allocating 2mb pages is complete overkill.  The absolute theoretical max
for the number of MSRs is (8 * 512) = 4096, for a total of 32kb per list.
We can even show the math so that it's obvious how the size is calculated.
Plus one order so we can test overrun.

/*
 * The max number of MSRs is specified in 3 bits, plus 1. I.e. 7+1==8.
 * Allocate 64k bytes of data to cover max_msr_list_size and then some.
 */
static const u32 msr_list_page_order = 4;

> +
>  static void *get_1g_page(void)
>  {
>  	static void *alloc;
> @@ -8570,6 +8575,138 @@ static int invalid_msr_entry_failure(struct vmentry_failure *failure)
>  	return VMX_TEST_VMEXIT;
>  }
>  
> +enum atomic_switch_msr_scenario {
> +	VM_ENTER_LOAD,
> +	VM_EXIT_LOAD,
> +	VM_EXIT_STORE,
> +	ATOMIC_SWITCH_MSR_SCENARIO_END,
> +};

How about:

enum atomic_switch_msr_lists {
	VM_ENTER_LOAD,
	VM_EXIT_LOAD,
	VM_EXIT_STORE,
	NR_ATOMIC_SWITCH_MSR_LISTS,
};

IMO that yields a much more intuitive test loop:

	for (i = 0; i < NR_ATOMIC_SWITCH_MSR_LISTS; i++) {
	}

But we probably don't even need a loop...

> +
> +static void atomic_switch_msr_limit_test_guest(void)
> +{
> +	vmcall();
> +}
> +
> +static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> +{
> +	int i;
> +
> +	for (i = 0; i < count; i++) {
> +		msr_list[i].index = MSR_IA32_TSC;
> +		msr_list[i].reserved = 0;
> +		msr_list[i].value = 0x1234567890abcdef;

Maybe overkill, but we can use a fast string op for this.  I think
I got the union right?

static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
{
	union {
		struct vmx_msr_entry msr;
		u64 val;
	} tmp;

	tmp.msr.index = MSR_IA32_TSC;
	tmp.msr.reserved = 0;
	tmp.msr.value = 0x1234567890abcdef;

	asm volatile (
		"rep stosq\n\t"
		: "=c"(count), "=D"(msr_list)
		: "a"(tmp.val), "c"(count), "D"(msr_list)
		: "memory"
	);
}

> +	}
> +}
> +
> +static void configure_atomic_switch_msr_limit_test(
> +		struct vmx_msr_entry *test_msr_list, int count)
> +{
> +	struct vmx_msr_entry *msr_list;
> +	const u32 two_mb = 1 << 21;
> +	enum atomic_switch_msr_scenario s;
> +	enum Encoding addr_field;
> +	enum Encoding cnt_field;
> +
> +	for (s = 0; s < ATOMIC_SWITCH_MSR_SCENARIO_END; s++) {
> +		switch (s) {
> +		case VM_ENTER_LOAD:
> +			addr_field = ENTER_MSR_LD_ADDR;
> +			cnt_field = ENT_MSR_LD_CNT;
> +			break;
> +		case VM_EXIT_LOAD:
> +			addr_field = EXIT_MSR_LD_ADDR;
> +			cnt_field = EXI_MSR_LD_CNT;
> +			break;
> +		case VM_EXIT_STORE:
> +			addr_field = EXIT_MSR_ST_ADDR;
> +			cnt_field = EXI_MSR_ST_CNT;
> +			break;
> +		default:
> +			TEST_ASSERT(false);
> +		}
> +
> +		msr_list = (struct vmx_msr_entry *)vmcs_read(addr_field);
> +		memset(msr_list, 0xff, two_mb);

Writing 8mb of data for each test is a waste of time, i.e. 6mb to reset
the three lists, and another 2mb to populate the target list.

The for-loop in the helper is also confusing and superfluous.

> +		if (msr_list == test_msr_list) {
> +			populate_msr_list(msr_list, count);
> +			vmcs_write(cnt_field, count);
> +		} else {
> +			vmcs_write(cnt_field, 0);
> +		}
> +	}
> +}
> +
> +static int max_msr_list_size(void)
> +{
> +	u32 vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
> +	u32 factor = ((vmx_misc & GENMASK(27, 25)) >> 25) + 1;
> +
> +	return factor * 512;
> +}
> +
> +static void atomic_switch_msr_limit_test(void)
> +{
> +	struct vmx_msr_entry *msr_list;
> +	enum atomic_switch_msr_scenario s;
> +
> +	/*
> +	 * Check for the IA32_TSC MSR,
> +	 * available with the "TSC flag" and used to populate the MSR lists.
> +	 */
> +	if (!(cpuid(1).d & (1 << 4))) {
> +		report_skip(__func__);
> +		return;
> +	}
> +
> +	/* Set L2 guest. */
> +	test_set_guest(atomic_switch_msr_limit_test_guest);
> +
> +	/* Setup atomic MSR switch lists. */
> +	msr_list = alloc_2m_page();
> +	vmcs_write(ENTER_MSR_LD_ADDR, virt_to_phys(msr_list));
> +	msr_list = alloc_2m_page();
> +	vmcs_write(EXIT_MSR_LD_ADDR, virt_to_phys(msr_list));
> +	msr_list = alloc_2m_page();
> +	vmcs_write(EXIT_MSR_ST_ADDR, virt_to_phys(msr_list));

This memory should really be freed.  Not holding pointers for each list
also seems silly, e.g. requires a VMREAD just to get a pointer.

> +
> +	/* Execute each test case. */
> +	for (s = 0; s < ATOMIC_SWITCH_MSR_SCENARIO_END; s++) {

Since you're testing the passing case, why not test all three at once?
I.e. hammer KVM while also consuming fewer test cycles.  The "MSR switch"
test already verifies the correctness of each list.

> +		struct vmx_msr_entry *msr_list;
> +		int count = max_msr_list_size();
> +
> +		switch (s) {
> +		case VM_ENTER_LOAD:
> +			msr_list = (struct vmx_msr_entry *)vmcs_read(
> +					ENTER_MSR_LD_ADDR);

These should use phys_to_virt() since virt_to_phys() is used to write them.

> +			break;
> +		case VM_EXIT_LOAD:
> +			msr_list = (struct vmx_msr_entry *)vmcs_read(
> +					EXIT_MSR_LD_ADDR);
> +			break;
> +		case VM_EXIT_STORE:
> +			msr_list = (struct vmx_msr_entry *)vmcs_read(
> +					EXIT_MSR_ST_ADDR);
> +			break;
> +		default:
> +			report("Bad test scenario, %d.", false, s);
> +			continue;
> +		}
> +
> +		configure_atomic_switch_msr_limit_test(msr_list, count);

Again, feeding the list into a helper that also iterates over the lists
is not intuitive in terms of understanding what is being tested.

> +		enter_guest();
> +		assert_exit_reason(VMX_VMCALL);
> +	}
> +
> +	/* Reset the atomic MSR switch count to 0 for all three lists. */
> +	configure_atomic_switch_msr_limit_test(0, 0);
> +	/* Proceed past guest's single vmcall instruction. */
> +	enter_guest();
> +	skip_exit_vmcall();
> +	/* Terminate the guest. */
> +	enter_guest();
> +	skip_exit_vmcall();
> +}
> +
>  
>  #define TEST(name) { #name, .v2 = name }
>  
> @@ -8660,5 +8797,7 @@ struct vmx_test vmx_tests[] = {
>  	TEST(ept_access_test_paddr_read_execute_ad_enabled),
>  	TEST(ept_access_test_paddr_not_present_page_fault),
>  	TEST(ept_access_test_force_2m_page),
> +	/* Atomic MSR switch tests. */
> +	TEST(atomic_switch_msr_limit_test),

This is a misleading name, e.g. it took me quite a while to realize this
is testing only the passing scenario.  For me, "limit test" implies that
it'd be deliberately exceeding the limit, or at least testing both the
passing and failing cases.  I suppose we can't easily test the VMX abort
cases, but we can at least test VM_ENTER_LOAD.

Distilling things down to the bare minimum yields something like the
following.

	struct vmx_msr_entry *vm_enter_load;
	struct vmx_msr_entry *vm_exit_load;
	struct vmx_msr_entry *vm_exit_store;
	u32 i, max_allowed;

	max_allowed = max_msr_list_size();

	/* Setup atomic MSR switch lists. */
	vm_enter_load = alloc_pages(msr_list_page_order);
	vm_exit_load = alloc_pages(msr_list_page_order);
	vm_exit_store = alloc_pages(msr_list_page_order);

	populate_msr_list(vm_enter_load, max_allowed + 1);
	populate_msr_list(vm_exit_load,  max_allowed + 1);
	populate_msr_list(vm_exit_store, max_allowed + 1);

	vmcs_write(ENTER_MSR_LD_ADDR, virt_to_phys(vm_enter_load));
	vmcs_write(EXIT_MSR_LD_ADDR, virt_to_phys(vm_exit_load));
	vmcs_write(EXIT_MSR_ST_ADDR, virt_to_phys(vm_exit_store));

	/*
	 * VM-Enter should fail when exceeding the max number of MSRs.  The VM-Exit
	 * lists would cause a VMX abort, so those are currently not tested.
	 */
	vmcs_write(ENT_MSR_LD_CNT, max_allowed + 1);
	vmcs_write(EXI_MSR_LD_CNT, max_allowed);
	vmcs_write(EXI_MSR_ST_CNT, max_allowed);

	helper_function_to_verify_vm_fail();

	/*
	 * VM-Enter should succeed up to the max number of MSRs per list, and
	 * should not consume junk beyond the last entry.
	 */
	vmcs_write(ENT_MSR_LD_CNT, max_allowed);

	/* This pointer arithmetic is probably wrong. */
	memset(vm_enter_load + max_allowed + 1, 0xff, sizeof(*vm_enter_load));
	memset(vm_exit_load  + max_allowed + 1, 0xff, sizeof(*vm_exit_load));
	memset(vm_exit_store + max_allowed + 1, 0xff, sizeof(*vm_exit_store));

	enter_guest();
	assert_exit_reason(VMX_VMCALL);

	<clean up>
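
A rough, untested sketch of what the elided helper_function_to_verify_vm_fail()
might check is below.  The VMX_ENTRY_FAILURE, VMX_FAIL_MSR and EXI_QUALIFICATION
names are assumptions about what vmx.h provides; per the SDM, a VM-entry failure
due to MSR loading is reported as a VM exit with bit 31 of the exit reason set,
basic exit reason 34, and the exit qualification holding the 1-based index of
the entry that failed to load:

/* Rough sketch; constant names assumed from vmx.h, adjust as needed. */
static void verify_vm_enter_msr_load_fail(u32 expected_entry)
{
	u64 reason = vmcs_read(EXI_REASON);

	/* Bit 31 (entry failure) plus basic reason "MSR loading". */
	report("VM-entry failed due to MSR loading",
	       (reason & VMX_ENTRY_FAILURE) && (reason & 0xffff) == VMX_FAIL_MSR);
	/* Exit qualification is the 1-based index of the offending entry. */
	report("Exit qualification is %d",
	       vmcs_read(EXI_QUALIFICATION) == expected_entry, expected_entry);
}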

>  	{ NULL, NULL, NULL, NULL, NULL, {0} },
>  };
> -- 
> 2.23.0.237.gc6a4ce50a0-goog
>
Jim Mattson Sept. 13, 2019, 4:26 p.m. UTC | #2
On Fri, Sep 13, 2019 at 8:24 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:

> This is a misleading name, e.g. it took me quite a while to realize this
> is testing only the passing scenario.  For me, "limit test" implies that
> it'd be deliberately exceeding the limit, or at least testing both the
> passing and failing cases.  I suppose we can't easily test the VMX abort
> cases, but we can at least test VM_ENTER_LOAD.

It's hard to test for "undefined behavior may result." :-)

One could check to see if the test is running under KVM, and then
check for the behavior that Marc's other patch introduces, but even
that is implementation-dependent.
Sean Christopherson Sept. 13, 2019, 5:15 p.m. UTC | #3
On Fri, Sep 13, 2019 at 09:26:04AM -0700, Jim Mattson wrote:
> On Fri, Sep 13, 2019 at 8:24 AM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> 
> > This is a misleading name, e.g. it took me quite a while to realize this
> > is testing only the passing scenario.  For me, "limit test" implies that
> > it'd be deliberately exceeding the limit, or at least testing both the
> > passing and failing cases.  I suppose we can't easily test the VMX abort
> > cases, but we can at least test VM_ENTER_LOAD.
> 
> It's hard to test for "undefined behavior may result." :-)

Fortune favors the bold?

> One could check to see if the test is running under KVM, and then
> check for the behavior that Marc's other patch introduces, but even
> that is implementation-dependent.

Hmm, what if kvm-unit-tests accepts both VM-Enter success and fail as
passing conditions?  We can at least verify KVM doesn't explode, and if
VM-Enter fails, that the exit qual is correct.

The SDM states that the max is a recommendation, which leads me to believe
that bare metal will work just fine if the software exceeds the
recommended max by an entry or two, but can run into trouble if the list
is ludicrously big.  There's no way the CPU is tuned so finely that it
works at N but fails at N+1.
Jim Mattson Sept. 13, 2019, 5:21 p.m. UTC | #4
On Fri, Sep 13, 2019 at 10:15 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Sep 13, 2019 at 09:26:04AM -0700, Jim Mattson wrote:
> > On Fri, Sep 13, 2019 at 8:24 AM Sean Christopherson
> > <sean.j.christopherson@intel.com> wrote:
> >
> > > This is a misleading name, e.g. it took me quite a while to realize this
> > > is testing only the passing scenario.  For me, "limit test" implies that
> > > it'd be deliberately exceeding the limit, or at least testing both the
> > > passing and failing cases.  I suppose we can't easily test the VMX abort
> > > cases, but we can at least test VM_ENTER_LOAD.
> >
> > It's hard to test for "undefined behavior may result." :-)
>
> Fortune favors the bold?
>
> > One could check to see if the test is running under KVM, and then
> > check for the behavior that Marc's other patch introduces, but even
> > that is implementation-dependent.
>
> Hmm, what if kvm-unit-tests accepts both VM-Enter success and fail as
> passing conditions?  We can at least verify KVM doesn't explode, and if
> VM-Enter fails, that the exit qual is correct.
>
> The SDM state that the max is a recommendation, which leads me to believe
> that bare metal will work just fine if the software exceeds the
> recommended max by an entry or two, but can run into trouble if the list
> is ludicrously big.  There's no way the CPU is tuned so finely that it
> works at N but fails at N+1.

It would have been nice if the hardware designers had seen fit to
architect a hard failure at N+1, but I guess that ship sailed long
ago. :-(
Marc Orr Sept. 13, 2019, 6:02 p.m. UTC | #5
> > +static void *alloc_2m_page(void)
> > +{
> > +     return alloc_pages(PAGE_2M_ORDER);
> > +}
>
> Allocating 2mb pages is complete overkill.  The absolute theoretical max
> for the number of MSRs is (8 * 512) = 4096, for a total of 32kb per list.
> We can even show the math so that it's obvious how the size is calculated.
> Plus one order so we can test overrun.
> /*
>  * The max number of MSRs is specified in 3 bits bits, plus 1. I.e. 7+1==8.
>  * Allocate 64k bytes of data to cover max_msr_list_size and then some.
>  */
> static const u32 msr_list_page_order = 4;

8 * 512 should be 16 * 512, right [1]? Therefore, the maximum
theoretical list size is 64 kB.

I'll happily apply what you've suggested in v2. But I don't see why
it's so terrible to over-allocate here. Leveraging a generic 2 MB page
allocator can be reused going forward, and encourages uniformity
across tests.

[1]
struct vmx_msr_entry {
  u32 index;
  u32 reserved;
  u64 value;
} __aligned(16);
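
Purely for reference, a sketch of that size math in code (max_msr_list_bytes()
is a hypothetical helper, not something the patch adds):

/*
 * IA32_VMX_MISC[27:25] encodes (N / 512) - 1, so the recommended max is at
 * most (7 + 1) * 512 = 4096 entries.  With sizeof(struct vmx_msr_entry) == 16,
 * a full list is at most 4096 * 16 = 64 kB, i.e. sixteen 4k pages (order 4),
 * and one extra order leaves room to test overrun.
 */
static u32 max_msr_list_bytes(void)
{
	return max_msr_list_size() * sizeof(struct vmx_msr_entry);
}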

> > +static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < count; i++) {
> > +             msr_list[i].index = MSR_IA32_TSC;
> > +             msr_list[i].reserved = 0;
> > +             msr_list[i].value = 0x1234567890abcdef;
>
> Maybe overkill, but we can use a fast string op for this.  I think
> I got the union right?
>
> static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> {
>         union {
>                 struct vmx_msr_entry msr;
>                 u64 val;
>         } tmp;
>
>         tmp.msr.index = MSR_IA32_TSC;
>         tmp.msr.reserved = 0;
>         tmp.msr.value = 0x1234567890abcdef;
>
>         asm volatile (
>                 "rep stosq\n\t"
>                 : "=c"(count), "=D"(msr_list)
>                 : "a"(tmp.val), "c"(count), "D"(msr_list)
>                 : "memory"
>         );
> }

:-). I do think it's overkill. However, I'm OK to apply this
suggestion in v2 if everyone is OK with me adding a comment that
explains it in terms of the original code, so that x86 noobs, like
myself, can understand what's going on.

> This is a misleading name, e.g. it took me quite a while to realize this
> is testing only the passing scenario.  For me, "limit test" implies that
> it'd be deliberately exceeding the limit, or at least testing both the
> passing and failing cases.  I suppose we can't easily test the VMX abort
> cases, but we can at least test VM_ENTER_LOAD.
>
> Distilling things down to the bare minimum yields something like the
> following.

Looks excellent overall. Still not clear what the consensus is on
whether or not to test the VM-entry failure. I think a flag seems like
a reasonable compromise. I've never added a flag to a kvm-unit-test,
so I'll see if I can figure that out.
Sean Christopherson Sept. 13, 2019, 6:30 p.m. UTC | #6
On Fri, Sep 13, 2019 at 11:02:39AM -0700, Marc Orr wrote:
> > > +static void *alloc_2m_page(void)
> > > +{
> > > +     return alloc_pages(PAGE_2M_ORDER);
> > > +}
> >
> > Allocating 2mb pages is complete overkill.  The absolute theoretical max
> > for the number of MSRs is (8 * 512) = 4096, for a total of 32kb per list.
> > We can even show the math so that it's obvious how the size is calculated.
> > Plus one order so we can test overrun.
> > /*
> >  * The max number of MSRs is specified in 3 bits bits, plus 1. I.e. 7+1==8.
> >  * Allocate 64k bytes of data to cover max_msr_list_size and then some.
> >  */
> > static const u32 msr_list_page_order = 4;
> 
> 8 * 512 should be 16 * 512, right [1]? Therefore, the maximum
> theoretical is 64 kB.

*sigh*  I stand by my assertion that "Math is hard".  :-)

> I'll happily apply what you've suggested in v2. But I don't see why
> it's so terrible to over-allocate here. Leveraging a generic 2 MB page
> allocator can be reused going forward, and encourages uniformity
> across tests.

My main concern is avoiding setting 6mb+ of memory.  I like to run the
tests in L1 and L2 and would prefer to keep overhead at a minimum.

As for the allocation itself, being precise in the allocation size is a
form of documentation, e.g. it conveys that the size/order was chosen to
ensure enough space for the maximum theoretical list size.  A purely
arbitrary size, especially one that corresponds with a large page size,
can lead to people looking for things that don't exist, e.g. the 2mb size
is partially what led me to believe that this test was deliberately
exceeding the limit, otherwise why allocate such a large amount of memory?
I also didn't know if 2mb was sufficient to handle the maximum theoretical
list size.

> [1]
> struct vmx_msr_entry {
>   u32 index;
>   u32 reserved;
>   u64 value;
> } __aligned(16);
> 
> > > +static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> > > +{
> > > +     int i;
> > > +
> > > +     for (i = 0; i < count; i++) {
> > > +             msr_list[i].index = MSR_IA32_TSC;
> > > +             msr_list[i].reserved = 0;
> > > +             msr_list[i].value = 0x1234567890abcdef;
> >
> > Maybe overkill, but we can use a fast string op for this.  I think
> > I got the union right?
> >
> > static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> > {
> >         union {
> >                 struct vmx_msr_entry msr;
> >                 u64 val;
> >         } tmp;
> >
> >         tmp.msr.index = MSR_IA32_TSC;
> >         tmp.msr.reserved = 0;
> >         tmp.msr.value = 0x1234567890abcdef;
> >
> >         asm volatile (
> >                 "rep stosq\n\t"
> >                 : "=c"(count), "=D"(msr_list)
> >                 : "a"(tmp.val), "c"(count), "D"(msr_list)
> >                 : "memory"
> >         );
> > }
> 
> :-). I do think it's overkill. However, I'm OK to apply this
> suggestion in v2 if everyone is OK with me adding a comment that
> explains it in terms of the original code, so that x86 noobs, like
> myself, can understand what's going on.

Never mind, I can't count.  This only works if the size of the entry
is 8 bytes.
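
If a string op were still wanted, one purely illustrative (and untested)
alternative that copes with 16-byte entries is to write the first entry
normally and then replicate it with an overlapping forward rep movsq, so each
entry is copied from the entry 16 bytes before it:

static void populate_msr_list_movsq(struct vmx_msr_entry *msr_list, int count)
{
	void *src = msr_list, *dst = msr_list + 1;
	u64 nr_qwords = (count - 1) * 2;	/* two qwords per 16-byte entry */

	msr_list[0].index = MSR_IA32_TSC;
	msr_list[0].reserved = 0;
	msr_list[0].value = 0x1234567890abcdef;

	if (count < 2)
		return;

	/* dst == src + 16, so each qword read was written two iterations ago. */
	asm volatile("rep movsq"
		     : "+c"(nr_qwords), "+S"(src), "+D"(dst)
		     : : "memory");
}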

> > This is a misleading name, e.g. it took me quite a while to realize this
> > is testing only the passing scenario.  For me, "limit test" implies that
> > it'd be deliberately exceeding the limit, or at least testing both the
> > passing and failing cases.  I suppose we can't easily test the VMX abort
> > cases, but we can at least test VM_ENTER_LOAD.
> >
> > Distilling things down to the bare minimum yields something like the
> > following.
> 
> Looks excellent overall. Still not clear what the consensus is on
> whether or not to test the VM-entry failure. I think a flag seems like
> a reasonable compromise. I've never added a flag to a kvm-unit-test,
> so I'll see if I can figure that out.

No need for a flag if you want to go that route, just put it in a separate
VMX subtest and exclude said test from the [vmx] config, i.e. make the
test opt-in.
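
For illustration, a hypothetical opt-in entry in x86/unittests.cfg could look
roughly like the following (section and test names are placeholders, and the
extra_params should mirror whatever the existing [vmx] entry uses):

[vmx_atomic_switch_overflow_test]
file = vmx.flat
extra_params = -cpu host,+vmx -append atomic_switch_overflow_msrs_test
arch = x86_64
groups = vmx nodefault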
Marc Orr Sept. 13, 2019, 9:55 p.m. UTC | #7
Thanks for the review! I'll get to work on v2 now.

> > I'll happily apply what you've suggested in v2. But I don't see why
> > it's so terrible to over-allocate here. Leveraging a generic 2 MB page
> > allocator can be reused going forward, and encourages uniformity
> > across tests.
>
> My main concern is avoiding setting 6mb+ of memory.  I like to run the
> tests in L1 and L2 and would prefer to keep overhead at a minimum.
>
> As for the allocation itself, being precise in the allocation size is a
> form of documentation, e.g. it conveys that the size/order was chosen to
> ensure enough space for the maximum theoretical list size.  A purely
> arbitrary size, especially one that corresponds with a large page size,
> can lead to people looking for things that don't exist, e.g. the 2mb size
> is partially what led me to believe that this test was deliberately
> exceeding the limit, otherwise why allocate such a large amount of memory?
> I also didn't know if 2mb was sufficient to handle the maximum theoretical
> list size.

SGTM. I'll make this change in v2.

> > > Distilling things down to the bare minimum yields something like the
> > > following.
> >
> > Looks excellent overall. Still not clear what the consensus is on
> > whether or not to test the VM-entry failure. I think a flag seems like
> > a reasonable compromise. I've never added a flag to a kvm-unit-test,
> > so I'll see if I can figure that out.
>
> No need for a flag if you want to go that route, just put it in a separate
> VMX subtest and exclude said test from the [vmx] config, i.e. make the
> test opt-in.

SGTM, thanks!
Marc Orr Sept. 14, 2019, 12:49 a.m. UTC | #8
> > +static void *alloc_2m_page(void)
> > +{
> > +     return alloc_pages(PAGE_2M_ORDER);
> > +}
>
> Allocating 2mb pages is complete overkill.  The absolute theoretical max
> for the number of MSRs is (8 * 512) = 4096, for a total of 32kb per list.
> We can even show the math so that it's obvious how the size is calculated.
> Plus one order so we can test overrun.
>
> /*
>  * The max number of MSRs is specified in 3 bits bits, plus 1. I.e. 7+1==8.
>  * Allocate 64k bytes of data to cover max_msr_list_size and then some.
>  */
> static const u32 msr_list_page_order = 4;
>

Done. Changed msr_list_page_order to 5, per our previous discussion
that you meant 16 * 512.

> > +enum atomic_switch_msr_scenario {
> > +     VM_ENTER_LOAD,
> > +     VM_EXIT_LOAD,
> > +     VM_EXIT_STORE,
> > +     ATOMIC_SWITCH_MSR_SCENARIO_END,
> > +};
>
> How about:
>
> enum atomic_switch_msr_lists {
>         VM_ENTER_LOAD,
>         VM_EXIT_LOAD,
>         VM_EXIT_STORE,
>         NR_ATOMIC_SWITCH_MSR_LISTS,
> };
>
> IMO that yields a much more intuitive test loop:
>
>         for (i = 0; i < NR_ATOMIC_SWITCH_MSR_LISTS; i++) {
>         }
>
> But we probably don't even need a loop...

Ack. Got rid of the loop.

> > +static void atomic_switch_msr_limit_test_guest(void)
> > +{
> > +     vmcall();
> > +}
> > +
> > +static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < count; i++) {
> > +             msr_list[i].index = MSR_IA32_TSC;
> > +             msr_list[i].reserved = 0;
> > +             msr_list[i].value = 0x1234567890abcdef;
>
> Maybe overkill, but we can use a fast string op for this.  I think
> I got the union right?
>
> static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
> {
>         union {
>                 struct vmx_msr_entry msr;
>                 u64 val;
>         } tmp;
>
>         tmp.msr.index = MSR_IA32_TSC;
>         tmp.msr.reserved = 0;
>         tmp.msr.value = 0x1234567890abcdef;
>
>         asm volatile (
>                 "rep stosq\n\t"
>                 : "=c"(count), "=D"(msr_list)
>                 : "a"(tmp.val), "c"(count), "D"(msr_list)
>                 : "memory"
>         );
> }

Skipped per our previous conversation that this doesn't work due to
the struct being 16 bytes.

> > +     for (s = 0; s < ATOMIC_SWITCH_MSR_SCENARIO_END; s++) {
> > +             switch (s) {
> > +             case VM_ENTER_LOAD:
> > +                     addr_field = ENTER_MSR_LD_ADDR;
> > +                     cnt_field = ENT_MSR_LD_CNT;
> > +                     break;
> > +             case VM_EXIT_LOAD:
> > +                     addr_field = EXIT_MSR_LD_ADDR;
> > +                     cnt_field = EXI_MSR_LD_CNT;
> > +                     break;
> > +             case VM_EXIT_STORE:
> > +                     addr_field = EXIT_MSR_ST_ADDR;
> > +                     cnt_field = EXI_MSR_ST_CNT;
> > +                     break;
> > +             default:
> > +                     TEST_ASSERT(false);
> > +             }
> > +
> > +             msr_list = (struct vmx_msr_entry *)vmcs_read(addr_field);
> > +             memset(msr_list, 0xff, two_mb);
>
> Writing 8mb of data for each test is a waste of time, i.e. 6mb to reset
> each list, and another 2mb to populate the target list.
>
> The for-loop in the helper is also confusing and superfluous.

Ack. Got rid of the helper.

> > +     /* Setup atomic MSR switch lists. */
> > +     msr_list = alloc_2m_page();
> > +     vmcs_write(ENTER_MSR_LD_ADDR, virt_to_phys(msr_list));
> > +     msr_list = alloc_2m_page();
> > +     vmcs_write(EXIT_MSR_LD_ADDR, virt_to_phys(msr_list));
> > +     msr_list = alloc_2m_page();
> > +     vmcs_write(EXIT_MSR_ST_ADDR, virt_to_phys(msr_list));
>
> This memory should really be freed.  Not holding pointers for each list
> also seems silly, e.g. requires a VMREAD just to get a pointer.

Done.

> > +
> > +     /* Execute each test case. */
> > +     for (s = 0; s < ATOMIC_SWITCH_MSR_SCENARIO_END; s++) {
>
> Since you're testing the passing case, why not test all three at once?
> I.e. hammer KVM while also consuming less test cycles.  The "MSR switch"
> test already verifies the correctness of each list.

Done.

> > +             struct vmx_msr_entry *msr_list;
> > +             int count = max_msr_list_size();
> > +
> > +             switch (s) {
> > +             case VM_ENTER_LOAD:
> > +                     msr_list = (struct vmx_msr_entry *)vmcs_read(
> > +                                     ENTER_MSR_LD_ADDR);
>
> These should use phys_to_virt() since virt_to_phys() is used to write them.

Hmm. Actually, why don't we just use an explicit (u64) cast. That's
what I originally had, but then when Jim pre-reviewed the patch before
I posted it on the list he suggested virt_to_phys() with a ?. I didn't
really understand why that was better than the explicit cast. And your
suggestion to use phys_to_virt() doesn't work at all because it
returns a pointer rather than a u64.
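
For reference, the two spellings being compared, both assuming the test's
identity-mapped environment where they end up naming the same memory:

	msr_list = (struct vmx_msr_entry *)vmcs_read(ENTER_MSR_LD_ADDR);
	msr_list = phys_to_virt(vmcs_read(ENTER_MSR_LD_ADDR));

(Or, per the earlier suggestion, keep the pointer returned by the allocator
and avoid the VMREAD entirely.)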

> > +                     break;
> > +             case VM_EXIT_LOAD:
> > +                     msr_list = (struct vmx_msr_entry *)vmcs_read(
> > +                                     EXIT_MSR_LD_ADDR);
> > +                     break;
> > +             case VM_EXIT_STORE:
> > +                     msr_list = (struct vmx_msr_entry *)vmcs_read(
> > +                                     EXIT_MSR_ST_ADDR);
> > +                     break;
> > +             default:
> > +                     report("Bad test scenario, %d.", false, s);
> > +                     continue;
> > +             }
> > +
> > +             configure_atomic_switch_msr_limit_test(msr_list, count);
>
> Again, feeding the list into a helper that also iterates over the lists
> is not intuitive in terms of understanding what is being tested.

Done. Got rid of the loop.

> > +     /* Atomic MSR switch tests. */
> > +     TEST(atomic_switch_msr_limit_test),
>
> This is a misleading name, e.g. it took me quite a while to realize this
> is testing only the passing scenario.  For me, "limit test" implies that
> it'd be deliberately exceeding the limit, or at least testing both the
> passing and failing cases.  I suppose we can't easily test the VMX abort
> cases, but we can at least test VM_ENTER_LOAD.

Done. Renamed to atomic_switch_max_msrs_test.

> Distilling things down to the bare minimum yields something like the
> following.

Thanks!

Patch

diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index f035f24a771a..b3b4d5f7cc8f 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -2718,6 +2718,11 @@  static void ept_reserved_bit(int bit)
 #define PAGE_2M_ORDER 9
 #define PAGE_1G_ORDER 18
 
+static void *alloc_2m_page(void)
+{
+	return alloc_pages(PAGE_2M_ORDER);
+}
+
 static void *get_1g_page(void)
 {
 	static void *alloc;
@@ -8570,6 +8575,138 @@  static int invalid_msr_entry_failure(struct vmentry_failure *failure)
 	return VMX_TEST_VMEXIT;
 }
 
+enum atomic_switch_msr_scenario {
+	VM_ENTER_LOAD,
+	VM_EXIT_LOAD,
+	VM_EXIT_STORE,
+	ATOMIC_SWITCH_MSR_SCENARIO_END,
+};
+
+static void atomic_switch_msr_limit_test_guest(void)
+{
+	vmcall();
+}
+
+static void populate_msr_list(struct vmx_msr_entry *msr_list, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		msr_list[i].index = MSR_IA32_TSC;
+		msr_list[i].reserved = 0;
+		msr_list[i].value = 0x1234567890abcdef;
+	}
+}
+
+static void configure_atomic_switch_msr_limit_test(
+		struct vmx_msr_entry *test_msr_list, int count)
+{
+	struct vmx_msr_entry *msr_list;
+	const u32 two_mb = 1 << 21;
+	enum atomic_switch_msr_scenario s;
+	enum Encoding addr_field;
+	enum Encoding cnt_field;
+
+	for (s = 0; s < ATOMIC_SWITCH_MSR_SCENARIO_END; s++) {
+		switch (s) {
+		case VM_ENTER_LOAD:
+			addr_field = ENTER_MSR_LD_ADDR;
+			cnt_field = ENT_MSR_LD_CNT;
+			break;
+		case VM_EXIT_LOAD:
+			addr_field = EXIT_MSR_LD_ADDR;
+			cnt_field = EXI_MSR_LD_CNT;
+			break;
+		case VM_EXIT_STORE:
+			addr_field = EXIT_MSR_ST_ADDR;
+			cnt_field = EXI_MSR_ST_CNT;
+			break;
+		default:
+			TEST_ASSERT(false);
+		}
+
+		msr_list = (struct vmx_msr_entry *)vmcs_read(addr_field);
+		memset(msr_list, 0xff, two_mb);
+		if (msr_list == test_msr_list) {
+			populate_msr_list(msr_list, count);
+			vmcs_write(cnt_field, count);
+		} else {
+			vmcs_write(cnt_field, 0);
+		}
+	}
+}
+
+static int max_msr_list_size(void)
+{
+	u32 vmx_misc = rdmsr(MSR_IA32_VMX_MISC);
+	u32 factor = ((vmx_misc & GENMASK(27, 25)) >> 25) + 1;
+
+	return factor * 512;
+}
+
+static void atomic_switch_msr_limit_test(void)
+{
+	struct vmx_msr_entry *msr_list;
+	enum atomic_switch_msr_scenario s;
+
+	/*
+	 * Check for the IA32_TSC MSR,
+	 * available with the "TSC flag" and used to populate the MSR lists.
+	 */
+	if (!(cpuid(1).d & (1 << 4))) {
+		report_skip(__func__);
+		return;
+	}
+
+	/* Set L2 guest. */
+	test_set_guest(atomic_switch_msr_limit_test_guest);
+
+	/* Setup atomic MSR switch lists. */
+	msr_list = alloc_2m_page();
+	vmcs_write(ENTER_MSR_LD_ADDR, virt_to_phys(msr_list));
+	msr_list = alloc_2m_page();
+	vmcs_write(EXIT_MSR_LD_ADDR, virt_to_phys(msr_list));
+	msr_list = alloc_2m_page();
+	vmcs_write(EXIT_MSR_ST_ADDR, virt_to_phys(msr_list));
+
+	/* Execute each test case. */
+	for (s = 0; s < ATOMIC_SWITCH_MSR_SCENARIO_END; s++) {
+		struct vmx_msr_entry *msr_list;
+		int count = max_msr_list_size();
+
+		switch (s) {
+		case VM_ENTER_LOAD:
+			msr_list = (struct vmx_msr_entry *)vmcs_read(
+					ENTER_MSR_LD_ADDR);
+			break;
+		case VM_EXIT_LOAD:
+			msr_list = (struct vmx_msr_entry *)vmcs_read(
+					EXIT_MSR_LD_ADDR);
+			break;
+		case VM_EXIT_STORE:
+			msr_list = (struct vmx_msr_entry *)vmcs_read(
+					EXIT_MSR_ST_ADDR);
+			break;
+		default:
+			report("Bad test scenario, %d.", false, s);
+			continue;
+		}
+
+		configure_atomic_switch_msr_limit_test(msr_list, count);
+		enter_guest();
+		assert_exit_reason(VMX_VMCALL);
+	}
+
+	/* Reset the atomic MSR switch count to 0 for all three lists. */
+	configure_atomic_switch_msr_limit_test(0, 0);
+	/* Proceed past guest's single vmcall instruction. */
+	enter_guest();
+	skip_exit_vmcall();
+	/* Terminate the guest. */
+	enter_guest();
+	skip_exit_vmcall();
+}
+
 
 #define TEST(name) { #name, .v2 = name }
 
@@ -8660,5 +8797,7 @@  struct vmx_test vmx_tests[] = {
 	TEST(ept_access_test_paddr_read_execute_ad_enabled),
 	TEST(ept_access_test_paddr_not_present_page_fault),
 	TEST(ept_access_test_force_2m_page),
+	/* Atomic MSR switch tests. */
+	TEST(atomic_switch_msr_limit_test),
 	{ NULL, NULL, NULL, NULL, NULL, {0} },
 };