Message ID | 20200518201600.255669-3-makarandsonare@google.com |
---|---|
State | New, archived |
Series | Fix VMX preemption timer migration |
Makarand Sonare <makarandsonare@google.com> writes:

> When a nested VM with a VMX-preemption timer is migrated, verify that the
> nested VM and its parent VM observe the VMX-preemption timer exit close to
> the original expiration deadline.
>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Change-Id: Ia79337c682ee161399525edc34201fad473fc190
> ---
>  tools/arch/x86/include/uapi/asm/kvm.h         |   1 +
>  tools/testing/selftests/kvm/.gitignore        |   1 +
>  tools/testing/selftests/kvm/Makefile          |   1 +
>  .../testing/selftests/kvm/include/kvm_util.h  |   2 +
>  .../selftests/kvm/include/x86_64/processor.h  |  11 +-
>  .../selftests/kvm/include/x86_64/vmx.h        |  27 ++
>  .../kvm/x86_64/vmx_preemption_timer_test.c    | 256 ++++++++++++++++++
>  7 files changed, 295 insertions(+), 4 deletions(-)
>  create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
>
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 60701178b9cc1..13dca545554dc 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -402,6 +402,7 @@ struct kvm_sync_regs {
>  struct kvm_vmx_nested_state_data {
>          __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
>          __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> +        __u64 preemption_timer_deadline;
>  };
>
>  struct kvm_vmx_nested_state_hdr {
> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
> index 222e50104296a..f159718f90c0a 100644
> --- a/tools/testing/selftests/kvm/.gitignore
> +++ b/tools/testing/selftests/kvm/.gitignore
> @@ -10,6 +10,7 @@
>  /x86_64/set_sregs_test
>  /x86_64/smm_test
>  /x86_64/state_test
> +/x86_64/vmx_preemption_timer_test
>  /x86_64/svm_vmcall_test
>  /x86_64/sync_regs_test
>  /x86_64/vmx_close_while_nested_test
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index c66f4eec34111..780f0c189a7bc 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -20,6 +20,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
>  TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
>  TEST_GEN_PROGS_x86_64 += x86_64/smm_test
>  TEST_GEN_PROGS_x86_64 += x86_64/state_test
> +TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
>  TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
>  TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
>  TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index e244c6ecfc1d5..919e161dd2893 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -314,6 +314,8 @@ void ucall_uninit(struct kvm_vm *vm);
>  void ucall(uint64_t cmd, int nargs, ...);
>  uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
>
> +#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
> +        ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
>  #define GUEST_SYNC(stage)    ucall(UCALL_SYNC, 2, "hello", stage)
>  #define GUEST_DONE()         ucall(UCALL_DONE, 0)
>  #define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
> diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
> index 7428513a4c687..7cb19eca6c72d 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/processor.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
> @@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
>  static inline uint64_t rdtsc(void)
>  {
>          uint32_t eax, edx;
> -
> +        uint64_t tsc_val;
>          /*
>           * The lfence is to wait (on Intel CPUs) until all previous
> -         * instructions have been executed.
> +         * instructions have been executed. If software requires RDTSC to be
> +         * executed prior to execution of any subsequent instruction, it can
> +         * execute LFENCE immediately after RDTSC.
>           */
> -        __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
> -        return ((uint64_t)edx) << 32 | eax;
> +        __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
> +        tsc_val = ((uint64_t)edx) << 32 | eax;
> +        return tsc_val;
>  }
>
>  static inline uint64_t rdtscp(uint32_t *aux)
> diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
> index 3d27069b9ed9c..ccff3e6e27048 100644
> --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
> +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
> @@ -575,6 +575,33 @@ struct vmx_pages {
>          void *eptp;
>  };
>
> +union vmx_basic {
> +        u64 val;
> +        struct {
> +                u32 revision;
> +                u32     size:13,
> +                        reserved1:3,
> +                        width:1,
> +                        dual:1,
> +                        type:4,
> +                        insouts:1,
> +                        ctrl:1,
> +                        vm_entry_exception_ctrl:1,
> +                        reserved2:7;
> +        };
> +};
> +
> +union vmx_ctrl_msr {
> +        u64 val;
> +        struct {
> +                u32 set, clr;
> +        };
> +};
> +
> +union vmx_basic basic;
> +union vmx_ctrl_msr ctrl_pin_rev;
> +union vmx_ctrl_msr ctrl_exit_rev;
> +
>  struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
>  bool prepare_for_vmx_operation(struct vmx_pages *vmx);
>  void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
> diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
> new file mode 100644
> index 0000000000000..10893b11511be
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
> @@ -0,0 +1,256 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * VMX-preemption timer test
> + *
> + * Copyright (C) 2020, Google, LLC.
> + *
> + * Test to ensure the VM-Enter after migration doesn't
> + * incorrectly restart the timer with the full timer
> + * value instead of the partially decayed timer value
> + *
> + */
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/ioctl.h>
> +
> +#include "test_util.h"
> +
> +#include "kvm_util.h"
> +#include "processor.h"
> +#include "vmx.h"
> +
> +#define VCPU_ID                                 5
> +#define PREEMPTION_TIMER_VALUE                  100000000ull
> +#define PREEMPTION_TIMER_VALUE_THRESHOLD1        80000000ull
> +
> +u32 vmx_pt_rate;
> +bool l2_save_restore_done;
> +static u64 l2_vmx_pt_start;
> +volatile u64 l2_vmx_pt_finish;
> +
> +void l2_guest_code(void)
> +{
> +        u64 vmx_pt_delta;
> +
> +        vmcall();
> +        l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +        //
> +        // Wait until the 1st threshold has passed
> +        //
> +        do {
> +                l2_vmx_pt_finish = rdtsc();
> +                vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
> +                                vmx_pt_rate;
> +        } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
> +
> +        //
> +        // Force L2 through Save and Restore cycle
> +        //
> +        GUEST_SYNC(1);
> +
> +        l2_save_restore_done = 1;
> +
> +        //
> +        // Now wait for the preemption timer to fire and
> +        // exit to L1
> +        //
> +        while ((l2_vmx_pt_finish = rdtsc()))
> +                ;
> +}
> +
> +void l1_guest_code(struct vmx_pages *vmx_pages)
> +{
> +#define L2_GUEST_STACK_SIZE 64
> +        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
> +        u64 l1_vmx_pt_start;
> +        u64 l1_vmx_pt_finish;
> +        u64 l1_tsc_deadline, l2_tsc_deadline;
> +
> +        GUEST_ASSERT(vmx_pages->vmcs_gpa);
> +        GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
> +        GUEST_ASSERT(load_vmcs(vmx_pages));
> +        GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
> +
> +        prepare_vmcs(vmx_pages, l2_guest_code,
> +                     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
> +
> +        GUEST_ASSERT(!vmlaunch());
> +        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
> +        vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
> +
> +        //
> +        // Check for Preemption timer support and turn on PIN control
> +        //
> +        basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +        ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
> +                                            : MSR_IA32_VMX_PINBASED_CTLS);
> +        ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
> +                                             : MSR_IA32_VMX_EXIT_CTLS);
> +
> +        if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
> +            !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +                return;
> +
> +        GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> +                              vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
> +                              PIN_BASED_VMX_PREEMPTION_TIMER));
> +
> +        GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
> +                              PREEMPTION_TIMER_VALUE));
> +
> +        vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
> +
> +        l2_save_restore_done = 0;
> +
> +        l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +        GUEST_ASSERT(!vmresume());
> +
> +        l1_vmx_pt_finish = rdtsc();
> +
> +        //
> +        // Ensure exit from L2 happens after L2 goes through
> +        // save and restore
> +        GUEST_ASSERT(l2_save_restore_done);
> +
> +        //
> +        // Ensure the exit from L2 is due to preemption timer expiry
> +        //
> +        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
> +
> +        l1_tsc_deadline = l1_vmx_pt_start +
> +                (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
> +
> +        l2_tsc_deadline = l2_vmx_pt_start +
> +                (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
> +
> +        //
> +        // Sync with the host and pass the l1|l2 pt_expiry_finish times and
> +        // tsc deadlines so that host can verify they are as expected
> +        //
> +        GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
> +                        l2_vmx_pt_finish, l2_tsc_deadline);
> +}
> +
> +void guest_code(struct vmx_pages *vmx_pages)
> +{
> +        if (vmx_pages)
> +                l1_guest_code(vmx_pages);
> +
> +        GUEST_DONE();
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +        vm_vaddr_t vmx_pages_gva = 0;
> +
> +        struct kvm_regs regs1, regs2;
> +        struct kvm_vm *vm;
> +        struct kvm_run *run;
> +        struct kvm_x86_state *state;
> +        struct ucall uc;
> +        int stage;
> +
> +        /*
> +         * AMD currently does not implement any VMX features, so for now we
> +         * just early out.
> +         */
> +        nested_vmx_check_supported();
> +
> +        /* Create VM */
> +        vm = vm_create_default(VCPU_ID, 0, guest_code);
> +        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +        run = vcpu_state(vm, VCPU_ID);
> +
> +        vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +        if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {

You need to check the newly added KVM_CAP_NESTED_STATE_PREEMPTION_TIMER
instead or skip the whole test.

> +                vcpu_alloc_vmx(vm, &vmx_pages_gva);
> +                vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
> +        } else {
> +                pr_info("will skip nested state checks\n");

Copy/paste from state_test.c detected :-)

> +                goto done;
> +        }
> +
> +        for (stage = 1;; stage++) {
> +                _vcpu_run(vm, VCPU_ID);
> +                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
> +                            "Stage %d: unexpected exit reason: %u (%s),\n",
> +                            stage, run->exit_reason,
> +                            exit_reason_str(run->exit_reason));
> +
> +                switch (get_ucall(vm, VCPU_ID, &uc)) {
> +                case UCALL_ABORT:
> +                        TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
> +                                  __FILE__, uc.args[1]);
> +                        /* NOT REACHED */
> +                case UCALL_SYNC:
> +                        break;
> +                case UCALL_DONE:
> +                        goto done;
> +                default:
> +                        TEST_FAIL("Unknown ucall %lu", uc.cmd);
> +                }
> +
> +                /* UCALL_SYNC is handled here. */
> +                TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
> +                            uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
> +                            stage, (ulong)uc.args[1]);
> +                //
> +                // If this stage 2 then we should verify the vmx pt expiry
> +                // is as expected
> +                //

Nitpick: coding style requirements for selftests are definitely lower
than for KVM itself but could you please be consistent with comments and
use kernel style

/*
 * This is a comment.
 */

comments exclusively (you seem to have a mix)? Thanks!

> +                if (stage == 2) {
> +
> +                        pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
> +                                stage, uc.args[2], uc.args[3]);
> +
> +                        pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
> +                                stage, uc.args[4], uc.args[5]);
> +
> +                        //
> +                        // From L1's perspective verify Preemption timer hasn't
> +                        // expired too early
> +                        //
> +
> +                        TEST_ASSERT(uc.args[2] >= uc.args[3],
> +                                    "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
> +                                    stage, uc.args[2], uc.args[3]);
> +
> +                        //
> +                        // From L2's perspective verify Preemption timer hasn't
> +                        // expired too late
> +                        //
> +                        TEST_ASSERT(uc.args[4] < uc.args[5],
> +                                    "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
> +                                    stage, uc.args[4], uc.args[5]);
> +                }
> +
> +                state = vcpu_save_state(vm, VCPU_ID);
> +                memset(&regs1, 0, sizeof(regs1));
> +                vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +                kvm_vm_release(vm);
> +
> +                /* Restore state in a new VM. */
> +                kvm_vm_restart(vm, O_RDWR);
> +                vm_vcpu_add(vm, VCPU_ID);
> +                vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
> +                vcpu_load_state(vm, VCPU_ID, state);
> +                run = vcpu_state(vm, VCPU_ID);
> +                free(state);
> +
> +                memset(&regs2, 0, sizeof(regs2));
> +                vcpu_regs_get(vm, VCPU_ID, &regs2);
> +                TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
> +                            "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
> +                            (ulong) regs2.rdi, (ulong) regs2.rsi);
> +        }
> +
> +done:
> +        kvm_vm_free(vm);
> +}
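For illustration only, the capability gate asked for above might look like the
sketch below. KVM_CAP_NESTED_STATE_PREEMPTION_TIMER is the name given in the
review (presumably added earlier in this series); the exact constant and the
skip idiom are assumptions for the sketch, not code from the patch:

        /*
         * Hypothetical gate: only run the test when KVM advertises support
         * for migrating the VMX-preemption timer; otherwise skip.
         * KSFT_SKIP comes from kselftest.h.
         */
        if (!kvm_check_cap(KVM_CAP_NESTED_STATE_PREEMPTION_TIMER)) {
                pr_info("preemption timer nested state not available, skipping test\n");
                exit(KSFT_SKIP);
        }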
On 5/18/20 1:16 PM, Makarand Sonare wrote:
> When a nested VM with a VMX-preemption timer is migrated, verify that the
> nested VM and its parent VM observe the VMX-preemption timer exit close to
> the original expiration deadline.
>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Change-Id: Ia79337c682ee161399525edc34201fad473fc190

[...]

> +        GUEST_ASSERT(!vmlaunch());
> +        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
> +        vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

Please use vmcs_read(VM_EXIT_INSTRUCTION_LEN) instead of 3.

> +        //
> +        // Check for Preemption timer support and turn on PIN control
> +        //
> +        basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +        ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
> +                                            : MSR_IA32_VMX_PINBASED_CTLS);
> +        ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
> +                                             : MSR_IA32_VMX_EXIT_CTLS);
> +
> +        if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
> +            !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +                return;

Why not do this check in the beginning of L1, before VMLAUNCH of L2? If
these two controls are not supported, the test is just void.
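A sketch of the vmcs_read(VM_EXIT_INSTRUCTION_LEN) suggestion above, spelled
with the vmreadz()/vmwrite() helpers this selftest already uses (vmcs_read()
is the kvm-unit-tests name for the same VMREAD):

        /* Advance RIP past the VMCALL by its measured length, not a literal 3. */
        vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));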
> +        GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
> +                              vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
> +                              PIN_BASED_VMX_PREEMPTION_TIMER));
> +
> +        GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
> +                              PREEMPTION_TIMER_VALUE));
> +
> +        vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
> +
> +        l2_save_restore_done = 0;
> +
> +        l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
> +
> +        GUEST_ASSERT(!vmresume());
> +
> +        l1_vmx_pt_finish = rdtsc();
> +
> +        //
> +        // Ensure exit from L2 happens after L2 goes through
> +        // save and restore
> +        GUEST_ASSERT(l2_save_restore_done);
> +
> +        //
> +        // Ensure the exit from L2 is due to preemption timer expiry
> +        //
> +        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);

I am wondering if checking just the EXIT reason is enough to determine
that L2 has gone through a successful migration.

[...]

> +        if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
> +                vcpu_alloc_vmx(vm, &vmx_pages_gva);
> +                vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
> +        } else {
> +                pr_info("will skip nested state checks\n");

This error message looks odd. Shouldn't it say something like,

"nested state capability not available, skipping test"

> +                goto done;
> +        }

[...]

> +                        TEST_ASSERT(uc.args[4] < uc.args[5],
> +                                    "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
> +                                    stage, uc.args[4], uc.args[5]);
> +                }
> +

For readability, it's better to put a block comment here for the entire
save/restore operation as a whole.

Also, this save/restore block should be placed before the above stage2
block.

> +                state = vcpu_save_state(vm, VCPU_ID);
> +                memset(&regs1, 0, sizeof(regs1));
> +                vcpu_regs_get(vm, VCPU_ID, &regs1);
> +
> +                kvm_vm_release(vm);
> +
> +                /* Restore state in a new VM. */
> +                kvm_vm_restart(vm, O_RDWR);

[...]
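A sketch of the suggested restructuring, assuming the stage-2 checks can
safely run after the migration cycle (uc.args has already been copied out by
get_ucall() at that point, so the verification should be equivalent):

        /*
         * Emulate live migration as one block: save the vCPU state, release
         * the VM, then restore the saved state into a freshly created VM.
         */
        state = vcpu_save_state(vm, VCPU_ID);
        vcpu_regs_get(vm, VCPU_ID, &regs1);
        kvm_vm_release(vm);

        kvm_vm_restart(vm, O_RDWR);
        vm_vcpu_add(vm, VCPU_ID);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        vcpu_load_state(vm, VCPU_ID, state);
        free(state);

        /* The stage == 2 deadline verification would then follow here. */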
> On 5/18/20 1:16 PM, Makarand Sonare wrote:
>> When a nested VM with a VMX-preemption timer is migrated, verify that the
>> nested VM and its parent VM observe the VMX-preemption timer exit close
>> to the original expiration deadline.

[...]

>> +        GUEST_ASSERT(!vmresume());
>> +
>> +        l1_vmx_pt_finish = rdtsc();
>> +
>> +        //
>> +        // Ensure exit from L2 happens after L2 goes through
>> +        // save and restore
>> +        GUEST_ASSERT(l2_save_restore_done);
>> +
>> +        //
>> +        // Ensure the exit from L2 is due to preemption timer expiry
>> +        //
>> +        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
>
> I am wondering if checking just the EXIT reason is enough to determine
> that L2 has gone through a successful migration.

Yes. But I found it hugely helpful to pass the data to L0 for logging in
case of failures.

[...]
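For context on passing the data to L0: the handoff rides on the ucall
mechanism the patch extends. The guest packs values into a UCALL_SYNC, and
the host reads them back out of uc.args. A minimal sketch using names from
the patch:

        /* Guest (L1): ship PT expiry times and TSC deadlines to the host. */
        GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
                        l2_vmx_pt_finish, l2_tsc_deadline);

        /* Host: the four values come back as uc.args[2]..uc.args[5]. */
        if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC)
                pr_info("L1 PT expiry TSC (%lu), L1 TSC deadline (%lu)\n",
                        uc.args[2], uc.args[3]);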