@@ -11,6 +11,7 @@
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
+/x86_64/tsc_offset_test
/x86_64/vmx_preemption_timer_test
/x86_64/svm_vmcall_test
/x86_64/sync_regs_test
@@ -55,6 +55,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_offset_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
@@ -65,4 +65,7 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
@@ -43,4 +43,9 @@ static inline bool cpu_has_svm(void)
return r.c & CPUID_SVM;
}
+static inline void vmmcall(void)
+{
+ __asm__ __volatile__("vmmcall");
+}
+
#endif /* SELFTEST_KVM_SVM_UTILS_H */
@@ -602,6 +602,8 @@ struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
+void generic_vmx_setup(struct vmx_pages *vmx, void *guest_rip,
+ void *guest_rsp);
bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
@@ -616,4 +618,11 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
+static inline bool cpu_has_vmx(void)
+{
+ struct cpuid r = raw_cpuid(1, 0);
+
+ return r.c & CPUID_VMX;
+}
+
#endif /* SELFTEST_KVM_VMX_H */
@@ -23,6 +23,7 @@
static void *align(void *x, size_t size)
{
size_t mask = size - 1;
+
TEST_ASSERT(size != 0 && !(size & (size - 1)),
"size not a power of 2: %lu", size);
return (void *) (((size_t) x + mask) & ~mask);
@@ -542,3 +542,14 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
+
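+/*
+ * Convenience wrapper: enter VMX operation, load the VMCS, verify the current
+ * VMCS pointer, and initialize the VMCS for a guest starting at guest_rip
+ * with its stack at guest_rsp.
+ */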
+void generic_vmx_setup(struct vmx_pages *vmx_pages, void *guest_rip,
+ void *guest_rsp)
+{
+ GUEST_ASSERT(vmx_pages && vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ prepare_vmcs(vmx_pages, guest_rip, guest_rsp);
+}
new file mode 100644
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * TSC offset test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure that userspace control of the TSC offset field behaves as
+ * expected for both non-nested and nested guests.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#include "kselftest.h"
+
+#define L1_TSC_WRITE_VALUE 0
+#define L2_GUEST_STACK_SIZE 64
+#define L1_TSC_OFFSET (1ul << 48)
+#define L2_TSC_OFFSET (-L1_TSC_OFFSET)
+#define VCPU_ID 1
+
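+/*
+ * Selects the VMX vs. SVM code paths. Set by the host in main() and again by
+ * L1, since the guest does not see host-side writes to this global.
+ */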
+bool vmx;
+
+void set_tsc_offset(struct kvm_vm *vm, u32 vcpuid, u64 val)
+{
+ vcpu_ioctl(vm, vcpuid, KVM_SET_TSC_OFFSET, &val);
+}
+
+void get_tsc_offset(struct kvm_vm *vm, u32 vcpuid, u64 *out)
+{
+ vcpu_ioctl(vm, vcpuid, KVM_GET_TSC_OFFSET, out);
+}
+
+void get_clock(struct kvm_vm *vm, struct kvm_clock_data *out)
+{
+ vm_ioctl(vm, KVM_GET_CLOCK, out);
+}
+
+/*
+ * Test that reading the TSC offset returns the previously written value.
+ */
+void set_get_tsc_offset_test(struct kvm_vm *vm, u32 vcpuid)
+{
+ u64 val;
+
+ set_tsc_offset(vm, vcpuid, L1_TSC_OFFSET);
+ get_tsc_offset(vm, vcpuid, &val);
+ TEST_ASSERT(val == L1_TSC_OFFSET,
+ "Expected %lu from GET_TSC_OFFSET but got %lu",
+ L1_TSC_OFFSET, val);
+}
+
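+/*
+ * Assert that an observed value lies within [low, high] (inclusive) and log
+ * the observation on success.
+ */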
+void check_value_bounds(const char *name, int stage, u64 low, u64 high, u64 val)
+{
+ TEST_ASSERT(low <= val && val <= high,
+ "Stage %d: expected %s value in the range [%lu, %lu] but got %lu",
+ stage, name, low, high, val);
+
+ /* only reached if passed */
+ pr_info("Stage %d: %s: %lu, expected range: [%lu, %lu]\n", stage, name,
+ val, low, high);
+}
+
+void check_value_bounds_signed(const char *name, int stage, s64 low, s64 high,
+ s64 val)
+{
+ TEST_ASSERT(low <= val && val <= high,
+ "Stage %d: expected %s value in the range [%ld, %ld] but got %ld",
+ stage, name, low, high, val);
+
+ /* only reached if passed */
+ pr_info("Stage %d: %s: %ld, expected range: [%ld, %ld]\n", stage, name,
+ val, low, high);
+}
+
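+/*
+ * Variant used when the computed bounds wrap around: assert that the observed
+ * value lies outside the (swapped) range [low, high].
+ */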
+void check_value_bounds_overflow(const char *name, int stage, s64 low, s64 high,
+ s64 val)
+{
+ TEST_ASSERT(val <= low || val >= high,
+ "Stage %d: expected %s value outside the range [%ld, %ld] but got %ld",
+ stage, name, low, high, val);
+
+ pr_info("Stage %d: %s: %ld, expected range: [-MAX, %ld], [%ld, MAX]\n",
+ stage, name, val, low, high);
+}
+
+void generic_vmcall(void)
+{
+ if (vmx)
+ vmcall();
+ else
+ vmmcall();
+}
+
+void l2_main(void)
+{
+ /* Allow userspace to manipulate the TSC offset */
+ GUEST_SYNC(3);
+ GUEST_SYNC_ARGS(4, rdtsc(), 0, 0, 0);
+ generic_vmcall();
+}
+
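+/*
+ * Host-side setup: allocate the VMX or SVM pages used for nested
+ * virtualization and pass their guest virtual address to L1 as its only
+ * argument.
+ */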
+void l0_nested_setup(struct kvm_vm *vm, u32 vcpuid)
+{
+ vm_vaddr_t nested_pages = 0;
+
+ if (vmx)
+ vcpu_alloc_vmx(vm, &nested_pages);
+ else
+ vcpu_alloc_svm(vm, &nested_pages);
+
+ vcpu_args_set(vm, vcpuid, 1, nested_pages);
+}
+
+void l1_nested_setup(void *nested_pages, void *guest_stack)
+{
+ if (vmx)
+ generic_vmx_setup(nested_pages, l2_main, guest_stack);
+ else
+ generic_svm_setup(nested_pages, l2_main, guest_stack);
+}
+
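+/*
+ * Program L2's TSC offset from within L1. On VMX, enable TSC offsetting in
+ * the primary execution controls and write the TSC_OFFSET field; on SVM,
+ * write the VMCB's tsc_offset and clear clean bit 0 so the change takes
+ * effect.
+ */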
+void l1_set_tsc_offset(void *nested_pages, u64 offset)
+{
+ if (vmx) {
+ GUEST_ASSERT(!vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_USE_TSC_OFFSETTING));
+
+ GUEST_ASSERT(!vmwrite(TSC_OFFSET, offset));
+ } else {
+ struct svm_test_data *svm = nested_pages;
+
+ svm->vmcb->control.tsc_offset = offset;
+ /* Mark the TSC offset field as dirty */
+ svm->vmcb->control.clean &= ~1u;
+ }
+}
+
+void l1_enter_guest(void *nested_pages)
+{
+ if (vmx) {
+ /* We only enter L2 once, hence VMLAUNCH */
+ GUEST_ASSERT(!vmlaunch());
+ } else {
+ struct svm_test_data *svm = nested_pages;
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ }
+}
+
+void l1_assert_exit_vmcall(void *nested_pages)
+{
+ if (vmx) {
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = nested_pages;
+
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
+}
+
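+/*
+ * L1 guest code: report TSC readings to userspace at each stage, write the
+ * TSC, and, if nested virtualization is available, launch L2 with a negative
+ * TSC offset so later stages can compare the L1 and L2 views of the TSC.
+ */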
+void l1_main(void *nested_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ bool nested;
+
+ /*
+ * The guest has its own copy of the 'vmx' global and does not see
+ * host-side writes to it, so determine VMX support again from inside
+ * the guest.
+ */
+ vmx = cpu_has_vmx();
+ nested = vmx || cpu_has_svm();
+
+ if (nested) {
+ l1_nested_setup(nested_pages,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ l1_set_tsc_offset(nested_pages, L2_TSC_OFFSET);
+ }
+
+ GUEST_SYNC_ARGS(1, rdtsc(), 0, 0, 0);
+
+ wrmsr(MSR_IA32_TSC, L1_TSC_WRITE_VALUE);
+ GUEST_SYNC(2);
+
+ if (!nested)
+ GUEST_DONE();
+
+ l1_enter_guest(nested_pages);
+ l1_assert_exit_vmcall(nested_pages);
+
+ GUEST_SYNC_ARGS(5, rdtsc(), 0, 0, 0);
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ u64 start, stop, exp_low, exp_high;
+ struct kvm_clock_data clock_data;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool nested;
+ int stage;
+
+ if (!kvm_check_cap(KVM_CAP_TSC_OFFSET) ||
+ !kvm_check_cap(KVM_CAP_ADJUST_CLOCK)) {
+ pr_info("Skipping: KVM_CAP_TSC_OFFSET or KVM_CAP_ADJUST_CLOCK not available\n");
+ exit(KSFT_SKIP);
+ }
+
+ /*
+ * Nested virtualization is not required to run this test, but its
+ * availability gates the L2 stages.
+ */
+ vmx = nested_vmx_supported();
+ nested = vmx || nested_svm_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_main);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ if (nested)
+ l0_nested_setup(vm, VCPU_ID);
+
+ set_get_tsc_offset_test(vm, VCPU_ID);
+
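+ /*
+ * Drive the guest through its stages, sampling the host TSC before and
+ * after each run so the values the guest reports can be bounds-checked.
+ */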
+ for (stage = 1;; stage++) {
+ start = rdtsc();
+ _vcpu_run(vm, VCPU_ID);
+ stop = rdtsc();
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s)\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto stage6;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /*
+ * Check that the guest's TSC value falls between expected
+ * bounds, considering the written TSC offset.
+ */
+ if (stage == 1) {
+ exp_low = start + L1_TSC_OFFSET;
+ exp_high = stop + L1_TSC_OFFSET;
+
+ check_value_bounds("L1 TSC", stage, exp_low, exp_high,
+ uc.args[2]);
+
+ /*
+ * KVM interprets writes to the TSC within a second of
+ * elapsed time as an attempt to synchronize TSCs. In
+ * order to get a TSC offset within expected bounds for
+ * stage 2, we must sleep for a second to avoid such
+ * handling of the TSC write.
+ */
+ sleep(1);
+ /*
+ * Check that guest writes to the TSC result in a TSC offset
+ * value between the expected bounds, considering the original
+ * TSC offset value.
+ */
+ } else if (stage == 2) {
+ s64 tsc_offset, low, high;
+
+ low = L1_TSC_WRITE_VALUE - stop;
+ high = L1_TSC_WRITE_VALUE - start;
+
+ get_tsc_offset(vm, VCPU_ID, (u64 *) &tsc_offset);
+
+ /*
+ * It is possible (though highly unlikely) that the
+ * host's TSC crosses 2^63 ticks while the guest is
+ * running, in which case the lower bound on the TSC
+ * offset wraps around into the positive domain and
+ * exceeds the upper bound. When that happens, assert
+ * instead that the observed value lies outside the
+ * (swapped) range [low, high].
+ */
+ if (low > high) {
+ /*
+ * Swap low and high such that the variable
+ * names correctly imply their value.
+ */
+ swap(low, high);
+ check_value_bounds_overflow("L1 TSC offset",
+ stage, low, high,
+ tsc_offset);
+ } else {
+ check_value_bounds_signed("L1 TSC offset",
+ stage, low, high,
+ tsc_offset);
+ }
+
+ /*
+ * Write the TSC offset while in guest mode
+ */
+ } else if (nested && stage == 3) {
+ set_tsc_offset(vm, VCPU_ID, L1_TSC_OFFSET);
+
+ /*
+ * Check that the write to TSC offset affects L2's perception of
+ * the TSC
+ */
+ } else if (nested && stage == 4) {
+ exp_low = start + L1_TSC_OFFSET + L2_TSC_OFFSET;
+ exp_high = stop + L1_TSC_OFFSET + L2_TSC_OFFSET;
+
+ check_value_bounds("L2 TSC", stage, exp_low, exp_high,
+ uc.args[2]);
+
+ /*
+ * Check that the modified TSC offset is also observed in L1
+ */
+ } else if (nested && stage == 5) {
+ exp_low = start + L1_TSC_OFFSET;
+ exp_high = stop + L1_TSC_OFFSET;
+
+ check_value_bounds("L1 TSC", stage, exp_low, exp_high,
+ uc.args[2]);
+ } else {
+ TEST_FAIL("Unexpected stage %d\n", stage);
+ }
+ }
+
+ /*
+ * Check that KVM sets the KVM_CLOCK_TSC_STABLE flag when vCPUs have an
+ * equivalent TSC offset.
+ */
+stage6:
+ vm_vcpu_add_default(vm, VCPU_ID + 1, NULL);
+ vcpu_set_cpuid(vm, VCPU_ID + 1, kvm_get_supported_cpuid());
+
+ set_tsc_offset(vm, VCPU_ID, L1_TSC_OFFSET);
+ set_tsc_offset(vm, VCPU_ID + 1, L1_TSC_OFFSET);
+ get_clock(vm, &clock_data);
+
+ TEST_ASSERT(clock_data.flags & KVM_CLOCK_TSC_STABLE,
+ "Stage 6: expected KVM_CLOCK_TSC_STABLE (%#x) flag to be set but got %#x",
+ KVM_CLOCK_TSC_STABLE, clock_data.flags);
+
+ pr_info("Stage 6: clock_data.flags = %#x, expected KVM_CLOCK_TSC_STABLE (%#x) flag\n",
+ clock_data.flags, KVM_CLOCK_TSC_STABLE);
+
+ kvm_vm_free(vm);
+ return 0;
+}