@@ -2793,19 +2793,20 @@ static void kvm_update_masterclock(struct kvm *kvm)
kvm_end_pvclock_update(kvm);
}
-u64 get_kvmclock_ns(struct kvm *kvm)
+static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
{
struct kvm_arch *ka = &kvm->arch;
struct pvclock_vcpu_time_info hv_clock;
unsigned long flags;
- u64 ret;
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) {
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
- return get_kvmclock_base_ns() + ka->kvmclock_offset;
+ data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
+ return;
}
+ data->flags |= KVM_CLOCK_TSC_STABLE;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
@@ -2817,13 +2818,26 @@ u64 get_kvmclock_ns(struct kvm *kvm)
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
- ret = __pvclock_read_cycles(&hv_clock, rdtsc());
- } else
- ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
+ data->clock = __pvclock_read_cycles(&hv_clock, rdtsc());
+ } else {
+ data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
+ }
put_cpu();
+}
- return ret;
+u64 get_kvmclock_ns(struct kvm *kvm)
+{
+ struct kvm_clock_data data;
+
+ /*
+ * Zero flags as it's accessed RMW, leave everything else uninitialized
+ * as clock is always written and no other fields are consumed.
+ */
+ data.flags = 0;
+
+ get_kvmclock(kvm, &data);
+ return data.clock;
}
static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
@@ -5832,13 +5846,9 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp)
{
struct kvm_clock_data data;
- u64 now_ns;
-
- now_ns = get_kvmclock_ns(kvm);
- user_ns.clock = now_ns;
- user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
- memset(&user_ns.pad, 0, sizeof(user_ns.pad));
+ memset(&data, 0, sizeof(data));
+ get_kvmclock(kvm, &data);
if (copy_to_user(argp, &data, sizeof(data)))
return -EFAULT;
Sean noticed that KVM_GET_CLOCK was checking kvm_arch.use_master_clock outside of the pvclock sync lock. This is problematic, as the clock value written to the user may or may not actually correspond to a stable TSC. Fix the race by populating the entire kvm_clock_data structure behind the pvclock_gtod_sync_lock. Suggested-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Oliver Upton <oupton@google.com> --- arch/x86/kvm/x86.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-)