@@ -781,7 +781,7 @@ struct kvm_arch {
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
- spinlock_t pvclock_gtod_sync_lock;
+ spinlock_t masterclock_lock;
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
@@ -812,40 +812,41 @@ TRACE_EVENT(kvm_write_tsc_offset,
{VCLOCK_TSC, "tsc"} \
TRACE_EVENT(kvm_update_master_clock,
- TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
- TP_ARGS(use_master_clock, host_clock, offset_matched),
+ TP_PROTO(bool use_master_clock, bool host_clock_stable,
+ bool offset_matched),
+ TP_ARGS(use_master_clock, host_clock_stable, offset_matched),
TP_STRUCT__entry(
__field( bool, use_master_clock )
- __field( unsigned int, host_clock )
+ __field( bool, host_clock_stable )
__field( bool, offset_matched )
),
TP_fast_assign(
__entry->use_master_clock = use_master_clock;
- __entry->host_clock = host_clock;
+ __entry->host_clock_stable = host_clock_stable;
__entry->offset_matched = offset_matched;
),
- TP_printk("masterclock %d hostclock %s offsetmatched %u",
+ TP_printk("masterclock %d hostclock stable %u offsetmatched %u",
__entry->use_master_clock,
- __print_symbolic(__entry->host_clock, host_clocks),
+ __entry->host_clock_stable,
__entry->offset_matched)
);
TRACE_EVENT(kvm_track_tsc,
TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
unsigned int online_vcpus, bool use_master_clock,
- unsigned int host_clock),
+ bool host_clock_stable),
TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
- host_clock),
+ host_clock_stable),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
__field( unsigned int, nr_vcpus_matched_tsc )
__field( unsigned int, online_vcpus )
__field( bool, use_master_clock )
- __field( unsigned int, host_clock )
+ __field( bool, host_clock_stable )
),
TP_fast_assign(
@@ -853,14 +854,14 @@ TRACE_EVENT(kvm_track_tsc,
__entry->nr_vcpus_matched_tsc = nr_matched;
__entry->online_vcpus = online_vcpus;
__entry->use_master_clock = use_master_clock;
- __entry->host_clock = host_clock;
+ __entry->host_clock_stable = host_clock_stable;
),
- TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
- " hostclock %s",
+ TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u "
+ "hostclock stable %u",
__entry->vcpu_id, __entry->use_master_clock,
__entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
- __print_symbolic(__entry->host_clock, host_clocks))
+ __entry->host_clock_stable)
);
#endif /* CONFIG_X86_64 */
@@ -50,7 +50,7 @@
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
-#include <linux/pvclock_gtod.h>
+#include <linux/cs_notifier.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
@@ -1131,50 +1131,6 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return kvm_set_msr(vcpu, &msr);
}
-#ifdef CONFIG_X86_64
-struct pvclock_gtod_data {
- seqcount_t seq;
-
- struct { /* extract of a clocksource struct */
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
- } clock;
-
- u64 boot_ns;
- u64 nsec_base;
- u64 wall_time_sec;
-};
-
-static struct pvclock_gtod_data pvclock_gtod_data;
-
-static void update_pvclock_gtod(struct timekeeper *tk)
-{
- struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
- u64 boot_ns;
-
- boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
-
- write_seqcount_begin(&vdata->seq);
-
- /* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
- vdata->clock.mask = tk->tkr_mono.mask;
- vdata->clock.mult = tk->tkr_mono.mult;
- vdata->clock.shift = tk->tkr_mono.shift;
-
- vdata->boot_ns = boot_ns;
- vdata->nsec_base = tk->tkr_mono.xtime_nsec;
-
- vdata->wall_time_sec = tk->xtime_sec;
-
- write_seqcount_end(&vdata->seq);
-}
-#endif
-
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
/*
@@ -1266,10 +1222,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
__func__, base_hz, scaled_hz, shift, *pmultiplier);
}
-#ifdef CONFIG_X86_64
-static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
-#endif
-
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;
@@ -1358,12 +1310,32 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+#ifdef CONFIG_X86_64
+static bool clocksource_stable(void)
+{
+ return get_tk_mono_clock_mode() == VCLOCK_TSC;
+}
+
+static bool clocksource_stability_check(void)
+{
+ unsigned int seq;
+ const seqcount_t *s = get_tk_seq();
+ bool stable;
+
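+ /* Sample under the timekeeper seqcount; retry if an update races. */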
+ do {
+ seq = read_seqcount_begin(s);
+ stable = clocksource_stable();
+ } while (unlikely(read_seqcount_retry(s, seq)));
+
+ return stable;
+}
+#endif
+
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
- bool vcpus_matched;
+ bool vcpus_matched, host_clock_stable;
struct kvm_arch *ka = &vcpu->kvm->arch;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&vcpu->kvm->online_vcpus));
@@ -1376,13 +1348,14 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
* and the vcpus need to have matched TSCs. When that happens,
* perform request to enable masterclock.
*/
+ host_clock_stable = clocksource_stability_check();
if (ka->use_master_clock ||
- (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+ (host_clock_stable && vcpus_matched))
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
- atomic_read(&vcpu->kvm->online_vcpus),
- ka->use_master_clock, gtod->clock.vclock_mode);
+ atomic_read(&vcpu->kvm->online_vcpus),
+ ka->use_master_clock, host_clock_stable);
#endif
}
@@ -1535,7 +1508,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
- spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+ spin_lock(&kvm->arch.masterclock_lock);
if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0;
} else if (!already_matched) {
@@ -1543,7 +1516,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
kvm_track_tsc_matching(vcpu);
- spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
+ spin_unlock(&kvm->arch.masterclock_lock);
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);
@@ -1563,99 +1536,41 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
}
#ifdef CONFIG_X86_64
-
-static u64 read_tsc(void)
+static bool kvm_get_host_time_and_cycles(s64 *kernel_ns, u64 *cycle_now,
+ u64 (*get_time)(u64 *cycle_now))
{
- u64 ret = (u64)rdtsc_ordered();
- u64 last = pvclock_gtod_data.clock.cycle_last;
+ unsigned int seq;
+ const seqcount_t *s = get_tk_seq();
+ bool stable;
- if (likely(ret >= last))
- return ret;
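+ /*
+ * Sample stability and, when stable, the paired time/cycles values
+ * consistently: retry if a timekeeper update interleaves.
+ */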
+ do {
+ seq = read_seqcount_begin(s);
+ stable = clocksource_stable();
+ if (stable)
+ *kernel_ns = get_time(cycle_now);
+ } while (unlikely(read_seqcount_retry(s, seq)));
- /*
- * GCC likes to generate cmov here, but this branch is extremely
- * predictable (it's just a function of time and the likely is
- * very likely) and there's a data dependence, so force GCC
- * to generate a branch instead. I don't barrier() because
- * we don't actually need a barrier, and if this function
- * ever gets inlined it will generate worse code.
- */
- asm volatile ("");
- return last;
-}
-
-static inline u64 vgettsc(u64 *cycle_now)
-{
- long v;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
-
- *cycle_now = read_tsc();
-
- v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
- return v * gtod->clock.mult;
-}
-
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
- ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
- ns >>= gtod->clock.shift;
- ns += gtod->boot_ns;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
- *t = ns;
-
- return mode;
-}
-
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
- ts->tv_sec = gtod->wall_time_sec;
- ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
- ns >>= gtod->clock.shift;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
- ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return mode;
+ return stable;
}
/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
{
- /* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
- return false;
-
- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+ return kvm_get_host_time_and_cycles(
+ kernel_ns, cycle_now, ktime_get_boot_ns_with_cycles);
}
/* returns true if host is using tsc clocksource */
-static bool kvm_get_walltime_and_clockread(struct timespec *ts,
- u64 *cycle_now)
+static bool kvm_get_walltime_and_clockread(struct timespec *ts, u64 *cycle_now)
{
- /* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
- return false;
+ bool res;
+ s64 kernel_ns = 0;
- return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+ res = kvm_get_host_time_and_cycles(
+ &kernel_ns, cycle_now, ktime_get_real_ns_with_cycles);
+ *ts = ktime_to_timespec(kernel_ns);
+
+ return res;
}
#endif
@@ -1700,12 +1615,11 @@ static bool kvm_get_walltime_and_clockread(struct timespec *ts,
*
*/
-static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+static void update_masterclock(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
struct kvm_arch *ka = &kvm->arch;
- int vclock_mode;
- bool host_tsc_clocksource, vcpus_matched;
+ bool host_clocksource_stable, vcpus_matched;
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&kvm->online_vcpus));
@@ -1714,20 +1628,16 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
*/
- host_tsc_clocksource = kvm_get_time_and_clockread(
+ host_clocksource_stable = kvm_get_time_and_clockread(
&ka->master_kernel_ns,
&ka->master_cycle_now);
- ka->use_master_clock = host_tsc_clocksource && vcpus_matched
+ ka->use_master_clock = host_clocksource_stable && vcpus_matched
&& !backwards_tsc_observed
&& !ka->boot_vcpu_runs_old_kvmclock;
- if (ka->use_master_clock)
- atomic_set(&kvm_guest_has_master_clock, 1);
-
- vclock_mode = pvclock_gtod_data.clock.vclock_mode;
- trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
- vcpus_matched);
+ trace_kvm_update_master_clock(ka->use_master_clock,
+ host_clocksource_stable, vcpus_matched);
#endif
}
@@ -1743,10 +1653,10 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
struct kvm_vcpu *vcpu;
struct kvm_arch *ka = &kvm->arch;
- spin_lock(&ka->pvclock_gtod_sync_lock);
+ spin_lock(&ka->masterclock_lock);
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
- pvclock_update_vm_gtod_copy(kvm);
+ update_masterclock(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -1755,7 +1665,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->masterclock_lock);
#endif
}
@@ -1765,15 +1675,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
struct pvclock_vcpu_time_info hv_clock;
u64 ret;
- spin_lock(&ka->pvclock_gtod_sync_lock);
+ spin_lock(&ka->masterclock_lock);
if (!ka->use_master_clock) {
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->masterclock_lock);
return ktime_get_boot_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->masterclock_lock);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
@@ -1859,13 +1769,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
*/
- spin_lock(&ka->pvclock_gtod_sync_lock);
+ spin_lock(&ka->masterclock_lock);
use_master_clock = ka->use_master_clock;
if (use_master_clock) {
host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns;
}
- spin_unlock(&ka->pvclock_gtod_sync_lock);
+ spin_unlock(&ka->masterclock_lock);
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
@@ -6015,7 +5925,8 @@ static void kvm_set_mmio_spte_mask(void)
}
#ifdef CONFIG_X86_64
-static void pvclock_gtod_update_fn(struct work_struct *work)
+static int process_clocksource_change(struct notifier_block *nb,
+ unsigned long unused0, void *unused1)
{
struct kvm *kvm;
@@ -6026,35 +5937,13 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
- atomic_set(&kvm_guest_has_master_clock, 0);
spin_unlock(&kvm_lock);
-}
-
-static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
-
-/*
- * Notification about pvclock gtod data update.
- */
-static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
- void *priv)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- struct timekeeper *tk = priv;
-
- update_pvclock_gtod(tk);
-
- /* disable master clock if host does not trust, or does not
- * use, TSC clocksource
- */
- if (gtod->clock.vclock_mode != VCLOCK_TSC &&
- atomic_read(&kvm_guest_has_master_clock) != 0)
- queue_work(system_long_wq, &pvclock_gtod_work);
-
return 0;
}
-static struct notifier_block pvclock_gtod_notifier = {
- .notifier_call = pvclock_gtod_notify,
+
+static struct notifier_block clocksource_changes_notifier = {
+ .notifier_call = process_clocksource_change,
};
#endif
@@ -6107,7 +5996,7 @@ int kvm_arch_init(void *opaque)
kvm_lapic_init();
#ifdef CONFIG_X86_64
- pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+ clocksource_changes_register_notifier(&clocksource_changes_notifier);
#endif
return 0;
@@ -6128,7 +6017,7 @@ void kvm_arch_exit(void)
CPUFREQ_TRANSITION_NOTIFIER);
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
- pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+ clocksource_changes_unregister_notifier(&clocksource_changes_notifier);
#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
@@ -8031,10 +7920,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
mutex_init(&kvm->arch.hyperv.hv_lock);
- spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+ spin_lock_init(&kvm->arch.masterclock_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
- pvclock_update_vm_gtod_copy(kvm);
+ update_masterclock(kvm);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
Thanks to a set of recently added timekeeper functions providing the cycles stamp along with the kernel time, now we have an ability to get time values right from the kerenl avoiding supporting a shadow copy of timekeeper data structures. This reduces overheads and complexity of the KVM code and makes time operations more clear. Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com> --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/trace.h | 27 +++-- arch/x86/kvm/x86.c | 259 ++++++++++++---------------------------- 3 files changed, 89 insertions(+), 199 deletions(-)