@@ -822,6 +822,19 @@ config KVM_GUEST
underlying device model, the host provides the guest with
timing infrastructure such as time of day, and system time
+config KVM_VIRT_SUSPEND_TIMING_GUEST
+ bool "Virtual suspend time injection (guest side)"
+ depends on KVM_GUEST
+ default n
+ help
+ This option makes the guest's clocks account for the time the host
+ spends suspended. In other words, the guest's CLOCK_MONOTONIC stops
+ while CLOCK_BOOTTIME keeps running during the host's suspension.
+ This feature is only effective when both the guest and the host
+ enable it.
+
+ If unsure, say N.
+
config ARCH_CPUIDLE_HALTPOLL
def_bool n
prompt "Disable host haltpoll when loading haltpoll driver"
@@ -671,6 +671,10 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
#endif
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST /* guest-side host suspend time notification */
+DECLARE_IDTENTRY_SYSVEC(VIRT_SUSPEND_TIMING_VECTOR, sysvec_virtual_suspend_time);
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
#if IS_ENABLED(CONFIG_HYPERV)
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
@@ -16,6 +16,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+u64 kvm_get_suspend_time(void); /* cumulative host suspend time reported to this guest */
+#else /* !CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+static inline u64 kvm_get_suspend_time(void)
+{
+ return 0; /* feature compiled out: report no host suspend time */
+}
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
#define KVM_HYPERCALL \
ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)
@@ -16,11 +16,15 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/set_memory.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
#include <asm/kvmclock.h>
+#include <asm/desc.h>
+#include <asm/idtentry.h>
static int kvmclock __initdata = 1;
static int kvmclock_vsyscall __initdata = 1;
@@ -48,6 +52,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
static struct pvclock_vsyscall_time_info
hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+static struct kvm_suspend_time suspend_time __bss_decrypted; /* address is handed to the host in kvmclock_init() */
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
static struct pvclock_wall_clock wall_clock __bss_decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static struct pvclock_vsyscall_time_info *hvclock_mem;
@@ -163,6 +170,18 @@ static int kvm_cs_enable(struct clocksource *cs)
return 0;
}
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+/*
+ * kvm_get_suspend_time - Return the total time, in nanoseconds, that the
+ * host has spent suspended while this guest was running.
+ *
+ * The value is cumulative; it is not the duration of the last host
+ * suspension. The backing structure is registered with the host, which
+ * updates it asynchronously, so READ_ONCE() is used to force a single,
+ * untorn load of the counter.
+ */
+u64 kvm_get_suspend_time(void)
+{
+	return READ_ONCE(suspend_time.suspend_time_ns);
+}
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
@@ -290,6 +309,18 @@ static int kvmclock_setup_percpu(unsigned int cpu)
return p ? 0 : -ENOMEM;
}
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+void timekeeping_inject_virtual_suspend_time(void);
+
+/*
+ * Handler for the virtual suspend time system vector: asks the timekeeping
+ * core to inject any host suspend time that has not been accounted yet.
+ *
+ * NOTE(review): no ack_APIC_irq() is issued here; confirm that this matches
+ * the way the host delivers the vector.
+ */
+DEFINE_IDTENTRY_SYSVEC(sysvec_virtual_suspend_time)
+{
+	struct pt_regs *saved_regs;
+
+	saved_regs = set_irq_regs(regs);
+	timekeeping_inject_virtual_suspend_time();
+	set_irq_regs(saved_regs);
+}
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
void __init kvmclock_init(void)
{
u8 flags;
@@ -304,6 +335,15 @@ void __init kvmclock_init(void)
return;
}
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+ if (kvm_para_has_feature(KVM_FEATURE_HOST_SUSPEND_TIME)) {
+ alloc_intr_gate(VIRT_SUSPEND_TIMING_VECTOR, asm_sysvec_virtual_suspend_time);
+ /* Register the suspend time structure */
+ wrmsrl(MSR_KVM_HOST_SUSPEND_TIME,
+ slow_virt_to_phys(&suspend_time) | KVM_MSR_ENABLED);
+ }
+#endif
+
if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu",
kvmclock_setup_percpu, NULL) < 0) {
return;
@@ -124,6 +124,10 @@ struct timekeeper {
u32 ntp_err_mult;
/* Flag used to avoid updating NTP twice with same second */
u32 skip_second_overflow;
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+ /* suspend_time_injected keeps the duration injected through kvm */
+ u64 suspend_time_injected;
+#endif
#ifdef CONFIG_DEBUG_TIMEKEEPING
long last_warning;
/*
@@ -2125,6 +2125,39 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
return offset;
}
+#ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST
+/*
+ * timekeeping_inject_virtual_suspend_time - Account host suspend time that
+ * has not been injected into the guest's timekeeping yet.
+ *
+ * Called when the kvm host requests it; this function should be called
+ * under irq context.
+ */
+void timekeeping_inject_virtual_suspend_time(void)
+{
+	/*
+	 * Only shadow_timekeeper is updated here, so the change is picked up
+	 * on the next call of timekeeping_advance().
+	 */
+	struct timekeeper *shadow = &shadow_timekeeper;
+	struct timespec64 pending;
+	unsigned long irqflags;
+	u64 total_ns;
+
+	raw_spin_lock_irqsave(&timekeeper_lock, irqflags);
+
+	total_ns = kvm_get_suspend_time();
+	/* Nothing to do unless the host reported more than was injected. */
+	if (total_ns <= shadow->suspend_time_injected)
+		goto unlock;
+
+	/*
+	 * Both the reported value and shadow->suspend_time_injected are
+	 * cumulative, so only the difference between them is injected.
+	 */
+	pending = ns_to_timespec64(total_ns - shadow->suspend_time_injected);
+	__timekeeping_inject_sleeptime(shadow, &pending);
+	shadow->suspend_time_injected = total_ns;
+
+unlock:
+	raw_spin_unlock_irqrestore(&timekeeper_lock, irqflags);
+}
+#endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING_GUEST */
+
/*
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
This patch implements virtual suspend time injection support for KVM guests. If this functionality is enabled and the host supports KVM_FEATURE_HOST_SUSPEND_TIME, the guest registers struct kvm_suspend_time through an MSR to learn how much time the guest spent in the host's suspension. The host notifies the guest of updates to the structure (which happen if the host went into suspension while the guest was running) through an irq, and the irq triggers the adjustment of CLOCK_BOOTTIME inside the guest. Before this patch, there was no way to adjust CLOCK_BOOTTIME without actually suspending the kernel. However, some guest applications rely on the fact that there will be some difference between CLOCK_BOOTTIME and CLOCK_MONOTONIC after execution has been suspended, and they break if we just pause the guest instead of actually suspending it. Suspending the guest kernel is one way to solve the problem, but if we can adjust the clocks without actually suspending it, we can avoid the overhead of a guest suspend/resume cycle on every host suspension. So this change is useful for devices which suspend and resume frequently. Signed-off-by: Hikaru Nishida <hikalium@chromium.org> --- arch/x86/Kconfig | 13 ++++++++++ arch/x86/include/asm/idtentry.h | 4 +++ arch/x86/include/asm/kvm_para.h | 9 +++++++ arch/x86/kernel/kvmclock.c | 40 +++++++++++++++++++++++++++++ include/linux/timekeeper_internal.h | 4 +++ kernel/time/timekeeping.c | 33 ++++++++++++++++++++++++ 6 files changed, 103 insertions(+)