@@ -822,6 +822,10 @@ apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+apicinterrupt HYPERV_REENLIGHTENMENT_VECTOR hyperv_reenlightenment_intr smp_hyperv_reenlightenment_intr
+#endif
+
/*
* Exception entry points.
*/
@@ -101,6 +101,76 @@ static int hv_cpu_init(unsigned int cpu)
return 0;
}
+static void (*hv_reenlightenment_cb)(void);
+
+static void hv_reenlightenment_notify(struct work_struct *dummy)
+{
+ if (hv_reenlightenment_cb)
+ hv_reenlightenment_cb();
+}
+static DECLARE_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
+
+void hyperv_stop_tsc_emulation(void)
+{
+ u64 freq;
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ emu_status.inprogress = 0;
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+ tsc_khz = freq/1000;
+}
+EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
+
+void register_hv_tsc_update(void (*cb)(void))
+{
+ struct hv_reenlightenment_control re_ctrl = {
+ .vector = HYPERV_REENLIGHTENMENT_VECTOR,
+ .enabled = 1,
+ .target_vp = hv_vp_index[smp_processor_id()]
+ };
+ struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
+
+ if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
+ return;
+
+ hv_reenlightenment_cb = cb;
+
+ /* Make sure callback is registered before we write to MSRs */
+ wmb();
+
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
+}
+EXPORT_SYMBOL_GPL(register_hv_tsc_update);
+
+void unregister_hv_tsc_update(void)
+{
+ struct hv_reenlightenment_control re_ctrl;
+
+ if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
+ return;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+ re_ctrl.enabled = 0;
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+
+ hv_reenlightenment_cb = NULL;
+}
+EXPORT_SYMBOL_GPL(unregister_hv_tsc_update);
+
+asmlinkage __visible void
+__irq_entry smp_hyperv_reenlightenment_intr(struct pt_regs *regs)
+{
+ entering_ack_irq();
+
+ schedule_work(&hv_reenlightenment_work);
+
+ exiting_irq();
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -54,3 +54,7 @@ BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
#endif
#endif
+
+#if IS_ENABLED(CONFIG_HYPERV)
+BUILD_INTERRUPT(hyperv_reenlightenment_intr, HYPERV_REENLIGHTENMENT_VECTOR)
+#endif
@@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void kvm_posted_intr_nested_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
+extern asmlinkage void hyperv_reenlightenment_intr(void);
extern asmlinkage void spurious_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
@@ -103,7 +103,12 @@
#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
-#define LOCAL_TIMER_VECTOR 0xee
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
+#endif
+
+#define LOCAL_TIMER_VECTOR 0xed
#define NR_VECTORS 256
@@ -314,11 +314,19 @@ void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
+
+asmlinkage void smp_hyperv_reenlightenment_intr(struct pt_regs *regs);
+void register_hv_tsc_update(void (*cb)(void));
+void unregister_hv_tsc_update(void);
+void hyperv_stop_tsc_emulation(void);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline bool hv_is_hypercall_page_setup(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
+static inline void register_hv_tsc_update(void (*cb)(void)) {}
+static inline void unregister_hv_tsc_update(void) {}
+static inline void hyperv_stop_tsc_emulation(void) {};
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
@@ -40,6 +40,9 @@
*/
#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11)
+/* AccessReenlightenmentControls privilege */
+#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+
/*
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
@@ -234,6 +237,30 @@
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+/* TSC emulation after migration */
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+
+struct hv_reenlightenment_control {
+ u64 vector:8;
+ u64 reserved1:8;
+ u64 enabled:1;
+ u64 reserved2:15;
+ u64 target_vp:32;
+};
+
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+
+struct hv_tsc_emulation_control {
+ u64 enabled:1;
+ u64 reserved:63;
+};
+
+struct hv_tsc_emulation_status {
+ u64 inprogress:1;
+ u64 reserved:63;
+};
+
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
@@ -140,6 +140,9 @@ static const __initdata struct idt_data apic_idts[] = {
# ifdef CONFIG_IRQ_WORK
INTG(IRQ_WORK_VECTOR, irq_work_interrupt),
# endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ INTG(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_intr),
+#endif
INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
INTG(ERROR_APIC_VECTOR, error_interrupt),
#endif
Hyper-V supports Live Migration notification. This is supposed to be used in conjunction with TSC emulation: when we are migrated to a host with different TSC frequency for some short period host emulates our accesses to TSC and sends us an interrupt to notify about the event. When we're done updating everything we can disable TSC emulation and everything will start working fast again. We didn't need these notifications before as Hyper-V guests are not supposed to use TSC as a clocksource: in Linux we even mark it as unstable on boot. Guests normally use 'tsc page' clocksouce and host updates its values on migrations automatically. Things change when we want to run nested virtualization: even when we pass through PV clocksources (kvm-clock or tsc page) to our guests we need to know TSC frequency and when it changes. Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The right one to check is EAX:BIT(13) of CPUID:0x40000003. I was assured that the fix in on the way. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> --- arch/x86/entry/entry_64.S | 4 +++ arch/x86/hyperv/hv_init.c | 70 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/entry_arch.h | 4 +++ arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/irq_vectors.h | 7 +++- arch/x86/include/asm/mshyperv.h | 8 +++++ arch/x86/include/uapi/asm/hyperv.h | 27 +++++++++++++++ arch/x86/kernel/idt.c | 3 ++ 8 files changed, 123 insertions(+), 1 deletion(-)