diff mbox

[3/6] x86/hyper-v: reenlightenment notifications support

Message ID 20171208105000.25116-4-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vitaly Kuznetsov Dec. 8, 2017, 10:49 a.m. UTC
Hyper-V supports Live Migration notifications. They are supposed to be used
in conjunction with TSC emulation: when we are migrated to a host with a
different TSC frequency, the host emulates our accesses to the TSC for a
short period and sends us an interrupt to notify us about the event. When
we're done updating everything, we can disable TSC emulation and everything
will start working fast again.

We didn't need these notifications before as Hyper-V guests are not
supposed to use TSC as a clocksource: in Linux we even mark it as unstable
on boot. Guests normally use the 'tsc page' clocksource and the host updates
its values on migrations automatically.

Things change when we want to run nested virtualization: even when we pass
through PV clocksources (kvm-clock or tsc page) to our guests we need to
know TSC frequency and when it changes.

Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies
EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The
right one to check is EAX:BIT(13) of CPUID:0x40000003. I was assured that
a fix is on the way.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
RFC -> v1:
- #include <asm/apic.h> [kbuild test robot]
- use div64_u64() [kbuild test robot]
- DECLARE_WORK -> DECLARE_DELAYED_WORK as testing showed there's some bug
  in the Hyper-V hypervisor: disabling emulation right after receiving the
  interrupt may corrupt the TSC counters.
---
 arch/x86/entry/entry_64.S          |  4 +++
 arch/x86/hyperv/hv_init.c          | 71 ++++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/entry_arch.h  |  4 +++
 arch/x86/include/asm/hw_irq.h      |  1 +
 arch/x86/include/asm/irq_vectors.h |  7 +++-
 arch/x86/include/asm/mshyperv.h    |  8 +++++
 arch/x86/include/uapi/asm/hyperv.h | 27 +++++++++++++++
 arch/x86/kernel/idt.c              |  3 ++
 8 files changed, 124 insertions(+), 1 deletion(-)

Comments

Roman Kagan Dec. 8, 2017, 6:10 p.m. UTC | #1
On Fri, Dec 08, 2017 at 11:49:57AM +0100, Vitaly Kuznetsov wrote:
> Hyper-V supports Live Migration notification. This is supposed to be used
> in conjunction with TSC emulation: when we are migrated to a host with
> different TSC frequency for some short period host emulates our accesses
> to TSC and sends us an interrupt to notify about the event. When we're
> done updating everything we can disable TSC emulation and everything will
> start working fast again.
> 
> We didn't need these notifications before as Hyper-V guests are not
> supposed to use TSC as a clocksource: in Linux we even mark it as unstable
> on boot. Guests normally use 'tsc page' clocksouce and host updates its
> values on migrations automatically.
> 
> Things change when we want to run nested virtualization: even when we pass
> through PV clocksources (kvm-clock or tsc page) to our guests we need to
> know TSC frequency and when it changes.
> 
> Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies
> EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The
> right one to check is EAX:BIT(13) of CPUID:0x40000003. I was assured that
> the fix in on the way.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> RFC -> v1:
> - #include <asm/apic.h> [kbuild test robot]
> - use div64_u64() [kbuild test robot]
> - DECLARE_WORK -> DECLARE_DELAYED_WORK as testing showed there's some bug
>   in Hyper-V hypervisor and disabling emulation after receiving interrupt
>   may screw TSC counters.

Looks kinda fragile...

> ---
>  arch/x86/entry/entry_64.S          |  4 +++
>  arch/x86/hyperv/hv_init.c          | 71 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/include/asm/entry_arch.h  |  4 +++
>  arch/x86/include/asm/hw_irq.h      |  1 +
>  arch/x86/include/asm/irq_vectors.h |  7 +++-
>  arch/x86/include/asm/mshyperv.h    |  8 +++++
>  arch/x86/include/uapi/asm/hyperv.h | 27 +++++++++++++++
>  arch/x86/kernel/idt.c              |  3 ++
>  8 files changed, 124 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index f81d50d7ceac..a32730b260bc 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -826,6 +826,10 @@ apicinterrupt SPURIOUS_APIC_VECTOR		spurious_interrupt		smp_spurious_interrupt
>  apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
>  #endif
>  
> +#if IS_ENABLED(CONFIG_HYPERV)
> +apicinterrupt HYPERV_REENLIGHTENMENT_VECTOR	hyperv_reenlightenment_intr	smp_hyperv_reenlightenment_intr
> +#endif
> +
>  /*
>   * Exception entry points.
>   */
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 1a6c63f721bc..bb62839ede81 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -18,6 +18,7 @@
>   */
>  
>  #include <linux/types.h>
> +#include <asm/apic.h>
>  #include <asm/hypervisor.h>
>  #include <asm/hyperv.h>
>  #include <asm/mshyperv.h>
> @@ -102,6 +103,76 @@ static int hv_cpu_init(unsigned int cpu)
>  	return 0;
>  }
>  
> +static void (*hv_reenlightenment_cb)(void);
> +
> +static void hv_reenlightenment_notify(struct work_struct *dummy)
> +{
> +	if (hv_reenlightenment_cb)
> +		hv_reenlightenment_cb();
> +}
> +static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
> +
> +void hyperv_stop_tsc_emulation(void)
> +{
> +	u64 freq;
> +	struct hv_tsc_emulation_status emu_status;
> +
> +	rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
> +	emu_status.inprogress = 0;
> +	wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
> +
> +	rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);

IIRC the availability of this msr is not guaranteed (I guess in reality
it's present if the reenlightenment is supported, but I'd rather check).

> +	tsc_khz = div64_u64(freq, 1000);
> +}
> +EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
> +
> +void register_hv_tsc_update(void (*cb)(void))
> +{

The function name seems unfortunate.  IMHO such a name suggests
registering a callback on a notifier chain (rather than unconditionally
replacing the old callback), and having no other side effects.

> +	struct hv_reenlightenment_control re_ctrl = {
> +		.vector = HYPERV_REENLIGHTENMENT_VECTOR,
> +		.enabled = 1,
> +		.target_vp = hv_vp_index[smp_processor_id()]
> +	};
> +	struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
> +
> +	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
> +		return;

What happens then?  L2 guests keep running with their clocks ticking at
a different speed?

> +
> +	hv_reenlightenment_cb = cb;
> +
> +	/* Make sure callback is registered before we write to MSRs */
> +	wmb();
> +
> +	wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
> +	wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
> +}
> +EXPORT_SYMBOL_GPL(register_hv_tsc_update);
> +
> +void unregister_hv_tsc_update(void)
> +{
> +	struct hv_reenlightenment_control re_ctrl;
> +
> +	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
> +		return;
> +
> +	rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
> +	re_ctrl.enabled = 0;
> +	wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
> +
> +	hv_reenlightenment_cb = NULL;
> +}
> +EXPORT_SYMBOL_GPL(unregister_hv_tsc_update);
> +
> +asmlinkage __visible void
> +__irq_entry smp_hyperv_reenlightenment_intr(struct pt_regs *regs)
> +{
> +	entering_ack_irq();
> +
> +	schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
> +
> +	exiting_irq();
> +}
> +
>  /*
>   * This function is to be invoked early in the boot sequence after the
>   * hypervisor has been detected.
> diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
> index 416422762845..eb936cc49b62 100644
> --- a/arch/x86/include/asm/entry_arch.h
> +++ b/arch/x86/include/asm/entry_arch.h
> @@ -54,3 +54,7 @@ BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
>  BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
>  #endif
>  #endif
> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +BUILD_INTERRUPT(hyperv_reenlightenment_intr, HYPERV_REENLIGHTENMENT_VECTOR)
> +#endif
> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> index 2851077b6051..c65193dac7d9 100644
> --- a/arch/x86/include/asm/hw_irq.h
> +++ b/arch/x86/include/asm/hw_irq.h
> @@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
>  extern asmlinkage void kvm_posted_intr_nested_ipi(void);
>  extern asmlinkage void error_interrupt(void);
>  extern asmlinkage void irq_work_interrupt(void);
> +extern asmlinkage void hyperv_reenlightenment_intr(void);
>  
>  extern asmlinkage void spurious_interrupt(void);
>  extern asmlinkage void thermal_interrupt(void);
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index 67421f649cfa..e71c1120426b 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -103,7 +103,12 @@
>  #endif
>  
>  #define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
> -#define LOCAL_TIMER_VECTOR		0xee
> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
> +#endif
> +
> +#define LOCAL_TIMER_VECTOR		0xed
>  
>  #define NR_VECTORS			 256
>  
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index a0b34994f453..43164b097585 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -314,11 +314,19 @@ void hyper_alloc_mmu(void);
>  void hyperv_report_panic(struct pt_regs *regs, long err);
>  bool hv_is_hypercall_page_setup(void);
>  void hyperv_cleanup(void);
> +
> +asmlinkage void smp_hyperv_reenlightenment_intr(struct pt_regs *regs);
> +void register_hv_tsc_update(void (*cb)(void));
> +void unregister_hv_tsc_update(void);
> +void hyperv_stop_tsc_emulation(void);
>  #else /* CONFIG_HYPERV */
>  static inline void hyperv_init(void) {}
>  static inline bool hv_is_hypercall_page_setup(void) { return false; }
>  static inline void hyperv_cleanup(void) {}
>  static inline void hyperv_setup_mmu_ops(void) {}
> +static inline void register_hv_tsc_update(void (*cb)(void)) {}
> +static inline void unregister_hv_tsc_update(void) {}
> +static inline void hyperv_stop_tsc_emulation(void) {};
>  #endif /* CONFIG_HYPERV */
>  
>  #ifdef CONFIG_HYPERV_TSCPAGE
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index 1a5bfead93b4..197c2e6c7376 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -40,6 +40,9 @@
>   */
>  #define HV_X64_ACCESS_FREQUENCY_MSRS		(1 << 11)
>  
> +/* AccessReenlightenmentControls privilege */
> +#define HV_X64_ACCESS_REENLIGHTENMENT		BIT(13)
> +
>  /*
>   * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
>   * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
> @@ -234,6 +237,30 @@
>  #define HV_X64_MSR_CRASH_PARAMS		\
>  		(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
>  
> +/* TSC emulation after migration */
> +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
> +
> +struct hv_reenlightenment_control {
> +	u64 vector:8;
> +	u64 reserved1:8;
> +	u64 enabled:1;
> +	u64 reserved2:15;
> +	u64 target_vp:32;
> +};
> +
> +#define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
> +#define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108
> +
> +struct hv_tsc_emulation_control {
> +	u64 enabled:1;
> +	u64 reserved:63;
> +};
> +
> +struct hv_tsc_emulation_status {
> +	u64 inprogress:1;
> +	u64 reserved:63;
> +};
> +
>  #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
> index d985cef3984f..5b8512d48aa3 100644
> --- a/arch/x86/kernel/idt.c
> +++ b/arch/x86/kernel/idt.c
> @@ -140,6 +140,9 @@ static const __initdata struct idt_data apic_idts[] = {
>  # ifdef CONFIG_IRQ_WORK
>  	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
>  # endif
> +#if IS_ENABLED(CONFIG_HYPERV)
> +	INTG(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_intr),
> +#endif
>  	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
>  	INTG(ERROR_APIC_VECTOR,		error_interrupt),
>  #endif


Roman.
Vitaly Kuznetsov Dec. 11, 2017, 9:56 a.m. UTC | #2
Roman Kagan <rkagan@virtuozzo.com> writes:

> On Fri, Dec 08, 2017 at 11:49:57AM +0100, Vitaly Kuznetsov wrote:
>> Hyper-V supports Live Migration notification. This is supposed to be used
>> in conjunction with TSC emulation: when we are migrated to a host with
>> different TSC frequency for some short period host emulates our accesses
>> to TSC and sends us an interrupt to notify about the event. When we're
>> done updating everything we can disable TSC emulation and everything will
>> start working fast again.
>> 
>> We didn't need these notifications before as Hyper-V guests are not
>> supposed to use TSC as a clocksource: in Linux we even mark it as unstable
>> on boot. Guests normally use 'tsc page' clocksouce and host updates its
>> values on migrations automatically.
>> 
>> Things change when we want to run nested virtualization: even when we pass
>> through PV clocksources (kvm-clock or tsc page) to our guests we need to
>> know TSC frequency and when it changes.
>> 
>> Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies
>> EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The
>> right one to check is EAX:BIT(13) of CPUID:0x40000003. I was assured that
>> the fix in on the way.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>> RFC -> v1:
>> - #include <asm/apic.h> [kbuild test robot]
>> - use div64_u64() [kbuild test robot]
>> - DECLARE_WORK -> DECLARE_DELAYED_WORK as testing showed there's some bug
>>   in Hyper-V hypervisor and disabling emulation after receiving interrupt
>>   may screw TSC counters.
>
> Looks kinda fragile...

I believe this is temporary and Microsoft will fix things up host side.

>
>> ---
>>  arch/x86/entry/entry_64.S          |  4 +++
>>  arch/x86/hyperv/hv_init.c          | 71 ++++++++++++++++++++++++++++++++++++++
>>  arch/x86/include/asm/entry_arch.h  |  4 +++
>>  arch/x86/include/asm/hw_irq.h      |  1 +
>>  arch/x86/include/asm/irq_vectors.h |  7 +++-
>>  arch/x86/include/asm/mshyperv.h    |  8 +++++
>>  arch/x86/include/uapi/asm/hyperv.h | 27 +++++++++++++++
>>  arch/x86/kernel/idt.c              |  3 ++
>>  8 files changed, 124 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> index f81d50d7ceac..a32730b260bc 100644
>> --- a/arch/x86/entry/entry_64.S
>> +++ b/arch/x86/entry/entry_64.S
>> @@ -826,6 +826,10 @@ apicinterrupt SPURIOUS_APIC_VECTOR		spurious_interrupt		smp_spurious_interrupt
>>  apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
>>  #endif
>>  
>> +#if IS_ENABLED(CONFIG_HYPERV)
>> +apicinterrupt HYPERV_REENLIGHTENMENT_VECTOR	hyperv_reenlightenment_intr	smp_hyperv_reenlightenment_intr
>> +#endif
>> +
>>  /*
>>   * Exception entry points.
>>   */
>> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
>> index 1a6c63f721bc..bb62839ede81 100644
>> --- a/arch/x86/hyperv/hv_init.c
>> +++ b/arch/x86/hyperv/hv_init.c
>> @@ -18,6 +18,7 @@
>>   */
>>  
>>  #include <linux/types.h>
>> +#include <asm/apic.h>
>>  #include <asm/hypervisor.h>
>>  #include <asm/hyperv.h>
>>  #include <asm/mshyperv.h>
>> @@ -102,6 +103,76 @@ static int hv_cpu_init(unsigned int cpu)
>>  	return 0;
>>  }
>>  
>> +static void (*hv_reenlightenment_cb)(void);
>> +
>> +static void hv_reenlightenment_notify(struct work_struct *dummy)
>> +{
>> +	if (hv_reenlightenment_cb)
>> +		hv_reenlightenment_cb();
>> +}
>> +static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
>> +
>> +void hyperv_stop_tsc_emulation(void)
>> +{
>> +	u64 freq;
>> +	struct hv_tsc_emulation_status emu_status;
>> +
>> +	rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
>> +	emu_status.inprogress = 0;
>> +	wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
>> +
>> +	rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
>
> IIRC the availability of this msr is not guaranteed (I guess in reality
> it's present if the reenlightenment is supported, but I'd rather check).
>

Will do.

>> +	tsc_khz = div64_u64(freq, 1000);
>> +}
>> +EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
>> +
>> +void register_hv_tsc_update(void (*cb)(void))
>> +{
>
> The function name seems unfortunate.  IMHO such a name suggests
> registering a callback on a notifier chain (rather than unconditionally
> replacing the old callback), and having no other side effects.

I see, any suggestion? register_hv_reenlightenment_cb? register_hv_tscchange_cb?

>
>> +	struct hv_reenlightenment_control re_ctrl = {
>> +		.vector = HYPERV_REENLIGHTENMENT_VECTOR,
>> +		.enabled = 1,
>> +		.target_vp = hv_vp_index[smp_processor_id()]
>> +	};
>> +	struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
>> +
>> +	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
>> +		return;
>
> What happens then?  L2 guests keep running with their clocks ticking at
> a different speed?
>

In reality this never happens -- when nested virtualization is
supported, reenlightenment is also available. In theory, L0 can emulate
TSC accesses forever after migration.

>> +
>> +	hv_reenlightenment_cb = cb;
>> +
>> +	/* Make sure callback is registered before we write to MSRs */
>> +	wmb();
>> +
>> +	wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
>> +	wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
>> +}
>> +EXPORT_SYMBOL_GPL(register_hv_tsc_update);
>> +
>> +void unregister_hv_tsc_update(void)
>> +{
>> +	struct hv_reenlightenment_control re_ctrl;
>> +
>> +	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
>> +		return;
>> +
>> +	rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
>> +	re_ctrl.enabled = 0;
>> +	wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
>> +
>> +	hv_reenlightenment_cb = NULL;
>> +}
>> +EXPORT_SYMBOL_GPL(unregister_hv_tsc_update);
>> +
>> +asmlinkage __visible void
>> +__irq_entry smp_hyperv_reenlightenment_intr(struct pt_regs *regs)
>> +{
>> +	entering_ack_irq();
>> +
>> +	schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
>> +
>> +	exiting_irq();
>> +}
>> +
>>  /*
>>   * This function is to be invoked early in the boot sequence after the
>>   * hypervisor has been detected.
>> diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
>> index 416422762845..eb936cc49b62 100644
>> --- a/arch/x86/include/asm/entry_arch.h
>> +++ b/arch/x86/include/asm/entry_arch.h
>> @@ -54,3 +54,7 @@ BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
>>  BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
>>  #endif
>>  #endif
>> +
>> +#if IS_ENABLED(CONFIG_HYPERV)
>> +BUILD_INTERRUPT(hyperv_reenlightenment_intr, HYPERV_REENLIGHTENMENT_VECTOR)
>> +#endif
>> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
>> index 2851077b6051..c65193dac7d9 100644
>> --- a/arch/x86/include/asm/hw_irq.h
>> +++ b/arch/x86/include/asm/hw_irq.h
>> @@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
>>  extern asmlinkage void kvm_posted_intr_nested_ipi(void);
>>  extern asmlinkage void error_interrupt(void);
>>  extern asmlinkage void irq_work_interrupt(void);
>> +extern asmlinkage void hyperv_reenlightenment_intr(void);
>>  
>>  extern asmlinkage void spurious_interrupt(void);
>>  extern asmlinkage void thermal_interrupt(void);
>> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
>> index 67421f649cfa..e71c1120426b 100644
>> --- a/arch/x86/include/asm/irq_vectors.h
>> +++ b/arch/x86/include/asm/irq_vectors.h
>> @@ -103,7 +103,12 @@
>>  #endif
>>  
>>  #define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
>> -#define LOCAL_TIMER_VECTOR		0xee
>> +
>> +#if IS_ENABLED(CONFIG_HYPERV)
>> +#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
>> +#endif
>> +
>> +#define LOCAL_TIMER_VECTOR		0xed
>>  
>>  #define NR_VECTORS			 256
>>  
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index a0b34994f453..43164b097585 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -314,11 +314,19 @@ void hyper_alloc_mmu(void);
>>  void hyperv_report_panic(struct pt_regs *regs, long err);
>>  bool hv_is_hypercall_page_setup(void);
>>  void hyperv_cleanup(void);
>> +
>> +asmlinkage void smp_hyperv_reenlightenment_intr(struct pt_regs *regs);
>> +void register_hv_tsc_update(void (*cb)(void));
>> +void unregister_hv_tsc_update(void);
>> +void hyperv_stop_tsc_emulation(void);
>>  #else /* CONFIG_HYPERV */
>>  static inline void hyperv_init(void) {}
>>  static inline bool hv_is_hypercall_page_setup(void) { return false; }
>>  static inline void hyperv_cleanup(void) {}
>>  static inline void hyperv_setup_mmu_ops(void) {}
>> +static inline void register_hv_tsc_update(void (*cb)(void)) {}
>> +static inline void unregister_hv_tsc_update(void) {}
>> +static inline void hyperv_stop_tsc_emulation(void) {};
>>  #endif /* CONFIG_HYPERV */
>>  
>>  #ifdef CONFIG_HYPERV_TSCPAGE
>> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
>> index 1a5bfead93b4..197c2e6c7376 100644
>> --- a/arch/x86/include/uapi/asm/hyperv.h
>> +++ b/arch/x86/include/uapi/asm/hyperv.h
>> @@ -40,6 +40,9 @@
>>   */
>>  #define HV_X64_ACCESS_FREQUENCY_MSRS		(1 << 11)
>>  
>> +/* AccessReenlightenmentControls privilege */
>> +#define HV_X64_ACCESS_REENLIGHTENMENT		BIT(13)
>> +
>>  /*
>>   * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
>>   * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
>> @@ -234,6 +237,30 @@
>>  #define HV_X64_MSR_CRASH_PARAMS		\
>>  		(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
>>  
>> +/* TSC emulation after migration */
>> +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
>> +
>> +struct hv_reenlightenment_control {
>> +	u64 vector:8;
>> +	u64 reserved1:8;
>> +	u64 enabled:1;
>> +	u64 reserved2:15;
>> +	u64 target_vp:32;
>> +};
>> +
>> +#define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
>> +#define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108
>> +
>> +struct hv_tsc_emulation_control {
>> +	u64 enabled:1;
>> +	u64 reserved:63;
>> +};
>> +
>> +struct hv_tsc_emulation_status {
>> +	u64 inprogress:1;
>> +	u64 reserved:63;
>> +};
>> +
>>  #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
>>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
>>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
>> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
>> index d985cef3984f..5b8512d48aa3 100644
>> --- a/arch/x86/kernel/idt.c
>> +++ b/arch/x86/kernel/idt.c
>> @@ -140,6 +140,9 @@ static const __initdata struct idt_data apic_idts[] = {
>>  # ifdef CONFIG_IRQ_WORK
>>  	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
>>  # endif
>> +#if IS_ENABLED(CONFIG_HYPERV)
>> +	INTG(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_intr),
>> +#endif
>>  	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
>>  	INTG(ERROR_APIC_VECTOR,		error_interrupt),
>>  #endif
>
> Roman.
Roman Kagan Dec. 11, 2017, 5:10 p.m. UTC | #3
On Mon, Dec 11, 2017 at 10:56:33AM +0100, Vitaly Kuznetsov wrote:
> Roman Kagan <rkagan@virtuozzo.com> writes:
> > On Fri, Dec 08, 2017 at 11:49:57AM +0100, Vitaly Kuznetsov wrote:
> >> +void register_hv_tsc_update(void (*cb)(void))
> >> +{
> >
> > The function name seems unfortunate.  IMHO such a name suggests
> > registering a callback on a notifier chain (rather than unconditionally
> > replacing the old callback), and having no other side effects.
> 
> I see, any suggestion? register_hv_reenlightenment_cb? register_hv_tscchange_cb?

IMHO arm_hv_reenlightenment_cb or arm_hv_tscchange_cb would be better,
but I'm not very good at giving descriptive names.

> 
> >
> >> +	struct hv_reenlightenment_control re_ctrl = {
> >> +		.vector = HYPERV_REENLIGHTENMENT_VECTOR,
> >> +		.enabled = 1,
> >> +		.target_vp = hv_vp_index[smp_processor_id()]
> >> +	};
> >> +	struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
> >> +
> >> +	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
> >> +		return;
> >
> > What happens then?  L2 guests keep running with their clocks ticking at
> > a different speed?
> >
> 
> In reallity this never happens -- in case nested virtualization is
> supported reenlightenment is also available. In theory, L0 can emulate
> TSC acceess for forever after migration.

I would think that Hyper-V only started rdtsc emulation if
TSC_EMULATION_CONTROL was turned on, which wouldn't happen here.

But indeed, normally this shouldn't be a problem.  It may make sense
just to issue a warning if the feature is unsupported, though.

Roman.
Vitaly Kuznetsov Dec. 12, 2017, 8:04 a.m. UTC | #4
Roman Kagan <rkagan@virtuozzo.com> writes:

> On Mon, Dec 11, 2017 at 10:56:33AM +0100, Vitaly Kuznetsov wrote:
>> Roman Kagan <rkagan@virtuozzo.com> writes:
>> > On Fri, Dec 08, 2017 at 11:49:57AM +0100, Vitaly Kuznetsov wrote:
>> >> +void register_hv_tsc_update(void (*cb)(void))
>> >> +{
>> >
>> > The function name seems unfortunate.  IMHO such a name suggests
>> > registering a callback on a notifier chain (rather than unconditionally
>> > replacing the old callback), and having no other side effects.
>> 
>> I see, any suggestion? register_hv_reenlightenment_cb? register_hv_tscchange_cb?
>
> IMHO arm_hv_reenlightenment_cb or arm_hv_tscchange_cb would be better,
> but I'm not very good at giving descriptive names.
>

I would probably try to avoid using the word 'arm' in x86 code to assist
poor git-greppers :-) And we actually need a pair of functions
(enable/disable). I will probably go with

set_hv_tscchange_cb()
clear_hv_tscchange_cb()

in v2 unless there's a better suggestion.

>> 
>> >
>> >> +	struct hv_reenlightenment_control re_ctrl = {
>> >> +		.vector = HYPERV_REENLIGHTENMENT_VECTOR,
>> >> +		.enabled = 1,
>> >> +		.target_vp = hv_vp_index[smp_processor_id()]
>> >> +	};
>> >> +	struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
>> >> +
>> >> +	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
>> >> +		return;
>> >
>> > What happens then?  L2 guests keep running with their clocks ticking at
>> > a different speed?
>> >
>> 
>> In reallity this never happens -- in case nested virtualization is
>> supported reenlightenment is also available. In theory, L0 can emulate
>> TSC acceess for forever after migration.
>
> I would think that Hyper-V only started rdtsc emulation if
> TSC_EMULATION_CONTROL was turned on, which wouldn't happen here.
>

Yes, this is the de-facto behavior I observe with WS2016.

> But indeed, normally this shouldn't be a problem.  It may make sense
> just to issue a warning if the feature is unsupported, though.

Will do in v2, thanks.
Michael Kelley (EOSG) Dec. 13, 2017, 12:50 a.m. UTC | #5
On Fri, Dec 08, 2017 at 11:49:57AM +0100, Vitaly Kuznetsov wrote:

> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
> Sent: Friday, December 8, 2017 2:50 AM
> To: kvm@vger.kernel.org; x86@kernel.org
> Cc: Paolo Bonzini <pbonzini@redhat.com>; Radim Krčmář <rkrcmar@redhat.com>; Thomas
> Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; H. Peter Anvin
> <hpa@zytor.com>; KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>; Michael
> Kelley (EOSG) <Michael.H.Kelley@microsoft.com>; Andy Lutomirski <luto@kernel.org>;
> Mohammed Gamal <mmorsy@redhat.com>; Cathy Avery <cavery@redhat.com>; linux-
> kernel@vger.kernel.org; devel@linuxdriverproject.org
> Subject: [PATCH 3/6] x86/hyper-v: reenlightenment notifications support
> 
> Hyper-V supports Live Migration notification. This is supposed to be used in conjunction with
> TSC emulation: when we are migrated to a host with different TSC frequency for some short
> period host emulates our accesses to TSC and sends us an interrupt to notify about the event.
> When we're done updating everything we can disable TSC emulation and everything will start
> working fast again.
> 
> We didn't need these notifications before as Hyper-V guests are not supposed to use TSC as a
> clocksource: in Linux we even mark it as unstable on boot. Guests normally use 'tsc page'
> clocksouce and host updates its values on migrations automatically.
> 
> Things change when we want to run nested virtualization: even when we pass through PV
> clocksources (kvm-clock or tsc page) to our guests we need to know TSC frequency and when it
> changes.
> 
> Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies
> EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The right one to check is
> EAX:BIT(13) of CPUID:0x40000003. I was assured that the fix in on the way.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

[snip]

> diff --git a/arch/x86/include/asm/irq_vectors.h
> b/arch/x86/include/asm/irq_vectors.h
> index 67421f649cfa..e71c1120426b 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -103,7 +103,12 @@
>  #endif
> 
>  #define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
> -#define LOCAL_TIMER_VECTOR		0xee
> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
> +#endif
> +
> +#define LOCAL_TIMER_VECTOR		0xed
> 
>  #define NR_VECTORS			 256

[snip]

Since you are pre-allocating a new vector, would you want to update the irq_cpustat_t
data structure and your interrupt handler to count the occurrences of these interrupts,
and update arch_show_interrupts() to show the count?  Then cat /proc/interrupts
will show the count.   The reenlightenment interrupts will presumably be rare, but so
are some of the others that are already counted and displayed, and it seems like
consistency should be maintained.

Michael
Vitaly Kuznetsov Dec. 13, 2017, 10:03 a.m. UTC | #6
"Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com> writes:

> On Fri, Dec 08, 2017 at 11:49:57AM +0100, Vitaly Kuznetsov wrote:
>
>> -----Original Message-----
>> From: Vitaly Kuznetsov [mailto:vkuznets@redhat.com]
>> Sent: Friday, December 8, 2017 2:50 AM
>> To: kvm@vger.kernel.org; x86@kernel.org
>> Cc: Paolo Bonzini <pbonzini@redhat.com>; Radim Krčmář <rkrcmar@redhat.com>; Thomas
>> Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; H. Peter Anvin
>> <hpa@zytor.com>; KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
>> <haiyangz@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>; Michael
>> Kelley (EOSG) <Michael.H.Kelley@microsoft.com>; Andy Lutomirski <luto@kernel.org>;
>> Mohammed Gamal <mmorsy@redhat.com>; Cathy Avery <cavery@redhat.com>; linux-
>> kernel@vger.kernel.org; devel@linuxdriverproject.org
>> Subject: [PATCH 3/6] x86/hyper-v: reenlightenment notifications support
>> 
>> Hyper-V supports Live Migration notification. This is supposed to be used in conjunction with
>> TSC emulation: when we are migrated to a host with different TSC frequency for some short
>> period host emulates our accesses to TSC and sends us an interrupt to notify about the event.
>> When we're done updating everything we can disable TSC emulation and everything will start
>> working fast again.
>> 
>> We didn't need these notifications before as Hyper-V guests are not supposed to use TSC as a
>> clocksource: in Linux we even mark it as unstable on boot. Guests normally use 'tsc page'
>> clocksouce and host updates its values on migrations automatically.
>> 
>> Things change when we want to run nested virtualization: even when we pass through PV
>> clocksources (kvm-clock or tsc page) to our guests we need to know TSC frequency and when it
>> changes.
>> 
>> Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies
>> EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The right one to check is
>> EAX:BIT(13) of CPUID:0x40000003. I was assured that the fix in on the way.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>
> [snip]
>
>> diff --git a/arch/x86/include/asm/irq_vectors.h
>> b/arch/x86/include/asm/irq_vectors.h
>> index 67421f649cfa..e71c1120426b 100644
>> --- a/arch/x86/include/asm/irq_vectors.h
>> +++ b/arch/x86/include/asm/irq_vectors.h
>> @@ -103,7 +103,12 @@
>>  #endif
>> 
>>  #define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
>> -#define LOCAL_TIMER_VECTOR		0xee
>> +
>> +#if IS_ENABLED(CONFIG_HYPERV)
>> +#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
>> +#endif
>> +
>> +#define LOCAL_TIMER_VECTOR		0xed
>> 
>>  #define NR_VECTORS			 256
>
> [snip]
>
> Since you are pre-allocating a new vector, would you want to update the irq_cpustat_t
> data structure and your interrupt handler to count the occurrences of these interrupts,
> and update arch_show_interrupts() to show the count?  Then cat /proc/interrupts
> will show the count.   The reenlightenment interrupts will presumably be rare, but so
> are some of the others that are already counted and displayed, and it seems like
> consistency should be maintained.

I could do that.

The problem with adding this entry to /proc/interrupts, as I see it,
is that everyone who has CONFIG_HYPERV enabled in their distro will see
one additional line which 99.9% of the time will be just '0'. I do,
however, see some value in seeing that an L1 migration occurred.

I'll add a separate patch to the series and let x86 maintainers decide
if it's worthy. Thanks,
diff mbox

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..a32730b260bc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -826,6 +826,10 @@  apicinterrupt SPURIOUS_APIC_VECTOR		spurious_interrupt		smp_spurious_interrupt
 apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
 #endif
 
+#if IS_ENABLED(CONFIG_HYPERV)
+apicinterrupt HYPERV_REENLIGHTENMENT_VECTOR	hyperv_reenlightenment_intr	smp_hyperv_reenlightenment_intr
+#endif
+
 /*
  * Exception entry points.
  */
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1a6c63f721bc..bb62839ede81 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -18,6 +18,7 @@ 
  */
 
 #include <linux/types.h>
+#include <asm/apic.h>
 #include <asm/hypervisor.h>
 #include <asm/hyperv.h>
 #include <asm/mshyperv.h>
@@ -102,6 +103,76 @@  static int hv_cpu_init(unsigned int cpu)
 	return 0;
 }
 
+static void (*hv_reenlightenment_cb)(void);
+
+static void hv_reenlightenment_notify(struct work_struct *dummy)
+{
+	if (hv_reenlightenment_cb)
+		hv_reenlightenment_cb();
+}
+static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
+
+void hyperv_stop_tsc_emulation(void)
+{
+	u64 freq;
+	struct hv_tsc_emulation_status emu_status;
+
+	rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+	emu_status.inprogress = 0;
+	wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+	rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+	tsc_khz = div64_u64(freq, 1000);
+}
+EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
+
+void register_hv_tsc_update(void (*cb)(void))
+{
+	struct hv_reenlightenment_control re_ctrl = {
+		.vector = HYPERV_REENLIGHTENMENT_VECTOR,
+		.enabled = 1,
+		.target_vp = hv_vp_index[smp_processor_id()]
+	};
+	struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
+
+	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
+		return;
+
+	hv_reenlightenment_cb = cb;
+
+	/* Make sure callback is registered before we write to MSRs */
+	wmb();
+
+	wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+	wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
+}
+EXPORT_SYMBOL_GPL(register_hv_tsc_update);
+
+void unregister_hv_tsc_update(void)
+{
+	struct hv_reenlightenment_control re_ctrl;
+
+	if (!(ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT))
+		return;
+
+	rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+	re_ctrl.enabled = 0;
+	wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+
+	hv_reenlightenment_cb = NULL;
+}
+EXPORT_SYMBOL_GPL(unregister_hv_tsc_update);
+
+asmlinkage __visible void
+__irq_entry smp_hyperv_reenlightenment_intr(struct pt_regs *regs)
+{
+	entering_ack_irq();
+
+	schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
+
+	exiting_irq();
+}
+
 /*
  * This function is to be invoked early in the boot sequence after the
  * hypervisor has been detected.
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 416422762845..eb936cc49b62 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -54,3 +54,7 @@  BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
 BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
 #endif
 #endif
+
+#if IS_ENABLED(CONFIG_HYPERV)
+BUILD_INTERRUPT(hyperv_reenlightenment_intr, HYPERV_REENLIGHTENMENT_VECTOR)
+#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 2851077b6051..c65193dac7d9 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -36,6 +36,7 @@  extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
 extern asmlinkage void kvm_posted_intr_nested_ipi(void);
 extern asmlinkage void error_interrupt(void);
 extern asmlinkage void irq_work_interrupt(void);
+extern asmlinkage void hyperv_reenlightenment_intr(void);
 
 extern asmlinkage void spurious_interrupt(void);
 extern asmlinkage void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 67421f649cfa..e71c1120426b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -103,7 +103,12 @@ 
 #endif
 
 #define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
-#define LOCAL_TIMER_VECTOR		0xee
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
+#endif
+
+#define LOCAL_TIMER_VECTOR		0xed
 
 #define NR_VECTORS			 256
 
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index a0b34994f453..43164b097585 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -314,11 +314,19 @@  void hyper_alloc_mmu(void);
 void hyperv_report_panic(struct pt_regs *regs, long err);
 bool hv_is_hypercall_page_setup(void);
 void hyperv_cleanup(void);
+
+asmlinkage void smp_hyperv_reenlightenment_intr(struct pt_regs *regs);
+void register_hv_tsc_update(void (*cb)(void));
+void unregister_hv_tsc_update(void);
+void hyperv_stop_tsc_emulation(void);
 #else /* CONFIG_HYPERV */
 static inline void hyperv_init(void) {}
 static inline bool hv_is_hypercall_page_setup(void) { return false; }
 static inline void hyperv_cleanup(void) {}
 static inline void hyperv_setup_mmu_ops(void) {}
+static inline void register_hv_tsc_update(void (*cb)(void)) {}
+static inline void unregister_hv_tsc_update(void) {}
+static inline void hyperv_stop_tsc_emulation(void) {};
 #endif /* CONFIG_HYPERV */
 
 #ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 1a5bfead93b4..197c2e6c7376 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -40,6 +40,9 @@ 
  */
 #define HV_X64_ACCESS_FREQUENCY_MSRS		(1 << 11)
 
+/* AccessReenlightenmentControls privilege */
+#define HV_X64_ACCESS_REENLIGHTENMENT		BIT(13)
+
 /*
  * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
  * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
@@ -234,6 +237,30 @@ 
 #define HV_X64_MSR_CRASH_PARAMS		\
 		(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
 
+/* TSC emulation after migration */
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
+
+struct hv_reenlightenment_control {
+	u64 vector:8;
+	u64 reserved1:8;
+	u64 enabled:1;
+	u64 reserved2:15;
+	u64 target_vp:32;
+};
+
+#define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108
+
+struct hv_tsc_emulation_control {
+	u64 enabled:1;
+	u64 reserved:63;
+};
+
+struct hv_tsc_emulation_status {
+	u64 inprogress:1;
+	u64 reserved:63;
+};
+
 #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..5b8512d48aa3 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -140,6 +140,9 @@  static const __initdata struct idt_data apic_idts[] = {
 # ifdef CONFIG_IRQ_WORK
 	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
 # endif
+#if IS_ENABLED(CONFIG_HYPERV)
+	INTG(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_intr),
+#endif
 	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
 	INTG(ERROR_APIC_VECTOR,		error_interrupt),
 #endif