diff mbox

[6/6] x86/kvm: support Hyper-V reenlightenment

Message ID 20171208105000.25116-7-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vitaly Kuznetsov Dec. 8, 2017, 10:50 a.m. UTC
When we run nested KVM on Hyper-V guests we need to update masterclocks for
all guests when L1 migrates to a host with different TSC frequency.
Implement the procedure in the following way:
- Pause all guests.
- Tell our host (Hyper-V) to stop emulating TSC accesses.
- Update our gtod copy, recompute clocks.
- Unpause all guests.

This is somewhat similar to cpufreq but we have two important differences:
we can only disable TSC emulation globally (on all CPUs) and we don't know
the new TSC frequency until we turn the emulation off, so we can't
'prepare' ourselves for the event.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

Roman Kagan Dec. 8, 2017, 5:39 p.m. UTC | #1
On Fri, Dec 08, 2017 at 11:50:00AM +0100, Vitaly Kuznetsov wrote:
> When we run nested KVM on Hyper-V guests we need to update masterclocks for
> all guests when L1 migrates to a host with different TSC frequency.
> Implement the procedure in the following way:
> - Pause all guests.
> - Tell our host (Hyper-V) to stop emulating TSC accesses.
> - Update our gtod copy, recompute clocks.
> - Unpause all guests.
> 
> This is somewhat similar to cpufreq but we have two important differences:
> we can only disable TSC emulation globally (on all CPUs) and we don't know
> the new TSC frequency until we turn the emulation off so we can't
> 'prepare' ourselves to the event.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 96e04a0cb921..04d90712ffd2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -68,6 +68,7 @@
>  #include <asm/div64.h>
>  #include <asm/irq_remapping.h>
>  #include <asm/mshyperv.h>
> +#include <asm/hypervisor.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include "trace.h"
> @@ -5946,6 +5947,43 @@ static void tsc_khz_changed(void *data)
>  	__this_cpu_write(cpu_tsc_khz, khz);
>  }
>  
> +void kvm_hyperv_tsc_notifier(void)
> +{
> +#ifdef CONFIG_X86_64
> +	struct kvm *kvm;
> +	struct kvm_vcpu *vcpu;
> +	int cpu;
> +
> +	spin_lock(&kvm_lock);
> +	list_for_each_entry(kvm, &vm_list, vm_list)
> +		kvm_make_mclock_inprogress_request(kvm);
> +
> +	hyperv_stop_tsc_emulation();
> +
> +	/* TSC frequency always matches when on Hyper-V */
> +	for_each_present_cpu(cpu)
> +		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
> +	kvm_max_guest_tsc_khz = tsc_khz;
> +
> +	list_for_each_entry(kvm, &vm_list, vm_list) {
> +		struct kvm_arch *ka = &kvm->arch;
> +
> +		spin_lock(&ka->pvclock_gtod_sync_lock);
> +
> +		pvclock_update_vm_gtod_copy(kvm);
> +
> +		kvm_for_each_vcpu(cpu, vcpu, kvm)
> +			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
> +
> +		kvm_for_each_vcpu(cpu, vcpu, kvm)
> +			kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
> +
> +		spin_unlock(&ka->pvclock_gtod_sync_lock);
> +	}
> +	spin_unlock(&kvm_lock);

Can't you skip all this if the tsc frequency hasn't changed (which
should probably be the case when the CPU supports tsc frequency
scaling)?

Roman.
Vitaly Kuznetsov Dec. 11, 2017, 9:57 a.m. UTC | #2
Roman Kagan <rkagan@virtuozzo.com> writes:

> On Fri, Dec 08, 2017 at 11:50:00AM +0100, Vitaly Kuznetsov wrote:
>> When we run nested KVM on Hyper-V guests we need to update masterclocks for
>> all guests when L1 migrates to a host with different TSC frequency.
>> Implement the procedure in the following way:
>> - Pause all guests.
>> - Tell our host (Hyper-V) to stop emulating TSC accesses.
>> - Update our gtod copy, recompute clocks.
>> - Unpause all guests.
>> 
>> This is somewhat similar to cpufreq but we have two important differences:
>> we can only disable TSC emulation globally (on all CPUs) and we don't know
>> the new TSC frequency until we turn the emulation off so we can't
>> 'prepare' ourselves to the event.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>>  arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 45 insertions(+)
>> 
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 96e04a0cb921..04d90712ffd2 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -68,6 +68,7 @@
>>  #include <asm/div64.h>
>>  #include <asm/irq_remapping.h>
>>  #include <asm/mshyperv.h>
>> +#include <asm/hypervisor.h>
>>  
>>  #define CREATE_TRACE_POINTS
>>  #include "trace.h"
>> @@ -5946,6 +5947,43 @@ static void tsc_khz_changed(void *data)
>>  	__this_cpu_write(cpu_tsc_khz, khz);
>>  }
>>  
>> +void kvm_hyperv_tsc_notifier(void)
>> +{
>> +#ifdef CONFIG_X86_64
>> +	struct kvm *kvm;
>> +	struct kvm_vcpu *vcpu;
>> +	int cpu;
>> +
>> +	spin_lock(&kvm_lock);
>> +	list_for_each_entry(kvm, &vm_list, vm_list)
>> +		kvm_make_mclock_inprogress_request(kvm);
>> +
>> +	hyperv_stop_tsc_emulation();
>> +
>> +	/* TSC frequency always matches when on Hyper-V */
>> +	for_each_present_cpu(cpu)
>> +		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>> +	kvm_max_guest_tsc_khz = tsc_khz;
>> +
>> +	list_for_each_entry(kvm, &vm_list, vm_list) {
>> +		struct kvm_arch *ka = &kvm->arch;
>> +
>> +		spin_lock(&ka->pvclock_gtod_sync_lock);
>> +
>> +		pvclock_update_vm_gtod_copy(kvm);
>> +
>> +		kvm_for_each_vcpu(cpu, vcpu, kvm)
>> +			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>> +
>> +		kvm_for_each_vcpu(cpu, vcpu, kvm)
>> +			kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
>> +
>> +		spin_unlock(&ka->pvclock_gtod_sync_lock);
>> +	}
>> +	spin_unlock(&kvm_lock);
>
> Can't you skip all this if the tsc frequency hasn't changed (which
> should probably be the case when the CPU supports tsc frequency
> scaling)?
>

The thing is that we don't know whether it changed or not: only after
disabling TSC emulation will we be able to read the new frequency from the
host, and we need to do this with all VMs paused.
Vitaly Kuznetsov Dec. 12, 2017, 8:17 a.m. UTC | #3
Vitaly Kuznetsov <vkuznets@redhat.com> writes:

> Roman Kagan <rkagan@virtuozzo.com> writes:
>
>> On Fri, Dec 08, 2017 at 11:50:00AM +0100, Vitaly Kuznetsov wrote:
>>> When we run nested KVM on Hyper-V guests we need to update masterclocks for
>>> all guests when L1 migrates to a host with different TSC frequency.
>>> Implement the procedure in the following way:
>>> - Pause all guests.
>>> - Tell our host (Hyper-V) to stop emulating TSC accesses.
>>> - Update our gtod copy, recompute clocks.
>>> - Unpause all guests.
>>> 
>>> This is somewhat similar to cpufreq but we have two important differences:
>>> we can only disable TSC emulation globally (on all CPUs) and we don't know
>>> the new TSC frequency until we turn the emulation off so we can't
>>> 'prepare' ourselves to the event.
>>> 
>>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>>> ---
>>>  arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 45 insertions(+)
>>> 
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 96e04a0cb921..04d90712ffd2 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -68,6 +68,7 @@
>>>  #include <asm/div64.h>
>>>  #include <asm/irq_remapping.h>
>>>  #include <asm/mshyperv.h>
>>> +#include <asm/hypervisor.h>
>>>  
>>>  #define CREATE_TRACE_POINTS
>>>  #include "trace.h"
>>> @@ -5946,6 +5947,43 @@ static void tsc_khz_changed(void *data)
>>>  	__this_cpu_write(cpu_tsc_khz, khz);
>>>  }
>>>  
>>> +void kvm_hyperv_tsc_notifier(void)
>>> +{
>>> +#ifdef CONFIG_X86_64
>>> +	struct kvm *kvm;
>>> +	struct kvm_vcpu *vcpu;
>>> +	int cpu;
>>> +
>>> +	spin_lock(&kvm_lock);
>>> +	list_for_each_entry(kvm, &vm_list, vm_list)
>>> +		kvm_make_mclock_inprogress_request(kvm);
>>> +
>>> +	hyperv_stop_tsc_emulation();
>>> +
>>> +	/* TSC frequency always matches when on Hyper-V */
>>> +	for_each_present_cpu(cpu)
>>> +		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
>>> +	kvm_max_guest_tsc_khz = tsc_khz;
>>> +
>>> +	list_for_each_entry(kvm, &vm_list, vm_list) {
>>> +		struct kvm_arch *ka = &kvm->arch;
>>> +
>>> +		spin_lock(&ka->pvclock_gtod_sync_lock);
>>> +
>>> +		pvclock_update_vm_gtod_copy(kvm);
>>> +
>>> +		kvm_for_each_vcpu(cpu, vcpu, kvm)
>>> +			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
>>> +
>>> +		kvm_for_each_vcpu(cpu, vcpu, kvm)
>>> +			kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
>>> +
>>> +		spin_unlock(&ka->pvclock_gtod_sync_lock);
>>> +	}
>>> +	spin_unlock(&kvm_lock);
>>
>> Can't you skip all this if the tsc frequency hasn't changed (which
>> should probably be the case when the CPU supports tsc frequency
>> scaling)?
>>
>
> The thing is that we don't know if it changed or not: only after
> disabling TSC emulation we'll be able to read the new one from the host
> and we need to do this with all VMs paused.

(having second thoughts here)

While we don't know whether the TSC frequency has changed, we can check
the emulation status before calling the callback and, if TSC accesses are
not emulated, omit the call. However, it seems that the Hyper-V host (as of
WS2016) turns on emulation regardless of whether TSC scaling is present.

I'll add an emulation status check before issuing the callback in v2. The
change will go into PATCH3.
diff mbox

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 96e04a0cb921..04d90712ffd2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -68,6 +68,7 @@ 
 #include <asm/div64.h>
 #include <asm/irq_remapping.h>
 #include <asm/mshyperv.h>
+#include <asm/hypervisor.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -5946,6 +5947,43 @@  static void tsc_khz_changed(void *data)
 	__this_cpu_write(cpu_tsc_khz, khz);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Hyper-V TSC update callback: pause all guests, stop TSC emulation, then
+ * refresh every VM's gtod copy, request clock updates and unpause the guests.
+ */
+static void kvm_hyperv_tsc_notifier(void)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int cpu;
+
+	spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list)
+		kvm_make_mclock_inprogress_request(kvm);
+
+	hyperv_stop_tsc_emulation();
+
+	/* TSC frequency always matches when on Hyper-V */
+	for_each_present_cpu(cpu)
+		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+	kvm_max_guest_tsc_khz = tsc_khz;
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		struct kvm_arch *ka = &kvm->arch;
+
+		spin_lock(&ka->pvclock_gtod_sync_lock);
+		pvclock_update_vm_gtod_copy(kvm);
+		kvm_for_each_vcpu(cpu, vcpu, kvm)
+			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+		kvm_for_each_vcpu(cpu, vcpu, kvm)
+			kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+		spin_unlock(&ka->pvclock_gtod_sync_lock);
+	}
+	spin_unlock(&kvm_lock);
+}
+#endif
+
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 				     void *data)
 {
@@ -6231,6 +6269,9 @@  int kvm_arch_init(void *opaque)
 	kvm_lapic_init();
 #ifdef CONFIG_X86_64
 	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+
+	if (x86_hyper_type == X86_HYPER_MS_HYPERV)
+		register_hv_tsc_update(kvm_hyperv_tsc_notifier);
 #endif
 
 	return 0;
@@ -6243,6 +6284,10 @@  int kvm_arch_init(void *opaque)
 
 void kvm_arch_exit(void)
 {
+#ifdef CONFIG_X86_64
+	if (x86_hyper_type == X86_HYPER_MS_HYPERV)
+		unregister_hv_tsc_update();
+#endif
 	kvm_lapic_exit();
 	perf_unregister_guest_info_callbacks(&kvm_guest_cbs);