diff mbox

[2/2] KVM: X86: Implement PV send IPI support

Message ID 1530265876-18136-3-git-send-email-wanpengli@tencent.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wanpeng Li June 29, 2018, 9:51 a.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

Use a hypercall to send IPIs with a single vmexit, instead of one vmexit
per IPI in xAPIC/x2APIC physical mode and one vmexit per cluster in
x2APIC cluster mode.

Even with QEMU interrupt remapping and PV TLB shootdown enabled, I still
observe a ~14% performance boost in the ebizzy benchmark on a 64-vCPU VM,
and the total number of MSR-induced vmexits is reduced by ~70%.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 Documentation/virtual/kvm/cpuid.txt |  4 ++++
 arch/x86/kvm/cpuid.c                |  3 ++-
 arch/x86/kvm/x86.c                  | 25 +++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h       |  1 +
 4 files changed, 32 insertions(+), 1 deletion(-)

Comments

Vitaly Kuznetsov June 29, 2018, 10:09 a.m. UTC | #1
Wanpeng Li <kernellwp@gmail.com> writes:

> From: Wanpeng Li <wanpengli@tencent.com>
>
> Using hypercall to send IPIs by one vmexit instead of one by one for
> xAPIC/x2APIC physical mode and one vmexit per-cluster for x2APIC cluster 
> mode. 
>
> Even if enable qemu interrupt remapping and PV TLB Shootdown, I can still 
> observe ~14% performance boost by ebizzy benchmark for 64 vCPUs VM, the 
> total msr-induced vmexits reduce ~70%.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  Documentation/virtual/kvm/cpuid.txt |  4 ++++
>  arch/x86/kvm/cpuid.c                |  3 ++-
>  arch/x86/kvm/x86.c                  | 25 +++++++++++++++++++++++++
>  include/uapi/linux/kvm_para.h       |  1 +
>  4 files changed, 32 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> index ab022dc..d72359f 100644
> --- a/Documentation/virtual/kvm/cpuid.txt
> +++ b/Documentation/virtual/kvm/cpuid.txt
> @@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
>                                     ||       || can be enabled by setting bit 2
>                                     ||       || when writing to msr 0x4b564d02
>  ------------------------------------------------------------------------------
> +KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
> +                                   ||       || before enabling paravirtualized
> +                                   ||       || send IPIs.

In case we decide to apply this as-is we'll likely need a new feature
for PV IPI with > 64 vCPUs (or how else would the guest know if the host
is capable or not?)

> +------------------------------------------------------------------------------
>  KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
>                                     ||       || per-cpu warps are expected in
>                                     ||       || kvmclock.
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7e042e3..7bcfa61 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
>  			     (1 << KVM_FEATURE_PV_UNHALT) |
>  			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
> -			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
> +			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
> +			     (1 << KVM_FEATURE_PV_SEND_IPI);
>
>  		if (sched_info_on())
>  			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0046aa7..c2e6cdb 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6689,6 +6689,27 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
>  	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
>  }
>
> +static void kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap, u8 vector)
> +{
> +	struct kvm_apic_map *map;
> +	struct kvm_vcpu *vcpu;
> +	struct kvm_lapic_irq lapic_irq = {0};
> +	int i;
> +
> +	lapic_irq.delivery_mode = APIC_DM_FIXED;
> +	lapic_irq.vector = vector;
> +
> +	rcu_read_lock();
> +	map = rcu_dereference(kvm->arch.apic_map);
> +
> +	for_each_set_bit(i, &ipi_bitmap, sizeof(unsigned long)) {
> +		vcpu = map->phys_map[i]->vcpu;
> +		kvm_apic_set_irq(vcpu, &lapic_irq, NULL);
> +	}
> +
> +	rcu_read_unlock();
> +}
> +
>  void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
>  {
>  	vcpu->arch.apicv_active = false;
> @@ -6738,6 +6759,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>  		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
>  		break;
>  #endif
> +	case KVM_HC_SEND_IPI:
> +		kvm_pv_send_ipi(vcpu->kvm, a0, a1);
> +		ret = 0;
> +		break;
>  	default:
>  		ret = -KVM_ENOSYS;
>  		break;
> diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
> index dcf629d..7395f38 100644
> --- a/include/uapi/linux/kvm_para.h
> +++ b/include/uapi/linux/kvm_para.h
> @@ -26,6 +26,7 @@
>  #define KVM_HC_MIPS_EXIT_VM		7
>  #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
>  #define KVM_HC_CLOCK_PAIRING		9
> +#define KVM_HC_SEND_IPI         	10
>
>  /*
>   * hypercalls use architecture specific
Paolo Bonzini June 29, 2018, 10:45 a.m. UTC | #2
On 29/06/2018 11:51, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Using hypercall to send IPIs by one vmexit instead of one by one for
> xAPIC/x2APIC physical mode and one vmexit per-cluster for x2APIC cluster 
> mode. 
> 
> Even if enable qemu interrupt remapping and PV TLB Shootdown, I can still 
> observe ~14% performance boost by ebizzy benchmark for 64 vCPUs VM, the 
> total msr-induced vmexits reduce ~70%.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  Documentation/virtual/kvm/cpuid.txt |  4 ++++
>  arch/x86/kvm/cpuid.c                |  3 ++-
>  arch/x86/kvm/x86.c                  | 25 +++++++++++++++++++++++++
>  include/uapi/linux/kvm_para.h       |  1 +
>  4 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> index ab022dc..d72359f 100644
> --- a/Documentation/virtual/kvm/cpuid.txt
> +++ b/Documentation/virtual/kvm/cpuid.txt
> @@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
>                                     ||       || can be enabled by setting bit 2
>                                     ||       || when writing to msr 0x4b564d02
>  ------------------------------------------------------------------------------
> +KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
> +                                   ||       || before enabling paravirtualized
> +                                   ||       || send IPIs.

It's not "enabling" but "using".  You also need to document the
hypercall itself.

Paolo

> +------------------------------------------------------------------------------
>  KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
>                                     ||       || per-cpu warps are expected in
>                                     ||       || kvmclock.
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7e042e3..7bcfa61 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
>  			     (1 << KVM_FEATURE_PV_UNHALT) |
>  			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
> -			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
> +			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
> +			     (1 << KVM_FEATURE_PV_SEND_IPI);
>  
>  		if (sched_info_on())
>  			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0046aa7..c2e6cdb 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6689,6 +6689,27 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
>  	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
>  }
>  
> +static void kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap, u8 vector)
> +{
> +	struct kvm_apic_map *map;
> +	struct kvm_vcpu *vcpu;
> +	struct kvm_lapic_irq lapic_irq = {0};
> +	int i;
> +
> +	lapic_irq.delivery_mode = APIC_DM_FIXED;
> +	lapic_irq.vector = vector;
> +
> +	rcu_read_lock();
> +	map = rcu_dereference(kvm->arch.apic_map);
> +
> +	for_each_set_bit(i, &ipi_bitmap, sizeof(unsigned long)) {
> +		vcpu = map->phys_map[i]->vcpu;
> +		kvm_apic_set_irq(vcpu, &lapic_irq, NULL);
> +	}
> +
> +	rcu_read_unlock();
> +}
> +
>  void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
>  {
>  	vcpu->arch.apicv_active = false;
> @@ -6738,6 +6759,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>  		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
>  		break;
>  #endif
> +	case KVM_HC_SEND_IPI:
> +		kvm_pv_send_ipi(vcpu->kvm, a0, a1);
> +		ret = 0;
> +		break;
>  	default:
>  		ret = -KVM_ENOSYS;
>  		break;
> diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
> index dcf629d..7395f38 100644
> --- a/include/uapi/linux/kvm_para.h
> +++ b/include/uapi/linux/kvm_para.h
> @@ -26,6 +26,7 @@
>  #define KVM_HC_MIPS_EXIT_VM		7
>  #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
>  #define KVM_HC_CLOCK_PAIRING		9
> +#define KVM_HC_SEND_IPI         	10
>  
>  /*
>   * hypercalls use architecture specific
>
Paolo Bonzini June 29, 2018, 10:48 a.m. UTC | #3
On 29/06/2018 12:09, Vitaly Kuznetsov wrote:
>> +KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
>> +                                   ||       || before enabling paravirtualized
>> +                                   ||       || send IPIs.
> In case we decide to apply this as-is we'll likely need a new feature
> for PV IPI with > 64 vCPUs (or how else would the guest know if the host
> is capable or not?)
> 

Yes, it makes sense.  Perhaps we can do one of the following, or both:

1) add an argument for a "base vCPU id", so that you can use the
hypercall to send the IPI to CPUs 64..127, 128..191 etc.

2) have two bitmask arguments so that one hypercall handles 128 vCPUs.

to remove or limit the need for the more generic hypercall.

Paolo
Wanpeng Li June 30, 2018, 10:04 a.m. UTC | #4
On Fri, 29 Jun 2018 at 18:45, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/06/2018 11:51, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > Using hypercall to send IPIs by one vmexit instead of one by one for
> > xAPIC/x2APIC physical mode and one vmexit per-cluster for x2APIC cluster
> > mode.
> >
> > Even if enable qemu interrupt remapping and PV TLB Shootdown, I can still
> > observe ~14% performance boost by ebizzy benchmark for 64 vCPUs VM, the
> > total msr-induced vmexits reduce ~70%.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  Documentation/virtual/kvm/cpuid.txt |  4 ++++
> >  arch/x86/kvm/cpuid.c                |  3 ++-
> >  arch/x86/kvm/x86.c                  | 25 +++++++++++++++++++++++++
> >  include/uapi/linux/kvm_para.h       |  1 +
> >  4 files changed, 32 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> > index ab022dc..d72359f 100644
> > --- a/Documentation/virtual/kvm/cpuid.txt
> > +++ b/Documentation/virtual/kvm/cpuid.txt
> > @@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
> >                                     ||       || can be enabled by setting bit 2
> >                                     ||       || when writing to msr 0x4b564d02
> >  ------------------------------------------------------------------------------
> > +KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
> > +                                   ||       || before enabling paravirtualized
> > +                                   ||       || send IPIs.
>
> It's not "enabling" but "using".  You also need to document the
> hypercall itself.

Will fix it in v2.

Regards,
Wanpeng Li
Wanpeng Li June 30, 2018, 10:05 a.m. UTC | #5
On Fri, 29 Jun 2018 at 18:49, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/06/2018 12:09, Vitaly Kuznetsov wrote:
> >> +KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
> >> +                                   ||       || before enabling paravirtualized
> >> +                                   ||       || send IPIs.
> > In case we decide to apply this as-is we'll likely need a new feature
> > for PV IPI with > 64 vCPUs (or how else would the guest know if the host
> > is capable or not?)
> >
>
> Yes, it makes sense.  Perhaps we can do one of the following, or both:
>
> 1) add an argument for a "base vCPU id", so that you can use the
> hypercall to send the IPI to CPUs 64..127, 128..191 etc.
>
> 2) have two bitmask arguments so that one hypercall handles 128 vCPUs.
>
> to remove or limit the need for the more generic hypercall.

I have already done 2) in v2 and will send it out later.

Regards,
Wanpeng Li
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index ab022dc..d72359f 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -62,6 +62,10 @@  KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
                                    ||       || can be enabled by setting bit 2
                                    ||       || when writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || send IPIs.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3..7bcfa61 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@  static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
 			     (1 << KVM_FEATURE_PV_UNHALT) |
 			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
-			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+			     (1 << KVM_FEATURE_PV_SEND_IPI);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa7..c2e6cdb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6689,6 +6689,27 @@  static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
+static void kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap, u8 vector)
+{
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu;
+	struct kvm_lapic_irq lapic_irq = {0};
+	int i;
+
+	lapic_irq.delivery_mode = APIC_DM_FIXED;
+	lapic_irq.vector = vector;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	for_each_set_bit(i, &ipi_bitmap, sizeof(unsigned long)) {
+		vcpu = map->phys_map[i]->vcpu;
+		kvm_apic_set_irq(vcpu, &lapic_irq, NULL);
+	}
+
+	rcu_read_unlock();
+}
+
 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.apicv_active = false;
@@ -6738,6 +6759,10 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
 #endif
+	case KVM_HC_SEND_IPI:
+		kvm_pv_send_ipi(vcpu->kvm, a0, a1);
+		ret = 0;
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index dcf629d..7395f38 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -26,6 +26,7 @@ 
 #define KVM_HC_MIPS_EXIT_VM		7
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 #define KVM_HC_CLOCK_PAIRING		9
+#define KVM_HC_SEND_IPI         	10
 
 /*
  * hypercalls use architecture specific