[v2,1/2] KVM: X86: Implement PV IPI in linux guest

Message ID	1530526462-920-2-git-send-email-wanpengli@tencent.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: Wanpeng Li <kernellwp@gmail.com> To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org Cc: Paolo Bonzini <pbonzini@redhat.com>, =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>, Vitaly Kuznetsov <vkuznets@redhat.com> Subject: [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Date: Mon, 2 Jul 2018 18:14:21 +0800 Message-Id: <1530526462-920-2-git-send-email-wanpengli@tencent.com> In-Reply-To: <1530526462-920-1-git-send-email-wanpengli@tencent.com> References: <1530526462-920-1-git-send-email-wanpengli@tencent.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: kvm-owner@vger.kernel.org Precedence: bulk

Wanpeng Li July 2, 2018, 10:14 a.m. UTC

From: Wanpeng Li <wanpengli@tencent.com>

Implement PV IPIs in guest kernel.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kernel/kvm.c         | 99 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h |  1 +
 2 files changed, 100 insertions(+)

Vitaly Kuznetsov July 2, 2018, 10:26 a.m. UTC | #1

Wanpeng Li <kernellwp@gmail.com> writes:

> From: Wanpeng Li <wanpengli@tencent.com>
>
> Implement PV IPIs in guest kernel.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kernel/kvm.c         | 99 +++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm_para.h |  1 +
>  2 files changed, 100 insertions(+)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 5b2300b..7e3ee25 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -47,6 +47,7 @@
>  #include <asm/hypervisor.h>
>  #include <asm/kvm_guest.h>
>
> +static struct apic orig_apic;
>  static int kvmapf = 1;
>
>  static int __init parse_no_kvmapf(char *arg)
> @@ -454,6 +455,89 @@ static void __init sev_map_percpu_data(void)
>  }
>
>  #ifdef CONFIG_SMP
> +
> +#ifdef CONFIG_X86_64
> +static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> +{
> +	unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> +	int cpu, apic_id, ret = 1;
> +
> +	if (cpumask_empty(mask))
> +		return true;
> +
> +	local_irq_save(flags);
> +
> +	for_each_cpu(cpu, mask) {
> +		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> +		if (apic_id < BITS_PER_LONG)
> +			__set_bit(apic_id, &ipi_bitmap_low);
> +		else if (apic_id < 2 * BITS_PER_LONG)
> +			__set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> +		else
> +			goto ipi_mask_done;

Nit:

Both the fact that we don't set 'ret' here and the fact that the label
is named 'ipi_mask_done' -- which sounds like 'all OK' at least to me --
contribute to the feeling that we just skip sending IPIs in some cases.

I would prefer to see something like

else {
   ret = -EFAULT;
   goto irq_restore_exit;
}

> +	}
> +
> +	switch (vector) {
> +	default:
> +		icr = APIC_DM_FIXED | vector;
> +		break;
> +	case NMI_VECTOR:
> +		icr = APIC_DM_NMI;
> +		break;
> +	}
> +
> +	ret = kvm_hypercall3(KVM_HC_SEND_IPI, ipi_bitmap_low, ipi_bitmap_high, icr);
> +
> +ipi_mask_done:
> +	local_irq_restore(flags);
> +	return ((ret == 0) ? true : false);

... and why in the first place do we need to make this function return
'bool' then? Let's just make it return 'int'.

> +}
> +
> +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
> +{
> +	if (!__send_ipi_mask(mask, vector))
> +		orig_apic.send_IPI_mask(mask, vector);
> +}
> +
> +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> +{
> +	unsigned int this_cpu = smp_processor_id();
> +	struct cpumask new_mask;
> +	const struct cpumask *local_mask;
> +
> +	cpumask_copy(&new_mask, mask);
> +	cpumask_clear_cpu(this_cpu, &new_mask);
> +	local_mask = &new_mask;
> +	if (!__send_ipi_mask(local_mask, vector))
> +		orig_apic.send_IPI_mask_allbutself(mask, vector);
> +}
> +
> +static void kvm_send_ipi_allbutself(int vector)
> +{
> +	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
> +}
> +
> +static void kvm_send_ipi_all(int vector)
> +{
> +	if (!__send_ipi_mask(cpu_online_mask, vector))
> +		orig_apic.send_IPI_all(vector);
> +}
> +
> +/*
> + * Set the IPI entry points
> + */
> +static void kvm_setup_pv_ipi(void)
> +{
> +	orig_apic = *apic;
> +
> +	apic->send_IPI_mask = kvm_send_ipi_mask;
> +	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
> +	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
> +	apic->send_IPI_all = kvm_send_ipi_all;
> +	pr_info("KVM setup pv IPIs\n");
> +}
> +#endif
> +
>  static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
>  {
>  	native_smp_prepare_cpus(max_cpus);
> @@ -624,12 +708,27 @@ static uint32_t __init kvm_detect(void)
>  	return kvm_cpuid_base();
>  }
>
> +static void __init kvm_apic_init(void)
> +{
> +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
> +	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
> +		num_possible_cpus() <= 2 * BITS_PER_LONG)
> +		kvm_setup_pv_ipi();
> +#endif
> +}
> +
> +static void __init kvm_init_platform(void)
> +{
> +	x86_platform.apic_post_init = kvm_apic_init;
> +}
> +
>  const __initconst struct hypervisor_x86 x86_hyper_kvm = {
>  	.name			= "KVM",
>  	.detect			= kvm_detect,
>  	.type			= X86_HYPER_KVM,
>  	.init.guest_late_init	= kvm_guest_init,
>  	.init.x2apic_available	= kvm_para_available,
> +	.init.init_platform	= kvm_init_platform,
>  };
>
>  static __init int activate_jump_labels(void)
> diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
> index dcf629d..7395f38 100644
> --- a/include/uapi/linux/kvm_para.h
> +++ b/include/uapi/linux/kvm_para.h
> @@ -26,6 +26,7 @@
>  #define KVM_HC_MIPS_EXIT_VM		7
>  #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
>  #define KVM_HC_CLOCK_PAIRING		9
> +#define KVM_HC_SEND_IPI			10
>
>  /*
>   * hypercalls use architecture specific

Wanpeng Li July 2, 2018, 11:32 a.m. UTC | #2

On Mon, 2 Jul 2018 at 18:26, Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Wanpeng Li <kernellwp@gmail.com> writes:
>
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > Implement PV IPIs in guest kernel.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kernel/kvm.c         | 99 +++++++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/kvm_para.h |  1 +
> >  2 files changed, 100 insertions(+)
> >
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 5b2300b..7e3ee25 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -47,6 +47,7 @@
> >  #include <asm/hypervisor.h>
> >  #include <asm/kvm_guest.h>
> >
> > +static struct apic orig_apic;
> >  static int kvmapf = 1;
> >
> >  static int __init parse_no_kvmapf(char *arg)
> > @@ -454,6 +455,89 @@ static void __init sev_map_percpu_data(void)
> >  }
> >
> >  #ifdef CONFIG_SMP
> > +
> > +#ifdef CONFIG_X86_64
> > +static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> > +{
> > +     unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> > +     int cpu, apic_id, ret = 1;
> > +
> > +     if (cpumask_empty(mask))
> > +             return true;
> > +
> > +     local_irq_save(flags);
> > +
> > +     for_each_cpu(cpu, mask) {
> > +             apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> > +             if (apic_id < BITS_PER_LONG)
> > +                     __set_bit(apic_id, &ipi_bitmap_low);
> > +             else if (apic_id < 2 * BITS_PER_LONG)
> > +                     __set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> > +             else
> > +                     goto ipi_mask_done;
>
> Nit:
>
> Both the fact that we don't set 'ret' here and the fact that the label
> is named 'ipi_mask_done' -- which sounds like 'all OK' at least to me --
> contribute to the feeling that we just skip sending IPIs in some cases.
>
> I would prefer to see something like
>
> else {
>    ret = -EFAULT;
>    goto irq_restore_exit;
> }
>
> > +     }
> > +
> > +     switch (vector) {
> > +     default:
> > +             icr = APIC_DM_FIXED | vector;
> > +             break;
> > +     case NMI_VECTOR:
> > +             icr = APIC_DM_NMI;
> > +             break;
> > +     }
> > +
> > +     ret = kvm_hypercall3(KVM_HC_SEND_IPI, ipi_bitmap_low, ipi_bitmap_high, icr);
> > +
> > +ipi_mask_done:
> > +     local_irq_restore(flags);
> > +     return ((ret == 0) ? true : false);
>
> ... and why in the first place do we need to make this function return
> 'bool' then? Let's just make it return 'int'.

Thanks for the comments, will do in v3. Btw, on my haswell desktop(i7
8 HT), there is a 2.5 times performance boot for the IPI
microbenchmark(https://lkml.org/lkml/2017/12/19/141). (8 vCPUs guest,
x2apic physical mode, I will retest on Skylake server w/ 64 vCPUs
x2apic cluster mode guest tomorrow):

Before:
 Dry-run:                         0,            1885493 ns
 Self-IPI:                  7071403,           14711151 ns
 Normal IPI:              204453899,          219896346 ns
 Broadcast IPI:                   0,         2213679722 ns
 Broadcast lock:                  0,         2241226307 ns

After:
 Dry-run:                         0,            1752903 ns
 Self-IPI:                  4944737,           10434149 ns
 Normal IPI:              202351280,          220807969 ns
 Broadcast IPI:                   0,          872991742 ns
=> 2.5 times boost
 Broadcast lock:                  0,          879995113 ns

Regards,
Wanpeng Li

Paolo Bonzini July 2, 2018, 11:32 a.m. UTC | #3

On 02/07/2018 12:14, Wanpeng Li wrote:
> +	unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> +	int cpu, apic_id, ret = 1;
> +
> +	if (cpumask_empty(mask))
> +		return true;
> +
> +	local_irq_save(flags);
> +
> +	for_each_cpu(cpu, mask) {
> +		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> +		if (apic_id < BITS_PER_LONG)
> +			__set_bit(apic_id, &ipi_bitmap_low);
> +		else if (apic_id < 2 * BITS_PER_LONG)
> +			__set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> +		else
> +			goto ipi_mask_done;
> +	}

CPU masks are themselves bitmaps made of longs, so you should be able to
avoid the loop here.

> +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
> +{
> +	if (!__send_ipi_mask(mask, vector))
> +		orig_apic.send_IPI_mask(mask, vector);
> +}
> +
> +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> +{
> +	unsigned int this_cpu = smp_processor_id();
> +	struct cpumask new_mask;
> +	const struct cpumask *local_mask;
> +
> +	cpumask_copy(&new_mask, mask);
> +	cpumask_clear_cpu(this_cpu, &new_mask);
> +	local_mask = &new_mask;
> +	if (!__send_ipi_mask(local_mask, vector))
> +		orig_apic.send_IPI_mask_allbutself(mask, vector);
> +}

Likewise, here it should be possible to check the highest bit in the
mask before copying it.

Paolo

Wanpeng Li July 2, 2018, 12:08 p.m. UTC | #4

On Mon, 2 Jul 2018 at 19:32, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 02/07/2018 12:14, Wanpeng Li wrote:
> > +     unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> > +     int cpu, apic_id, ret = 1;
> > +
> > +     if (cpumask_empty(mask))
> > +             return true;
> > +
> > +     local_irq_save(flags);
> > +
> > +     for_each_cpu(cpu, mask) {
> > +             apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> > +             if (apic_id < BITS_PER_LONG)
> > +                     __set_bit(apic_id, &ipi_bitmap_low);
> > +             else if (apic_id < 2 * BITS_PER_LONG)
> > +                     __set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> > +             else
> > +                     goto ipi_mask_done;
> > +     }
>
> CPU masks are themselves bitmaps made of longs, so you should be able to
> avoid the loop here.

As we discuss offline, loop is needed since I need to convert
processor id which is allocated by OS to apic id.

>
> > +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
> > +{
> > +     if (!__send_ipi_mask(mask, vector))
> > +             orig_apic.send_IPI_mask(mask, vector);
> > +}
> > +
> > +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> > +{
> > +     unsigned int this_cpu = smp_processor_id();
> > +     struct cpumask new_mask;
> > +     const struct cpumask *local_mask;
> > +
> > +     cpumask_copy(&new_mask, mask);
> > +     cpumask_clear_cpu(this_cpu, &new_mask);
> > +     local_mask = &new_mask;
> > +     if (!__send_ipi_mask(local_mask, vector))
> > +             orig_apic.send_IPI_mask_allbutself(mask, vector);
> > +}
>
> Likewise, here it should be possible to check the highest bit in the
> mask before copying it.

__send_ipi_mask() has already handled sparse APIC ID and > 128 APID ID
scenarios.

Regards,
Wanpeng Li

kernel test robot July 2, 2018, 1:33 p.m. UTC | #5

Hi Wanpeng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.18-rc3 next-20180702]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Wanpeng-Li/KVM-x86-Add-PV-IPIs-support/20180702-193633
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: x86_64-randconfig-x003-201826 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: the linux-review/Wanpeng-Li/KVM-x86-Add-PV-IPIs-support/20180702-193633 HEAD 2601a46efa23f54a8f175cd07dae28c88e36a40d builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   arch/x86/kernel/kvm.c: In function 'kvm_apic_init':
>> arch/x86/kernel/kvm.c:714:27: error: 'KVM_FEATURE_PV_SEND_IPI' undeclared (first use in this function); did you mean 'KVM_FEATURE_PV_EOI'?
     if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
                              ^~~~~~~~~~~~~~~~~~~~~~~
                              KVM_FEATURE_PV_EOI
   arch/x86/kernel/kvm.c:714:27: note: each undeclared identifier is reported only once for each function it appears in

vim +714 arch/x86/kernel/kvm.c

   710	
   711	static void __init kvm_apic_init(void)
   712	{
   713	#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
 > 714		if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
   715			num_possible_cpus() <= 2 * BITS_PER_LONG)
   716			kvm_setup_pv_ipi();
   717	#endif
   718	}
   719	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[v2,1/2] KVM: X86: Implement PV IPI in linux guest

Commit Message

Comments

Patch