diff mbox series

[v2,05/13] x86/irq: Reserve a per CPU IDT vector for posted MSIs

Message ID 20240405223110.1609888-6-jacob.jun.pan@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Coalesced Interrupt Delivery with posted MSI | expand

Commit Message

Jacob Pan April 5, 2024, 10:31 p.m. UTC
When posted MSI is enabled, all device MSIs are multiplexed into a single
notification vector. MSI handlers will be de-multiplexed at run-time by
system software without IDT delivery.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>

---
v2:
 - Add missing CONFIG_ in #ifdef
 - Extend changes to x86 tools
---
 arch/x86/include/asm/irq_vectors.h       | 9 ++++++++-
 tools/arch/x86/include/asm/irq_vectors.h | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

Comments

Thomas Gleixner April 11, 2024, 4:51 p.m. UTC | #1
On Fri, Apr 05 2024 at 15:31, Jacob Pan wrote:
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index d18bfb238f66..1ee00be8218d 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -97,9 +97,16 @@
>  
>  #define LOCAL_TIMER_VECTOR		0xec
>  
> +/*
> + * Posted interrupt notification vector for all device MSIs delivered to
> + * the host kernel.
> + */
> +#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
>  #define NR_VECTORS			 256
>  
> -#ifdef CONFIG_X86_LOCAL_APIC
> +#ifdef CONFIG_X86_POSTED_MSI
> +#define FIRST_SYSTEM_VECTOR		POSTED_MSI_NOTIFICATION_VECTOR
> +#elif defined(CONFIG_X86_LOCAL_APIC)
>  #define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
>  #else
>  #define FIRST_SYSTEM_VECTOR		NR_VECTORS

This is horrible and we had attempts before to make the system vector
space dense. They all did not work and making an exception for this is
not what we want.

If we really care then we do it proper for _all_ of them. Something like
the uncompiled below. There is certainly a smarter way to do the build
thing, but my kbuild foo is rusty.

Thanks,

        tglx
---
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -245,6 +245,7 @@ archscripts: scripts_basic
 
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
+	$(Q)$(MAKE) $(build)=arch/x86/kernel/irqvectors all
 
 ###
 # Kernel objects
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -43,59 +43,46 @@
  */
 #define ISA_IRQ_VECTOR(irq)		(((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq)
 
+#ifndef __ASSEMBLY__
 /*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
+ * Special IRQ vectors used by the SMP architecture, 0xff and downwards
  */
+enum {
+	__SPURIOUS_APIC_VECTOR,
+	__ERROR_APIC_VECTOR,
+	__RESCHEDULE_VECTOR,
+	__CALL_FUNCTION_VECTOR,
+	__CALL_FUNCTION_SINGLE_VECTOR,
+	__THERMAL_APIC_VECTOR,
+	__THRESHOLD_APIC_VECTOR,
+	__REBOOT_VECTOR,
+	__X86_PLATFORM_IPI_VECTOR,
+	__IRQ_WORK_VECTOR,
+	__DEFERRED_ERROR_VECTOR,
 
-#define SPURIOUS_APIC_VECTOR		0xff
-/*
- * Sanity check
- */
-#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
-# error SPURIOUS_APIC_VECTOR definition error
+#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST)
+	__HYPERVISOR_CALLBACK_VECTOR,
 #endif
 
-#define ERROR_APIC_VECTOR		0xfe
-#define RESCHEDULE_VECTOR		0xfd
-#define CALL_FUNCTION_VECTOR		0xfc
-#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
-#define THERMAL_APIC_VECTOR		0xfa
-#define THRESHOLD_APIC_VECTOR		0xf9
-#define REBOOT_VECTOR			0xf8
-
-/*
- * Generic system vector for platform specific use
- */
-#define X86_PLATFORM_IPI_VECTOR		0xf7
-
-/*
- * IRQ work vector:
- */
-#define IRQ_WORK_VECTOR			0xf6
-
-/* 0xf5 - unused, was UV_BAU_MESSAGE */
-#define DEFERRED_ERROR_VECTOR		0xf4
-
-/* Vector on which hypervisor callbacks will be delivered */
-#define HYPERVISOR_CALLBACK_VECTOR	0xf3
-
-/* Vector for KVM to deliver posted interrupt IPI */
-#define POSTED_INTR_VECTOR		0xf2
-#define POSTED_INTR_WAKEUP_VECTOR	0xf1
-#define POSTED_INTR_NESTED_VECTOR	0xf0
-
-#define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
+#if IS_ENABLED(CONFIG_KVM)
+	/* Vector for KVM to deliver posted interrupt IPI */
+	__POSTED_INTR_VECTOR,
+	__POSTED_INTR_WAKEUP_VECTOR,
+	__POSTED_INTR_NESTED_VECTOR,
+#endif
+	__MANAGED_IRQ_SHUTDOWN_VECTOR,
 
 #if IS_ENABLED(CONFIG_HYPERV)
-#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
-#define HYPERV_STIMER0_VECTOR		0xed
+	__HYPERV_REENLIGHTENMENT_VECTOR,
+	__HYPERV_STIMER0_VECTOR,
 #endif
+	__LOCAL_TIMER_VECTOR,
+};
+#endif /* !__ASSEMBLY__ */
 
-#define LOCAL_TIMER_VECTOR		0xec
+#ifndef COMPILE_OFFSETS
+#include <asm/irqvectors.h>
+#endif
 
 #define NR_VECTORS			 256
 
--- /dev/null
+++ b/arch/x86/kernel/irqvectors/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+irqvectors-file	:= arch/$(SRCARCH)/include/generated/asm/irqvectors.h
+targets 	+= arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s
+
+$(irqvectors-file): arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s FORCE
+	$(call filechk,offsets,__ASM_IRQVECTORS_H__)
+
+PHONY += all
+all: $(irqvectors-file)
+	@:
--- /dev/null
+++ b/arch/x86/kernel/irqvectors/irqvectors.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#define COMPILE_OFFSETS
+
+#include <linux/kbuild.h>
+#include <asm/irq_vectors.h>
+
+#define VECNR(v)	(0xFF - __##v)
+#define VECTOR(v)	DEFINE(v, VECNR(v))
+
+static void __used common(void)
+{
+	VECTOR(SPURIOUS_APIC_VECTOR);
+	VECTOR(ERROR_APIC_VECTOR);
+	VECTOR(RESCHEDULE_VECTOR);
+	VECTOR(CALL_FUNCTION_VECTOR);
+	VECTOR(CALL_FUNCTION_SINGLE_VECTOR);
+	VECTOR(THERMAL_APIC_VECTOR);
+	VECTOR(THRESHOLD_APIC_VECTOR);
+	VECTOR(REBOOT_VECTOR);
+	VECTOR(X86_PLATFORM_IPI_VECTOR);
+	VECTOR(IRQ_WORK_VECTOR);
+	VECTOR(DEFERRED_ERROR_VECTOR);
+
+#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST)
+	VECTOR(HYPERVISOR_CALLBACK_VECTOR);
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+	/* Vector for KVM to deliver posted interrupt IPI */
+	VECTOR(POSTED_INTR_VECTOR);
+	VECTOR(POSTED_INTR_WAKEUP_VECTOR);
+	VECTOR(POSTED_INTR_NESTED_VECTOR);
+#endif
+	VECTOR(MANAGED_IRQ_SHUTDOWN_VECTOR);
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	VECTOR(HYPERV_REENLIGHTENMENT_VECTOR);
+	VECTOR(HYPERV_STIMER0_VECTOR);
+#endif
+	VECTOR(LOCAL_TIMER_VECTOR);
+}
+
Tian, Kevin April 12, 2024, 9:14 a.m. UTC | #2
> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Sent: Saturday, April 6, 2024 6:31 AM
> 
> +/*
> + * Posted interrupt notification vector for all device MSIs delivered to
> + * the host kernel.
> + */
> +#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
>  #define NR_VECTORS			 256
> 

Every interrupt is kind of a notification.

Just call it POSTED_MSI_VECTOR
Sean Christopherson April 12, 2024, 2:27 p.m. UTC | #3
On Fri, Apr 12, 2024, Kevin Tian wrote:
> > From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Sent: Saturday, April 6, 2024 6:31 AM
> > 
> > +/*
> > + * Posted interrupt notification vector for all device MSIs delivered to
> > + * the host kernel.
> > + */
> > +#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
> >  #define NR_VECTORS			 256
> > 
> 
> Every interrupt is kind of a notification.

FWIW, I find value in having "notification" in the name to differentiate between
the IRQ that is notifying the CPU that there's a posted IRQ to be processed, and
the posted IRQ itself.
Jacob Pan April 15, 2024, 6:53 p.m. UTC | #4
Hi Thomas,

On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de>
wrote:

> On Fri, Apr 05 2024 at 15:31, Jacob Pan wrote:
> > diff --git a/arch/x86/include/asm/irq_vectors.h
> > b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..1ee00be8218d
> > 100644 --- a/arch/x86/include/asm/irq_vectors.h
> > +++ b/arch/x86/include/asm/irq_vectors.h
> > @@ -97,9 +97,16 @@
> >  
> >  #define LOCAL_TIMER_VECTOR		0xec
> >  
> > +/*
> > + * Posted interrupt notification vector for all device MSIs delivered
> > to
> > + * the host kernel.
> > + */
> > +#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
> >  #define NR_VECTORS			 256
> >  
> > -#ifdef CONFIG_X86_LOCAL_APIC
> > +#ifdef CONFIG_X86_POSTED_MSI
> > +#define FIRST_SYSTEM_VECTOR
> > POSTED_MSI_NOTIFICATION_VECTOR +#elif defined(CONFIG_X86_LOCAL_APIC)
> >  #define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
> >  #else
> >  #define FIRST_SYSTEM_VECTOR		NR_VECTORS  
> 
> This is horrible and we had attempts before to make the system vector
> space dense. They all did not work and making an exception for this is
> not what we want.
> 
> If we really care then we do it proper for _all_ of them. Something like
> the uncompiled below. There is certainly a smarter way to do the build
> thing, but my kbuild foo is rusty.
I too had the concern of the wasting system vectors, but did not know how to
fix it. But now your code below works well. Tested without KVM in .config
to show the gaps:

In VECTOR IRQ domain.

BEFORE:
System: 46: 0-31,50,235-236,244,246-255

AFTER:
System: 46: 0-31,50,241-242,245-255

The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a
running system.

Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243

POSTED MSI/first system vector moved up from 235 to 241 for this case.

Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it
instead of manually copy over each time. Any suggestions greatly
appreciated.

> ---
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -245,6 +245,7 @@ archscripts: scripts_basic
>  
>  archheaders:
>  	$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
> +	$(Q)$(MAKE) $(build)=arch/x86/kernel/irqvectors all
>  
>  ###
>  # Kernel objects
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -43,59 +43,46 @@
>   */
>  #define ISA_IRQ_VECTOR(irq)		(((FIRST_EXTERNAL_VECTOR +
> 16) & ~15) + irq) 
> +#ifndef __ASSEMBLY__
>  /*
> - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
> - *
> - *  some of the following vectors are 'rare', they are merged
> - *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
> - *  TLB, reschedule and local APIC vectors are performance-critical.
> + * Special IRQ vectors used by the SMP architecture, 0xff and downwards
>   */
> +enum {
> +	__SPURIOUS_APIC_VECTOR,
> +	__ERROR_APIC_VECTOR,
> +	__RESCHEDULE_VECTOR,
> +	__CALL_FUNCTION_VECTOR,
> +	__CALL_FUNCTION_SINGLE_VECTOR,
> +	__THERMAL_APIC_VECTOR,
> +	__THRESHOLD_APIC_VECTOR,
> +	__REBOOT_VECTOR,
> +	__X86_PLATFORM_IPI_VECTOR,
> +	__IRQ_WORK_VECTOR,
> +	__DEFERRED_ERROR_VECTOR,
>  
> -#define SPURIOUS_APIC_VECTOR		0xff
> -/*
> - * Sanity check
> - */
> -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
> -# error SPURIOUS_APIC_VECTOR definition error
> +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST)
> +	__HYPERVISOR_CALLBACK_VECTOR,
>  #endif
>  
> -#define ERROR_APIC_VECTOR		0xfe
> -#define RESCHEDULE_VECTOR		0xfd
> -#define CALL_FUNCTION_VECTOR		0xfc
> -#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
> -#define THERMAL_APIC_VECTOR		0xfa
> -#define THRESHOLD_APIC_VECTOR		0xf9
> -#define REBOOT_VECTOR			0xf8
> -
> -/*
> - * Generic system vector for platform specific use
> - */
> -#define X86_PLATFORM_IPI_VECTOR		0xf7
> -
> -/*
> - * IRQ work vector:
> - */
> -#define IRQ_WORK_VECTOR			0xf6
> -
> -/* 0xf5 - unused, was UV_BAU_MESSAGE */
> -#define DEFERRED_ERROR_VECTOR		0xf4
> -
> -/* Vector on which hypervisor callbacks will be delivered */
> -#define HYPERVISOR_CALLBACK_VECTOR	0xf3
> -
> -/* Vector for KVM to deliver posted interrupt IPI */
> -#define POSTED_INTR_VECTOR		0xf2
> -#define POSTED_INTR_WAKEUP_VECTOR	0xf1
> -#define POSTED_INTR_NESTED_VECTOR	0xf0
> -
> -#define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
> +#if IS_ENABLED(CONFIG_KVM)
> +	/* Vector for KVM to deliver posted interrupt IPI */
> +	__POSTED_INTR_VECTOR,
> +	__POSTED_INTR_WAKEUP_VECTOR,
> +	__POSTED_INTR_NESTED_VECTOR,
> +#endif
> +	__MANAGED_IRQ_SHUTDOWN_VECTOR,
>  
>  #if IS_ENABLED(CONFIG_HYPERV)
> -#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
> -#define HYPERV_STIMER0_VECTOR		0xed
> +	__HYPERV_REENLIGHTENMENT_VECTOR,
> +	__HYPERV_STIMER0_VECTOR,
>  #endif
> +	__LOCAL_TIMER_VECTOR,
> +};
> +#endif /* !__ASSEMBLY__ */
>  
> -#define LOCAL_TIMER_VECTOR		0xec
> +#ifndef COMPILE_OFFSETS
> +#include <asm/irqvectors.h>
> +#endif
>  
>  #define NR_VECTORS			 256
>  
> --- /dev/null
> +++ b/arch/x86/kernel/irqvectors/Makefile
> @@ -0,0 +1,11 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +irqvectors-file	:=
> arch/$(SRCARCH)/include/generated/asm/irqvectors.h +targets 	+=
> arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s +
> +$(irqvectors-file): arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s FORCE
> +	$(call filechk,offsets,__ASM_IRQVECTORS_H__)
> +
> +PHONY += all
> +all: $(irqvectors-file)
> +	@:
> --- /dev/null
> +++ b/arch/x86/kernel/irqvectors/irqvectors.c
> @@ -0,0 +1,42 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#define COMPILE_OFFSETS
> +
> +#include <linux/kbuild.h>
> +#include <asm/irq_vectors.h>
> +
> +#define VECNR(v)	(0xFF - __##v)
> +#define VECTOR(v)	DEFINE(v, VECNR(v))
> +
> +static void __used common(void)
> +{
> +	VECTOR(SPURIOUS_APIC_VECTOR);
> +	VECTOR(ERROR_APIC_VECTOR);
> +	VECTOR(RESCHEDULE_VECTOR);
> +	VECTOR(CALL_FUNCTION_VECTOR);
> +	VECTOR(CALL_FUNCTION_SINGLE_VECTOR);
> +	VECTOR(THERMAL_APIC_VECTOR);
> +	VECTOR(THRESHOLD_APIC_VECTOR);
> +	VECTOR(REBOOT_VECTOR);
> +	VECTOR(X86_PLATFORM_IPI_VECTOR);
> +	VECTOR(IRQ_WORK_VECTOR);
> +	VECTOR(DEFERRED_ERROR_VECTOR);
> +
> +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST)
> +	VECTOR(HYPERVISOR_CALLBACK_VECTOR);
> +#endif
> +
> +#if IS_ENABLED(CONFIG_KVM)
> +	/* Vector for KVM to deliver posted interrupt IPI */
> +	VECTOR(POSTED_INTR_VECTOR);
> +	VECTOR(POSTED_INTR_WAKEUP_VECTOR);
> +	VECTOR(POSTED_INTR_NESTED_VECTOR);
> +#endif
> +	VECTOR(MANAGED_IRQ_SHUTDOWN_VECTOR);
> +
> +#if IS_ENABLED(CONFIG_HYPERV)
> +	VECTOR(HYPERV_REENLIGHTENMENT_VECTOR);
> +	VECTOR(HYPERV_STIMER0_VECTOR);
> +#endif
> +	VECTOR(LOCAL_TIMER_VECTOR);
> +}
> +
> 
> 
> 


Thanks,

Jacob
Jacob Pan April 15, 2024, 8:43 p.m. UTC | #5
On Mon, 15 Apr 2024 11:53:58 -0700, Jacob Pan
<jacob.jun.pan@linux.intel.com> wrote:

> Hi Thomas,
> 
> On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de>
> wrote:
> 
> > On Fri, Apr 05 2024 at 15:31, Jacob Pan wrote:  
> > > diff --git a/arch/x86/include/asm/irq_vectors.h
> > > b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..1ee00be8218d
> > > 100644 --- a/arch/x86/include/asm/irq_vectors.h
> > > +++ b/arch/x86/include/asm/irq_vectors.h
> > > @@ -97,9 +97,16 @@
> > >  
> > >  #define LOCAL_TIMER_VECTOR		0xec
> > >  
> > > +/*
> > > + * Posted interrupt notification vector for all device MSIs delivered
> > > to
> > > + * the host kernel.
> > > + */
> > > +#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
> > >  #define NR_VECTORS			 256
> > >  
> > > -#ifdef CONFIG_X86_LOCAL_APIC
> > > +#ifdef CONFIG_X86_POSTED_MSI
> > > +#define FIRST_SYSTEM_VECTOR
> > > POSTED_MSI_NOTIFICATION_VECTOR +#elif defined(CONFIG_X86_LOCAL_APIC)
> > >  #define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
> > >  #else
> > >  #define FIRST_SYSTEM_VECTOR		NR_VECTORS    
> > 
> > This is horrible and we had attempts before to make the system vector
> > space dense. They all did not work and making an exception for this is
> > not what we want.
> > 
> > If we really care then we do it proper for _all_ of them. Something like
> > the uncompiled below. There is certainly a smarter way to do the build
> > thing, but my kbuild foo is rusty.  
> I too had the concern of the wasting system vectors, but did not know how
> to fix it. But now your code below works well. Tested without KVM in
> .config to show the gaps:
> 
> In VECTOR IRQ domain.
> 
> BEFORE:
> System: 46: 0-31,50,235-236,244,246-255
> 
> AFTER:
> System: 46: 0-31,50,241-242,245-255
> 
> The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a
> running system.
> 
> Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243
> 
> POSTED MSI/first system vector moved up from 235 to 241 for this case.
> 
> Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it
> instead of manually copy over each time. Any suggestions greatly
> appreciated.
> 
On a second thought, if we make system IRQ vector determined at compile
time based on different CONFIG options, will it break userspace tools such
as perf? More importantly the rule of not breaking userspace.

+Arnaldo

> > ---
> > --- a/arch/x86/Makefile
> > +++ b/arch/x86/Makefile
> > @@ -245,6 +245,7 @@ archscripts: scripts_basic
> >  
> >  archheaders:
> >  	$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
> > +	$(Q)$(MAKE) $(build)=arch/x86/kernel/irqvectors all
> >  
> >  ###
> >  # Kernel objects
> > --- a/arch/x86/include/asm/irq_vectors.h
> > +++ b/arch/x86/include/asm/irq_vectors.h
> > @@ -43,59 +43,46 @@
> >   */
> >  #define ISA_IRQ_VECTOR(irq)		(((FIRST_EXTERNAL_VECTOR +
> > 16) & ~15) + irq) 
> > +#ifndef __ASSEMBLY__
> >  /*
> > - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
> > - *
> > - *  some of the following vectors are 'rare', they are merged
> > - *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
> > - *  TLB, reschedule and local APIC vectors are performance-critical.
> > + * Special IRQ vectors used by the SMP architecture, 0xff and downwards
> >   */
> > +enum {
> > +	__SPURIOUS_APIC_VECTOR,
> > +	__ERROR_APIC_VECTOR,
> > +	__RESCHEDULE_VECTOR,
> > +	__CALL_FUNCTION_VECTOR,
> > +	__CALL_FUNCTION_SINGLE_VECTOR,
> > +	__THERMAL_APIC_VECTOR,
> > +	__THRESHOLD_APIC_VECTOR,
> > +	__REBOOT_VECTOR,
> > +	__X86_PLATFORM_IPI_VECTOR,
> > +	__IRQ_WORK_VECTOR,
> > +	__DEFERRED_ERROR_VECTOR,
> >  
> > -#define SPURIOUS_APIC_VECTOR		0xff
> > -/*
> > - * Sanity check
> > - */
> > -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
> > -# error SPURIOUS_APIC_VECTOR definition error
> > +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST)
> > +	__HYPERVISOR_CALLBACK_VECTOR,
> >  #endif
> >  
> > -#define ERROR_APIC_VECTOR		0xfe
> > -#define RESCHEDULE_VECTOR		0xfd
> > -#define CALL_FUNCTION_VECTOR		0xfc
> > -#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
> > -#define THERMAL_APIC_VECTOR		0xfa
> > -#define THRESHOLD_APIC_VECTOR		0xf9
> > -#define REBOOT_VECTOR			0xf8
> > -
> > -/*
> > - * Generic system vector for platform specific use
> > - */
> > -#define X86_PLATFORM_IPI_VECTOR		0xf7
> > -
> > -/*
> > - * IRQ work vector:
> > - */
> > -#define IRQ_WORK_VECTOR			0xf6
> > -
> > -/* 0xf5 - unused, was UV_BAU_MESSAGE */
> > -#define DEFERRED_ERROR_VECTOR		0xf4
> > -
> > -/* Vector on which hypervisor callbacks will be delivered */
> > -#define HYPERVISOR_CALLBACK_VECTOR	0xf3
> > -
> > -/* Vector for KVM to deliver posted interrupt IPI */
> > -#define POSTED_INTR_VECTOR		0xf2
> > -#define POSTED_INTR_WAKEUP_VECTOR	0xf1
> > -#define POSTED_INTR_NESTED_VECTOR	0xf0
> > -
> > -#define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
> > +#if IS_ENABLED(CONFIG_KVM)
> > +	/* Vector for KVM to deliver posted interrupt IPI */
> > +	__POSTED_INTR_VECTOR,
> > +	__POSTED_INTR_WAKEUP_VECTOR,
> > +	__POSTED_INTR_NESTED_VECTOR,
> > +#endif
> > +	__MANAGED_IRQ_SHUTDOWN_VECTOR,
> >  
> >  #if IS_ENABLED(CONFIG_HYPERV)
> > -#define HYPERV_REENLIGHTENMENT_VECTOR	0xee
> > -#define HYPERV_STIMER0_VECTOR		0xed
> > +	__HYPERV_REENLIGHTENMENT_VECTOR,
> > +	__HYPERV_STIMER0_VECTOR,
> >  #endif
> > +	__LOCAL_TIMER_VECTOR,
> > +};
> > +#endif /* !__ASSEMBLY__ */
> >  
> > -#define LOCAL_TIMER_VECTOR		0xec
> > +#ifndef COMPILE_OFFSETS
> > +#include <asm/irqvectors.h>
> > +#endif
> >  
> >  #define NR_VECTORS			 256
> >  
> > --- /dev/null
> > +++ b/arch/x86/kernel/irqvectors/Makefile
> > @@ -0,0 +1,11 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +irqvectors-file	:=
> > arch/$(SRCARCH)/include/generated/asm/irqvectors.h +targets 	+=
> > arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s +
> > +$(irqvectors-file): arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s
> > FORCE
> > +	$(call filechk,offsets,__ASM_IRQVECTORS_H__)
> > +
> > +PHONY += all
> > +all: $(irqvectors-file)
> > +	@:
> > --- /dev/null
> > +++ b/arch/x86/kernel/irqvectors/irqvectors.c
> > @@ -0,0 +1,42 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#define COMPILE_OFFSETS
> > +
> > +#include <linux/kbuild.h>
> > +#include <asm/irq_vectors.h>
> > +
> > +#define VECNR(v)	(0xFF - __##v)
> > +#define VECTOR(v)	DEFINE(v, VECNR(v))
> > +
> > +static void __used common(void)
> > +{
> > +	VECTOR(SPURIOUS_APIC_VECTOR);
> > +	VECTOR(ERROR_APIC_VECTOR);
> > +	VECTOR(RESCHEDULE_VECTOR);
> > +	VECTOR(CALL_FUNCTION_VECTOR);
> > +	VECTOR(CALL_FUNCTION_SINGLE_VECTOR);
> > +	VECTOR(THERMAL_APIC_VECTOR);
> > +	VECTOR(THRESHOLD_APIC_VECTOR);
> > +	VECTOR(REBOOT_VECTOR);
> > +	VECTOR(X86_PLATFORM_IPI_VECTOR);
> > +	VECTOR(IRQ_WORK_VECTOR);
> > +	VECTOR(DEFERRED_ERROR_VECTOR);
> > +
> > +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST)
> > +	VECTOR(HYPERVISOR_CALLBACK_VECTOR);
> > +#endif
> > +
> > +#if IS_ENABLED(CONFIG_KVM)
> > +	/* Vector for KVM to deliver posted interrupt IPI */
> > +	VECTOR(POSTED_INTR_VECTOR);
> > +	VECTOR(POSTED_INTR_WAKEUP_VECTOR);
> > +	VECTOR(POSTED_INTR_NESTED_VECTOR);
> > +#endif
> > +	VECTOR(MANAGED_IRQ_SHUTDOWN_VECTOR);
> > +
> > +#if IS_ENABLED(CONFIG_HYPERV)
> > +	VECTOR(HYPERV_REENLIGHTENMENT_VECTOR);
> > +	VECTOR(HYPERV_STIMER0_VECTOR);
> > +#endif
> > +	VECTOR(LOCAL_TIMER_VECTOR);
> > +}
> > +
> > 
> > 
> >   
> 
> 
> Thanks,
> 
> Jacob


Thanks,

Jacob
Tian, Kevin April 16, 2024, 3:45 a.m. UTC | #6
> From: Sean Christopherson <seanjc@google.com>
> Sent: Friday, April 12, 2024 10:28 PM
> 
> On Fri, Apr 12, 2024, Kevin Tian wrote:
> > > From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > Sent: Saturday, April 6, 2024 6:31 AM
> > >
> > > +/*
> > > + * Posted interrupt notification vector for all device MSIs delivered to
> > > + * the host kernel.
> > > + */
> > > +#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
> > >  #define NR_VECTORS			 256
> > >
> >
> > Every interrupt is kind of a notification.
> 
> FWIW, I find value in having "notification" in the name to differentiate
> between
> the IRQ that is notifying the CPU that there's a posted IRQ to be processed,
> and
> the posted IRQ itself.

IMHO one who knows posted msi doesn't need the extra
'notification' in the name to differentiate.

one who doesn't know what posted msi is anyway needs to
look at the surrounding code including the above comment.
having 'notification' in the name alone doesn't really help.

but I'd not hold strong on this... 
Thomas Gleixner April 19, 2024, 4 a.m. UTC | #7
On Mon, Apr 15 2024 at 13:43, Jacob Pan wrote:
> On Mon, 15 Apr 2024 11:53:58 -0700, Jacob Pan <jacob.jun.pan@linux.intel.com> wrote:
>> On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de> wrote:
>> > If we really care then we do it proper for _all_ of them. Something like
>> > the uncompiled below. There is certainly a smarter way to do the build
>> > thing, but my kbuild foo is rusty.  
>> I too had the concern of the wasting system vectors, but did not know how
>> to fix it. But now your code below works well. Tested without KVM in
>> .config to show the gaps:
>> 
>> In VECTOR IRQ domain.
>> 
>> BEFORE:
>> System: 46: 0-31,50,235-236,244,246-255
>> 
>> AFTER:
>> System: 46: 0-31,50,241-242,245-255
>> 
>> The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a
>> running system.
>> 
>> Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243
>> 
>> POSTED MSI/first system vector moved up from 235 to 241 for this case.
>> 
>> Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it
>> instead of manually copy over each time. Any suggestions greatly
>> appreciated.
>>
> On a second thought, if we make system IRQ vector determined at compile
> time based on different CONFIG options, will it break userspace tools such
> as perf? More importantly the rule of not breaking userspace.

tools/arch/x86/include/asm/irq_vectors.h is only used to generate the
list of system vectors for pretty output. And your change already broke
that.

The obvious solution to that is to expose that list in sysfs for
consumption by perf.

But we don't have to do any of that right away. It's an orthogonal
issue. Just waste the extra system vector to start with and then we can
add the compile time dependend change on top if we really care about
gaining back the vectors.

Thanks,

        tglx
Arnaldo Carvalho de Melo April 19, 2024, 8:07 p.m. UTC | #8
On Fri, Apr 19, 2024 at 06:00:24AM +0200, Thomas Gleixner wrote:
> On Mon, Apr 15 2024 at 13:43, Jacob Pan wrote:
> > On Mon, 15 Apr 2024 11:53:58 -0700, Jacob Pan <jacob.jun.pan@linux.intel.com> wrote:
> >> On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de> wrote:
> >> > If we really care then we do it proper for _all_ of them. Something like
> >> > the uncompiled below. There is certainly a smarter way to do the build
> >> > thing, but my kbuild foo is rusty.  
> >> I too had the concern of the wasting system vectors, but did not know how
> >> to fix it. But now your code below works well. Tested without KVM in
> >> .config to show the gaps:
> >> 
> >> In VECTOR IRQ domain.
> >> 
> >> BEFORE:
> >> System: 46: 0-31,50,235-236,244,246-255
> >> 
> >> AFTER:
> >> System: 46: 0-31,50,241-242,245-255
> >> 
> >> The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a
> >> running system.
> >> 
> >> Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243
> >> 
> >> POSTED MSI/first system vector moved up from 235 to 241 for this case.
> >> 
> >> Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it
> >> instead of manually copy over each time. Any suggestions greatly
> >> appreciated.
> >>
> > On a second thought, if we make system IRQ vector determined at compile
> > time based on different CONFIG options, will it break userspace tools such
> > as perf? More importantly the rule of not breaking userspace.

The rule for tools/perf is "don't impose _any requirement_ on the kernel
developers, they don't have to test if any change they do outside of
tools/ will break something inside tools/."
 
> tools/arch/x86/include/asm/irq_vectors.h is only used to generate the
> list of system vectors for pretty output. And your change already broke
> that.

Yeah, I even moved that from tools/arch/x86/include/asm/irq_vectors.h
to tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h (for next
merge window).

Having it in tools/arch/x86/include/asm/irq_vectors.h was a bad decision
as it, as you mentinoned, is only used to generate string tables:

⬢[acme@toolbox perf-tools-next]$ tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh 
static const char *x86_irq_vectors[] = {
	[0x02] = "NMI",
	[0x80] = "IA32_SYSCALL",
	[0xec] = "LOCAL_TIMER",
	[0xed] = "HYPERV_STIMER0",
	[0xee] = "HYPERV_REENLIGHTENMENT",
	[0xef] = "MANAGED_IRQ_SHUTDOWN",
	[0xf0] = "POSTED_INTR_NESTED",
	[0xf1] = "POSTED_INTR_WAKEUP",
	[0xf2] = "POSTED_INTR",
	[0xf3] = "HYPERVISOR_CALLBACK",
	[0xf4] = "DEFERRED_ERROR",
	[0xf6] = "IRQ_WORK",
	[0xf7] = "X86_PLATFORM_IPI",
	[0xf8] = "REBOOT",
	[0xf9] = "THRESHOLD_APIC",
	[0xfa] = "THERMAL_APIC",
	[0xfb] = "CALL_FUNCTION_SINGLE",
	[0xfc] = "CALL_FUNCTION",
	[0xfd] = "RESCHEDULE",
	[0xfe] = "ERROR_APIC",
	[0xff] = "SPURIOUS_APIC",
};

⬢[acme@toolbox perf-tools-next]$

Used in:

root@number:~# perf trace -a -e irq_vectors:irq_work_entry/max-stack=32/ --max-events=1
     0.000 kworker/u57:0-/9912 irq_vectors:irq_work_entry(vector: IRQ_WORK)
                                       __sysvec_irq_work ([kernel.kallsyms])
                                       __sysvec_irq_work ([kernel.kallsyms])
                                       sysvec_irq_work ([kernel.kallsyms])
                                       asm_sysvec_irq_work ([kernel.kallsyms])
                                       _raw_spin_unlock_irqrestore ([kernel.kallsyms])
                                       dma_fence_wait_timeout ([kernel.kallsyms])
                                       intel_atomic_commit_tail ([kernel.kallsyms])
                                       process_one_work ([kernel.kallsyms])
                                       worker_thread ([kernel.kallsyms])
                                       kthread ([kernel.kallsyms])
                                       ret_from_fork ([kernel.kallsyms])
                                       ret_from_fork_asm ([kernel.kallsyms])
root@number:~#

But as the original cset introducing this explains, these irq_vectors:
tracepoins operate on just one of the vectors, so irq_work_entry(vector:
IRQ_WORK), irq_vectors:reschedule_exit(vector: RESCHEDULE), etc. 

> The obvious solution to that is to expose that list in sysfs for
> consumption by perf.

nah, the best thing these days is stop using 'int' for vector and use
'enum irq_vector', then since we have BTF we can use that to do the enum
-> string translation, like with (using /sys/kernel/btf/vmlinux, that is
pretty much available everywhere these days):

root@number:~# pahole clocksource_ids
enum clocksource_ids {
	CSID_GENERIC          = 0,
	CSID_ARM_ARCH_COUNTER = 1,
	CSID_MAX              = 2,
};

root@number:~# pahole skb_drop_reason | head
enum skb_drop_reason {
	SKB_NOT_DROPPED_YET                     = 0,
	SKB_CONSUMED                            = 1,
	SKB_DROP_REASON_NOT_SPECIFIED           = 2,
	SKB_DROP_REASON_NO_SOCKET               = 3,
	SKB_DROP_REASON_PKT_TOO_SMALL           = 4,
	SKB_DROP_REASON_TCP_CSUM                = 5,
	SKB_DROP_REASON_SOCKET_FILTER           = 6,
	SKB_DROP_REASON_UDP_CSUM                = 7,
	SKB_DROP_REASON_NETFILTER_DROP          = 8,
root@number:~#

Then its easy to go from 0 to CSID_GENERIC, etc.

⬢[acme@toolbox pahole]$ perf stat -e cycles pahole skb_drop_reason > /dev/null

 Performance counter stats for 'pahole skb_drop_reason':

         6,095,427      cpu_atom/cycles:u/                                                      (2.82%)
       103,694,633      cpu_core/cycles:u/                                                      (97.18%)

       0.039031759 seconds time elapsed

       0.016028000 seconds user
       0.023007000 seconds sys


⬢[acme@toolbox pahole]$

- Arnaldo
 
> But we don't have to do any of that right away. It's an orthogonal
> issue. Just waste the extra system vector to start with and then we can
> add the compile time dependend change on top if we really care about
> gaining back the vectors.
> 
> Thanks,
> 
>         tglx
Jacob Pan April 22, 2024, 10:32 p.m. UTC | #9
Hi Arnaldo,

On Fri, 19 Apr 2024 17:07:17 -0300, Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:

> > > On a second thought, if we make system IRQ vector determined at
> > > compile time based on different CONFIG options, will it break
> > > userspace tools such as perf? More importantly the rule of not
> > > breaking userspace.  
> 
> The rule for tools/perf is "don't impose _any requirement_ on the kernel
> developers, they don't have to test if any change they do outside of
> tools/ will break something inside tools/."
>  
> > tools/arch/x86/include/asm/irq_vectors.h is only used to generate the
> > list of system vectors for pretty output. And your change already broke
> > that.  
> 
> Yeah, I even moved that from tools/arch/x86/include/asm/irq_vectors.h
> to tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h (for next
> merge window).

So I will not add anything to the tools directory for my next version.
Just a heads-up for adding this new vector.

Thanks,

Jacob
diff mbox series

Patch

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index d18bfb238f66..1ee00be8218d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -97,9 +97,16 @@ 
 
 #define LOCAL_TIMER_VECTOR		0xec
 
+/*
+ * Posted interrupt notification vector for all device MSIs delivered to
+ * the host kernel.
+ */
+#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
 #define NR_VECTORS			 256
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_POSTED_MSI
+#define FIRST_SYSTEM_VECTOR		POSTED_MSI_NOTIFICATION_VECTOR
+#elif defined(CONFIG_X86_LOCAL_APIC)
 #define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
 #else
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index 3f73ac3ed3a0..989816ca7c9e 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -99,9 +99,16 @@ 
 
 #define LOCAL_TIMER_VECTOR		0xec
 
+/*
+ * Posted interrupt notification vector for all device MSIs delivered to
+ * the host kernel.
+ */
+#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
 #define NR_VECTORS			 256
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_POSTED_MSI
+#define FIRST_SYSTEM_VECTOR		POSTED_MSI_NOTIFICATION_VECTOR
+#elif defined(CONFIG_X86_LOCAL_APIC)
 #define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
 #else
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS