Message ID | 20240405223110.1609888-6-jacob.jun.pan@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Coalesced Interrupt Delivery with posted MSI | expand |
On Fri, Apr 05 2024 at 15:31, Jacob Pan wrote: > diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h > index d18bfb238f66..1ee00be8218d 100644 > --- a/arch/x86/include/asm/irq_vectors.h > +++ b/arch/x86/include/asm/irq_vectors.h > @@ -97,9 +97,16 @@ > > #define LOCAL_TIMER_VECTOR 0xec > > +/* > + * Posted interrupt notification vector for all device MSIs delivered to > + * the host kernel. > + */ > +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > #define NR_VECTORS 256 > > -#ifdef CONFIG_X86_LOCAL_APIC > +#ifdef CONFIG_X86_POSTED_MSI > +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR > +#elif defined(CONFIG_X86_LOCAL_APIC) > #define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR > #else > #define FIRST_SYSTEM_VECTOR NR_VECTORS This is horrible and we had attempts before to make the system vector space dense. They all did not work and making an exception for this is not what we want. If we really care then we do it proper for _all_ of them. Something like the uncompiled below. There is certainly a smarter way to do the build thing, but my kbuild foo is rusty. Thanks, tglx --- --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -245,6 +245,7 @@ archscripts: scripts_basic archheaders: $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all + $(Q)$(MAKE) $(build)=arch/x86/kernel/irqvectors all ### # Kernel objects --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -43,59 +43,46 @@ */ #define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) +#ifndef __ASSEMBLY__ /* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. 
+ * Special IRQ vectors used by the SMP architecture, 0xff and downwards */ +enum { + __SPURIOUS_APIC_VECTOR, + __ERROR_APIC_VECTOR, + __RESCHEDULE_VECTOR, + __CALL_FUNCTION_VECTOR, + __CALL_FUNCTION_SINGLE_VECTOR, + __THERMAL_APIC_VECTOR, + __THRESHOLD_APIC_VECTOR, + __REBOOT_VECTOR, + __X86_PLATFORM_IPI_VECTOR, + __IRQ_WORK_VECTOR, + __DEFERRED_ERROR_VECTOR, -#define SPURIOUS_APIC_VECTOR 0xff -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST) + __HYPERVISOR_CALLBACK_VECTOR, #endif -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -#define CALL_FUNCTION_SINGLE_VECTOR 0xfb -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define REBOOT_VECTOR 0xf8 - -/* - * Generic system vector for platform specific use - */ -#define X86_PLATFORM_IPI_VECTOR 0xf7 - -/* - * IRQ work vector: - */ -#define IRQ_WORK_VECTOR 0xf6 - -/* 0xf5 - unused, was UV_BAU_MESSAGE */ -#define DEFERRED_ERROR_VECTOR 0xf4 - -/* Vector on which hypervisor callbacks will be delivered */ -#define HYPERVISOR_CALLBACK_VECTOR 0xf3 - -/* Vector for KVM to deliver posted interrupt IPI */ -#define POSTED_INTR_VECTOR 0xf2 -#define POSTED_INTR_WAKEUP_VECTOR 0xf1 -#define POSTED_INTR_NESTED_VECTOR 0xf0 - -#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef +#if IS_ENABLED(CONFIG_KVM) + /* Vector for KVM to deliver posted interrupt IPI */ + __POSTED_INTR_VECTOR, + __POSTED_INTR_WAKEUP_VECTOR, + __POSTED_INTR_NESTED_VECTOR, +#endif + __MANAGED_IRQ_SHUTDOWN_VECTOR, #if IS_ENABLED(CONFIG_HYPERV) -#define HYPERV_REENLIGHTENMENT_VECTOR 0xee -#define HYPERV_STIMER0_VECTOR 0xed + __HYPERV_REENLIGHTENMENT_VECTOR, + __HYPERV_STIMER0_VECTOR, #endif + __LOCAL_TIMER_VECTOR, +}; +#endif /* !__ASSEMBLY__ */ -#define LOCAL_TIMER_VECTOR 0xec +#ifndef COMPILE_OFFSETS +#include <asm/irqvectors.h> +#endif #define NR_VECTORS 256 --- /dev/null +++ 
b/arch/x86/kernel/irqvectors/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +irqvectors-file := arch/$(SRCARCH)/include/generated/asm/irqvectors.h +targets += arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s + +$(irqvectors-file): arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s FORCE + $(call filechk,offsets,__ASM_IRQVECTORS_H__) + +PHONY += all +all: $(irqvectors-file) + @: --- /dev/null +++ b/arch/x86/kernel/irqvectors/irqvectors.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#define COMPILE_OFFSETS + +#include <linux/kbuild.h> +#include <asm/irq_vectors.h> + +#define VECNR(v) (0xFF - __##v) +#define VECTOR(v) DEFINE(v, VECNR(v)) + +static void __used common(void) +{ + VECTOR(SPURIOUS_APIC_VECTOR); + VECTOR(ERROR_APIC_VECTOR); + VECTOR(RESCHEDULE_VECTOR); + VECTOR(CALL_FUNCTION_VECTOR); + VECTOR(CALL_FUNCTION_SINGLE_VECTOR); + VECTOR(THERMAL_APIC_VECTOR); + VECTOR(THRESHOLD_APIC_VECTOR); + VECTOR(REBOOT_VECTOR); + VECTOR(X86_PLATFORM_IPI_VECTOR); + VECTOR(IRQ_WORK_VECTOR); + VECTOR(DEFERRED_ERROR_VECTOR); + +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST) + VECTOR(HYPERVISOR_CALLBACK_VECTOR); +#endif + +#if IS_ENABLED(CONFIG_KVM) + /* Vector for KVM to deliver posted interrupt IPI */ + VECTOR(POSTED_INTR_VECTOR); + VECTOR(POSTED_INTR_WAKEUP_VECTOR); + VECTOR(POSTED_INTR_NESTED_VECTOR); +#endif + VECTOR(MANAGED_IRQ_SHUTDOWN_VECTOR); + +#if IS_ENABLED(CONFIG_HYPERV) + VECTOR(HYPERV_REENLIGHTENMENT_VECTOR); + VECTOR(HYPERV_STIMER0_VECTOR); +#endif + VECTOR(LOCAL_TIMER_VECTOR); +} +
> From: Jacob Pan <jacob.jun.pan@linux.intel.com> > Sent: Saturday, April 6, 2024 6:31 AM > > +/* > + * Posted interrupt notification vector for all device MSIs delivered to > + * the host kernel. > + */ > +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > #define NR_VECTORS 256 > Every interrupt is kind of a notification. Just call it POSTED_MSI_VECTOR
On Fri, Apr 12, 2024, Kevin Tian wrote: > > From: Jacob Pan <jacob.jun.pan@linux.intel.com> > > Sent: Saturday, April 6, 2024 6:31 AM > > > > +/* > > + * Posted interrupt notification vector for all device MSIs delivered to > > + * the host kernel. > > + */ > > +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > #define NR_VECTORS 256 > > > > Every interrupt is kind of a notification. FWIW, I find value in having "notification" in the name to differentiate between the IRQ that is notifying the CPU that there's a posted IRQ to be processed, and the posted IRQ itself.
Hi Thomas, On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de> wrote: > On Fri, Apr 05 2024 at 15:31, Jacob Pan wrote: > > diff --git a/arch/x86/include/asm/irq_vectors.h > > b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..1ee00be8218d > > 100644 --- a/arch/x86/include/asm/irq_vectors.h > > +++ b/arch/x86/include/asm/irq_vectors.h > > @@ -97,9 +97,16 @@ > > > > #define LOCAL_TIMER_VECTOR 0xec > > > > +/* > > + * Posted interrupt notification vector for all device MSIs delivered > > to > > + * the host kernel. > > + */ > > +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > #define NR_VECTORS 256 > > > > -#ifdef CONFIG_X86_LOCAL_APIC > > +#ifdef CONFIG_X86_POSTED_MSI > > +#define FIRST_SYSTEM_VECTOR > > POSTED_MSI_NOTIFICATION_VECTOR +#elif defined(CONFIG_X86_LOCAL_APIC) > > #define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR > > #else > > #define FIRST_SYSTEM_VECTOR NR_VECTORS > > This is horrible and we had attempts before to make the system vector > space dense. They all did not work and making an exception for this is > not what we want. > > If we really care then we do it proper for _all_ of them. Something like > the uncompiled below. There is certainly a smarter way to do the build > thing, but my kbuild foo is rusty. I too was concerned about wasting system vectors, but did not know how to fix it. But now your code below works well. Tested without KVM in .config to show the gaps: In VECTOR IRQ domain. BEFORE: System: 46: 0-31,50,235-236,244,246-255 AFTER: System: 46: 0-31,50,241-242,245-255 The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a running system. Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243 POSTED MSI/first system vector moved up from 235 to 241 for this case. Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it instead of manually copying it over each time. Any suggestions are greatly appreciated.
> --- > --- a/arch/x86/Makefile > +++ b/arch/x86/Makefile > @@ -245,6 +245,7 @@ archscripts: scripts_basic > > archheaders: > $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all > + $(Q)$(MAKE) $(build)=arch/x86/kernel/irqvectors all > > ### > # Kernel objects > --- a/arch/x86/include/asm/irq_vectors.h > +++ b/arch/x86/include/asm/irq_vectors.h > @@ -43,59 +43,46 @@ > */ > #define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + > 16) & ~15) + irq) > +#ifndef __ASSEMBLY__ > /* > - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff > - * > - * some of the following vectors are 'rare', they are merged > - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. > - * TLB, reschedule and local APIC vectors are performance-critical. > + * Special IRQ vectors used by the SMP architecture, 0xff and downwards > */ > +enum { > + __SPURIOUS_APIC_VECTOR, > + __ERROR_APIC_VECTOR, > + __RESCHEDULE_VECTOR, > + __CALL_FUNCTION_VECTOR, > + __CALL_FUNCTION_SINGLE_VECTOR, > + __THERMAL_APIC_VECTOR, > + __THRESHOLD_APIC_VECTOR, > + __REBOOT_VECTOR, > + __X86_PLATFORM_IPI_VECTOR, > + __IRQ_WORK_VECTOR, > + __DEFERRED_ERROR_VECTOR, > > -#define SPURIOUS_APIC_VECTOR 0xff > -/* > - * Sanity check > - */ > -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) > -# error SPURIOUS_APIC_VECTOR definition error > +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST) > + __HYPERVISOR_CALLBACK_VECTOR, > #endif > > -#define ERROR_APIC_VECTOR 0xfe > -#define RESCHEDULE_VECTOR 0xfd > -#define CALL_FUNCTION_VECTOR 0xfc > -#define CALL_FUNCTION_SINGLE_VECTOR 0xfb > -#define THERMAL_APIC_VECTOR 0xfa > -#define THRESHOLD_APIC_VECTOR 0xf9 > -#define REBOOT_VECTOR 0xf8 > - > -/* > - * Generic system vector for platform specific use > - */ > -#define X86_PLATFORM_IPI_VECTOR 0xf7 > - > -/* > - * IRQ work vector: > - */ > -#define IRQ_WORK_VECTOR 0xf6 > - > -/* 0xf5 - unused, was UV_BAU_MESSAGE */ > -#define DEFERRED_ERROR_VECTOR 0xf4 > - > -/* Vector on which hypervisor callbacks will be delivered */ > 
-#define HYPERVISOR_CALLBACK_VECTOR 0xf3 > - > -/* Vector for KVM to deliver posted interrupt IPI */ > -#define POSTED_INTR_VECTOR 0xf2 > -#define POSTED_INTR_WAKEUP_VECTOR 0xf1 > -#define POSTED_INTR_NESTED_VECTOR 0xf0 > - > -#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef > +#if IS_ENABLED(CONFIG_KVM) > + /* Vector for KVM to deliver posted interrupt IPI */ > + __POSTED_INTR_VECTOR, > + __POSTED_INTR_WAKEUP_VECTOR, > + __POSTED_INTR_NESTED_VECTOR, > +#endif > + __MANAGED_IRQ_SHUTDOWN_VECTOR, > > #if IS_ENABLED(CONFIG_HYPERV) > -#define HYPERV_REENLIGHTENMENT_VECTOR 0xee > -#define HYPERV_STIMER0_VECTOR 0xed > + __HYPERV_REENLIGHTENMENT_VECTOR, > + __HYPERV_STIMER0_VECTOR, > #endif > + __LOCAL_TIMER_VECTOR, > +}; > +#endif /* !__ASSEMBLY__ */ > > -#define LOCAL_TIMER_VECTOR 0xec > +#ifndef COMPILE_OFFSETS > +#include <asm/irqvectors.h> > +#endif > > #define NR_VECTORS 256 > > --- /dev/null > +++ b/arch/x86/kernel/irqvectors/Makefile > @@ -0,0 +1,11 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +irqvectors-file := > arch/$(SRCARCH)/include/generated/asm/irqvectors.h +targets += > arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s + > +$(irqvectors-file): arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s FORCE > + $(call filechk,offsets,__ASM_IRQVECTORS_H__) > + > +PHONY += all > +all: $(irqvectors-file) > + @: > --- /dev/null > +++ b/arch/x86/kernel/irqvectors/irqvectors.c > @@ -0,0 +1,42 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#define COMPILE_OFFSETS > + > +#include <linux/kbuild.h> > +#include <asm/irq_vectors.h> > + > +#define VECNR(v) (0xFF - __##v) > +#define VECTOR(v) DEFINE(v, VECNR(v)) > + > +static void __used common(void) > +{ > + VECTOR(SPURIOUS_APIC_VECTOR); > + VECTOR(ERROR_APIC_VECTOR); > + VECTOR(RESCHEDULE_VECTOR); > + VECTOR(CALL_FUNCTION_VECTOR); > + VECTOR(CALL_FUNCTION_SINGLE_VECTOR); > + VECTOR(THERMAL_APIC_VECTOR); > + VECTOR(THRESHOLD_APIC_VECTOR); > + VECTOR(REBOOT_VECTOR); > + VECTOR(X86_PLATFORM_IPI_VECTOR); > + VECTOR(IRQ_WORK_VECTOR); > + 
VECTOR(DEFERRED_ERROR_VECTOR); > + > +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST) > + VECTOR(HYPERVISOR_CALLBACK_VECTOR); > +#endif > + > +#if IS_ENABLED(CONFIG_KVM) > + /* Vector for KVM to deliver posted interrupt IPI */ > + VECTOR(POSTED_INTR_VECTOR); > + VECTOR(POSTED_INTR_WAKEUP_VECTOR); > + VECTOR(POSTED_INTR_NESTED_VECTOR); > +#endif > + VECTOR(MANAGED_IRQ_SHUTDOWN_VECTOR); > + > +#if IS_ENABLED(CONFIG_HYPERV) > + VECTOR(HYPERV_REENLIGHTENMENT_VECTOR); > + VECTOR(HYPERV_STIMER0_VECTOR); > +#endif > + VECTOR(LOCAL_TIMER_VECTOR); > +} > + > > > Thanks, Jacob
On Mon, 15 Apr 2024 11:53:58 -0700, Jacob Pan <jacob.jun.pan@linux.intel.com> wrote: > Hi Thomas, > > On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de> > wrote: > > > On Fri, Apr 05 2024 at 15:31, Jacob Pan wrote: > > > diff --git a/arch/x86/include/asm/irq_vectors.h > > > b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..1ee00be8218d > > > 100644 --- a/arch/x86/include/asm/irq_vectors.h > > > +++ b/arch/x86/include/asm/irq_vectors.h > > > @@ -97,9 +97,16 @@ > > > > > > #define LOCAL_TIMER_VECTOR 0xec > > > > > > +/* > > > + * Posted interrupt notification vector for all device MSIs delivered > > > to > > > + * the host kernel. > > > + */ > > > +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > > #define NR_VECTORS 256 > > > > > > -#ifdef CONFIG_X86_LOCAL_APIC > > > +#ifdef CONFIG_X86_POSTED_MSI > > > +#define FIRST_SYSTEM_VECTOR > > > POSTED_MSI_NOTIFICATION_VECTOR +#elif defined(CONFIG_X86_LOCAL_APIC) > > > #define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR > > > #else > > > #define FIRST_SYSTEM_VECTOR NR_VECTORS > > > > This is horrible and we had attempts before to make the system vector > > space dense. They all did not work and making an exception for this is > > not what we want. > > > > If we really care then we do it proper for _all_ of them. Something like > > the uncompiled below. There is certainly a smarter way to do the build > > thing, but my kbuild foo is rusty. > I too had the concern of the wasting system vectors, but did not know how > to fix it. But now your code below works well. Tested without KVM in > .config to show the gaps: > > In VECTOR IRQ domain. > > BEFORE: > System: 46: 0-31,50,235-236,244,246-255 > > AFTER: > System: 46: 0-31,50,241-242,245-255 > > The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a > running system. > > Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243 > > POSTED MSI/first system vector moved up from 235 to 241 for this case. 
> > Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it > instead of manually copy over each time. Any suggestions greatly > appreciated. > On a second thought, if we make system IRQ vector determined at compile time based on different CONFIG options, will it break userspace tools such as perf? More importantly the rule of not breaking userspace. +Arnaldo > > --- > > --- a/arch/x86/Makefile > > +++ b/arch/x86/Makefile > > @@ -245,6 +245,7 @@ archscripts: scripts_basic > > > > archheaders: > > $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all > > + $(Q)$(MAKE) $(build)=arch/x86/kernel/irqvectors all > > > > ### > > # Kernel objects > > --- a/arch/x86/include/asm/irq_vectors.h > > +++ b/arch/x86/include/asm/irq_vectors.h > > @@ -43,59 +43,46 @@ > > */ > > #define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + > > 16) & ~15) + irq) > > +#ifndef __ASSEMBLY__ > > /* > > - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff > > - * > > - * some of the following vectors are 'rare', they are merged > > - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. > > - * TLB, reschedule and local APIC vectors are performance-critical. 
> > + * Special IRQ vectors used by the SMP architecture, 0xff and downwards > > */ > > +enum { > > + __SPURIOUS_APIC_VECTOR, > > + __ERROR_APIC_VECTOR, > > + __RESCHEDULE_VECTOR, > > + __CALL_FUNCTION_VECTOR, > > + __CALL_FUNCTION_SINGLE_VECTOR, > > + __THERMAL_APIC_VECTOR, > > + __THRESHOLD_APIC_VECTOR, > > + __REBOOT_VECTOR, > > + __X86_PLATFORM_IPI_VECTOR, > > + __IRQ_WORK_VECTOR, > > + __DEFERRED_ERROR_VECTOR, > > > > -#define SPURIOUS_APIC_VECTOR 0xff > > -/* > > - * Sanity check > > - */ > > -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) > > -# error SPURIOUS_APIC_VECTOR definition error > > +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST) > > + __HYPERVISOR_CALLBACK_VECTOR, > > #endif > > > > -#define ERROR_APIC_VECTOR 0xfe > > -#define RESCHEDULE_VECTOR 0xfd > > -#define CALL_FUNCTION_VECTOR 0xfc > > -#define CALL_FUNCTION_SINGLE_VECTOR 0xfb > > -#define THERMAL_APIC_VECTOR 0xfa > > -#define THRESHOLD_APIC_VECTOR 0xf9 > > -#define REBOOT_VECTOR 0xf8 > > - > > -/* > > - * Generic system vector for platform specific use > > - */ > > -#define X86_PLATFORM_IPI_VECTOR 0xf7 > > - > > -/* > > - * IRQ work vector: > > - */ > > -#define IRQ_WORK_VECTOR 0xf6 > > - > > -/* 0xf5 - unused, was UV_BAU_MESSAGE */ > > -#define DEFERRED_ERROR_VECTOR 0xf4 > > - > > -/* Vector on which hypervisor callbacks will be delivered */ > > -#define HYPERVISOR_CALLBACK_VECTOR 0xf3 > > - > > -/* Vector for KVM to deliver posted interrupt IPI */ > > -#define POSTED_INTR_VECTOR 0xf2 > > -#define POSTED_INTR_WAKEUP_VECTOR 0xf1 > > -#define POSTED_INTR_NESTED_VECTOR 0xf0 > > - > > -#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef > > +#if IS_ENABLED(CONFIG_KVM) > > + /* Vector for KVM to deliver posted interrupt IPI */ > > + __POSTED_INTR_VECTOR, > > + __POSTED_INTR_WAKEUP_VECTOR, > > + __POSTED_INTR_NESTED_VECTOR, > > +#endif > > + __MANAGED_IRQ_SHUTDOWN_VECTOR, > > > > #if IS_ENABLED(CONFIG_HYPERV) > > -#define HYPERV_REENLIGHTENMENT_VECTOR 0xee > > -#define HYPERV_STIMER0_VECTOR 0xed > > + 
__HYPERV_REENLIGHTENMENT_VECTOR, > > + __HYPERV_STIMER0_VECTOR, > > #endif > > + __LOCAL_TIMER_VECTOR, > > +}; > > +#endif /* !__ASSEMBLY__ */ > > > > -#define LOCAL_TIMER_VECTOR 0xec > > +#ifndef COMPILE_OFFSETS > > +#include <asm/irqvectors.h> > > +#endif > > > > #define NR_VECTORS 256 > > > > --- /dev/null > > +++ b/arch/x86/kernel/irqvectors/Makefile > > @@ -0,0 +1,11 @@ > > +# SPDX-License-Identifier: GPL-2.0 > > + > > +irqvectors-file := > > arch/$(SRCARCH)/include/generated/asm/irqvectors.h +targets += > > arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s + > > +$(irqvectors-file): arch/$(SRCARCH)/kernel/irqvectors/irqvectors.s > > FORCE > > + $(call filechk,offsets,__ASM_IRQVECTORS_H__) > > + > > +PHONY += all > > +all: $(irqvectors-file) > > + @: > > --- /dev/null > > +++ b/arch/x86/kernel/irqvectors/irqvectors.c > > @@ -0,0 +1,42 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +#define COMPILE_OFFSETS > > + > > +#include <linux/kbuild.h> > > +#include <asm/irq_vectors.h> > > + > > +#define VECNR(v) (0xFF - __##v) > > +#define VECTOR(v) DEFINE(v, VECNR(v)) > > + > > +static void __used common(void) > > +{ > > + VECTOR(SPURIOUS_APIC_VECTOR); > > + VECTOR(ERROR_APIC_VECTOR); > > + VECTOR(RESCHEDULE_VECTOR); > > + VECTOR(CALL_FUNCTION_VECTOR); > > + VECTOR(CALL_FUNCTION_SINGLE_VECTOR); > > + VECTOR(THERMAL_APIC_VECTOR); > > + VECTOR(THRESHOLD_APIC_VECTOR); > > + VECTOR(REBOOT_VECTOR); > > + VECTOR(X86_PLATFORM_IPI_VECTOR); > > + VECTOR(IRQ_WORK_VECTOR); > > + VECTOR(DEFERRED_ERROR_VECTOR); > > + > > +#if IS_ENABLED(CONFIG_HYPERVISOR_GUEST) > > + VECTOR(HYPERVISOR_CALLBACK_VECTOR); > > +#endif > > + > > +#if IS_ENABLED(CONFIG_KVM) > > + /* Vector for KVM to deliver posted interrupt IPI */ > > + VECTOR(POSTED_INTR_VECTOR); > > + VECTOR(POSTED_INTR_WAKEUP_VECTOR); > > + VECTOR(POSTED_INTR_NESTED_VECTOR); > > +#endif > > + VECTOR(MANAGED_IRQ_SHUTDOWN_VECTOR); > > + > > +#if IS_ENABLED(CONFIG_HYPERV) > > + VECTOR(HYPERV_REENLIGHTENMENT_VECTOR); > > + 
VECTOR(HYPERV_STIMER0_VECTOR); > > +#endif > > + VECTOR(LOCAL_TIMER_VECTOR); > > +} > > + > > > > > > > > > Thanks, > > Jacob Thanks, Jacob
> From: Sean Christopherson <seanjc@google.com> > Sent: Friday, April 12, 2024 10:28 PM > > On Fri, Apr 12, 2024, Kevin Tian wrote: > > > From: Jacob Pan <jacob.jun.pan@linux.intel.com> > > > Sent: Saturday, April 6, 2024 6:31 AM > > > > > > +/* > > > + * Posted interrupt notification vector for all device MSIs delivered to > > > + * the host kernel. > > > + */ > > > +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > > #define NR_VECTORS 256 > > > > > > > Every interrupt is kind of a notification. > > FWIW, I find value in having "notification" in the name to differentiate > between > the IRQ that is notifying the CPU that there's a posted IRQ to be processed, > and > the posted IRQ itself. IMHO one who knows posted MSI doesn't need the extra 'notification' in the name to differentiate. One who doesn't know what posted MSI is needs to look at the surrounding code anyway, including the above comment. Having 'notification' in the name alone doesn't really help. But I don't feel strongly about this...
On Mon, Apr 15 2024 at 13:43, Jacob Pan wrote: > On Mon, 15 Apr 2024 11:53:58 -0700, Jacob Pan <jacob.jun.pan@linux.intel.com> wrote: >> On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de> wrote: >> > If we really care then we do it proper for _all_ of them. Something like >> > the uncompiled below. There is certainly a smarter way to do the build >> > thing, but my kbuild foo is rusty. >> I too had the concern of the wasting system vectors, but did not know how >> to fix it. But now your code below works well. Tested without KVM in >> .config to show the gaps: >> >> In VECTOR IRQ domain. >> >> BEFORE: >> System: 46: 0-31,50,235-236,244,246-255 >> >> AFTER: >> System: 46: 0-31,50,241-242,245-255 >> >> The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a >> running system. >> >> Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243 >> >> POSTED MSI/first system vector moved up from 235 to 241 for this case. >> >> Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it >> instead of manually copy over each time. Any suggestions greatly >> appreciated. >> > On a second thought, if we make system IRQ vector determined at compile > time based on different CONFIG options, will it break userspace tools such > as perf? More importantly the rule of not breaking userspace. tools/arch/x86/include/asm/irq_vectors.h is only used to generate the list of system vectors for pretty output. And your change already broke that. The obvious solution to that is to expose that list in sysfs for consumption by perf. But we don't have to do any of that right away. It's an orthogonal issue. Just waste the extra system vector to start with and then we can add the compile-time dependent change on top if we really care about gaining back the vectors. Thanks, tglx
On Fri, Apr 19, 2024 at 06:00:24AM +0200, Thomas Gleixner wrote: > On Mon, Apr 15 2024 at 13:43, Jacob Pan wrote: > > On Mon, 15 Apr 2024 11:53:58 -0700, Jacob Pan <jacob.jun.pan@linux.intel.com> wrote: > >> On Thu, 11 Apr 2024 18:51:14 +0200, Thomas Gleixner <tglx@linutronix.de> wrote: > >> > If we really care then we do it proper for _all_ of them. Something like > >> > the uncompiled below. There is certainly a smarter way to do the build > >> > thing, but my kbuild foo is rusty. > >> I too had the concern of the wasting system vectors, but did not know how > >> to fix it. But now your code below works well. Tested without KVM in > >> .config to show the gaps: > >> > >> In VECTOR IRQ domain. > >> > >> BEFORE: > >> System: 46: 0-31,50,235-236,244,246-255 > >> > >> AFTER: > >> System: 46: 0-31,50,241-242,245-255 > >> > >> The only gap is MANAGED_IRQ_SHUTDOWN_VECTOR(243), which is expected on a > >> running system. > >> > >> Verified in irqvectors.s: .ascii "->MANAGED_IRQ_SHUTDOWN_VECTOR $243 > >> > >> POSTED MSI/first system vector moved up from 235 to 241 for this case. > >> > >> Will try to let tools/arch/x86/include/asm/irq_vectors.h also use it > >> instead of manually copy over each time. Any suggestions greatly > >> appreciated. > >> > > On a second thought, if we make system IRQ vector determined at compile > > time based on different CONFIG options, will it break userspace tools such > > as perf? More importantly the rule of not breaking userspace. The rule for tools/perf is "don't impose _any requirement_ on the kernel developers, they don't have to test if any change they do outside of tools/ will break something inside tools/." > tools/arch/x86/include/asm/irq_vectors.h is only used to generate the > list of system vectors for pretty output. And your change already broke > that. Yeah, I even moved that from tools/arch/x86/include/asm/irq_vectors.h to tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h (for next merge window). 
Having it in tools/arch/x86/include/asm/irq_vectors.h was a bad decision as it, as you mentioned, is only used to generate string tables: ⬢[acme@toolbox perf-tools-next]$ tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh static const char *x86_irq_vectors[] = { [0x02] = "NMI", [0x80] = "IA32_SYSCALL", [0xec] = "LOCAL_TIMER", [0xed] = "HYPERV_STIMER0", [0xee] = "HYPERV_REENLIGHTENMENT", [0xef] = "MANAGED_IRQ_SHUTDOWN", [0xf0] = "POSTED_INTR_NESTED", [0xf1] = "POSTED_INTR_WAKEUP", [0xf2] = "POSTED_INTR", [0xf3] = "HYPERVISOR_CALLBACK", [0xf4] = "DEFERRED_ERROR", [0xf6] = "IRQ_WORK", [0xf7] = "X86_PLATFORM_IPI", [0xf8] = "REBOOT", [0xf9] = "THRESHOLD_APIC", [0xfa] = "THERMAL_APIC", [0xfb] = "CALL_FUNCTION_SINGLE", [0xfc] = "CALL_FUNCTION", [0xfd] = "RESCHEDULE", [0xfe] = "ERROR_APIC", [0xff] = "SPURIOUS_APIC", }; ⬢[acme@toolbox perf-tools-next]$ Used in: root@number:~# perf trace -a -e irq_vectors:irq_work_entry/max-stack=32/ --max-events=1 0.000 kworker/u57:0-/9912 irq_vectors:irq_work_entry(vector: IRQ_WORK) __sysvec_irq_work ([kernel.kallsyms]) __sysvec_irq_work ([kernel.kallsyms]) sysvec_irq_work ([kernel.kallsyms]) asm_sysvec_irq_work ([kernel.kallsyms]) _raw_spin_unlock_irqrestore ([kernel.kallsyms]) dma_fence_wait_timeout ([kernel.kallsyms]) intel_atomic_commit_tail ([kernel.kallsyms]) process_one_work ([kernel.kallsyms]) worker_thread ([kernel.kallsyms]) kthread ([kernel.kallsyms]) ret_from_fork ([kernel.kallsyms]) ret_from_fork_asm ([kernel.kallsyms]) root@number:~# But as the original cset introducing this explains, these irq_vectors: tracepoints operate on just one of the vectors, so irq_work_entry(vector: IRQ_WORK), irq_vectors:reschedule_exit(vector: RESCHEDULE), etc. > The obvious solution to that is to expose that list in sysfs for > consumption by perf.
nah, the best thing these days is to stop using 'int' for vector and use 'enum irq_vector', then since we have BTF we can use that to do the enum -> string translation, like with (using /sys/kernel/btf/vmlinux, that is pretty much available everywhere these days): root@number:~# pahole clocksource_ids enum clocksource_ids { CSID_GENERIC = 0, CSID_ARM_ARCH_COUNTER = 1, CSID_MAX = 2, }; root@number:~# pahole skb_drop_reason | head enum skb_drop_reason { SKB_NOT_DROPPED_YET = 0, SKB_CONSUMED = 1, SKB_DROP_REASON_NOT_SPECIFIED = 2, SKB_DROP_REASON_NO_SOCKET = 3, SKB_DROP_REASON_PKT_TOO_SMALL = 4, SKB_DROP_REASON_TCP_CSUM = 5, SKB_DROP_REASON_SOCKET_FILTER = 6, SKB_DROP_REASON_UDP_CSUM = 7, SKB_DROP_REASON_NETFILTER_DROP = 8, root@number:~# Then it's easy to go from 0 to CSID_GENERIC, etc. ⬢[acme@toolbox pahole]$ perf stat -e cycles pahole skb_drop_reason > /dev/null Performance counter stats for 'pahole skb_drop_reason': 6,095,427 cpu_atom/cycles:u/ (2.82%) 103,694,633 cpu_core/cycles:u/ (97.18%) 0.039031759 seconds time elapsed 0.016028000 seconds user 0.023007000 seconds sys ⬢[acme@toolbox pahole]$ - Arnaldo > But we don't have to do any of that right away. It's an orthogonal > issue. Just waste the extra system vector to start with and then we can > add the compile time dependend change on top if we really care about > gaining back the vectors. > > Thanks, > > tglx
Hi Arnaldo, On Fri, 19 Apr 2024 17:07:17 -0300, Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > > On a second thought, if we make system IRQ vector determined at > > > compile time based on different CONFIG options, will it break > > > userspace tools such as perf? More importantly the rule of not > > > breaking userspace. > > The rule for tools/perf is "don't impose _any requirement_ on the kernel > developers, they don't have to test if any change they do outside of > tools/ will break something inside tools/." > > > tools/arch/x86/include/asm/irq_vectors.h is only used to generate the > > list of system vectors for pretty output. And your change already broke > > that. > > Yeah, I even moved that from tools/arch/x86/include/asm/irq_vectors.h > to tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h (for next > merge window). So I will not add anything to the tools directory for my next version. Just a heads-up for adding this new vector. Thanks, Jacob
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..1ee00be8218d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -97,9 +97,16 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb #define NR_VECTORS 256 -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_X86_POSTED_MSI +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR +#elif defined(CONFIG_X86_LOCAL_APIC) #define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h index 3f73ac3ed3a0..989816ca7c9e 100644 --- a/tools/arch/x86/include/asm/irq_vectors.h +++ b/tools/arch/x86/include/asm/irq_vectors.h @@ -99,9 +99,16 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb #define NR_VECTORS 256 -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_X86_POSTED_MSI +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR +#elif defined(CONFIG_X86_LOCAL_APIC) #define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS
When posted MSI is enabled, all device MSIs are multiplexed into a single notification vector. MSI handlers will be de-multiplexed at run-time by system software without IDT delivery. Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com> --- v2: - Add missing CONFIG_ in #ifdef - Extend changes to x86 tools --- arch/x86/include/asm/irq_vectors.h | 9 ++++++++- tools/arch/x86/include/asm/irq_vectors.h | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-)