diff mbox series

[09/13] x86/irq: Add x86_non_ir_cpumask

Message ID 20201005152856.974112-9-dwmw2@infradead.org (mailing list archive)
State New, archived
Headers show
Series Fix per-domain IRQ affinity, allow >255 CPUs on x86 without IRQ remapping | expand

Commit Message

David Woodhouse Oct. 5, 2020, 3:28 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

This is the mask of CPUs to which IRQs can be delivered without interrupt
remapping.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/mpspec.h  |  1 +
 arch/x86/kernel/apic/apic.c    | 12 ++++++++++++
 arch/x86/kernel/apic/io_apic.c |  2 ++
 3 files changed, 15 insertions(+)

Comments

Thomas Gleixner Oct. 6, 2020, 9:42 p.m. UTC | #1
On Mon, Oct 05 2020 at 16:28, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> This is the mask of CPUs to which IRQs can be delivered without interrupt
> remapping.
>  
> +/* Mask of CPUs which can be targeted by non-remapped interrupts. */
> +cpumask_t x86_non_ir_cpumask = { CPU_BITS_ALL };

What?

>  #ifdef CONFIG_X86_32
>  
>  /*
> @@ -1838,6 +1841,7 @@ static __init void x2apic_enable(void)
>  static __init void try_to_enable_x2apic(int remap_mode)
>  {
>  	u32 apic_limit = 0;
> +	int i;
>  
>  	if (x2apic_state == X2APIC_DISABLED)
>  		return;
> @@ -1880,6 +1884,14 @@ static __init void try_to_enable_x2apic(int remap_mode)
>  	if (apic_limit)
>  		x2apic_set_max_apicid(apic_limit);
>  
> +	/* Build the affinity mask for interrupts that can't be remapped. */
> +	cpumask_clear(&x86_non_ir_cpumask);
> +	i = min_t(unsigned int, num_possible_cpus() - 1, apic_limit);
> +	for ( ; i >= 0; i--) {
> +		if (cpu_physical_id(i) <= apic_limit)
> +			cpumask_set_cpu(i, &x86_non_ir_cpumask);
> +	}

Blink. If the APIC id is not linear with the cpu numbers then this
results in a reduced addressable set of CPUs. WHY?

> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> index aa9a3b54a96c..4d0ef46fedb9 100644
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -2098,6 +2098,8 @@ static int mp_alloc_timer_irq(int ioapic, int pin)
>  		struct irq_alloc_info info;
>  
>  		ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
> +		if (domain->parent == x86_vector_domain)
> +			info.mask = &x86_non_ir_cpumask;

We are not going to sprinkle such domain checks all over the
place. Again, the mask is a property of the interrupt domain.

Thanks,

        tglx
David Woodhouse Oct. 7, 2020, 7:25 a.m. UTC | #2
On Tue, 2020-10-06 at 23:42 +0200, Thomas Gleixner wrote:
> On Mon, Oct 05 2020 at 16:28, David Woodhouse wrote:
> > From: David Woodhouse <dwmw@amazon.co.uk>
> > 
> > This is the mask of CPUs to which IRQs can be delivered without
> > interrupt
> > remapping.
> >  
> > +/* Mask of CPUs which can be targeted by non-remapped interrupts.
> > */
> > +cpumask_t x86_non_ir_cpumask = { CPU_BITS_ALL };
> 
> What?

By default — i.e. if we didn't hit any limits — all CPUs can be
targeted by external interrupts. That is already the behaviour today.

Or at least we pretend it is, modulo the bugs :)

> >  #ifdef CONFIG_X86_32
> >  
> >  /*
> > @@ -1838,6 +1841,7 @@ static __init void x2apic_enable(void)
> >  static __init void try_to_enable_x2apic(int remap_mode)
> >  {
> >  	u32 apic_limit = 0;
> > +	int i;
> >  
> >  	if (x2apic_state == X2APIC_DISABLED)
> >  		return;
> > @@ -1880,6 +1884,14 @@ static __init void try_to_enable_x2apic(int remap_mode)
> >  	if (apic_limit)
> >  		x2apic_set_max_apicid(apic_limit);
> >  
> > +	/* Build the affinity mask for interrupts that can't be remapped. */
> > +	cpumask_clear(&x86_non_ir_cpumask);
> > +	i = min_t(unsigned int, num_possible_cpus() - 1, apic_limit);
> > +	for ( ; i >= 0; i--) {
> > +		if (cpu_physical_id(i) <= apic_limit)
> > +			cpumask_set_cpu(i, &x86_non_ir_cpumask);
> > +	}
> 
> Blink. If the APIC id is not linear with the cpu numbers then this
> results in a reduced addressable set of CPUs. WHY?

Hm, good question. That loop was cargo-culted from hyperv-iommu.c;
perhaps it makes more sense there because Hyper-V really does promise
that linearity, or perhaps it was already buggy. Will fix.

In fact, in apic.c I could probably just use the cpuid_to_apicid array
which is right there in the file.

> > diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> > index aa9a3b54a96c..4d0ef46fedb9 100644
> > --- a/arch/x86/kernel/apic/io_apic.c
> > +++ b/arch/x86/kernel/apic/io_apic.c
> > @@ -2098,6 +2098,8 @@ static int mp_alloc_timer_irq(int ioapic, int pin)
> >  		struct irq_alloc_info info;
> >  
> >  		ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
> > +		if (domain->parent == x86_vector_domain)
> > +			info.mask = &x86_non_ir_cpumask;
> 
> We are not going to sprinkle such domain checks all over the
> place. Again, the mask is a property of the interrupt domain.

Yeah, that's a hangover from the first attempts which I forgot to
delete.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 25ee8ca0a1f2..b2090be5b444 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -141,5 +141,6 @@  static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
 #define PHYSID_MASK_NONE	{ {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
 
 extern physid_mask_t phys_cpu_present_map;
+extern cpumask_t x86_non_ir_cpumask;
 
 #endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 459c78558f36..069f5e9f1d28 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -103,6 +103,9 @@  EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
 
+/* Mask of CPUs which can be targeted by non-remapped interrupts. */
+cpumask_t x86_non_ir_cpumask = { CPU_BITS_ALL };
+
 #ifdef CONFIG_X86_32
 
 /*
@@ -1838,6 +1841,7 @@  static __init void x2apic_enable(void)
 static __init void try_to_enable_x2apic(int remap_mode)
 {
 	u32 apic_limit = 0;
+	int i;
 
 	if (x2apic_state == X2APIC_DISABLED)
 		return;
@@ -1880,6 +1884,14 @@  static __init void try_to_enable_x2apic(int remap_mode)
 	if (apic_limit)
 		x2apic_set_max_apicid(apic_limit);
 
+	/* Build the affinity mask for interrupts that can't be remapped. */
+	cpumask_clear(&x86_non_ir_cpumask);
+	i = min_t(unsigned int, num_possible_cpus() - 1, apic_limit);
+	for ( ; i >= 0; i--) {
+		if (cpu_physical_id(i) <= apic_limit)
+			cpumask_set_cpu(i, &x86_non_ir_cpumask);
+	}
+
 	x2apic_enable();
 }
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index aa9a3b54a96c..4d0ef46fedb9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2098,6 +2098,8 @@  static int mp_alloc_timer_irq(int ioapic, int pin)
 		struct irq_alloc_info info;
 
 		ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
+		if (domain->parent == x86_vector_domain)
+			info.mask = &x86_non_ir_cpumask;
 		info.devid = mpc_ioapic_id(ioapic);
 		info.ioapic.pin = pin;
 		mutex_lock(&ioapic_mutex);