diff mbox series

[v3,04/15] x86/IRQ: desc->affinity should strictly represent the requested value

Message ID 5CDE910E0200007800230072@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show
Series x86: IRQ management adjustments | expand

Commit Message

Jan Beulich May 17, 2019, 10:46 a.m. UTC
desc->arch.cpu_mask reflects the actual set of target CPUs. Don't ever
fiddle with desc->affinity itself, except to store caller requested
values. Note that assign_irq_vector() now takes a NULL incoming CPU mask
to mean "all CPUs", rather than just "all currently online CPUs".
This way no further affinity adjustment is needed after onlining further
CPUs.

This renders both set_native_irq_info() uses (which weren't using proper
locking anyway) redundant - drop the function altogether.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Comments

Andrew Cooper July 3, 2019, 5:58 p.m. UTC | #1
On 17/05/2019 11:46, Jan Beulich wrote:
> @@ -2334,9 +2339,10 @@ static void dump_irqs(unsigned char key)
>  
>          spin_lock_irqsave(&desc->lock, flags);
>  
> -        printk("   IRQ:%4d aff:%*pb vec:%02x %-15s status=%03x ",
> -               irq, nr_cpu_ids, cpumask_bits(desc->affinity), desc->arch.vector,
> -               desc->handler->typename, desc->status);
> +        printk("   IRQ:%4d aff:%*pb/%*pb vec:%02x %-15s status=%03x ",
> +               irq, nr_cpu_ids, cpumask_bits(desc->affinity),
> +               nr_cpu_ids, cpumask_bits(desc->arch.cpu_mask),
> +               desc->arch.vector, desc->handler->typename, desc->status);

Taking a sample large system (Rome, with your x2apic series to be
specific), which is only half as large as typical high-end Skylake systems.

(XEN) IRQ information:
(XEN)    IRQ:   0 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:f0 type=IO-APIC-edge    status=00000000 time.c#timer_interrupt()
(XEN)    IRQ:   1 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:68 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   3 affinity:ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff vec:70 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   4 affinity:ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff vec:f1 type=IO-APIC-edge    status=00000000 ns16550.c#ns16550_interrupt()
(XEN)    IRQ:   5 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:78 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   6 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:88 type=IO-APIC-edge    status=00000002 mapped, unbound
(XEN)    IRQ:   7 affinity:ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff vec:90 type=IO-APIC-level   status=00000002 mapped, unbound
(XEN)    IRQ:   8 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:98 type=IO-APIC-edge    status=00000030 in-flight=0 domain-list=0:  8(---),
(XEN)    IRQ:   9 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:a0 type=IO-APIC-level   status=00000030 in-flight=1 domain-list=0:  9(PMM),

This change is going to double up the affinity block, which will make
the lines even longer.

Given that all examples I've ever spotted are either a single bit, or a
fully set block, {%*pbl} will render in a much shorter form, and keep the
line length reasonable.  (This in practice applies to the previous patch
as well).

~Andrew
Jan Beulich July 4, 2019, 9:37 a.m. UTC | #2
On 03.07.2019 19:58, Andrew Cooper wrote:
> On 17/05/2019 11:46, Jan Beulich wrote:
>> @@ -2334,9 +2339,10 @@ static void dump_irqs(unsigned char key)
>>   
>>           spin_lock_irqsave(&desc->lock, flags);
>>   
>> -        printk("   IRQ:%4d aff:%*pb vec:%02x %-15s status=%03x ",
>> -               irq, nr_cpu_ids, cpumask_bits(desc->affinity), desc->arch.vector,
>> -               desc->handler->typename, desc->status);
>> +        printk("   IRQ:%4d aff:%*pb/%*pb vec:%02x %-15s status=%03x ",
>> +               irq, nr_cpu_ids, cpumask_bits(desc->affinity),
>> +               nr_cpu_ids, cpumask_bits(desc->arch.cpu_mask),
>> +               desc->arch.vector, desc->handler->typename, desc->status);
> 
> Taking a sample large system (Rome, with your x2apic series to be
> specific), which is only half as large as typical high-end Skylake systems.
> 
> (XEN) IRQ information:
> (XEN)    IRQ:   0 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:f0 type=IO-APIC-edge    status=00000000 time.c#timer_interrupt()
> (XEN)    IRQ:   1 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:68 type=IO-APIC-edge    status=00000002 mapped, unbound
> (XEN)    IRQ:   3 affinity:ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff vec:70 type=IO-APIC-edge    status=00000002 mapped, unbound
> (XEN)    IRQ:   4 affinity:ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff vec:f1 type=IO-APIC-edge    status=00000000 ns16550.c#ns16550_interrupt()
> (XEN)    IRQ:   5 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:78 type=IO-APIC-edge    status=00000002 mapped, unbound
> (XEN)    IRQ:   6 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:88 type=IO-APIC-edge    status=00000002 mapped, unbound
> (XEN)    IRQ:   7 affinity:ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff vec:90 type=IO-APIC-level   status=00000002 mapped, unbound
> (XEN)    IRQ:   8 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:98 type=IO-APIC-edge    status=00000030 in-flight=0 domain-list=0:  8(---),
> (XEN)    IRQ:   9 affinity:00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 vec:a0 type=IO-APIC-level   status=00000030 in-flight=1 domain-list=0:  9(PMM),
> 
> This change is going to double up the affinity block, which will make
> the lines even longer.
> 
> Given that all examples I've ever spotted are either a single bit, or a
> fully set block, {%*pbl} will render in a much shorter form, and keep the
> line length reasonable.  (This in practice applies to the previous patch
> as well).

With SMT off (on Intel systems) I've certainly observed every other bit
being set, which is why I had specifically decided against %*pbl. Plus
using %*pbl would break the tabular formatting. The only middle ground
I could see (still having the undesirable latter effect) would be to
pick between both forms based on the ratio between set bits and total
number of them (and perhaps using %*pb as long as the total number of
them is below a certain threshold). Thoughts?

Jan
diff mbox series

Patch

--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1039,7 +1039,6 @@  static void __init setup_IO_APIC_irqs(vo
             SET_DEST(entry, logical, cpu_mask_to_apicid(TARGET_CPUS));
             spin_lock_irqsave(&ioapic_lock, flags);
             __ioapic_write_entry(apic, pin, 0, entry);
-            set_native_irq_info(irq, TARGET_CPUS);
             spin_unlock_irqrestore(&ioapic_lock, flags);
         }
     }
@@ -2248,7 +2247,6 @@  int io_apic_set_pci_routing (int ioapic,
 
     spin_lock_irqsave(&ioapic_lock, flags);
     __ioapic_write_entry(ioapic, pin, 0, entry);
-    set_native_irq_info(irq, TARGET_CPUS);
     spin_unlock(&ioapic_lock);
 
     spin_lock(&desc->lock);
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -582,11 +582,16 @@  int assign_irq_vector(int irq, const cpu
 
     spin_lock_irqsave(&vector_lock, flags);
     ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS);
-    if (!ret) {
+    if ( !ret )
+    {
         ret = desc->arch.vector;
-        cpumask_copy(desc->affinity, desc->arch.cpu_mask);
+        if ( mask )
+            cpumask_copy(desc->affinity, mask);
+        else
+            cpumask_setall(desc->affinity);
     }
     spin_unlock_irqrestore(&vector_lock, flags);
+
     return ret;
 }
 
@@ -2334,9 +2339,10 @@  static void dump_irqs(unsigned char key)
 
         spin_lock_irqsave(&desc->lock, flags);
 
-        printk("   IRQ:%4d aff:%*pb vec:%02x %-15s status=%03x ",
-               irq, nr_cpu_ids, cpumask_bits(desc->affinity), desc->arch.vector,
-               desc->handler->typename, desc->status);
+        printk("   IRQ:%4d aff:%*pb/%*pb vec:%02x %-15s status=%03x ",
+               irq, nr_cpu_ids, cpumask_bits(desc->affinity),
+               nr_cpu_ids, cpumask_bits(desc->arch.cpu_mask),
+               desc->arch.vector, desc->handler->typename, desc->status);
 
         if ( ssid )
             printk("Z=%-25s ", ssid);
@@ -2424,8 +2430,7 @@  void fixup_irqs(const cpumask_t *mask, b
                 release_old_vec(desc);
         }
 
-        cpumask_copy(&affinity, desc->affinity);
-        if ( !desc->action || cpumask_subset(&affinity, mask) )
+        if ( !desc->action || cpumask_subset(desc->affinity, mask) )
         {
             spin_unlock(&desc->lock);
             continue;
@@ -2458,12 +2463,13 @@  void fixup_irqs(const cpumask_t *mask, b
             desc->arch.move_in_progress = 0;
         }
 
-        cpumask_and(&affinity, &affinity, mask);
-        if ( cpumask_empty(&affinity) )
+        if ( !cpumask_intersects(mask, desc->affinity) )
         {
             break_affinity = true;
-            cpumask_copy(&affinity, mask);
+            cpumask_setall(&affinity);
         }
+        else
+            cpumask_copy(&affinity, desc->affinity);
 
         if ( desc->handler->disable )
             desc->handler->disable(desc);
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -162,11 +162,6 @@  extern irq_desc_t *domain_spin_lock_irq_
 extern irq_desc_t *pirq_spin_lock_irq_desc(
     const struct pirq *, unsigned long *pflags);
 
-static inline void set_native_irq_info(unsigned int irq, const cpumask_t *mask)
-{
-    cpumask_copy(irq_to_desc(irq)->affinity, mask);
-}
-
 unsigned int set_desc_affinity(struct irq_desc *, const cpumask_t *);
 
 #ifndef arch_hwdom_irqs