Message ID | 5CD2D563020000780022CD40@prv1-mh.provo.novell.com (mailing list archive)
---|---
State | Superseded
Series | x86: IRQ management adjustments
>>> On 08.05.19 at 15:10, <JBeulich@suse.com> wrote:
> All of __{assign,bind,clear}_irq_vector() manipulate struct irq_desc
> fields, and hence ought to be called with the descriptor lock held in
> addition to vector_lock. This is currently the case for only
> set_desc_affinity() (in the common case) and destroy_irq(), which also
> clarifies what the nesting behavior between the locks has to be.
> Reflect the new expectation by having these functions all take a
> descriptor as parameter instead of an interrupt number.
>
> Also take care of the two special cases of calls to set_desc_affinity():
> set_ioapic_affinity_irq() and VT-d's dma_msi_set_affinity() get called
> directly as well, and in these cases the descriptor locks hadn't got
> acquired till now. For set_ioapic_affinity_irq() this means acquiring /
> releasing of the IO-APIC lock can be plain spin_{,un}lock() then.
>
> Drop one of the two leading underscores from all three functions at
> the same time.
>
> There's one case left where descriptors get manipulated with just
> vector_lock held: setup_vector_irq() assumes its caller to acquire
> vector_lock, and hence can't itself acquire the descriptor locks (wrong
> lock order). I don't currently see how to address this.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Also adjust set_ioapic_affinity_irq() and VT-d's
>     dma_msi_set_affinity().

I'm sorry, Kevin, I should have Cc-ed you on this one.

Jan

> --- a/xen/arch/x86/io_apic.c
> +++ b/xen/arch/x86/io_apic.c
> @@ -550,14 +550,14 @@ static void clear_IO_APIC (void)
>  static void
>  set_ioapic_affinity_irq(struct irq_desc *desc, const cpumask_t *mask)
>  {
> -    unsigned long flags;
>      unsigned int dest;
>      int pin, irq;
>      struct irq_pin_list *entry;
>
>      irq = desc->irq;
>
> -    spin_lock_irqsave(&ioapic_lock, flags);
> +    spin_lock(&ioapic_lock);
> +
>      dest = set_desc_affinity(desc, mask);
>      if (dest != BAD_APICID) {
>          if ( !x2apic_enabled )
> @@ -580,8 +580,8 @@ set_ioapic_affinity_irq(struct irq_desc
>              entry = irq_2_pin + entry->next;
>          }
>      }
> -    spin_unlock_irqrestore(&ioapic_lock, flags);
>
> +    spin_unlock(&ioapic_lock);
>  }
>
>  /*
> @@ -674,16 +674,19 @@ void /*__init*/ setup_ioapic_dest(void)
>      for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
>          for (pin = 0; pin < nr_ioapic_entries[ioapic]; pin++) {
>              struct irq_desc *desc;
> +            unsigned long flags;
>
>              irq_entry = find_irq_entry(ioapic, pin, mp_INT);
>              if (irq_entry == -1)
>                  continue;
>              irq = pin_2_irq(irq_entry, ioapic, pin);
>              desc = irq_to_desc(irq);
> +
> +            spin_lock_irqsave(&desc->lock, flags);
>              BUG_ON(!cpumask_intersects(desc->arch.cpu_mask,
>                                         &cpu_online_map));
>              set_ioapic_affinity_irq(desc, desc->arch.cpu_mask);
> +            spin_unlock_irqrestore(&desc->lock, flags);
>          }
> -
>      }
>  }
>
> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -27,6 +27,7 @@
>  #include <public/physdev.h>
>
>  static int parse_irq_vector_map_param(const char *s);
> +static void _clear_irq_vector(struct irq_desc *desc);
>
>  /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
>  bool __read_mostly opt_noirqbalance;
> @@ -120,13 +121,12 @@ static void trace_irq_mask(uint32_t even
>      trace_var(event, 1, sizeof(d), &d);
>  }
>
> -static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
> +static int __init _bind_irq_vector(struct irq_desc *desc, int vector,
> +                                   const cpumask_t *cpu_mask)
>  {
>      cpumask_t online_mask;
>      int cpu;
> -    struct irq_desc *desc = irq_to_desc(irq);
>
> -    BUG_ON((unsigned)irq >= nr_irqs);
>      BUG_ON((unsigned)vector >= NR_VECTORS);
>
>      cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
> @@ -137,9 +137,9 @@ static int __init __bind_irq_vector(int
>          return 0;
>      if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
>          return -EBUSY;
> -    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, irq, vector, &online_mask);
> +    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, desc->irq, vector, &online_mask);
>      for_each_cpu(cpu, &online_mask)
> -        per_cpu(vector_irq, cpu)[vector] = irq;
> +        per_cpu(vector_irq, cpu)[vector] = desc->irq;
>      desc->arch.vector = vector;
>      cpumask_copy(desc->arch.cpu_mask, &online_mask);
>      if ( desc->arch.used_vectors )
> @@ -153,12 +153,18 @@ static int __init __bind_irq_vector(int
>
>  int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
>  {
> +    struct irq_desc *desc = irq_to_desc(irq);
>      unsigned long flags;
>      int ret;
>
> -    spin_lock_irqsave(&vector_lock, flags);
> -    ret = __bind_irq_vector(irq, vector, cpu_mask);
> -    spin_unlock_irqrestore(&vector_lock, flags);
> +    BUG_ON((unsigned)irq >= nr_irqs);
> +
> +    spin_lock_irqsave(&desc->lock, flags);
> +    spin_lock(&vector_lock);
> +    ret = _bind_irq_vector(desc, vector, cpu_mask);
> +    spin_unlock(&vector_lock);
> +    spin_unlock_irqrestore(&desc->lock, flags);
> +
>      return ret;
>  }
>
> @@ -243,7 +249,9 @@ void destroy_irq(unsigned int irq)
>
>      spin_lock_irqsave(&desc->lock, flags);
>      desc->handler = &no_irq_type;
> -    clear_irq_vector(irq);
> +    spin_lock(&vector_lock);
> +    _clear_irq_vector(desc);
> +    spin_unlock(&vector_lock);
>      desc->arch.used_vectors = NULL;
>      spin_unlock_irqrestore(&desc->lock, flags);
>
> @@ -266,11 +274,11 @@ static void release_old_vec(struct irq_d
>      }
>  }
>
> -static void __clear_irq_vector(int irq)
> +static void _clear_irq_vector(struct irq_desc *desc)
>  {
> -    int cpu, vector, old_vector;
> +    unsigned int cpu;
> +    int vector, old_vector, irq = desc->irq;
>      cpumask_t tmp_mask;
> -    struct irq_desc *desc = irq_to_desc(irq);
>
>      BUG_ON(!desc->arch.vector);
>
> @@ -316,11 +324,14 @@ static void __clear_irq_vector(int irq)
>
>  void clear_irq_vector(int irq)
>  {
> +    struct irq_desc *desc = irq_to_desc(irq);
>      unsigned long flags;
>
> -    spin_lock_irqsave(&vector_lock, flags);
> -    __clear_irq_vector(irq);
> -    spin_unlock_irqrestore(&vector_lock, flags);
> +    spin_lock_irqsave(&desc->lock, flags);
> +    spin_lock(&vector_lock);
> +    _clear_irq_vector(desc);
> +    spin_unlock(&vector_lock);
> +    spin_unlock_irqrestore(&desc->lock, flags);
>  }
>
>  int irq_to_vector(int irq)
> @@ -455,8 +466,7 @@ static vmask_t *irq_get_used_vector_mask
>      return ret;
>  }
>
> -static int __assign_irq_vector(
> -    int irq, struct irq_desc *desc, const cpumask_t *mask)
> +static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
>  {
>      /*
>       * NOTE! The local APIC isn't very good at handling
> @@ -470,7 +480,8 @@ static int __assign_irq_vector(
>       * 0x80, because int 0x80 is hm, kind of importantish. ;)
>       */
>      static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
> -    int cpu, err, old_vector;
> +    unsigned int cpu;
> +    int err, old_vector, irq = desc->irq;
>      vmask_t *irq_used_vectors = NULL;
>
>      old_vector = irq_to_vector(irq);
> @@ -583,8 +594,12 @@ int assign_irq_vector(int irq, const cpu
>
>      BUG_ON(irq >= nr_irqs || irq <0);
>
> -    spin_lock_irqsave(&vector_lock, flags);
> -    ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS);
> +    spin_lock_irqsave(&desc->lock, flags);
> +
> +    spin_lock(&vector_lock);
> +    ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
> +    spin_unlock(&vector_lock);
> +
>      if ( !ret )
>      {
>          ret = desc->arch.vector;
> @@ -593,7 +608,8 @@ int assign_irq_vector(int irq, const cpu
>      else
>          cpumask_setall(desc->affinity);
>      }
> -    spin_unlock_irqrestore(&vector_lock, flags);
> +
> +    spin_unlock_irqrestore(&desc->lock, flags);
>
>      return ret;
>  }
> @@ -767,7 +783,6 @@ void irq_complete_move(struct irq_desc *
>
>  unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
>  {
> -    unsigned int irq;
>      int ret;
>      unsigned long flags;
>      cpumask_t dest_mask;
> @@ -775,10 +790,8 @@ unsigned int set_desc_affinity(struct ir
>      if (!cpumask_intersects(mask, &cpu_online_map))
>          return BAD_APICID;
>
> -    irq = desc->irq;
> -
>      spin_lock_irqsave(&vector_lock, flags);
> -    ret = __assign_irq_vector(irq, desc, mask);
> +    ret = _assign_irq_vector(desc, mask);
>      spin_unlock_irqrestore(&vector_lock, flags);
>
>      if (ret < 0)
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
>      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
>                               : NUMA_NO_NODE;
>      const cpumask_t *cpumask = &cpu_online_map;
> +    struct irq_desc *desc;
>
>      if ( node < MAX_NUMNODES && node_online(node) &&
>           cpumask_intersects(&node_to_cpumask(node), cpumask) )
>          cpumask = &node_to_cpumask(node);
> -    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
> +
> +    desc = irq_to_desc(drhd->iommu->msi.irq);
> +    spin_lock_irq(&desc->lock);
> +    dma_msi_set_affinity(desc, cpumask);
> +    spin_unlock_irq(&desc->lock);
>  }
>
>  static int adjust_vtd_irq_affinities(void)
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Wednesday, May 8, 2019 9:16 PM
>
> >>> On 08.05.19 at 15:10, <JBeulich@suse.com> wrote:
> > All of __{assign,bind,clear}_irq_vector() manipulate struct irq_desc
> > fields, and hence ought to be called with the descriptor lock held in
> > addition to vector_lock. This is currently the case for only
> > set_desc_affinity() (in the common case) and destroy_irq(), which also
> > clarifies what the nesting behavior between the locks has to be.
> > Reflect the new expectation by having these functions all take a
> > descriptor as parameter instead of an interrupt number.
> >
> > Also take care of the two special cases of calls to set_desc_affinity():
> > set_ioapic_affinity_irq() and VT-d's dma_msi_set_affinity() get called
> > directly as well, and in these cases the descriptor locks hadn't got
> > acquired till now. For set_ioapic_affinity_irq() this means acquiring /
> > releasing of the IO-APIC lock can be plain spin_{,un}lock() then.
> >
> > Drop one of the two leading underscores from all three functions at
> > the same time.
> >
> > There's one case left where descriptors get manipulated with just
> > vector_lock held: setup_vector_irq() assumes its caller to acquire
> > vector_lock, and hence can't itself acquire the descriptor locks (wrong
> > lock order). I don't currently see how to address this.
> >
> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
> > ---
> > v2: Also adjust set_ioapic_affinity_irq() and VT-d's
> >     dma_msi_set_affinity().
>
> I'm sorry, Kevin, I should have Cc-ed you on this one.

Reviewed-by: Kevin Tian <kevin.tian@intel.com> for vtd part.
On Wed, May 08, 2019 at 07:10:59AM -0600, Jan Beulich wrote:
> All of __{assign,bind,clear}_irq_vector() manipulate struct irq_desc
> fields, and hence ought to be called with the descriptor lock held in
> addition to vector_lock. This is currently the case for only
> set_desc_affinity() (in the common case) and destroy_irq(), which also
> clarifies what the nesting behavior between the locks has to be.
> Reflect the new expectation by having these functions all take a
> descriptor as parameter instead of an interrupt number.
>
> Also take care of the two special cases of calls to set_desc_affinity():
> set_ioapic_affinity_irq() and VT-d's dma_msi_set_affinity() get called
> directly as well, and in these cases the descriptor locks hadn't got
> acquired till now. For set_ioapic_affinity_irq() this means acquiring /
> releasing of the IO-APIC lock can be plain spin_{,un}lock() then.
>
> Drop one of the two leading underscores from all three functions at
> the same time.
>
> There's one case left where descriptors get manipulated with just
> vector_lock held: setup_vector_irq() assumes its caller to acquire
> vector_lock, and hence can't itself acquire the descriptor locks (wrong
> lock order). I don't currently see how to address this.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
>      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
>                               : NUMA_NO_NODE;
>      const cpumask_t *cpumask = &cpu_online_map;
> +    struct irq_desc *desc;
>
>      if ( node < MAX_NUMNODES && node_online(node) &&
>           cpumask_intersects(&node_to_cpumask(node), cpumask) )
>          cpumask = &node_to_cpumask(node);
> -    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
> +
> +    desc = irq_to_desc(drhd->iommu->msi.irq);
> +    spin_lock_irq(&desc->lock);

I would use the irqsave/irqrestore variants here for extra safety.

Thanks,

Roger.
>>> On 13.05.19 at 15:48, <roger.pau@citrix.com> wrote:
> On Wed, May 08, 2019 at 07:10:59AM -0600, Jan Beulich wrote:
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
>>      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
>>                               : NUMA_NO_NODE;
>>      const cpumask_t *cpumask = &cpu_online_map;
>> +    struct irq_desc *desc;
>>
>>      if ( node < MAX_NUMNODES && node_online(node) &&
>>           cpumask_intersects(&node_to_cpumask(node), cpumask) )
>>          cpumask = &node_to_cpumask(node);
>> -    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
>> +
>> +    desc = irq_to_desc(drhd->iommu->msi.irq);
>> +    spin_lock_irq(&desc->lock);
>
> I would use the irqsave/irqrestore variants here for extra safety.

Hmm, maybe. But I think we're in bigger trouble if IRQs indeed
ended up enabled at any of the two points where this function
gets called.

Jan
On Mon, May 13, 2019 at 08:19:04AM -0600, Jan Beulich wrote:
> >>> On 13.05.19 at 15:48, <roger.pau@citrix.com> wrote:
> > On Wed, May 08, 2019 at 07:10:59AM -0600, Jan Beulich wrote:
> >> --- a/xen/drivers/passthrough/vtd/iommu.c
> >> +++ b/xen/drivers/passthrough/vtd/iommu.c
> >> @@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
> >>      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
> >>                               : NUMA_NO_NODE;
> >>      const cpumask_t *cpumask = &cpu_online_map;
> >> +    struct irq_desc *desc;
> >>
> >>      if ( node < MAX_NUMNODES && node_online(node) &&
> >>           cpumask_intersects(&node_to_cpumask(node), cpumask) )
> >>          cpumask = &node_to_cpumask(node);
> >> -    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
> >> +
> >> +    desc = irq_to_desc(drhd->iommu->msi.irq);
> >> +    spin_lock_irq(&desc->lock);
> >
> > I would use the irqsave/irqrestore variants here for extra safety.
>
> Hmm, maybe. But I think we're in bigger trouble if IRQs indeed
> ended up enabled at any of the two points where this function
> gets called.

I think I'm misreading the above, but if you expect
adjust_irq_affinity to always be called with interrupts disabled using
spin_unlock_irq is wrong as it unconditionally enables interrupts.

Roger.
>>> On 13.05.19 at 16:45, <roger.pau@citrix.com> wrote:
> On Mon, May 13, 2019 at 08:19:04AM -0600, Jan Beulich wrote:
>> >>> On 13.05.19 at 15:48, <roger.pau@citrix.com> wrote:
>> > On Wed, May 08, 2019 at 07:10:59AM -0600, Jan Beulich wrote:
>> >> --- a/xen/drivers/passthrough/vtd/iommu.c
>> >> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> >> @@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
>> >>      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
>> >>                               : NUMA_NO_NODE;
>> >>      const cpumask_t *cpumask = &cpu_online_map;
>> >> +    struct irq_desc *desc;
>> >>
>> >>      if ( node < MAX_NUMNODES && node_online(node) &&
>> >>           cpumask_intersects(&node_to_cpumask(node), cpumask) )
>> >>          cpumask = &node_to_cpumask(node);
>> >> -    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
>> >> +
>> >> +    desc = irq_to_desc(drhd->iommu->msi.irq);
>> >> +    spin_lock_irq(&desc->lock);
>> >
>> > I would use the irqsave/irqrestore variants here for extra safety.
>>
>> Hmm, maybe. But I think we're in bigger trouble if IRQs indeed
>> ended up enabled at any of the two points where this function
>> gets called.
>
> I think I'm misreading the above, but if you expect
> adjust_irq_affinity to always be called with interrupts disabled using
> spin_unlock_irq is wrong as it unconditionally enables interrupts.

Oops - s/enabled/disabled/ in my earlier reply.

Jan
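The semantics at issue, as a minimal sketch. This is illustrative only,
not Xen code: the two wrapper functions are hypothetical, while the
spin_* primitives carry their usual Xen/Linux meaning.

    /* Only correct if IRQs are known to be enabled on entry. */
    void with_irq_variant(spinlock_t *lock)
    {
        spin_lock_irq(lock);        /* disables IRQs unconditionally */
        /* ... critical section ... */
        spin_unlock_irq(lock);      /* re-enables IRQs unconditionally,
                                       even if the caller entered with
                                       them disabled */
    }

    /* Safe regardless of the caller's IRQ state. */
    void with_irqsave_variant(spinlock_t *lock)
    {
        unsigned long flags;

        spin_lock_irqsave(lock, flags);      /* saves the current IRQ state */
        /* ... critical section ... */
        spin_unlock_irqrestore(lock, flags); /* restores the saved state */
    }

With Jan's s/enabled/disabled/ correction applied, his point is that both
call sites of adjust_irq_affinity() are expected to run with IRQs enabled,
so the plain _irq variant suffices there.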
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -550,14 +550,14 @@ static void clear_IO_APIC (void)
 static void
 set_ioapic_affinity_irq(struct irq_desc *desc, const cpumask_t *mask)
 {
-    unsigned long flags;
     unsigned int dest;
     int pin, irq;
     struct irq_pin_list *entry;

     irq = desc->irq;

-    spin_lock_irqsave(&ioapic_lock, flags);
+    spin_lock(&ioapic_lock);
+
     dest = set_desc_affinity(desc, mask);
     if (dest != BAD_APICID) {
         if ( !x2apic_enabled )
@@ -580,8 +580,8 @@ set_ioapic_affinity_irq(struct irq_desc
             entry = irq_2_pin + entry->next;
         }
     }
-    spin_unlock_irqrestore(&ioapic_lock, flags);

+    spin_unlock(&ioapic_lock);
 }

 /*
@@ -674,16 +674,19 @@ void /*__init*/ setup_ioapic_dest(void)
     for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
         for (pin = 0; pin < nr_ioapic_entries[ioapic]; pin++) {
             struct irq_desc *desc;
+            unsigned long flags;

             irq_entry = find_irq_entry(ioapic, pin, mp_INT);
             if (irq_entry == -1)
                 continue;
             irq = pin_2_irq(irq_entry, ioapic, pin);
             desc = irq_to_desc(irq);
+
+            spin_lock_irqsave(&desc->lock, flags);
             BUG_ON(!cpumask_intersects(desc->arch.cpu_mask,
                                        &cpu_online_map));
             set_ioapic_affinity_irq(desc, desc->arch.cpu_mask);
+            spin_unlock_irqrestore(&desc->lock, flags);
         }
-
     }
 }

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -27,6 +27,7 @@
 #include <public/physdev.h>

 static int parse_irq_vector_map_param(const char *s);
+static void _clear_irq_vector(struct irq_desc *desc);

 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
 bool __read_mostly opt_noirqbalance;
@@ -120,13 +121,12 @@ static void trace_irq_mask(uint32_t even
     trace_var(event, 1, sizeof(d), &d);
 }

-static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
+static int __init _bind_irq_vector(struct irq_desc *desc, int vector,
+                                   const cpumask_t *cpu_mask)
 {
     cpumask_t online_mask;
     int cpu;
-    struct irq_desc *desc = irq_to_desc(irq);

-    BUG_ON((unsigned)irq >= nr_irqs);
     BUG_ON((unsigned)vector >= NR_VECTORS);

     cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
@@ -137,9 +137,9 @@ static int __init __bind_irq_vector(int
         return 0;
     if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
         return -EBUSY;
-    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, irq, vector, &online_mask);
+    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, desc->irq, vector, &online_mask);
     for_each_cpu(cpu, &online_mask)
-        per_cpu(vector_irq, cpu)[vector] = irq;
+        per_cpu(vector_irq, cpu)[vector] = desc->irq;
     desc->arch.vector = vector;
     cpumask_copy(desc->arch.cpu_mask, &online_mask);
     if ( desc->arch.used_vectors )
@@ -153,12 +153,18 @@ static int __init __bind_irq_vector(int

 int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
 {
+    struct irq_desc *desc = irq_to_desc(irq);
     unsigned long flags;
     int ret;

-    spin_lock_irqsave(&vector_lock, flags);
-    ret = __bind_irq_vector(irq, vector, cpu_mask);
-    spin_unlock_irqrestore(&vector_lock, flags);
+    BUG_ON((unsigned)irq >= nr_irqs);
+
+    spin_lock_irqsave(&desc->lock, flags);
+    spin_lock(&vector_lock);
+    ret = _bind_irq_vector(desc, vector, cpu_mask);
+    spin_unlock(&vector_lock);
+    spin_unlock_irqrestore(&desc->lock, flags);
+
     return ret;
 }

@@ -243,7 +249,9 @@ void destroy_irq(unsigned int irq)

     spin_lock_irqsave(&desc->lock, flags);
     desc->handler = &no_irq_type;
-    clear_irq_vector(irq);
+    spin_lock(&vector_lock);
+    _clear_irq_vector(desc);
+    spin_unlock(&vector_lock);
     desc->arch.used_vectors = NULL;
     spin_unlock_irqrestore(&desc->lock, flags);

@@ -266,11 +274,11 @@ static void release_old_vec(struct irq_d
     }
 }

-static void __clear_irq_vector(int irq)
+static void _clear_irq_vector(struct irq_desc *desc)
 {
-    int cpu, vector, old_vector;
+    unsigned int cpu;
+    int vector, old_vector, irq = desc->irq;
     cpumask_t tmp_mask;
-    struct irq_desc *desc = irq_to_desc(irq);

     BUG_ON(!desc->arch.vector);

@@ -316,11 +324,14 @@ static void __clear_irq_vector(int irq)

 void clear_irq_vector(int irq)
 {
+    struct irq_desc *desc = irq_to_desc(irq);
     unsigned long flags;

-    spin_lock_irqsave(&vector_lock, flags);
-    __clear_irq_vector(irq);
-    spin_unlock_irqrestore(&vector_lock, flags);
+    spin_lock_irqsave(&desc->lock, flags);
+    spin_lock(&vector_lock);
+    _clear_irq_vector(desc);
+    spin_unlock(&vector_lock);
+    spin_unlock_irqrestore(&desc->lock, flags);
 }

 int irq_to_vector(int irq)
@@ -455,8 +466,7 @@ static vmask_t *irq_get_used_vector_mask
     return ret;
 }

-static int __assign_irq_vector(
-    int irq, struct irq_desc *desc, const cpumask_t *mask)
+static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
 {
     /*
      * NOTE! The local APIC isn't very good at handling
@@ -470,7 +480,8 @@ static int __assign_irq_vector(
      * 0x80, because int 0x80 is hm, kind of importantish. ;)
      */
     static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
-    int cpu, err, old_vector;
+    unsigned int cpu;
+    int err, old_vector, irq = desc->irq;
     vmask_t *irq_used_vectors = NULL;

     old_vector = irq_to_vector(irq);
@@ -583,8 +594,12 @@ int assign_irq_vector(int irq, const cpu

     BUG_ON(irq >= nr_irqs || irq <0);

-    spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS);
+    spin_lock_irqsave(&desc->lock, flags);
+
+    spin_lock(&vector_lock);
+    ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
+    spin_unlock(&vector_lock);
+
     if ( !ret )
     {
         ret = desc->arch.vector;
@@ -593,7 +608,8 @@ int assign_irq_vector(int irq, const cpu
     else
         cpumask_setall(desc->affinity);
     }
-    spin_unlock_irqrestore(&vector_lock, flags);
+
+    spin_unlock_irqrestore(&desc->lock, flags);

     return ret;
 }
@@ -767,7 +783,6 @@ void irq_complete_move(struct irq_desc *

 unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
 {
-    unsigned int irq;
     int ret;
     unsigned long flags;
     cpumask_t dest_mask;
@@ -775,10 +790,8 @@ unsigned int set_desc_affinity(struct ir
     if (!cpumask_intersects(mask, &cpu_online_map))
         return BAD_APICID;

-    irq = desc->irq;
-
     spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq, desc, mask);
+    ret = _assign_irq_vector(desc, mask);
     spin_unlock_irqrestore(&vector_lock, flags);

     if (ret < 0)
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
     unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
                              : NUMA_NO_NODE;
     const cpumask_t *cpumask = &cpu_online_map;
+    struct irq_desc *desc;

     if ( node < MAX_NUMNODES && node_online(node) &&
          cpumask_intersects(&node_to_cpumask(node), cpumask) )
         cpumask = &node_to_cpumask(node);
-    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+
+    desc = irq_to_desc(drhd->iommu->msi.irq);
+    spin_lock_irq(&desc->lock);
+    dma_msi_set_affinity(desc, cpumask);
+    spin_unlock_irq(&desc->lock);
 }

 static int adjust_vtd_irq_affinities(void)
All of __{assign,bind,clear}_irq_vector() manipulate struct irq_desc
fields, and hence ought to be called with the descriptor lock held in
addition to vector_lock. This is currently the case for only
set_desc_affinity() (in the common case) and destroy_irq(), which also
clarifies what the nesting behavior between the locks has to be.
Reflect the new expectation by having these functions all take a
descriptor as parameter instead of an interrupt number.

Also take care of the two special cases of calls to set_desc_affinity():
set_ioapic_affinity_irq() and VT-d's dma_msi_set_affinity() get called
directly as well, and in these cases the descriptor locks hadn't got
acquired till now. For set_ioapic_affinity_irq() this means acquiring /
releasing of the IO-APIC lock can be plain spin_{,un}lock() then.

Drop one of the two leading underscores from all three functions at
the same time.

There's one case left where descriptors get manipulated with just
vector_lock held: setup_vector_irq() assumes its caller to acquire
vector_lock, and hence can't itself acquire the descriptor locks (wrong
lock order). I don't currently see how to address this.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Also adjust set_ioapic_affinity_irq() and VT-d's
    dma_msi_set_affinity().
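To make the resulting lock nesting concrete: desc->lock is the outer
lock, the global vector_lock the inner one. A minimal sketch of the
pattern, lifted from the reworked clear_irq_vector() in the diff above
(shown standalone for illustration only):

    void clear_irq_vector(int irq)
    {
        struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;

        spin_lock_irqsave(&desc->lock, flags);  /* outer: per-IRQ descriptor */
        spin_lock(&vector_lock);                /* inner: global vector state */
        _clear_irq_vector(desc);                /* requires both locks held */
        spin_unlock(&vector_lock);
        spin_unlock_irqrestore(&desc->lock, flags);
    }

setup_vector_irq() cannot adopt this pattern: its caller already holds
vector_lock, so acquiring descriptor locks inside it would invert the
nesting, which is the open issue the description mentions.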