diff mbox series

[RFC,2/2] irqchip: irq-ti-sci-inta: Introduce IRQ affinity support

Message ID 20230122081607.959474-3-vigneshr@ti.com (mailing list archive)
State New, archived
Headers show
Series irqchip: irq-ti-sci-inta: Add IRQ affinity support | expand

Commit Message

Vignesh Raghavendra Jan. 22, 2023, 8:16 a.m. UTC
Add support for setting IRQ affinity for VINTs which have only one event
mapped to them. This just involves changing the parent IRQs affinity
(GIC/INTR). Flag VINTs which have affinity configured so as to not
aggregate/map more events to such VINTs.

Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/irqchip/irq-ti-sci-inta.c | 39 +++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

Comments

Marc Zyngier Jan. 26, 2023, 2:18 p.m. UTC | #1
On Sun, 22 Jan 2023 08:16:07 +0000,
Vignesh Raghavendra <vigneshr@ti.com> wrote:
> 
> Add support for setting IRQ affinity for VINTs which have only one event
> mapped to them. This just involves changing the parent IRQs affinity
> (GIC/INTR). Flag VINTs which have affinity configured so as to not
> aggregate/map more events to such VINTs.



> 
> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> ---
>  drivers/irqchip/irq-ti-sci-inta.c | 39 +++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
> index f1419d24568e..237cb4707cb8 100644
> --- a/drivers/irqchip/irq-ti-sci-inta.c
> +++ b/drivers/irqchip/irq-ti-sci-inta.c
> @@ -64,6 +64,7 @@ struct ti_sci_inta_event_desc {
>   * @events:		Array of event descriptors assigned to this vint.
>   * @parent_virq:	Linux IRQ number that gets attached to parent
>   * @vint_id:		TISCI vint ID
> + * @affinity_managed	flag to indicate VINT affinity is managed
>   */
>  struct ti_sci_inta_vint_desc {
>  	struct irq_domain *domain;
> @@ -72,6 +73,7 @@ struct ti_sci_inta_vint_desc {
>  	struct ti_sci_inta_event_desc events[MAX_EVENTS_PER_VINT];
>  	unsigned int parent_virq;
>  	u16 vint_id;
> +	bool affinity_managed;
>  };
>  
>  /**
> @@ -334,6 +336,8 @@ static struct ti_sci_inta_event_desc *ti_sci_inta_alloc_irq(struct irq_domain *d
>  	vint_id = ti_sci_get_free_resource(inta->vint);
>  	if (vint_id == TI_SCI_RESOURCE_NULL) {
>  		list_for_each_entry(vint_desc, &inta->vint_list, list) {
> +			if (vint_desc->affinity_managed)
> +				continue;
>  			free_bit = find_first_zero_bit(vint_desc->event_map,
>  						       MAX_EVENTS_PER_VINT);
>  			if (free_bit != MAX_EVENTS_PER_VINT)
> @@ -434,6 +438,7 @@ static int ti_sci_inta_request_resources(struct irq_data *data)
>  		return PTR_ERR(event_desc);
>  
>  	data->chip_data = event_desc;
> +	irq_data_update_effective_affinity(data, cpu_online_mask);
>  
>  	return 0;
>  }
> @@ -504,11 +509,45 @@ static void ti_sci_inta_ack_irq(struct irq_data *data)
>  		ti_sci_inta_manage_event(data, VINT_STATUS_OFFSET);
>  }
>  
> +#ifdef CONFIG_SMP
> +static int ti_sci_inta_set_affinity(struct irq_data *d,
> +				    const struct cpumask *mask_val, bool force)
> +{
> +	struct ti_sci_inta_event_desc *event_desc;
> +	struct ti_sci_inta_vint_desc *vint_desc;
> +	struct irq_data *parent_irq_data;
> +
> +	if (cpumask_equal(irq_data_get_effective_affinity_mask(d), mask_val))
> +		return 0;
> +
> +	event_desc = irq_data_get_irq_chip_data(d);
> +	if (event_desc) {
> +		vint_desc = to_vint_desc(event_desc, event_desc->vint_bit);
> +
> +		/*
> +		 * Cannot set affinity if there is more than one event
> +		 * mapped to same VINT
> +		 */
> +		if (bitmap_weight(vint_desc->event_map, MAX_EVENTS_PER_VINT) > 1)
> +			return -EINVAL;
> +
> +		vint_desc->affinity_managed = true;
> +
> +		irq_data_update_effective_affinity(d, mask_val);
> +		parent_irq_data = irq_get_irq_data(vint_desc->parent_virq);
> +		if (parent_irq_data->chip->irq_set_affinity)
> +			return parent_irq_data->chip->irq_set_affinity(parent_irq_data, mask_val, force);

This looks completely wrong.

You still have a chained irqchip on all paths, and have to do some
horrible probing to work out:

- which parent interrupt this is

- how many interrupts are connected to it

And then the fun begins:

- You have one interrupt that is standalone, so its affinity can be
  moved

- An unrelated driver gets probed, and one of its interrupts gets
  lumped together with the one above

- Now it cannot be moved anymore, and userspace complains

The rule is very simple: chained irqchip, no affinity management.
Either you reserve a pool of direct interrupts that have affinity
management and no muxing, or you keep the current approach.

But I'm strongly opposed to this sort of approach.

Thanks,

	M.
Vignesh Raghavendra Jan. 27, 2023, 5:53 p.m. UTC | #2
Hi Marc,

On 1/26/2023 7:48 PM, Marc Zyngier wrote:
> On Sun, 22 Jan 2023 08:16:07 +0000,
> Vignesh Raghavendra <vigneshr@ti.com> wrote:
>>
>> Add support for setting IRQ affinity for VINTs which have only one event
>> mapped to them. This just involves changing the parent IRQs affinity
>> (GIC/INTR). Flag VINTs which have affinity configured so as to not
>> aggregate/map more events to such VINTs.
> 
> 
> 
>>
>> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
>> ---
>>  drivers/irqchip/irq-ti-sci-inta.c | 39 +++++++++++++++++++++++++++++++
>>  1 file changed, 39 insertions(+)
>>
>> diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
>> index f1419d24568e..237cb4707cb8 100644
>> --- a/drivers/irqchip/irq-ti-sci-inta.c
>> +++ b/drivers/irqchip/irq-ti-sci-inta.c
>> @@ -64,6 +64,7 @@ struct ti_sci_inta_event_desc {
>>   * @events:		Array of event descriptors assigned to this vint.
>>   * @parent_virq:	Linux IRQ number that gets attached to parent
>>   * @vint_id:		TISCI vint ID
>> + * @affinity_managed	flag to indicate VINT affinity is managed
>>   */
>>  struct ti_sci_inta_vint_desc {
>>  	struct irq_domain *domain;
>> @@ -72,6 +73,7 @@ struct ti_sci_inta_vint_desc {
>>  	struct ti_sci_inta_event_desc events[MAX_EVENTS_PER_VINT];
>>  	unsigned int parent_virq;
>>  	u16 vint_id;
>> +	bool affinity_managed;
>>  };
>>  
>>  /**
>> @@ -334,6 +336,8 @@ static struct ti_sci_inta_event_desc *ti_sci_inta_alloc_irq(struct irq_domain *d
>>  	vint_id = ti_sci_get_free_resource(inta->vint);
>>  	if (vint_id == TI_SCI_RESOURCE_NULL) {
>>  		list_for_each_entry(vint_desc, &inta->vint_list, list) {
>> +			if (vint_desc->affinity_managed)
>> +				continue;
>>  			free_bit = find_first_zero_bit(vint_desc->event_map,
>>  						       MAX_EVENTS_PER_VINT);
>>  			if (free_bit != MAX_EVENTS_PER_VINT)
>> @@ -434,6 +438,7 @@ static int ti_sci_inta_request_resources(struct irq_data *data)
>>  		return PTR_ERR(event_desc);
>>  
>>  	data->chip_data = event_desc;
>> +	irq_data_update_effective_affinity(data, cpu_online_mask);
>>  
>>  	return 0;
>>  }
>> @@ -504,11 +509,45 @@ static void ti_sci_inta_ack_irq(struct irq_data *data)
>>  		ti_sci_inta_manage_event(data, VINT_STATUS_OFFSET);
>>  }
>>  
>> +#ifdef CONFIG_SMP
>> +static int ti_sci_inta_set_affinity(struct irq_data *d,
>> +				    const struct cpumask *mask_val, bool force)
>> +{
>> +	struct ti_sci_inta_event_desc *event_desc;
>> +	struct ti_sci_inta_vint_desc *vint_desc;
>> +	struct irq_data *parent_irq_data;
>> +
>> +	if (cpumask_equal(irq_data_get_effective_affinity_mask(d), mask_val))
>> +		return 0;
>> +
>> +	event_desc = irq_data_get_irq_chip_data(d);
>> +	if (event_desc) {
>> +		vint_desc = to_vint_desc(event_desc, event_desc->vint_bit);
>> +
>> +		/*
>> +		 * Cannot set affinity if there is more than one event
>> +		 * mapped to same VINT
>> +		 */
>> +		if (bitmap_weight(vint_desc->event_map, MAX_EVENTS_PER_VINT) > 1)
>> +			return -EINVAL;
>> +
>> +		vint_desc->affinity_managed = true;
>> +
>> +		irq_data_update_effective_affinity(d, mask_val);
>> +		parent_irq_data = irq_get_irq_data(vint_desc->parent_virq);
>> +		if (parent_irq_data->chip->irq_set_affinity)
>> +			return parent_irq_data->chip->irq_set_affinity(parent_irq_data, mask_val, force);
> 
> This looks completely wrong.
> 
> You still have a chained irqchip on all paths, and have to do some
> horrible probing to work out:
> 
> - which parent interrupt this is
> 
> - how many interrupts are connected to it
> 
> And then the fun begins:
> 
> - You have one interrupt that is standalone, so its affinity can be
>   moved
> 
> - An unrelated driver gets probed, and one of its interrupts gets
>   lumped together with the one above
> 
> - Now it cannot be moved anymore, and userspace complains
> 
> The rule is very simple: chained irqchip, no affinity management.
> Either you reserve a pool of direct interrupts that have affinity

This is what I am trying to accomplish, that is, reserve a pool of
direct interrupts that can be used by certain drivers that require IRQ
steering for performance. But I don't see a way to indicate from client
drivers to allocate from this reserved pool (there is no hint in
request_irq() call that ends up in .irq_request_resources() that I can use)

I can try and virtually split INTA into two irqchips perhaps, with one
part modeled as chained irqchip and other as stacked for the reserved
pool (and would have to spawn off two child msi-domains I presume).
But, there is only one DT node for this irqchip and thus clients cannot
request IRQ for reserved pool.

Wondering if you have any pointers here?

Thanks for your patience.

Regards
Vignesh
Marc Zyngier Feb. 20, 2023, 8:47 a.m. UTC | #3
On Fri, 27 Jan 2023 17:53:55 +0000,
"Raghavendra, Vignesh" <vigneshr@ti.com> wrote:
>

[...]

> >> @@ -504,11 +509,45 @@ static void ti_sci_inta_ack_irq(struct irq_data *data)
> >>  		ti_sci_inta_manage_event(data, VINT_STATUS_OFFSET);
> >>  }
> >>  
> >> +#ifdef CONFIG_SMP
> >> +static int ti_sci_inta_set_affinity(struct irq_data *d,
> >> +				    const struct cpumask *mask_val, bool force)
> >> +{
> >> +	struct ti_sci_inta_event_desc *event_desc;
> >> +	struct ti_sci_inta_vint_desc *vint_desc;
> >> +	struct irq_data *parent_irq_data;
> >> +
> >> +	if (cpumask_equal(irq_data_get_effective_affinity_mask(d), mask_val))
> >> +		return 0;
> >> +
> >> +	event_desc = irq_data_get_irq_chip_data(d);
> >> +	if (event_desc) {
> >> +		vint_desc = to_vint_desc(event_desc, event_desc->vint_bit);
> >> +
> >> +		/*
> >> +		 * Cannot set affinity if there is more than one event
> >> +		 * mapped to same VINT
> >> +		 */
> >> +		if (bitmap_weight(vint_desc->event_map, MAX_EVENTS_PER_VINT) > 1)
> >> +			return -EINVAL;
> >> +
> >> +		vint_desc->affinity_managed = true;
> >> +
> >> +		irq_data_update_effective_affinity(d, mask_val);
> >> +		parent_irq_data = irq_get_irq_data(vint_desc->parent_virq);
> >> +		if (parent_irq_data->chip->irq_set_affinity)
> >> +			return parent_irq_data->chip->irq_set_affinity(parent_irq_data, mask_val, force);
> > 
> > This looks completely wrong.
> > 
> > You still have a chained irqchip on all paths, and have to do some
> > horrible probing to work out:
> > 
> > - which parent interrupt this is
> > 
> > - how many interrupts are connected to it
> > 
> > And then the fun begins:
> > 
> > - You have one interrupt that is standalone, so its affinity can be
> >   moved
> > 
> > - An unrelated driver gets probed, and one of its interrupts gets
> >   lumped together with the one above
> > 
> > - Now it cannot be moved anymore, and userspace complains
> > 
> > The rule is very simple: chained irqchip, no affinity management.
> > Either you reserve a pool of direct interrupts that have affinity
> 
> This is what I am trying to accomplish, that is, reserve a pool of
> direct interrupts that can be used by certain drivers that require IRQ
> steering for performance. But I don't see a way to indicate from client
> drivers to allocate from this reserved pool (there is no hint in
> request_irq() call that ends up in .irq_request_resources() that I can use)
> 
> I can try and virtually split INTA into two irqchips perhaps, with one
> part modeled as chained irqchip and other as stacked for the reserved
> pool (and would have to spawn off two child msi-domains I presume).
> But, there is only one DT node for this irqchip and thus clients cannot
> request IRQ for reserved pool.

I don't see why DT should be aware of this. You only need to decide at
allocation time which one is where, and plug it at the right level.

And you probably only need *one* chained interrupt that muxes
everything that cannot be allocated direct path.

> Wondering if you have any pointers here?

I don't. But this shouldn't be too hard to bolt onto the existing
framework.

	M.
Vignesh Raghavendra Feb. 22, 2023, 5:07 p.m. UTC | #4
On 2/20/2023 2:17 PM, Marc Zyngier wrote:
> On Fri, 27 Jan 2023 17:53:55 +0000,
> "Raghavendra, Vignesh" <vigneshr@ti.com> wrote:
>>
> 
> [...]
> 
>>>> @@ -504,11 +509,45 @@ static void ti_sci_inta_ack_irq(struct irq_data *data)
>>>>  		ti_sci_inta_manage_event(data, VINT_STATUS_OFFSET);
>>>>  }
>>>>  
>>>> +#ifdef CONFIG_SMP
>>>> +static int ti_sci_inta_set_affinity(struct irq_data *d,
>>>> +				    const struct cpumask *mask_val, bool force)
>>>> +{
>>>> +	struct ti_sci_inta_event_desc *event_desc;
>>>> +	struct ti_sci_inta_vint_desc *vint_desc;
>>>> +	struct irq_data *parent_irq_data;
>>>> +
>>>> +	if (cpumask_equal(irq_data_get_effective_affinity_mask(d), mask_val))
>>>> +		return 0;
>>>> +
>>>> +	event_desc = irq_data_get_irq_chip_data(d);
>>>> +	if (event_desc) {
>>>> +		vint_desc = to_vint_desc(event_desc, event_desc->vint_bit);
>>>> +
>>>> +		/*
>>>> +		 * Cannot set affinity if there is more than one event
>>>> +		 * mapped to same VINT
>>>> +		 */
>>>> +		if (bitmap_weight(vint_desc->event_map, MAX_EVENTS_PER_VINT) > 1)
>>>> +			return -EINVAL;
>>>> +
>>>> +		vint_desc->affinity_managed = true;
>>>> +
>>>> +		irq_data_update_effective_affinity(d, mask_val);
>>>> +		parent_irq_data = irq_get_irq_data(vint_desc->parent_virq);
>>>> +		if (parent_irq_data->chip->irq_set_affinity)
>>>> +			return parent_irq_data->chip->irq_set_affinity(parent_irq_data, mask_val, force);
>>>
>>> This looks completely wrong.
>>>
>>> You still have a chained irqchip on all paths, and have to do some
>>> horrible probing to work out:
>>>
>>> - which parent interrupt this is
>>>
>>> - how many interrupts are connected to it
>>>
>>> And then the fun begins:
>>>
>>> - You have one interrupt that is standalone, so its affinity can be
>>>   moved
>>>
>>> - An unrelated driver gets probed, and one of its interrupts gets
>>>   lumped together with the one above
>>>
>>> - Now it cannot be moved anymore, and userspace complains
>>>
>>> The rule is very simple: chained irqchip, no affinity management.
>>> Either you reserve a pool of direct interrupts that have affinity
>>
>> This is what I am trying to accomplish, that is, reserve a pool of
>> direct interrupts that can be used by certain drivers that require IRQ
>> steering for performance. But I don't see a way to indicate from client
>> drivers to allocate from this reserved pool (there is no hint in
>> request_irq() call that ends up in .irq_request_resources() that I can use)
>>
>> I can try and virtually split INTA into two irqchips perhaps, with one
>> part modeled as chained irqchip and other as stacked for the reserved
>> pool (and would have to spawn off two child msi-domains I presume).
>> But, there is only one DT node for this irqchip and thus clients cannot
>> request IRQ for reserved pool.
> 
> I don't see why DT should be aware of this. You only need to decide at
> allocation time which one is where, and plug it at the right level.
> 

Ah, okay. I can certainly update .irq_set_affinity to point to INTA's
parent during ti_sci_inta_irq_domain_alloc() for reserved pool of direct
interrupts.

> And you probably only need *one* chained interrupt that muxes
> everything that cannot be allocated direct path.

IIUC, the suggestion is that the driver is aware (e.g. via a static list)
of incoming downstream events that need to be directly mapped, and thus
maps them directly during allocation, right?

But this is a bit tricky on TI K3 SoCs. INTA is meant to aggregate DMA
channel events. On certain SoCs, any DMA channel can talk to any
peripheral and there is no static binding. Ideally, we need to do load
balancing only for certain networking (and similar high-IRQ-load) peripherals.

One way I see is to create a special DMA channel pool in the DMA driver
as well, corresponding to the reserved VINT pool here. Otherwise, I am not
aware of a way for the DMA driver to tell the INTA driver to allocate an
IRQ line from the reserved pool.

> 
>> Wondering if you have any pointers here?
> 
> I don't. But this shouldn't be too hard to bolt onto the existing
> framework.


Really appreciate feedback! Thanks

Regards
Vignesh
diff mbox series

Patch

diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
index f1419d24568e..237cb4707cb8 100644
--- a/drivers/irqchip/irq-ti-sci-inta.c
+++ b/drivers/irqchip/irq-ti-sci-inta.c
@@ -64,6 +64,7 @@  struct ti_sci_inta_event_desc {
  * @events:		Array of event descriptors assigned to this vint.
  * @parent_virq:	Linux IRQ number that gets attached to parent
  * @vint_id:		TISCI vint ID
+ * @affinity_managed:	flag to indicate VINT affinity is managed
  */
 struct ti_sci_inta_vint_desc {
 	struct irq_domain *domain;
@@ -72,6 +73,7 @@  struct ti_sci_inta_vint_desc {
 	struct ti_sci_inta_event_desc events[MAX_EVENTS_PER_VINT];
 	unsigned int parent_virq;
 	u16 vint_id;
+	bool affinity_managed;
 };
 
 /**
@@ -334,6 +336,8 @@  static struct ti_sci_inta_event_desc *ti_sci_inta_alloc_irq(struct irq_domain *d
 	vint_id = ti_sci_get_free_resource(inta->vint);
 	if (vint_id == TI_SCI_RESOURCE_NULL) {
 		list_for_each_entry(vint_desc, &inta->vint_list, list) {
+			if (vint_desc->affinity_managed)
+				continue;
 			free_bit = find_first_zero_bit(vint_desc->event_map,
 						       MAX_EVENTS_PER_VINT);
 			if (free_bit != MAX_EVENTS_PER_VINT)
@@ -434,6 +438,7 @@  static int ti_sci_inta_request_resources(struct irq_data *data)
 		return PTR_ERR(event_desc);
 
 	data->chip_data = event_desc;
+	irq_data_update_effective_affinity(data, cpu_online_mask);
 
 	return 0;
 }
@@ -504,11 +509,45 @@  static void ti_sci_inta_ack_irq(struct irq_data *data)
 		ti_sci_inta_manage_event(data, VINT_STATUS_OFFSET);
 }
 
+#ifdef CONFIG_SMP
+static int ti_sci_inta_set_affinity(struct irq_data *d,
+				    const struct cpumask *mask_val, bool force)
+{
+	struct ti_sci_inta_event_desc *event_desc;
+	struct ti_sci_inta_vint_desc *vint_desc;
+	struct irq_data *parent_irq_data;
+
+	if (cpumask_equal(irq_data_get_effective_affinity_mask(d), mask_val))
+		return 0;
+
+	event_desc = irq_data_get_irq_chip_data(d);
+	if (event_desc) {
+		vint_desc = to_vint_desc(event_desc, event_desc->vint_bit);
+
+		/*
+		 * Cannot set affinity if there is more than one event
+		 * mapped to same VINT
+		 */
+		if (bitmap_weight(vint_desc->event_map, MAX_EVENTS_PER_VINT) > 1)
+			return -EINVAL;
+
+		vint_desc->affinity_managed = true;
+
+		irq_data_update_effective_affinity(d, mask_val);
+		parent_irq_data = irq_get_irq_data(vint_desc->parent_virq);
+		if (parent_irq_data->chip->irq_set_affinity)
+			return parent_irq_data->chip->irq_set_affinity(parent_irq_data, mask_val, force);
+	}
+
+	return -EINVAL;
+}
+#else
 static int ti_sci_inta_set_affinity(struct irq_data *d,
 				    const struct cpumask *mask_val, bool force)
 {
 	return -EINVAL;
 }
+#endif
 
 /**
  * ti_sci_inta_set_type() - Update the trigger type of the irq.