diff mbox series

[v3,1/2] x86/IRQ: make max number of guests for a shared IRQ configurable

Message ID 1607276587-19231-1-git-send-email-igor.druzhinin@citrix.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/2] x86/IRQ: make max number of guests for a shared IRQ configurable | expand

Commit Message

Igor Druzhinin Dec. 6, 2020, 5:43 p.m. UTC
... and increase the default to 16.

Current limit of 7 is too restrictive for modern systems where one GSI
could be shared by potentially many PCI INTx sources where each of them
corresponds to a device passed through to its own guest. Some systems do not
apply due dilligence in swizzling INTx links in case e.g. INTA is declared as
interrupt pin for the majority of PCI devices behind a single router,
resulting in overuse of a GSI.

Introduce a new command line option to configure that limit and dynamically
allocate an array of the necessary size. Set the default size now to 16 which
is higher than 7 but could later be increased even more if necessary.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
---

Changes in v2:
- introduced a command line option as suggested
- set initial default limit to 16

Changes in v3:
- changed option name to us - instead of _
- used uchar instead of uint to utilize integer_param overflow handling logic
- used xmalloc_flex_struct
- restructured printk as suggested

---
 docs/misc/xen-command-line.pandoc | 10 ++++++++++
 xen/arch/x86/irq.c                | 22 +++++++++++++++-------
 2 files changed, 25 insertions(+), 7 deletions(-)

Comments

Jan Beulich Dec. 7, 2020, 9:43 a.m. UTC | #1
On 06.12.2020 18:43, Igor Druzhinin wrote:
> ... and increase the default to 16.
> 
> Current limit of 7 is too restrictive for modern systems where one GSI
> could be shared by potentially many PCI INTx sources where each of them
> corresponds to a device passed through to its own guest. Some systems do not
> apply due dilligence in swizzling INTx links in case e.g. INTA is declared as
> interrupt pin for the majority of PCI devices behind a single router,
> resulting in overuse of a GSI.
> 
> Introduce a new command line option to configure that limit and dynamically
> allocate an array of the necessary size. Set the default size now to 16 which
> is higher than 7 but could later be increased even more if necessary.
> 
> Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
> ---
> 
> Changes in v2:
> - introduced a command line option as suggested
> - set initial default limit to 16
> 
> Changes in v3:
> - changed option name to us - instead of _
> - used uchar instead of uint to utilize integer_param overflow handling logic

Which now means rather than saturating at UINT8_MAX you'll get
-EOVERFLOW. Just as a remark, not as a strict request to change
anything.

> --- a/xen/arch/x86/irq.c
> +++ b/xen/arch/x86/irq.c
> @@ -42,6 +42,10 @@ integer_param("nr_irqs", nr_irqs);
>  int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
>  custom_param("irq_vector_map", parse_irq_vector_map_param);
>  
> +/* Max number of guests IRQ could be shared with */
> +static unsigned char __read_mostly irq_max_guests;
> +integer_param("irq-max-guests", irq_max_guests);

There's an implied assumption now that sizeof(irq_max_guests)
<= sizeof_field(irq_guest_action_t, nr_guests). Sadly a
respective BUILD_BUG_ON() can't ...

> @@ -435,6 +439,9 @@ int __init init_irq_data(void)
>      for ( ; irq < nr_irqs; irq++ )
>          irq_to_desc(irq)->irq = irq;
>  
> +    if ( !irq_max_guests )
> +        irq_max_guests = 16;

... go here, because the type gets defined only ...

> @@ -1028,7 +1035,6 @@ int __init setup_irq(unsigned int irq, unsigned int irqflags,
>   * HANDLING OF GUEST-BOUND PHYSICAL IRQS
>   */
>  
> -#define IRQ_MAX_GUESTS 7
>  typedef struct {
>      u8 nr_guests;
>      u8 in_flight;
> @@ -1039,7 +1045,7 @@ typedef struct {
>  #define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
>      cpumask_var_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */
>      struct timer eoi_timer;
> -    struct domain *guest[IRQ_MAX_GUESTS];
> +    struct domain *guest[];
>  } irq_guest_action_t;

... here. The only later __init function is setup_dump_irqs(), so
it could be put there or in a new build_assertions() one.

> @@ -1633,11 +1640,12 @@ int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
>          goto retry;
>      }
>  
> -    if ( action->nr_guests == IRQ_MAX_GUESTS )
> +    if ( action->nr_guests == irq_max_guests )
>      {
> -        printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
> -               "Already at max share.\n",
> -               pirq->pirq, v->domain->domain_id);
> +        printk(XENLOG_G_INFO
> +               "Cannot bind IRQ%d to dom%pd: already at max share %u ",
> +               pirq->pirq, v->domain, irq_max_guests);
> +        printk("(increase with irq-max-guests= option)\n");

Now two separate printk()s are definitely worse. Then putting the
part of the format string inside the parentheses on a separate line
would still be better (and perhaps a sensible compromise with the
grep-ability desire).

With suitable adjustments, which I'd be okay making while committing
as long as you agree,
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Igor Druzhinin Dec. 7, 2020, 11:28 a.m. UTC | #2
On 07/12/2020 09:43, Jan Beulich wrote:
> On 06.12.2020 18:43, Igor Druzhinin wrote:
>> @@ -1633,11 +1640,12 @@ int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
>>          goto retry;
>>      }
>>  
>> -    if ( action->nr_guests == IRQ_MAX_GUESTS )
>> +    if ( action->nr_guests == irq_max_guests )
>>      {
>> -        printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
>> -               "Already at max share.\n",
>> -               pirq->pirq, v->domain->domain_id);
>> +        printk(XENLOG_G_INFO
>> +               "Cannot bind IRQ%d to dom%pd: already at max share %u ",

I noticed it just now but could you also remove stray "dom" left in this line while commiting.

>> +               pirq->pirq, v->domain, irq_max_guests);
>> +        printk("(increase with irq-max-guests= option)\n");
> 
> Now two separate printk()s are definitely worse. Then putting the
> part of the format string inside the parentheses on a separate line
> would still be better (and perhaps a sensible compromise with the
> grep-ability desire).

Now I'm confused because you asked me not to split the format string between the lines which
wouldn't be possible without splitting printk's. I didn't really want to drop anything
informative.

> With suitable adjustments, which I'd be okay making while committing
> as long as you agree,

Yes, do with it whatever you see fit.

Igor
Jan Beulich Dec. 7, 2020, 11:44 a.m. UTC | #3
On 07.12.2020 12:28, Igor Druzhinin wrote:
> On 07/12/2020 09:43, Jan Beulich wrote:
>> On 06.12.2020 18:43, Igor Druzhinin wrote:
>>> @@ -1633,11 +1640,12 @@ int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
>>>          goto retry;
>>>      }
>>>  
>>> -    if ( action->nr_guests == IRQ_MAX_GUESTS )
>>> +    if ( action->nr_guests == irq_max_guests )
>>>      {
>>> -        printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
>>> -               "Already at max share.\n",
>>> -               pirq->pirq, v->domain->domain_id);
>>> +        printk(XENLOG_G_INFO
>>> +               "Cannot bind IRQ%d to dom%pd: already at max share %u ",
> 
> I noticed it just now but could you also remove stray "dom" left in this line while commiting.

Oh, sure.

>>> +               pirq->pirq, v->domain, irq_max_guests);
>>> +        printk("(increase with irq-max-guests= option)\n");
>>
>> Now two separate printk()s are definitely worse. Then putting the
>> part of the format string inside the parentheses on a separate line
>> would still be better (and perhaps a sensible compromise with the
>> grep-ability desire).
> 
> Now I'm confused because you asked me not to split the format string between the lines which
> wouldn't be possible without splitting printk's. I didn't really want to drop anything
> informative.

"Not splitting" really was meant in the sense of the words: No
splitting at all. Even less so across multiple printk()-s. But
since the line would get really long, I can live with the
outlined compromise.

Jan
diff mbox series

Patch

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index b4a0d60..53e676b 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1641,6 +1641,16 @@  This option is ignored in **pv-shim** mode.
 ### nr_irqs (x86)
 > `= <integer>`
 
+### irq-max-guests (x86)
+> `= <integer>`
+
+> Default: `16`
+
+Maximum number of guests any individual IRQ could be shared between,
+i.e. a limit on the number of guests it is possible to start each having
+assigned a device sharing a common interrupt line.  Accepts values between
+1 and 255.
+
 ### numa (x86)
 > `= on | off | fake=<integer> | noacpi`
 
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 8d1f9a9..4fd0578 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -42,6 +42,10 @@  integer_param("nr_irqs", nr_irqs);
 int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
 custom_param("irq_vector_map", parse_irq_vector_map_param);
 
+/* Max number of guests IRQ could be shared with */
+static unsigned char __read_mostly irq_max_guests;
+integer_param("irq-max-guests", irq_max_guests);
+
 vmask_t global_used_vector_map;
 
 struct irq_desc __read_mostly *irq_desc = NULL;
@@ -435,6 +439,9 @@  int __init init_irq_data(void)
     for ( ; irq < nr_irqs; irq++ )
         irq_to_desc(irq)->irq = irq;
 
+    if ( !irq_max_guests )
+        irq_max_guests = 16;
+
 #ifdef CONFIG_PV
     /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
     set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
@@ -1028,7 +1035,6 @@  int __init setup_irq(unsigned int irq, unsigned int irqflags,
  * HANDLING OF GUEST-BOUND PHYSICAL IRQS
  */
 
-#define IRQ_MAX_GUESTS 7
 typedef struct {
     u8 nr_guests;
     u8 in_flight;
@@ -1039,7 +1045,7 @@  typedef struct {
 #define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
     cpumask_var_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */
     struct timer eoi_timer;
-    struct domain *guest[IRQ_MAX_GUESTS];
+    struct domain *guest[];
 } irq_guest_action_t;
 
 /*
@@ -1564,7 +1570,8 @@  int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
         if ( newaction == NULL )
         {
             spin_unlock_irq(&desc->lock);
-            if ( (newaction = xmalloc(irq_guest_action_t)) != NULL &&
+            if ( (newaction = xmalloc_flex_struct(irq_guest_action_t, guest,
+                                                  irq_max_guests)) != NULL &&
                  zalloc_cpumask_var(&newaction->cpu_eoi_map) )
                 goto retry;
             xfree(newaction);
@@ -1633,11 +1640,12 @@  int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
         goto retry;
     }
 
-    if ( action->nr_guests == IRQ_MAX_GUESTS )
+    if ( action->nr_guests == irq_max_guests )
     {
-        printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
-               "Already at max share.\n",
-               pirq->pirq, v->domain->domain_id);
+        printk(XENLOG_G_INFO
+               "Cannot bind IRQ%d to dom%pd: already at max share %u ",
+               pirq->pirq, v->domain, irq_max_guests);
+        printk("(increase with irq-max-guests= option)\n");
         rc = -EBUSY;
         goto unlock_out;
     }