diff mbox series

[RFC,XEN,v8,5/5] domctl: Add XEN_DOMCTL_gsi_permission to grant gsi

Message ID 20240516095235.64128-6-Jiqian.Chen@amd.com (mailing list archive)
State Superseded
Headers show
Series Support device passthrough when dom0 is PVH on Xen | expand

Commit Message

Chen, Jiqian May 16, 2024, 9:52 a.m. UTC
Some type of domain don't have PIRQ, like PVH, when
passthrough a device to guest on PVH dom0, callstack
pci_add_dm_done->XEN_DOMCTL_irq_permission will failed
at domain_pirq_to_irq.

So, add a new hypercall to grant/revoke gsi permission
when dom0 is not PV or dom0 has not PIRQ flag.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
 tools/include/xenctrl.h      |  5 +++
 tools/libs/ctrl/xc_domain.c  | 15 ++++++++
 tools/libs/light/libxl_pci.c | 72 ++++++++++++++++++++++++++++--------
 xen/arch/x86/domctl.c        | 31 ++++++++++++++++
 xen/include/public/domctl.h  |  9 +++++
 xen/xsm/flask/hooks.c        |  1 +
 6 files changed, 117 insertions(+), 16 deletions(-)

Comments

Jan Beulich May 16, 2024, 2:01 p.m. UTC | #1
On 16.05.2024 11:52, Jiqian Chen wrote:
> Some type of domain don't have PIRQ, like PVH, when
> passthrough a device to guest on PVH dom0, callstack
> pci_add_dm_done->XEN_DOMCTL_irq_permission will failed
> at domain_pirq_to_irq.
> 
> So, add a new hypercall to grant/revoke gsi permission
> when dom0 is not PV or dom0 has not PIRQ flag.

Honestly I find this hard to follow, and thus not really making clear why
no other existing mechanism could be used.

> Signed-off-by: Huang Rui <ray.huang@amd.com>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---

Below here in an RFC patch you typically would want to put specific items
you're seeking feedback on. Without that it's hard to tell why this is
marked RFC.

> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -237,6 +237,37 @@ long arch_do_domctl(
>          break;
>      }
>  
> +    case XEN_DOMCTL_gsi_permission:
> +    {
> +        unsigned int gsi = domctl->u.gsi_permission.gsi;
> +        int allow = domctl->u.gsi_permission.allow_access;

bool?

> +        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )
> +        {
> +            ret = -EOPNOTSUPP;
> +            break;
> +        }

Such a restriction imo wants explaining in a comment.

> +        if ( gsi >= nr_irqs_gsi )
> +        {
> +            ret = -EINVAL;
> +            break;
> +        }
> +
> +        if ( !irq_access_permitted(current->domain, gsi) ||

I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
source overrides may be surfaced by ACPI?

> +             xsm_irq_permission(XSM_HOOK, d, gsi, allow) )

Here I'm pretty sure you can't very well re-use an existing hook, as the
value of interest is in a different numbering space, and a possible hook
function has no way of knowing which one it is. Daniel?

> +        {
> +            ret = -EPERM;
> +            break;
> +        }
> +
> +        if ( allow )
> +            ret = irq_permit_access(d, gsi);
> +        else
> +            ret = irq_deny_access(d, gsi);

As above I'm afraid you can't assume IRQ == GSI.

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -447,6 +447,13 @@ struct xen_domctl_irq_permission {
>  };
>  
>  
> +/* XEN_DOMCTL_gsi_permission */
> +struct xen_domctl_gsi_permission {
> +    uint32_t gsi;
> +    uint8_t allow_access;    /* flag to specify enable/disable of x86 gsi access */
> +};

Explicit padding please, including a check that it's zero on input.

> +
> +
>  /* XEN_DOMCTL_iomem_permission */

No double blank lines please. In fact you will want to break the double blank
lines in leading context, inserting in the middle.

Jan
Chen, Jiqian May 17, 2024, 10:45 a.m. UTC | #2
On 2024/5/16 22:01, Jan Beulich wrote:
> On 16.05.2024 11:52, Jiqian Chen wrote:
>> Some type of domain don't have PIRQ, like PVH, when
>> passthrough a device to guest on PVH dom0, callstack
>> pci_add_dm_done->XEN_DOMCTL_irq_permission will failed
>> at domain_pirq_to_irq.
>>
>> So, add a new hypercall to grant/revoke gsi permission
>> when dom0 is not PV or dom0 has not PIRQ flag.
> 
> Honestly I find this hard to follow, and thus not really making clear why
> no other existing mechanism could be used.
Sorry, I will describe more clearly in next version.

> 
>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
> 
> Below here in an RFC patch you typically would want to put specific items
> you're seeking feedback on. Without that it's hard to tell why this is
> marked RFC.
OK, I will add " RFC: wait for the third patch on kernel side is accepted." in next version.

> 
>> --- a/xen/arch/x86/domctl.c
>> +++ b/xen/arch/x86/domctl.c
>> @@ -237,6 +237,37 @@ long arch_do_domctl(
>>          break;
>>      }
>>  
>> +    case XEN_DOMCTL_gsi_permission:
>> +    {
>> +        unsigned int gsi = domctl->u.gsi_permission.gsi;
>> +        int allow = domctl->u.gsi_permission.allow_access;
> 
> bool?
Will change.

> 
>> +        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )
>> +        {
>> +            ret = -EOPNOTSUPP;
>> +            break;
>> +        }
> 
> Such a restriction imo wants explaining in a comment.
Will add in next version.

> 
>> +        if ( gsi >= nr_irqs_gsi )
>> +        {
>> +            ret = -EINVAL;
>> +            break;
>> +        }
>> +
>> +        if ( !irq_access_permitted(current->domain, gsi) ||
> 
> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
> source overrides may be surfaced by ACPI?
All irqs smaller than nr_irqs_gsi are gsi, aren't they?

> 
>> +             xsm_irq_permission(XSM_HOOK, d, gsi, allow) )
> 
> Here I'm pretty sure you can't very well re-use an existing hook, as the
> value of interest is in a different numbering space, and a possible hook
> function has no way of knowing which one it is. Daniel?
> 
>> +        {
>> +            ret = -EPERM;
>> +            break;
>> +        }
>> +
>> +        if ( allow )
>> +            ret = irq_permit_access(d, gsi);
>> +        else
>> +            ret = irq_deny_access(d, gsi);
> 
> As above I'm afraid you can't assume IRQ == GSI.
> 
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -447,6 +447,13 @@ struct xen_domctl_irq_permission {
>>  };
>>  
>>  
>> +/* XEN_DOMCTL_gsi_permission */
>> +struct xen_domctl_gsi_permission {
>> +    uint32_t gsi;
>> +    uint8_t allow_access;    /* flag to specify enable/disable of x86 gsi access */
>> +};
> 
> Explicit padding please, including a check that it's zero on input.
Thanks, I will add in next version.

> 
>> +
>> +
>>  /* XEN_DOMCTL_iomem_permission */
> 
> No double blank lines please. In fact you will want to break the double blank
> lines in leading context, inserting in the middle.
Will remove one.
> 
> Jan
Jan Beulich May 17, 2024, 10:51 a.m. UTC | #3
On 17.05.2024 12:45, Chen, Jiqian wrote:
> On 2024/5/16 22:01, Jan Beulich wrote:
>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>> +        if ( gsi >= nr_irqs_gsi )
>>> +        {
>>> +            ret = -EINVAL;
>>> +            break;
>>> +        }
>>> +
>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>
>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>> source overrides may be surfaced by ACPI?
> All irqs smaller than nr_irqs_gsi are gsi, aren't they?

They are, but there's not necessarily a 1:1 mapping.

Jan
Chen, Jiqian May 17, 2024, 11:14 a.m. UTC | #4
On 2024/5/17 18:51, Jan Beulich wrote:
> On 17.05.2024 12:45, Chen, Jiqian wrote:
>> On 2024/5/16 22:01, Jan Beulich wrote:
>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>> +        if ( gsi >= nr_irqs_gsi )
>>>> +        {
>>>> +            ret = -EINVAL;
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>
>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>> source overrides may be surfaced by ACPI?
>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
> 
> They are, but there's not necessarily a 1:1 mapping.
Oh, so do I need to add a new gsi_caps to store granted gsi?
And Dom0 defaults to own all gsis permission?

> 
> Jan
Jan Beulich May 17, 2024, 11:50 a.m. UTC | #5
On 17.05.2024 13:14, Chen, Jiqian wrote:
> On 2024/5/17 18:51, Jan Beulich wrote:
>> On 17.05.2024 12:45, Chen, Jiqian wrote:
>>> On 2024/5/16 22:01, Jan Beulich wrote:
>>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>>> +        if ( gsi >= nr_irqs_gsi )
>>>>> +        {
>>>>> +            ret = -EINVAL;
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>>
>>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>>> source overrides may be surfaced by ACPI?
>>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
>>
>> They are, but there's not necessarily a 1:1 mapping.
> Oh, so do I need to add a new gsi_caps to store granted gsi?

Probably not. You ought to be able to translate between GSI and IRQ,
and then continue to record in / check against IRQ permissions.

Jan
Chen, Jiqian May 29, 2024, 2:41 a.m. UTC | #6
Hi,
On 2024/5/17 19:50, Jan Beulich wrote:
> On 17.05.2024 13:14, Chen, Jiqian wrote:
>> On 2024/5/17 18:51, Jan Beulich wrote:
>>> On 17.05.2024 12:45, Chen, Jiqian wrote:
>>>> On 2024/5/16 22:01, Jan Beulich wrote:
>>>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>>>> +        if ( gsi >= nr_irqs_gsi )
>>>>>> +        {
>>>>>> +            ret = -EINVAL;
>>>>>> +            break;
>>>>>> +        }
>>>>>> +
>>>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>>>
>>>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>>>> source overrides may be surfaced by ACPI?
>>>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
>>>
>>> They are, but there's not necessarily a 1:1 mapping.
>> Oh, so do I need to add a new gsi_caps to store granted gsi?
> 
> Probably not. You ought to be able to translate between GSI and IRQ,
> and then continue to record in / check against IRQ permissions.
But I found in function init_irq_data:
    for ( irq = 0; irq < nr_irqs_gsi; irq++ )
    {
        int rc;

        desc = irq_to_desc(irq);
        desc->irq = irq;

        rc = init_one_irq_desc(desc);
        if ( rc )
            return rc;
    }
Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.

Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
> 
> Jan
Jan Beulich May 29, 2024, 6:31 a.m. UTC | #7
On 29.05.2024 04:41, Chen, Jiqian wrote:
> Hi,
> On 2024/5/17 19:50, Jan Beulich wrote:
>> On 17.05.2024 13:14, Chen, Jiqian wrote:
>>> On 2024/5/17 18:51, Jan Beulich wrote:
>>>> On 17.05.2024 12:45, Chen, Jiqian wrote:
>>>>> On 2024/5/16 22:01, Jan Beulich wrote:
>>>>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>>>>> +        if ( gsi >= nr_irqs_gsi )
>>>>>>> +        {
>>>>>>> +            ret = -EINVAL;
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +
>>>>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>>>>
>>>>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>>>>> source overrides may be surfaced by ACPI?
>>>>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
>>>>
>>>> They are, but there's not necessarily a 1:1 mapping.
>>> Oh, so do I need to add a new gsi_caps to store granted gsi?
>>
>> Probably not. You ought to be able to translate between GSI and IRQ,
>> and then continue to record in / check against IRQ permissions.
> But I found in function init_irq_data:
>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>     {
>         int rc;
> 
>         desc = irq_to_desc(irq);
>         desc->irq = irq;
> 
>         rc = init_one_irq_desc(desc);
>         if ( rc )
>             return rc;
>     }
> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?

No, as explained before. I also don't see how you would derive that from
the code above. "nr_irqs_gsi" describes what its name says: The number of
IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
mapping.

> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.

Which may be wrong, while that wrong-ness may not have hit anyone in
practice (for reasons that would need working out).

> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?

Again - no.

Jan
Chen, Jiqian May 29, 2024, 6:56 a.m. UTC | #8
On 2024/5/29 14:31, Jan Beulich wrote:
> On 29.05.2024 04:41, Chen, Jiqian wrote:
>> Hi,
>> On 2024/5/17 19:50, Jan Beulich wrote:
>>> On 17.05.2024 13:14, Chen, Jiqian wrote:
>>>> On 2024/5/17 18:51, Jan Beulich wrote:
>>>>> On 17.05.2024 12:45, Chen, Jiqian wrote:
>>>>>> On 2024/5/16 22:01, Jan Beulich wrote:
>>>>>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>>>>>> +        if ( gsi >= nr_irqs_gsi )
>>>>>>>> +        {
>>>>>>>> +            ret = -EINVAL;
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +
>>>>>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>>>>>
>>>>>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>>>>>> source overrides may be surfaced by ACPI?
>>>>>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
>>>>>
>>>>> They are, but there's not necessarily a 1:1 mapping.
>>>> Oh, so do I need to add a new gsi_caps to store granted gsi?
>>>
>>> Probably not. You ought to be able to translate between GSI and IRQ,
>>> and then continue to record in / check against IRQ permissions.
>> But I found in function init_irq_data:
>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>     {
>>         int rc;
>>
>>         desc = irq_to_desc(irq);
>>         desc->irq = irq;
>>
>>         rc = init_one_irq_desc(desc);
>>         if ( rc )
>>             return rc;
>>     }
>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
> 
> No, as explained before. I also don't see how you would derive that from the code above.
Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.

> "nr_irqs_gsi" describes what its name says: The number of
> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
> mapping.
> 
>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
> 
> Which may be wrong, while that wrong-ness may not have hit anyone in
> practice (for reasons that would need working out).
> 
>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
> 
> Again - no.
Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
so that I can know how to do a conversion gsi from irq.

> 
> Jan
Jan Beulich May 29, 2024, 7:10 a.m. UTC | #9
On 29.05.2024 08:56, Chen, Jiqian wrote:
> On 2024/5/29 14:31, Jan Beulich wrote:
>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>> Hi,
>>> On 2024/5/17 19:50, Jan Beulich wrote:
>>>> On 17.05.2024 13:14, Chen, Jiqian wrote:
>>>>> On 2024/5/17 18:51, Jan Beulich wrote:
>>>>>> On 17.05.2024 12:45, Chen, Jiqian wrote:
>>>>>>> On 2024/5/16 22:01, Jan Beulich wrote:
>>>>>>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>>>>>>> +        if ( gsi >= nr_irqs_gsi )
>>>>>>>>> +        {
>>>>>>>>> +            ret = -EINVAL;
>>>>>>>>> +            break;
>>>>>>>>> +        }
>>>>>>>>> +
>>>>>>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>>>>>>
>>>>>>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>>>>>>> source overrides may be surfaced by ACPI?
>>>>>>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
>>>>>>
>>>>>> They are, but there's not necessarily a 1:1 mapping.
>>>>> Oh, so do I need to add a new gsi_caps to store granted gsi?
>>>>
>>>> Probably not. You ought to be able to translate between GSI and IRQ,
>>>> and then continue to record in / check against IRQ permissions.
>>> But I found in function init_irq_data:
>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>     {
>>>         int rc;
>>>
>>>         desc = irq_to_desc(irq);
>>>         desc->irq = irq;
>>>
>>>         rc = init_one_irq_desc(desc);
>>>         if ( rc )
>>>             return rc;
>>>     }
>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>
>> No, as explained before. I also don't see how you would derive that from the code above.
> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.

What are you taking this from? The loop bound isn't nr_gsis, and the iteration
variable isn't in GSI space either; it's in IRQ numbering space. In this loop
we're merely leveraging that every GSI has a corresponding IRQ; there are no
assumptions made about the mapping between the two. Afaics at least.

>> "nr_irqs_gsi" describes what its name says: The number of
>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>> mapping.
>>
>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>
>> Which may be wrong, while that wrong-ness may not have hit anyone in
>> practice (for reasons that would need working out).
>>
>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>
>> Again - no.
> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
> so that I can know how to do a conversion gsi from irq.

I did point you at the ACPI Interrupt Source Override structure before.
We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
start going from.

Jan
Chen, Jiqian May 29, 2024, 11:13 a.m. UTC | #10
On 2024/5/29 15:10, Jan Beulich wrote:
> On 29.05.2024 08:56, Chen, Jiqian wrote:
>> On 2024/5/29 14:31, Jan Beulich wrote:
>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>> Hi,
>>>> On 2024/5/17 19:50, Jan Beulich wrote:
>>>>> On 17.05.2024 13:14, Chen, Jiqian wrote:
>>>>>> On 2024/5/17 18:51, Jan Beulich wrote:
>>>>>>> On 17.05.2024 12:45, Chen, Jiqian wrote:
>>>>>>>> On 2024/5/16 22:01, Jan Beulich wrote:
>>>>>>>>> On 16.05.2024 11:52, Jiqian Chen wrote:
>>>>>>>>>> +        if ( gsi >= nr_irqs_gsi )
>>>>>>>>>> +        {
>>>>>>>>>> +            ret = -EINVAL;
>>>>>>>>>> +            break;
>>>>>>>>>> +        }
>>>>>>>>>> +
>>>>>>>>>> +        if ( !irq_access_permitted(current->domain, gsi) ||
>>>>>>>>>
>>>>>>>>> I.e. assuming IRQ == GSI? Is that a valid assumption when any number of
>>>>>>>>> source overrides may be surfaced by ACPI?
>>>>>>>> All irqs smaller than nr_irqs_gsi are gsi, aren't they?
>>>>>>>
>>>>>>> They are, but there's not necessarily a 1:1 mapping.
>>>>>> Oh, so do I need to add a new gsi_caps to store granted gsi?
>>>>>
>>>>> Probably not. You ought to be able to translate between GSI and IRQ,
>>>>> and then continue to record in / check against IRQ permissions.
>>>> But I found in function init_irq_data:
>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>     {
>>>>         int rc;
>>>>
>>>>         desc = irq_to_desc(irq);
>>>>         desc->irq = irq;
>>>>
>>>>         rc = init_one_irq_desc(desc);
>>>>         if ( rc )
>>>>             return rc;
>>>>     }
>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>
>>> No, as explained before. I also don't see how you would derive that from the code above.
>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
> 
> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
> we're merely leveraging that every GSI has a corresponding IRQ;
> there are no assumptions made about the mapping between the two. Afaics at least.
> 
>>> "nr_irqs_gsi" describes what its name says: The number of
>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>> mapping.
>>>
>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>
>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>> practice (for reasons that would need working out).
>>>
>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>
>>> Again - no.
>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>> so that I can know how to do a conversion gsi from irq.
> 
> I did point you at the ACPI Interrupt Source Override structure before.
> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
> start going from.
Oh! I think I know.
If I want to transform gsi to irq, I need to do below:
	int irq, entry, ioapic, pin;

	ioapic = mp_find_ioapic(gsi);
	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
	entry = find_irq_entry(ioapic, pin, mp_INT);
	irq = pin_2_irq(entry, ioapic, pin);

Am I right?

> 
> Jan
Jan Beulich May 29, 2024, 12:22 p.m. UTC | #11
On 29.05.2024 13:13, Chen, Jiqian wrote:
> On 2024/5/29 15:10, Jan Beulich wrote:
>> On 29.05.2024 08:56, Chen, Jiqian wrote:
>>> On 2024/5/29 14:31, Jan Beulich wrote:
>>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>>> But I found in function init_irq_data:
>>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>>     {
>>>>>         int rc;
>>>>>
>>>>>         desc = irq_to_desc(irq);
>>>>>         desc->irq = irq;
>>>>>
>>>>>         rc = init_one_irq_desc(desc);
>>>>>         if ( rc )
>>>>>             return rc;
>>>>>     }
>>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>>
>>>> No, as explained before. I also don't see how you would derive that from the code above.
>>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
>>
>> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
>> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
>> we're merely leveraging that every GSI has a corresponding IRQ;
>> there are no assumptions made about the mapping between the two. Afaics at least.
>>
>>>> "nr_irqs_gsi" describes what its name says: The number of
>>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>>> mapping.
>>>>
>>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>>
>>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>>> practice (for reasons that would need working out).
>>>>
>>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>>
>>>> Again - no.
>>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>>> so that I can know how to do a conversion gsi from irq.
>>
>> I did point you at the ACPI Interrupt Source Override structure before.
>> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
>> start going from.
> Oh! I think I know.
> If I want to transform gsi to irq, I need to do below:
> 	int irq, entry, ioapic, pin;
> 
> 	ioapic = mp_find_ioapic(gsi);
> 	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
> 	entry = find_irq_entry(ioapic, pin, mp_INT);
> 	irq = pin_2_irq(entry, ioapic, pin);
> 
> Am I right?

This looks plausible, yes.

Jan
Chen, Jiqian May 30, 2024, 11:19 a.m. UTC | #12
On 2024/5/29 20:22, Jan Beulich wrote:
> On 29.05.2024 13:13, Chen, Jiqian wrote:
>> On 2024/5/29 15:10, Jan Beulich wrote:
>>> On 29.05.2024 08:56, Chen, Jiqian wrote:
>>>> On 2024/5/29 14:31, Jan Beulich wrote:
>>>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>>>> But I found in function init_irq_data:
>>>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>>>     {
>>>>>>         int rc;
>>>>>>
>>>>>>         desc = irq_to_desc(irq);
>>>>>>         desc->irq = irq;
>>>>>>
>>>>>>         rc = init_one_irq_desc(desc);
>>>>>>         if ( rc )
>>>>>>             return rc;
>>>>>>     }
>>>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>>>
>>>>> No, as explained before. I also don't see how you would derive that from the code above.
>>>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
>>>
>>> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
>>> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
>>> we're merely leveraging that every GSI has a corresponding IRQ;
>>> there are no assumptions made about the mapping between the two. Afaics at least.
>>>
>>>>> "nr_irqs_gsi" describes what its name says: The number of
>>>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>>>> mapping.
>>>>>
>>>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>>>
>>>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>>>> practice (for reasons that would need working out).
>>>>>
>>>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>>>
>>>>> Again - no.
>>>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>>>> so that I can know how to do a conversion gsi from irq.
>>>
>>> I did point you at the ACPI Interrupt Source Override structure before.
>>> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
>>> start going from.
>> Oh! I think I know.
>> If I want to transform gsi to irq, I need to do below:
>> 	int irq, entry, ioapic, pin;
>>
>> 	ioapic = mp_find_ioapic(gsi);
>> 	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
>> 	entry = find_irq_entry(ioapic, pin, mp_INT);
>> 	irq = pin_2_irq(entry, ioapic, pin);
>>
>> Am I right?
> 
> This looks plausible, yes.
I dump all mpc_config_intsrc of array mp_irqs, it shows:
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
(XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
(XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15

It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
And my code will be:
    case XEN_DOMCTL_gsi_permission:
    {
        unsigned int gsi = domctl->u.gsi_permission.gsi;
        int irq;
        bool allow = domctl->u.gsi_permission.allow_access;

        /*
         * gsi[0,15] is not 1:1 mapping to legacy irq[0,15], it need a
         * transformation. Other gsi is considered be 1:1 mapping to irq
         */
        if ( gsi < 16 )
            irq = gsi_2_irq(gsi);
        else
            irq = gsi;

        /*
         * If current domain is PV or it has PIRQ flag, it has a mapping
         * of gsi, pirq and irq, so it should use XEN_DOMCTL_irq_permission
         * to grant irq permission.
         */
        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )
        {
            ret = -EOPNOTSUPP;
            break;
        }

        if ( gsi >= nr_irqs_gsi || irq < 0 )
        {
            ret = -EINVAL;
            break;
        }

        if ( !irq_access_permitted(current->domain, irq) ||
             xsm_irq_permission(XSM_HOOK, d, irq, allow) )
        {
            ret = -EPERM;
            break;
        }

        if ( allow )
            ret = irq_permit_access(d, irq);
        else
            ret = irq_deny_access(d, irq);
        break;
    }

Is above acceptable?

> 
> Jan
Jan Beulich May 30, 2024, 3:51 p.m. UTC | #13
On 30.05.2024 13:19, Chen, Jiqian wrote:
> On 2024/5/29 20:22, Jan Beulich wrote:
>> On 29.05.2024 13:13, Chen, Jiqian wrote:
>>> On 2024/5/29 15:10, Jan Beulich wrote:
>>>> On 29.05.2024 08:56, Chen, Jiqian wrote:
>>>>> On 2024/5/29 14:31, Jan Beulich wrote:
>>>>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>>>>> But I found in function init_irq_data:
>>>>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>>>>     {
>>>>>>>         int rc;
>>>>>>>
>>>>>>>         desc = irq_to_desc(irq);
>>>>>>>         desc->irq = irq;
>>>>>>>
>>>>>>>         rc = init_one_irq_desc(desc);
>>>>>>>         if ( rc )
>>>>>>>             return rc;
>>>>>>>     }
>>>>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>>>>
>>>>>> No, as explained before. I also don't see how you would derive that from the code above.
>>>>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
>>>>
>>>> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
>>>> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
>>>> we're merely leveraging that every GSI has a corresponding IRQ;
>>>> there are no assumptions made about the mapping between the two. Afaics at least.
>>>>
>>>>>> "nr_irqs_gsi" describes what its name says: The number of
>>>>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>>>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>>>>> mapping.
>>>>>>
>>>>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>>>>
>>>>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>>>>> practice (for reasons that would need working out).
>>>>>>
>>>>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>>>>
>>>>>> Again - no.
>>>>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>>>>> so that I can know how to do a conversion gsi from irq.
>>>>
>>>> I did point you at the ACPI Interrupt Source Override structure before.
>>>> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
>>>> start going from.
>>> Oh! I think I know.
>>> If I want to transform gsi to irq, I need to do below:
>>> 	int irq, entry, ioapic, pin;
>>>
>>> 	ioapic = mp_find_ioapic(gsi);
>>> 	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
>>> 	entry = find_irq_entry(ioapic, pin, mp_INT);
>>> 	irq = pin_2_irq(entry, ioapic, pin);
>>>
>>> Am I right?
>>
>> This looks plausible, yes.
> I dump all mpc_config_intsrc of array mp_irqs, it shows:
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15
> 
> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?

It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
disallows that.

Jan
Chen, Jiqian June 4, 2024, 3:04 a.m. UTC | #14
On 2024/5/30 23:51, Jan Beulich wrote:
> On 30.05.2024 13:19, Chen, Jiqian wrote:
>> On 2024/5/29 20:22, Jan Beulich wrote:
>>> On 29.05.2024 13:13, Chen, Jiqian wrote:
>>>> On 2024/5/29 15:10, Jan Beulich wrote:
>>>>> On 29.05.2024 08:56, Chen, Jiqian wrote:
>>>>>> On 2024/5/29 14:31, Jan Beulich wrote:
>>>>>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>>>>>> But I found in function init_irq_data:
>>>>>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>>>>>     {
>>>>>>>>         int rc;
>>>>>>>>
>>>>>>>>         desc = irq_to_desc(irq);
>>>>>>>>         desc->irq = irq;
>>>>>>>>
>>>>>>>>         rc = init_one_irq_desc(desc);
>>>>>>>>         if ( rc )
>>>>>>>>             return rc;
>>>>>>>>     }
>>>>>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>>>>>
>>>>>>> No, as explained before. I also don't see how you would derive that from the code above.
>>>>>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
>>>>>
>>>>> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
>>>>> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
>>>>> we're merely leveraging that every GSI has a corresponding IRQ;
>>>>> there are no assumptions made about the mapping between the two. Afaics at least.
>>>>>
>>>>>>> "nr_irqs_gsi" describes what its name says: The number of
>>>>>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>>>>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>>>>>> mapping.
>>>>>>>
>>>>>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>>>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>>>>>
>>>>>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>>>>>> practice (for reasons that would need working out).
>>>>>>>
>>>>>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>>>>>
>>>>>>> Again - no.
>>>>>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>>>>>> so that I can know how to do a conversion gsi from irq.
>>>>>
>>>>> I did point you at the ACPI Interrupt Source Override structure before.
>>>>> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
>>>>> start going from.
>>>> Oh! I think I know.
>>>> If I want to transform gsi to irq, I need to do below:
>>>> 	int irq, entry, ioapic, pin;
>>>>
>>>> 	ioapic = mp_find_ioapic(gsi);
>>>> 	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
>>>> 	entry = find_irq_entry(ioapic, pin, mp_INT);
>>>> 	irq = pin_2_irq(entry, ioapic, pin);
>>>>
>>>> Am I right?
>>>
>>> This looks plausible, yes.
>> I dump all mpc_config_intsrc of array mp_irqs, it shows:
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15
>>
>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
> 
> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
> disallows that.
Do you suggest me to add overrides for higher GSIs into array mp_irqs?

> 
> Jan
Jan Beulich June 4, 2024, 5:55 a.m. UTC | #15
On 04.06.2024 05:04, Chen, Jiqian wrote:
> On 2024/5/30 23:51, Jan Beulich wrote:
>> On 30.05.2024 13:19, Chen, Jiqian wrote:
>>> On 2024/5/29 20:22, Jan Beulich wrote:
>>>> On 29.05.2024 13:13, Chen, Jiqian wrote:
>>>>> On 2024/5/29 15:10, Jan Beulich wrote:
>>>>>> On 29.05.2024 08:56, Chen, Jiqian wrote:
>>>>>>> On 2024/5/29 14:31, Jan Beulich wrote:
>>>>>>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>>>>>>> But I found in function init_irq_data:
>>>>>>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>>>>>>     {
>>>>>>>>>         int rc;
>>>>>>>>>
>>>>>>>>>         desc = irq_to_desc(irq);
>>>>>>>>>         desc->irq = irq;
>>>>>>>>>
>>>>>>>>>         rc = init_one_irq_desc(desc);
>>>>>>>>>         if ( rc )
>>>>>>>>>             return rc;
>>>>>>>>>     }
>>>>>>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>>>>>>
>>>>>>>> No, as explained before. I also don't see how you would derive that from the code above.
>>>>>>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
>>>>>>
>>>>>> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
>>>>>> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
>>>>>> we're merely leveraging that every GSI has a corresponding IRQ;
>>>>>> there are no assumptions made about the mapping between the two. Afaics at least.
>>>>>>
>>>>>>>> "nr_irqs_gsi" describes what its name says: The number of
>>>>>>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>>>>>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>>>>>>> mapping.
>>>>>>>>
>>>>>>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>>>>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>>>>>>
>>>>>>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>>>>>>> practice (for reasons that would need working out).
>>>>>>>>
>>>>>>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>>>>>>
>>>>>>>> Again - no.
>>>>>>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>>>>>>> so that I can know how to do a conversion gsi from irq.
>>>>>>
>>>>>> I did point you at the ACPI Interrupt Source Override structure before.
>>>>>> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
>>>>>> start going from.
>>>>> Oh! I think I know.
>>>>> If I want to transform gsi to irq, I need to do below:
>>>>> 	int irq, entry, ioapic, pin;
>>>>>
>>>>> 	ioapic = mp_find_ioapic(gsi);
>>>>> 	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
>>>>> 	entry = find_irq_entry(ioapic, pin, mp_INT);
>>>>> 	irq = pin_2_irq(entry, ioapic, pin);
>>>>>
>>>>> Am I right?
>>>>
>>>> This looks plausible, yes.
>>> I dump all mpc_config_intsrc of array mp_irqs, it shows:
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15
>>>
>>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
>>
>> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
>> disallows that.
> Do you suggest me to add overrides for higher GSIs into array mp_irqs?

Why "add"? That's what mp_override_legacy_irq() already does, isn't it?
Assuming of course any are surfaced at all by ACPI.

Jan
Chen, Jiqian June 4, 2024, 6:01 a.m. UTC | #16
On 2024/6/4 13:55, Jan Beulich wrote:
> On 04.06.2024 05:04, Chen, Jiqian wrote:
>> On 2024/5/30 23:51, Jan Beulich wrote:
>>> On 30.05.2024 13:19, Chen, Jiqian wrote:
>>>> On 2024/5/29 20:22, Jan Beulich wrote:
>>>>> On 29.05.2024 13:13, Chen, Jiqian wrote:
>>>>>> On 2024/5/29 15:10, Jan Beulich wrote:
>>>>>>> On 29.05.2024 08:56, Chen, Jiqian wrote:
>>>>>>>> On 2024/5/29 14:31, Jan Beulich wrote:
>>>>>>>>> On 29.05.2024 04:41, Chen, Jiqian wrote:
>>>>>>>>>> But I found in function init_irq_data:
>>>>>>>>>>     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
>>>>>>>>>>     {
>>>>>>>>>>         int rc;
>>>>>>>>>>
>>>>>>>>>>         desc = irq_to_desc(irq);
>>>>>>>>>>         desc->irq = irq;
>>>>>>>>>>
>>>>>>>>>>         rc = init_one_irq_desc(desc);
>>>>>>>>>>         if ( rc )
>>>>>>>>>>             return rc;
>>>>>>>>>>     }
>>>>>>>>>> Does it mean that when irq < nr_irqs_gsi, the gsi and irq is a 1:1 mapping?
>>>>>>>>>
>>>>>>>>> No, as explained before. I also don't see how you would derive that from the code above.
>>>>>>>> Because here set desc->irq = irq, and it seems there is no other place to change this desc->irq, so, gsi 1 is considered to irq 1.
>>>>>>>
>>>>>>> What are you taking this from? The loop bound isn't nr_gsis, and the iteration
>>>>>>> variable isn't in GSI space either; it's in IRQ numbering space. In this loop
>>>>>>> we're merely leveraging that every GSI has a corresponding IRQ;
>>>>>>> there are no assumptions made about the mapping between the two. Afaics at least.
>>>>>>>
>>>>>>>>> "nr_irqs_gsi" describes what its name says: The number of
>>>>>>>>> IRQs mapping to a (_some_) GSI. That's to tell them from the non-GSI (i.e.
>>>>>>>>> mainly MSI) ones. There's no implication whatsoever on the IRQ <-> GSI
>>>>>>>>> mapping.
>>>>>>>>>
>>>>>>>>>> What's more, when using PHYSDEVOP_setup_gsi, it calls mp_register_gsi,
>>>>>>>>>> and in mp_register_gsi, it uses " desc = irq_to_desc(gsi); " to get irq_desc directly.
>>>>>>>>>
>>>>>>>>> Which may be wrong, while that wrong-ness may not have hit anyone in
>>>>>>>>> practice (for reasons that would need working out).
>>>>>>>>>
>>>>>>>>>> Combining above, can we consider "gsi == irq" when irq < nr_irqs_gsi ?
>>>>>>>>>
>>>>>>>>> Again - no.
>>>>>>>> Since you are certain that they are not equal, could you tell me where show they are not equal or where build their mappings,
>>>>>>>> so that I can know how to do a conversion gsi from irq.
>>>>>>>
>>>>>>> I did point you at the ACPI Interrupt Source Override structure before.
>>>>>>> We're parsing those in acpi_parse_int_src_ovr(), to give you a place to
>>>>>>> start going from.
>>>>>> Oh! I think I know.
>>>>>> If I want to transform gsi to irq, I need to do below:
>>>>>> 	int irq, entry, ioapic, pin;
>>>>>>
>>>>>> 	ioapic = mp_find_ioapic(gsi);
>>>>>> 	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
>>>>>> 	entry = find_irq_entry(ioapic, pin, mp_INT);
>>>>>> 	irq = pin_2_irq(entry, ioapic, pin);
>>>>>>
>>>>>> Am I right?
>>>>>
>>>>> This looks plausible, yes.
>>>> I dump all mpc_config_intsrc of array mp_irqs, it shows:
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15
>>>>
>>>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>>>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
>>>
>>> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
>>> disallows that.
>> Do you suggest me to add overrides for higher GSIs into array mp_irqs?
> 
> Why "add"? That's what mp_override_legacy_irq() already does, isn't it?
No. mp_override_legacy_irq only overrides for gsi < 16, but not for gsi >= 16(I dump all mappings from array mp_irqs).
In my environment, gsi of my dGPU is 24.
So, how do I process for gsi >= 16?

> Assuming of course any are surfaced at all by ACPI.
> 
> Jan
Jan Beulich June 4, 2024, 6:12 a.m. UTC | #17
On 04.06.2024 08:01, Chen, Jiqian wrote:
> On 2024/6/4 13:55, Jan Beulich wrote:
>> On 04.06.2024 05:04, Chen, Jiqian wrote:
>>> On 2024/5/30 23:51, Jan Beulich wrote:
>>>> On 30.05.2024 13:19, Chen, Jiqian wrote:
>>>>> I dump all mpc_config_intsrc of array mp_irqs, it shows:
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15
>>>>>
>>>>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>>>>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
>>>>
>>>> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
>>>> disallows that.
>>> Do you suggest me to add overrides for higher GSIs into array mp_irqs?
>>
>> Why "add"? That's what mp_override_legacy_irq() already does, isn't it?
> No. mp_override_legacy_irq only overrides for gsi < 16, but not for gsi >= 16(I dump all mappings from array mp_irqs).

I assume you mean you observe so ...

> In my environment, gsi of my dGPU is 24.

... on one specific system? The function is invoked from
acpi_parse_int_src_ovr(), and I can't spot any restriction to
IRQs less than 16 there.

Jan
Jan Beulich June 4, 2024, 6:19 a.m. UTC | #18
On 04.06.2024 08:01, Chen, Jiqian wrote:
> So, how do I process for gsi >= 16?

Oh, and to answer this as well: Isn't it as simple as treating
as 1:1 mapped any (valid) GSI you can't find in mp_irqs[]? You
only care about the mapping, not e.g. polarity or trigger mode
here, iirc.

Jan
Chen, Jiqian June 4, 2024, 6:33 a.m. UTC | #19
On 2024/6/4 14:12, Jan Beulich wrote:
> On 04.06.2024 08:01, Chen, Jiqian wrote:
>> On 2024/6/4 13:55, Jan Beulich wrote:
>>> On 04.06.2024 05:04, Chen, Jiqian wrote:
>>>> On 2024/5/30 23:51, Jan Beulich wrote:
>>>>> On 30.05.2024 13:19, Chen, Jiqian wrote:
>>>>>> I dump all mpc_config_intsrc of array mp_irqs, it shows:
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 0 dstapic 33 dstirq 2
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 15 srcbus 0 srcbusirq 9 dstapic 33 dstirq 9
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 1 dstapic 33 dstirq 1
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 3 dstapic 33 dstirq 3
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 4 dstapic 33 dstirq 4
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 5 dstapic 33 dstirq 5
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 6 dstapic 33 dstirq 6
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 7 dstapic 33 dstirq 7
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 8 dstapic 33 dstirq 8
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 10 dstapic 33 dstirq 10
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 11 dstapic 33 dstirq 11
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 12 dstapic 33 dstirq 12
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 13 dstapic 33 dstirq 13
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 14 dstapic 33 dstirq 14
>>>>>> (XEN) find_irq_entry type 3 irqtype 0 irqflag 0 srcbus 0 srcbusirq 15 dstapic 33 dstirq 15
>>>>>>
>>>>>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>>>>>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
>>>>>
>>>>> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
>>>>> disallows that.
>>>> Do you suggest me to add overrides for higher GSIs into array mp_irqs?
>>>
>>> Why "add"? That's what mp_override_legacy_irq() already does, isn't it?
>> No. mp_override_legacy_irq only overrides for gsi < 16, but not for gsi >= 16(I dump all mappings from array mp_irqs).
> 
> I assume you mean you observe so ...
No, after starting xen pvh dom0, I dump all entries from mp_irqs.

> 
>> In my environment, gsi of my dGPU is 24.
> 
> ... on one specific system? The function is invoked from
> acpi_parse_int_src_ovr(), and I can't spot any restriction to
> IRQs less than 16 there.
I didn't see any restriction too, but from the dump results, there are only 16 entries, see previous email. 

> 
> Jan
Jan Beulich June 4, 2024, 6:36 a.m. UTC | #20
On 04.06.2024 08:33, Chen, Jiqian wrote:
> On 2024/6/4 14:12, Jan Beulich wrote:
>> On 04.06.2024 08:01, Chen, Jiqian wrote:
>>> On 2024/6/4 13:55, Jan Beulich wrote:
>>>> On 04.06.2024 05:04, Chen, Jiqian wrote:
>>>>> On 2024/5/30 23:51, Jan Beulich wrote:
>>>>>> On 30.05.2024 13:19, Chen, Jiqian wrote:
>>>>>>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>>>>>>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
>>>>>>
>>>>>> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
>>>>>> disallows that.
>>>>> Do you suggest me to add overrides for higher GSIs into array mp_irqs?
>>>>
>>>> Why "add"? That's what mp_override_legacy_irq() already does, isn't it?
>>> No. mp_override_legacy_irq only overrides for gsi < 16, but not for gsi >= 16(I dump all mappings from array mp_irqs).
>>
>> I assume you mean you observe so ...
> No, after starting xen pvh dom0, I dump all entries from mp_irqs.

IOW really your answer is "yes" ...

>>> In my environment, gsi of my dGPU is 24.
>>
>> ... on one specific system?

... to this question I raised. Whatever you dump on any number of
systems, there's always the chance that there's another system
where things are different.

>> The function is invoked from
>> acpi_parse_int_src_ovr(), and I can't spot any restriction to
>> IRQs less than 16 there.
> I didn't see any restriction too, but from the dump results, there are only 16 entries, see previous email. 

Hence why I tried to point out that going from observations on a
particular system isn't enough.

Jan
Chen, Jiqian June 4, 2024, 8:18 a.m. UTC | #21
On 2024/6/4 14:36, Jan Beulich wrote:
> On 04.06.2024 08:33, Chen, Jiqian wrote:
>> On 2024/6/4 14:12, Jan Beulich wrote:
>>> On 04.06.2024 08:01, Chen, Jiqian wrote:
>>>> On 2024/6/4 13:55, Jan Beulich wrote:
>>>>> On 04.06.2024 05:04, Chen, Jiqian wrote:
>>>>>> On 2024/5/30 23:51, Jan Beulich wrote:
>>>>>>> On 30.05.2024 13:19, Chen, Jiqian wrote:
>>>>>>>> It seems only Legacy irq and gsi[0:15] has a mapping in mp_irqs.
>>>>>>>> Other gsi can be considered 1:1 mapping with irq? Or are there other places reflect the mapping between irq and gsi?
>>>>>>>
>>>>>>> It may be uncommon to have overrides for higher GSIs, but I don't think ACPI
>>>>>>> disallows that.
>>>>>> Do you suggest me to add overrides for higher GSIs into array mp_irqs?
>>>>>
>>>>> Why "add"? That's what mp_override_legacy_irq() already does, isn't it?
>>>> No. mp_override_legacy_irq only overrides for gsi < 16, but not for gsi >= 16(I dump all mappings from array mp_irqs).
>>>
>>> I assume you mean you observe so ...
>> No, after starting xen pvh dom0, I dump all entries from mp_irqs.
> 
> IOW really your answer is "yes" ...
> 
>>>> In my environment, gsi of my dGPU is 24.
>>>
>>> ... on one specific system?
> 
> ... to this question I raised. Whatever you dump on any number of
> systems, there's always the chance that there's another system
> where things are different.
> 
>>> The function is invoked from
>>> acpi_parse_int_src_ovr(), and I can't spot any restriction to
>>> IRQs less than 16 there.
>> I didn't see any restriction too, but from the dump results, there are only 16 entries, see previous email. 
> 
> Hence why I tried to point out that going from observations on a
> particular system isn't enough.
Anyway, I agree with you that I need to get mapping from mp_irqs.
I tried to get more debug information from my environment. And I attach them here, maybe you can find some problems.
acpi_parse_madt_ioapic_entries
	acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, MAX_IRQ_SOURCES);
		acpi_parse_int_src_ovr
			mp_override_legacy_irq
				only process two entries, irq 0 gsi 2 and irq 9 gsi 9
There are only two entries whose type is ACPI_MADT_TYPE_INTERRUPT_OVERRIDE in MADT table. Is it normal?
And
acpi_parse_madt_ioapic_entries
	mp_config_acpi_legacy_irqs
		process the other GSIs(< 16), so that the total number of mp_irqs is 16.

> 
> Jan
Jan Beulich June 4, 2024, 5:17 p.m. UTC | #22
On 04.06.2024 10:18, Chen, Jiqian wrote:
> I tried to get more debug information from my environment. And I attach them here, maybe you can find some problems.
> acpi_parse_madt_ioapic_entries
> 	acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, MAX_IRQ_SOURCES);
> 		acpi_parse_int_src_ovr
> 			mp_override_legacy_irq
> 				only process two entries, irq 0 gsi 2 and irq 9 gsi 9
> There are only two entries whose type is ACPI_MADT_TYPE_INTERRUPT_OVERRIDE in MADT table. Is it normal?

Yes, that's what you'd typically see (or just one such entry).

Jan
Chen, Jiqian June 5, 2024, 7:04 a.m. UTC | #23
On 2024/6/5 01:17, Jan Beulich wrote:
> On 04.06.2024 10:18, Chen, Jiqian wrote:
>> I tried to get more debug information from my environment. And I attach them here, maybe you can find some problems.
>> acpi_parse_madt_ioapic_entries
>> 	acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, MAX_IRQ_SOURCES);
>> 		acpi_parse_int_src_ovr
>> 			mp_override_legacy_irq
>> 				only process two entries, irq 0 gsi 2 and irq 9 gsi 9
>> There are only two entries whose type is ACPI_MADT_TYPE_INTERRUPT_OVERRIDE in MADT table. Is it normal?
> 
> Yes, that's what you'd typically see (or just one such entry).
Ok, let me conclude that acpi_parse_int_src_ovr get two entries from MADT table and add them into mp_irqs. They are [irq, gsi][0, 2] and [irq, gsi][9, 9].
Then in the following function mp_config_acpi_legacy_irqs initializes the 1:1 mapping of irq and gsi [0~15 except 2 and 9], and add them into mp_irqs.
But for high GSIs(>= 16), no mapping processing.
Right?

Is it that the Xen hypervisor lacks some handling of high GSIs?
For now, if hypervisor gets a high GSIs, it can't be transformed to irq, because there is no mapping between them.

> 
> Jan
Jan Beulich June 5, 2024, 10:09 a.m. UTC | #24
On 05.06.2024 09:04, Chen, Jiqian wrote:
> On 2024/6/5 01:17, Jan Beulich wrote:
>> On 04.06.2024 10:18, Chen, Jiqian wrote:
>>> I tried to get more debug information from my environment. And I attach them here, maybe you can find some problems.
>>> acpi_parse_madt_ioapic_entries
>>> 	acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, MAX_IRQ_SOURCES);
>>> 		acpi_parse_int_src_ovr
>>> 			mp_override_legacy_irq
>>> 				only process two entries, irq 0 gsi 2 and irq 9 gsi 9
>>> There are only two entries whose type is ACPI_MADT_TYPE_INTERRUPT_OVERRIDE in MADT table. Is it normal?
>>
>> Yes, that's what you'd typically see (or just one such entry).
> Ok, let me conclude that acpi_parse_int_src_ovr get two entries from MADT table and add them into mp_irqs. They are [irq, gsi][0, 2] and [irq, gsi][9, 9].
> Then in the following function mp_config_acpi_legacy_irqs initializes the 1:1 mapping of irq and gsi [0~15 except 2 and 9], and add them into mp_irqs.
> But for high GSIs(>= 16), no mapping processing.
> Right?

On that specific system of yours - yes. In the general case high GSIs
may have entries, too.

> Is it that the Xen hypervisor lacks some handling of high GSIs?

I don't think so. Unless you can point out something?

> For now, if hypervisor gets a high GSIs, it can't be transformed to irq, because there is no mapping between them.

No, in the absence of a source override (note the word "override") the
default identity mapping applies.

Jan
Chen, Jiqian June 5, 2024, 10:22 a.m. UTC | #25
On 2024/6/5 18:09, Jan Beulich wrote:
> On 05.06.2024 09:04, Chen, Jiqian wrote:
>> On 2024/6/5 01:17, Jan Beulich wrote:
>>> On 04.06.2024 10:18, Chen, Jiqian wrote:
>>>> I tried to get more debug information from my environment. And I attach them here, maybe you can find some problems.
>>>> acpi_parse_madt_ioapic_entries
>>>> 	acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, MAX_IRQ_SOURCES);
>>>> 		acpi_parse_int_src_ovr
>>>> 			mp_override_legacy_irq
>>>> 				only process two entries, irq 0 gsi 2 and irq 9 gsi 9
>>>> There are only two entries whose type is ACPI_MADT_TYPE_INTERRUPT_OVERRIDE in MADT table. Is it normal?
>>>
>>> Yes, that's what you'd typically see (or just one such entry).
>> Ok, let me conclude that acpi_parse_int_src_ovr get two entries from MADT table and add them into mp_irqs. They are [irq, gsi][0, 2] and [irq, gsi][9, 9].
>> Then in the following function mp_config_acpi_legacy_irqs initializes the 1:1 mapping of irq and gsi [0~15 except 2 and 9], and add them into mp_irqs.
>> But for high GSIs(>= 16), no mapping processing.
>> Right?
> 
> On that specific system of yours - yes. In the general case high GSIs
> may have entries, too.
> 
>> Is it that the Xen hypervisor lacks some handling of high GSIs?
> 
> I don't think so. Unless you can point out something?
Ok, so the implementation is still to get mapping from mp_irqs, I will change in next version.
Thank you.

> 
>> For now, if hypervisor gets a high GSIs, it can't be transformed to irq, because there is no mapping between them.
> 
> No, in the absence of a source override (note the word "override") the
> default identity mapping applies.
What is identity mapping? Like the mp_config_acpi_legacy_irqs does?

> 
> Jan
Jan Beulich June 6, 2024, 6:14 a.m. UTC | #26
On 05.06.2024 12:22, Chen, Jiqian wrote:
> On 2024/6/5 18:09, Jan Beulich wrote:
>> On 05.06.2024 09:04, Chen, Jiqian wrote:
>>> For now, if hypervisor gets a high GSIs, it can't be transformed to irq, because there is no mapping between them.
>>
>> No, in the absence of a source override (note the word "override") the
>> default identity mapping applies.
> What is identity mapping?

GSI == IRQ

Jan
diff mbox series

Patch

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 841db41ad7e4..c21a79d74be3 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1382,6 +1382,11 @@  int xc_domain_irq_permission(xc_interface *xch,
                              uint32_t pirq,
                              bool allow_access);
 
+int xc_domain_gsi_permission(xc_interface *xch,
+                             uint32_t domid,
+                             uint32_t gsi,
+                             bool allow_access);
+
 int xc_domain_iomem_permission(xc_interface *xch,
                                uint32_t domid,
                                unsigned long first_mfn,
diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index f2d9d14b4d9f..8540e84fda93 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -1394,6 +1394,21 @@  int xc_domain_irq_permission(xc_interface *xch,
     return do_domctl(xch, &domctl);
 }
 
+int xc_domain_gsi_permission(xc_interface *xch,
+                             uint32_t domid,
+                             uint32_t gsi,
+                             bool allow_access)
+{
+    struct xen_domctl domctl = {
+        .cmd = XEN_DOMCTL_gsi_permission,
+        .domain = domid,
+        .u.gsi_permission.gsi = gsi,
+        .u.gsi_permission.allow_access = allow_access,
+    };
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_domain_iomem_permission(xc_interface *xch,
                                uint32_t domid,
                                unsigned long first_mfn,
diff --git a/tools/libs/light/libxl_pci.c b/tools/libs/light/libxl_pci.c
index 7e44d4c3ae2b..1d1b81dd2844 100644
--- a/tools/libs/light/libxl_pci.c
+++ b/tools/libs/light/libxl_pci.c
@@ -1412,6 +1412,37 @@  static bool pci_supp_legacy_irq(void)
 #define PCI_SBDF(seg, bus, devfn) \
             ((((uint32_t)(seg)) << 16) | (PCI_DEVID(bus, devfn)))
 
+static int pci_device_set_gsi(libxl_ctx *ctx,
+                              libxl_domid domid,
+                              libxl_device_pci *pci,
+                              bool map,
+                              int *gsi_back)
+{
+    int r, gsi, pirq;
+    uint32_t sbdf;
+
+    sbdf = PCI_SBDF(pci->domain, pci->bus, (PCI_DEVFN(pci->dev, pci->func)));
+    r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
+    *gsi_back = r;
+    if (r < 0)
+        return r;
+
+    gsi = r;
+    pirq = r;
+    if (map)
+        r = xc_physdev_map_pirq(ctx->xch, domid, gsi, &pirq);
+    else
+        r = xc_physdev_unmap_pirq(ctx->xch, domid, pirq);
+    if (r)
+        return r;
+
+    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
+    if (r && errno == EOPNOTSUPP)
+        r = xc_domain_irq_permission(ctx->xch, domid, gsi, map);
+
+    return r;
+}
+
 static void pci_add_dm_done(libxl__egc *egc,
                             pci_add_state *pas,
                             int rc)
@@ -1424,10 +1455,10 @@  static void pci_add_dm_done(libxl__egc *egc,
     unsigned long long start, end, flags, size;
     int irq, i;
     int r;
-    uint32_t sbdf;
     uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
     uint32_t domainid = domid;
     bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);
+    int gsi;
 
     /* Convenience aliases */
     bool starting = pas->starting;
@@ -1485,6 +1516,19 @@  static void pci_add_dm_done(libxl__egc *egc,
     fclose(f);
     if (!pci_supp_legacy_irq())
         goto out_no_irq;
+
+    r = pci_device_set_gsi(ctx, domid, pci, 1, &gsi);
+    if (gsi >= 0) {
+        if (r < 0) {
+            rc = ERROR_FAIL;
+            LOGED(ERROR, domainid,
+                  "pci_device_set_gsi gsi=%d (error=%d)", gsi, errno);
+            goto out;
+        } else {
+            goto process_permissive;
+        }
+    }
+    /* if gsi < 0, keep using irq */
     sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                                 pci->bus, pci->dev, pci->func);
     f = fopen(sysfs_path, "r");
@@ -1493,13 +1537,6 @@  static void pci_add_dm_done(libxl__egc *egc,
         goto out_no_irq;
     }
     if ((fscanf(f, "%u", &irq) == 1) && irq) {
-        sbdf = PCI_SBDF(pci->domain, pci->bus,
-                        (PCI_DEVFN(pci->dev, pci->func)));
-        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
-        /* if fail, keep using irq; if success, r is gsi, use gsi */
-        if (r != -1) {
-            irq = r;
-        }
         r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
         if (r < 0) {
             LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)",
@@ -1519,6 +1556,7 @@  static void pci_add_dm_done(libxl__egc *egc,
     }
     fclose(f);
 
+process_permissive:
     /* Don't restrict writes to the PCI config space from this VM */
     if (pci->permissive) {
         if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
@@ -2186,10 +2224,10 @@  static void pci_remove_detached(libxl__egc *egc,
     int  irq = 0, i, stubdomid = 0;
     const char *sysfs_path;
     FILE *f;
-    uint32_t sbdf;
     uint32_t domainid = prs->domid;
     bool isstubdom;
     int r;
+    int gsi;
 
     /* Convenience aliases */
     libxl_device_pci *const pci = &prs->pci;
@@ -2245,6 +2283,15 @@  skip_bar:
     if (!pci_supp_legacy_irq())
         goto skip_legacy_irq;
 
+    r = pci_device_set_gsi(ctx, domid, pci, 0, &gsi);
+    if (gsi >= 0) {
+        if (r < 0) {
+            LOGED(ERROR, domainid,
+                  "pci_device_set_gsi gsi=%d (error=%d)", gsi, errno);
+        }
+        goto skip_legacy_irq;
+    }
+    /* if gsi < 0, keep using irq */
     sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                            pci->bus, pci->dev, pci->func);
 
@@ -2255,13 +2302,6 @@  skip_bar:
     }
 
     if ((fscanf(f, "%u", &irq) == 1) && irq) {
-        sbdf = PCI_SBDF(pci->domain, pci->bus,
-                        (PCI_DEVFN(pci->dev, pci->func)));
-        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
-        /* if fail, keep using irq; if success, r is gsi, use gsi */
-        if (r != -1) {
-            irq = r;
-        }
         rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
         if (rc < 0) {
             /*
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 9a72d57333e9..9b8a08b2a81d 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -237,6 +237,37 @@  long arch_do_domctl(
         break;
     }
 
+    case XEN_DOMCTL_gsi_permission:
+    {
+        unsigned int gsi = domctl->u.gsi_permission.gsi;
+        int allow = domctl->u.gsi_permission.allow_access;
+
+        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )
+        {
+            ret = -EOPNOTSUPP;
+            break;
+        }
+
+        if ( gsi >= nr_irqs_gsi )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( !irq_access_permitted(current->domain, gsi) ||
+             xsm_irq_permission(XSM_HOOK, d, gsi, allow) )
+        {
+            ret = -EPERM;
+            break;
+        }
+
+        if ( allow )
+            ret = irq_permit_access(d, gsi);
+        else
+            ret = irq_deny_access(d, gsi);
+        break;
+    }
+
     case XEN_DOMCTL_getpageframeinfo3:
     {
         unsigned int num = domctl->u.getpageframeinfo3.num;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index a33f9ec32b08..47e95f9ee824 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -447,6 +447,13 @@  struct xen_domctl_irq_permission {
 };
 
 
+/* XEN_DOMCTL_gsi_permission */
+struct xen_domctl_gsi_permission {
+    uint32_t gsi;
+    uint8_t allow_access;    /* flag to specify enable/disable of x86 gsi access */
+};
+
+
 /* XEN_DOMCTL_iomem_permission */
 struct xen_domctl_iomem_permission {
     uint64_aligned_t first_mfn;/* first page (physical page number) in range */
@@ -1277,6 +1284,7 @@  struct xen_domctl {
 #define XEN_DOMCTL_vmtrace_op                    84
 #define XEN_DOMCTL_get_paging_mempool_size       85
 #define XEN_DOMCTL_set_paging_mempool_size       86
+#define XEN_DOMCTL_gsi_permission                87
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1299,6 +1307,7 @@  struct xen_domctl {
         struct xen_domctl_setdomainhandle   setdomainhandle;
         struct xen_domctl_setdebugging      setdebugging;
         struct xen_domctl_irq_permission    irq_permission;
+        struct xen_domctl_gsi_permission    gsi_permission;
         struct xen_domctl_iomem_permission  iomem_permission;
         struct xen_domctl_ioport_permission ioport_permission;
         struct xen_domctl_hypercall_init    hypercall_init;
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 5e88c71b8e22..a5b134c91101 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -685,6 +685,7 @@  static int cf_check flask_domctl(struct domain *d, int cmd)
     case XEN_DOMCTL_shadow_op:
     case XEN_DOMCTL_ioport_permission:
     case XEN_DOMCTL_ioport_mapping:
+    case XEN_DOMCTL_gsi_permission:
 #endif
 #ifdef CONFIG_HAS_PASSTHROUGH
     /*