diff mbox series

[RESEND,v5,5/5] hw/pci: ensure PCIE devices are plugged into only slot 0 of PCIE port

Message ID 20230626161244.4145-6-anisinha@redhat.com (mailing list archive)
State New, archived
Headers show
Series test and QEMU fixes to ensure proper PCIE device usage | expand

Commit Message

Ani Sinha June 26, 2023, 4:12 p.m. UTC
PCI Express ports only have one slot, so PCI Express devices can only be
plugged into slot 0 on a PCIE port. Enforce it.

CC: jusual@redhat.com
CC: imammedo@redhat.com
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Reviewed-by: Julia Suvorova <jusual@redhat.com>
---
 hw/pci/pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

Comments

Igor Mammedov June 27, 2023, 9:02 a.m. UTC | #1
On Mon, 26 Jun 2023 21:42:44 +0530
Ani Sinha <anisinha@redhat.com> wrote:

> PCI Express ports only have one slot, so PCI Express devices can only be
> plugged into slot 0 on a PCIE port. Enforce it.

btw, previously you mentioned ARI.
So if we turn it on, wouldn't this patch actually become regression?

> 
> CC: jusual@redhat.com
> CC: imammedo@redhat.com
> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> ---
>  hw/pci/pci.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index bf38905b7d..426af133b0 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -64,6 +64,7 @@ bool pci_available = true;
>  static char *pcibus_get_dev_path(DeviceState *dev);
>  static char *pcibus_get_fw_dev_path(DeviceState *dev);
>  static void pcibus_reset(BusState *qbus);
> +static bool pcie_has_upstream_port(PCIDevice *dev);
>  
>  static Property pci_props[] = {
>      DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
>                     name);
>  
>         return NULL;
> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> +                   " parent device only allows plugging into slot 0.",
> +                   PCI_SLOT(devfn), name);
> +        return NULL;
>      }
>  
>      pci_dev->devfn = devfn;
Ani Sinha June 27, 2023, 9:53 a.m. UTC | #2
> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> 
> On Mon, 26 Jun 2023 21:42:44 +0530
> Ani Sinha <anisinha@redhat.com> wrote:
> 
>> PCI Express ports only have one slot, so PCI Express devices can only be
>> plugged into slot 0 on a PCIE port. Enforce it.
> 
> btw, previously you mentioned ARI.
> So if we turn it on, wouldn't this patch actually become regression?

If ARI breaks this, it will break other areas in QEMU too, e.g. anywhere pci_get_function_0() is used.
Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.

> 
>> 
>> CC: jusual@redhat.com
>> CC: imammedo@redhat.com
>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
>> ---
>> hw/pci/pci.c | 6 ++++++
>> 1 file changed, 6 insertions(+)
>> 
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index bf38905b7d..426af133b0 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -64,6 +64,7 @@ bool pci_available = true;
>> static char *pcibus_get_dev_path(DeviceState *dev);
>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
>> static void pcibus_reset(BusState *qbus);
>> +static bool pcie_has_upstream_port(PCIDevice *dev);
>> 
>> static Property pci_props[] = {
>>     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
>>                    name);
>> 
>>        return NULL;
>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
>> +                   " parent device only allows plugging into slot 0.",
>> +                   PCI_SLOT(devfn), name);
>> +        return NULL;
>>     }
>> 
>>     pci_dev->devfn = devfn;
>
Michael S. Tsirkin June 27, 2023, 11:55 a.m. UTC | #3
On Tue, Jun 27, 2023 at 03:23:04PM +0530, Ani Sinha wrote:
> 
> 
> > On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> > 
> > On Mon, 26 Jun 2023 21:42:44 +0530
> > Ani Sinha <anisinha@redhat.com> wrote:
> > 
> >> PCI Express ports only have one slot, so PCI Express devices can only be
> >> plugged into slot 0 on a PCIE port. Enforce it.
> > 
> > btw, previously you mentioned ARI.
> > So if we turn it on, wouldn't this patch actually become regression?
> 
> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.

We will just fix pci_get_function_0.

> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.

ok

> > 
> >> 
> >> CC: jusual@redhat.com
> >> CC: imammedo@redhat.com
> >> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> >> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> >> ---
> >> hw/pci/pci.c | 6 ++++++
> >> 1 file changed, 6 insertions(+)
> >> 
> >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >> index bf38905b7d..426af133b0 100644
> >> --- a/hw/pci/pci.c
> >> +++ b/hw/pci/pci.c
> >> @@ -64,6 +64,7 @@ bool pci_available = true;
> >> static char *pcibus_get_dev_path(DeviceState *dev);
> >> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> >> static void pcibus_reset(BusState *qbus);
> >> +static bool pcie_has_upstream_port(PCIDevice *dev);
> >> 
> >> static Property pci_props[] = {
> >>     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> >> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> >>                    name);
> >> 
> >>        return NULL;
> >> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> >> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> >> +                   " parent device only allows plugging into slot 0.",
> >> +                   PCI_SLOT(devfn), name);
> >> +        return NULL;
> >>     }
> >> 
> >>     pci_dev->devfn = devfn;
> >
Ani Sinha June 27, 2023, 11:55 a.m. UTC | #4
> On 27-Jun-2023, at 3:23 PM, Ani Sinha <anisinha@redhat.com> wrote:
> 
> 
> 
>> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
>> 
>> On Mon, 26 Jun 2023 21:42:44 +0530
>> Ani Sinha <anisinha@redhat.com> wrote:
>> 
>>> PCI Express ports only have one slot, so PCI Express devices can only be
>>> plugged into slot 0 on a PCIE port. Enforce it.
>> 
>> btw, previously you mentioned ARI.
>> So if we turn it on, wouldn't this patch actually become regression?

Looking at https://pcisig.com/sites/default/files/specification_documents/ECN-alt-rid-interpretation-070604.pdf, section 7.23, seems a root port does not support ARI but it can support ARI forwarding capability (section 7.8.5).
Also, with ARI enabled, the device cannot have a non-zero device number. Also, shouldn't any code path that uses PCI_SLOT() also check for ARI if it wants to be ARI compliant?

Anyways these are just facts I could find but I am not sure if this would answer your above question.

> 
> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.
> 
>> 
>>> 
>>> CC: jusual@redhat.com
>>> CC: imammedo@redhat.com
>>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
>>> ---
>>> hw/pci/pci.c | 6 ++++++
>>> 1 file changed, 6 insertions(+)
>>> 
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index bf38905b7d..426af133b0 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -64,6 +64,7 @@ bool pci_available = true;
>>> static char *pcibus_get_dev_path(DeviceState *dev);
>>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
>>> static void pcibus_reset(BusState *qbus);
>>> +static bool pcie_has_upstream_port(PCIDevice *dev);
>>> 
>>> static Property pci_props[] = {
>>>    DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
>>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
>>>                   name);
>>> 
>>>       return NULL;
>>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
>>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
>>> +                   " parent device only allows plugging into slot 0.",
>>> +                   PCI_SLOT(devfn), name);
>>> +        return NULL;
>>>    }
>>> 
>>>    pci_dev->devfn = devfn;
>> 
>
Igor Mammedov June 27, 2023, 11:58 a.m. UTC | #5
On Tue, 27 Jun 2023 15:23:04 +0530
Ani Sinha <anisinha@redhat.com> wrote:

> > On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> > 
> > On Mon, 26 Jun 2023 21:42:44 +0530
> > Ani Sinha <anisinha@redhat.com> wrote:
> >   
> >> PCI Express ports only have one slot, so PCI Express devices can only be
> >> plugged into slot 0 on a PCIE port. Enforce it.  
> > 
> > btw, previously you mentioned ARI.
> > So if we turn it on, wouldn't this patch actually become regression?  
> 
> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.

I'm fine with this patch if you test it with ARI enabled and it won't break
something that has been working before this patch. Just mention what testing
you've done in commit message.

> 
> >   
> >> 
> >> CC: jusual@redhat.com
> >> CC: imammedo@redhat.com
> >> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> >> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> >> ---
> >> hw/pci/pci.c | 6 ++++++
> >> 1 file changed, 6 insertions(+)
> >> 
> >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >> index bf38905b7d..426af133b0 100644
> >> --- a/hw/pci/pci.c
> >> +++ b/hw/pci/pci.c
> >> @@ -64,6 +64,7 @@ bool pci_available = true;
> >> static char *pcibus_get_dev_path(DeviceState *dev);
> >> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> >> static void pcibus_reset(BusState *qbus);
> >> +static bool pcie_has_upstream_port(PCIDevice *dev);
> >> 
> >> static Property pci_props[] = {
> >>     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> >> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> >>                    name);
> >> 
> >>        return NULL;
> >> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> >> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> >> +                   " parent device only allows plugging into slot 0.",
> >> +                   PCI_SLOT(devfn), name);
> >> +        return NULL;
> >>     }
> >> 
> >>     pci_dev->devfn = devfn;  
> >   
>
Ani Sinha June 27, 2023, 12:01 p.m. UTC | #6
> On 27-Jun-2023, at 5:25 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> 
> On Tue, Jun 27, 2023 at 03:23:04PM +0530, Ani Sinha wrote:
>> 
>> 
>>> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
>>> 
>>> On Mon, 26 Jun 2023 21:42:44 +0530
>>> Ani Sinha <anisinha@redhat.com> wrote:
>>> 
>>>> PCI Express ports only have one slot, so PCI Express devices can only be
>>>> plugged into slot 0 on a PCIE port. Enforce it.
>>> 
>>> btw, previously you mentioned ARI.
>>> So if we turn it on, wouldn't this patch actually become regression?
>> 
>> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> 
> We will just fix pci_get_function_0.

Any code with PCI_SLOT() I believe also would need fixing?

> 
>> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.
> 
> ok
> 
>>> 
>>>> 
>>>> CC: jusual@redhat.com
>>>> CC: imammedo@redhat.com
>>>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
>>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>>>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
>>>> ---
>>>> hw/pci/pci.c | 6 ++++++
>>>> 1 file changed, 6 insertions(+)
>>>> 
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index bf38905b7d..426af133b0 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -64,6 +64,7 @@ bool pci_available = true;
>>>> static char *pcibus_get_dev_path(DeviceState *dev);
>>>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
>>>> static void pcibus_reset(BusState *qbus);
>>>> +static bool pcie_has_upstream_port(PCIDevice *dev);
>>>> 
>>>> static Property pci_props[] = {
>>>>    DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
>>>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
>>>>                   name);
>>>> 
>>>>       return NULL;
>>>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
>>>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
>>>> +                   " parent device only allows plugging into slot 0.",
>>>> +                   PCI_SLOT(devfn), name);
>>>> +        return NULL;
>>>>    }
>>>> 
>>>>    pci_dev->devfn = devfn;
>>> 
>
Michael S. Tsirkin June 27, 2023, 12:06 p.m. UTC | #7
On Tue, Jun 27, 2023 at 05:31:37PM +0530, Ani Sinha wrote:
> 
> 
> > On 27-Jun-2023, at 5:25 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > 
> > On Tue, Jun 27, 2023 at 03:23:04PM +0530, Ani Sinha wrote:
> >> 
> >> 
> >>> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> >>> 
> >>> On Mon, 26 Jun 2023 21:42:44 +0530
> >>> Ani Sinha <anisinha@redhat.com> wrote:
> >>> 
> >>>> PCI Express ports only have one slot, so PCI Express devices can only be
> >>>> plugged into slot 0 on a PCIE port. Enforce it.
> >>> 
> >>> btw, previously you mentioned ARI.
> >>> So if we turn it on, wouldn't this patch actually become regression?
> >> 
> >> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> > 
> > We will just fix pci_get_function_0.
> 
> Any code with PCI_SLOT() I believe also would need fixing?

Oh, absolutely.

> > 
> >> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.
> > 
> > ok
> > 
> >>> 
> >>>> 
> >>>> CC: jusual@redhat.com
> >>>> CC: imammedo@redhat.com
> >>>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> >>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> >>>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> >>>> ---
> >>>> hw/pci/pci.c | 6 ++++++
> >>>> 1 file changed, 6 insertions(+)
> >>>> 
> >>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>>> index bf38905b7d..426af133b0 100644
> >>>> --- a/hw/pci/pci.c
> >>>> +++ b/hw/pci/pci.c
> >>>> @@ -64,6 +64,7 @@ bool pci_available = true;
> >>>> static char *pcibus_get_dev_path(DeviceState *dev);
> >>>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> >>>> static void pcibus_reset(BusState *qbus);
> >>>> +static bool pcie_has_upstream_port(PCIDevice *dev);
> >>>> 
> >>>> static Property pci_props[] = {
> >>>>    DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> >>>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> >>>>                   name);
> >>>> 
> >>>>       return NULL;
> >>>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> >>>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> >>>> +                   " parent device only allows plugging into slot 0.",
> >>>> +                   PCI_SLOT(devfn), name);
> >>>> +        return NULL;
> >>>>    }
> >>>> 
> >>>>    pci_dev->devfn = devfn;
> >>> 
> >
Michael S. Tsirkin June 27, 2023, 12:23 p.m. UTC | #8
On Tue, Jun 27, 2023 at 01:58:49PM +0200, Igor Mammedov wrote:
> On Tue, 27 Jun 2023 15:23:04 +0530
> Ani Sinha <anisinha@redhat.com> wrote:
> 
> > > On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> > > 
> > > On Mon, 26 Jun 2023 21:42:44 +0530
> > > Ani Sinha <anisinha@redhat.com> wrote:
> > >   
> > >> PCI Express ports only have one slot, so PCI Express devices can only be
> > >> plugged into slot 0 on a PCIE port. Enforce it.  
> > > 
> > > btw, previously you mentioned ARI.
> > > So if we turn it on, wouldn't this patch actually become regression?  
> > 
> > If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> > Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.
> 
> I'm fine with this patch if you test it with ARI enabled and it won't break
> something that has been working before this patch. Just mention what testing
> you've done in commit message.

Oh yes. That's why it was checking !vf originally. It's because the most
common use of ARI is SRIOV, so it works as a kind of hack.

> > 
> > >   
> > >> 
> > >> CC: jusual@redhat.com
> > >> CC: imammedo@redhat.com
> > >> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> > >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> > >> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> > >> ---
> > >> hw/pci/pci.c | 6 ++++++
> > >> 1 file changed, 6 insertions(+)
> > >> 
> > >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > >> index bf38905b7d..426af133b0 100644
> > >> --- a/hw/pci/pci.c
> > >> +++ b/hw/pci/pci.c
> > >> @@ -64,6 +64,7 @@ bool pci_available = true;
> > >> static char *pcibus_get_dev_path(DeviceState *dev);
> > >> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> > >> static void pcibus_reset(BusState *qbus);
> > >> +static bool pcie_has_upstream_port(PCIDevice *dev);
> > >> 
> > >> static Property pci_props[] = {
> > >>     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> > >> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> > >>                    name);
> > >> 
> > >>        return NULL;
> > >> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> > >> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> > >> +                   " parent device only allows plugging into slot 0.",
> > >> +                   PCI_SLOT(devfn), name);
> > >> +        return NULL;
> > >>     }
> > >> 
> > >>     pci_dev->devfn = devfn;  
> > >   
> >
Ani Sinha June 27, 2023, 12:29 p.m. UTC | #9
> On 27-Jun-2023, at 5:53 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> 
> On Tue, Jun 27, 2023 at 01:58:49PM +0200, Igor Mammedov wrote:
>> On Tue, 27 Jun 2023 15:23:04 +0530
>> Ani Sinha <anisinha@redhat.com> wrote:
>> 
>>>> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
>>>> 
>>>> On Mon, 26 Jun 2023 21:42:44 +0530
>>>> Ani Sinha <anisinha@redhat.com> wrote:
>>>> 
>>>>> PCI Express ports only have one slot, so PCI Express devices can only be
>>>>> plugged into slot 0 on a PCIE port. Enforce it.  
>>>> 
>>>> btw, previously you mentioned ARI.
>>>> So if we turn it on, wouldn't this patch actually become regression?  
>>> 
>>> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
>>> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.
>> 
>> I'm fine with this patch if you test it with ARI enabled and it won't break
>> something that has been working before this patch. Just mention what testing
>> you've done in commit message.
> 
> Oh yes. That's why it was checking !vf originally. It's because the most
> common use of ARI is SRIOV, so it works a a kind of hack.

Ok so should I put it back?
Also I was thinking of running the qtest and avocado test mentioned in https://www.qemu.org/docs/master/system/devices/igb.html . Not sure if it would be enough to test ARI.

> 
>>> 
>>>> 
>>>>> 
>>>>> CC: jusual@redhat.com
>>>>> CC: imammedo@redhat.com
>>>>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
>>>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>>>>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
>>>>> ---
>>>>> hw/pci/pci.c | 6 ++++++
>>>>> 1 file changed, 6 insertions(+)
>>>>> 
>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>> index bf38905b7d..426af133b0 100644
>>>>> --- a/hw/pci/pci.c
>>>>> +++ b/hw/pci/pci.c
>>>>> @@ -64,6 +64,7 @@ bool pci_available = true;
>>>>> static char *pcibus_get_dev_path(DeviceState *dev);
>>>>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
>>>>> static void pcibus_reset(BusState *qbus);
>>>>> +static bool pcie_has_upstream_port(PCIDevice *dev);
>>>>> 
>>>>> static Property pci_props[] = {
>>>>>    DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
>>>>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
>>>>>                   name);
>>>>> 
>>>>>       return NULL;
>>>>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
>>>>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
>>>>> +                   " parent device only allows plugging into slot 0.",
>>>>> +                   PCI_SLOT(devfn), name);
>>>>> +        return NULL;
>>>>>    }
>>>>> 
>>>>>    pci_dev->devfn = devfn;
Igor Mammedov June 27, 2023, 12:38 p.m. UTC | #10
On Tue, 27 Jun 2023 08:23:25 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Tue, Jun 27, 2023 at 01:58:49PM +0200, Igor Mammedov wrote:
> > On Tue, 27 Jun 2023 15:23:04 +0530
> > Ani Sinha <anisinha@redhat.com> wrote:
> >   
> > > > On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> > > > 
> > > > On Mon, 26 Jun 2023 21:42:44 +0530
> > > > Ani Sinha <anisinha@redhat.com> wrote:
> > > >     
> > > >> PCI Express ports only have one slot, so PCI Express devices can only be
> > > >> plugged into slot 0 on a PCIE port. Enforce it.    
> > > > 
> > > > btw, previously you mentioned ARI.
> > > > So if we turn it on, wouldn't this patch actually become regression?    
> > > 
> > > If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> > > Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.  
> > 
> > I'm fine with this patch if you test it with ARI enabled and it won't break
> > something that has been working before this patch. Just mention what testing
> > you've done in commit message.  
> 
> Oh yes. That's why it was checking !vf originally. It's because the most
> common use of ARI is SRIOV, so it works a a kind of hack.

Should we check for the ARI cap instead of the vf hack?
Why haven't we done that from the beginning?

> 
> > >   
> > > >     
> > > >> 
> > > >> CC: jusual@redhat.com
> > > >> CC: imammedo@redhat.com
> > > >> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> > > >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> > > >> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> > > >> ---
> > > >> hw/pci/pci.c | 6 ++++++
> > > >> 1 file changed, 6 insertions(+)
> > > >> 
> > > >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > >> index bf38905b7d..426af133b0 100644
> > > >> --- a/hw/pci/pci.c
> > > >> +++ b/hw/pci/pci.c
> > > >> @@ -64,6 +64,7 @@ bool pci_available = true;
> > > >> static char *pcibus_get_dev_path(DeviceState *dev);
> > > >> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> > > >> static void pcibus_reset(BusState *qbus);
> > > >> +static bool pcie_has_upstream_port(PCIDevice *dev);
> > > >> 
> > > >> static Property pci_props[] = {
> > > >>     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> > > >> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> > > >>                    name);
> > > >> 
> > > >>        return NULL;
> > > >> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> > > >> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> > > >> +                   " parent device only allows plugging into slot 0.",
> > > >> +                   PCI_SLOT(devfn), name);
> > > >> +        return NULL;
> > > >>     }
> > > >> 
> > > >>     pci_dev->devfn = devfn;    
> > > >     
> > >   
>
Michael S. Tsirkin June 27, 2023, 2:23 p.m. UTC | #11
On Tue, Jun 27, 2023 at 05:59:02PM +0530, Ani Sinha wrote:
> 
> 
> > On 27-Jun-2023, at 5:53 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > 
> > On Tue, Jun 27, 2023 at 01:58:49PM +0200, Igor Mammedov wrote:
> >> On Tue, 27 Jun 2023 15:23:04 +0530
> >> Ani Sinha <anisinha@redhat.com> wrote:
> >> 
> >>>> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> >>>> 
> >>>> On Mon, 26 Jun 2023 21:42:44 +0530
> >>>> Ani Sinha <anisinha@redhat.com> wrote:
> >>>> 
> >>>>> PCI Express ports only have one slot, so PCI Express devices can only be
> >>>>> plugged into slot 0 on a PCIE port. Enforce it.  
> >>>> 
> >>>> btw, previously you mentioned ARI.
> >>>> So if we turn it on, wouldn't this patch actually become regression?  
> >>> 
> >>> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> >>> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.
> >> 
> >> I'm fine with this patch if you test it with ARI enabled and it won't break
> >> something that has been working before this patch. Just mention what testing
> >> you've done in commit message.
> > 
> > Oh yes. That's why it was checking !vf originally. It's because the most
> > common use of ARI is SRIOV, so it works a a kind of hack.
> 
> Ok so should I put it back?
> Also I was thinking of running the qtest and avocado test mentioned in https://www.qemu.org/docs/master/system/devices/igb.html . Not sure if it would be enough to test ARI.

Well you need > 8 VFs for that.

> > 
> >>> 
> >>>> 
> >>>>> 
> >>>>> CC: jusual@redhat.com
> >>>>> CC: imammedo@redhat.com
> >>>>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> >>>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> >>>>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> >>>>> ---
> >>>>> hw/pci/pci.c | 6 ++++++
> >>>>> 1 file changed, 6 insertions(+)
> >>>>> 
> >>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>>>> index bf38905b7d..426af133b0 100644
> >>>>> --- a/hw/pci/pci.c
> >>>>> +++ b/hw/pci/pci.c
> >>>>> @@ -64,6 +64,7 @@ bool pci_available = true;
> >>>>> static char *pcibus_get_dev_path(DeviceState *dev);
> >>>>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> >>>>> static void pcibus_reset(BusState *qbus);
> >>>>> +static bool pcie_has_upstream_port(PCIDevice *dev);
> >>>>> 
> >>>>> static Property pci_props[] = {
> >>>>>    DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> >>>>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> >>>>>                   name);
> >>>>> 
> >>>>>       return NULL;
> >>>>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> >>>>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> >>>>> +                   " parent device only allows plugging into slot 0.",
> >>>>> +                   PCI_SLOT(devfn), name);
> >>>>> +        return NULL;
> >>>>>    }
> >>>>> 
> >>>>>    pci_dev->devfn = devfn;
Michael S. Tsirkin June 27, 2023, 2:27 p.m. UTC | #12
On Tue, Jun 27, 2023 at 02:38:44PM +0200, Igor Mammedov wrote:
> On Tue, 27 Jun 2023 08:23:25 -0400
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Tue, Jun 27, 2023 at 01:58:49PM +0200, Igor Mammedov wrote:
> > > On Tue, 27 Jun 2023 15:23:04 +0530
> > > Ani Sinha <anisinha@redhat.com> wrote:
> > >   
> > > > > On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
> > > > > 
> > > > > On Mon, 26 Jun 2023 21:42:44 +0530
> > > > > Ani Sinha <anisinha@redhat.com> wrote:
> > > > >     
> > > > >> PCI Express ports only have one slot, so PCI Express devices can only be
> > > > >> plugged into slot 0 on a PCIE port. Enforce it.    
> > > > > 
> > > > > btw, previously you mentioned ARI.
> > > > > So if we turn it on, wouldn't this patch actually become regression?    
> > > > 
> > > > If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
> > > > Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.  
> > > 
> > > I'm fine with this patch if you test it with ARI enabled and it won't break
> > > something that has been working before this patch. Just mention what testing
> > > you've done in commit message.  
> > 
> > Oh yes. That's why it was checking !vf originally. It's because the most
> > common use of ARI is SRIOV, so it works a a kind of hack.
> 
> should we check for ARI cap instead of vf hack?
> why we haven't that from the beginning?

Maybe. ARI is a capability; the driver has to activate it, so it's not 100%.
It does not help that our ARI implementation is broken in several
places.


> > 
> > > >   
> > > > >     
> > > > >> 
> > > > >> CC: jusual@redhat.com
> > > > >> CC: imammedo@redhat.com
> > > > >> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
> > > > >> Signed-off-by: Ani Sinha <anisinha@redhat.com>
> > > > >> Reviewed-by: Julia Suvorova <jusual@redhat.com>
> > > > >> ---
> > > > >> hw/pci/pci.c | 6 ++++++
> > > > >> 1 file changed, 6 insertions(+)
> > > > >> 
> > > > >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > >> index bf38905b7d..426af133b0 100644
> > > > >> --- a/hw/pci/pci.c
> > > > >> +++ b/hw/pci/pci.c
> > > > >> @@ -64,6 +64,7 @@ bool pci_available = true;
> > > > >> static char *pcibus_get_dev_path(DeviceState *dev);
> > > > >> static char *pcibus_get_fw_dev_path(DeviceState *dev);
> > > > >> static void pcibus_reset(BusState *qbus);
> > > > >> +static bool pcie_has_upstream_port(PCIDevice *dev);
> > > > >> 
> > > > >> static Property pci_props[] = {
> > > > >>     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> > > > >> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> > > > >>                    name);
> > > > >> 
> > > > >>        return NULL;
> > > > >> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
> > > > >> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
> > > > >> +                   " parent device only allows plugging into slot 0.",
> > > > >> +                   PCI_SLOT(devfn), name);
> > > > >> +        return NULL;
> > > > >>     }
> > > > >> 
> > > > >>     pci_dev->devfn = devfn;    
> > > > >     
> > > >   
> >
Ani Sinha June 28, 2023, 3:02 p.m. UTC | #13
> On 27-Jun-2023, at 7:57 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> 
> On Tue, Jun 27, 2023 at 02:38:44PM +0200, Igor Mammedov wrote:
>> On Tue, 27 Jun 2023 08:23:25 -0400
>> "Michael S. Tsirkin" <mst@redhat.com> wrote:
>> 
>>> On Tue, Jun 27, 2023 at 01:58:49PM +0200, Igor Mammedov wrote:
>>>> On Tue, 27 Jun 2023 15:23:04 +0530
>>>> Ani Sinha <anisinha@redhat.com> wrote:
>>>> 
>>>>>> On 27-Jun-2023, at 2:32 PM, Igor Mammedov <imammedo@redhat.com> wrote:
>>>>>> 
>>>>>> On Mon, 26 Jun 2023 21:42:44 +0530
>>>>>> Ani Sinha <anisinha@redhat.com> wrote:
>>>>>> 
>>>>>>> PCI Express ports only have one slot, so PCI Express devices can only be
>>>>>>> plugged into slot 0 on a PCIE port. Enforce it.    
>>>>>> 
>>>>>> btw, previously you mentioned ARI.
>>>>>> So if we turn it on, wouldn't this patch actually become regression?    
>>>>> 
>>>>> If ARI breaks this, it will break other areas in QEMU too, ex anywhere pci_get_function_0() is used.
>>>>> Regardless, I think at least the tests are worth fixing, particularly the mess with hd-geo-test.  
>>>> 
>>>> I'm fine with this patch if you test it with ARI enabled and it won't break
>>>> something that has been working before this patch. Just mention what testing
>>>> you've done in commit message.  
>>> 
>>> Oh yes. That's why it was checking !vf originally. It's because the most
>>> common use of ARI is SRIOV, so it works a a kind of hack.
>> 
>> should we check for ARI cap instead of vf hack?
>> why we haven't that from the beginning?
> 
> Maybe.

Maybe not. I tried doing 

!pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI)

But sadly since the device has not been fully initialised and the config space memory has not been allocated yet, we can’t check for that capability at that point in the code.

It seems checking for VFs is the best available approximation at this point.

> ARI is a capability, driver has to activate it, so it's not 100%
> It does not help that our ARI implementation is broken in several
> places.
> 
> 
>>> 
>>>>> 
>>>>>> 
>>>>>>> 
>>>>>>> CC: jusual@redhat.com
>>>>>>> CC: imammedo@redhat.com
>>>>>>> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
>>>>>>> Signed-off-by: Ani Sinha <anisinha@redhat.com>
>>>>>>> Reviewed-by: Julia Suvorova <jusual@redhat.com>
>>>>>>> ---
>>>>>>> hw/pci/pci.c | 6 ++++++
>>>>>>> 1 file changed, 6 insertions(+)
>>>>>>> 
>>>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>>>> index bf38905b7d..426af133b0 100644
>>>>>>> --- a/hw/pci/pci.c
>>>>>>> +++ b/hw/pci/pci.c
>>>>>>> @@ -64,6 +64,7 @@ bool pci_available = true;
>>>>>>> static char *pcibus_get_dev_path(DeviceState *dev);
>>>>>>> static char *pcibus_get_fw_dev_path(DeviceState *dev);
>>>>>>> static void pcibus_reset(BusState *qbus);
>>>>>>> +static bool pcie_has_upstream_port(PCIDevice *dev);
>>>>>>> 
>>>>>>> static Property pci_props[] = {
>>>>>>>    DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
>>>>>>> @@ -1189,6 +1190,11 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
>>>>>>>                   name);
>>>>>>> 
>>>>>>>       return NULL;
>>>>>>> +    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
>>>>>>> +        error_setg(errp, "PCI: slot %d is not valid for %s,"
>>>>>>> +                   " parent device only allows plugging into slot 0.",
>>>>>>> +                   PCI_SLOT(devfn), name);
>>>>>>> +        return NULL;
>>>>>>>    }
>>>>>>> 
>>>>>>>    pci_dev->devfn = devfn;    
>>>>>> 
>>>>> 
>>> 
>
diff mbox series

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bf38905b7d..426af133b0 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -64,6 +64,7 @@  bool pci_available = true;
 static char *pcibus_get_dev_path(DeviceState *dev);
 static char *pcibus_get_fw_dev_path(DeviceState *dev);
 static void pcibus_reset(BusState *qbus);
+static bool pcie_has_upstream_port(PCIDevice *dev);
 
 static Property pci_props[] = {
     DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
@@ -1189,6 +1190,11 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
                    name);
 
        return NULL;
+    } else if (pcie_has_upstream_port(pci_dev) && PCI_SLOT(devfn)) {
+        error_setg(errp, "PCI: slot %d is not valid for %s,"
+                   " parent device only allows plugging into slot 0.",
+                   PCI_SLOT(devfn), name);
+        return NULL;
     }
 
     pci_dev->devfn = devfn;