[v3,4/4] iommu / pci: re-implement XEN_DOMCTL_get_device_group...
diff mbox series

Message ID 20190716101657.23327-5-paul.durrant@citrix.com
State New
Headers show
Series
  • iommu groups + cleanup
Related show

Commit Message

Paul Durrant July 16, 2019, 10:16 a.m. UTC
... using the new iommu_group infrastructure.

Because 'sibling' devices are now members of the same iommu_group,
implement the domctl by looking up the iommu_group of the pdev with the
matching SBDF and then finding all the assigned pdevs that are in the
group.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Jan Beulich <jbeulich@suse.com>

v3:
 - Make 'max_sdevs' parameter in iommu_get_device_group() unsigned.
 - Add missing check of max_sdevs to avoid buffer overflow.

v2:
 - Re-implement in the absence of a per-group devs list.
 - Make use of pci_sbdf_t.
---
 xen/drivers/passthrough/groups.c | 46 ++++++++++++++++++++++++++++++++++++
 xen/drivers/passthrough/pci.c    | 51 ++--------------------------------------
 xen/include/xen/iommu.h          |  3 +++
 3 files changed, 51 insertions(+), 49 deletions(-)

Comments

Roger Pau Monné July 16, 2019, 11:28 a.m. UTC | #1
On Tue, Jul 16, 2019 at 11:16:57AM +0100, Paul Durrant wrote:
> ... using the new iommu_group infrastructure.
> 
> Because 'sibling' devices are now members of the same iommu_group,
> implement the domctl by looking up the iommu_group of the pdev with the
> matching SBDF and then finding all the assigned pdevs that are in the
> group.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> ---
> Cc: Jan Beulich <jbeulich@suse.com>
> 
> v3:
>  - Make 'max_sdevs' parameter in iommu_get_device_group() unsigned.
>  - Add missing check of max_sdevs to avoid buffer overflow.
> 
> v2:
>  - Re-implement in the absence of a per-group devs list.
>  - Make use of pci_sbdf_t.
> ---
>  xen/drivers/passthrough/groups.c | 46 ++++++++++++++++++++++++++++++++++++
>  xen/drivers/passthrough/pci.c    | 51 ++--------------------------------------
>  xen/include/xen/iommu.h          |  3 +++
>  3 files changed, 51 insertions(+), 49 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/groups.c b/xen/drivers/passthrough/groups.c
> index c6d00980b6..4e6e8022c1 100644
> --- a/xen/drivers/passthrough/groups.c
> +++ b/xen/drivers/passthrough/groups.c
> @@ -12,8 +12,12 @@
>   * GNU General Public License for more details.
>   */
>  
> +#include <xen/guest_access.h>
>  #include <xen/iommu.h>
> +#include <xen/pci.h>
>  #include <xen/radix-tree.h>
> +#include <xen/sched.h>
> +#include <xsm/xsm.h>
>  
>  struct iommu_group {
>      unsigned int id;
> @@ -81,6 +85,48 @@ int iommu_group_assign(struct pci_dev *pdev, void *arg)
>      return 0;
>  }
>  
> +int iommu_get_device_group(struct domain *d, pci_sbdf_t sbdf,
> +                           XEN_GUEST_HANDLE_64(uint32) buf,
> +                           unsigned int max_sdevs)
> +{
> +    struct iommu_group *grp = NULL;
> +    struct pci_dev *pdev;
> +    unsigned int i = 0;
> +
> +    pcidevs_lock();
> +
> +    for_each_pdev ( d, pdev )
> +    {
> +        if ( pdev->sbdf.sbdf == sbdf.sbdf )
> +        {
> +            grp = pdev->grp;
> +            break;
> +        }
> +    }
> +
> +    if ( !grp )
> +        goto out;
> +
> +    for_each_pdev ( d, pdev )
> +    {
> +        if ( xsm_get_device_group(XSM_HOOK, pdev->sbdf.sbdf) ||
> +             pdev->grp != grp )
> +            continue;
> +
> +        if ( i < max_sdevs &&

AFAICT you are adding the check here in order to keep current
behaviour?

But isn't it wrong to not report to the caller that the buffer was
smaller than required, and that the returned result is partial?

I don't see any way a caller can differentiate between a result that
uses the full buffer and one that's actually partial due to smaller
than required buffer provided. I think this function should return
-ENOSPC for such case.

Thanks, Roger.
Paul Durrant July 16, 2019, 12:20 p.m. UTC | #2
> -----Original Message-----
> From: Roger Pau Monne <roger.pau@citrix.com>
> Sent: 16 July 2019 12:28
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: xen-devel@lists.xenproject.org; Jan Beulich <jbeulich@suse.com>
> Subject: Re: [Xen-devel] [PATCH v3 4/4] iommu / pci: re-implement XEN_DOMCTL_get_device_group...
> 
> On Tue, Jul 16, 2019 at 11:16:57AM +0100, Paul Durrant wrote:
> > ... using the new iommu_group infrastructure.
> >
> > Because 'sibling' devices are now members of the same iommu_group,
> > implement the domctl by looking up the iommu_group of the pdev with the
> > matching SBDF and then finding all the assigned pdevs that are in the
> > group.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > ---
> > Cc: Jan Beulich <jbeulich@suse.com>
> >
> > v3:
> >  - Make 'max_sdevs' parameter in iommu_get_device_group() unsigned.
> >  - Add missing check of max_sdevs to avoid buffer overflow.
> >
> > v2:
> >  - Re-implement in the absence of a per-group devs list.
> >  - Make use of pci_sbdf_t.
> > ---
> >  xen/drivers/passthrough/groups.c | 46 ++++++++++++++++++++++++++++++++++++
> >  xen/drivers/passthrough/pci.c    | 51 ++--------------------------------------
> >  xen/include/xen/iommu.h          |  3 +++
> >  3 files changed, 51 insertions(+), 49 deletions(-)
> >
> > diff --git a/xen/drivers/passthrough/groups.c b/xen/drivers/passthrough/groups.c
> > index c6d00980b6..4e6e8022c1 100644
> > --- a/xen/drivers/passthrough/groups.c
> > +++ b/xen/drivers/passthrough/groups.c
> > @@ -12,8 +12,12 @@
> >   * GNU General Public License for more details.
> >   */
> >
> > +#include <xen/guest_access.h>
> >  #include <xen/iommu.h>
> > +#include <xen/pci.h>
> >  #include <xen/radix-tree.h>
> > +#include <xen/sched.h>
> > +#include <xsm/xsm.h>
> >
> >  struct iommu_group {
> >      unsigned int id;
> > @@ -81,6 +85,48 @@ int iommu_group_assign(struct pci_dev *pdev, void *arg)
> >      return 0;
> >  }
> >
> > +int iommu_get_device_group(struct domain *d, pci_sbdf_t sbdf,
> > +                           XEN_GUEST_HANDLE_64(uint32) buf,
> > +                           unsigned int max_sdevs)
> > +{
> > +    struct iommu_group *grp = NULL;
> > +    struct pci_dev *pdev;
> > +    unsigned int i = 0;
> > +
> > +    pcidevs_lock();
> > +
> > +    for_each_pdev ( d, pdev )
> > +    {
> > +        if ( pdev->sbdf.sbdf == sbdf.sbdf )
> > +        {
> > +            grp = pdev->grp;
> > +            break;
> > +        }
> > +    }
> > +
> > +    if ( !grp )
> > +        goto out;
> > +
> > +    for_each_pdev ( d, pdev )
> > +    {
> > +        if ( xsm_get_device_group(XSM_HOOK, pdev->sbdf.sbdf) ||
> > +             pdev->grp != grp )
> > +            continue;
> > +
> > +        if ( i < max_sdevs &&
> 
> AFAICT you are adding the check here in order to keep current
> behaviour?

Yes.

> But isn't it wrong to not report to the caller that the buffer was
> smaller than required, and that the returned result is partial?

Given that there is zero documentation I think your guess is as good as mine as to what the intention of the implementor was.

> 
> I don't see any way a caller can differentiate between a result that
> uses the full buffer and one that's actually partial due to smaller
> than required buffer provided. I think this function should return
> -ENOSPC for such case.

I'd prefer to stick to the principle of no change in behaviour. TBH I have not found any caller of xc_get_device_group() apart from a python binding and who knows what piece of antiquated code might sit on the other side of that. FWIW that code sets max_sdevs to 1024 so it's unlikely to run out of space so an ENOSPC might be ok. Still, I'd like to hear maintainer opinions on this.

  Paul

> 
> Thanks, Roger.
Jan Beulich July 24, 2019, 3:20 p.m. UTC | #3
On 16.07.2019 14:20, Paul Durrant wrote:
>> From: Roger Pau Monne <roger.pau@citrix.com>
>> Sent: 16 July 2019 12:28
>>
>> On Tue, Jul 16, 2019 at 11:16:57AM +0100, Paul Durrant wrote:
>>> @@ -81,6 +85,48 @@ int iommu_group_assign(struct pci_dev *pdev, void *arg)
>>>       return 0;
>>>   }
>>>
>>> +int iommu_get_device_group(struct domain *d, pci_sbdf_t sbdf,
>>> +                           XEN_GUEST_HANDLE_64(uint32) buf,
>>> +                           unsigned int max_sdevs)
>>> +{
>>> +    struct iommu_group *grp = NULL;
>>> +    struct pci_dev *pdev;
>>> +    unsigned int i = 0;
>>> +
>>> +    pcidevs_lock();
>>> +
>>> +    for_each_pdev ( d, pdev )
>>> +    {
>>> +        if ( pdev->sbdf.sbdf == sbdf.sbdf )
>>> +        {
>>> +            grp = pdev->grp;
>>> +            break;
>>> +        }
>>> +    }
>>> +
>>> +    if ( !grp )
>>> +        goto out;
>>> +
>>> +    for_each_pdev ( d, pdev )
>>> +    {
>>> +        if ( xsm_get_device_group(XSM_HOOK, pdev->sbdf.sbdf) ||
>>> +             pdev->grp != grp )
>>> +            continue;
>>> +
>>> +        if ( i < max_sdevs &&
>>
>> AFAICT you are adding the check here in order to keep current
>> behaviour?
> 
> Yes.
> 
>> But isn't it wrong to not report to the caller that the buffer was
>> smaller than required, and that the returned result is partial?
> 
> Given that there is zero documentation I think your guess is as good
> as mine as to what the intention of the implementor was.
> 
>>
>> I don't see any way a caller can differentiate between a result that
>> uses the full buffer and one that's actually partial due to smaller
>> than required buffer provided. I think this function should return
>> -ENOSPC for such case.
> 
> I'd prefer to stick to the principle of no change in behaviour. TBH I
> have not found any caller of xc_get_device_group() apart from a python
> binding and who knows what piece of antiquated code might sit on the
> other side of that. FWIW that code sets max_sdevs to 1024 so it's
> unlikely to run out of space so an ENOSPC might be ok. Still, I'd like
> to hear maintainer opinions on this.

How about we try to find a sufficiently backwards compatible solution
which still allows recognizing insufficient buffer space? First of all
the common null-handle approach could be used to get a total count.
There's not much risk of this value getting stale between two
successive domctl-s. And then returning -ENOBUFS upon exceeding the
provided buffer shouldn't really break pre-existing code: It surely
would misbehave anyway if the group was larger than what they think
it is.

Another option would be to isolate all of this compatibility stuff
into the libxc wrapper, making the hypercall itself return the actual
count irrespective of the passed in buffer size (i.e. a generalization
of the null-handle model).

Jan
Jan Beulich July 24, 2019, 3:27 p.m. UTC | #4
On 16.07.2019 12:16, Paul Durrant wrote:
> +int iommu_get_device_group(struct domain *d, pci_sbdf_t sbdf,
> +                           XEN_GUEST_HANDLE_64(uint32) buf,
> +                           unsigned int max_sdevs)
> +{
> +    struct iommu_group *grp = NULL;
> +    struct pci_dev *pdev;
> +    unsigned int i = 0;
> +
> +    pcidevs_lock();
> +
> +    for_each_pdev ( d, pdev )
> +    {
> +        if ( pdev->sbdf.sbdf == sbdf.sbdf )
> +        {
> +            grp = pdev->grp;
> +            break;
> +        }
> +    }
> +
> +    if ( !grp )
> +        goto out;
> +
> +    for_each_pdev ( d, pdev )
> +    {
> +        if ( xsm_get_device_group(XSM_HOOK, pdev->sbdf.sbdf) ||
> +             pdev->grp != grp )
> +            continue;
> +
> +        if ( i < max_sdevs &&
> +             unlikely(copy_to_guest_offset(buf, i++, &pdev->sbdf.sbdf, 1)) )

If you want to avoid breaking existing callers, you'll have to mimic
here ...

> -static int iommu_get_device_group(
> -    struct domain *d, u16 seg, u8 bus, u8 devfn,
> -    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
> -{
> -    const struct domain_iommu *hd = dom_iommu(d);
> -    struct pci_dev *pdev;
> -    int group_id, sdev_id;
> -    u32 bdf;
> -    int i = 0;
> -    const struct iommu_ops *ops = hd->platform_ops;
> -
> -    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
> -        return 0;
> -
> -    group_id = ops->get_device_group_id(seg, bus, devfn);
> -
> -    pcidevs_lock();
> -    for_each_pdev( d, pdev )
> -    {
> -        if ( (pdev->seg != seg) ||
> -             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
> -            continue;
> -
> -        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
> -            continue;
> -
> -        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
> -        if ( (sdev_id == group_id) && (i < max_sdevs) )
> -        {
> -            bdf = 0;
> -            bdf |= (pdev->bus & 0xff) << 16;
> -            bdf |= (pdev->devfn & 0xff) << 8;
> -
> -            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )

... this rather odd organization of BDF. Omitting the segment is, I
think, fine, as I don't expect groups to extend past segment
boundaries (and iirc neither Intel's nor AMD's implementation have
any means for this to happen).

Jan

Patch
diff mbox series

diff --git a/xen/drivers/passthrough/groups.c b/xen/drivers/passthrough/groups.c
index c6d00980b6..4e6e8022c1 100644
--- a/xen/drivers/passthrough/groups.c
+++ b/xen/drivers/passthrough/groups.c
@@ -12,8 +12,12 @@ 
  * GNU General Public License for more details.
  */
 
+#include <xen/guest_access.h>
 #include <xen/iommu.h>
+#include <xen/pci.h>
 #include <xen/radix-tree.h>
+#include <xen/sched.h>
+#include <xsm/xsm.h>
 
 struct iommu_group {
     unsigned int id;
@@ -81,6 +85,48 @@  int iommu_group_assign(struct pci_dev *pdev, void *arg)
     return 0;
 }
 
+int iommu_get_device_group(struct domain *d, pci_sbdf_t sbdf,
+                           XEN_GUEST_HANDLE_64(uint32) buf,
+                           unsigned int max_sdevs)
+{
+    struct iommu_group *grp = NULL;
+    struct pci_dev *pdev;
+    unsigned int i = 0;
+
+    pcidevs_lock();
+
+    for_each_pdev ( d, pdev )
+    {
+        if ( pdev->sbdf.sbdf == sbdf.sbdf )
+        {
+            grp = pdev->grp;
+            break;
+        }
+    }
+
+    if ( !grp )
+        goto out;
+
+    for_each_pdev ( d, pdev )
+    {
+        if ( xsm_get_device_group(XSM_HOOK, pdev->sbdf.sbdf) ||
+             pdev->grp != grp )
+            continue;
+
+        if ( i < max_sdevs &&
+             unlikely(copy_to_guest_offset(buf, i++, &pdev->sbdf.sbdf, 1)) )
+        {
+            pcidevs_unlock();
+            return -EFAULT;
+        }
+    }
+
+ out:
+    pcidevs_unlock();
+
+    return i;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 4bb9996049..7171b50557 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1564,53 +1564,6 @@  int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
     return ret;
 }
 
-static int iommu_get_device_group(
-    struct domain *d, u16 seg, u8 bus, u8 devfn,
-    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
-{
-    const struct domain_iommu *hd = dom_iommu(d);
-    struct pci_dev *pdev;
-    int group_id, sdev_id;
-    u32 bdf;
-    int i = 0;
-    const struct iommu_ops *ops = hd->platform_ops;
-
-    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
-        return 0;
-
-    group_id = ops->get_device_group_id(seg, bus, devfn);
-
-    pcidevs_lock();
-    for_each_pdev( d, pdev )
-    {
-        if ( (pdev->seg != seg) ||
-             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
-            continue;
-
-        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
-            continue;
-
-        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
-        if ( (sdev_id == group_id) && (i < max_sdevs) )
-        {
-            bdf = 0;
-            bdf |= (pdev->bus & 0xff) << 16;
-            bdf |= (pdev->devfn & 0xff) << 8;
-
-            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
-            {
-                pcidevs_unlock();
-                return -1;
-            }
-            i++;
-        }
-    }
-
-    pcidevs_unlock();
-
-    return i;
-}
-
 void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
 {
     pcidevs_lock();
@@ -1667,11 +1620,11 @@  int iommu_do_pci_domctl(
         max_sdevs = domctl->u.get_device_group.max_sdevs;
         sdevs = domctl->u.get_device_group.sdev_array;
 
-        ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
+        ret = iommu_get_device_group(d, PCI_SBDF3(seg, bus, devfn), sdevs,
+                                     max_sdevs);
         if ( ret < 0 )
         {
             dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
-            ret = -EFAULT;
             domctl->u.get_device_group.num_sdevs = 0;
         }
         else
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index c93f580fdc..6301833219 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -321,6 +321,9 @@  extern struct page_list_head iommu_pt_cleanup_list;
 
 void iommu_groups_init(void);
 int iommu_group_assign(struct pci_dev *pdev, void *arg);
+int iommu_get_device_group(struct domain *d, pci_sbdf_t sbdf,
+                           XEN_GUEST_HANDLE_64(uint32) buf,
+                           unsigned int max_sdevs);
 
 #endif /* CONFIG_HAS_PCI */