diff mbox

[v9,3/3] VT-d: Fix vt-d Device-TLB flush timeout issue

Message ID 1459522059-102365-4-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Quan Xu April 1, 2016, 2:47 p.m. UTC
If Device-TLB flush timed out, we would hide the target ATS
device and crash the domain owning this ATS device. If impacted
domain is hardware domain, just throw out a warning (done in
queue_invalidate_wait).

By hiding the device, we make sure it can't be assigned to
any domain any longer (see device_assigned).

Signed-off-by: Quan Xu <quan.xu@intel.com>
---
 xen/drivers/passthrough/pci.c         |  6 ++--
 xen/drivers/passthrough/vtd/extern.h  |  3 +-
 xen/drivers/passthrough/vtd/qinval.c  | 58 +++++++++++++++++++++++++++++++++--
 xen/drivers/passthrough/vtd/x86/ats.c | 11 ++++---
 xen/include/xen/pci.h                 |  1 +
 5 files changed, 68 insertions(+), 11 deletions(-)

Comments

Jan Beulich April 5, 2016, 9:47 a.m. UTC | #1
>>> On 01.04.16 at 16:47, <quan.xu@intel.com> wrote:
> If Device-TLB flush timed out, we would hide the target ATS

Please re-consider the use of the word "would" in all your patch
descriptions.

> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -217,6 +217,58 @@ static int invalidate_sync(struct iommu *iommu)
>      return 0;
>  }
>  
> +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
> +                                         u16 seg, u8 bus, u8 devfn)
> +{
> +    struct domain *d = NULL;
> +    struct pci_dev *pdev;
> +
> +    if ( test_bit(did, iommu->domid_bitmap) )
> +        d = rcu_lock_domain_by_id(iommu->domid_map[did]);
> +
> +    if ( d == NULL )
> +        return;

This should be accompanied by a comment explaining why
returning here without indicating any error is okay.

> +    pcidevs_lock();
> +
> +    for_each_pdev(d, pdev)
> +    {
> +        if ( ( pdev->seg == seg ) &&
> +             ( pdev->bus == bus ) &&
> +             ( pdev->devfn == devfn ) )

Stray blanks.

> +        {
> +            ASSERT(pdev->domain);
> +            list_del(&pdev->domain_list);
> +            pdev->domain = NULL;
> +            pci_hide_existing_device(pdev);
> +            break;
> +        }
> +    }
> +
> +    pcidevs_unlock();
> +
> +    if ( !is_hardware_domain(d) )
> +        domain_crash(d);

else printk();

> @@ -251,11 +303,13 @@ int qinval_device_iotlb(struct iommu *iommu,
>  }
>  
>  int qinval_device_iotlb_sync(struct iommu *iommu,
> -    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
> +    u32 max_invs_pend, u16 did, u16 seg, u8 bus, u8 devfn, u16 size, u64 addr)
>  {
> +    u16 sid = PCI_BDF2(bus, devfn);

Pointless local variable (but being a matter of taste, the VT-d
maintainers will have the final say).

> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -116,6 +116,7 @@ const unsigned long *pci_get_ro_map(u16 seg);
>  int pci_add_device(u16 seg, u8 bus, u8 devfn,
>                     const struct pci_dev_info *, nodeid_t node);
>  int pci_remove_device(u16 seg, u8 bus, u8 devfn);
> +void pci_hide_existing_device(struct pci_dev *pdev);
>  int pci_ro_device(int seg, int bus, int devfn);
>  int pci_hide_device(int bus, int devfn);

Please move the addition here.

Jan
Quan Xu April 10, 2016, 2:28 p.m. UTC | #2
On April 05, 2016 5:48pm, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 01.04.16 at 16:47, <quan.xu@intel.com> wrote:
> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
> > +                                         u16 seg, u8 bus, u8 devfn)
> {
> > +    struct domain *d = NULL;
> > +    struct pci_dev *pdev;
> > +
> > +    if ( test_bit(did, iommu->domid_bitmap) )
> > +        d = rcu_lock_domain_by_id(iommu->domid_map[did]);
> > +
> > +    if ( d == NULL )
> > +        return;
> 
> This should be accompanied by a comment explaining why returning here
> without indicating any error is okay.
> 

I'd say:
"The domain has been freed, and the device no longer belongs to this domain."



> > +        {
> > +            ASSERT(pdev->domain);
> > +            list_del(&pdev->domain_list);
> > +            pdev->domain = NULL;
> > +            pci_hide_existing_device(pdev);
> > +            break;
> > +        }
> > +    }
> > +
> > +    pcidevs_unlock();
> > +
> > +    if ( !is_hardware_domain(d) )
> > +        domain_crash(d);
> 
> else printk();
> 

As I mentioned in commit log "If impacted domain is hardware domain, just throw out a warning (done in queue_invalidate_wait).",
I am not sure whether we really need a printk() at this point or not.

> > @@ -251,11 +303,13 @@ int qinval_device_iotlb(struct iommu *iommu,  }
> >
> >  int qinval_device_iotlb_sync(struct iommu *iommu,
> > -    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
> > +    u32 max_invs_pend, u16 did, u16 seg, u8 bus, u8 devfn, u16 size,
> > + u64 addr)
> >  {
> > +    u16 sid = PCI_BDF2(bus, devfn);
> 
> Pointless local variable (but being a matter of taste, the VT-d maintainers will
> have the final say).
> 

I will drop this local variable.

> > --- a/xen/include/xen/pci.h
> > +++ b/xen/include/xen/pci.h
> > @@ -116,6 +116,7 @@ const unsigned long *pci_get_ro_map(u16 seg);  int
> > pci_add_device(u16 seg, u8 bus, u8 devfn,
> >                     const struct pci_dev_info *, nodeid_t node);  int
> > pci_remove_device(u16 seg, u8 bus, u8 devfn);
> > +void pci_hide_existing_device(struct pci_dev *pdev);
> >  int pci_ro_device(int seg, int bus, int devfn);  int
> > pci_hide_device(int bus, int devfn);
> 
> Please move the addition here.
> 


Agreed.


Quan
Jan Beulich April 11, 2016, 4:17 p.m. UTC | #3
>>> On 10.04.16 at 16:28, <quan.xu@intel.com> wrote:
> On April 05, 2016 5:48pm, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 01.04.16 at 16:47, <quan.xu@intel.com> wrote:
>> > +        {
>> > +            ASSERT(pdev->domain);
>> > +            list_del(&pdev->domain_list);
>> > +            pdev->domain = NULL;
>> > +            pci_hide_existing_device(pdev);
>> > +            break;
>> > +        }
>> > +    }
>> > +
>> > +    pcidevs_unlock();
>> > +
>> > +    if ( !is_hardware_domain(d) )
>> > +        domain_crash(d);
>> 
>> else printk();
>> 
> 
> As I mentioned in commit log "If impacted domain is hardware domain, just 
> throw out a warning (done in queue_invalidate_wait).",
> I am not sure whether we really need a printk() at this point or not.

I think the printk() belongs here; I do agree there's no need for two
printk()s on the same call tree.

Jan
diff mbox

Patch

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 9f1716a..9a214c6 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -420,7 +420,7 @@  static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
     xfree(pdev);
 }
 
-static void _pci_hide_device(struct pci_dev *pdev)
+void pci_hide_existing_device(struct pci_dev *pdev)
 {
     if ( pdev->domain )
         return;
@@ -437,7 +437,7 @@  int __init pci_hide_device(int bus, int devfn)
     pdev = alloc_pdev(get_pseg(0), bus, devfn);
     if ( pdev )
     {
-        _pci_hide_device(pdev);
+        pci_hide_existing_device(pdev);
         rc = 0;
     }
     pcidevs_unlock();
@@ -467,7 +467,7 @@  int __init pci_ro_device(int seg, int bus, int devfn)
     }
 
     __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
-    _pci_hide_device(pdev);
+    pci_hide_existing_device(pdev);
 
     return 0;
 }
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index 6d3187d..94e2c11 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -62,7 +62,8 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 int qinval_device_iotlb(struct iommu *iommu,
                         u32 max_invs_pend, u16 sid, u16 size, u64 addr);
 int qinval_device_iotlb_sync(struct iommu *iommu, u32 max_invs_pend,
-                             u16 sid, u16 size, u64 addr);
+                             u16 did, u16 seg, u8 bus, u8 devfn,
+                             u16 size, u64 addr);
 
 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index d12661b..2c54dc0 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -217,6 +217,58 @@  static int invalidate_sync(struct iommu *iommu)
     return 0;
 }
 
+static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
+                                         u16 seg, u8 bus, u8 devfn)
+{
+    struct domain *d = NULL;
+    struct pci_dev *pdev;
+
+    if ( test_bit(did, iommu->domid_bitmap) )
+        d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+
+    if ( d == NULL )
+        return;
+
+    pcidevs_lock();
+
+    for_each_pdev(d, pdev)
+    {
+        if ( ( pdev->seg == seg ) &&
+             ( pdev->bus == bus ) &&
+             ( pdev->devfn == devfn ) )
+        {
+            ASSERT(pdev->domain);
+            list_del(&pdev->domain_list);
+            pdev->domain = NULL;
+            pci_hide_existing_device(pdev);
+            break;
+        }
+    }
+
+    pcidevs_unlock();
+
+    if ( !is_hardware_domain(d) )
+        domain_crash(d);
+
+    rcu_unlock_domain(d);
+}
+
+int dev_invalidate_sync(struct iommu *iommu, u16 did,
+                        u16 seg, u8 bus, u8 devfn)
+{
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    int rc = 0;
+
+    if ( qi_ctrl->qinval_maddr )
+    {
+        rc = queue_invalidate_wait(iommu, 0, 1, 1);
+        if ( rc == -ETIMEDOUT )
+            dev_invalidate_iotlb_timeout(iommu, did, seg, bus, devfn);
+    }
+
+    return rc;
+}
+
 int qinval_device_iotlb(struct iommu *iommu,
     u32 max_invs_pend, u16 sid, u16 size, u64 addr)
 {
@@ -251,11 +303,13 @@  int qinval_device_iotlb(struct iommu *iommu,
 }
 
 int qinval_device_iotlb_sync(struct iommu *iommu,
-    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+    u32 max_invs_pend, u16 did, u16 seg, u8 bus, u8 devfn, u16 size, u64 addr)
 {
+    u16 sid = PCI_BDF2(bus, devfn);
+
     qinval_device_iotlb(iommu, max_invs_pend, sid, size, addr);
 
-    return invalidate_sync(iommu);
+    return dev_invalidate_sync(iommu, did, seg, bus, devfn);
 }
 
 static void queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c
index 7b1c07b..5d1ebea 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -116,7 +116,6 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 
     list_for_each_entry( pdev, &ats_devices, list )
     {
-        u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
         bool_t sbit;
 
         /* Only invalidate devices that belong to this IOMMU */
@@ -133,8 +132,9 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
             /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
             sbit = 1;
             addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
-            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
-                                           sid, sbit, addr);
+            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth, did,
+                                           pdev->seg, pdev->bus, pdev->devfn,
+                                           sbit, addr);
             break;
         case DMA_TLB_PSI_FLUSH:
             if ( !device_in_domain(iommu, pdev, did) )
@@ -153,8 +153,9 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
                 addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
             }
 
-            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
-                                           sid, sbit, addr);
+            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth, did,
+                                           pdev->seg, pdev->bus, pdev->devfn,
+                                           sbit, addr);
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 6ed29dd..bb9f791 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -116,6 +116,7 @@  const unsigned long *pci_get_ro_map(u16 seg);
 int pci_add_device(u16 seg, u8 bus, u8 devfn,
                    const struct pci_dev_info *, nodeid_t node);
 int pci_remove_device(u16 seg, u8 bus, u8 devfn);
+void pci_hide_existing_device(struct pci_dev *pdev);
 int pci_ro_device(int seg, int bus, int devfn);
 int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);