
[v8,2/3] VT-d: Wrap a _sync version for all VT-d flush interfaces

Message ID 1458799079-79825-2-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived

Commit Message

Quan Xu March 24, 2016, 5:57 a.m. UTC
For consistency, we wrap a _sync version for all VT-d flush interfaces.
It simplifies caller logic and makes code more readable as well.

Signed-off-by: Quan Xu <quan.xu@intel.com>
---
 xen/drivers/passthrough/vtd/extern.h  |   2 +
 xen/drivers/passthrough/vtd/qinval.c  | 173 ++++++++++++++++++++--------------
 xen/drivers/passthrough/vtd/x86/ats.c |  12 +--
 3 files changed, 106 insertions(+), 81 deletions(-)

Comments

Dario Faggioli March 24, 2016, 1:56 p.m. UTC | #1
On Thu, 2016-03-24 at 13:57 +0800, Quan Xu wrote:
> For consistency, we wrap a _sync version for all VT-d flush
> interfaces.
>
I'm sorry, maybe it's me, but "for consistency" with what?

I see where this comes from, if I look at v7. But when this patch is
committed, what it is doing and why we decided to do it should be
evident just from reading the changelog, without having to google for
the review history.

So, please, try to describe the situation a little bit better (e.g., do
we have inconsistencies right now? Is this needed to avoid introducing
inconsistencies by means of this series? Etc.).

> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -72,6 +72,70 @@ static void qinval_update_qtail(struct iommu *iommu, unsigned int index)
>      dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT));
>  }
>  
> +static int __must_check queue_invalidate_wait(struct iommu *iommu,
> +    u8 iflag, u8 sw, u8 fn)
> +{
>
It looks like you are "just" moving this function, without making any
change to the code; is this the case?

Assuming it is, mixing pure code movement and functional changes in the
same patch makes reviewing the patch itself harder. And pure code motion
is also bad for archaeologists (`git blame' would point at this commit
for all of this function!).

So, I'd say either isolate the code movement in a pre-patch, or try
using forward declarations. Given how moving messes up history, my
personal preference would be for the latter.
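
Something like this would do (a minimal sketch, reusing the code from
this very patch, with queue_invalidate_wait() left at its original
location):

    /* Forward declaration, so that invalidate_sync() can be defined
     * before queue_invalidate_wait() without moving the latter. */
    static int __must_check queue_invalidate_wait(struct iommu *iommu,
                                                  u8 iflag, u8 sw, u8 fn);

    static int invalidate_sync(struct iommu *iommu)
    {
        struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

        if ( qi_ctrl->qinval_maddr )
            return queue_invalidate_wait(iommu, 0, 1, 1);

        return 0;
    }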

> +static int invalidate_sync(struct iommu *iommu)
> +{
> +    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
> +
> +    if ( qi_ctrl->qinval_maddr )
> +        return queue_invalidate_wait(iommu, 0, 1, 1);
> +
> +    return 0;
> +}

Same for this; even worse, in terms of how hard it makes this patch to
review, as can be seen...

> @@ -135,65 +208,12 @@ static void queue_invalidate_iotlb(struct iommu *iommu,
>
> -static int invalidate_sync(struct iommu *iommu)
> +static int queue_invalidate_iotlb_sync(struct iommu *iommu,
> +    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
>  {
> -    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
> +    queue_invalidate_iotlb(iommu, granu, dr, dw, did, am, ih, addr);
>  
> -    if ( qi_ctrl->qinval_maddr )
> -        return queue_invalidate_wait(iommu, 0, 1, 1);
> -    return 0;
> +    return invalidate_sync(iommu);
>  }
>  
...here!

The rest of this patch looks fine to me, with only one more doubt.
Here:

> --- a/xen/drivers/passthrough/vtd/x86/ats.c
> +++ b/xen/drivers/passthrough/vtd/x86/ats.c
> @@ -118,7 +118,6 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
>      {
>          u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
>          bool_t sbit;
> -        int rc = 0;
>  
>          /* Only invalidate devices that belong to this IOMMU */
>          if ( pdev->iommu != iommu )
> @@ -134,8 +133,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
>              /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
>              sbit = 1;
>              addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
> -            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
> -                                     sid, sbit, addr);
> +            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
> +                                           sid, sbit, addr);
>              break;
>          case DMA_TLB_PSI_FLUSH:
>              if ( !device_in_domain(iommu, pdev, did) )
> @@ -154,16 +153,13 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
>                  addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
>              }
>  
> -            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
> -                                     sid, sbit, addr);
> +            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
> +                                           sid, sbit, addr);
>              break;
>          default:
>              dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
>              return -EOPNOTSUPP;
>          }
> -
> -        if ( !ret )
> -            ret = rc;
>      }
>  
>      return ret;
>
Am I misreading something, or are we introducing synchronous handling,
which was not there before?

If yes, is it ok to do this in this patch?

And if yes again, I think it should at least be noted in the
changelog, as it would mean that the patch is not only introducing some
wrappers.

Regards,
Dario
Dario Faggioli March 24, 2016, 3:06 p.m. UTC | #2
On Thu, 2016-03-24 at 14:56 +0100, Dario Faggioli wrote:
> On Thu, 2016-03-24 at 13:57 +0800, Quan Xu wrote:
> > 
> > @@ -134,8 +133,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
> >              /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
> >              sbit = 1;
> >              addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
> > -            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
> > -                                     sid, sbit, addr);
> > +            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
> > +                                           sid, sbit, addr);
> >              break;
> >          case DMA_TLB_PSI_FLUSH:
> >              if ( !device_in_domain(iommu, pdev, did) )
> > @@ -154,16 +153,13 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
> >                  addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
> >              }
> > 
> > -            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
> > -                                     sid, sbit, addr);
> > +            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
> > +                                           sid, sbit, addr);
> >              break;
> >          default:
> >              dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
> >              return -EOPNOTSUPP;
> >          }
> > -
> > -        if ( !ret )
> > -            ret = rc;
> >      }
> > 
> >      return ret;
> > 
> Am I misreading something, or are we introducing synchronous handling,
> which was not there before?
> 
> If yes, is it ok to do this in this patch?
> 
> And if yes again, I think it should at least be noted in the
> changelog, as it would mean that the patch is not only introducing
> some wrappers.
> 
Ok, I think I see what's happening here. Before this patch,
invalidate_sync() was being called inside qinval_device_iotlb(), so we
were synchronous already, and we need to continue to be like that, by
calling the _sync() variants.
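
In call-flow terms, my reading of the before/after (with the per-device
loop and error handling elided) is:

    /* Before: per-device descriptors queued, then one wait for all. */
    if ( flush_dev_iotlb )
        ret = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
    rc = invalidate_sync(iommu);

    /* After: each device flush queues its descriptor and waits. */
    ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
                                   sid, sbit, addr);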

Yes, if this is what happens, this also looks ok to me.

Sorry for the noise.

Regards,
Dario

> Regards,
> Dario
Quan Xu March 25, 2016, 3:11 a.m. UTC | #3
On March 24, 2016 11:06pm, <dario.faggioli@citrix.com> wrote:
> On Thu, 2016-03-24 at 14:56 +0100, Dario Faggioli wrote:
> > On Thu, 2016-03-24 at 13:57 +0800, Quan Xu wrote:
> > > 
> > > @@ -134,8 +133,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
> > >              /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
> > >              sbit = 1;
> > >              addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
> > > -            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
> > > -                                     sid, sbit, addr);
> > > +            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
> > > +                                           sid, sbit, addr);
> > >              break;
> > >          case DMA_TLB_PSI_FLUSH:
> > >              if ( !device_in_domain(iommu, pdev, did) )
> > > @@ -154,16 +153,13 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
> > >                  addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
> > >              }
> > > 
> > > -            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
> > > -                                     sid, sbit, addr);
> > > +            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
> > > +                                           sid, sbit, addr);
> > >              break;
> > >          default:
> > >              dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
> > >              return -EOPNOTSUPP;
> > >          }
> > > -
> > > -        if ( !ret )
> > > -            ret = rc;
> > >      }
> > > 
> > >      return ret;
> > > 
> > Am I misreading something, or are we introducing synchronous handling,
> > which was not there before?
> > 
> > If yes, is it ok to do this in this patch?
> > 
> > And if yes again, I think it should at least be noted in the
> > changelog, as it would mean that the patch is not only introducing
> > some wrappers.
> > 
> Ok, I think I see what's happening here. Before this patch,
> invalidate_sync() was being called inside qinval_device_iotlb(), so we
> were synchronous already, and we need to continue to be like that, by
> calling the _sync() variants.
> 
> Yes, if this is what happens, this also looks ok to me.
> 
> Sorry for the noise.
> 

Yes, it is not easy to understand, and to me it is difficult to describe in the changelog.
Let me elaborate briefly on the evolution:
1. In the original code, without my patch, it is:
        ...
        if ( flush_dev_iotlb )
            ret = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
        rc = invalidate_sync(iommu);
        ...


In dev_invalidate_iotlb(), it scans the ats_devices list and calls qinval_device_iotlb() to flush the Device-TLB via a 'Device-TLB Invalidate Descriptor'.
In invalidate_sync(), it synchronizes with hardware, via an 'Invalidation Wait Descriptor', on the invalidation request descriptors submitted before the wait descriptor.

If we assign multiple ATS devices to a domain and invalidate_sync() times out, we cannot tell which device timed out. Then:

2. In my previous patch, I put an invalidate_sync() variant (as we need to pass down the device's SBDF to hide the ATS device) within dev_invalidate_iotlb(), to flush the ATS devices one by one. If it timed out, I could find the bogus device and hide it.

3. As Kevin mentioned, that puts the invalidation sync within dev_invalidate_iotlb(), while for all other IOMMU invalidations the sync is done afterwards. That is the consistency issue.
   That is also why I provide the _sync versions in v8. (Could you help me enhance the description? :))
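
The resulting v8 pattern is just a thin wrapper per flush interface,
e.g. (taken from the patch below):

    static int queue_invalidate_iotlb_sync(struct iommu *iommu,
        u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
    {
        queue_invalidate_iotlb(iommu, granu, dr, dw, did, am, ih, addr);

        return invalidate_sync(iommu);
    }

This way the queueing of a descriptor and the wait for its completion
always sit next to each other, for every kind of invalidation.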

> Yes, if this is what happens, this also looks ok to me.
> 
> Sorry for the noise.

Dario, feel free to express yourself. As you know, I'd appreciate your (and the others') comments. :)

Quan

Patch

diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index d4d37c3..6d3187d 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -61,6 +61,8 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 
 int qinval_device_iotlb(struct iommu *iommu,
                         u32 max_invs_pend, u16 sid, u16 size, u64 addr);
+int qinval_device_iotlb_sync(struct iommu *iommu, u32 max_invs_pend,
+                             u16 sid, u16 size, u64 addr);
 
 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index 52ba2c2..ad9e265 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -72,6 +72,70 @@  static void qinval_update_qtail(struct iommu *iommu, unsigned int index)
     dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT));
 }
 
+static int __must_check queue_invalidate_wait(struct iommu *iommu,
+    u8 iflag, u8 sw, u8 fn)
+{
+    s_time_t timeout;
+    volatile u32 poll_slot = QINVAL_STAT_INIT;
+    unsigned int index;
+    unsigned long flags;
+    u64 entry_base;
+    struct qinval_entry *qinval_entry, *qinval_entries;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
+                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
+    qinval_entries = map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
+
+    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
+    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
+    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
+    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
+    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.lo.sdata = QINVAL_STAT_DONE;
+    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(&poll_slot) >> 2;
+
+    unmap_vtd_domain_page(qinval_entries);
+    qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+    /* Now we don't support interrupt method */
+    if ( sw )
+    {
+        /* In case all wait descriptor writes to same addr with same data */
+        timeout = NOW() + IOMMU_QI_TIMEOUT;
+        while ( poll_slot != QINVAL_STAT_DONE )
+        {
+            if ( NOW() > timeout )
+            {
+                print_qi_regs(iommu);
+                printk(XENLOG_WARNING VTDPREFIX
+                       "Queue invalidate wait descriptor timed out.\n");
+                return -ETIMEDOUT;
+            }
+
+            cpu_relax();
+        }
+
+        return 0;
+    }
+
+    return -EOPNOTSUPP;
+}
+
+static int invalidate_sync(struct iommu *iommu)
+{
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( qi_ctrl->qinval_maddr )
+        return queue_invalidate_wait(iommu, 0, 1, 1);
+
+    return 0;
+}
+
 static void queue_invalidate_context(struct iommu *iommu,
     u16 did, u16 source_id, u8 function_mask, u8 granu)
 {
@@ -102,6 +166,15 @@  static void queue_invalidate_context(struct iommu *iommu,
     unmap_vtd_domain_page(qinval_entries);
 }
 
+static int queue_invalidate_context_sync(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    queue_invalidate_context(iommu, did, source_id,
+                             function_mask, granu);
+
+    return invalidate_sync(iommu);
+}
+
 static void queue_invalidate_iotlb(struct iommu *iommu,
     u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
 {
@@ -135,65 +208,12 @@  static void queue_invalidate_iotlb(struct iommu *iommu,
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
-static int __must_check queue_invalidate_wait(struct iommu *iommu,
-    u8 iflag, u8 sw, u8 fn)
-{
-    s_time_t timeout;
-    volatile u32 poll_slot = QINVAL_STAT_INIT;
-    unsigned int index;
-    unsigned long flags;
-    u64 entry_base;
-    struct qinval_entry *qinval_entry, *qinval_entries;
-
-    spin_lock_irqsave(&iommu->register_lock, flags);
-    index = qinval_next_index(iommu);
-    entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
-                 ((index >> QINVAL_ENTRY_ORDER) << PAGE_SHIFT);
-    qinval_entries = map_vtd_domain_page(entry_base);
-    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
-
-    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
-    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
-    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
-    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
-    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
-    qinval_entry->q.inv_wait_dsc.lo.sdata = QINVAL_STAT_DONE;
-    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
-    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(&poll_slot) >> 2;
-
-    unmap_vtd_domain_page(qinval_entries);
-    qinval_update_qtail(iommu, index);
-    spin_unlock_irqrestore(&iommu->register_lock, flags);
-
-    /* Now we don't support interrupt method */
-    if ( sw )
-    {
-        /* In case all wait descriptor writes to same addr with same data */
-        timeout = NOW() + IOMMU_QI_TIMEOUT;
-        while ( poll_slot != QINVAL_STAT_DONE )
-        {
-            if ( NOW() > timeout )
-            {
-                print_qi_regs(iommu);
-                printk(XENLOG_WARNING VTDPREFIX
-                       "Queue invalidate wait descriptor timed out.\n");
-                return -ETIMEDOUT;
-            }
-            cpu_relax();
-        }
-        return 0;
-    }
-
-    return -EOPNOTSUPP;
-}
-
-static int invalidate_sync(struct iommu *iommu)
+static int queue_invalidate_iotlb_sync(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
 {
-    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    queue_invalidate_iotlb(iommu, granu, dr, dw, did, am, ih, addr);
 
-    if ( qi_ctrl->qinval_maddr )
-        return queue_invalidate_wait(iommu, 0, 1, 1);
-    return 0;
+    return invalidate_sync(iommu);
 }
 
 int qinval_device_iotlb(struct iommu *iommu,
@@ -229,6 +249,14 @@  int qinval_device_iotlb(struct iommu *iommu,
     return 0;
 }
 
+int qinval_device_iotlb_sync(struct iommu *iommu,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    qinval_device_iotlb(iommu, max_invs_pend, sid, size, addr);
+
+    return invalidate_sync(iommu);
+}
+
 static void queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
 {
     unsigned long flags;
@@ -256,7 +284,7 @@  static void queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
-static int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+static int queue_invalidate_iec_sync(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
 {
     int ret;
 
@@ -273,12 +301,12 @@  static int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
 
 int iommu_flush_iec_global(struct iommu *iommu)
 {
-    return __iommu_flush_iec(iommu, IEC_GLOBAL_INVL, 0, 0);
+    return queue_invalidate_iec_sync(iommu, IEC_GLOBAL_INVL, 0, 0);
 }
 
 int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx)
 {
-   return __iommu_flush_iec(iommu, IEC_INDEX_INVL, im, iidx);
+   return queue_invalidate_iec_sync(iommu, IEC_INDEX_INVL, im, iidx);
 }
 
 static int flush_context_qi(
@@ -304,11 +332,9 @@  static int flush_context_qi(
     }
 
     if ( qi_ctrl->qinval_maddr != 0 )
-    {
-        queue_invalidate_context(iommu, did, sid, fm,
-                                 type >> DMA_CCMD_INVL_GRANU_OFFSET);
-        ret = invalidate_sync(iommu);
-    }
+        ret = queue_invalidate_context_sync(iommu, did, sid, fm,
+                  type >> DMA_CCMD_INVL_GRANU_OFFSET);
+
     return ret;
 }
 
@@ -338,23 +364,24 @@  static int flush_iotlb_qi(
 
     if ( qi_ctrl->qinval_maddr != 0 )
     {
-        int rc;
-
         /* use queued invalidation */
         if (cap_write_drain(iommu->cap))
             dw = 1;
         if (cap_read_drain(iommu->cap))
             dr = 1;
         /* Need to conside the ih bit later */
-        queue_invalidate_iotlb(iommu,
-                               type >> DMA_TLB_FLUSH_GRANU_OFFSET, dr,
-                               dw, did, size_order, 0, addr);
+        ret = queue_invalidate_iotlb_sync(iommu,
+                  type >> DMA_TLB_FLUSH_GRANU_OFFSET, dr, dw, did,
+                  size_order, 0, addr);
+
+        /* TODO: Timeout error handling to be added later */
+        if ( ret )
+            return ret;
+
         if ( flush_dev_iotlb )
             ret = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
-        rc = invalidate_sync(iommu);
-        if ( !ret )
-            ret = rc;
     }
+
     return ret;
 }
 
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c
index 334b9c1..7b1c07b 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -118,7 +118,6 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
     {
         u16 sid = PCI_BDF2(pdev->bus, pdev->devfn);
         bool_t sbit;
-        int rc = 0;
 
         /* Only invalidate devices that belong to this IOMMU */
         if ( pdev->iommu != iommu )
@@ -134,8 +133,8 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
             /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
             sbit = 1;
             addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
-            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
-                                     sid, sbit, addr);
+            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
+                                           sid, sbit, addr);
             break;
         case DMA_TLB_PSI_FLUSH:
             if ( !device_in_domain(iommu, pdev, did) )
@@ -154,16 +153,13 @@  int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
                 addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
             }
 
-            rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
-                                     sid, sbit, addr);
+            ret = qinval_device_iotlb_sync(iommu, pdev->ats_queue_depth,
+                                           sid, sbit, addr);
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
             return -EOPNOTSUPP;
         }
-
-        if ( !ret )
-            ret = rc;
     }
 
     return ret;