diff mbox

[v5,09/10] IOMMU: propagate IOMMU Device-TLB flush error up to IOMMU suspending

Message ID 1463558911-98187-10-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Quan Xu May 18, 2016, 8:08 a.m. UTC
Propagate the IOMMU Device-TLB flush error up to IOMMU suspending.

Signed-off-by: Quan Xu <quan.xu@intel.com>

CC: Jan Beulich <jbeulich@suse.com>
CC: Liu Jinsong <jinsong.liu@alibaba-inc.com>
CC: Keir Fraser <keir@xen.org>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien.grall@arm.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: Feng Wu <feng.wu@intel.com>
---
 xen/arch/x86/acpi/power.c                     | 72 ++++++++++++++++++++-------
 xen/drivers/passthrough/amd/iommu_init.c      |  9 +++-
 xen/drivers/passthrough/amd/pci_amd_iommu.c   |  2 +-
 xen/drivers/passthrough/iommu.c               |  6 ++-
 xen/drivers/passthrough/vtd/iommu.c           | 45 +++++++++++++----
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  2 +-
 xen/include/xen/iommu.h                       |  4 +-
 7 files changed, 105 insertions(+), 35 deletions(-)

Comments

Jan Beulich May 24, 2016, 8:21 a.m. UTC | #1
>>> On 18.05.16 at 10:08, <quan.xu@intel.com> wrote:
> --- a/xen/arch/x86/acpi/power.c
> +++ b/xen/arch/x86/acpi/power.c
> @@ -43,36 +43,68 @@ struct acpi_sleep_info acpi_sinfo;
>  
>  void do_suspend_lowlevel(void);
>  
> +enum dev_power_saved
> +{
> +    SAVED_NONE,    /* None of device power saved */
> +    SAVED_CONSOLE, /* Device power of console saved */
> +    SAVED_TIME,    /* Device power of time saved */
> +    SAVED_I8259A,  /* Device power of I8259A saved */
> +    SAVED_IOAPIC,  /* Device power of IOAPIC saved */
> +    SAVED_IOMMU,   /* Device power of IOMMU saved */
> +    SAVED_LAPIC,   /* Device power of LAPIC saved */
> +};

Please avoid comments saying nothing substantially different than
the code they accompany, and also not helping the reader to
understand the commented code.

>  static int device_power_down(void)
>  {
> -    console_suspend();
> +    if ( console_suspend() )
> +        return SAVED_CONSOLE;

I said so on the previous round, and I need to repeat it now: If
console_suspend() fails, you saved _nothing_.

> -    time_suspend();
> +    if ( time_suspend() )
> +        return SAVED_TIME;
>  
> -    i8259A_suspend();
> +    if ( i8259A_suspend() )
> +        return SAVED_I8259A;
>  
> +    /* ioapic_suspend cannot fail */
>      ioapic_suspend();
>  
> -    iommu_suspend();
> +    if ( iommu_suspend() )
> +        return SAVED_IOMMU;
>  
> -    lapic_suspend();
> +    if ( lapic_suspend() )
> +        return SAVED_LAPIC;
>  
>      return 0;

And this silently means SAVED_NONE, whereas here you saved
everything. Yielding clearly bogus code ...

> -static void device_power_up(void)
> +static void device_power_up(enum dev_power_saved saved)
>  {
> -    lapic_resume();
> -
> -    iommu_resume();
> -
> -    ioapic_resume();
> -
> -    i8259A_resume();
> -
> -    time_resume();
> -
> -    console_resume();
> +    switch ( saved )
> +    {
> +    case SAVED_NONE:
> +        lapic_resume();

... here and ...

> @@ -196,7 +232,7 @@ static int enter_state(u32 state)
>      write_cr4(cr4 & ~X86_CR4_MCE);
>      write_efer(read_efer());
>  
> -    device_power_up();
> +    device_power_up(SAVED_NONE);

... here.

> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -541,20 +541,28 @@ static int iommu_flush_iotlb_psi(
>      return status;
>  }
>  
> -static void iommu_flush_all(void)
> +static int __must_check iommu_flush_all(void)
>  {
>      struct acpi_drhd_unit *drhd;
>      struct iommu *iommu;
>      int flush_dev_iotlb;
> +    int rc = 0;
>  
>      flush_all_cache();
>      for_each_drhd_unit ( drhd )
>      {
> +        int ret;
> +
>          iommu = drhd->iommu;
>          iommu_flush_context_global(iommu, 0);
>          flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
> -        iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
> +        ret = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
> +
> +        if ( unlikely(ret) )
> +            rc = ret;

Same as for earlier patches - "if ( unlikely(!rc) )" please.

Also, having come here - did I miss iommu_flush_iotlb_global() gaining
a __must_check annotation somewhere? And the struct iommu_flush
pointers and handlers? And, by analogy, iommu_flush_context_*()?

Jan
Quan Xu May 25, 2016, 6:41 a.m. UTC | #2
On May 24, 2016 4:22 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 18.05.16 at 10:08, <quan.xu@intel.com> wrote:
> > --- a/xen/arch/x86/acpi/power.c
> > +++ b/xen/arch/x86/acpi/power.c
> > @@ -43,36 +43,68 @@ struct acpi_sleep_info acpi_sinfo;
> >
> >  void do_suspend_lowlevel(void);
> >
> > +enum dev_power_saved
> > +{
> > +    SAVED_NONE,    /* None of device power saved */
> > +    SAVED_CONSOLE, /* Device power of console saved */
> > +    SAVED_TIME,    /* Device power of time saved */
> > +    SAVED_I8259A,  /* Device power of I8259A saved */
> > +    SAVED_IOAPIC,  /* Device power of IOAPIC saved */
> > +    SAVED_IOMMU,   /* Device power of IOMMU saved */
> > +    SAVED_LAPIC,   /* Device power of LAPIC saved */
> > +};
> 
> Please avoid comments saying nothing substantially different than the code
> they accompany, and also not helping the reader to understand the
> commented code.
>

I'll drop these comments in v6.

 
> >  static int device_power_down(void)
> >  {
> > -    console_suspend();
> > +    if ( console_suspend() )
> > +        return SAVED_CONSOLE;
> 
> I said so on the previous round, and I need to repeat it now: If
> console_suspend() fails, you saved _nothing_.
> 

Ah, we may have some different views for SAVED_*, which I mean has been saved and we are no need to resume.

e.g. if console_suspend() fails, I did return SAVED_CONSOLE, reading my patch again, and I really resume nothing at all.

device_power_up()
{
     ...
    +    case SAVED_CONSOLE:
    +        break;
    ...
}


I know we can also propagate SAVED_NONE for console_suspend() failure, then we need adjust device_power_up() relevantly.


> > -    time_suspend();
> > +    if ( time_suspend() )
> > +        return SAVED_TIME;
> >
> > -    i8259A_suspend();
> > +    if ( i8259A_suspend() )
> > +        return SAVED_I8259A;
> >
> > +    /* ioapic_suspend cannot fail */
> >      ioapic_suspend();
> >
> > -    iommu_suspend();
> > +    if ( iommu_suspend() )
> > +        return SAVED_IOMMU;
> >
> > -    lapic_suspend();
> > +    if ( lapic_suspend() )
> > +        return SAVED_LAPIC;
> >
> >      return 0;
> 
> And this silently means SAVED_NONE, whereas here you saved everything.
> Yielding clearly bogus code ...
>


 '0' is just on success here.  Look at the condition where we call device_power_up():

+        if ( error > 0 )
+            device_power_up(error);

Then, it is not bogus code.
 

> > -static void device_power_up(void)
> > +static void device_power_up(enum dev_power_saved saved)
> >  {
> > -    lapic_resume();
> > -
> > -    iommu_resume();
> > -
> > -    ioapic_resume();
> > -
> > -    i8259A_resume();
> > -
> > -    time_resume();
> > -
> > -    console_resume();
> > +    switch ( saved )
> > +    {
> > +    case SAVED_NONE:
> > +        lapic_resume();
> 
> ... here and ...
> 
> > @@ -196,7 +232,7 @@ static int enter_state(u32 state)
> >      write_cr4(cr4 & ~X86_CR4_MCE);
> >      write_efer(read_efer());
> >
> > -    device_power_up();
> > +    device_power_up(SAVED_NONE);
> 
> ... here.

Then we need to call all of *_resume(). I think the logic is correct, but the SAVED_*  may be not what you suggested.

> 
> > --- a/xen/drivers/passthrough/vtd/iommu.c
> > +++ b/xen/drivers/passthrough/vtd/iommu.c
> > @@ -541,20 +541,28 @@ static int iommu_flush_iotlb_psi(
> >      return status;
> >  }
> >
> > -static void iommu_flush_all(void)
> > +static int __must_check iommu_flush_all(void)
> >  {
> >      struct acpi_drhd_unit *drhd;
> >      struct iommu *iommu;
> >      int flush_dev_iotlb;
> > +    int rc = 0;
> >
> >      flush_all_cache();
> >      for_each_drhd_unit ( drhd )
> >      {
> > +        int ret;
> > +
> >          iommu = drhd->iommu;
> >          iommu_flush_context_global(iommu, 0);
> >          flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
> > -        iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
> > +        ret = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
> > +
> > +        if ( unlikely(ret) )
> > +            rc = ret;
> 
> Same as for earlier patches - "if ( unlikely(!rc) )" please.
> 


Yes,


> Also, having come here - did I miss iommu_flush_iotlb_global() gaining a
> __must_check annotation somewhere? 

I will add __must_check annotation to iommu_flush_iotlb_global().

> And the struct iommu_flush pointers
> and handlers? And, by analogy, iommu_flush_context_*()?
> 

I am better only add __must_check annotation to flush->iotlb and handlers,
but leaving flush->context/handers and  iommu_flush_context_*() as are in current patch set..
the coming patch set will fix them.

Quan
Jan Beulich May 25, 2016, 8:06 a.m. UTC | #3
>>> On 25.05.16 at 08:41, <quan.xu@intel.com> wrote:
> On May 24, 2016 4:22 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 18.05.16 at 10:08, <quan.xu@intel.com> wrote:
>> >  static int device_power_down(void)
>> >  {
>> > -    console_suspend();
>> > +    if ( console_suspend() )
>> > +        return SAVED_CONSOLE;
>> 
>> I said so on the previous round, and I need to repeat it now: If
>> console_suspend() fails, you saved _nothing_.
>> 
> 
> Ah, we may have some different views for SAVED_*, which I mean has been 
> saved and we are no need to resume.
> 
> e.g. if console_suspend() fails, I did return SAVED_CONSOLE, reading my 
> patch again, and I really resume nothing at all.
> 
> device_power_up()
> {
>      ...
>     +    case SAVED_CONSOLE:
>     +        break;
>     ...
> }
> 
> 
> I know we can also propagate SAVED_NONE for console_suspend() failure, then 
> we need adjust device_power_up() relevantly.

My main point is that the names of these enumerators should reflect
their purpose. If one reads "SAVED_CONSOLE", then (s)he should be
allowed this to mean that console state was saved (and hence needs
to be restored upon error / resume).

>> > -    time_suspend();
>> > +    if ( time_suspend() )
>> > +        return SAVED_TIME;
>> >
>> > -    i8259A_suspend();
>> > +    if ( i8259A_suspend() )
>> > +        return SAVED_I8259A;
>> >
>> > +    /* ioapic_suspend cannot fail */
>> >      ioapic_suspend();
>> >
>> > -    iommu_suspend();
>> > +    if ( iommu_suspend() )
>> > +        return SAVED_IOMMU;
>> >
>> > -    lapic_suspend();
>> > +    if ( lapic_suspend() )
>> > +        return SAVED_LAPIC;
>> >
>> >      return 0;
>> 
>> And this silently means SAVED_NONE, whereas here you saved everything.
>> Yielding clearly bogus code ...
>>
> 
> 
>  '0' is just on success here.  Look at the condition where we call 
> device_power_up():
> 
> +        if ( error > 0 )
> +            device_power_up(error);
> 
> Then, it is not bogus code.

See above: Zero should not mean both "nothing saved" and "saved
everything".

>> Also, having come here - did I miss iommu_flush_iotlb_global() gaining a
>> __must_check annotation somewhere? 
> 
> I will add __must_check annotation to iommu_flush_iotlb_global().
> 
>> And the struct iommu_flush pointers
>> and handlers? And, by analogy, iommu_flush_context_*()?
> 
> I am better only add __must_check annotation to flush->iotlb and handlers,
> but leaving flush->context/handers and  iommu_flush_context_*() as are in 
> current patch set..
> the coming patch set will fix them.

I don't follow the logic behind this: The purpose of this series is to
make sure flushing errors get properly bubbled up, which includes
adding __must_check annotations. I'm not saying this needs to
happen in this patch, but it should happen in this series (and please
following the same basic model: A caller or a __must_check function
should either already be __must_check, or should become so at the
same time).

Jan
Quan Xu May 25, 2016, 3:13 p.m. UTC | #4
On May 25, 2016 4:07 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 25.05.16 at 08:41, <quan.xu@intel.com> wrote:
> > On May 24, 2016 4:22 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >> >>> On 18.05.16 at 10:08, <quan.xu@intel.com> wrote:
> >> >  static int device_power_down(void)  {
> >> > -    console_suspend();
> >> > +    if ( console_suspend() )
> >> > +        return SAVED_CONSOLE;
> >>
> >> I said so on the previous round, and I need to repeat it now: If
> >> console_suspend() fails, you saved _nothing_.
> >>
> >
> > Ah, we may have some different views for SAVED_*, which I mean has
> > been saved and we are no need to resume.
> >
> > e.g. if console_suspend() fails, I did return SAVED_CONSOLE, reading
> > my patch again, and I really resume nothing at all.
> >
> > device_power_up()
> > {
> >      ...
> >     +    case SAVED_CONSOLE:
> >     +        break;
> >     ...
> > }
> >
> >
> > I know we can also propagate SAVED_NONE for console_suspend() failure,
> > then we need adjust device_power_up() relevantly.
> 
> My main point is that the names of these enumerators should reflect their
> purpose. If one reads "SAVED_CONSOLE", then (s)he should be allowed this
> to mean that console state was saved (and hence needs to be restored upon
> error / resume).
> 


I'll follow this.. it is much better than what I understand.


> >> > -    time_suspend();
> >> > +    if ( time_suspend() )
> >> > +        return SAVED_TIME;
> >> >
> >> > -    i8259A_suspend();
> >> > +    if ( i8259A_suspend() )
> >> > +        return SAVED_I8259A;
> >> >
> >> > +    /* ioapic_suspend cannot fail */
> >> >      ioapic_suspend();
> >> >
> >> > -    iommu_suspend();
> >> > +    if ( iommu_suspend() )
> >> > +        return SAVED_IOMMU;
> >> >
> >> > -    lapic_suspend();
> >> > +    if ( lapic_suspend() )
> >> > +        return SAVED_LAPIC;
> >> >
> >> >      return 0;
> >>
> >> And this silently means SAVED_NONE, whereas here you saved everything.
> >> Yielding clearly bogus code ...
> >>
> >
> >
> >  '0' is just on success here.  Look at the condition where we call
> > device_power_up():
> >
> > +        if ( error > 0 )
> > +            device_power_up(error);
> >
> > Then, it is not bogus code.
> 
> See above: Zero should not mean both "nothing saved" and "saved
> everything".
> 
> >> Also, having come here - did I miss iommu_flush_iotlb_global()
> >> gaining a __must_check annotation somewhere?
> >
> > I will add __must_check annotation to iommu_flush_iotlb_global().
> >
> >> And the struct iommu_flush pointers
> >> and handlers? And, by analogy, iommu_flush_context_*()?
> >
> > I am better only add __must_check annotation to flush->iotlb and
> > handlers, but leaving flush->context/handers and
> > iommu_flush_context_*() as are in current patch set..
> > the coming patch set will fix them.
> 
> I don't follow the logic behind this: The purpose of this series is to make sure
> flushing errors get properly bubbled up, which includes adding __must_check
> annotations. I'm not saying this needs to happen in this patch, but it should
> happen in this series

I will add a patch..

> (and please following the same basic model: A caller or a
> __must_check function should either already be __must_check, or should
> become so at the same time).
> 

Agreed.

Quan
diff mbox

Patch

diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index 2885e31..8e60c75 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -43,36 +43,68 @@  struct acpi_sleep_info acpi_sinfo;
 
 void do_suspend_lowlevel(void);
 
+enum dev_power_saved
+{
+    SAVED_NONE,    /* None of device power saved */
+    SAVED_CONSOLE, /* Device power of console saved */
+    SAVED_TIME,    /* Device power of time saved */
+    SAVED_I8259A,  /* Device power of I8259A saved */
+    SAVED_IOAPIC,  /* Device power of IOAPIC saved */
+    SAVED_IOMMU,   /* Device power of IOMMU saved */
+    SAVED_LAPIC,   /* Device power of LAPIC saved */
+};
+
 static int device_power_down(void)
 {
-    console_suspend();
+    if ( console_suspend() )
+        return SAVED_CONSOLE;
 
-    time_suspend();
+    if ( time_suspend() )
+        return SAVED_TIME;
 
-    i8259A_suspend();
+    if ( i8259A_suspend() )
+        return SAVED_I8259A;
 
+    /* ioapic_suspend cannot fail */
     ioapic_suspend();
 
-    iommu_suspend();
+    if ( iommu_suspend() )
+        return SAVED_IOMMU;
 
-    lapic_suspend();
+    if ( lapic_suspend() )
+        return SAVED_LAPIC;
 
     return 0;
 }
 
-static void device_power_up(void)
+static void device_power_up(enum dev_power_saved saved)
 {
-    lapic_resume();
-
-    iommu_resume();
-
-    ioapic_resume();
-
-    i8259A_resume();
-
-    time_resume();
-
-    console_resume();
+    switch ( saved )
+    {
+    case SAVED_NONE:
+        lapic_resume();
+        /* fall through */
+    case SAVED_LAPIC:
+        iommu_resume();
+        /* fall through */
+    case SAVED_IOMMU:
+        ioapic_resume();
+        /* fall through */
+    case SAVED_IOAPIC:
+        i8259A_resume();
+        /* fall through */
+    case SAVED_I8259A:
+        time_resume();
+        /* fall through */
+    case SAVED_TIME:
+        console_resume();
+        /* fall through */
+    case SAVED_CONSOLE:
+        break;
+    default:
+        BUG();
+        break;
+    }
 }
 
 static void freeze_domains(void)
@@ -169,6 +201,10 @@  static int enter_state(u32 state)
     {
         printk(XENLOG_ERR "Some devices failed to power down.");
         system_state = SYS_STATE_resume;
+
+        if ( error > 0 )
+            device_power_up(error);
+
         goto done;
     }
 
@@ -196,7 +232,7 @@  static int enter_state(u32 state)
     write_cr4(cr4 & ~X86_CR4_MCE);
     write_efer(read_efer());
 
-    device_power_up();
+    device_power_up(SAVED_NONE);
 
     mcheck_init(&boot_cpu_data, 0);
     write_cr4(cr4);
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index 4536106..0b68596 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1339,7 +1339,14 @@  static void invalidate_all_devices(void)
     iterate_ivrs_mappings(_invalidate_all_devices);
 }
 
-void amd_iommu_suspend(void)
+int amd_iommu_suspend(void)
+{
+    amd_iommu_crash_shutdown();
+
+    return 0;
+}
+
+void amd_iommu_crash_shutdown(void)
 {
     struct amd_iommu *iommu;
 
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 86d6fb3..f162703 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -639,6 +639,6 @@  const struct iommu_ops amd_iommu_ops = {
     .suspend = amd_iommu_suspend,
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
-    .crash_shutdown = amd_iommu_suspend,
+    .crash_shutdown = amd_iommu_crash_shutdown,
     .dump_p2m_table = amd_dump_p2m_table,
 };
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 92d37c5..b610a1f 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -379,10 +379,12 @@  int __init iommu_setup(void)
     return rc;
 }
 
-void iommu_suspend()
+int iommu_suspend()
 {
     if ( iommu_enabled )
-        iommu_get_ops()->suspend();
+        return iommu_get_ops()->suspend();
+
+    return 0;
 }
 
 void iommu_resume()
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index a6caf97..3d07139 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -541,20 +541,28 @@  static int iommu_flush_iotlb_psi(
     return status;
 }
 
-static void iommu_flush_all(void)
+static int __must_check iommu_flush_all(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int flush_dev_iotlb;
+    int rc = 0;
 
     flush_all_cache();
     for_each_drhd_unit ( drhd )
     {
+        int ret;
+
         iommu = drhd->iommu;
         iommu_flush_context_global(iommu, 0);
         flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+        ret = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+
+        if ( unlikely(ret) )
+            rc = ret;
     }
+
+    return rc;
 }
 
 static int __must_check iommu_flush_iotlb(struct domain *d,
@@ -1270,7 +1278,9 @@  static void __hwdom_init intel_iommu_hwdom_init(struct domain *d)
     setup_hwdom_pci_devices(d, setup_hwdom_device);
     setup_hwdom_rmrr(d);
 
-    iommu_flush_all();
+    if ( iommu_flush_all() )
+        printk(XENLOG_WARNING VTDPREFIX
+               " IOMMU flush all failed for hardware domain.\n");
 
     for_each_drhd_unit ( drhd )
     {
@@ -2051,7 +2061,7 @@  int adjust_vtd_irq_affinities(void)
 }
 __initcall(adjust_vtd_irq_affinities);
 
-static int init_vtd_hw(void)
+static int __must_check init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
@@ -2149,8 +2159,8 @@  static int init_vtd_hw(void)
             return -EIO;
         }
     }
-    iommu_flush_all();
-    return 0;
+
+    return iommu_flush_all();
 }
 
 static void __hwdom_init setup_hwdom_rmrr(struct domain *d)
@@ -2439,16 +2449,27 @@  static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
 }
 
 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
-static void vtd_suspend(void)
+
+static int __must_check vtd_suspend(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     u32    i;
+    int rc;
 
     if ( !iommu_enabled )
-        return;
+        return 0;
+
+    rc = iommu_flush_all();
+
+    if ( unlikely(rc) )
+    {
+        printk(XENLOG_WARNING VTDPREFIX
+               " suspend: IOMMU flush all failed %d.\n",
+               rc);
 
-    iommu_flush_all();
+        return rc;
+    }
 
     for_each_drhd_unit ( drhd )
     {
@@ -2477,6 +2498,8 @@  static void vtd_suspend(void)
         if ( !iommu_intremap && iommu_qinval )
             disable_qinval(iommu);
     }
+
+    return 0;
 }
 
 static void vtd_crash_shutdown(void)
@@ -2487,7 +2510,9 @@  static void vtd_crash_shutdown(void)
     if ( !iommu_enabled )
         return;
 
-    iommu_flush_all();
+    if ( iommu_flush_all() )
+        printk(XENLOG_WARNING VTDPREFIX
+               " crash shutdown: IOMMU flush all failed.\n");
 
     for_each_drhd_unit ( drhd )
     {
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 57b6cc1..570885a 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -119,7 +119,7 @@  extern unsigned long *shared_intremap_inuse;
 
 /* power management support */
 void amd_iommu_resume(void);
-void amd_iommu_suspend(void);
+int __must_check amd_iommu_suspend(void);
 void amd_iommu_crash_shutdown(void);
 
 /* guest iommu support */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 760cf65..7851a81 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -175,7 +175,7 @@  struct iommu_ops {
     unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
     int (*setup_hpet_msi)(struct msi_desc *);
 #endif /* CONFIG_X86 */
-    void (*suspend)(void);
+    int __must_check (*suspend)(void);
     void (*resume)(void);
     void (*share_p2m)(struct domain *d);
     void (*crash_shutdown)(void);
@@ -186,7 +186,7 @@  struct iommu_ops {
     void (*dump_p2m_table)(struct domain *d);
 };
 
-void iommu_suspend(void);
+int __must_check iommu_suspend(void);
 void iommu_resume(void);
 void iommu_crash_shutdown(void);
 int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);