diff mbox

[v4,09/10] IOMMU: propagate IOMMU Device-TLB flush error up to IOMMU suspending

Message ID 1462524880-67205-10-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Quan Xu May 6, 2016, 8:54 a.m. UTC
Propagate the IOMMU Device-TLB flush error up to IOMMU suspending.

Signed-off-by: Quan Xu <quan.xu@intel.com>

CC: Jan Beulich <jbeulich@suse.com>
CC: Liu Jinsong <jinsong.liu@alibaba-inc.com>
CC: Keir Fraser <keir@xen.org>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien.grall@arm.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: Feng Wu <feng.wu@intel.com>
---
 xen/arch/x86/acpi/power.c                     | 72 ++++++++++++++++++++-------
 xen/drivers/passthrough/amd/iommu_init.c      |  9 +++-
 xen/drivers/passthrough/amd/pci_amd_iommu.c   |  2 +-
 xen/drivers/passthrough/iommu.c               |  7 ++-
 xen/drivers/passthrough/vtd/iommu.c           | 39 +++++++++++----
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  2 +-
 xen/include/xen/iommu.h                       |  4 +-
 7 files changed, 101 insertions(+), 34 deletions(-)

Comments

Jan Beulich May 10, 2016, 9:24 a.m. UTC | #1
>>> On 06.05.16 at 10:54, <quan.xu@intel.com> wrote:
> --- a/xen/arch/x86/acpi/power.c
> +++ b/xen/arch/x86/acpi/power.c
> @@ -43,36 +43,71 @@ struct acpi_sleep_info acpi_sinfo;
>  
>  void do_suspend_lowlevel(void);
>  
> +enum dev_power_type
> +{
> +    /* Use for all of device power type */

No sure what this comment is meant to say. Also if you mean it to
apply to just TYPE_ALL, it would probably better be placed on that
same line. In any event it is missing a full stop.

> +    TYPE_ALL,
> +    TYPE_CONSOLE,
> +    TYPE_TIME,
> +    TYPE_I8259A,
> +    TYPE_IOAPIC,
> +    TYPE_IOMMU,
> +    TYPE_LAPIC,
> +    /* Use for error */

Same here, but ...

> +    TYPE_UNKNOWN

... this isn't used anywhere anyway, and hence should be left out.
Errors would better be reported via -E... values anyway.

>  static int device_power_down(void)
>  {
> -    console_suspend();
> +    if ( console_suspend() )
> +        return TYPE_CONSOLE;

This (together with the resume side) makes me guess that the use
of TYPE_ as a prefix confused not just me, but also you: Here you
want to tell the caller that everything _prior_ to console suspend
(which happens to be nothing in this specific case) needs to be
undone. Yet below you use TYPE_CONSOLE as an indication that
console_resume() needs to be called.

> -    time_suspend();
> +    if ( time_suspend() )
> +        return TYPE_TIME;
>  
> -    i8259A_suspend();
> +    if ( i8259A_suspend() )
> +        return TYPE_I8259A;
>  
> +    /* ioapic_suspend should never fail */
>      ioapic_suspend();

The comment is bogus: "should" means it can in theory. Yet the
function having void return type means it just cannot fail.

> -static void device_power_up(void)
> +static void device_power_up(enum dev_power_type type)
>  {
> -    lapic_resume();
> -
> -    iommu_resume();
> -
> -    ioapic_resume();
> -
> -    i8259A_resume();
> -
> -    time_resume();
> -
> -    console_resume();
> +    switch ( type )
> +    {
> +    case TYPE_ALL:
> +        /* fall through */
> +    case TYPE_LAPIC:

No need for a fall-through comment between immediately adjacent
case labels.

> @@ -169,6 +204,7 @@ static int enter_state(u32 state)
>      {
>          printk(XENLOG_ERR "Some devices failed to power down.");
>          system_state = SYS_STATE_resume;
> +        device_power_up(error);

Either error's and device_power_down()'s return type get changed
to enum dev_power_type, or this needs a "error > 0" conditional.

> @@ -1267,7 +1273,9 @@ static void __hwdom_init intel_iommu_hwdom_init(struct domain *d)
>      setup_hwdom_pci_devices(d, setup_hwdom_device);
>      setup_hwdom_rmrr(d);
>  
> -    iommu_flush_all();
> +    if ( iommu_flush_all() )
> +        printk(XENLOG_WARNING VTDPREFIX
> +               " intel_iommu_hwdom_init: IOMMU flush all failed.\n");

As said (and I think a number of times) before: At least when you
already hold the error value in your hands, please also log it. Also
personally I'm opposed to including function names in non-debug
log messages, but I'll leave that decision to the VT-d maintainers
here.

Jan
Quan Xu May 13, 2016, 3:39 a.m. UTC | #2
On May 10, 2016 5:25 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 06.05.16 at 10:54, <quan.xu@intel.com> wrote:

> > --- a/xen/arch/x86/acpi/power.c

> > +++ b/xen/arch/x86/acpi/power.c

> >  static int device_power_down(void)

> >  {

> > -    console_suspend();

> > +    if ( console_suspend() )

> > +        return TYPE_CONSOLE;

> 

> This (together with the resume side) makes me guess that the use of TYPE_ as

> a prefix confused not just me, but also you:


Yes,  this is really not a good prefix, and probably pretty bad to use 'ERROR_'.
What about 'PRIOR_'?  then I also need to adjust  device_power_up() as ...


> Here you want to tell the caller

> that everything _prior_ to console suspend (which happens to be nothing in

> this specific case) needs to be undone. Yet below you use TYPE_CONSOLE as

> an indication that

> console_resume() needs to be called.


... this,

+    switch ( prior )
+    {
       ...
+        time_resume();
+    case PRIOR_TIME:
+        console_resume();
+    case PRIOR_CONSOLE:
        ...
+    }

> 

> > -    time_suspend();

> > +    if ( time_suspend() )

> > +        return TYPE_TIME;

> >

> > -    i8259A_suspend();

> > +    if ( i8259A_suspend() )

> > +        return TYPE_I8259A;

> >

> > +    /* ioapic_suspend should never fail */

> >      ioapic_suspend();

> 

> The comment is bogus: "should" means it can in theory. Yet the function

> having void return type means it just cannot fail.

> 


I'll use 'cannot', instead of 'should'.
Another question, I check the code again, and the rest of the functions (console_suspend/ time_suspend/ i8259A_suspend / ioapic_suspend / lapic_suspend ), in device_power_down(), always returned '0'.
Maybe I need to fix these functions  annotation from 'int' to 'void', and then I can add a comment on the device_power_down().  

More that, if the   ' iommu_suspend()'  is the only fail, could we re-consider the previous v2/v3 solution with 'goto'?

> > @@ -169,6 +204,7 @@ static int enter_state(u32 state)

> >      {

> >          printk(XENLOG_ERR "Some devices failed to power down.");

> >          system_state = SYS_STATE_resume;

> > +        device_power_up(error);

> 

> Either error's and device_power_down()'s return type get changed to enum

> dev_power_type, or this needs a "error > 0" conditional.

> 

> > @@ -1267,7 +1273,9 @@ static void __hwdom_init

> intel_iommu_hwdom_init(struct domain *d)

> >      setup_hwdom_pci_devices(d, setup_hwdom_device);

> >      setup_hwdom_rmrr(d);

> >

> > -    iommu_flush_all();

> > +    if ( iommu_flush_all() )

> > +        printk(XENLOG_WARNING VTDPREFIX

> > +               " intel_iommu_hwdom_init: IOMMU flush all failed.\n");

> 

> As said (and I think a number of times) before: At least when you already hold

> the error value in your hands, please also log it.


Agreed, and I will apply for other patches.

> Also personally I'm opposed to

> including function names in non-debug log messages, but I'll leave that

> decision to the VT-d maintainers here.

> 


Albeit Reluctantly, I will fix it.

Quan
Jan Beulich May 13, 2016, 6:16 a.m. UTC | #3
>>> On 13.05.16 at 05:39, <quan.xu@intel.com> wrote:
> On May 10, 2016 5:25 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 06.05.16 at 10:54, <quan.xu@intel.com> wrote:
>> > --- a/xen/arch/x86/acpi/power.c
>> > +++ b/xen/arch/x86/acpi/power.c
>> >  static int device_power_down(void)
>> >  {
>> > -    console_suspend();
>> > +    if ( console_suspend() )
>> > +        return TYPE_CONSOLE;
>> 
>> This (together with the resume side) makes me guess that the use of TYPE_ as
>> a prefix confused not just me, but also you:
> 
> Yes,  this is really not a good prefix, and probably pretty bad to use 
> 'ERROR_'.
> What about 'PRIOR_'?  then I also need to adjust  device_power_up() as ...

What about SAVED_?

>> > -    time_suspend();
>> > +    if ( time_suspend() )
>> > +        return TYPE_TIME;
>> >
>> > -    i8259A_suspend();
>> > +    if ( i8259A_suspend() )
>> > +        return TYPE_I8259A;
>> >
>> > +    /* ioapic_suspend should never fail */
>> >      ioapic_suspend();
>> 
>> The comment is bogus: "should" means it can in theory. Yet the function
>> having void return type means it just cannot fail.
>> 
> 
> I'll use 'cannot', instead of 'should'.
> Another question, I check the code again, and the rest of the functions 
> (console_suspend/ time_suspend/ i8259A_suspend / ioapic_suspend / 
> lapic_suspend ), in device_power_down(), always returned '0'.
> Maybe I need to fix these functions  annotation from 'int' to 'void', and 
> then I can add a comment on the device_power_down().  

Please don't. Generally the possibility of failure exists, and hence if
functions have already been written to account for that, we shouldn't
strip that capability out.

Jan
Quan Xu May 13, 2016, 6:27 a.m. UTC | #4
On May 13, 2016 2:17 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 13.05.16 at 05:39, <quan.xu@intel.com> wrote:
> > On May 10, 2016 5:25 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >> >>> On 06.05.16 at 10:54, <quan.xu@intel.com> wrote:
> >> > --- a/xen/arch/x86/acpi/power.c
> >> > +++ b/xen/arch/x86/acpi/power.c
> >> >  static int device_power_down(void)  {
> >> > -    console_suspend();
> >> > +    if ( console_suspend() )
> >> > +        return TYPE_CONSOLE;
> >>
> >> This (together with the resume side) makes me guess that the use of
> >> TYPE_ as a prefix confused not just me, but also you:
> >
> > Yes,  this is really not a good prefix, and probably pretty bad to use
> > 'ERROR_'.
> > What about 'PRIOR_'?  then I also need to adjust  device_power_up() as ...
> 
> What about SAVED_?
>

much better, I'll  take it in v5. Thanks.

> >> > -    time_suspend();
> >> > +    if ( time_suspend() )
> >> > +        return TYPE_TIME;
> >> >
> >> > -    i8259A_suspend();
> >> > +    if ( i8259A_suspend() )
> >> > +        return TYPE_I8259A;
> >> >
> >> > +    /* ioapic_suspend should never fail */
> >> >      ioapic_suspend();
> >>
> >> The comment is bogus: "should" means it can in theory. Yet the
> >> function having void return type means it just cannot fail.
> >>
> >
> > I'll use 'cannot', instead of 'should'.
> > Another question, I check the code again, and the rest of the
> > functions (console_suspend/ time_suspend/ i8259A_suspend /
> > ioapic_suspend / lapic_suspend ), in device_power_down(), always returned
> '0'.
> > Maybe I need to fix these functions  annotation from 'int' to 'void',
> > and then I can add a comment on the device_power_down().
> 
> Please don't. Generally the possibility of failure exists, and hence if functions
> have already been written to account for that, we shouldn't strip that
> capability out.
> 

Got it.

Quan
diff mbox

Patch

diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index 2885e31..1f7b8bf 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -43,36 +43,71 @@  struct acpi_sleep_info acpi_sinfo;
 
 void do_suspend_lowlevel(void);
 
+enum dev_power_type
+{
+    /* Use for all of device power type */
+    TYPE_ALL,
+    TYPE_CONSOLE,
+    TYPE_TIME,
+    TYPE_I8259A,
+    TYPE_IOAPIC,
+    TYPE_IOMMU,
+    TYPE_LAPIC,
+    /* Use for error */
+    TYPE_UNKNOWN
+};
+
 static int device_power_down(void)
 {
-    console_suspend();
+    if ( console_suspend() )
+        return TYPE_CONSOLE;
 
-    time_suspend();
+    if ( time_suspend() )
+        return TYPE_TIME;
 
-    i8259A_suspend();
+    if ( i8259A_suspend() )
+        return TYPE_I8259A;
 
+    /* ioapic_suspend should never fail */
     ioapic_suspend();
 
-    iommu_suspend();
+    if ( iommu_suspend() )
+        return TYPE_IOMMU;
 
-    lapic_suspend();
+    if ( lapic_suspend() )
+        return TYPE_LAPIC;
 
     return 0;
 }
 
-static void device_power_up(void)
+static void device_power_up(enum dev_power_type type)
 {
-    lapic_resume();
-
-    iommu_resume();
-
-    ioapic_resume();
-
-    i8259A_resume();
-
-    time_resume();
-
-    console_resume();
+    switch ( type )
+    {
+    case TYPE_ALL:
+        /* fall through */
+    case TYPE_LAPIC:
+        lapic_resume();
+        /* fall through */
+    case TYPE_IOMMU:
+        iommu_resume();
+        /* fall through */
+    case TYPE_IOAPIC:
+        ioapic_resume();
+        /* fall through */
+    case TYPE_I8259A:
+        i8259A_resume();
+        /* fall through */
+    case TYPE_TIME:
+        time_resume();
+        /* fall through */
+    case TYPE_CONSOLE:
+        console_resume();
+        break;
+    default:
+        BUG();
+        break;
+    }
 }
 
 static void freeze_domains(void)
@@ -169,6 +204,7 @@  static int enter_state(u32 state)
     {
         printk(XENLOG_ERR "Some devices failed to power down.");
         system_state = SYS_STATE_resume;
+        device_power_up(error);
         goto done;
     }
 
@@ -196,7 +232,7 @@  static int enter_state(u32 state)
     write_cr4(cr4 & ~X86_CR4_MCE);
     write_efer(read_efer());
 
-    device_power_up();
+    device_power_up(TYPE_ALL);
 
     mcheck_init(&boot_cpu_data, 0);
     write_cr4(cr4);
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index 4536106..02588aa 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1339,12 +1339,19 @@  static void invalidate_all_devices(void)
     iterate_ivrs_mappings(_invalidate_all_devices);
 }
 
-void amd_iommu_suspend(void)
+int amd_iommu_suspend(void)
 {
     struct amd_iommu *iommu;
 
     for_each_amd_iommu ( iommu )
         disable_iommu(iommu);
+
+    return 0;
+}
+
+void amd_iommu_crash_shutdown(void)
+{
+    amd_iommu_suspend();
 }
 
 void amd_iommu_resume(void)
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 70b7475..ac40f63 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -626,6 +626,6 @@  const struct iommu_ops amd_iommu_ops = {
     .suspend = amd_iommu_suspend,
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
-    .crash_shutdown = amd_iommu_suspend,
+    .crash_shutdown = amd_iommu_crash_shutdown,
     .dump_p2m_table = amd_dump_p2m_table,
 };
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index c013d91..8b26b1c 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -404,11 +404,14 @@  int iommu_do_domctl(
     return ret;
 }
 
-void iommu_suspend()
+int __must_check iommu_suspend()
 {
     const struct iommu_ops *ops = iommu_get_ops();
+
     if ( iommu_enabled )
-        ops->suspend();
+        return ops->suspend();
+
+    return 0;
 }
 
 void iommu_share_p2m_table(struct domain* d)
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index bf77417..29cf870 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -541,11 +541,12 @@  static int iommu_flush_iotlb_psi(
     return status;
 }
 
-static void iommu_flush_all(void)
+static int __must_check iommu_flush_all(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int flush_dev_iotlb;
+    int rc = 0, ret;
 
     flush_all_cache();
     for_each_drhd_unit ( drhd )
@@ -553,8 +554,13 @@  static void iommu_flush_all(void)
         iommu = drhd->iommu;
         iommu_flush_context_global(iommu, 0);
         flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+        ret = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+
+        if ( ret )
+            rc = ret;
     }
+
+    return rc;
 }
 
 static int iommu_flush_iotlb(struct domain *d, unsigned long gfn,
@@ -1267,7 +1273,9 @@  static void __hwdom_init intel_iommu_hwdom_init(struct domain *d)
     setup_hwdom_pci_devices(d, setup_hwdom_device);
     setup_hwdom_rmrr(d);
 
-    iommu_flush_all();
+    if ( iommu_flush_all() )
+        printk(XENLOG_WARNING VTDPREFIX
+               " intel_iommu_hwdom_init: IOMMU flush all failed.\n");
 
     for_each_drhd_unit ( drhd )
     {
@@ -2126,8 +2134,8 @@  static int init_vtd_hw(void)
             return -EIO;
         }
     }
-    iommu_flush_all();
-    return 0;
+
+    return iommu_flush_all();
 }
 
 static void __hwdom_init setup_hwdom_rmrr(struct domain *d)
@@ -2416,16 +2424,25 @@  static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
 }
 
 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
-static void vtd_suspend(void)
+
+static int vtd_suspend(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     u32    i;
+    int rc;
 
     if ( !iommu_enabled )
-        return;
+        return 0;
 
-    iommu_flush_all();
+    rc = iommu_flush_all();
+
+    if ( rc )
+    {
+        printk(XENLOG_WARNING VTDPREFIX
+               " vtd_suspend: IOMMU flush all failed.\n");
+        return rc;
+    }
 
     for_each_drhd_unit ( drhd )
     {
@@ -2454,6 +2471,8 @@  static void vtd_suspend(void)
         if ( !iommu_intremap && iommu_qinval )
             disable_qinval(iommu);
     }
+
+    return 0;
 }
 
 static void vtd_crash_shutdown(void)
@@ -2464,7 +2483,9 @@  static void vtd_crash_shutdown(void)
     if ( !iommu_enabled )
         return;
 
-    iommu_flush_all();
+    if ( iommu_flush_all() )
+        printk(XENLOG_WARNING VTDPREFIX
+               " vtd_crash_shutdown: IOMMU flush all failed.\n");
 
     for_each_drhd_unit ( drhd )
     {
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 9c51172..f540fc8 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -119,7 +119,7 @@  extern unsigned long *shared_intremap_inuse;
 
 /* power management support */
 void amd_iommu_resume(void);
-void amd_iommu_suspend(void);
+int amd_iommu_suspend(void);
 void amd_iommu_crash_shutdown(void);
 
 /* guest iommu support */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 6a64014..fee5a8f 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -175,7 +175,7 @@  struct iommu_ops {
     unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
     int (*setup_hpet_msi)(struct msi_desc *);
 #endif /* CONFIG_X86 */
-    void (*suspend)(void);
+    int (*suspend)(void);
     void (*resume)(void);
     void (*share_p2m)(struct domain *d);
     void (*crash_shutdown)(void);
@@ -185,7 +185,7 @@  struct iommu_ops {
     void (*dump_p2m_table)(struct domain *d);
 };
 
-void iommu_suspend(void);
+int __must_check iommu_suspend(void);
 void iommu_resume(void);
 void iommu_crash_shutdown(void);
 int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);