diff mbox

[v2,09/11] IOMMU: propagate IOMMU Device-TLB flush error up to IOMMU suspending

Message ID 1460988011-17758-10-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Quan Xu April 18, 2016, 2 p.m. UTC
While IOMMU Device-TLB flush timed out, xen calls panic() at present.
However the existing panic() is going to be eliminated, so we must
propagate the IOMMU Device-TLB flush error up to IOMMU suspending.

Signed-off-by: Quan Xu <quan.xu@intel.com>

CC: Jan Beulich <jbeulich@suse.com>
CC: Liu Jinsong <jinsong.liu@alibaba-inc.com>
CC: Keir Fraser <keir@xen.org>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
CC: Stefano Stabellini <stefano.stabellini@citrix.com>
CC: Julien Grall <julien.grall@arm.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: Feng Wu <feng.wu@intel.com>
---
 xen/arch/x86/acpi/power.c                     | 14 +++++++++++++-
 xen/drivers/passthrough/amd/iommu_init.c      |  9 ++++++++-
 xen/drivers/passthrough/amd/pci_amd_iommu.c   |  2 +-
 xen/drivers/passthrough/arm/smmu.c            | 10 ++++++----
 xen/drivers/passthrough/iommu.c               |  7 +++++--
 xen/drivers/passthrough/vtd/iommu.c           | 22 +++++++++++++++++-----
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  2 +-
 xen/include/xen/iommu.h                       |  4 ++--
 8 files changed, 53 insertions(+), 17 deletions(-)

Comments

Jan Beulich April 25, 2016, 11:52 a.m. UTC | #1
>>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
> --- a/xen/arch/x86/acpi/power.c
> +++ b/xen/arch/x86/acpi/power.c
> @@ -45,19 +45,31 @@ void do_suspend_lowlevel(void);
>  
>  static int device_power_down(void)
>  {
> +    int err;
> +
>      console_suspend();
>  
>      time_suspend();
>  
>      i8259A_suspend();
>  
> -    ioapic_suspend();
> +    err = iommu_suspend();
> +    if ( err )
> +        goto iommu_suspend_error;
>  
>      iommu_suspend();

A little more care please - this is definitely not what you meant.

> + iommu_suspend_error:
> +    ioapic_resume();
> +    i8259A_resume();
> +    time_resume();
> +    console_resume();
> +
> +    return err;

Instead of mostly repeating what device_power_up() does I
wonder whether it wouldn't be better to re-use that function for
the error path here. Either by suitably making information
available to device_power_up() to skip the call to lapic_resume()
or by making lapic_resume() itself recognize it got called without
lapic_suspend() having got called first.

> --- a/xen/drivers/passthrough/arm/smmu.c
> +++ b/xen/drivers/passthrough/arm/smmu.c
> @@ -2540,7 +2540,7 @@ static int force_stage = 2;
>   */
>  static u32 platform_features = ARM_SMMU_FEAT_COHERENT_WALK;
>  
> -static void arm_smmu_iotlb_flush_all(struct domain *d)
> +static int arm_smmu_iotlb_flush_all(struct domain *d)
>  {
>  	struct arm_smmu_xen_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
>  	struct iommu_domain *cfg;
> @@ -2557,13 +2557,15 @@ static void arm_smmu_iotlb_flush_all(struct domain *d)
>  		arm_smmu_tlb_inv_context(cfg->priv);
>  	}
>  	spin_unlock(&smmu_domain->lock);
> +
> +    return 0;
>  }

Even if indentation looks inconsistent in this file, please make your
addition match surrounding code.

> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -542,11 +542,12 @@ static int iommu_flush_iotlb_psi(
>      return status;
>  }
>  
> -static void iommu_flush_all(void)
> +static int iommu_flush_all(void)

__must_check

> @@ -554,8 +555,13 @@ static void iommu_flush_all(void)
>          iommu = drhd->iommu;
>          iommu_flush_context_global(iommu, 0);
>          flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
> -        iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
> +        rc = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
> +
> +        if ( rc )
> +            break;

This again violates the best effort flushing principle.

> @@ -2421,16 +2427,20 @@ static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
>  }
>  
>  static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
> -static void vtd_suspend(void)
> +static int vtd_suspend(void)

Please take the opportunity to add the missing blank line between
variable and function definition.

> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -157,7 +157,7 @@ struct iommu_ops {
>      unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
>      int (*setup_hpet_msi)(struct msi_desc *);
>  #endif /* CONFIG_X86 */
> -    void (*suspend)(void);
> +    int (*suspend)(void);
>      void (*resume)(void);
>      void (*share_p2m)(struct domain *d);
>      void (*crash_shutdown)(void);
> @@ -167,7 +167,7 @@ struct iommu_ops {
>      void (*dump_p2m_table)(struct domain *d);
>  };
>  
> -void iommu_suspend(void);
> +int iommu_suspend(void);

At the very least this one should become __must_check.

Jan
Julien Grall April 25, 2016, 1:58 p.m. UTC | #2
Hi Jan,

On 25/04/16 12:52, Jan Beulich wrote:
>>>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
>> --- a/xen/drivers/passthrough/arm/smmu.c
>> +++ b/xen/drivers/passthrough/arm/smmu.c
>> @@ -2540,7 +2540,7 @@ static int force_stage = 2;
>>    */
>>   static u32 platform_features = ARM_SMMU_FEAT_COHERENT_WALK;
>>
>> -static void arm_smmu_iotlb_flush_all(struct domain *d)
>> +static int arm_smmu_iotlb_flush_all(struct domain *d)
>>   {
>>   	struct arm_smmu_xen_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
>>   	struct iommu_domain *cfg;
>> @@ -2557,13 +2557,15 @@ static void arm_smmu_iotlb_flush_all(struct domain *d)
>>   		arm_smmu_tlb_inv_context(cfg->priv);
>>   	}
>>   	spin_unlock(&smmu_domain->lock);
>> +
>> +    return 0;
>>   }
>
> Even if indentation looks inconsistent in this file, please make your
> addition match surrounding code.

The file smmu.c was imported from Linux and supposed to use only Linux 
coding style. This is in order to help porting fixes.

Regards,
Quan Xu April 25, 2016, 2:50 p.m. UTC | #3
On April 25, 2016 9:58pm, <julien.grall@arm.com> wrote:
> On 25/04/16 12:52, Jan Beulich wrote:
> >>>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
> >> --- a/xen/drivers/passthrough/arm/smmu.c
> >> +++ b/xen/drivers/passthrough/arm/smmu.c
> >> @@ -2540,7 +2540,7 @@ static int force_stage = 2;
> >>    */
> >>   static u32 platform_features = ARM_SMMU_FEAT_COHERENT_WALK;
> >>
> >> -static void arm_smmu_iotlb_flush_all(struct domain *d)
> >> +static int arm_smmu_iotlb_flush_all(struct domain *d)
> >>   {
> >>   	struct arm_smmu_xen_domain *smmu_domain =
> domain_hvm_iommu(d)->arch.priv;
> >>   	struct iommu_domain *cfg;
> >> @@ -2557,13 +2557,15 @@ static void arm_smmu_iotlb_flush_all(struct
> domain *d)
> >>   		arm_smmu_tlb_inv_context(cfg->priv);
> >>   	}
> >>   	spin_unlock(&smmu_domain->lock);
> >> +
> >> +    return 0;
> >>   }
> >
> > Even if indentation looks inconsistent in this file, please make your
> > addition match surrounding code.
> 
> The file smmu.c was imported from Linux and supposed to use only Linux
> coding style. This is in order to help porting fixes.
> 

 Julien,  A 'tab'  ? 
Quan
Julien Grall April 26, 2016, 9:40 a.m. UTC | #4
Hi Quan,

On 25/04/2016 15:50, Xu, Quan wrote:
> On April 25, 2016 9:58pm, <julien.grall@arm.com> wrote:
>> On 25/04/16 12:52, Jan Beulich wrote:
>>>>>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
>>>> --- a/xen/drivers/passthrough/arm/smmu.c
>>>> +++ b/xen/drivers/passthrough/arm/smmu.c
>>>> @@ -2540,7 +2540,7 @@ static int force_stage = 2;
>>>>     */
>>>>    static u32 platform_features = ARM_SMMU_FEAT_COHERENT_WALK;
>>>>
>>>> -static void arm_smmu_iotlb_flush_all(struct domain *d)
>>>> +static int arm_smmu_iotlb_flush_all(struct domain *d)
>>>>    {
>>>>    	struct arm_smmu_xen_domain *smmu_domain =
>> domain_hvm_iommu(d)->arch.priv;
>>>>    	struct iommu_domain *cfg;
>>>> @@ -2557,13 +2557,15 @@ static void arm_smmu_iotlb_flush_all(struct
>> domain *d)
>>>>    		arm_smmu_tlb_inv_context(cfg->priv);
>>>>    	}
>>>>    	spin_unlock(&smmu_domain->lock);
>>>> +
>>>> +    return 0;
>>>>    }
>>>
>>> Even if indentation looks inconsistent in this file, please make your
>>> addition match surrounding code.
>>
>> The file smmu.c was imported from Linux and supposed to use only Linux
>> coding style. This is in order to help porting fixes.
>>
>
>   Julien,  A 'tab'  ?

Yes, Linux is using hard tabulation for the indentation.

Regards,
Quan Xu April 26, 2016, 11:28 a.m. UTC | #5
On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
> > --- a/xen/arch/x86/acpi/power.c
> > +++ b/xen/arch/x86/acpi/power.c
> > @@ -45,19 +45,31 @@ void do_suspend_lowlevel(void);
> >
> >  static int device_power_down(void)
> >  {
> > +    int err;
> > +
> >      console_suspend();
> >
> >      time_suspend();
> >
> >      i8259A_suspend();
> >
> > -    ioapic_suspend();
> > +    err = iommu_suspend();
> > +    if ( err )
> > +        goto iommu_suspend_error;
> >
> >      iommu_suspend();
> 
> A little more care please - this is definitely not what you meant.
> 

Very sorry for this..:(

> > + iommu_suspend_error:
> > +    ioapic_resume();
> > +    i8259A_resume();
> > +    time_resume();
> > +    console_resume();
> > +
> > +    return err;
> 
> Instead of mostly repeating what device_power_up() does I wonder whether
> it wouldn't be better to re-use that function for the error path here. Either by
> suitably making information available to device_power_up() to skip the call to
> lapic_resume() or by making lapic_resume() itself recognize it got called
> without
> lapic_suspend() having got called first.
> 

To be honest, now this modification is much more readable to me, however I will reconsider your suggestion.

Quan
Quan Xu April 28, 2016, 2:14 p.m. UTC | #6
On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
> > --- a/xen/drivers/passthrough/vtd/iommu.c
> > +++ b/xen/drivers/passthrough/vtd/iommu.c


> > -static void iommu_flush_all(void)
> > +static int iommu_flush_all(void)
> 
> __must_check
> 

The iommu_flush_all() is also called in intel_iommu_hwdom_init()  and vtd_crash_shutdown().
As we were on the same page, we can ignore the error code propagation for these two call trees.

I wonder whether we really need this '__must_check' or not, furthermore, print-out message
Is pointless (at least, to me).

Jan, could I ignore this '__must_check '?

Quan
Jan Beulich April 28, 2016, 2:36 p.m. UTC | #7
>>> On 28.04.16 at 16:14, <quan.xu@intel.com> wrote:
> On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
>> > --- a/xen/drivers/passthrough/vtd/iommu.c
>> > +++ b/xen/drivers/passthrough/vtd/iommu.c
> 
> 
>> > -static void iommu_flush_all(void)
>> > +static int iommu_flush_all(void)
>> 
>> __must_check
>> 
> 
> The iommu_flush_all() is also called in intel_iommu_hwdom_init()  and 
> vtd_crash_shutdown().
> As we were on the same page, we can ignore the error code propagation for 
> these two call trees.

I don't know what you're referring to here with "we were on the same
page". I don't think I've ever agreed (in the context of this series) to
ignore any error returns.

Jan

> I wonder whether we really need this '__must_check' or not, furthermore, 
> print-out message
> Is pointless (at least, to me).
> 
> Jan, could I ignore this '__must_check '?
> 
> Quan
Quan Xu April 28, 2016, 3:03 p.m. UTC | #8
On April 28, 2016 10:36 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 28.04.16 at 16:14, <quan.xu@intel.com> wrote:

> > On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:

> >> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:

> >> > --- a/xen/drivers/passthrough/vtd/iommu.c

> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c

> >

> >

> >> > -static void iommu_flush_all(void)

> >> > +static int iommu_flush_all(void)

> >>

> >> __must_check

> >>

> >

> > The iommu_flush_all() is also called in intel_iommu_hwdom_init()  and

> > vtd_crash_shutdown().

> > As we were on the same page, we can ignore the error code propagation

> > for these two call trees.

> 

> I don't know what you're referring to here with "we were on the same page". I

> don't think I've ever agreed (in the context of this series) to ignore any error

> returns.

> 



Look at the below link. 
http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03234.html

I hope I understand this correctly.

Quan

> Jan

> 

> > I wonder whether we really need this '__must_check' or not,

> > furthermore, print-out message Is pointless (at least, to me).

> >

> > Jan, could I ignore this '__must_check '?

> >

> > Quan

> 

> 

> 

> 

> _______________________________________________

> Xen-devel mailing list

> Xen-devel@lists.xen.org

> http://lists.xen.org/xen-devel
Jan Beulich April 28, 2016, 3:12 p.m. UTC | #9
>>> On 28.04.16 at 17:03, <quan.xu@intel.com> wrote:
> On April 28, 2016 10:36 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 28.04.16 at 16:14, <quan.xu@intel.com> wrote:
>> > On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> >> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
>> >> > --- a/xen/drivers/passthrough/vtd/iommu.c
>> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c
>> >
>> >
>> >> > -static void iommu_flush_all(void)
>> >> > +static int iommu_flush_all(void)
>> >>
>> >> __must_check
>> >>
>> >
>> > The iommu_flush_all() is also called in intel_iommu_hwdom_init()  and
>> > vtd_crash_shutdown().
>> > As we were on the same page, we can ignore the error code propagation
>> > for these two call trees.
>> 
>> I don't know what you're referring to here with "we were on the same page". 
> I
>> don't think I've ever agreed (in the context of this series) to ignore any 
> error
>> returns.
>> 
> 
> 
> Look at the below link. 
> http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03234.html 

Which still talks about (conditionally) logging messages, not ignoring
of errors.

Jan
Quan Xu April 28, 2016, 4:08 p.m. UTC | #10
On April 28, 2016 11:12 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 28.04.16 at 17:03, <quan.xu@intel.com> wrote:

> > On April 28, 2016 10:36 PM, Jan Beulich <JBeulich@suse.com> wrote:

> >> >>> On 28.04.16 at 16:14, <quan.xu@intel.com> wrote:

> >> > On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:

> >> >> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:

> >> >> > --- a/xen/drivers/passthrough/vtd/iommu.c

> >> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c

> >> >

> >> >

> >> >> > -static void iommu_flush_all(void)

> >> >> > +static int iommu_flush_all(void)

> >> >>

> >> >> __must_check

> >> >>

> >> >

> >> > The iommu_flush_all() is also called in intel_iommu_hwdom_init()

> >> > and vtd_crash_shutdown().

> >> > As we were on the same page, we can ignore the error code

> >> > propagation for these two call trees.

> >>

> >> I don't know what you're referring to here with "we were on the same

> page".

> > I

> >> don't think I've ever agreed (in the context of this series) to

> >> ignore any

> > error

> >> returns.

> >>

> >

> >

> > Look at the below link.

> > http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03234.h

> > tml

> 

> Which still talks about (conditionally) logging messages, not ignoring of errors.

> 


Sorry, check it again.
in intel_iommu_hwdom_init() and  vtd_crash_shutdown(), 
I still need to log messages as:

if ( iommu_flush_all() )
     printk()

?

Thanks for your patience.
Quan
Tian, Kevin April 29, 2016, 2:20 a.m. UTC | #11
> From: Xu, Quan

> Sent: Friday, April 29, 2016 12:09 AM

> 

> On April 28, 2016 11:12 PM, Jan Beulich <JBeulich@suse.com> wrote:

> > >>> On 28.04.16 at 17:03, <quan.xu@intel.com> wrote:

> > > On April 28, 2016 10:36 PM, Jan Beulich <JBeulich@suse.com> wrote:

> > >> >>> On 28.04.16 at 16:14, <quan.xu@intel.com> wrote:

> > >> > On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com> wrote:

> > >> >> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:

> > >> >> > --- a/xen/drivers/passthrough/vtd/iommu.c

> > >> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c

> > >> >

> > >> >

> > >> >> > -static void iommu_flush_all(void)

> > >> >> > +static int iommu_flush_all(void)

> > >> >>

> > >> >> __must_check

> > >> >>

> > >> >

> > >> > The iommu_flush_all() is also called in intel_iommu_hwdom_init()

> > >> > and vtd_crash_shutdown().

> > >> > As we were on the same page, we can ignore the error code

> > >> > propagation for these two call trees.

> > >>

> > >> I don't know what you're referring to here with "we were on the same

> > page".

> > > I

> > >> don't think I've ever agreed (in the context of this series) to

> > >> ignore any

> > > error

> > >> returns.

> > >>

> > >

> > >

> > > Look at the below link.

> > > http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03234.h

> > > tml

> >

> > Which still talks about (conditionally) logging messages, not ignoring of errors.

> >

> 

> Sorry, check it again.

> in intel_iommu_hwdom_init() and  vtd_crash_shutdown(),

> I still need to log messages as:

> 

> if ( iommu_flush_all() )

>      printk()

> 

> ?

> 

> Thanks for your patience.

> Quan


Yes, but please make sure you only print once to be useful.

Thanks
Kevin
Quan Xu April 29, 2016, 2:41 a.m. UTC | #12
On April 29, 2016 10:21 AM, Tian, Kevin <kevin.tian@intel.com> wrote:
> > From: Xu, Quan

> > Sent: Friday, April 29, 2016 12:09 AM

> > On April 28, 2016 11:12 PM, Jan Beulich <JBeulich@suse.com> wrote:

> > > >>> On 28.04.16 at 17:03, <quan.xu@intel.com> wrote:

> > > > On April 28, 2016 10:36 PM, Jan Beulich <JBeulich@suse.com> wrote:

> > > >> >>> On 28.04.16 at 16:14, <quan.xu@intel.com> wrote:

> > > >> > On April 25, 2016 7:53 PM, Jan Beulich <JBeulich@suse.com>

> wrote:

> > > >> >> >>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:

> > > >> >> > --- a/xen/drivers/passthrough/vtd/iommu.c

> > > >> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c

> > > >> >

> > > >> >

> > > >> >> > -static void iommu_flush_all(void)

> > > >> >> > +static int iommu_flush_all(void)

> > > >> >>

> > > >> >> __must_check

> > > >> >>

> > > >> >

> > > >> > The iommu_flush_all() is also called in

> > > >> > intel_iommu_hwdom_init() and vtd_crash_shutdown().

> > > >> > As we were on the same page, we can ignore the error code

> > > >> > propagation for these two call trees.

> > > >>

> > > >> I don't know what you're referring to here with "we were on the

> > > >> same

> > > page".

> > > > I

> > > >> don't think I've ever agreed (in the context of this series) to

> > > >> ignore any

> > > > error

> > > >> returns.

> > > >>

> > > >

> > > >

> > > > Look at the below link.

> > > > http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg032

> > > > 34.h

> > > > tml

> > >

> > > Which still talks about (conditionally) logging messages, not ignoring of

> errors.

> > >

> >

> > Sorry, check it again.

> > in intel_iommu_hwdom_init() and  vtd_crash_shutdown(), I still need to

> > log messages as:

> >

> > if ( iommu_flush_all() )

> >      printk()

> >

> > ?

> >

> > Thanks for your patience.

> > Quan

> 

> Yes, but please make sure you only print once to be useful.

>


 
Thanks.
Now I check the status in caller to make the print include caller which is failed, instead print in iommu_flush_all().
i.e., 
vtd_crash_shutdown()
{
..
    if ( iommu_flush_all() )
        printk(XENLOG_WARNING VTDPREFIX
               " vtd_crash_shutdown: IOMMU flush all failed.\n");
..
}

I am afraid I still don't get the point. To be honest, in such a fix,
The print is not so useful to me ( Correct me, I will continue to enhance it).

Quan
Jan Beulich April 29, 2016, 7:13 a.m. UTC | #13
>>> On 29.04.16 at 04:41, <quan.xu@intel.com> wrote:
> Now I check the status in caller to make the print include caller which is 
> failed, instead print in iommu_flush_all().
> i.e., 
> vtd_crash_shutdown()
> {
> ..
>     if ( iommu_flush_all() )
>         printk(XENLOG_WARNING VTDPREFIX
>                " vtd_crash_shutdown: IOMMU flush all failed.\n");
> ..
> }
> 
> I am afraid I still don't get the point. To be honest, in such a fix,
> The print is not so useful to me ( Correct me, I will continue to enhance 
> it).

So do you think it would be more useful to leave the admin with no
clue why a system misbehaves, instead of providing clear indication?
IOW - what usefulness concerns do you have?

Jan
Quan Xu April 29, 2016, 8:23 a.m. UTC | #14
On April 29, 2016 3:14 PM, Jan Beulich <JBeulich@suse.com> wrote:
>  >>> On 29.04.16 at 04:41, <quan.xu@intel.com> wrote:
> > Now I check the status in caller to make the print include caller
> > which is failed, instead print in iommu_flush_all().
> > i.e.,
> > vtd_crash_shutdown()
> > {
> > ..
> >     if ( iommu_flush_all() )
> >         printk(XENLOG_WARNING VTDPREFIX
> >                " vtd_crash_shutdown: IOMMU flush all failed.\n"); ..
> > }
> >
> > I am afraid I still don't get the point. To be honest, in such a fix,
> > The print is not so useful to me ( Correct me, I will continue to
> > enhance it).
> 
> So do you think it would be more useful to leave the admin with no clue why a
> system misbehaves, instead of providing clear indication?
> IOW - what usefulness concerns do you have?
> 

Jan, I will follow your suggestion for v3. I look forward to hearing  the other CCed maintainers' opinions on this.

...
i.e., In above case, in case I am an admin, I may be much more interested in what causes vt-d crash,
keeping quite during crash. It is just a matter of my personal preference. Ignore me.
...

Quan
diff mbox

Patch

diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index 2885e31..9b87f00 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -45,19 +45,31 @@  void do_suspend_lowlevel(void);
 
 static int device_power_down(void)
 {
+    int err;
+
     console_suspend();
 
     time_suspend();
 
     i8259A_suspend();
 
-    ioapic_suspend();
+    err = iommu_suspend();
+    if ( err )
+        goto iommu_suspend_error;
 
     iommu_suspend();
 
     lapic_suspend();
 
     return 0;
+
+ iommu_suspend_error:
+    ioapic_resume();
+    i8259A_resume();
+    time_resume();
+    console_resume();
+
+    return err;
 }
 
 static void device_power_up(void)
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index 4536106..02588aa 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1339,12 +1339,19 @@  static void invalidate_all_devices(void)
     iterate_ivrs_mappings(_invalidate_all_devices);
 }
 
-void amd_iommu_suspend(void)
+int amd_iommu_suspend(void)
 {
     struct amd_iommu *iommu;
 
     for_each_amd_iommu ( iommu )
         disable_iommu(iommu);
+
+    return 0;
+}
+
+void amd_iommu_crash_shutdown(void)
+{
+    amd_iommu_suspend();
 }
 
 void amd_iommu_resume(void)
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index c8ee8dc..fb8e816 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -628,6 +628,6 @@  const struct iommu_ops amd_iommu_ops = {
     .suspend = amd_iommu_suspend,
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
-    .crash_shutdown = amd_iommu_suspend,
+    .crash_shutdown = amd_iommu_crash_shutdown,
     .dump_p2m_table = amd_dump_p2m_table,
 };
diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
index 62da087..36c2c83 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2540,7 +2540,7 @@  static int force_stage = 2;
  */
 static u32 platform_features = ARM_SMMU_FEAT_COHERENT_WALK;
 
-static void arm_smmu_iotlb_flush_all(struct domain *d)
+static int arm_smmu_iotlb_flush_all(struct domain *d)
 {
 	struct arm_smmu_xen_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
 	struct iommu_domain *cfg;
@@ -2557,13 +2557,15 @@  static void arm_smmu_iotlb_flush_all(struct domain *d)
 		arm_smmu_tlb_inv_context(cfg->priv);
 	}
 	spin_unlock(&smmu_domain->lock);
+
+    return 0;
 }
 
-static void arm_smmu_iotlb_flush(struct domain *d, unsigned long gfn,
-                                 unsigned int page_count)
+static int arm_smmu_iotlb_flush(struct domain *d, unsigned long gfn,
+                                unsigned int page_count)
 {
     /* ARM SMMU v1 doesn't have flush by VMA and VMID */
-    arm_smmu_iotlb_flush_all(d);
+    return arm_smmu_iotlb_flush_all(d);
 }
 
 static struct iommu_domain *arm_smmu_get_domain(struct domain *d,
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index d44fc39..98b934b 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -392,11 +392,14 @@  int iommu_do_domctl(
     return ret;
 }
 
-void iommu_suspend()
+int iommu_suspend()
 {
     const struct iommu_ops *ops = iommu_get_ops();
+
     if ( iommu_enabled )
-        ops->suspend();
+        return ops->suspend();
+
+    return 0;
 }
 
 void iommu_share_p2m_table(struct domain* d)
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 930661a..a821306 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -542,11 +542,12 @@  static int iommu_flush_iotlb_psi(
     return status;
 }
 
-static void iommu_flush_all(void)
+static int iommu_flush_all(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int flush_dev_iotlb;
+    int rc = 0;
 
     flush_all_cache();
     for_each_drhd_unit ( drhd )
@@ -554,8 +555,13 @@  static void iommu_flush_all(void)
         iommu = drhd->iommu;
         iommu_flush_context_global(iommu, 0);
         flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+        rc = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+
+        if ( rc )
+            break;
     }
+
+    return rc;
 }
 
 static int iommu_flush_iotlb(struct domain *d, unsigned long gfn,
@@ -2421,16 +2427,20 @@  static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn)
 }
 
 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
-static void vtd_suspend(void)
+static int vtd_suspend(void)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     u32    i;
+    int rc;
 
     if ( !iommu_enabled )
-        return;
+        return 0;
 
-    iommu_flush_all();
+    rc = iommu_flush_all();
+
+    if ( rc )
+        return rc;
 
     for_each_drhd_unit ( drhd )
     {
@@ -2459,6 +2469,8 @@  static void vtd_suspend(void)
         if ( !iommu_intremap && iommu_qinval )
             disable_qinval(iommu);
     }
+
+    return 0;
 }
 
 static void vtd_crash_shutdown(void)
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 9c51172..f540fc8 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -119,7 +119,7 @@  extern unsigned long *shared_intremap_inuse;
 
 /* power management support */
 void amd_iommu_resume(void);
-void amd_iommu_suspend(void);
+int amd_iommu_suspend(void);
 void amd_iommu_crash_shutdown(void);
 
 /* guest iommu support */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index ca1c409..8710dfe 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -157,7 +157,7 @@  struct iommu_ops {
     unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
     int (*setup_hpet_msi)(struct msi_desc *);
 #endif /* CONFIG_X86 */
-    void (*suspend)(void);
+    int (*suspend)(void);
     void (*resume)(void);
     void (*share_p2m)(struct domain *d);
     void (*crash_shutdown)(void);
@@ -167,7 +167,7 @@  struct iommu_ops {
     void (*dump_p2m_table)(struct domain *d);
 };
 
-void iommu_suspend(void);
+int iommu_suspend(void);
 void iommu_resume(void);
 void iommu_crash_shutdown(void);
 int iommu_get_reserved_device_memory(iommu_grdm_t *, void *);