diff mbox

[v2,03/11] IOMMU/MMU: enhance the call trees of IOMMU unmapping and mapping

Message ID 1460988011-17758-4-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Quan Xu April 18, 2016, 2 p.m. UTC
If IOMMU mapping and unmapping failed, the domain (with the exception of
the hardware domain) is crashed, treated as a fatal error. Rollback can
be lighter weight.

For the hardware domain, we do things on a best effort basis. When rollback
is not feasible (in early initialization phase or trade-off of complexity),
at least, unmap upon maps error or then throw out error message.

IOMMU unmapping should perhaps continue despite an error, in an attempt to
do best effort cleanup.

Signed-off-by: Quan Xu <quan.xu@intel.com>

CC: Keir Fraser <keir@xen.org>
CC: Jan Beulich <jbeulich@suse.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: George Dunlap <george.dunlap@eu.citrix.com>
---
 xen/arch/x86/mm.c               | 13 ++++++++-----
 xen/arch/x86/mm/p2m-ept.c       | 25 ++++++++++++++++++++++---
 xen/arch/x86/mm/p2m-pt.c        | 22 ++++++++++++++++++----
 xen/arch/x86/mm/p2m.c           | 11 +++++++++--
 xen/drivers/passthrough/iommu.c | 14 +++++++++++++-
 5 files changed, 70 insertions(+), 15 deletions(-)

Comments

Tian, Kevin April 19, 2016, 6:44 a.m. UTC | #1
> From: Xu, Quan
> Sent: Monday, April 18, 2016 10:00 PM
> 
> If IOMMU mapping and unmapping failed, the domain (with the exception of
> the hardware domain) is crashed, treated as a fatal error. Rollback can
> be lighter weight.

What do you mean by 'lighter weight"? Please clarify it.

> 
> For the hardware domain, we do things on a best effort basis. When rollback
> is not feasible (in early initialization phase or trade-off of complexity),
> at least, unmap upon maps error or then throw out error message.

remove 'or'. Based on next sentence, is above specific for IOMMU mapping?

> 
> IOMMU unmapping should perhaps continue despite an error, in an attempt to
> do best effort cleanup.
> 
> Signed-off-by: Quan Xu <quan.xu@intel.com>
> 
> CC: Keir Fraser <keir@xen.org>
> CC: Jan Beulich <jbeulich@suse.com>
> CC: Andrew Cooper <andrew.cooper3@citrix.com>
> CC: Jun Nakajima <jun.nakajima@intel.com>
> CC: Kevin Tian <kevin.tian@intel.com>
> CC: George Dunlap <george.dunlap@eu.citrix.com>
> ---
>  xen/arch/x86/mm.c               | 13 ++++++++-----
>  xen/arch/x86/mm/p2m-ept.c       | 25 ++++++++++++++++++++++---
>  xen/arch/x86/mm/p2m-pt.c        | 22 ++++++++++++++++++----
>  xen/arch/x86/mm/p2m.c           | 11 +++++++++--
>  xen/drivers/passthrough/iommu.c | 14 +++++++++++++-
>  5 files changed, 70 insertions(+), 15 deletions(-)
> 
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index c997b53..5c4fb58 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info *page, unsigned
> long type,
>                             int preemptible)
>  {
>      unsigned long nx, x, y = page->u.inuse.type_info;
> -    int rc = 0;
> +    int rc = 0, ret = 0;
> 
>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
> 
> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info *page, unsigned
> long type,
>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
>          {
>              if ( (x & PGT_type_mask) == PGT_writable_page )
> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>              else if ( type == PGT_writable_page )
> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> -                               page_to_mfn(page),
> -                               IOMMUF_readable|IOMMUF_writable);
> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> +                                     page_to_mfn(page),
> +                                     IOMMUF_readable|IOMMUF_writable);
>          }
>      }
> 
> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info *page, unsigned
> long type,
>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
>          put_page(page);
> 
> +    if ( unlikely(ret) )
> +        rc = ret;
> +
>      return rc;
>  }
> 
> diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> index 3cb6868..22c8d17 100644
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -665,7 +665,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn,
> mfn_t mfn,
>      ept_entry_t *table, *ept_entry = NULL;
>      unsigned long gfn_remainder = gfn;
>      unsigned int i, target = order / EPT_TABLE_ORDER;
> -    int ret, rc = 0;
> +    int ret, err = 0, rc = 0;
>      bool_t direct_mmio = (p2mt == p2m_mmio_direct);
>      uint8_t ipat = 0;
>      bool_t need_modify_vtd_table = 1;
> @@ -830,10 +830,26 @@ out:
>          {
>              if ( iommu_flags )
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +                {
> +                    ret = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +
> +                    if ( unlikely(ret) )
> +                    {
> +                        while (i)
> +                            iommu_unmap_page(d, gfn + --i);

How about below?

while (i-- >= 0)
	iommu_unmap_page(d, gfn + i);

> +
> +                        err = ret;
> +                        break;
> +                    }
> +                }
>              else
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_unmap_page(d, gfn + i);
> +                {
> +                    ret = iommu_unmap_page(d, gfn + i);
> +
> +                    if ( unlikely(ret) )
> +                        err = ret;
> +                }
>          }
>      }
> 
> @@ -849,6 +865,9 @@ out:
>      if ( rc == 0 && p2m_is_hostp2m(p2m) )
>          p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
> 
> +    if ( unlikely(err) )
> +        rc = err;
> +

not sure we need three return values here: rc, ret, err...

>      return rc;
>  }
> 
> diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
> index 3d80612..db4257a 100644
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -498,7 +498,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn,
> mfn_t mfn,
>      l1_pgentry_t intermediate_entry = l1e_empty();
>      l2_pgentry_t l2e_content;
>      l3_pgentry_t l3e_content;
> -    int rc;
> +    int rc, ret;
>      unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt);
>      /*
>       * old_mfn and iommu_old_flags control possible flush/update needs on the
> @@ -680,11 +680,25 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long
> gfn, mfn_t mfn,
>          }
>          else if ( iommu_pte_flags )
>              for ( i = 0; i < (1UL << page_order); i++ )
> -                iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
> -                               iommu_pte_flags);
> +            {
> +                ret = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
> +                                     iommu_pte_flags);
> +                if ( unlikely(ret) )
> +                {
> +                    while (i)
> +                        iommu_unmap_page(p2m->domain, gfn + --i);
> +
> +                    rc = ret;
> +                }
> +            }
>          else
>              for ( i = 0; i < (1UL << page_order); i++ )
> -                iommu_unmap_page(p2m->domain, gfn + i);
> +            {
> +                ret = iommu_unmap_page(p2m->domain, gfn + i);
> +
> +                if ( unlikely(ret) )
> +                    rc = ret;
> +            }
>      }
> 
>      /*
> diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> index b3fce1b..98450a4 100644
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -610,13 +610,20 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned long
> gfn, unsigned long mfn,
>      mfn_t mfn_return;
>      p2m_type_t t;
>      p2m_access_t a;
> +    int ret = 0, rc;
> 
>      if ( !paging_mode_translate(p2m->domain) )
>      {
>          if ( need_iommu(p2m->domain) )
>              for ( i = 0; i < (1 << page_order); i++ )
> -                iommu_unmap_page(p2m->domain, mfn + i);
> -        return 0;
> +            {
> +               rc = iommu_unmap_page(p2m->domain, mfn + i);
> +
> +               if ( unlikely(rc) )
> +                   ret = rc;
> +            }
> +
> +        return ret;
>      }
> 
>      ASSERT(gfn_locked_by_me(p2m, gfn));
> diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
> index 850101b..c351209 100644
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -172,6 +172,8 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
>      {
>          struct page_info *page;
>          unsigned int i = 0;
> +        int ret, rc = 0;
> +
>          page_list_for_each ( page, &d->page_list )
>          {
>              unsigned long mfn = page_to_mfn(page);
> @@ -182,10 +184,20 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
>                   ((page->u.inuse.type_info & PGT_type_mask)
>                    == PGT_writable_page) )
>                  mapping |= IOMMUF_writable;
> -            hd->platform_ops->map_page(d, gfn, mfn, mapping);
> +
> +            ret = hd->platform_ops->map_page(d, gfn, mfn, mapping);
> +
> +            if ( unlikely(ret) )
> +                rc = ret;
> +
>              if ( !(i++ & 0xfffff) )
>                  process_pending_softirqs();
>          }
> +
> +        if ( rc )
> +            printk(XENLOG_G_ERR
> +                   "dom%d: IOMMU mapping is failed for hardware domain.",
> +                   d->domain_id);
>      }
> 
>      return hd->platform_ops->hwdom_init(d);
> --
> 1.9.1
Quan Xu April 20, 2016, 5:27 a.m. UTC | #2
On  April 19, 2016 2:44pm, Tian, Kevin <kevin.tian@intel.com> wrote:
> > From: Xu, Quan
> > Sent: Monday, April 18, 2016 10:00 PM
> >
> > If IOMMU mapping and unmapping failed, the domain (with the exception
> > of the hardware domain) is crashed, treated as a fatal error. Rollback
> > can be lighter weight.
> 
> What do you mean by 'lighter weight"? Please clarify it.
> 
> >
> > For the hardware domain, we do things on a best effort basis. When
> > rollback is not feasible (in early initialization phase or trade-off
> > of complexity), at least, unmap upon maps error or then throw out error
> message.
> 
> remove 'or'. Based on next sentence, is above specific for IOMMU mapping?
> 
> >
> > IOMMU unmapping should perhaps continue despite an error, in an
> > attempt to do best effort cleanup.
> >



Could I enhance the commit log as below:
"""
If IOMMU mapping and unmapping failed, the domain (with the exception of the hardware domain) is crashed,
treated as a fatal error. Rollback can be lighter weight (at least errors need to be propagated).

IOMMU mapping breaks for an error, unmapping upon maps, throwing out error message and then reporting
the error up the call trees. When rollback is not feasible (in early initialization phase or trade-off of complexity)
for the hardware domain, we do things on a best effort basis, only throwing out error message.

IOMMU unmapping should perhaps continue despite an error, in an attempt to do best effort cleanup.
"""


I am still not sure whether we really need throw out error message for each IOMMU mapping or not.
If yes, I will throw out error message for each IOMMU mapping in next v3.


> > Signed-off-by: Quan Xu <quan.xu@intel.com>
> >
> > CC: Keir Fraser <keir@xen.org>
> > CC: Jan Beulich <jbeulich@suse.com>
> > CC: Andrew Cooper <andrew.cooper3@citrix.com>
> > CC: Jun Nakajima <jun.nakajima@intel.com>
> > CC: Kevin Tian <kevin.tian@intel.com>
> > CC: George Dunlap <george.dunlap@eu.citrix.com>
> > ---
> >  xen/arch/x86/mm.c               | 13 ++++++++-----
> >  xen/arch/x86/mm/p2m-ept.c       | 25 ++++++++++++++++++++++---
> >  xen/arch/x86/mm/p2m-pt.c        | 22 ++++++++++++++++++----
> >  xen/arch/x86/mm/p2m.c           | 11 +++++++++--
> >  xen/drivers/passthrough/iommu.c | 14 +++++++++++++-
> >  5 files changed, 70 insertions(+), 15 deletions(-)
> >
> > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index
> > c997b53..5c4fb58 100644
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info
> > *page, unsigned long type,
> >                             int preemptible)  {
> >      unsigned long nx, x, y = page->u.inuse.type_info;
> > -    int rc = 0;
> > +    int rc = 0, ret = 0;
> >
> >      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
> >
> > @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info
> > *page, unsigned long type,
> >          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
> >          {
> >              if ( (x & PGT_type_mask) == PGT_writable_page )
> > -                iommu_unmap_page(d, mfn_to_gmfn(d,
> page_to_mfn(page)));
> > +                ret = iommu_unmap_page(d, mfn_to_gmfn(d,
> > + page_to_mfn(page)));
> >              else if ( type == PGT_writable_page )
> > -                iommu_map_page(d, mfn_to_gmfn(d,
> page_to_mfn(page)),
> > -                               page_to_mfn(page),
> > -
> IOMMUF_readable|IOMMUF_writable);
> > +                ret = iommu_map_page(d, mfn_to_gmfn(d,
> page_to_mfn(page)),
> > +                                     page_to_mfn(page),
> > +
> > + IOMMUF_readable|IOMMUF_writable);
> >          }
> >      }
> >
> > @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info
> > *page, unsigned long type,
> >      if ( (x & PGT_partial) && !(nx & PGT_partial) )
> >          put_page(page);
> >
> > +    if ( unlikely(ret) )
> > +        rc = ret;
> > +
> >      return rc;
> >  }
> >
> > diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> > index 3cb6868..22c8d17 100644
> > --- a/xen/arch/x86/mm/p2m-ept.c
> > +++ b/xen/arch/x86/mm/p2m-ept.c
> > @@ -665,7 +665,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned
> > long gfn, mfn_t mfn,
> >      ept_entry_t *table, *ept_entry = NULL;
> >      unsigned long gfn_remainder = gfn;
> >      unsigned int i, target = order / EPT_TABLE_ORDER;
> > -    int ret, rc = 0;
> > +    int ret, err = 0, rc = 0;
> >      bool_t direct_mmio = (p2mt == p2m_mmio_direct);
> >      uint8_t ipat = 0;
> >      bool_t need_modify_vtd_table = 1; @@ -830,10 +830,26 @@ out:
> >          {
> >              if ( iommu_flags )
> >                  for ( i = 0; i < (1 << order); i++ )
> > -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i,
> iommu_flags);
> > +                {
> > +                    ret = iommu_map_page(d, gfn + i, mfn_x(mfn) + i,
> > + iommu_flags);
> > +
> > +                    if ( unlikely(ret) )
> > +                    {
> > +                        while (i)
> > +                            iommu_unmap_page(d, gfn + --i);
> 
> How about below?
> 
> while (i-- >= 0)
> 	iommu_unmap_page(d, gfn + i);
> 

this modification is based on discussion rooted at http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01779.html
wait for Jan's decision.


> > +
> > +                        err = ret;
> > +                        break;
> > +                    }
> > +                }
> >              else
> >                  for ( i = 0; i < (1 << order); i++ )
> > -                    iommu_unmap_page(d, gfn + i);
> > +                {
> > +                    ret = iommu_unmap_page(d, gfn + i);
> > +
> > +                    if ( unlikely(ret) )
> > +                        err = ret;
> > +                }
> >          }
> >      }
> >
> > @@ -849,6 +865,9 @@ out:
> >      if ( rc == 0 && p2m_is_hostp2m(p2m) )
> >          p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt,
> > p2ma);
> >
> > +    if ( unlikely(err) )
> > +        rc = err;
> > +
> 
> not sure we need three return values here: rc, ret, err...
> 

I am afraid that we need three return values here.
As similar as George mentioned, we need to make sure that the altp2m gets updated when the hostp2m is updated, even if the IOMMU mapping or unmapping fail.
if altp2m is enabled, rc cannot be non-zero here due to IOMMU mapping or unmapping, so I need another return value.

Quan


> >      return rc;
> >  }
> >
> > diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index
> > 3d80612..db4257a 100644
> > --- a/xen/arch/x86/mm/p2m-pt.c
> > +++ b/xen/arch/x86/mm/p2m-pt.c
> > @@ -498,7 +498,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
> unsigned
> > long gfn, mfn_t mfn,
> >      l1_pgentry_t intermediate_entry = l1e_empty();
> >      l2_pgentry_t l2e_content;
> >      l3_pgentry_t l3e_content;
> > -    int rc;
> > +    int rc, ret;
> >      unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt);
> >      /*
> >       * old_mfn and iommu_old_flags control possible flush/update
> > needs on the @@ -680,11 +680,25 @@ p2m_pt_set_entry(struct
> p2m_domain
> > *p2m, unsigned long gfn, mfn_t mfn,
> >          }
> >          else if ( iommu_pte_flags )
> >              for ( i = 0; i < (1UL << page_order); i++ )
> > -                iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
> > -                               iommu_pte_flags);
> > +            {
> > +                ret = iommu_map_page(p2m->domain, gfn + i,
> mfn_x(mfn) + i,
> > +                                     iommu_pte_flags);
> > +                if ( unlikely(ret) )
> > +                {
> > +                    while (i)
> > +                        iommu_unmap_page(p2m->domain, gfn + --i);
> > +
> > +                    rc = ret;
> > +                }
> > +            }
> >          else
> >              for ( i = 0; i < (1UL << page_order); i++ )
> > -                iommu_unmap_page(p2m->domain, gfn + i);
> > +            {
> > +                ret = iommu_unmap_page(p2m->domain, gfn + i);
> > +
> > +                if ( unlikely(ret) )
> > +                    rc = ret;
> > +            }
> >      }
> >
> >      /*
> > diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index
> > b3fce1b..98450a4 100644
> > --- a/xen/arch/x86/mm/p2m.c
> > +++ b/xen/arch/x86/mm/p2m.c
> > @@ -610,13 +610,20 @@ p2m_remove_page(struct p2m_domain *p2m,
> unsigned
> > long gfn, unsigned long mfn,
> >      mfn_t mfn_return;
> >      p2m_type_t t;
> >      p2m_access_t a;
> > +    int ret = 0, rc;
> >
> >      if ( !paging_mode_translate(p2m->domain) )
> >      {
> >          if ( need_iommu(p2m->domain) )
> >              for ( i = 0; i < (1 << page_order); i++ )
> > -                iommu_unmap_page(p2m->domain, mfn + i);
> > -        return 0;
> > +            {
> > +               rc = iommu_unmap_page(p2m->domain, mfn + i);
> > +
> > +               if ( unlikely(rc) )
> > +                   ret = rc;
> > +            }
> > +
> > +        return ret;
> >      }
> >
> >      ASSERT(gfn_locked_by_me(p2m, gfn)); diff --git
> > a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
> > index 850101b..c351209 100644
> > --- a/xen/drivers/passthrough/iommu.c
> > +++ b/xen/drivers/passthrough/iommu.c
> > @@ -172,6 +172,8 @@ void __hwdom_init iommu_hwdom_init(struct
> domain *d)
> >      {
> >          struct page_info *page;
> >          unsigned int i = 0;
> > +        int ret, rc = 0;
> > +
> >          page_list_for_each ( page, &d->page_list )
> >          {
> >              unsigned long mfn = page_to_mfn(page); @@ -182,10
> +184,20
> > @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
> >                   ((page->u.inuse.type_info & PGT_type_mask)
> >                    == PGT_writable_page) )
> >                  mapping |= IOMMUF_writable;
> > -            hd->platform_ops->map_page(d, gfn, mfn, mapping);
> > +
> > +            ret = hd->platform_ops->map_page(d, gfn, mfn, mapping);
> > +
> > +            if ( unlikely(ret) )
> > +                rc = ret;
> > +
> >              if ( !(i++ & 0xfffff) )
> >                  process_pending_softirqs();
> >          }
> > +
> > +        if ( rc )
> > +            printk(XENLOG_G_ERR
> > +                   "dom%d: IOMMU mapping is failed for hardware
> domain.",
> > +                   d->domain_id);
> >      }
> >
> >      return hd->platform_ops->hwdom_init(d);
> > --
> > 1.9.1
Tian, Kevin April 20, 2016, 5:44 a.m. UTC | #3
> From: Xu, Quan
> Sent: Wednesday, April 20, 2016 1:27 PM
> 
> On  April 19, 2016 2:44pm, Tian, Kevin <kevin.tian@intel.com> wrote:
> > > From: Xu, Quan
> > > Sent: Monday, April 18, 2016 10:00 PM
> > >
> > > If IOMMU mapping and unmapping failed, the domain (with the exception
> > > of the hardware domain) is crashed, treated as a fatal error. Rollback
> > > can be lighter weight.
> >
> > What do you mean by 'lighter weight"? Please clarify it.
> >
> > >
> > > For the hardware domain, we do things on a best effort basis. When
> > > rollback is not feasible (in early initialization phase or trade-off
> > > of complexity), at least, unmap upon maps error or then throw out error
> > message.
> >
> > remove 'or'. Based on next sentence, is above specific for IOMMU mapping?
> >
> > >
> > > IOMMU unmapping should perhaps continue despite an error, in an
> > > attempt to do best effort cleanup.
> > >
> 
> 
> 
> Could I enhance the commit log as below:
> """
> If IOMMU mapping and unmapping failed, the domain (with the exception of the hardware
> domain) is crashed,
> treated as a fatal error. Rollback can be lighter weight (at least errors need to be
> propagated).

Still not clear about 'lighter weight'. Then what is the 'heavier weight' side then?
Isn't "at least errors need to be propagated" exactly what 'rollback' means?

> 
> IOMMU mapping breaks for an error, unmapping upon maps, throwing out error message
> and then reporting
> the error up the call trees. When rollback is not feasible (in early initialization phase or
> trade-off of complexity)
> for the hardware domain, we do things on a best effort basis, only throwing out error
> message.

If above elaborates what earlier 'lighter weight" means, then please just
say a best-effort rollack.

> 
> IOMMU unmapping should perhaps continue despite an error, in an attempt to do best
> effort cleanup.
> """
> 
> 
> I am still not sure whether we really need throw out error message for each IOMMU
> mapping or not.
> If yes, I will throw out error message for each IOMMU mapping in next v3.

I'm not sure about your question here. Have to judge based on specific case.

> > >          {
> > >              if ( iommu_flags )
> > >                  for ( i = 0; i < (1 << order); i++ )
> > > -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i,
> > iommu_flags);
> > > +                {
> > > +                    ret = iommu_map_page(d, gfn + i, mfn_x(mfn) + i,
> > > + iommu_flags);
> > > +
> > > +                    if ( unlikely(ret) )
> > > +                    {
> > > +                        while (i)
> > > +                            iommu_unmap_page(d, gfn + --i);
> >
> > How about below?
> >
> > while (i-- >= 0)
> > 	iommu_unmap_page(d, gfn + i);
> >
> 
> this modification is based on discussion rooted at
> http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01779.html
> wait for Jan's decision.

Either way is OK. 'gfn + --I' just looks not very readable to me.
Jan Beulich April 20, 2016, 6:11 a.m. UTC | #4
>>> "Xu, Quan" <quan.xu@intel.com> 04/20/16 7:29 AM >>>
>I am still not sure whether we really need throw out error message for each IOMMU mapping or not.
>If yes, I will throw out error message for each IOMMU mapping in next v3.

Ideally not, if it's a batch that it failing, The question just is whether at the point you
issue the error message you can know another got already emitted. In no case
must this lead to spamming of the console originating from an unprivileged domain.

>> > +                    if ( unlikely(ret) )
>> > +                    {
>> > +                        while (i)
>> > +                            iommu_unmap_page(d, gfn + --i);
>> 
>> How about below?
>> 
>> while (i-- >= 0)
>> 	iommu_unmap_page(d, gfn + i);
> 
>this modification is based on discussion rooted at http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01779.html
>wait for Jan's decision.

But did you really _follow_ that discussion? The adjustment done by that patch
was specifically not deemed good, so the shape Kevin suggests is in line with
the outcome of that discussion (except that I'd suggest omitting the ">= 0", the
more that i at least ought to be unsigned here).

Jan
Quan Xu April 20, 2016, 6:26 a.m. UTC | #5
On April 20, 2016 2:12pm, <jbeulich@suse.com > wrote:
> >>> "Xu, Quan" <quan.xu@intel.com> 04/20/16 7:29 AM >>>
> Ideally not, if it's a batch that it failing, The question just is whether at the point
> you issue the error message you can know another got already emitted. In no
> case must this lead to spamming of the console originating from an unprivileged
> domain.
> 
> >> > +                    if ( unlikely(ret) )
> >> > +                    {
> >> > +                        while (i)
> >> > +                            iommu_unmap_page(d, gfn + --i);
> >>
> >> How about below?
> >>
> >> while (i-- >= 0)
> >> 	iommu_unmap_page(d, gfn + i);
> >
> >this modification is based on discussion rooted at
> >http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01779.ht
> >ml
> >wait for Jan's decision.
> 
> But did you really _follow_ that discussion? The adjustment done by that patch
> was specifically not deemed good, so the shape Kevin suggests is in line with the
> outcome of that discussion (except that I'd suggest omitting the ">= 0", the
> more that i at least ought to be unsigned here).
> 

Based on your suggestions, I will fix it as below:

+while ( i-- )
+    iommu_unmap_page(d, gfn + i);

Quan
Jan Beulich April 25, 2016, 9:50 a.m. UTC | #6
>>> On 18.04.16 at 16:00, <quan.xu@intel.com> wrote:
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
>                             int preemptible)
>  {
>      unsigned long nx, x, y = page->u.inuse.type_info;
> -    int rc = 0;
> +    int rc = 0, ret = 0;
>  
>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
>  
> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info *page, unsigned long type,
>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
>          {
>              if ( (x & PGT_type_mask) == PGT_writable_page )
> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>              else if ( type == PGT_writable_page )
> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> -                               page_to_mfn(page),
> -                               IOMMUF_readable|IOMMUF_writable);
> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> +                                     page_to_mfn(page),
> +                                     IOMMUF_readable|IOMMUF_writable);
>          }
>      }
>  
> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info *page, unsigned long type,
>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
>          put_page(page);
>  
> +    if ( unlikely(ret) )
> +        rc = ret;

Please don't overwrite the more relevant "rc" value in situations like
this in case that is already non-zero. In this specific case you can
actually get away without introducing a second error code variable,
since the only place rc gets altered is between the two hunks above,
and overwriting the rc value from map/unmap is then exactly what
we want (but I'd much appreciate if you added a comment to this
effect).

> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -665,7 +665,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>      ept_entry_t *table, *ept_entry = NULL;
>      unsigned long gfn_remainder = gfn;
>      unsigned int i, target = order / EPT_TABLE_ORDER;
> -    int ret, rc = 0;
> +    int ret, err = 0, rc = 0;

Just like Kevin, and despite your reply to him I do not see the need
fir the 3rd error indicator variable here:

> @@ -830,10 +830,26 @@ out:
>          {
>              if ( iommu_flags )
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +                {
> +                    ret = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +
> +                    if ( unlikely(ret) )
> +                    {
> +                        while (i)

while ( i-- )

> +                            iommu_unmap_page(d, gfn + --i);
> +
> +                        err = ret;
> +                        break;
> +                    }
> +                }
>              else
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_unmap_page(d, gfn + i);
> +                {
> +                    ret = iommu_unmap_page(d, gfn + i);
> +
> +                    if ( unlikely(ret) )
> +                        err = ret;
> +                }
>          }

You can simply utilize that rc is zero upon entering this if(), clearing
it back to zero here.

> @@ -849,6 +865,9 @@ out:
>      if ( rc == 0 && p2m_is_hostp2m(p2m) )
>          p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
>  
> +    if ( unlikely(err) )
> +        rc = err;

Same comment as for mm.c (and also for mm/p2m-pt.c and
mm/p2m.c below, albeit the impact there is limited - the only
difference is whether to return first or last error encountered, but
I'd still prefer the first one to be used, as for a crashed domain later
errors may end up being kind of meaningless wrt what caused the
domain to get crashed).

> @@ -182,10 +184,20 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
>                   ((page->u.inuse.type_info & PGT_type_mask)
>                    == PGT_writable_page) )
>                  mapping |= IOMMUF_writable;
> -            hd->platform_ops->map_page(d, gfn, mfn, mapping);
> +
> +            ret = hd->platform_ops->map_page(d, gfn, mfn, mapping);
> +
> +            if ( unlikely(ret) )
> +                rc = ret;
> +
>              if ( !(i++ & 0xfffff) )
>                  process_pending_softirqs();
>          }
> +
> +        if ( rc )
> +            printk(XENLOG_G_ERR
> +                   "dom%d: IOMMU mapping is failed for hardware domain.",
> +                   d->domain_id);

Leaving the admin / developer with no indication of at least one
specific case of a page where a failure occurred. As said in various
places before: Log messages should provide useful information for
at least getting started at investigating the issue, without first of
all having to further instrument the code.

In any event the "is" should be dropped from the text.

Jan
George Dunlap April 27, 2016, 3:48 p.m. UTC | #7
On 18/04/16 15:00, Quan Xu wrote:
> If IOMMU mapping and unmapping failed, the domain (with the exception of
> the hardware domain) is crashed, treated as a fatal error. Rollback can
> be lighter weight.
> 
> For the hardware domain, we do things on a best effort basis. When rollback
> is not feasible (in early initialization phase or trade-off of complexity),
> at least, unmap upon maps error or then throw out error message.
> 
> IOMMU unmapping should perhaps continue despite an error, in an attempt to
> do best effort cleanup.
> 
> Signed-off-by: Quan Xu <quan.xu@intel.com>

FWIW, looking through this I think Jan and Kevin covered everything I
would have brought up.

 -George
Quan Xu April 28, 2016, 1:18 a.m. UTC | #8
On April 27, 2016 11:48 PM, George Dunlap <george.dunlap@citrix.com> wrote:
> On 18/04/16 15:00, Quan Xu wrote:
> > If IOMMU mapping and unmapping failed, the domain (with the exception
> > of the hardware domain) is crashed, treated as a fatal error. Rollback
> > can be lighter weight.
> >
> > For the hardware domain, we do things on a best effort basis. When
> > rollback is not feasible (in early initialization phase or trade-off
> > of complexity), at least, unmap upon maps error or then throw out error
> message.
> >
> > IOMMU unmapping should perhaps continue despite an error, in an
> > attempt to do best effort cleanup.
> >
> > Signed-off-by: Quan Xu <quan.xu@intel.com>
> 
> FWIW, looking through this I think Jan and Kevin covered everything I would
> have brought up.
> 
George, thanks for your attention. Your comments as always are greatly appreciated too. :)
Quan
diff mbox

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index c997b53..5c4fb58 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2467,7 +2467,7 @@  static int __get_page_type(struct page_info *page, unsigned long type,
                            int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
-    int rc = 0;
+    int rc = 0, ret = 0;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 
@@ -2578,11 +2578,11 @@  static int __get_page_type(struct page_info *page, unsigned long type,
         if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
         {
             if ( (x & PGT_type_mask) == PGT_writable_page )
-                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
+                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
             else if ( type == PGT_writable_page )
-                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
-                               page_to_mfn(page),
-                               IOMMUF_readable|IOMMUF_writable);
+                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
+                                     page_to_mfn(page),
+                                     IOMMUF_readable|IOMMUF_writable);
         }
     }
 
@@ -2599,6 +2599,9 @@  static int __get_page_type(struct page_info *page, unsigned long type,
     if ( (x & PGT_partial) && !(nx & PGT_partial) )
         put_page(page);
 
+    if ( unlikely(ret) )
+        rc = ret;
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 3cb6868..22c8d17 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -665,7 +665,7 @@  ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     ept_entry_t *table, *ept_entry = NULL;
     unsigned long gfn_remainder = gfn;
     unsigned int i, target = order / EPT_TABLE_ORDER;
-    int ret, rc = 0;
+    int ret, err = 0, rc = 0;
     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
     bool_t need_modify_vtd_table = 1;
@@ -830,10 +830,26 @@  out:
         {
             if ( iommu_flags )
                 for ( i = 0; i < (1 << order); i++ )
-                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
+                {
+                    ret = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
+
+                    if ( unlikely(ret) )
+                    {
+                        while (i)
+                            iommu_unmap_page(d, gfn + --i);
+
+                        err = ret;
+                        break;
+                    }
+                }
             else
                 for ( i = 0; i < (1 << order); i++ )
-                    iommu_unmap_page(d, gfn + i);
+                {
+                    ret = iommu_unmap_page(d, gfn + i);
+
+                    if ( unlikely(ret) )
+                        err = ret;
+                }
         }
     }
 
@@ -849,6 +865,9 @@  out:
     if ( rc == 0 && p2m_is_hostp2m(p2m) )
         p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
 
+    if ( unlikely(err) )
+        rc = err;
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 3d80612..db4257a 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -498,7 +498,7 @@  p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     l1_pgentry_t intermediate_entry = l1e_empty();
     l2_pgentry_t l2e_content;
     l3_pgentry_t l3e_content;
-    int rc;
+    int rc, ret;
     unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt);
     /*
      * old_mfn and iommu_old_flags control possible flush/update needs on the
@@ -680,11 +680,25 @@  p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         }
         else if ( iommu_pte_flags )
             for ( i = 0; i < (1UL << page_order); i++ )
-                iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
-                               iommu_pte_flags);
+            {
+                ret = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
+                                     iommu_pte_flags);
+                if ( unlikely(ret) )
+                {
+                    while (i)
+                        iommu_unmap_page(p2m->domain, gfn + --i);
+
+                    rc = ret;
+                }
+            }
         else
             for ( i = 0; i < (1UL << page_order); i++ )
-                iommu_unmap_page(p2m->domain, gfn + i);
+            {
+                ret = iommu_unmap_page(p2m->domain, gfn + i);
+
+                if ( unlikely(ret) )
+                    rc = ret;
+            }
     }
 
     /*
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index b3fce1b..98450a4 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -610,13 +610,20 @@  p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
     mfn_t mfn_return;
     p2m_type_t t;
     p2m_access_t a;
+    int ret = 0, rc;
 
     if ( !paging_mode_translate(p2m->domain) )
     {
         if ( need_iommu(p2m->domain) )
             for ( i = 0; i < (1 << page_order); i++ )
-                iommu_unmap_page(p2m->domain, mfn + i);
-        return 0;
+            {
+               rc = iommu_unmap_page(p2m->domain, mfn + i);
+
+               if ( unlikely(rc) )
+                   ret = rc;
+            }
+
+        return ret;
     }
 
     ASSERT(gfn_locked_by_me(p2m, gfn));
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 850101b..c351209 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -172,6 +172,8 @@  void __hwdom_init iommu_hwdom_init(struct domain *d)
     {
         struct page_info *page;
         unsigned int i = 0;
+        int ret, rc = 0;
+
         page_list_for_each ( page, &d->page_list )
         {
             unsigned long mfn = page_to_mfn(page);
@@ -182,10 +184,20 @@  void __hwdom_init iommu_hwdom_init(struct domain *d)
                  ((page->u.inuse.type_info & PGT_type_mask)
                   == PGT_writable_page) )
                 mapping |= IOMMUF_writable;
-            hd->platform_ops->map_page(d, gfn, mfn, mapping);
+
+            ret = hd->platform_ops->map_page(d, gfn, mfn, mapping);
+
+            if ( unlikely(ret) )
+                rc = ret;
+
             if ( !(i++ & 0xfffff) )
                 process_pending_softirqs();
         }
+
+        if ( rc )
+            printk(XENLOG_G_ERR
+                   "dom%d: IOMMU mapping is failed for hardware domain.",
+                   d->domain_id);
     }
 
     return hd->platform_ops->hwdom_init(d);