
[v3,03/10] IOMMU/MMU: enhance the call trees of IOMMU unmapping and mapping

Message ID 1461921917-48394-4-git-send-email-quan.xu@intel.com (mailing list archive)
State New, archived

Commit Message

Quan Xu April 29, 2016, 9:25 a.m. UTC
When an IOMMU mapping fails, we issue a best-effort rollback: we stop the
IOMMU mapping, unmap the previously established IOMMU maps, and then report
the error up the call tree. When rollback is not feasible for the hardware
domain (in the early initialization phase, or as a trade-off against
complexity), we proceed on a best-effort basis, only logging an error message.

IOMMU unmapping should continue despite an error, in an attempt to do
best-effort cleanup.
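
The rollback pattern applied at each mapping site is roughly the following
(a sketch only, with abbreviated names, not literal patch code):

    for ( i = 0; i < n; i++ )
    {
        rc = iommu_map_page(d, gfn + i, mfn + i, flags);
        if ( unlikely(rc) )
        {
            /* Best-effort rollback: undo the maps established so far. */
            while ( i-- )
                iommu_unmap_page(d, gfn + i);
            break;    /* report the first error up the call tree */
        }
    }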

Signed-off-by: Quan Xu <quan.xu@intel.com>

CC: Keir Fraser <keir@xen.org>
CC: Jan Beulich <jbeulich@suse.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: George Dunlap <george.dunlap@eu.citrix.com>
---
 xen/arch/x86/mm.c               | 13 ++++++++-----
 xen/arch/x86/mm/p2m-ept.c       | 27 +++++++++++++++++++++++++--
 xen/arch/x86/mm/p2m-pt.c        | 24 ++++++++++++++++++++----
 xen/arch/x86/mm/p2m.c           | 11 +++++++++--
 xen/drivers/passthrough/iommu.c | 14 +++++++++++++-
 5 files changed, 75 insertions(+), 14 deletions(-)

Comments

Tian, Kevin May 4, 2016, 1:45 a.m. UTC | #1
> From: Xu, Quan
> Sent: Friday, April 29, 2016 5:25 PM
> 
> When an IOMMU mapping fails, we issue a best-effort rollback: we stop the
> IOMMU mapping, unmap the previously established IOMMU maps, and then report
> the error up the call tree. When rollback is not feasible for the hardware
> domain (in the early initialization phase, or as a trade-off against
> complexity), we proceed on a best-effort basis, only logging an error message.
> 
> IOMMU unmapping should continue despite an error, in an attempt to do
> best-effort cleanup.
> 
> Signed-off-by: Quan Xu <quan.xu@intel.com>
> 
> CC: Keir Fraser <keir@xen.org>
> CC: Jan Beulich <jbeulich@suse.com>
> CC: Andrew Cooper <andrew.cooper3@citrix.com>
> CC: Jun Nakajima <jun.nakajima@intel.com>
> CC: Kevin Tian <kevin.tian@intel.com>
> CC: George Dunlap <george.dunlap@eu.citrix.com>
> ---
>  xen/arch/x86/mm.c               | 13 ++++++++-----
>  xen/arch/x86/mm/p2m-ept.c       | 27 +++++++++++++++++++++++++--
>  xen/arch/x86/mm/p2m-pt.c        | 24 ++++++++++++++++++++----
>  xen/arch/x86/mm/p2m.c           | 11 +++++++++--
>  xen/drivers/passthrough/iommu.c | 14 +++++++++++++-
>  5 files changed, 75 insertions(+), 14 deletions(-)
> 
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index a42097f..427097d 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info *page, unsigned
> long type,
>                             int preemptible)
>  {
>      unsigned long nx, x, y = page->u.inuse.type_info;
> -    int rc = 0;
> +    int rc = 0, ret = 0;
> 
>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
> 
> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info *page, unsigned
> long type,
>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
>          {
>              if ( (x & PGT_type_mask) == PGT_writable_page )
> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>              else if ( type == PGT_writable_page )
> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> -                               page_to_mfn(page),
> -                               IOMMUF_readable|IOMMUF_writable);
> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> +                                     page_to_mfn(page),
> +                                     IOMMUF_readable|IOMMUF_writable);
>          }
>      }
> 
> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info *page, unsigned
> long type,
>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
>          put_page(page);
> 
> +    if ( !rc )
> +        rc = ret;
> +
>      return rc;
>  }

I know there were quite a few discussions around the above change before
(sorry, I don't remember all of them). Based on my mental picture, we should
return the error where it first occurs. However, the above change looks to
favor an error from the later "rc = alloc_page_type" over an earlier
iommu_map/unmap_page error. Is that what we want? If there is a reason we
cannot return immediately upon an iommu_map/unmap failure, it would be more
reasonable to invert the above check like below:

	if ( !ret )
		ret = rc;

	return ret;

> 
> diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> index 1ed5b47..df87944 100644
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -821,6 +821,8 @@ out:
>      if ( needs_sync )
>          ept_sync_domain(p2m);
> 
> +    ret = 0;
> +
>      /* For host p2m, may need to change VT-d page table.*/
>      if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) &&
>           need_modify_vtd_table )
> @@ -831,11 +833,29 @@ out:
>          {
>              if ( iommu_flags )
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +                {
> +                    rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +
> +                    if ( !ret && unlikely(rc) )

I think you should move the check of ret before iommu_map_page, since we
should stop mapping upon any error (including one from the best-effort unmap
side).
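
I.e. roughly (just a sketch of the idea):

	for ( i = 0; i < (1 << order); i++ )
	{
	    if ( ret )      /* stop mapping upon any earlier error */
	        break;
	    rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
	    ...
	}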

> +                    {
> +                        while ( i-- )
> +                            iommu_unmap_page(d, gfn + i);
> +
> +                        ret = rc;
> +                        break;
> +                    }
> +                }
>              else
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_unmap_page(d, gfn + i);
> +                {
> +                    rc = iommu_unmap_page(d, gfn + i);
> +
> +                    if ( !ret && unlikely(rc) )
> +                        ret = rc;
> +                }
>          }
> +
> +        rc = 0;
>      }
> 
Thanks
Kevin
Jan Beulich May 4, 2016, 8:40 a.m. UTC | #2
>>> On 04.05.16 at 03:45, <kevin.tian@intel.com> wrote:
>>  From: Xu, Quan
>> Sent: Friday, April 29, 2016 5:25 PM
>> --- a/xen/arch/x86/mm.c
>> +++ b/xen/arch/x86/mm.c
>> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info *page, unsigned
>> long type,
>>                             int preemptible)
>>  {
>>      unsigned long nx, x, y = page->u.inuse.type_info;
>> -    int rc = 0;
>> +    int rc = 0, ret = 0;
>> 
>>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
>> 
>> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info *page, unsigned
>> long type,
>>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
>>          {
>>              if ( (x & PGT_type_mask) == PGT_writable_page )
>> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>>              else if ( type == PGT_writable_page )
>> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
>> -                               page_to_mfn(page),
>> -                               IOMMUF_readable|IOMMUF_writable);
>> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
>> +                                     page_to_mfn(page),
>> +                                     IOMMUF_readable|IOMMUF_writable);
>>          }
>>      }
>> 
>> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info *page, unsigned
>> long type,
>>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
>>          put_page(page);
>> 
>> +    if ( !rc )
>> +        rc = ret;
>> +
>>      return rc;
>>  }
> 
> I know there were quite a few discussions around the above change before
> (sorry, I don't remember all of them). Based on my mental picture, we should
> return the error where it first occurs. However, the above change looks to
> favor an error from the later "rc = alloc_page_type" over an earlier
> iommu_map/unmap_page error. Is that what we want?

Yes, as that's the primary operation here.

> If there is a reason we cannot return immediately upon
> an iommu_map/unmap failure,

Since for Dom0 we don't call domain_crash(), we must not bypass
alloc_page_type() here. And even for DomU it would seem at least
fragile if we did - we had better not alter the refcounting behavior.

>> --- a/xen/arch/x86/mm/p2m-ept.c
>> +++ b/xen/arch/x86/mm/p2m-ept.c
>> @@ -821,6 +821,8 @@ out:
>>      if ( needs_sync )
>>          ept_sync_domain(p2m);
>> 
>> +    ret = 0;
>> +
>>      /* For host p2m, may need to change VT-d page table.*/
>>      if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) &&
>>           need_modify_vtd_table )
>> @@ -831,11 +833,29 @@ out:
>>          {
>>              if ( iommu_flags )
>>                  for ( i = 0; i < (1 << order); i++ )
>> -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
>> +                {
>> +                    rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
>> +
>> +                    if ( !ret && unlikely(rc) )
> 
> I think you should move the check of ret before iommu_map_page, since we
> should stop mapping upon any error (including one from the best-effort unmap
> side).

Considering ret getting set to zero ahead of the loop plus ...

>> +                    {
>> +                        while ( i-- )
>> +                            iommu_unmap_page(d, gfn + i);
>> +
>> +                        ret = rc;
>> +                        break;

... this, it looks to me as if the checking of ret above is simply
unnecessary.

Jan
Quan Xu May 4, 2016, 9:13 a.m. UTC | #3
On May 04, 2016 4:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >>> On 04.05.16 at 03:45, <kevin.tian@intel.com> wrote:
> >>  From: Xu, Quan
> >> Sent: Friday, April 29, 2016 5:25 PM
> >> --- a/xen/arch/x86/mm.c
> >> +++ b/xen/arch/x86/mm.c
> >> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info
> >> *page, unsigned long type,
> >>                             int preemptible)  {
> >>      unsigned long nx, x, y = page->u.inuse.type_info;
> >> -    int rc = 0;
> >> +    int rc = 0, ret = 0;
> >>
> >>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
> >>
> >> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info
> >> *page, unsigned long type,
> >>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
> >>          {
> >>              if ( (x & PGT_type_mask) == PGT_writable_page )
> >> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
> >> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d,
> >> + page_to_mfn(page)));
> >>              else if ( type == PGT_writable_page )
> >> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> >> -                               page_to_mfn(page),
> >> -                               IOMMUF_readable|IOMMUF_writable);
> >> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> >> +                                     page_to_mfn(page),
> >> +
> >> + IOMMUF_readable|IOMMUF_writable);
> >>          }
> >>      }
> >>
> >> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info
> >> *page, unsigned long type,
> >>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
> >>          put_page(page);
> >>
> >> +    if ( !rc )
> >> +        rc = ret;
> >> +
> >>      return rc;
> >>  }
> >
> > I know there were quite a few discussions around the above change before
> > (sorry, I don't remember all of them). Based on my mental picture, we
> > should return the error where it first occurs. However, the above change
> > looks to favor an error from the later "rc = alloc_page_type" over an
> > earlier iommu_map/unmap_page error. Is that what we want?
> 
> Yes, as that's the primary operation here.
> 
> > If there is a reason we cannot return immediately upon
> > an iommu_map/unmap failure,
> 
> Since for Dom0 we don't call domain_crash(), we must not bypass
> alloc_page_type() here. And even for DomU it would seem at least fragile if we
> did - we better don't alter the refcounting behavior.
> 

I'm a little bit confused.
To confirm: for this __get_page_type(), can I leave my modification as is?

> >> --- a/xen/arch/x86/mm/p2m-ept.c
> >> +++ b/xen/arch/x86/mm/p2m-ept.c
> >> @@ -821,6 +821,8 @@ out:
> >>      if ( needs_sync )
> >>          ept_sync_domain(p2m);
> >>
> >> +    ret = 0;
> >> +
> >>      /* For host p2m, may need to change VT-d page table.*/
> >>      if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) &&
> >>           need_modify_vtd_table )
> >> @@ -831,11 +833,29 @@ out:
> >>          {
> >>              if ( iommu_flags )
> >>                  for ( i = 0; i < (1 << order); i++ )
> >> -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> >> +                {
> >> +                    rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i,
> >> + iommu_flags);
> >> +
> >> +                    if ( !ret && unlikely(rc) )
> >
> > I think you should move the check of ret before iommu_map_page, since we
> > should stop mapping upon any error (including one from the best-effort
> > unmap side).
> 
> Considering ret getting set to zero ahead of the loop plus ...
> 
> >> +                    {
> >> +                        while ( i-- )
> >> +                            iommu_unmap_page(d, gfn + i);
> >> +
> >> +                        ret = rc;
> >> +                        break;
> 
> ... this, it looks to me as if the checking of ret above is simply unnecessary.
> 

Makes sense. I'll drop the ret check.

Quan
Jan Beulich May 4, 2016, 9:28 a.m. UTC | #4
>>> On 04.05.16 at 11:13, <quan.xu@intel.com> wrote:
> On May 04, 2016 4:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 04.05.16 at 03:45, <kevin.tian@intel.com> wrote:
>> >>  From: Xu, Quan
>> >> Sent: Friday, April 29, 2016 5:25 PM
>> >> --- a/xen/arch/x86/mm.c
>> >> +++ b/xen/arch/x86/mm.c
>> >> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info
>> >> *page, unsigned long type,
>> >>                             int preemptible)  {
>> >>      unsigned long nx, x, y = page->u.inuse.type_info;
>> >> -    int rc = 0;
>> >> +    int rc = 0, ret = 0;
>> >>
>> >>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
>> >>
>> >> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info
>> >> *page, unsigned long type,
>> >>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
>> >>          {
>> >>              if ( (x & PGT_type_mask) == PGT_writable_page )
>> >> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>> >> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d,
>> >> + page_to_mfn(page)));
>> >>              else if ( type == PGT_writable_page )
>> >> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
>> >> -                               page_to_mfn(page),
>> >> -                               IOMMUF_readable|IOMMUF_writable);
>> >> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
>> >> +                                     page_to_mfn(page),
>> >> +
>> >> + IOMMUF_readable|IOMMUF_writable);
>> >>          }
>> >>      }
>> >>
>> >> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info
>> >> *page, unsigned long type,
>> >>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
>> >>          put_page(page);
>> >>
>> >> +    if ( !rc )
>> >> +        rc = ret;
>> >> +
>> >>      return rc;
>> >>  }
>> >
>> > I know there were quite a few discussions around the above change before
>> > (sorry, I don't remember all of them). Based on my mental picture, we
>> > should return the error where it first occurs. However, the above change
>> > looks to favor an error from the later "rc = alloc_page_type" over an
>> > earlier iommu_map/unmap_page error. Is that what we want?
>> 
>> Yes, as that's the primary operation here.
>> 
>> > If there is a reason we cannot return immediately upon
>> > an iommu_map/unmap failure,
>> 
>> Since for Dom0 we don't call domain_crash(), we must not bypass
>> alloc_page_type() here. And even for DomU it would seem at least
>> fragile if we did - we had better not alter the refcounting behavior.
>> 
> 
> I'm a little bit confused.
> To confirm: for this __get_page_type(), can I leave my modification as is?

Yes. I merely tried to explain the reason to Kevin.

Jan
George Dunlap May 4, 2016, 1:48 p.m. UTC | #5
On Fri, Apr 29, 2016 at 10:25 AM, Quan Xu <quan.xu@intel.com> wrote:
> When an IOMMU mapping fails, we issue a best-effort rollback: we stop the
> IOMMU mapping, unmap the previously established IOMMU maps, and then report
> the error up the call tree. When rollback is not feasible for the hardware
> domain (in the early initialization phase, or as a trade-off against
> complexity), we proceed on a best-effort basis, only logging an error message.
>
> IOMMU unmapping should continue despite an error, in an attempt to do
> best-effort cleanup.
>
> Signed-off-by: Quan Xu <quan.xu@intel.com>
>
> CC: Keir Fraser <keir@xen.org>
> CC: Jan Beulich <jbeulich@suse.com>
> CC: Andrew Cooper <andrew.cooper3@citrix.com>
> CC: Jun Nakajima <jun.nakajima@intel.com>
> CC: Kevin Tian <kevin.tian@intel.com>
> CC: George Dunlap <george.dunlap@eu.citrix.com>
> ---
>  xen/arch/x86/mm.c               | 13 ++++++++-----
>  xen/arch/x86/mm/p2m-ept.c       | 27 +++++++++++++++++++++++++--
>  xen/arch/x86/mm/p2m-pt.c        | 24 ++++++++++++++++++++----
>  xen/arch/x86/mm/p2m.c           | 11 +++++++++--
>  xen/drivers/passthrough/iommu.c | 14 +++++++++++++-
>  5 files changed, 75 insertions(+), 14 deletions(-)
>
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index a42097f..427097d 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
>                             int preemptible)
>  {
>      unsigned long nx, x, y = page->u.inuse.type_info;
> -    int rc = 0;
> +    int rc = 0, ret = 0;
>
>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
>
> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info *page, unsigned long type,
>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
>          {
>              if ( (x & PGT_type_mask) == PGT_writable_page )
> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
>              else if ( type == PGT_writable_page )
> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> -                               page_to_mfn(page),
> -                               IOMMUF_readable|IOMMUF_writable);
> +                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> +                                     page_to_mfn(page),
> +                                     IOMMUF_readable|IOMMUF_writable);
>          }
>      }
>
> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info *page, unsigned long type,
>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
>          put_page(page);
>
> +    if ( !rc )
> +        rc = ret;
> +
>      return rc;
>  }
>
> diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> index 1ed5b47..df87944 100644
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -821,6 +821,8 @@ out:
>      if ( needs_sync )
>          ept_sync_domain(p2m);
>
> +    ret = 0;
> +
>      /* For host p2m, may need to change VT-d page table.*/
>      if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) &&
>           need_modify_vtd_table )
> @@ -831,11 +833,29 @@ out:
>          {
>              if ( iommu_flags )
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +                {
> +                    rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
> +
> +                    if ( !ret && unlikely(rc) )
> +                    {
> +                        while ( i-- )
> +                            iommu_unmap_page(d, gfn + i);
> +
> +                        ret = rc;
> +                        break;
> +                    }
> +                }
>              else
>                  for ( i = 0; i < (1 << order); i++ )
> -                    iommu_unmap_page(d, gfn + i);
> +                {
> +                    rc = iommu_unmap_page(d, gfn + i);
> +
> +                    if ( !ret && unlikely(rc) )
> +                        ret = rc;
> +                }
>          }
> +
> +        rc = 0;
>      }

So the reason for setting ret to 0, then using rc, and then setting rc
back to zero is to make sure that the change is still propagated to the
altp2m if necessary?

I'm not a fan of this sort of "implied signalling".  The check at the
altp2m conditional is meant to say, "everything went fine, go ahead
and propagate the change".  But with your changes, that's not what we
want anymore -- we want, "If the change actually made it into the
hostp2m, propagate it to the altp2m."

As such, I think it would be a lot clearer to just add a new boolean
variable, maybe "entry_written", which we initialize to false and then
set to true when the entry is actually written; and then change the
altp2m condition to "if ( entry_written && p2m_is_hostp2m(..) )".

Then I think you could make the ret / rc use mirror what you do
elsewhere, where ret is used to store the return value of the function
call, and it's propagated to rc only if rc is not already set.

> @@ -680,11 +680,27 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
>          }
>          else if ( iommu_pte_flags )
>              for ( i = 0; i < (1UL << page_order); i++ )
> -                iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
> -                               iommu_pte_flags);
> +            {
> +                ret = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
> +                                    iommu_pte_flags);
> +
> +                if ( !rc && unlikely(ret) )
> +                {
> +                    while ( i-- )
> +                        iommu_unmap_page(p2m->domain, gfn + i);
> +
> +                    rc = ret;
> +                    break;
> +                }

Hmm, is this conditional right?  What the code actually says is: always
map pages, but only unmap them on an error if there have been no other
errors in the function so far.

As it happens, at the moment rc cannot be non-zero here since any time
it's non-zero the code jumps down to the 'out' label, skipping this.
But if that ever changed, this would fail to unmap when it probably
should.

It seems like it should be:

if ( unlikely(ret) )
{
  while ( i-- )
    iommu_unmap_page(p2m->domain, gfn + i);
  if ( !rc )
    rc = ret;
  break;
}

Or if you want to assume that rc will never be non-zero at this point,
then put an ASSERT() just before the for loop here.
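
That is, something like (sketch):

ASSERT(!rc);    /* any earlier error has already jumped to 'out' */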

Everything else looks good to me.  Thanks again for your work on this.

 -George
Tian, Kevin May 5, 2016, 12:38 a.m. UTC | #6
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Wednesday, May 04, 2016 5:29 PM
> 
> >>> On 04.05.16 at 11:13, <quan.xu@intel.com> wrote:
> > On May 04, 2016 4:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
> >> >>> On 04.05.16 at 03:45, <kevin.tian@intel.com> wrote:
> >> >>  From: Xu, Quan
> >> >> Sent: Friday, April 29, 2016 5:25 PM
> >> >> --- a/xen/arch/x86/mm.c
> >> >> +++ b/xen/arch/x86/mm.c
> >> >> @@ -2467,7 +2467,7 @@ static int __get_page_type(struct page_info
> >> >> *page, unsigned long type,
> >> >>                             int preemptible)  {
> >> >>      unsigned long nx, x, y = page->u.inuse.type_info;
> >> >> -    int rc = 0;
> >> >> +    int rc = 0, ret = 0;
> >> >>
> >> >>      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
> >> >>
> >> >> @@ -2578,11 +2578,11 @@ static int __get_page_type(struct page_info
> >> >> *page, unsigned long type,
> >> >>          if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
> >> >>          {
> >> >>              if ( (x & PGT_type_mask) == PGT_writable_page )
> >> >> -                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
> >> >> +                ret = iommu_unmap_page(d, mfn_to_gmfn(d,
> >> >> + page_to_mfn(page)));
> >> >>              else if ( type == PGT_writable_page )
> >> >> -                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
> >> >> -                               page_to_mfn(page),
> >> >> -                               IOMMUF_readable|IOMMUF_writable);
> >> >> +                ret = iommu_map_page(d, mfn_to_gmfn(d,
> page_to_mfn(page)),
> >> >> +                                     page_to_mfn(page),
> >> >> +
> >> >> + IOMMUF_readable|IOMMUF_writable);
> >> >>          }
> >> >>      }
> >> >>
> >> >> @@ -2599,6 +2599,9 @@ static int __get_page_type(struct page_info
> >> >> *page, unsigned long type,
> >> >>      if ( (x & PGT_partial) && !(nx & PGT_partial) )
> >> >>          put_page(page);
> >> >>
> >> >> +    if ( !rc )
> >> >> +        rc = ret;
> >> >> +
> >> >>      return rc;
> >> >>  }
> >> >
> >> > I know there were quite some discussions before around above change
> >> > (sorry I didn't remember all of them). Just based on mental picture we
> >> > should return error where it firstly occurs. However above change
> >> > looks favoring errors in later "rc = alloc_page_type" over earlier
> >> > iommu_map/unmap_page error. Is it what we want?
> >>
> >> Yes, as that's the primary operation here.
> >>
> >> > If there is a reason that we cannot return immediately upon
> >> > iommu_map/unmap,
> >>
> >> Since for Dom0 we don't call domain_crash(), we must not bypass
> >> alloc_page_type() here. And even for DomU it would seem at least
> >> fragile if we did - we had better not alter the refcounting behavior.
> >>
> >
> > I'm a little bit confused.
> > To confirm: for this __get_page_type(), can I leave my modification as is?
> 
> Yes. I merely tried to explain the reason to Kevin.
> 

Understand now.

Thanks
Kevin

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index a42097f..427097d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2467,7 +2467,7 @@  static int __get_page_type(struct page_info *page, unsigned long type,
                            int preemptible)
 {
     unsigned long nx, x, y = page->u.inuse.type_info;
-    int rc = 0;
+    int rc = 0, ret = 0;
 
     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 
@@ -2578,11 +2578,11 @@  static int __get_page_type(struct page_info *page, unsigned long type,
         if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) )
         {
             if ( (x & PGT_type_mask) == PGT_writable_page )
-                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
+                ret = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
             else if ( type == PGT_writable_page )
-                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
-                               page_to_mfn(page),
-                               IOMMUF_readable|IOMMUF_writable);
+                ret = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
+                                     page_to_mfn(page),
+                                     IOMMUF_readable|IOMMUF_writable);
         }
     }
 
@@ -2599,6 +2599,9 @@  static int __get_page_type(struct page_info *page, unsigned long type,
     if ( (x & PGT_partial) && !(nx & PGT_partial) )
         put_page(page);
 
+    if ( !rc )
+        rc = ret;
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 1ed5b47..df87944 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -821,6 +821,8 @@  out:
     if ( needs_sync )
         ept_sync_domain(p2m);
 
+    ret = 0;
+
     /* For host p2m, may need to change VT-d page table.*/
     if ( rc == 0 && p2m_is_hostp2m(p2m) && need_iommu(d) &&
          need_modify_vtd_table )
@@ -831,11 +833,29 @@  out:
         {
             if ( iommu_flags )
                 for ( i = 0; i < (1 << order); i++ )
-                    iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
+                {
+                    rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags);
+
+                    if ( !ret && unlikely(rc) )
+                    {
+                        while ( i-- )
+                            iommu_unmap_page(d, gfn + i);
+
+                        ret = rc;
+                        break;
+                    }
+                }
             else
                 for ( i = 0; i < (1 << order); i++ )
-                    iommu_unmap_page(d, gfn + i);
+                {
+                    rc = iommu_unmap_page(d, gfn + i);
+
+                    if ( !ret && unlikely(rc) )
+                        ret = rc;
+                }
         }
+
+        rc = 0;
     }
 
     unmap_domain_page(table);
@@ -850,6 +870,9 @@  out:
     if ( rc == 0 && p2m_is_hostp2m(p2m) )
         p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
 
+    if ( !rc )
+        rc = ret;
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 3d80612..9f5539e 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -498,7 +498,7 @@  p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     l1_pgentry_t intermediate_entry = l1e_empty();
     l2_pgentry_t l2e_content;
     l3_pgentry_t l3e_content;
-    int rc;
+    int rc, ret;
     unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt);
     /*
      * old_mfn and iommu_old_flags control possible flush/update needs on the
@@ -680,11 +680,27 @@  p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         }
         else if ( iommu_pte_flags )
             for ( i = 0; i < (1UL << page_order); i++ )
-                iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
-                               iommu_pte_flags);
+            {
+                ret = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
+                                    iommu_pte_flags);
+
+                if ( !rc && unlikely(ret) )
+                {
+                    while ( i-- )
+                        iommu_unmap_page(p2m->domain, gfn + i);
+
+                    rc = ret;
+                    break;
+                }
+            }
         else
             for ( i = 0; i < (1UL << page_order); i++ )
-                iommu_unmap_page(p2m->domain, gfn + i);
+            {
+                ret = iommu_unmap_page(p2m->domain, gfn + i);
+
+                if ( !rc && unlikely(ret) )
+                    rc = ret;
+            }
     }
 
     /*
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 6eef2f3..6a9bba1 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -636,13 +636,20 @@  p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
     mfn_t mfn_return;
     p2m_type_t t;
     p2m_access_t a;
+    int rc = 0, ret;
 
     if ( !paging_mode_translate(p2m->domain) )
     {
         if ( need_iommu(p2m->domain) )
             for ( i = 0; i < (1 << page_order); i++ )
-                iommu_unmap_page(p2m->domain, mfn + i);
-        return 0;
+            {
+               ret = iommu_unmap_page(p2m->domain, mfn + i);
+
+               if ( !rc && unlikely(ret) )
+                   rc = ret;
+            }
+
+        return rc;
     }
 
     ASSERT(gfn_locked_by_me(p2m, gfn));
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index a0003ac..d74433d 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -172,6 +172,8 @@  void __hwdom_init iommu_hwdom_init(struct domain *d)
     {
         struct page_info *page;
         unsigned int i = 0;
+        int ret, rc = 0;
+
         page_list_for_each ( page, &d->page_list )
         {
             unsigned long mfn = page_to_mfn(page);
@@ -182,10 +184,20 @@  void __hwdom_init iommu_hwdom_init(struct domain *d)
                  ((page->u.inuse.type_info & PGT_type_mask)
                   == PGT_writable_page) )
                 mapping |= IOMMUF_writable;
-            hd->platform_ops->map_page(d, gfn, mfn, mapping);
+
+            ret = hd->platform_ops->map_page(d, gfn, mfn, mapping);
+
+            if ( unlikely(ret) )
+                rc = ret;
+
             if ( !(i++ & 0xfffff) )
                 process_pending_softirqs();
         }
+
+        if ( rc )
+            printk(XENLOG_WARNING
+                   "iommu_hwdom_init: IOMMU mapping failed for dom%d.",
+                   d->domain_id);
     }
 
     return hd->platform_ops->hwdom_init(d);