
[1/3] AMD/IOMMU: don't needlessly trigger errors/crashes when unmapping a page

Message ID d41313de-f95d-729d-9cdb-bb19dc45d162@suse.com (mailing list archive)
State Superseded
Series AMD/IOMMU: re-work mode updating

Commit Message

Jan Beulich Nov. 6, 2019, 3:18 p.m. UTC
Unmapping a page which has never been mapped should be a no-op (note how
it already is in case there was no root page table allocated). There's
in particular no need to grow the number of page table levels in use,
and there's also no need to allocate intermediate page tables except
when needing to split a large page.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
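
The arithmetic behind "can't possibly have a mapping" is simple: each AMD IOMMU
page table level indexes PTE_PER_TABLE_SHIFT (9) bits of the frame number, so a
hierarchy of N levels only reaches DFNs below 1 << (9 * N); for example, 3 levels
cover 2^27 frames, i.e. 512 GiB of address space. A minimal, self-contained sketch
of that coverage check (everything apart from PTE_PER_TABLE_SHIFT is illustrative,
not taken from the tree):

    #include <stdbool.h>

    /*
     * Sketch only: a DFN beyond what the current page table hierarchy can
     * represent cannot have a mapping, so unmapping it can simply succeed
     * without growing the number of levels.
     */
    #define PTE_PER_TABLE_SHIFT 9

    static bool dfn_within_table_reach(unsigned long dfn, unsigned int level)
    {
        /* level == 3 -> DFNs below 1UL << 27; level == 4 -> below 1UL << 36. */
        return (dfn >> (PTE_PER_TABLE_SHIFT * level)) == 0;
    }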

Comments

Andrew Cooper Nov. 6, 2019, 5:12 p.m. UTC | #1
On 06/11/2019 15:18, Jan Beulich wrote:
> Unmapping a page which has never been mapped should be a no-op (note how
> it already is in case there was no root page table allocated).

Which function are you talking about here?  iommu_pde_from_dfn() will
BUG() if no root was set up.

> There's
> in particular no need to grow the number of page table levels in use,
> and there's also no need to allocate intermediate page tables except
> when needing to split a large page.

To be honest, I've never been convinced that dynamically changing the
number of levels in the AMD IOMMU tables is clever.  It should be fixed
at 4 (like everything else) and suddenly a lot of runtime complexity
disappears.  (I'm fairly confident that we'll need a domain create
parameter to support 5 level paging in a rational way, so we won't even
include walk-length gymnastics then either.)

>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Jan Beulich Nov. 7, 2019, 10:13 a.m. UTC | #2
On 06.11.2019 18:12, Andrew Cooper wrote:
> On 06/11/2019 15:18, Jan Beulich wrote:
>> Unmapping a page which has never been mapped should be a no-op (note how
>> it already is in case there was no root page table allocated).
> 
> Which function are you talking about here?  iommu_pde_from_dfn() will
> BUG() if no root was set up.

amd_iommu_unmap_page() has such a check first thing.
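
Roughly, the first thing that function does after taking the mapping lock is bail
out when no root table was ever allocated, which is the "already a no-op" case the
commit message refers to. A toy model of that behaviour (the structure and function
names below are illustrative, not the real Xen ones):

    #include <stdio.h>

    struct dom_iommu { void *root_table; };  /* stand-in for per-domain IOMMU state */

    static int unmap_page(struct dom_iommu *hd, unsigned long dfn)
    {
        (void)dfn;  /* unused in this sketch */

        if ( !hd->root_table )
            return 0;  /* nothing was ever mapped, so unmapping is a no-op */

        /* The real code would walk the tables and clear the leaf PTE here. */
        return 0;
    }

    int main(void)
    {
        struct dom_iommu hd = { .root_table = NULL };
        printf("unmap -> %d\n", unmap_page(&hd, 0x1000));  /* prints "unmap -> 0" */
        return 0;
    }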

>> There's
>> in particular no need to grow the number of page table levels in use,
>> and there's also no need to allocate intermediate page tables except
>> when needing to split a large page.
> 
> To be honest, I've never been convinced that dynamically changing the
> number of levels in the AMD IOMMU tables is clever.  It should be fixed
> at 4 (like everything else) and suddenly a lot of runtime complexity
> disappears.  (I'm fairly confident that we'll need a domain create
> parameter to support 5 level paging in a rational way, so we won't even
> include walk-length gymnastics then either.)

5-level paging for CPU 1st-stage translation is imo pretty orthogonal
to needing 5 levels of paging for 2nd-stage translation (which is also
what the IOMMU code here is about).

>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks, Jan
Paul Durrant Nov. 7, 2019, 12:04 p.m. UTC | #3
On Wed, 6 Nov 2019 at 15:20, Jan Beulich <jbeulich@suse.com> wrote:
>
> Unmapping a page which has never been mapped should be a no-op (note how
> it already is in case there was no root page table allocated). There's
> in particular no need to grow the number of page table levels in use,
> and there's also no need to allocate intermediate page tables except
> when needing to split a large page.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Paul Durrant <paul@xen.org>

>
> --- a/xen/drivers/passthrough/amd/iommu_map.c
> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> @@ -176,7 +176,7 @@ void iommu_dte_set_guest_cr3(struct amd_
>   * page tables.
>   */
>  static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
> -                              unsigned long pt_mfn[])
> +                              unsigned long pt_mfn[], bool map)
>  {
>      struct amd_iommu_pte *pde, *next_table_vaddr;
>      unsigned long  next_table_mfn;
> @@ -189,6 +189,13 @@ static int iommu_pde_from_dfn(struct dom
>
>      BUG_ON( table == NULL || level < 1 || level > 6 );
>
> +    /*
> +     * A frame number past what the current page tables can represent can't
> +     * possibly have a mapping.
> +     */
> +    if ( dfn >> (PTE_PER_TABLE_SHIFT * level) )
> +        return 0;
> +
>      next_table_mfn = mfn_x(page_to_mfn(table));
>
>      if ( level == 1 )
> @@ -246,6 +253,9 @@ static int iommu_pde_from_dfn(struct dom
>          /* Install lower level page table for non-present entries */
>          else if ( !pde->pr )
>          {
> +            if ( !map )
> +                return 0;
> +
>              if ( next_table_mfn == 0 )
>              {
>                  table = alloc_amd_iommu_pgtable();
> @@ -404,7 +414,7 @@ int amd_iommu_map_page(struct domain *d,
>          }
>      }
>
> -    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn) || (pt_mfn[1] == 0) )
> +    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn, true) || (pt_mfn[1] == 0) )
>      {
>          spin_unlock(&hd->arch.mapping_lock);
>          AMD_IOMMU_DEBUG("Invalid IO pagetable entry dfn = %"PRI_dfn"\n",
> @@ -439,24 +449,7 @@ int amd_iommu_unmap_page(struct domain *
>          return 0;
>      }
>
> -    /* Since HVM domain is initialized with 2 level IO page table,
> -     * we might need a deeper page table for lager dfn now */
> -    if ( is_hvm_domain(d) )
> -    {
> -        int rc = update_paging_mode(d, dfn_x(dfn));
> -
> -        if ( rc )
> -        {
> -            spin_unlock(&hd->arch.mapping_lock);
> -            AMD_IOMMU_DEBUG("Update page mode failed dfn = %"PRI_dfn"\n",
> -                            dfn_x(dfn));
> -            if ( rc != -EADDRNOTAVAIL )
> -                domain_crash(d);
> -            return rc;
> -        }
> -    }
> -
> -    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn) || (pt_mfn[1] == 0) )
> +    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn, false) )
>      {
>          spin_unlock(&hd->arch.mapping_lock);
>          AMD_IOMMU_DEBUG("Invalid IO pagetable entry dfn = %"PRI_dfn"\n",
> @@ -465,8 +458,11 @@ int amd_iommu_unmap_page(struct domain *
>          return -EFAULT;
>      }
>
> -    /* mark PTE as 'page not present' */
> -    *flush_flags |= clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
> +    if ( pt_mfn[1] )
> +    {
> +        /* Mark PTE as 'page not present'. */
> +        *flush_flags |= clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
> +    }
>
>      spin_unlock(&hd->arch.mapping_lock);
>
Jürgen Groß Nov. 12, 2019, 9:59 a.m. UTC | #4
On 06.11.19 16:18, Jan Beulich wrote:
> Unmapping a page which has never been mapped should be a no-op (note how
> it already is in case there was no root page table allocated). There's
> in particular no need to grow the number of page table levels in use,
> and there's also no need to allocate intermediate page tables except
> when needing to split a large page.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Release-acked-by: Juergen Gross <jgross@suse.com>


Juergen

Patch

--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -176,7 +176,7 @@  void iommu_dte_set_guest_cr3(struct amd_
  * page tables.
  */
 static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
-                              unsigned long pt_mfn[])
+                              unsigned long pt_mfn[], bool map)
 {
     struct amd_iommu_pte *pde, *next_table_vaddr;
     unsigned long  next_table_mfn;
@@ -189,6 +189,13 @@  static int iommu_pde_from_dfn(struct dom
 
     BUG_ON( table == NULL || level < 1 || level > 6 );
 
+    /*
+     * A frame number past what the current page tables can represent can't
+     * possibly have a mapping.
+     */
+    if ( dfn >> (PTE_PER_TABLE_SHIFT * level) )
+        return 0;
+
     next_table_mfn = mfn_x(page_to_mfn(table));
 
     if ( level == 1 )
@@ -246,6 +253,9 @@  static int iommu_pde_from_dfn(struct dom
         /* Install lower level page table for non-present entries */
         else if ( !pde->pr )
         {
+            if ( !map )
+                return 0;
+
             if ( next_table_mfn == 0 )
             {
                 table = alloc_amd_iommu_pgtable();
@@ -404,7 +414,7 @@  int amd_iommu_map_page(struct domain *d,
         }
     }
 
-    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn) || (pt_mfn[1] == 0) )
+    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn, true) || (pt_mfn[1] == 0) )
     {
         spin_unlock(&hd->arch.mapping_lock);
         AMD_IOMMU_DEBUG("Invalid IO pagetable entry dfn = %"PRI_dfn"\n",
@@ -439,24 +449,7 @@  int amd_iommu_unmap_page(struct domain *
         return 0;
     }
 
-    /* Since HVM domain is initialized with 2 level IO page table,
-     * we might need a deeper page table for lager dfn now */
-    if ( is_hvm_domain(d) )
-    {
-        int rc = update_paging_mode(d, dfn_x(dfn));
-
-        if ( rc )
-        {
-            spin_unlock(&hd->arch.mapping_lock);
-            AMD_IOMMU_DEBUG("Update page mode failed dfn = %"PRI_dfn"\n",
-                            dfn_x(dfn));
-            if ( rc != -EADDRNOTAVAIL )
-                domain_crash(d);
-            return rc;
-        }
-    }
-
-    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn) || (pt_mfn[1] == 0) )
+    if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn, false) )
     {
         spin_unlock(&hd->arch.mapping_lock);
         AMD_IOMMU_DEBUG("Invalid IO pagetable entry dfn = %"PRI_dfn"\n",
@@ -465,8 +458,11 @@  int amd_iommu_unmap_page(struct domain *
         return -EFAULT;
     }
 
-    /* mark PTE as 'page not present' */
-    *flush_flags |= clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
+    if ( pt_mfn[1] )
+    {
+        /* Mark PTE as 'page not present'. */
+        *flush_flags |= clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
+    }
 
     spin_unlock(&hd->arch.mapping_lock);