diff mbox series

[v5,3/4] mm: make MEMF_no_refcount pages safe to assign

Message ID 20200129101643.1394-4-pdurrant@amazon.com (mailing list archive)
State Superseded
Headers show
Series purge free_shared_domheap_page() | expand

Commit Message

Paul Durrant Jan. 29, 2020, 10:16 a.m. UTC
Currently it is unsafe to assign a domheap page allocated with
MEMF_no_refcount to a domain because the domain't 'tot_pages' will not
be incremented, but will be decrement when the page is freed (since
free_domheap_pages() has no way of telling that the increment was skipped).

This patch allocates a new 'count_info' bit for a PGC_no_refcount flag
which is then used to mark domheap pages allocated with MEMF_no_refcount.
This then allows free_domheap_pages() to skip decrementing tot_pages when
appropriate and hence makes the pages safe to assign.

NOTE: The patch sets MEMF_no_refcount directly in alloc_domheap_pages()
      rather than in assign_pages() because the latter is called with
      MEMF_no_refcount by memory_exchange().

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Julien Grall <julien@xen.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Wei Liu <wl@xen.org>
Cc: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>

v5:
 - Make sure PGC_no_refcount is set before assign_pages() is called
 - Don't bother to clear PGC_no_refcount in free_domheap_pages() and
   drop ASSERT in free_heap_pages()
 - Don't latch count_info in free_heap_pages()

v4:
 - New in v4
---
 xen/common/page_alloc.c  | 35 +++++++++++++++++++++++------------
 xen/include/asm-arm/mm.h |  5 ++++-
 xen/include/asm-x86/mm.h |  7 +++++--
 3 files changed, 32 insertions(+), 15 deletions(-)

Comments

Jan Beulich Jan. 29, 2020, 11:20 a.m. UTC | #1
On 29.01.2020 11:16, Paul Durrant wrote:
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -2287,11 +2287,14 @@ int assign_pages(
>  
>      for ( i = 0; i < (1 << order); i++ )
>      {
> +        unsigned long count_info = pg[i].count_info;
> +
>          ASSERT(page_get_owner(&pg[i]) == NULL);
> -        ASSERT(!pg[i].count_info);
> +        ASSERT(!(count_info & ~PGC_no_refcount));

I think this would benefit from being more strict: The flag may also
not be set if MEMF_no_refcount is clear in the flags passed in.

> @@ -2334,11 +2332,20 @@ struct page_info *alloc_domheap_pages(
>                                    memflags, d)) == NULL)) )
>           return NULL;
>  
> -    if ( d && !(memflags & MEMF_no_owner) &&
> -         assign_pages(d, pg, order, memflags) )
> +    if ( d && !(memflags & MEMF_no_owner) )
>      {
> -        free_heap_pages(pg, order, memflags & MEMF_no_scrub);
> -        return NULL;
> +        if ( memflags & MEMF_no_refcount )
> +        {
> +            unsigned long i;

With this you mean ...

> +            for ( i = 0; i < (1 << order); i++ )

... 1UL here. Or else "unsigned int" and 1U. The file isn't
very consistent with this, but at least locally to a single
functions things should match up.

Jan
diff mbox series

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 135e15bae0..984296c687 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -2287,11 +2287,14 @@  int assign_pages(
 
     for ( i = 0; i < (1 << order); i++ )
     {
+        unsigned long count_info = pg[i].count_info;
+
         ASSERT(page_get_owner(&pg[i]) == NULL);
-        ASSERT(!pg[i].count_info);
+        ASSERT(!(count_info & ~PGC_no_refcount));
         page_set_owner(&pg[i], d);
         smp_wmb(); /* Domain pointer must be visible before updating refcnt. */
-        pg[i].count_info = PGC_allocated | 1;
+        count_info &= PGC_no_refcount;
+        pg[i].count_info = count_info | PGC_allocated | 1;
         page_list_add_tail(&pg[i], &d->page_list);
     }
 
@@ -2317,11 +2320,6 @@  struct page_info *alloc_domheap_pages(
 
     if ( memflags & MEMF_no_owner )
         memflags |= MEMF_no_refcount;
-    else if ( (memflags & MEMF_no_refcount) && d )
-    {
-        ASSERT(!(memflags & MEMF_no_refcount));
-        return NULL;
-    }
 
     if ( !dma_bitsize )
         memflags &= ~MEMF_no_dma;
@@ -2334,11 +2332,20 @@  struct page_info *alloc_domheap_pages(
                                   memflags, d)) == NULL)) )
          return NULL;
 
-    if ( d && !(memflags & MEMF_no_owner) &&
-         assign_pages(d, pg, order, memflags) )
+    if ( d && !(memflags & MEMF_no_owner) )
     {
-        free_heap_pages(pg, order, memflags & MEMF_no_scrub);
-        return NULL;
+        if ( memflags & MEMF_no_refcount )
+        {
+            unsigned long i;
+
+            for ( i = 0; i < (1 << order); i++ )
+                pg[i].count_info |= PGC_no_refcount;
+        }
+        if ( assign_pages(d, pg, order, memflags) )
+        {
+            free_heap_pages(pg, order, memflags & MEMF_no_scrub);
+            return NULL;
+        }
     }
 
     return pg;
@@ -2371,6 +2378,8 @@  void free_domheap_pages(struct page_info *pg, unsigned int order)
 
         if ( likely(d) && likely(d != dom_cow) )
         {
+            long pages = 0;
+
             /* NB. May recursively lock from relinquish_memory(). */
             spin_lock_recursive(&d->page_alloc_lock);
 
@@ -2386,9 +2395,11 @@  void free_domheap_pages(struct page_info *pg, unsigned int order)
                     BUG();
                 }
                 arch_free_heap_page(d, &pg[i]);
+                if ( !(pg[i].count_info & PGC_no_refcount) )
+                    pages--;
             }
 
-            drop_dom_ref = !domain_adjust_tot_pages(d, -(1 << order));
+            drop_dom_ref = !domain_adjust_tot_pages(d, pages);
 
             spin_unlock_recursive(&d->page_alloc_lock);
 
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 333efd3a60..1076cc9713 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -119,9 +119,12 @@  struct page_info
 #define PGC_state_offlined PG_mask(2, 9)
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+/* Page is not reference counted */
+#define _PGC_no_refcount  PG_shift(10)
+#define PGC_no_refcount   PG_mask(1, 10)
 
 /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2ca8882ad0..e75feea15e 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -77,9 +77,12 @@ 
 #define PGC_state_offlined PG_mask(2, 9)
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+/* Page is not reference counted */
+#define _PGC_no_refcount  PG_shift(10)
+#define PGC_no_refcount   PG_mask(1, 10)
 
- /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+/* Count of references to this frame. */
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 /*