[RESEND,RFC,2/8] mm: Place unscrubbed pages at the end of pagelist

Message ID 1488155829-2956-3-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State New, archived

Commit Message

Boris Ostrovsky Feb. 27, 2017, 12:37 a.m. UTC
. so that it's easy to find pages that need to be scrubbed (those pages are
now marked with _PGC_need_scrub bit).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 xen/common/page_alloc.c  |   97 +++++++++++++++++++++++++++++++++++----------
 xen/include/asm-x86/mm.h |    4 ++
 2 files changed, 79 insertions(+), 22 deletions(-)

Comments

Andrew Cooper Feb. 27, 2017, 3:38 p.m. UTC | #1
On 27/02/17 00:37, Boris Ostrovsky wrote:
> . so that it's easy to find pages that need to be scrubbed (those pages are
> now marked with _PGC_need_scrub bit).
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
>  xen/common/page_alloc.c  |   97 +++++++++++++++++++++++++++++++++++----------
>  xen/include/asm-x86/mm.h |    4 ++
>  2 files changed, 79 insertions(+), 22 deletions(-)
>
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index 352eba9..653bb91 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -385,6 +385,8 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
>  static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
>  #define heap(node, zone, order) ((*_heap[node])[zone][order])
>  
> +static unsigned node_need_scrub[MAX_NUMNODES];

This will overflow if there is 16TB of outstanding memory needing to be
scrubbed per node.

In the worst case, all available pages could be outstanding for
scrubbing, so they should be counted using the same types.
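
For illustration, a minimal sketch of the widened counter being asked for
here, assuming it should use the same page-count width as the existing
per-node counters it sits next to (the type adopted in later revisions may
differ):

    /*
     * Pages pending scrubbing, per node.  Sized like the other per-node
     * page counters so it cannot overflow even if every free page on a
     * node is awaiting a scrub.
     */
    static unsigned long node_need_scrub[MAX_NUMNODES];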

> +
>  static unsigned long *avail[MAX_NUMNODES];
>  static long total_avail_pages;
>  
> @@ -935,11 +942,16 @@ static bool_t can_merge(struct page_info *head, unsigned int node,
>            (phys_to_nid(page_to_maddr(head)) != node) )
>          return 0;
>  
> +    if ( !!need_scrub ^
> +         !!test_bit(_PGC_need_scrub, &head->count_info) )
> +        return 0;
> +
>      return 1;
>  }
>  
>  static void merge_chunks(struct page_info *pg, unsigned int node,
> -                         unsigned int zone, unsigned int order)
> +                         unsigned int zone, unsigned int order,
> +                         bool_t need_scrub)

Can't you calculate need_scrub from *pg rather than passing an extra
parameter?

>  {
>      ASSERT(spin_is_locked(&heap_lock));
>  
> @@ -970,12 +982,49 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
>      }
>  
>      PFN_ORDER(pg) = order;
> -    page_list_add_tail(pg, &heap(node, zone, order));
> +    if ( need_scrub )
> +        page_list_add_tail(pg, &heap(node, zone, order));
> +    else
> +        page_list_add(pg, &heap(node, zone, order));
> +}
> +
> +static void scrub_free_pages(unsigned int node)
> +{
> +    struct page_info *pg;
> +    unsigned int i, zone;
> +    int order;
> +
> +    ASSERT(spin_is_locked(&heap_lock));
> +
> +    if ( !node_need_scrub[node] )
> +        return;
> +
> +    for ( zone = 0; zone < NR_ZONES; zone++ )
> +    {
> +        for ( order = MAX_ORDER; order >= 0; order-- )
> +        {
> +            while ( !page_list_empty(&heap(node, zone, order)) )
> +            {
> +                /* Unscrubbed pages are always at the end of the list. */
> +                pg = page_list_last(&heap(node, zone, order));
> +                if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )

&pg->count_info

> +                    break;
> +
> +                for ( i = 0; i < (1 << order); i++)

1U, and probably unsigned long.  Similarly later.

~Andrew
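
For concreteness, the width fix being flagged would look along these lines
(a sketch only; the exact types used later in the series may differ):

    unsigned long i;

    for ( i = 0; i < (1UL << order); i++ )
    {
        scrub_one_page(&pg[i]);
        pg[i].count_info &= ~PGC_need_scrub;
    }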
Boris Ostrovsky Feb. 27, 2017, 3:52 p.m. UTC | #2
>>  
>>  static void merge_chunks(struct page_info *pg, unsigned int node,
>> -                         unsigned int zone, unsigned int order)
>> +                         unsigned int zone, unsigned int order,
>> +                         bool_t need_scrub)
> Can't you calculate need_scrub from *pg rather than passing an extra
> parameter?

Right, I can just look at the head's PGC_need_scrub bit.
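
A minimal sketch of that shape, assuming the flag is read off the chunk's
head page on entry (not necessarily what the later revision ends up doing):

    static void merge_chunks(struct page_info *pg, unsigned int node,
                             unsigned int zone, unsigned int order)
    {
        /* Derive the scrub state from the chunk's head page. */
        bool_t need_scrub = !!test_bit(_PGC_need_scrub, &pg->count_info);

        ASSERT(spin_is_locked(&heap_lock));

        /* ... merging loop as in the patch, passing need_scrub to can_merge() ... */

        PFN_ORDER(pg) = order;
        if ( need_scrub )
            page_list_add_tail(pg, &heap(node, zone, order));
        else
            page_list_add(pg, &heap(node, zone, order));
    }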

>>  {
>>      ASSERT(spin_is_locked(&heap_lock));
>>  
>> @@ -970,12 +982,49 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
>>      }
>>  
>>      PFN_ORDER(pg) = order;
>> -    page_list_add_tail(pg, &heap(node, zone, order));
>> +    if ( need_scrub )
>> +        page_list_add_tail(pg, &heap(node, zone, order));
>> +    else
>> +        page_list_add(pg, &heap(node, zone, order));
>> +}
>> +
>> +static void scrub_free_pages(unsigned int node)
>> +{
>> +    struct page_info *pg;
>> +    unsigned int i, zone;
>> +    int order;
>> +
>> +    ASSERT(spin_is_locked(&heap_lock));
>> +
>> +    if ( !node_need_scrub[node] )
>> +        return;
>> +
>> +    for ( zone = 0; zone < NR_ZONES; zone++ )
>> +    {
>> +        for ( order = MAX_ORDER; order >= 0; order-- )
>> +        {
>> +            while ( !page_list_empty(&heap(node, zone, order)) )
>> +            {
>> +                /* Unscrubbed pages are always at the end of the list. */
>> +                pg = page_list_last(&heap(node, zone, order));
>> +                if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
> &pg->count_info
>
>> +                    break;
>> +
>> +                for ( i = 0; i < (1 << order); i++)
> 1U, and probably unsigned long.  Similarly later.

I'll update sizing throughout the series.

I just noticed, BTW, that this routine lost something like

   page_list_del(pg, &heap(node, zone, order));
   merge_chunks(pg, node, zone, chunk_order, 0);

otherwise we just scrub the last chunk on each list.

This is all rewritten in a later patch which is why I missed this during
testing.

-boris
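
For readers following the thread, a rough sketch of the scrub loop with the
re-queue Boris mentions restored, based only on his note above (the actual
rework arrives in a later patch of the series and is not identical):

    while ( !page_list_empty(&heap(node, zone, order)) )
    {
        unsigned long i;

        /* Unscrubbed pages are always at the end of the list. */
        pg = page_list_last(&heap(node, zone, order));
        if ( !test_bit(_PGC_need_scrub, &pg->count_info) )
            break;

        for ( i = 0; i < (1UL << order); i++ )
        {
            scrub_one_page(&pg[i]);
            pg[i].count_info &= ~PGC_need_scrub;
        }

        node_need_scrub[node] -= (1UL << order);

        /* Put the now-clean chunk back at the head of its list so the
         * next iteration inspects the next unscrubbed chunk at the tail. */
        page_list_del(pg, &heap(node, zone, order));
        merge_chunks(pg, node, zone, order, 0);
    }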

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 352eba9..653bb91 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -385,6 +385,8 @@  typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
+static unsigned node_need_scrub[MAX_NUMNODES];
+
 static unsigned long *avail[MAX_NUMNODES];
 static long total_avail_pages;
 
@@ -809,7 +811,7 @@  static struct page_info *alloc_heap_pages(
     while ( j != order )
     {
         PFN_ORDER(pg) = --j;
-        page_list_add_tail(pg, &heap(node, zone, j));
+        page_list_add(pg, &heap(node, zone, j));
         pg += 1 << j;
     }
 
@@ -829,6 +831,8 @@  static struct page_info *alloc_heap_pages(
         BUG_ON(pg[i].count_info != PGC_state_free);
         pg[i].count_info = PGC_state_inuse;
 
+        BUG_ON(test_bit(_PGC_need_scrub, &pg[i].count_info));
+
         if ( !(memflags & MEMF_no_tlbflush) )
             accumulate_tlbflush(&need_tlbflush, &pg[i],
                                 &tlbflush_timestamp);
@@ -899,7 +903,10 @@  static int reserve_offlined_page(struct page_info *head)
             {
             merge:
                 /* We don't consider merging outside the head_order. */
-                page_list_add_tail(cur_head, &heap(node, zone, cur_order));
+                if ( test_bit(_PGC_need_scrub, &cur_head->count_info) )
+                    page_list_add_tail(cur_head, &heap(node, zone, cur_order));
+                else
+                    page_list_add(cur_head, &heap(node, zone, cur_order));
                 PFN_ORDER(cur_head) = cur_order;
                 cur_head += (1 << cur_order);
                 break;
@@ -927,7 +934,7 @@  static int reserve_offlined_page(struct page_info *head)
 }
 
 static bool_t can_merge(struct page_info *head, unsigned int node,
-                        unsigned int order)
+                        unsigned int order, bool_t need_scrub)
 {
     if  ( !mfn_valid(page_to_mfn(head)) ||
           !page_state_is(head, free) ||
@@ -935,11 +942,16 @@  static bool_t can_merge(struct page_info *head, unsigned int node,
           (phys_to_nid(page_to_maddr(head)) != node) )
         return 0;
 
+    if ( !!need_scrub ^
+         !!test_bit(_PGC_need_scrub, &head->count_info) )
+        return 0;
+
     return 1;
 }
 
 static void merge_chunks(struct page_info *pg, unsigned int node,
-                         unsigned int zone, unsigned int order)
+                         unsigned int zone, unsigned int order,
+                         bool_t need_scrub)
 {
     ASSERT(spin_is_locked(&heap_lock));
 
@@ -951,7 +963,7 @@  static void merge_chunks(struct page_info *pg, unsigned int node,
         if ( (page_to_mfn(pg) & mask) )
         {
             /* Merge with predecessor block? */
-            if ( !can_merge(pg - mask, node, order) )
+            if ( !can_merge(pg - mask, node, order, need_scrub) )
                 break;
 
             pg -= mask;
@@ -960,7 +972,7 @@  static void merge_chunks(struct page_info *pg, unsigned int node,
         else
         {
             /* Merge with successor block? */
-            if ( !can_merge(pg + mask, node, order) )
+            if ( !can_merge(pg + mask, node, order, need_scrub) )
                 break;
 
             page_list_del(pg + mask, &heap(node, zone, order));
@@ -970,12 +982,49 @@  static void merge_chunks(struct page_info *pg, unsigned int node,
     }
 
     PFN_ORDER(pg) = order;
-    page_list_add_tail(pg, &heap(node, zone, order));
+    if ( need_scrub )
+        page_list_add_tail(pg, &heap(node, zone, order));
+    else
+        page_list_add(pg, &heap(node, zone, order));
+}
+
+static void scrub_free_pages(unsigned int node)
+{
+    struct page_info *pg;
+    unsigned int i, zone;
+    int order;
+
+    ASSERT(spin_is_locked(&heap_lock));
+
+    if ( !node_need_scrub[node] )
+        return;
+
+    for ( zone = 0; zone < NR_ZONES; zone++ )
+    {
+        for ( order = MAX_ORDER; order >= 0; order-- )
+        {
+            while ( !page_list_empty(&heap(node, zone, order)) )
+            {
+                /* Unscrubbed pages are always at the end of the list. */
+                pg = page_list_last(&heap(node, zone, order));
+                if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
+                    break;
+
+                for ( i = 0; i < (1 << order); i++)
+                {
+                    scrub_one_page(&pg[i]);
+                    pg[i].count_info &= ~PGC_need_scrub;
+                }
+
+                node_need_scrub[node] -= (1 << order);
+            }
+        }
+    }
 }
 
 /* Free 2^@order set of pages. */
 static void free_heap_pages(
-    struct page_info *pg, unsigned int order)
+    struct page_info *pg, unsigned int order, bool_t need_scrub)
 {
     unsigned long mfn = page_to_mfn(pg);
     unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -1024,11 +1073,22 @@  static void free_heap_pages(
         midsize_alloc_zone_pages = max(
             midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC);
 
-    merge_chunks(pg, node, zone, order);
+    if ( need_scrub && !tainted )
+    {
+        for ( i = 0; i < (1 << order); i++ )
+            pg[i].count_info |= PGC_need_scrub;
+
+        node_need_scrub[node] += (1 << order);
+    }
+
+    merge_chunks(pg, node, zone, order, need_scrub);
 
     if ( tainted )
         reserve_offlined_page(pg);
 
+    if ( need_scrub )
+        scrub_free_pages(node);
+
     spin_unlock(&heap_lock);
 }
 
@@ -1249,7 +1309,7 @@  unsigned int online_page(unsigned long mfn, uint32_t *status)
     spin_unlock(&heap_lock);
 
     if ( (y & PGC_state) == PGC_state_offlined )
-        free_heap_pages(pg, 0);
+        free_heap_pages(pg, 0, 0);
 
     return ret;
 }
@@ -1318,7 +1378,7 @@  static void init_heap_pages(
             nr_pages -= n;
         }
 
-        free_heap_pages(pg+i, 0);
+        free_heap_pages(pg + i, 0, 0);
     }
 }
 
@@ -1645,7 +1705,7 @@  void free_xenheap_pages(void *v, unsigned int order)
 
     memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
 
-    free_heap_pages(virt_to_page(v), order);
+    free_heap_pages(virt_to_page(v), order, 0);
 }
 
 #else
@@ -1699,12 +1759,9 @@  void free_xenheap_pages(void *v, unsigned int order)
     pg = virt_to_page(v);
 
     for ( i = 0; i < (1u << order); i++ )
-    {
-        scrub_one_page(&pg[i]);
         pg[i].count_info &= ~PGC_xen_heap;
-    }
 
-    free_heap_pages(pg, order);
+    free_heap_pages(pg, order, 1);
 }
 
 #endif
@@ -1813,7 +1870,7 @@  struct page_info *alloc_domheap_pages(
     if ( d && !(memflags & MEMF_no_owner) &&
          assign_pages(d, pg, order, memflags) )
     {
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, 0);
         return NULL;
     }
     
@@ -1881,11 +1938,7 @@  void free_domheap_pages(struct page_info *pg, unsigned int order)
             scrub = 1;
         }
 
-        if ( unlikely(scrub) )
-            for ( i = 0; i < (1 << order); i++ )
-                scrub_one_page(&pg[i]);
-
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, scrub);
     }
 
     if ( drop_dom_ref )
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index a66d5b1..b11124f 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -233,6 +233,10 @@  struct page_info
 #define PGC_count_width   PG_shift(9)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
+/* Page needs to be scrubbed */
+#define _PGC_need_scrub   PG_shift(10)
+#define PGC_need_scrub    PG_mask(1, 10)
+
 struct spage_info
 {
        unsigned long type_info;