
[v2,7/9] mm: Keep pages available for allocation while scrubbing

Message ID 1491238256-5517-8-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State New, archived

Commit Message

Boris Ostrovsky April 3, 2017, 4:50 p.m. UTC
Instead of scrubbing pages while holding the heap lock, we can mark
the buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access this
chunk, it will signal the scrubber to abort the scrub by setting the
head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
processing each page and stops its work as soon as it sees the bit set.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 xen/common/page_alloc.c  |   76 ++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/mm.h |    4 ++
 xen/include/asm-x86/mm.h |    4 ++
 3 files changed, 81 insertions(+), 3 deletions(-)
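
Conceptually, the handshake described above reduces to the sketch below
(distilled from the patch at the end of this page; wait_for_scrubber()
and scrub_buddy() are illustrative names, not functions in the series,
and the heap-lock plumbing is omitted):

    /* Allocator side: request abort, then wait for the scrubber to exit. */
    static void wait_for_scrubber(struct page_info *head)
    {
        if ( head->u.free.scrub_state & PAGE_SCRUBBING )
        {
            head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
            smp_mb();             /* publish the abort request */
            while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
                cpu_relax();      /* scrubber clears scrub_state on exit */
        }
    }

    /* Scrubber side: poll for the abort bit after every page. */
    static bool_t scrub_buddy(struct page_info *head, unsigned int order)
    {
        unsigned int i;

        for ( i = 0; i < (1u << order); i++ )
        {
            scrub_one_page(&head[i]);
            if ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
            {
                head->u.free.scrub_state = 0;  /* release the waiter */
                return false;                  /* buddy may still be dirty */
            }
        }
        return true;
    }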

Comments

Jan Beulich April 13, 2017, 3:59 p.m. UTC | #1
>>> On 03.04.17 at 18:50, <boris.ostrovsky@oracle.com> wrote:
> Instead of scrubbing pages while holding the heap lock, we can mark
> the buddy's head as being scrubbed and drop the lock temporarily.
> If someone (most likely alloc_heap_pages()) tries to access this
> chunk, it will signal the scrubber to abort the scrub by setting the
> head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
> processing each page and stops its work as soon as it sees the bit set.

So if the scrubber managed to handle all but one page of, say, a
1GB buddy, you'd redo all of it synchronously? One more argument
for tracking dirty/scrubbed state per page instead of per buddy, I think.
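
For illustration, per-page tracking along those lines might look like
the sketch below, where each scrubbed page is marked clean individually
so an aborted pass keeps its progress (hypothetical code, not part of
this series; it assumes _PGC_need_scrub is maintained per page rather
than only consulted on the buddy head):

    for ( i = 0; i < (1u << order); i++ )
    {
        if ( !test_bit(_PGC_need_scrub, &pg[i].count_info) )
            continue;                   /* scrubbed on an earlier pass */
        scrub_one_page(&pg[i]);
        clear_bit(_PGC_need_scrub, &pg[i].count_info);
        if ( ACCESS_ONCE(pg->u.free.scrub_state) & PAGE_SCRUB_ABORT )
            break;                      /* pages 0..i stay clean */
    }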

> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -699,6 +699,18 @@ static void page_list_add_scrub(struct page_info *pg, 
> unsigned int node,
>          page_list_add(pg, &heap(node, zone, order));
>  }
>  
> +static void check_and_stop_scrub(struct page_info *head)
> +{
> +    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
> +    {
> +        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
> +        smp_mb();
> +        spin_lock_kick();

I think the barrier would be better placed inside the kicking
construct than made explicit here.
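
I.e. something like the below (a hypothetical reshuffle; it assumes
spin_lock_kick() is a thin wrapper around an arch hook, so the barrier
can live inside the wrapper instead of at every call site):

    /* Barrier folded into the kick itself. */
    #define spin_lock_kick()    \
    ({                          \
        smp_mb();               \
        arch_lock_signal();     \
    })

check_and_stop_scrub() would then only set PAGE_SCRUB_ABORT and call
spin_lock_kick().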

> @@ -785,10 +797,15 @@ static struct page_info *alloc_heap_pages(
>              {
>                  if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
>                  {
> -                    if ( (order == 0) || use_unscrubbed ||
> -                         !test_bit(_PGC_need_scrub, &pg->count_info) )
> +                    if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )

Any reason to change from -> to [0] here?

> @@ -1074,12 +1096,34 @@ static unsigned int node_to_scrub(bool_t get_node)
>  }
>  
>  #define SCRUB_CHUNK_ORDER  8
> +
> +struct scrub_wait_state {
> +    struct page_info *pg;
> +    bool_t drop;
> +};
> +
> +static void scrub_continue(void *data)
> +{
> +    struct scrub_wait_state *st = (struct scrub_wait_state *)data;

Pointless cast.
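
I.e. simply:

    struct scrub_wait_state *st = data;

since void * converts implicitly to any object pointer type in C.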

> @@ -1203,6 +1271,8 @@ static void free_heap_pages(
>          if ( page_state_is(&pg[i], offlined) )
>              tainted = 1;
>  
> +        pg[i].u.free.scrub_state=0;

Style.
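
I.e. spaces around the assignment operator, per Xen coding style:

    pg[i].u.free.scrub_state = 0;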

> --- a/xen/include/asm-arm/mm.h
> +++ b/xen/include/asm-arm/mm.h
> @@ -35,6 +35,10 @@ struct page_info
>          } inuse;
>          /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
>          struct {
> +#define PAGE_SCRUBBING      (1<<1)
> +#define PAGE_SCRUB_ABORT    (1<<2)

Any reason not to start from bit 0? I'm also not sure boolean flags
are the ideal solution here: You really only have three states afaict
(none, scrubbing, abort).

Jan
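
A three-state variant along the lines suggested above might look like
this (an illustrative sketch, not part of the series):

    /* Hypothetical replacement for the two flag bits. */
    enum page_scrub_state {
        PAGE_SCRUB_NONE = 0,   /* nobody is scrubbing this buddy */
        PAGE_SCRUBBING,        /* scrubber owns the buddy */
        PAGE_SCRUB_ABORT,      /* allocator asked the scrubber to bail */
    };

The only transitions needed are NONE -> SCRUBBING (taken by the
scrubber under the heap lock) and SCRUBBING -> ABORT (taken by the
allocator).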

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 1c23991..666b79a 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -699,6 +699,18 @@  static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        smp_mb();
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -785,10 +797,15 @@  static struct page_info *alloc_heap_pages(
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         !test_bit(_PGC_need_scrub, &pg->count_info) )
+                    if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
                         goto found;
 
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        goto found;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -919,6 +936,8 @@  static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -995,6 +1014,9 @@  static bool_t can_merge(struct page_info *buddy, unsigned int node,
          !!test_bit(_PGC_need_scrub, &buddy->count_info) )
         return false;
 
+    if ( buddy->u.free.scrub_state & PAGE_SCRUBBING )
+        return false;
+
     return true;
 }
 
@@ -1074,12 +1096,34 @@  static unsigned int node_to_scrub(bool_t get_node)
 }
 
 #define SCRUB_CHUNK_ORDER  8
+
+struct scrub_wait_state {
+    struct page_info *pg;
+    bool_t drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = (struct scrub_wait_state *)data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this chunk. Release it. */
+        st->drop = true;
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool_t scrub_free_pages()
 {
     struct page_info *pg;
     unsigned int i, zone;
     unsigned int num_scrubbed, scrub_order, start, end;
     bool_t preempt, is_frag;
+    struct scrub_wait_state st;
     int order, cpu = smp_processor_id();
     nodeid_t node;
 
@@ -1100,7 +1144,10 @@  bool_t scrub_free_pages()
                 if ( !test_bit(_PGC_need_scrub, &pg->count_info) )
                     break;
 
-                page_list_del(pg, &heap(node, zone, order));
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
 
                 scrub_order = MIN(order, SCRUB_CHUNK_ORDER);
                 num_scrubbed = 0;
@@ -1108,7 +1155,15 @@  bool_t scrub_free_pages()
                 while ( num_scrubbed < (1 << order) )
                 {
                     for ( i = 0; i < (1 << scrub_order); i++ )
+                    {
                         scrub_one_page(&pg[num_scrubbed + i]);
+                        if ( ACCESS_ONCE(pg->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+                        {
+                            /* Someone wants this chunk. Drop everything. */
+                            pg->u.free.scrub_state = 0;
+                            goto out_nolock;
+                        }
+                    }
 
                     num_scrubbed += (1 << scrub_order);
                     if ( softirq_pending(cpu) )
@@ -1119,6 +1174,15 @@  bool_t scrub_free_pages()
                     }
                 }
 
+                st.pg = pg;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                if ( st.drop )
+                    goto out;
+
+                page_list_del(pg, &heap(node, zone, order));
+
                 start = 0;
                 end = num_scrubbed;
 
@@ -1156,6 +1220,8 @@  bool_t scrub_free_pages()
                     end += (1 << chunk_order);
                  }
 
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt )
                     goto out;
             }
@@ -1164,6 +1230,8 @@  bool_t scrub_free_pages()
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1203,6 +1271,8 @@  static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state=0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 149940b..b2d9dd3 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -35,6 +35,10 @@  struct page_info
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING      (1<<1)
+#define PAGE_SCRUB_ABORT    (1<<2)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index f3d4443..31e53e9 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,10 @@  struct page_info
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING      (1<<1)
+#define PAGE_SCRUB_ABORT    (1<<2)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;