
[v4,6/8] mm: Keep heap accessible to others while scrubbing

Message ID 1495209040-11101-7-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State New, archived

Commit Message

Boris Ostrovsky May 19, 2017, 3:50 p.m. UTC
Instead of scrubbing pages while holding the heap lock, we can mark the
buddy's head as being scrubbed and drop the lock temporarily. If someone
(most likely alloc_heap_pages()) tries to access this chunk, it will
signal the scrubber to abort the scrub by setting the head's
PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each
page and stops its work as soon as it sees it.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v4:
* Drop unnecessary ACCESS_ONCE in scrub_free_pages, add smp_wmb()
  in scrub_continue()
* Keep track of first_dirty in scrub_wait_state

 xen/common/page_alloc.c  | 100 ++++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |   4 ++
 xen/include/asm-x86/mm.h |   4 ++
 3 files changed, 103 insertions(+), 5 deletions(-)
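
The handshake the commit message describes can be illustrated standalone.
Below is a minimal sketch using C11 atomics in place of Xen's heap lock,
ACCESS_ONCE() and smp_wmb(); the buddy structure, the npages field and the
function bodies are assumptions made for illustration, not the patch itself.

#include <limits.h>
#include <stdatomic.h>

#define PAGE_SCRUBBING    (1 << 0)
#define PAGE_SCRUB_ABORT  (1 << 1)
#define INVALID_DIRTY_IDX UINT_MAX

struct buddy {
    atomic_uint scrub_state;
    unsigned int first_dirty;   /* first *possibly* unscrubbed page */
    unsigned int npages;        /* 1U << order */
};

/* Scrubber side: runs without the heap lock, polling for an abort. */
static void scrub_buddy(struct buddy *b)
{
    unsigned int i;

    atomic_store(&b->scrub_state, PAGE_SCRUBBING);

    for ( i = b->first_dirty; i < b->npages; i++ )
    {
        /* scrub_one_page(&pg[i]) would go here. */

        if ( atomic_load(&b->scrub_state) & PAGE_SCRUB_ABORT )
        {
            /*
             * Someone wants this chunk: record progress, then release
             * the buddy.  Note that i + 1 == npages must map to
             * INVALID_DIRTY_IDX -- the boundary Jan flags below.  The
             * seq_cst store publishes first_dirty before the release.
             */
            b->first_dirty = (i + 1 < b->npages) ? i + 1
                                                 : INVALID_DIRTY_IDX;
            atomic_store(&b->scrub_state, 0);
            return;
        }
    }

    b->first_dirty = INVALID_DIRTY_IDX;   /* whole buddy is clean */
    atomic_store(&b->scrub_state, 0);
}

/* Allocator side: ask the scrubber to back off, wait until it has. */
static void stop_scrub(struct buddy *b)
{
    unsigned int s = atomic_load(&b->scrub_state);

    while ( s & PAGE_SCRUBBING )
    {
        /*
         * Set ABORT only while the scrubber still owns the buddy.  The
         * patch needs no CAS here: its waiter holds the heap lock, and
         * the scrubber re-checks the flag in scrub_continue() while
         * spinning on that lock, which closes the same race window.
         */
        if ( atomic_compare_exchange_weak(&b->scrub_state, &s,
                                          s | PAGE_SCRUB_ABORT) )
        {
            while ( atomic_load(&b->scrub_state) != 0 )
                ;   /* cpu_relax() in Xen */
            break;
        }
    }
}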

Comments

Jan Beulich June 12, 2017, 8:30 a.m. UTC | #1
>>> On 19.05.17 at 17:50, <boris.ostrovsky@oracle.com> wrote:
> @@ -1090,24 +1131,51 @@ bool scrub_free_pages(void)
>          do {
>              while ( !page_list_empty(&heap(node, zone, order)) )
>              {
> -                unsigned int i;
> +                unsigned int i, dirty_cnt;
> +                struct scrub_wait_state st;
>  
>                  /* Unscrubbed pages are always at the end of the list. */
>                  pg = page_list_last(&heap(node, zone, order));
>                  if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
>                      break;
>  
> +                ASSERT(!pg->u.free.scrub_state);
> +                pg->u.free.scrub_state = PAGE_SCRUBBING;
> +
> +                spin_unlock(&heap_lock);
> +
> +                dirty_cnt = 0;
>                  for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
>                  {
>                      cnt++;
>                      if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
>                      {
>                          scrub_one_page(&pg[i]);
> +                        /*
> +                         * We can modify count_info without holding heap
> +                         * lock since we effectively locked this buddy by
> +                         * setting its scrub_state.
> +                         */
>                          pg[i].count_info &= ~PGC_need_scrub;
> -                        node_need_scrub[node]--;
> +                        dirty_cnt++;
>                          cnt += 100; /* scrubbed pages add heavier weight. */
>                      }
>  
> +                    if ( pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
> +                    {
> +                        /* Someone wants this chunk. Drop everything. */
> +
> +                        pg->u.free.first_dirty = (i == (1U << order)) ?

As with the earlier patch, this condition is always false
(due to the condition in the loop header) afaict, and ...

> +                            INVALID_DIRTY_IDX : i + 1; 

... you'd again risk storing 1U << order into first_dirty here (an
abort during the last iteration, i == (1U << order) - 1, stores
i + 1 == 1U << order, one past the buddy).

> @@ -1121,6 +1189,17 @@ bool scrub_free_pages(void)
>                      }
>                  }
>  
> +                st.pg = pg;
> +                st.first_dirty = (i == (1UL << order)) ?
> +                    INVALID_DIRTY_IDX : i + 1;

Same here then.

> +                st.drop = false;
> +                spin_lock_cb(&heap_lock, scrub_continue, &st);
> +
> +                node_need_scrub[node] -= dirty_cnt;
> +
> +                if ( st.drop )
> +                    goto out;
> +
>                  if ( i == (1U << order) )
>                  {
>                      page_list_del(pg, &heap(node, zone, order));
> @@ -1128,7 +1207,9 @@ bool scrub_free_pages(void)
>                  }
>                  else
>                      pg->u.free.first_dirty = i + 1;
> - 
> +

Please avoid adding the trailing blank in the first place (in the
earlier patch).

> @@ -1175,6 +1258,8 @@ static void free_heap_pages(
>          if ( page_state_is(&pg[i], offlined) )
>              tainted = 1;
>  
> +        pg[i].u.free.scrub_state = 0;

Is this really needed for every page in the buddy?

Jan
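
To make the boundary concrete, here is a toy loop (hypothetical code, not
the patch) with order 2: the in-loop "i == (1U << order)" test can never
fire, and an abort while processing the last page stores an index one past
the buddy instead of INVALID_DIRTY_IDX.

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define INVALID_DIRTY_IDX UINT_MAX

int main(void)
{
    const unsigned int order = 2, n = 1U << order;  /* 4-page buddy */
    unsigned int i, first_dirty = 0;

    for ( i = 0; i < n; i++ )
    {
        if ( i == n - 1 )        /* pretend the abort arrives here */
        {
            assert(i != n);      /* always true: the test is dead code */
            first_dirty = (i == n) ? INVALID_DIRTY_IDX : i + 1;
            break;
        }
    }

    /* Prints 4, i.e. 1U << order: one past the last valid index. */
    printf("first_dirty = %u, valid range 0..%u\n", first_dirty, n - 1);
    return 0;
}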
Boris Ostrovsky June 12, 2017, 5:11 p.m. UTC | #2
>> @@ -1175,6 +1258,8 @@ static void free_heap_pages(
>>          if ( page_state_is(&pg[i], offlined) )
>>              tainted = 1;
>>  
>> +        pg[i].u.free.scrub_state = 0;
> Is this really needed for every page in the buddy?
>

The concern here is that if we break the buddy (in alloc_heap_pages(),
for example) this may still be set. But perhaps I should just be careful
about clearing this on the head when a (sub-)buddy is added to the heap.

-boris
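
A note on the locking protocol the patch relies on: check_and_stop_scrub()
busy-waits with the heap lock held, while the scrubber, once signalled,
still needs that lock back -- a deadlock with an ordinary spin_lock().
spin_lock_cb() (introduced earlier in this series) avoids it by invoking a
callback on each failed acquisition attempt, so scrub_continue() can
release the buddy while scrub_free_pages() is still spinning; the
spin_lock_kick() in check_and_stop_scrub() wakes waiters that sleep
between attempts (WFE on arm). A rough standalone sketch of the primitive,
simplified to a C11 test-and-set lock rather than Xen's ticket lock:

#include <stdatomic.h>

typedef struct {
    atomic_flag held;   /* initialise with ATOMIC_FLAG_INIT */
} sketch_lock_t;

static void sketch_lock_cb(sketch_lock_t *l, void (*cb)(void *), void *data)
{
    while ( atomic_flag_test_and_set_explicit(&l->held,
                                              memory_order_acquire) )
    {
        if ( cb )
            cb(data);   /* e.g. scrub_continue(): honor a pending abort */
        /* cpu_relax() between attempts in real code */
    }
}

static void sketch_unlock(sketch_lock_t *l)
{
    atomic_flag_clear_explicit(&l->held, memory_order_release);
}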

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6e505b1..b365305 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@  static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -738,9 +749,15 @@  static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                         return pg;
+
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -925,6 +942,7 @@  static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1069,6 +1087,29 @@  static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1090,24 +1131,51 @@  bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     cnt++;
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
 
+                    if ( pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order)) ?
+                            INVALID_DIRTY_IDX : i + 1; 
+                        smp_wmb();
+                        pg->u.free.scrub_state = 0;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemtion. But also count non-scrubbing loop iteration
@@ -1121,6 +1189,17 @@  bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                st.first_dirty = (i == (1UL << order)) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i == (1U << order) )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1128,7 +1207,9 @@  bool scrub_free_pages(void)
                 }
                 else
                     pg->u.free.first_dirty = i + 1;
- 
+
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1137,6 +1218,8 @@  bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1175,6 +1258,8 @@  static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
@@ -1218,6 +1303,8 @@  static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1240,6 +1327,9 @@  static void free_heap_pages(
                  (PFN_ORDER(successor) != order) ||
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
+
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
 
             need_scrub |= (successor->u.free.first_dirty != INVALID_DIRTY_IDX);
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 38d4fba..148d60b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -46,6 +46,10 @@  struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index e20f161..f46b242 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -90,6 +90,10 @@  struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;