
[v3,7/9] mm: Keep pages available for allocation while scrubbing

Message ID 1492184258-3277-8-git-send-email-boris.ostrovsky@oracle.com
State New, archived

Commit Message

Boris Ostrovsky April 14, 2017, 3:37 p.m. UTC
Instead of scrubbing pages while holding the heap lock, we can mark the
buddy's head as being scrubbed and drop the lock temporarily. If someone
(most likely alloc_heap_pages()) tries to access this chunk, it will
signal the scrubber to abort the scrub by setting the head's
PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each
page and stops its work as soon as it sees it.
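
As a rough sketch of that handshake (check_and_stop_scrub() below mirrors
the patch; scrub_abort_requested() is a hypothetical wrapper for the check
that the patch open-codes in scrub_free_pages(), and spin_lock_kick() is
assumed from the earlier patch in this series adding the callback lock
variant):

    /* Allocator side, called with the heap lock held. */
    static void check_and_stop_scrub(struct page_info *head)
    {
        if ( head->u.free.scrub_state & PAGE_SCRUBBING )
        {
            /* Ask the scrubber to back off, then wait until it has. */
            head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
            spin_lock_kick();
            while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
                cpu_relax();
        }
    }

    /* Scrubber side, checked after each page with the heap lock dropped. */
    static bool scrub_abort_requested(struct page_info *head)
    {
        if ( !(ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT) )
            return false;
        /* Someone wants this chunk: release it so the waiter can proceed. */
        head->u.free.scrub_state = 0;
        return true;
    }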

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v3:
* Adjusted page_info's scrub_state definitions but kept them as binary
  flags, since I think having both the PAGE_SCRUBBING and PAGE_SCRUB_ABORT
  bits set makes sense.
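
  As an illustration of those combinations (summary only, not part of the
  patch), a buddy head's scrub_state moves through:

    /*
     * 0                                   nobody is scrubbing this buddy
     * PAGE_SCRUBBING                      a scrubber owns it, heap lock dropped
     * PAGE_SCRUBBING | PAGE_SCRUB_ABORT   an allocator has asked for it back
     * 0                                   the scrubber noticed (or finished)
     *                                     and released the buddy
     */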

 xen/common/page_alloc.c  |   92 ++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |    4 ++
 xen/include/asm-x86/mm.h |    4 ++
 3 files changed, 93 insertions(+), 7 deletions(-)

Comments

Jan Beulich May 4, 2017, 4:03 p.m. UTC | #1
>>> On 14.04.17 at 17:37, <boris.ostrovsky@oracle.com> wrote:
> Instead of scrubbing pages while holding the heap lock, we can mark the
> buddy's head as being scrubbed and drop the lock temporarily. If someone
> (most likely alloc_heap_pages()) tries to access this chunk, it will
> signal the scrubber to abort the scrub by setting the head's
> PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each
> page and stops its work as soon as it sees it.

Explanation sounds fine, but didn't you earlier indicate you think
yourself that the title is not really suitable (anymore)? I'm of
that opinion, at least, as pages are always available now, it's just
that the latency to get hold of the heap lock is higher before this
change than what we want it to be.

Looking at the patch itself is not overly useful as it seems without
the disposition of patch 5 being clear.

Jan
Boris Ostrovsky May 4, 2017, 5:26 p.m. UTC | #2
On 05/04/2017 12:03 PM, Jan Beulich wrote:
>>>> On 14.04.17 at 17:37, <boris.ostrovsky@oracle.com> wrote:
>> Instead of scrubbing pages while holding the heap lock, we can mark the
>> buddy's head as being scrubbed and drop the lock temporarily. If someone
>> (most likely alloc_heap_pages()) tries to access this chunk, it will
>> signal the scrubber to abort the scrub by setting the head's
>> PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each
>> page and stops its work as soon as it sees it.
> Explanation sounds fine, but didn't you earlier indicate you think
> yourself that the title is not really suitable (anymore)? I'm of
> that opinion, at least, as pages are always available now, it's just
> that the latency to get hold of the heap lock is higher before this
> change than what we want it to be.

Uhm... Yes, that's what it was meant to say ;-)

Should have been "Keep *heap* available to others while scrubbing"

-boris

>
> Looking at the patch itself is not overly useful as it seems without
> the disposition of patch 5 being clear.
>
> Jan
>
Jan Beulich May 5, 2017, 10:28 a.m. UTC | #3
>>> On 04.05.17 at 19:26, <boris.ostrovsky@oracle.com> wrote:
> On 05/04/2017 12:03 PM, Jan Beulich wrote:
>>>>> On 14.04.17 at 17:37, <boris.ostrovsky@oracle.com> wrote:
>>> Instead of scrubbing pages while holding the heap lock, we can mark the
>>> buddy's head as being scrubbed and drop the lock temporarily. If someone
>>> (most likely alloc_heap_pages()) tries to access this chunk, it will
>>> signal the scrubber to abort the scrub by setting the head's
>>> PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each
>>> page and stops its work as soon as it sees it.
>> Explanation sounds fine, but didn't you earlier indicate you think
>> yourself that the title is not really suitable (anymore)? I'm of
>> that opinion, at least, as pages are always available now, it's just
>> that the latency to get hold of the heap lock is higher before this
>> change than what we want it to be.
> 
> Uhm... Yes, that's what it was meant to say ;-)
> 
> Should have been "Keep *heap* available to others while scrubbing"

And perhaps s/available/accessible/ ?

Jan

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 0b2dff1..514a4a1 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@  static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -780,10 +791,15 @@  static struct page_info *alloc_heap_pages(
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         !pg->u.free.dirty_head )
+                    if ( !pg->u.free.dirty_head )
                         goto found;
 
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        goto found;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -921,6 +937,8 @@  static int reserve_offlined_page(struct page_info *head)
 
     head->u.free.dirty_head = false;
 
+    check_and_stop_scrub(head);
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -1027,6 +1045,9 @@  merge_and_free_buddy(struct page_info *pg, unsigned int node,
              (phys_to_nid(page_to_maddr(buddy)) != node) )
             break;
 
+        if ( buddy->u.free.scrub_state & PAGE_SCRUBBING )
+            break;
+
         page_list_del(buddy, &heap(node, zone, order));
         need_scrub |= buddy->u.free.dirty_head;
         buddy->u.free.dirty_head = false;
@@ -1098,14 +1119,35 @@  static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
     unsigned int zone, order, scrub_order;
-    unsigned long i, num_processed, start, end;
+    unsigned long i, num_processed, start, end, dirty_cnt;
     unsigned int cpu = smp_processor_id();
     bool preempt = false, is_frag;
     nodeid_t node;
+    struct scrub_wait_state st;
 
     /* Scrubbing granularity. */
 #define SCRUB_CHUNK_ORDER  8
@@ -1134,8 +1176,13 @@  bool scrub_free_pages(void)
                 if ( !pg->u.free.dirty_head )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
                 scrub_order = MIN(order, SCRUB_CHUNK_ORDER);
-                num_processed = 0;
+                num_processed = dirty_cnt = 0;
                 is_frag = false;
                 while ( num_processed < (1UL << order) )
                 {
@@ -1145,8 +1192,24 @@  bool scrub_free_pages(void)
                         if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                         {
                             scrub_one_page(&pg[i]);
+                            /*
+                             * We can modify count_info without holding heap
+                             * lock since we effectively locked this buddy by
+                             * setting its scrub_state.
+                             */
                             pg[i].count_info &= ~PGC_need_scrub;
-                            node_need_scrub[node]--;
+                            dirty_cnt++;
+                        }
+
+                        if ( ACCESS_ONCE(pg->u.free.scrub_state) &
+                             PAGE_SCRUB_ABORT )
+                        {
+                            /* Someone wants this chunk. Drop everything. */
+                            pg->u.free.scrub_state = 0;
+                            spin_lock(&heap_lock);
+                            node_need_scrub[node] -= dirty_cnt;
+                            spin_unlock(&heap_lock);
+                            goto out_nolock;
                         }
                     }
 
@@ -1159,11 +1222,20 @@  bool scrub_free_pages(void)
                     }
                 }
 
-                start = 0;
-                end = num_processed;
+                st.pg = pg;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
 
                 page_list_del(pg, &heap(node, zone, order));
 
+                start = 0;
+                end = num_processed;
+
                 /* Merge clean pages */
                 while ( start < end )
                 {
@@ -1194,6 +1266,8 @@  bool scrub_free_pages(void)
                     end += (1UL << chunk_order);
                 }
 
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1202,6 +1276,8 @@  bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1240,6 +1316,8 @@  static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index abc3f6b..b333b16 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -43,6 +43,10 @@  struct page_info
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING      (1<<0)
+#define PAGE_SCRUB_ABORT    (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
             /* Set on a buddy head if the buddy has unscrubbed pages. */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 5cf528a..d00c4a1 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,10 @@  struct page_info
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING      (1<<0)
+#define PAGE_SCRUB_ABORT    (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
             /* Set on a buddy head if the buddy has unscrubbed pages. */