--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -780,10 +791,15 @@ static struct page_info *alloc_heap_pages(
         {
             if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
             {
-                if ( (order == 0) || use_unscrubbed ||
-                     !pg->u.free.dirty_head )
+                if ( !pg->u.free.dirty_head )
                     goto found;
 
+                if ( (order == 0) || use_unscrubbed )
+                {
+                    check_and_stop_scrub(pg);
+                    goto found;
+                }
+
                 page_list_add_tail(pg, &heap(node, zone, j));
             }
         }
@@ -921,6 +937,8 @@ static int reserve_offlined_page(struct page_info *head)
 
     head->u.free.dirty_head = false;
 
+    check_and_stop_scrub(head);
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -1027,6 +1045,9 @@ merge_and_free_buddy(struct page_info *pg, unsigned int node,
              (phys_to_nid(page_to_maddr(buddy)) != node) )
             break;
 
+        if ( buddy->u.free.scrub_state & PAGE_SCRUBBING )
+            break;
+
         page_list_del(buddy, &heap(node, zone, order));
         need_scrub |= buddy->u.free.dirty_head;
         buddy->u.free.dirty_head = false;
@@ -1098,14 +1119,35 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
     unsigned int zone, order, scrub_order;
-    unsigned long i, num_processed, start, end;
+    unsigned long i, num_processed, start, end, dirty_cnt;
     unsigned int cpu = smp_processor_id();
     bool preempt = false, is_frag;
     nodeid_t node;
+    struct scrub_wait_state st;
 
     /* Scrubbing granularity. */
 #define SCRUB_CHUNK_ORDER 8
@@ -1134,8 +1176,13 @@ bool scrub_free_pages(void)
             if ( !pg->u.free.dirty_head )
                 break;
 
+            ASSERT(!pg->u.free.scrub_state);
+            pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+            spin_unlock(&heap_lock);
+
             scrub_order = MIN(order, SCRUB_CHUNK_ORDER);
-            num_processed = 0;
+            num_processed = dirty_cnt = 0;
             is_frag = false;
             while ( num_processed < (1UL << order) )
             {
@@ -1145,8 +1192,24 @@ bool scrub_free_pages(void)
                 if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                 {
                     scrub_one_page(&pg[i]);
+                    /*
+                     * We can modify count_info without holding heap
+                     * lock since we effectively locked this buddy by
+                     * setting its scrub_state.
+                     */
                     pg[i].count_info &= ~PGC_need_scrub;
-                    node_need_scrub[node]--;
+                    dirty_cnt++;
+                }
+
+                if ( ACCESS_ONCE(pg->u.free.scrub_state) &
+                     PAGE_SCRUB_ABORT )
+                {
+                    /* Someone wants this chunk. Drop everything. */
+                    pg->u.free.scrub_state = 0;
+                    spin_lock(&heap_lock);
+                    node_need_scrub[node] -= dirty_cnt;
+                    spin_unlock(&heap_lock);
+                    goto out_nolock;
                 }
             }
 
@@ -1159,11 +1222,20 @@ bool scrub_free_pages(void)
                 }
             }
 
-            start = 0;
-            end = num_processed;
+            st.pg = pg;
+            st.drop = false;
+            spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+            node_need_scrub[node] -= dirty_cnt;
+
+            if ( st.drop )
+                goto out;
 
             page_list_del(pg, &heap(node, zone, order));
 
+            start = 0;
+            end = num_processed;
+
             /* Merge clean pages */
             while ( start < end )
             {
@@ -1194,6 +1266,8 @@ bool scrub_free_pages(void)
                 end += (1UL << chunk_order);
             }
 
+            pg->u.free.scrub_state = 0;
+
             if ( preempt || (node_need_scrub[node] == 0) )
                 goto out;
         }
@@ -1202,6 +1276,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1240,6 +1316,8 @@ static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -43,6 +43,10 @@ struct page_info
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
             /* Set on a buddy head if the buddy has unscrubbed pages. */
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,10 @@ struct page_info
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
             /* Set on a buddy head if the buddy has unscrubbed pages. */
Instead of scrubbing pages while holding the heap lock we can mark the
buddy's head as being scrubbed and drop the lock temporarily. If someone
(most likely alloc_heap_pages()) tries to access this chunk it will
signal the scrubber to abort the scrub by setting the head's
PAGE_SCRUB_ABORT bit. The scrubber checks this bit after processing each
page and stops its work as soon as it sees it.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v3:
* Adjusted page_info's scrub_state definitions but kept them as binary
  flags since I think having both the PAGE_SCRUBBING and PAGE_SCRUB_ABORT
  bits set makes sense.

 xen/common/page_alloc.c  | 92 ++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |  4 ++
 xen/include/asm-x86/mm.h |  4 ++
 3 files changed, 93 insertions(+), 7 deletions(-)
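
As a rough illustration of the protocol (not part of the patch), the
following stand-alone program models the handshake. A pthread mutex and
C11 atomics stand in for Xen's heap_lock, spin_lock_kick() and
cpu_relax(); scrubber() and allocator() are hypothetical stand-ins for
scrub_free_pages() and check_and_stop_scrub().

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define PAGE_SCRUBBING   (1 << 0)
#define PAGE_SCRUB_ABORT (1 << 1)
#define NPAGES           (1 << 10)

static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint scrub_state;      /* lives in the buddy head in the patch */
static unsigned char pages[NPAGES];  /* stands in for the buddy being scrubbed */

/* Scrubber: mark the buddy, drop the lock, scrub, watch for an abort. */
static void *scrubber(void *arg)
{
    (void)arg;

    pthread_mutex_lock(&heap_lock);
    atomic_store(&scrub_state, PAGE_SCRUBBING);
    pthread_mutex_unlock(&heap_lock);         /* scrub without the lock */

    for ( unsigned int i = 0; i < NPAGES; i++ )
    {
        pages[i] = 0;                         /* "scrub" one page */
        if ( atomic_load(&scrub_state) & PAGE_SCRUB_ABORT )
            break;                            /* someone wants this buddy */
    }

    /* Clearing the state finishes a normal pass and releases any waiter. */
    atomic_store(&scrub_state, 0);
    return NULL;
}

/* Allocator: the analogue of check_and_stop_scrub(). */
static void *allocator(void *arg)
{
    (void)arg;

    pthread_mutex_lock(&heap_lock);
    if ( atomic_load(&scrub_state) & PAGE_SCRUBBING )
    {
        /* Signal the scrubber, then spin until it acknowledges. */
        atomic_fetch_or(&scrub_state, PAGE_SCRUB_ABORT);
        while ( atomic_load(&scrub_state) & PAGE_SCRUB_ABORT )
            sched_yield();                    /* cpu_relax() in the patch */
    }
    /* The buddy is now guaranteed not to be touched by the scrubber. */
    pthread_mutex_unlock(&heap_lock);
    return NULL;
}

int main(void)
{
    pthread_t s, a;

    pthread_create(&s, NULL, scrubber, NULL);
    pthread_create(&a, NULL, allocator, NULL);
    pthread_join(s, NULL);
    pthread_join(a, NULL);
    puts("buddy handed over cleanly");
    return 0;
}

The sketch deliberately sidesteps the trickiest interaction by never
re-taking the lock on the scrubber side. In the patch the scrubber must
re-acquire the heap lock to return the buddy while the waiter in
check_and_stop_scrub() spins on PAGE_SCRUB_ABORT with that lock held;
spin_lock_cb() breaks the resulting deadlock by invoking scrub_continue()
while the scrubber waits for the lock, so the buddy is released to the
waiter without either side spinning forever.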