@@ -687,6 +687,7 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
{
PFN_ORDER(pg) = order;
pg->u.free.first_dirty = first_dirty;
+ pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
if ( first_dirty != INVALID_DIRTY_IDX )
page_list_add_tail(pg, &heap(node, zone, order));
@@ -694,6 +695,25 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
page_list_add(pg, &heap(node, zone, order));
}
+static void check_and_stop_scrub(struct page_info *head)
+{
+ if ( head->u.free.scrub_state == BUDDY_SCRUBBING )
+ {
+ struct page_info pg;
+
+ head->u.free.scrub_state = BUDDY_SCRUB_ABORT;
+ spin_lock_kick();
+ for ( ; ; )
+ {
+ /* Can't ACCESS_ONCE() a bitfield. */
+ pg.u.free.val = ACCESS_ONCE(head->u.free.val);
+ if ( pg.u.free.scrub_state != BUDDY_SCRUB_ABORT )
+ break;
+ cpu_relax();
+ }
+ }
+}
+
static struct page_info *get_free_buddy(unsigned int zone_lo,
unsigned int zone_hi,
unsigned int order, unsigned int memflags,
@@ -738,14 +758,19 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
{
if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
{
+ if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+ return pg;
/*
* We grab single pages (order=0) even if they are
* unscrubbed. Given that scrubbing one page is fairly quick
* it is not worth breaking higher orders.
*/
- if ( (order == 0) || use_unscrubbed ||
- pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+ if ( (order == 0) || use_unscrubbed )
+ {
+ check_and_stop_scrub(pg);
return pg;
+ }
+
page_list_add_tail(pg, &heap(node, zone, j));
}
}
@@ -928,6 +953,7 @@ static int reserve_offlined_page(struct page_info *head)
cur_head = head;
+ check_and_stop_scrub(head);
/*
* We may break the buddy so let's mark the head as clean. Then, when
* merging chunks back into the heap, we will see whether the chunk has
@@ -1084,6 +1110,29 @@ static unsigned int node_to_scrub(bool get_node)
return closest;
}
+struct scrub_wait_state {
+ struct page_info *pg;
+ unsigned int first_dirty;
+ bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+ struct scrub_wait_state *st = data;
+
+ if ( st->drop )
+ return;
+
+ if ( st->pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+ {
+ /* There is a waiter for this buddy. Release it. */
+ st->drop = true;
+ st->pg->u.free.first_dirty = st->first_dirty;
+ smp_wmb();
+ st->pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+ }
+}
+
bool scrub_free_pages(void)
{
struct page_info *pg;
@@ -1106,25 +1155,53 @@ bool scrub_free_pages(void)
do {
while ( !page_list_empty(&heap(node, zone, order)) )
{
- unsigned int i;
+ unsigned int i, dirty_cnt;
+ struct scrub_wait_state st;
/* Unscrubbed pages are always at the end of the list. */
pg = page_list_last(&heap(node, zone, order));
if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
break;
+ ASSERT(!pg->u.free.scrub_state);
+ pg->u.free.scrub_state = BUDDY_SCRUBBING;
+
+ spin_unlock(&heap_lock);
+
+ dirty_cnt = 0;
+
for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
{
if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
{
scrub_one_page(&pg[i]);
+ /*
+ * We can modify count_info without holding heap
+ * lock since we effectively locked this buddy by
+ * setting its scrub_state.
+ */
pg[i].count_info &= ~PGC_need_scrub;
- node_need_scrub[node]--;
+ dirty_cnt++;
cnt += 100; /* scrubbed pages add heavier weight. */
}
else
cnt++;
+ if ( pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+ {
+ /* Someone wants this chunk. Drop everything. */
+
+ pg->u.free.first_dirty = (i == (1U << order) - 1) ?
+ INVALID_DIRTY_IDX : i + 1;
+ smp_wmb();
+ pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
+ spin_lock(&heap_lock);
+ node_need_scrub[node] -= dirty_cnt;
+ spin_unlock(&heap_lock);
+ goto out_nolock;
+ }
+
/*
* Scrub a few (8) pages before becoming eligible for
* preemption. But also count non-scrubbing loop iterations
@@ -1138,6 +1215,17 @@ bool scrub_free_pages(void)
}
}
+ st.pg = pg;
+ st.first_dirty = (i >= (1UL << order) - 1) ?
+ INVALID_DIRTY_IDX : i + 1;
+ st.drop = false;
+ spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+ node_need_scrub[node] -= dirty_cnt;
+
+ if ( st.drop )
+ goto out;
+
if ( i >= (1U << order) - 1 )
{
page_list_del(pg, &heap(node, zone, order));
@@ -1146,6 +1234,8 @@ bool scrub_free_pages(void)
else
pg->u.free.first_dirty = i + 1;
+ pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
if ( preempt || (node_need_scrub[node] == 0) )
goto out;
}
@@ -1154,6 +1244,8 @@ bool scrub_free_pages(void)
out:
spin_unlock(&heap_lock);
+
+ out_nolock:
node_clear(node, node_scrubbing);
return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
}
@@ -1235,6 +1327,8 @@ static void free_heap_pages(
(phys_to_nid(page_to_maddr(predecessor)) != node) )
break;
+ check_and_stop_scrub(predecessor);
+
page_list_del(predecessor, &heap(node, zone, order));
if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1256,6 +1350,9 @@ static void free_heap_pages(
(PFN_ORDER(successor) != order) ||
(phys_to_nid(page_to_maddr(successor)) != node) )
break;
+
+ check_and_stop_scrub(successor);
+
page_list_del(successor, &heap(node, zone, order));
if ( successor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -42,18 +42,26 @@ struct page_info
unsigned long type_info;
} inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
- struct {
- /* Do TLBs need flushing for safety before next page use? */
- unsigned long need_tlbflush:1;
-
- /*
- * Index of the first *possibly* unscrubbed page in the buddy.
- * One more than maximum possible order (MAX_ORDER+1) to
- * accommodate INVALID_DIRTY_IDX.
- */
+ union {
+ struct {
+ /* Do TLBs need flushing for safety before next page use? */
+ unsigned long need_tlbflush:1;
+
+ /*
+ * Index of the first *possibly* unscrubbed page in the buddy.
+ * One more than maximum possible order (MAX_ORDER+1) to
+ * accommodate INVALID_DIRTY_IDX.
+ */
#define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
- unsigned long first_dirty:MAX_ORDER + 2;
+ unsigned long first_dirty:MAX_ORDER + 2;
+
+#define BUDDY_NOT_SCRUBBING 0
+#define BUDDY_SCRUBBING 1
+#define BUDDY_SCRUB_ABORT 2
+ unsigned long scrub_state:2;
+ };
+ unsigned long val;
} free;
} u;
@@ -86,17 +86,26 @@ struct page_info
} sh;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
- struct {
- /* Do TLBs need flushing for safety before next page use? */
- unsigned long need_tlbflush:1;
-
- /*
- * Index of the first *possibly* unscrubbed page in the buddy.
- * One more than maximum possible order (MAX_ORDER+1) to
- * accommodate INVALID_DIRTY_IDX.
- */
+ union {
+ struct {
+ /* Do TLBs need flushing for safety before next page use? */
+ unsigned long need_tlbflush:1;
+
+ /*
+ * Index of the first *possibly* unscrubbed page in the buddy.
+ * One more than maximum possible order (MAX_ORDER+1) to
+ * accommodate INVALID_DIRTY_IDX.
+ */
#define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
- unsigned long first_dirty:MAX_ORDER + 2;
+ unsigned long first_dirty:MAX_ORDER + 2;
+
+#define BUDDY_NOT_SCRUBBING 0
+#define BUDDY_SCRUBBING 1
+#define BUDDY_SCRUB_ABORT 2
+ unsigned long scrub_state:2;
+ };
+
+ unsigned long val;
} free;
} u;
Instead of scrubbing pages while holding heap lock we can mark buddy's head as being scrubbed and drop the lock temporarily. If someone (most likely alloc_heap_pages()) tries to access this chunk it will signal the scrubber to abort scrub by setting head's BUDDY_SCRUB_ABORT bit. The scrubber checks this bit after processing each page and stops its work as soon as it sees it. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> --- Changes in v5: * Fixed off-by-one error in setting first_dirty * Changed struct page_info.u.free to a union to permit use of ACCESS_ONCE in check_and_stop_scrub() * Renamed PAGE_SCRUBBING etc. macros to BUDDY_SCRUBBING etc xen/common/page_alloc.c | 105 +++++++++++++++++++++++++++++++++++++++++++++-- xen/include/asm-arm/mm.h | 28 ++++++++----- xen/include/asm-x86/mm.h | 29 ++++++++----- 3 files changed, 138 insertions(+), 24 deletions(-)