Message ID: 1502908394-9760-2-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State: New, archived
Hi Boris,

On 16/08/17 19:33, Boris Ostrovsky wrote:
> .. so that it's easy to find pages that need to be scrubbed (those pages are
> now marked with _PGC_need_scrub bit).
>
> We keep track of the first unscrubbed page in a page buddy using first_dirty
> field. For now it can have two values, 0 (whole buddy needs scrubbing) or
> INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent patches
> will allow scrubbing to be interrupted, resulting in first_dirty taking any
> value.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

For the ARM bits:

Acked-by: Julien Grall <julien.grall@arm.com>

Cheers,
>>> On 16.08.17 at 20:33, <boris.ostrovsky@oracle.com> wrote:
> .. so that it's easy to find pages that need to be scrubbed (those pages are
> now marked with _PGC_need_scrub bit).
>
> We keep track of the first unscrubbed page in a page buddy using first_dirty
> field. For now it can have two values, 0 (whole buddy needs scrubbing) or
> INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent patches
> will allow scrubbing to be interrupted, resulting in first_dirty taking any
> value.
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
with one remark:

> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -261,7 +261,11 @@ void __init init_boot_pages(paddr_t ps, paddr_t pe)
>  #ifdef CONFIG_X86
>      const unsigned long *badpage = NULL;
>      unsigned int i, array_size;
> +
> +    BUILD_BUG_ON(8 * sizeof(((struct page_info *)0)->u.free.first_dirty) <
> +                 MAX_ORDER + 1);
>  #endif
> +    BUILD_BUG_ON(sizeof(((struct page_info *)0)->u) != sizeof(unsigned long));

As I'm generally opposed to casts whenever one can get away
without, I dislike these as well. In the case here, short of a local
variable of suitable type, I'd suggest using frame_table instead
of the open-coded cast. If you're fine with that, this can easily
be done while committing.

Jan
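For context, frame_table is declared as a struct page_info pointer, so the
suggestion presumably boils down to something like the sketch below (an
illustration of the idea, not necessarily the exact form that was committed):

    BUILD_BUG_ON(8 * sizeof(frame_table->u.free.first_dirty) < MAX_ORDER + 1);
    BUILD_BUG_ON(sizeof(frame_table->u) != sizeof(unsigned long));

Since sizeof() never evaluates its operand, frame_table is not dereferenced at
run time and both checks stay purely compile-time, just without the
((struct page_info *)0) cast.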
On 08/18/2017 05:11 AM, Jan Beulich wrote:
>>>> On 16.08.17 at 20:33, <boris.ostrovsky@oracle.com> wrote:
>> .. so that it's easy to find pages that need to be scrubbed (those pages are
>> now marked with _PGC_need_scrub bit).
>>
>> We keep track of the first unscrubbed page in a page buddy using first_dirty
>> field. For now it can have two values, 0 (whole buddy needs scrubbing) or
>> INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent patches
>> will allow scrubbing to be interrupted, resulting in first_dirty taking any
>> value.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> with one remark:
>
>> --- a/xen/common/page_alloc.c
>> +++ b/xen/common/page_alloc.c
>> @@ -261,7 +261,11 @@ void __init init_boot_pages(paddr_t ps, paddr_t pe)
>>  #ifdef CONFIG_X86
>>      const unsigned long *badpage = NULL;
>>      unsigned int i, array_size;
>> +
>> +    BUILD_BUG_ON(8 * sizeof(((struct page_info *)0)->u.free.first_dirty) <
>> +                 MAX_ORDER + 1);
>>  #endif
>> +    BUILD_BUG_ON(sizeof(((struct page_info *)0)->u) != sizeof(unsigned long));
> As I'm generally opposed to casts whenever one can get away
> without, I dislike these as well. In the case here, short of a local
> variable of suitable type, I'd suggest using frame_table instead
> of the open-coded cast. If you're fine with that, this can easily
> be done while committing.

Sure.

-boris
>>> On 17.08.17 at 12:30, <julien.grall@arm.com> wrote:
> On 16/08/17 19:33, Boris Ostrovsky wrote:
>> .. so that it's easy to find pages that need to be scrubbed (those pages are
>> now marked with _PGC_need_scrub bit).
>>
>> We keep track of the first unscrubbed page in a page buddy using first_dirty
>> field. For now it can have two values, 0 (whole buddy needs scrubbing) or
>> INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent patches
>> will allow scrubbing to be interrupted, resulting in first_dirty taking any
>> value.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>
> For the ARM bits:
>
> Acked-by: Julien Grall <julien.grall@arm.com>

I've started committing the series when I noticed patches 4, 5, and 6
are still lacking ARM side acks.

Jan
Hi Jan,

On 21/08/17 14:49, Jan Beulich wrote:
>>>> On 17.08.17 at 12:30, <julien.grall@arm.com> wrote:
>> On 16/08/17 19:33, Boris Ostrovsky wrote:
>>> .. so that it's easy to find pages that need to be scrubbed (those pages are
>>> now marked with _PGC_need_scrub bit).
>>>
>>> We keep track of the first unscrubbed page in a page buddy using first_dirty
>>> field. For now it can have two values, 0 (whole buddy needs scrubbing) or
>>> INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent patches
>>> will allow scrubbing to be interrupted, resulting in first_dirty taking any
>>> value.
>>>
>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>>
>> For the ARM bits:
>>
>> Acked-by: Julien Grall <julien.grall@arm.com>
>
> I've started committing the series when I noticed patches 4, 5, and 6
> are still lacking ARM side acks.

Whoops, thank you for the reminder. You can add my ack on patches 4, 5,
and 6:

Acked-by: Julien Grall <julien.grall@arm.com>

Cheers,
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 444ecf3..a39fd81 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -261,7 +261,11 @@ void __init init_boot_pages(paddr_t ps, paddr_t pe)
 #ifdef CONFIG_X86
     const unsigned long *badpage = NULL;
     unsigned int i, array_size;
+
+    BUILD_BUG_ON(8 * sizeof(((struct page_info *)0)->u.free.first_dirty) <
+                 MAX_ORDER + 1);
 #endif
+    BUILD_BUG_ON(sizeof(((struct page_info *)0)->u) != sizeof(unsigned long));
 
     ps = round_pgup(ps);
     pe = round_pgdown(pe);
@@ -375,6 +379,8 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
+static unsigned long node_need_scrub[MAX_NUMNODES];
+
 static unsigned long *avail[MAX_NUMNODES];
 static long total_avail_pages;
 
@@ -670,13 +676,30 @@ static void check_low_mem_virq(void)
     }
 }
 
+/* Pages that need a scrub are added to tail, otherwise to head. */
+static void page_list_add_scrub(struct page_info *pg, unsigned int node,
+                                unsigned int zone, unsigned int order,
+                                unsigned int first_dirty)
+{
+    PFN_ORDER(pg) = order;
+    pg->u.free.first_dirty = first_dirty;
+
+    if ( first_dirty != INVALID_DIRTY_IDX )
+    {
+        ASSERT(first_dirty < (1U << order));
+        page_list_add_tail(pg, &heap(node, zone, order));
+    }
+    else
+        page_list_add(pg, &heap(node, zone, order));
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
     unsigned int order, unsigned int memflags,
     struct domain *d)
 {
-    unsigned int i, j, zone = 0, nodemask_retry = 0;
+    unsigned int i, j, zone = 0, nodemask_retry = 0, first_dirty;
     nodeid_t first_node, node = MEMF_get_node(memflags), req_node = node;
     unsigned long request = 1UL << order;
     struct page_info *pg;
@@ -790,12 +813,26 @@ static struct page_info *alloc_heap_pages(
         return NULL;
 
  found: 
+
+    first_dirty = pg->u.free.first_dirty;
+
     /* We may have to halve the chunk a number of times. */
     while ( j != order )
     {
-        PFN_ORDER(pg) = --j;
-        page_list_add_tail(pg, &heap(node, zone, j));
-        pg += 1 << j;
+        j--;
+        page_list_add_scrub(pg, node, zone, j,
+                            (1U << j) > first_dirty ?
+                            first_dirty : INVALID_DIRTY_IDX);
+        pg += 1U << j;
+
+        if ( first_dirty != INVALID_DIRTY_IDX )
+        {
+            /* Adjust first_dirty */
+            if ( first_dirty >= 1U << j )
+                first_dirty -= 1U << j;
+            else
+                first_dirty = 0; /* We've moved past original first_dirty */
+        }
     }
 
     ASSERT(avail[node][zone] >= request);
@@ -842,12 +879,20 @@ static int reserve_offlined_page(struct page_info *head)
     unsigned int node = phys_to_nid(page_to_maddr(head));
     int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
     struct page_info *cur_head;
-    int cur_order;
+    unsigned int cur_order, first_dirty;
 
     ASSERT(spin_is_locked(&heap_lock));
 
     cur_head = head;
 
+    /*
+     * We may break the buddy so let's mark the head as clean. Then, when
+     * merging chunks back into the heap, we will see whether the chunk has
+     * unscrubbed pages and set its first_dirty properly.
+     */
+    first_dirty = head->u.free.first_dirty;
+    head->u.free.first_dirty = INVALID_DIRTY_IDX;
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -858,6 +903,8 @@ static int reserve_offlined_page(struct page_info *head)
         if ( page_state_is(cur_head, offlined) )
         {
             cur_head++;
+            if ( first_dirty != INVALID_DIRTY_IDX && first_dirty )
+                first_dirty--;
             continue;
         }
 
@@ -884,9 +931,20 @@ static int reserve_offlined_page(struct page_info *head)
             {
             merge:
                 /* We don't consider merging outside the head_order. */
-                page_list_add_tail(cur_head, &heap(node, zone, cur_order));
-                PFN_ORDER(cur_head) = cur_order;
+                page_list_add_scrub(cur_head, node, zone, cur_order,
+                                    (1U << cur_order) > first_dirty ?
+                                    first_dirty : INVALID_DIRTY_IDX);
                 cur_head += (1 << cur_order);
+
+                /* Adjust first_dirty if needed. */
+                if ( first_dirty != INVALID_DIRTY_IDX )
+                {
+                    if ( first_dirty >= 1U << cur_order )
+                        first_dirty -= 1U << cur_order;
+                    else
+                        first_dirty = 0;
+                }
+
                 break;
             }
         }
@@ -911,9 +969,53 @@ static int reserve_offlined_page(struct page_info *head)
     return count;
 }
 
+static void scrub_free_pages(unsigned int node)
+{
+    struct page_info *pg;
+    unsigned int zone;
+
+    ASSERT(spin_is_locked(&heap_lock));
+
+    if ( !node_need_scrub[node] )
+        return;
+
+    for ( zone = 0; zone < NR_ZONES; zone++ )
+    {
+        unsigned int order = MAX_ORDER;
+
+        do {
+            while ( !page_list_empty(&heap(node, zone, order)) )
+            {
+                unsigned int i;
+
+                /* Unscrubbed pages are always at the end of the list. */
+                pg = page_list_last(&heap(node, zone, order));
+                if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                    break;
+
+                for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
+                {
+                    if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
+                    {
+                        scrub_one_page(&pg[i]);
+                        pg[i].count_info &= ~PGC_need_scrub;
+                        node_need_scrub[node]--;
+                    }
+                }
+
+                page_list_del(pg, &heap(node, zone, order));
+                page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+
+                if ( node_need_scrub[node] == 0 )
+                    return;
+            }
+        } while ( order-- != 0 );
+    }
+}
+
 /* Free 2^@order set of pages. */
 static void free_heap_pages(
-    struct page_info *pg, unsigned int order)
+    struct page_info *pg, unsigned int order, bool need_scrub)
 {
     unsigned long mask, mfn = page_to_mfn(pg);
     unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -953,10 +1055,20 @@ static void free_heap_pages(
         /* This page is not a guest frame any more. */
         page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
         set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
+
+        if ( need_scrub )
+            pg[i].count_info |= PGC_need_scrub;
     }
 
     avail[node][zone] += 1 << order;
     total_avail_pages += 1 << order;
+    if ( need_scrub )
+    {
+        node_need_scrub[node] += 1 << order;
+        pg->u.free.first_dirty = 0;
+    }
+    else
+        pg->u.free.first_dirty = INVALID_DIRTY_IDX;
 
     if ( tmem_enabled() )
         midsize_alloc_zone_pages = max(
@@ -980,6 +1092,12 @@ static void free_heap_pages(
 
             page_list_del(predecessor, &heap(node, zone, order));
 
+            /* Keep predecessor's first_dirty if it is already set. */
+            if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
+                 pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+                predecessor->u.free.first_dirty = (1U << order) +
+                                                  pg->u.free.first_dirty;
+
             pg = predecessor;
         }
         else
@@ -999,12 +1117,14 @@ static void free_heap_pages(
         order++;
     }
 
-    PFN_ORDER(pg) = order;
-    page_list_add_tail(pg, &heap(node, zone, order));
+    page_list_add_scrub(pg, node, zone, order, pg->u.free.first_dirty);
 
     if ( tainted )
         reserve_offlined_page(pg);
 
+    if ( need_scrub )
+        scrub_free_pages(node);
+
     spin_unlock(&heap_lock);
 }
 
@@ -1225,7 +1345,7 @@ unsigned int online_page(unsigned long mfn, uint32_t *status)
     spin_unlock(&heap_lock);
 
     if ( (y & PGC_state) == PGC_state_offlined )
-        free_heap_pages(pg, 0);
+        free_heap_pages(pg, 0, false);
 
     return ret;
 }
@@ -1294,7 +1414,7 @@ static void init_heap_pages(
             nr_pages -= n;
         }
 
-        free_heap_pages(pg+i, 0);
+        free_heap_pages(pg + i, 0, false);
     }
 }
 
@@ -1621,7 +1741,7 @@ void free_xenheap_pages(void *v, unsigned int order)
 
     memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
 
-    free_heap_pages(virt_to_page(v), order);
+    free_heap_pages(virt_to_page(v), order, false);
 }
 
 #else
@@ -1675,12 +1795,9 @@ void free_xenheap_pages(void *v, unsigned int order)
     pg = virt_to_page(v);
 
     for ( i = 0; i < (1u << order); i++ )
-    {
-        scrub_one_page(&pg[i]);
         pg[i].count_info &= ~PGC_xen_heap;
-    }
 
-    free_heap_pages(pg, order);
+    free_heap_pages(pg, order, true);
 }
 
 #endif
@@ -1789,7 +1906,7 @@ struct page_info *alloc_domheap_pages(
     if ( d && !(memflags & MEMF_no_owner) &&
          assign_pages(d, pg, order, memflags) )
     {
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, false);
         return NULL;
     }
 
@@ -1857,11 +1974,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
             scrub = 1;
         }
 
-        if ( unlikely(scrub) )
-            for ( i = 0; i < (1 << order); i++ )
-                scrub_one_page(&pg[i]);
-
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, scrub);
     }
 
     if ( drop_dom_ref )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index ef84b72..3b3d38f 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -43,8 +43,16 @@ struct page_info
     } inuse;
     /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
    struct {
+        /*
+         * Index of the first *possibly* unscrubbed page in the buddy.
+         * One more bit than maximum possible order to accommodate
+         * INVALID_DIRTY_IDX.
+         */
+#define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
+        unsigned long first_dirty:MAX_ORDER + 1;
+
         /* Do TLBs need flushing for safety before next page use? */
-        bool_t need_tlbflush;
+        bool need_tlbflush:1;
     } free;
 
 } u;
@@ -107,6 +115,13 @@ struct page_info
 #define PGC_count_width PG_shift(9)
 #define PGC_count_mask ((1UL<<PGC_count_width)-1)
 
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub _PGC_allocated
+#define PGC_need_scrub PGC_allocated
+
 extern mfn_t xenheap_mfn_start, xenheap_mfn_end;
 extern vaddr_t xenheap_virt_end;
 #ifdef CONFIG_ARM_64
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2bf3f33..86b1723 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,14 @@ struct page_info
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
+            /*
+             * Index of the first *possibly* unscrubbed page in the buddy.
+             * One more bit than maximum possible order to accommodate
+             * INVALID_DIRTY_IDX.
+             */
+#define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
+            unsigned int first_dirty;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
@@ -233,6 +241,13 @@ struct page_info
 #define PGC_count_width PG_shift(9)
 #define PGC_count_mask ((1UL<<PGC_count_width)-1)
 
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub _PGC_allocated
+#define PGC_need_scrub PGC_allocated
+
 #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
 #define is_xen_heap_mfn(mfn) \
     (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
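To make the "one more bit" comment concrete: a buddy of the maximum order
contains 2^MAX_ORDER pages, so real first_dirty values run from 0 to
2^MAX_ORDER - 1, and widening the field by one bit leaves 2^(MAX_ORDER+1) - 1
free to serve as the INVALID_DIRTY_IDX sentinel. A small standalone check
(the MAX_ORDER value below is made up for the example, not taken from the
patch):

    /* Illustrative only; not part of the patch. */
    #define MAX_ORDER          18
    #define INVALID_DIRTY_IDX  ((1UL << (MAX_ORDER + 1)) - 1)

    /* Every real page index inside a maximum-order buddy stays below the
     * sentinel... */
    _Static_assert((1UL << MAX_ORDER) - 1 < INVALID_DIRTY_IDX,
                   "valid indices do not collide with INVALID_DIRTY_IDX");
    /* ...and the sentinel itself still fits in MAX_ORDER + 1 bits. */
    _Static_assert(INVALID_DIRTY_IDX < (1UL << (MAX_ORDER + 1)),
                   "INVALID_DIRTY_IDX fits in the widened field");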
.. so that it's easy to find pages that need to be scrubbed (those pages are
now marked with _PGC_need_scrub bit).

We keep track of the first unscrubbed page in a page buddy using first_dirty
field. For now it can have two values, 0 (whole buddy needs scrubbing) or
INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent patches
will allow scrubbing to be interrupted, resulting in first_dirty taking any
value.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v8:
* Changed x86's definition of page_info.u.free from using bitfields to natural
  datatypes
* Swapped order of bitfields in page_info.u.free for ARM
* Added BUILD_BUG_ON to check page_info.u.free.first_dirty size on x86, moved
  previously defined BUILD_BUG_ON from init_heap_pages() to init_boot_pages()
  (to avoid introducing extra '#ifdef x86' and to keep both together)

 xen/common/page_alloc.c  | 159 ++++++++++++++++++++++++++++++++++++++++-------
 xen/include/asm-arm/mm.h |  17 ++++-
 xen/include/asm-x86/mm.h |  15 +++++
 3 files changed, 167 insertions(+), 24 deletions(-)
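As a concrete illustration of the bookkeeping described above (and performed
by the alloc_heap_pages() and free_heap_pages() hunks in the diff), here is a
small standalone sketch of how first_dirty is carried across a buddy split
and a merge. The helper names split()/merge(), the orders and the index
values are made up for the example; they are not functions from the patch.

    /* Standalone sketch, not Xen code: first_dirty across split/merge. */
    #include <assert.h>

    #define MAX_ORDER          18                       /* assumed value */
    #define INVALID_DIRTY_IDX  ((1U << (MAX_ORDER + 1)) - 1)

    /*
     * Halve a buddy into two order-j halves.  Returns the first_dirty value
     * for the lower half; *fd is updated to the value for the upper half.
     */
    static unsigned int split(unsigned int j, unsigned int *fd)
    {
        unsigned int lower = (1U << j) > *fd ? *fd : INVALID_DIRTY_IDX;

        if ( *fd != INVALID_DIRTY_IDX )
            *fd = (*fd >= (1U << j)) ? *fd - (1U << j) : 0;

        return lower;
    }

    /*
     * Merge a dirty buddy into a clean predecessor of the same order: the
     * dirty index is simply offset by the predecessor's size.
     */
    static unsigned int merge(unsigned int order, unsigned int succ_fd)
    {
        return (1U << order) + succ_fd;
    }

    int main(void)
    {
        unsigned int fd = 10;   /* order-4 buddy, first dirty page at index 10 */

        assert(split(3, &fd) == INVALID_DIRTY_IDX); /* lower order-3 half clean */
        assert(fd == 2);                            /* upper half dirty from 2  */
        assert(merge(3, fd) == 10);                 /* merging back restores 10 */
        return 0;
    }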