[v2] mm/page_alloc: Allow high-order pages to be stored on the per-cpu lists

Message ID	20210611135753.GC30378@techsingularity.net (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=QBBA=LF=kvack.org=owner-linux-mm@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 5296C613FA Date: Fri, 11 Jun 2021 14:57:53 +0100 From: Mel Gorman <mgorman@techsingularity.net> To: Andrew Morton <akpm@linux-foundation.org> Cc: Zi Yan <ziy@nvidia.com>, Dave Hansen <dave.hansen@linux.intel.com>, Vlastimil Babka <vbabka@suse.cz>, Michal Hocko <mhocko@kernel.org>, Jesper Dangaard Brouer <brouer@redhat.com>, LKML <linux-kernel@vger.kernel.org>, Linux-MM <linux-mm@kvack.org>, Mel Gorman <mgorman@techsingularity.net> Subject: [PATCH v2] mm/page_alloc: Allow high-order pages to be stored on the per-cpu lists Message-ID: <20210611135753.GC30378@techsingularity.net> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15 Content-Disposition: inline User-Agent: Mutt/1.10.1 (2018-07-13) Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	[v2] mm/page_alloc: Allow high-order pages to be stored on the per-cpu lists \| expand [v2] mm/page_alloc: Allow high-order pages to be stored on the per-cpu lists

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e20d98c62beb..f1bed5b847ec 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -333,6 +333,24 @@ enum zone_watermarks { NR_WMARK }; +/* + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional + * for pageblock size for THP if configured. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define NR_PCP_THP 1 +#else +#define NR_PCP_THP 0 +#endif +#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP)) + +/* + * Shift to encode migratetype and order in the same integer, with order + * in the least significant bits. + */ +#define NR_PCP_ORDER_WIDTH 8 +#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1) + #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) @@ -349,7 +367,7 @@ struct per_cpu_pages { #endif /* Lists of pages, one per migrate type stored on the pcp-lists */ - struct list_head lists[MIGRATE_PCPTYPES]; + struct list_head lists[NR_PCP_LISTS]; }; struct per_cpu_zonestat { diff --git a/mm/internal.h b/mm/internal.h index b2bad3a4c426..935f6baaa847 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -198,7 +198,7 @@ extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern int user_min_free_kbytes; -extern void free_unref_page(struct page *page); +extern void free_unref_page(struct page *page, unsigned int order); extern void free_unref_page_list(struct list_head *list); extern void zone_pcp_update(struct zone *zone, int cpu_online); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f24f509c3ee3..8472bae567f0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -676,10 +676,53 @@ static void bad_page(struct page *page, const char *reason) add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } +static inline unsigned int order_to_pindex(int migratetype, int order) +{ + int base = order; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (order > PAGE_ALLOC_COSTLY_ORDER) { + VM_BUG_ON(order != pageblock_order); + base = PAGE_ALLOC_COSTLY_ORDER + 1; + } +#else + VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); +#endif + + return (MIGRATE_PCPTYPES * base) + migratetype; +} + +static inline int pindex_to_order(unsigned int pindex) +{ + int order = pindex / MIGRATE_PCPTYPES; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (order > PAGE_ALLOC_COSTLY_ORDER) { + order = pageblock_order; + VM_BUG_ON(order != pageblock_order); + } +#else + VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); +#endif + + return order; +} + +static inline bool pcp_allowed_order(unsigned int order) +{ + if (order <= PAGE_ALLOC_COSTLY_ORDER) + return true; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (order == pageblock_order) + return true; +#endif + return false; +} + static inline void free_the_page(struct page *page, unsigned int order) { - if (order == 0) /* Via pcp? */ - free_unref_page(page); + if (pcp_allowed_order(order)) /* Via pcp? */ + free_unref_page(page, order); else __free_pages_ok(page, order, FPI_NONE); } @@ -702,7 +745,7 @@ static inline void free_the_page(struct page *page, unsigned int order) void free_compound_page(struct page *page) { mem_cgroup_uncharge(page); - __free_pages_ok(page, compound_order(page), FPI_NONE); + free_the_page(page, compound_order(page)); } void prep_compound_page(struct page *page, unsigned int order) @@ -1352,9 +1395,9 @@ static __always_inline bool free_pages_prepare(struct page *page, * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when * moved from pcp lists to free lists. */ -static bool free_pcp_prepare(struct page *page) +static bool free_pcp_prepare(struct page *page, unsigned int order) { - return free_pages_prepare(page, 0, true, FPI_NONE); + return free_pages_prepare(page, order, true, FPI_NONE); } static bool bulkfree_pcp_prepare(struct page *page) @@ -1371,12 +1414,12 @@ static bool bulkfree_pcp_prepare(struct page *page) * debug_pagealloc enabled, they are checked also immediately when being freed * to the pcp lists. */ -static bool free_pcp_prepare(struct page *page) +static bool free_pcp_prepare(struct page *page, unsigned int order) { if (debug_pagealloc_enabled_static()) - return free_pages_prepare(page, 0, true, FPI_NONE); + return free_pages_prepare(page, order, true, FPI_NONE); else - return free_pages_prepare(page, 0, false, FPI_NONE); + return free_pages_prepare(page, order, false, FPI_NONE); } static bool bulkfree_pcp_prepare(struct page *page) @@ -1408,8 +1451,10 @@ static inline void prefetch_buddy(struct page *page) static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp) { - int migratetype = 0; + int pindex = 0; int batch_free = 0; + int nr_freed = 0; + unsigned int order; int prefetch_nr = READ_ONCE(pcp->batch); bool isolated_pageblocks; struct page *page, *tmp; @@ -1420,7 +1465,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, * below while (list_empty(list)) loop. */ count = min(pcp->count, count); - while (count) { + while (count > 0) { struct list_head *list; /* @@ -1432,24 +1477,31 @@ static void free_pcppages_bulk(struct zone *zone, int count, */ do { batch_free++; - if (++migratetype == MIGRATE_PCPTYPES) - migratetype = 0; - list = &pcp->lists[migratetype]; + if (++pindex == NR_PCP_LISTS) + pindex = 0; + list = &pcp->lists[pindex]; } while (list_empty(list)); /* This is the only non-empty list. Free them all. */ - if (batch_free == MIGRATE_PCPTYPES) + if (batch_free == NR_PCP_LISTS) batch_free = count; + order = pindex_to_order(pindex); + BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH)); do { page = list_last_entry(list, struct page, lru); /* must delete to avoid corrupting pcp list */ list_del(&page->lru); - pcp->count--; + nr_freed += 1 << order; + count -= 1 << order; if (bulkfree_pcp_prepare(page)) continue; + /* Encode order with the migratetype */ + page->index <<= NR_PCP_ORDER_WIDTH; + page->index |= order; + list_add_tail(&page->lru, &head); /* @@ -1465,8 +1517,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, prefetch_buddy(page); prefetch_nr--; } - } while (--count && --batch_free && !list_empty(list)); + } while (count > 0 && --batch_free && !list_empty(list)); } + pcp->count -= nr_freed; /* * local_lock_irq held so equivalent to spin_lock_irqsave for @@ -1481,14 +1534,19 @@ static void free_pcppages_bulk(struct zone *zone, int count, */ list_for_each_entry_safe(page, tmp, &head, lru) { int mt = get_pcppage_migratetype(page); + + /* mt has been encoded with the order (see above) */ + order = mt & NR_PCP_ORDER_MASK; + mt >>= NR_PCP_ORDER_WIDTH; + /* MIGRATE_ISOLATE page should not go to pcplists */ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); /* Pageblock could have been isolated meanwhile */ if (unlikely(isolated_pageblocks)) mt = get_pageblock_migratetype(page); - __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); - trace_mm_page_pcpu_drain(page, 0, mt); + __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE); + trace_mm_page_pcpu_drain(page, order, mt); } spin_unlock(&zone->lock); } @@ -3265,11 +3323,12 @@ void mark_free_pages(struct zone *zone) } #endif /* CONFIG_PM */ -static bool free_unref_page_prepare(struct page *page, unsigned long pfn) +static bool free_unref_page_prepare(struct page *page, unsigned long pfn, + unsigned int order) { int migratetype; - if (!free_pcp_prepare(page)) + if (!free_pcp_prepare(page, order)) return false; migratetype = get_pfnblock_migratetype(page, pfn); @@ -3319,16 +3378,18 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone) } static void free_unref_page_commit(struct page *page, unsigned long pfn, - int migratetype) + int migratetype, unsigned int order) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; int high; + int pindex; __count_vm_event(PGFREE); pcp = this_cpu_ptr(zone->per_cpu_pageset); - list_add(&page->lru, &pcp->lists[migratetype]); - pcp->count++; + pindex = order_to_pindex(migratetype, order); + list_add(&page->lru, &pcp->lists[pindex]); + pcp->count += 1 << order; high = nr_pcp_high(pcp, zone); if (pcp->count >= high) { int batch = READ_ONCE(pcp->batch); @@ -3338,15 +3399,15 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn, } /* - * Free a 0-order page + * Free a pcp page */ -void free_unref_page(struct page *page) +void free_unref_page(struct page *page, unsigned int order) { unsigned long flags; unsigned long pfn = page_to_pfn(page); int migratetype; - if (!free_unref_page_prepare(page, pfn)) + if (!free_unref_page_prepare(page, pfn, order)) return; /* @@ -3359,14 +3420,14 @@ void free_unref_page(struct page *page) migratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE); + free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); return; } migratetype = MIGRATE_MOVABLE; } local_lock_irqsave(&pagesets.lock, flags); - free_unref_page_commit(page, pfn, migratetype); + free_unref_page_commit(page, pfn, migratetype, order); local_unlock_irqrestore(&pagesets.lock, flags); } @@ -3383,7 +3444,7 @@ void free_unref_page_list(struct list_head *list) /* Prepare pages for freeing */ list_for_each_entry_safe(page, next, list, lru) { pfn = page_to_pfn(page); - if (!free_unref_page_prepare(page, pfn)) + if (!free_unref_page_prepare(page, pfn, 0)) list_del(&page->lru); /* @@ -3415,7 +3476,7 @@ void free_unref_page_list(struct list_head *list) set_page_private(page, 0); migratetype = get_pcppage_migratetype(page); trace_mm_page_free_batched(page); - free_unref_page_commit(page, pfn, migratetype); + free_unref_page_commit(page, pfn, migratetype, 0); /* * Guard against excessive IRQ disabled times when we get @@ -3551,7 +3612,8 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, /* Remove page from the per-cpu list, caller must protect the list */ static inline -struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, +struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order, + int migratetype, unsigned int alloc_flags, struct per_cpu_pages *pcp, struct list_head *list) @@ -3560,16 +3622,30 @@ struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, do { if (list_empty(list)) { - pcp->count += rmqueue_bulk(zone, 0, - READ_ONCE(pcp->batch), list, + int batch = READ_ONCE(pcp->batch); + int alloced; + + /* + * Scale batch relative to order if batch implies + * free pages can be stored on the PCP. Batch can + * be 1 for small zones or for boot pagesets which + * should never store free pages as the pages may + * belong to arbitrary zones. + */ + if (batch > 1) + batch = max(batch >> order, 2); + alloced = rmqueue_bulk(zone, order, + batch, list, migratetype, alloc_flags); + + pcp->count += alloced << order; if (unlikely(list_empty(list))) return NULL; } page = list_first_entry(list, struct page, lru); list_del(&page->lru); - pcp->count--; + pcp->count -= 1 << order; } while (check_new_pcp(page)); return page; @@ -3577,8 +3653,9 @@ struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, /* Lock and remove page from the per-cpu list */ static struct page *rmqueue_pcplist(struct zone *preferred_zone, - struct zone *zone, gfp_t gfp_flags, - int migratetype, unsigned int alloc_flags) + struct zone *zone, unsigned int order, + gfp_t gfp_flags, int migratetype, + unsigned int alloc_flags) { struct per_cpu_pages *pcp; struct list_head *list; @@ -3594,8 +3671,8 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, */ pcp = this_cpu_ptr(zone->per_cpu_pageset); pcp->free_factor >>= 1; - list = &pcp->lists[migratetype]; - page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); + list = &pcp->lists[order_to_pindex(migratetype, order)]; + page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); local_unlock_irqrestore(&pagesets.lock, flags); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); @@ -3616,15 +3693,15 @@ struct page *rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; - if (likely(order == 0)) { + if (likely(pcp_allowed_order(order))) { /* * MIGRATE_MOVABLE pcplist could have the pages on CMA area and * we need to skip it when CMA area isn't allowed. */ if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA || migratetype != MIGRATE_MOVABLE) { - page = rmqueue_pcplist(preferred_zone, zone, gfp_flags, - migratetype, alloc_flags); + page = rmqueue_pcplist(preferred_zone, zone, order, + gfp_flags, migratetype, alloc_flags); goto out; } } @@ -5196,7 +5273,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, /* Attempt the batch allocation */ local_lock_irqsave(&pagesets.lock, flags); pcp = this_cpu_ptr(zone->per_cpu_pageset); - pcp_list = &pcp->lists[ac.migratetype]; + pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)]; while (nr_populated < nr_pages) { @@ -5206,7 +5283,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, continue; } - page = __rmqueue_pcplist(zone, ac.migratetype, alloc_flags, + page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags, pcp, pcp_list); if (unlikely(!page)) { /* Try and get at least one page */ @@ -6756,13 +6833,13 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high, static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats) { - int migratetype; + int pindex; memset(pcp, 0, sizeof(*pcp)); memset(pzstats, 0, sizeof(*pzstats)); - for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) - INIT_LIST_HEAD(&pcp->lists[migratetype]); + for (pindex = 0; pindex < NR_PCP_LISTS; pindex++) + INIT_LIST_HEAD(&pcp->lists[pindex]); /* * Set batch and high values safe for a boot pageset. A true percpu diff --git a/mm/swap.c b/mm/swap.c index dfb48cf9c2c9..b953039e087b 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -95,7 +95,7 @@ static void __put_single_page(struct page *page) { __page_cache_release(page); mem_cgroup_uncharge(page); - free_unref_page(page); + free_unref_page(page, 0); } static void __put_compound_page(struct page *page)

[v2] mm/page_alloc: Allow high-order pages to be stored on the per-cpu lists

Commit Message

Comments

Patch