diff mbox series

[RFC,23/26] mm: page_alloc: kill highatomic

Message ID 20230418191313.268131-24-hannes@cmpxchg.org (mailing list archive)
State New
Headers show
Series mm: reliable huge page allocator | expand

Commit Message

Johannes Weiner April 18, 2023, 7:13 p.m. UTC
The highatomic reserves are pageblocks set aside specifically for
higher-order atomic allocations. Since watermarks are now required to
be met in pageblocks, these dedicated reserves are no longer necessary.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/gfp.h    |   2 -
 include/linux/mmzone.h |   6 +-
 mm/internal.h          |   5 --
 mm/page_alloc.c        | 187 ++---------------------------------------
 mm/vmstat.c            |   1 -
 5 files changed, 10 insertions(+), 191 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 65a78773dcca..78b5176d354e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -19,8 +19,6 @@  static inline int gfp_migratetype(const gfp_t gfp_flags)
 	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
 	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
 	BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE);
-	BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >>
-		      GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC);
 
 	if (unlikely(page_group_by_mobility_disabled))
 		return MIGRATE_UNMOVABLE;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d1083ab81998..c705f2f7c829 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -44,8 +44,7 @@  enum migratetype {
 	MIGRATE_MOVABLE,
 	MIGRATE_RECLAIMABLE,
 	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
-	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
-	MIGRATE_FREE,
+	MIGRATE_FREE = MIGRATE_PCPTYPES,
 #ifdef CONFIG_CMA
 	/*
 	 * MIGRATE_CMA migration type is designed to mimic the way
@@ -142,7 +141,6 @@  enum zone_stat_item {
 	NR_FREE_UNMOVABLE,
 	NR_FREE_MOVABLE,
 	NR_FREE_RECLAIMABLE,
-	NR_FREE_HIGHATOMIC,
 	NR_FREE_FREE,
 	NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
 	NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
@@ -713,8 +711,6 @@  struct zone {
 	unsigned long _watermark[NR_WMARK];
 	unsigned long watermark_boost;
 
-	unsigned long nr_reserved_highatomic;
-
 	/*
 	 * We don't know if the memory that we're going to allocate will be
 	 * freeable or/and it will be released eventually, so to avoid totally
diff --git a/mm/internal.h b/mm/internal.h
index 5c76455f8042..24f43f5db88b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -778,11 +778,6 @@  extern const struct trace_print_flags pageflag_names[];
 extern const struct trace_print_flags vmaflag_names[];
 extern const struct trace_print_flags gfpflag_names[];
 
-static inline bool is_migrate_highatomic(enum migratetype migratetype)
-{
-	return migratetype == MIGRATE_HIGHATOMIC;
-}
-
 void setup_zone_pageset(struct zone *zone);
 
 struct migration_target_control {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6f0bfc226c36..e8ae04feb1bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -379,7 +379,6 @@  const char * const migratetype_names[MIGRATE_TYPES] = {
 	"Unmovable",
 	"Movable",
 	"Reclaimable",
-	"HighAtomic",
 	"Free",
 #ifdef CONFIG_CMA
 	"CMA",
@@ -1202,7 +1201,7 @@  static inline void __free_one_page(struct page *page,
 			 * We want to prevent merge between freepages on pageblock
 			 * without fallbacks and normal pageblock. Without this,
 			 * pageblock isolation could cause incorrect freepage or CMA
-			 * accounting or HIGHATOMIC accounting.
+			 * accounting.
 			 */
 			if (migratetype != buddy_mt
 					&& (!migratetype_is_mergeable(migratetype) ||
@@ -2797,13 +2796,6 @@  static void steal_suitable_fallback(struct zone *zone, struct page *page,
 
 	old_block_type = get_pageblock_migratetype(page);
 
-	/*
-	 * This can happen due to races and we want to prevent broken
-	 * highatomic accounting.
-	 */
-	if (is_migrate_highatomic(old_block_type))
-		goto single_page;
-
 	/* Take ownership for orders >= pageblock_order */
 	if (current_order >= pageblock_order) {
 		change_pageblock_range(page, current_order, start_type);
@@ -2918,126 +2910,6 @@  int find_suitable_fallback(struct free_area *area, unsigned int order,
 	return -1;
 }
 
-/*
- * Reserve a pageblock for exclusive use of high-order atomic allocations if
- * there are no empty page blocks that contain a page with a suitable order
- */
-static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
-				unsigned int alloc_order)
-{
-	int mt;
-	unsigned long max_managed, flags;
-
-	/*
-	 * Limit the number reserved to 1 pageblock or roughly 1% of a zone.
-	 * Check is race-prone but harmless.
-	 */
-	max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages;
-	if (zone->nr_reserved_highatomic >= max_managed)
-		return;
-
-	spin_lock_irqsave(&zone->lock, flags);
-
-	/* Recheck the nr_reserved_highatomic limit under the lock */
-	if (zone->nr_reserved_highatomic >= max_managed)
-		goto out_unlock;
-
-	/* Yoink! */
-	mt = get_pageblock_migratetype(page);
-	/* Only reserve normal pageblocks (i.e., they can merge with others) */
-	if (migratetype_is_mergeable(mt)) {
-		zone->nr_reserved_highatomic += pageblock_nr_pages;
-		set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
-		move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC, NULL);
-	}
-
-out_unlock:
-	spin_unlock_irqrestore(&zone->lock, flags);
-}
-
-/*
- * Used when an allocation is about to fail under memory pressure. This
- * potentially hurts the reliability of high-order allocations when under
- * intense memory pressure but failed atomic allocations should be easier
- * to recover from than an OOM.
- *
- * If @force is true, try to unreserve a pageblock even though highatomic
- * pageblock is exhausted.
- */
-static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
-						bool force)
-{
-	struct zonelist *zonelist = ac->zonelist;
-	unsigned long flags;
-	struct zoneref *z;
-	struct zone *zone;
-	struct page *page;
-	int order;
-	bool ret;
-
-	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
-								ac->nodemask) {
-		/*
-		 * Preserve at least one pageblock unless memory pressure
-		 * is really high.
-		 */
-		if (!force && zone->nr_reserved_highatomic <=
-					pageblock_nr_pages)
-			continue;
-
-		spin_lock_irqsave(&zone->lock, flags);
-		for (order = 0; order < MAX_ORDER; order++) {
-			struct free_area *area = &(zone->free_area[order]);
-			int mt;
-
-			page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
-			if (!page)
-				continue;
-
-			mt = get_pageblock_migratetype(page);
-			/*
-			 * In page freeing path, migratetype change is racy so
-			 * we can counter several free pages in a pageblock
-			 * in this loop although we changed the pageblock type
-			 * from highatomic to ac->migratetype. So we should
-			 * adjust the count once.
-			 */
-			if (is_migrate_highatomic(mt)) {
-				/*
-				 * It should never happen but changes to
-				 * locking could inadvertently allow a per-cpu
-				 * drain to add pages to MIGRATE_HIGHATOMIC
-				 * while unreserving so be safe and watch for
-				 * underflows.
-				 */
-				zone->nr_reserved_highatomic -= min(
-						pageblock_nr_pages,
-						zone->nr_reserved_highatomic);
-			}
-
-			/*
-			 * Convert to ac->migratetype and avoid the normal
-			 * pageblock stealing heuristics. Minimally, the caller
-			 * is doing the work and needs the pages. More
-			 * importantly, if the block was always converted to
-			 * MIGRATE_UNMOVABLE or another type then the number
-			 * of pageblocks that cannot be completely freed
-			 * may increase.
-			 */
-			set_pageblock_migratetype(page, ac->migratetype);
-			ret = move_freepages_block(zone, page, mt,
-						   ac->migratetype, NULL);
-			if (ret) {
-				spin_unlock_irqrestore(&zone->lock, flags);
-				return ret;
-			}
-		}
-		spin_unlock_irqrestore(&zone->lock, flags);
-	}
-
-	return false;
-}
-
 /*
  * Try finding a free buddy page on the fallback list and put it on the free
  * list of requested migratetype, possibly along with other pages from the same
@@ -3510,18 +3382,11 @@  void free_unref_page(struct page *page, unsigned int order)
 
 	/*
 	 * We only track unmovable, reclaimable and movable on pcp lists.
-	 * Place ISOLATE pages on the isolated list because they are being
-	 * offlined but treat HIGHATOMIC as movable pages so we can get those
-	 * areas back if necessary. Otherwise, we may have to free
-	 * excessively into the page allocator
 	 */
 	migratetype = get_pcppage_migratetype(page);
 	if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
-		if (unlikely(is_migrate_isolate(migratetype) || migratetype == MIGRATE_FREE)) {
-			free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
-			return;
-		}
-		migratetype = MIGRATE_MOVABLE;
+		free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+		return;
 	}
 
 	zone = page_zone(page);
@@ -3740,24 +3605,11 @@  struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
 	unsigned long flags;
 
 	do {
-		page = NULL;
 		spin_lock_irqsave(&zone->lock, flags);
-		/*
-		 * order-0 request can reach here when the pcplist is skipped
-		 * due to non-CMA allocation context. HIGHATOMIC area is
-		 * reserved for high-order atomic allocation, so order-0
-		 * request should skip it.
-		 */
-		if (order > 0 && alloc_flags & ALLOC_HARDER)
-			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
-		if (!page) {
-			page = __rmqueue(zone, order, migratetype, alloc_flags);
-			if (!page) {
-				spin_unlock_irqrestore(&zone->lock, flags);
-				return NULL;
-			}
-		}
+		page = __rmqueue(zone, order, migratetype, alloc_flags);
 		spin_unlock_irqrestore(&zone->lock, flags);
+		if (!page)
+			return NULL;
 	} while (check_new_pages(page, order));
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -4003,8 +3855,6 @@  static long __zone_free_pages(struct zone *zone, int alloc_flags, bool safe)
 	 * compaction is necessary to restore the watermarks.
 	 */
 	free_pages = page_state(zone, NR_FREE_FREE, safe);
-	if (alloc_flags & (ALLOC_HARDER | ALLOC_OOM))
-		free_pages += page_state(zone, NR_FREE_HIGHATOMIC, safe);
 	if (IS_ENABLED(CONFIG_CMA) && (alloc_flags & ALLOC_CMA))
 		free_pages += page_state(zone, NR_FREE_CMA_PAGES, safe);
 
@@ -4098,8 +3948,6 @@  bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 			return true;
 		}
 #endif
-		if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
-			return true;
 	}
 	return false;
 }
@@ -4340,14 +4188,6 @@  get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 				gfp_mask, alloc_flags, ac->migratetype);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
-
-			/*
-			 * If this is a high-order atomic allocation then check
-			 * if the pageblock should be reserved for the future
-			 */
-			if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
-				reserve_highatomic_pageblock(page, zone, order);
-
 			return page;
 		} else {
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -4844,7 +4684,6 @@  __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	 * Shrink them and try again
 	 */
 	if (!page && !drained) {
-		unreserve_highatomic_pageblock(ac, false);
 		drain_all_pages(NULL);
 		drained = true;
 		goto retry;
@@ -5013,10 +4852,8 @@  should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 	 * Make sure we converge to OOM if we cannot make any progress
 	 * several times in the row.
 	 */
-	if (*no_progress_loops > MAX_RECLAIM_RETRIES) {
-		/* Before OOM, exhaust highatomic_reserve */
-		return unreserve_highatomic_pageblock(ac, true);
-	}
+	if (*no_progress_loops > MAX_RECLAIM_RETRIES)
+		return false;
 
 	/*
 	 * Keep reclaiming pages while there is a chance this will lead
@@ -6129,7 +5966,6 @@  static void show_migration_types(unsigned char type)
 		[MIGRATE_UNMOVABLE]	= 'U',
 		[MIGRATE_MOVABLE]	= 'M',
 		[MIGRATE_RECLAIMABLE]	= 'E',
-		[MIGRATE_HIGHATOMIC]	= 'H',
 		[MIGRATE_FREE]		= 'F',
 #ifdef CONFIG_CMA
 		[MIGRATE_CMA]		= 'C',
@@ -6194,7 +6030,7 @@  void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
 		" sec_pagetables:%lu bounce:%lu\n"
 		" kernel_misc_reclaimable:%lu\n"
 		" free:%lu free_unmovable:%lu free_movable:%lu\n"
-		" free_reclaimable:%lu free_highatomic:%lu free_free:%lu\n"
+		" free_reclaimable:%lu free_free:%lu\n"
 		" free_cma:%lu free_pcp:%lu\n",
 		global_node_page_state(NR_ACTIVE_ANON),
 		global_node_page_state(NR_INACTIVE_ANON),
@@ -6217,7 +6053,6 @@  void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
 		global_zone_page_state(NR_FREE_UNMOVABLE),
 		global_zone_page_state(NR_FREE_MOVABLE),
 		global_zone_page_state(NR_FREE_RECLAIMABLE),
-		global_zone_page_state(NR_FREE_HIGHATOMIC),
 		global_zone_page_state(NR_FREE_FREE),
 		global_zone_page_state(NR_FREE_CMA_PAGES),
 		free_pcp);
@@ -6301,13 +6136,11 @@  void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
 			" free_unmovable:%lukB"
 			" free_movable:%lukB"
 			" free_reclaimable:%lukB"
-			" free_highatomic:%lukB"
 			" free_free:%lukB"
 			" boost:%lukB"
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
-			" reserved_highatomic:%luKB"
 			" active_anon:%lukB"
 			" inactive_anon:%lukB"
 			" active_file:%lukB"
@@ -6327,13 +6160,11 @@  void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_i
 			K(zone_page_state(zone, NR_FREE_UNMOVABLE)),
 			K(zone_page_state(zone, NR_FREE_MOVABLE)),
 			K(zone_page_state(zone, NR_FREE_RECLAIMABLE)),
-			K(zone_page_state(zone, NR_FREE_HIGHATOMIC)),
 			K(zone_page_state(zone, NR_FREE_FREE)),
 			K(zone->watermark_boost),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
-			K(zone->nr_reserved_highatomic),
 			K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
 			K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
 			K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c8b8e6e259da..a2f7b41564df 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1171,7 +1171,6 @@  const char * const vmstat_text[] = {
 	"nr_free_unmovable",
 	"nr_free_movable",
 	"nr_free_reclaimable",
-	"nr_free_highatomic",
 	"nr_free_free",
 	"nr_zone_inactive_anon",
 	"nr_zone_active_anon",