Message ID | 20230825135918.4164671-4-willy@infradead.org
---|---
State | New
Series | Rearrange batched folio freeing
On 25/08/2023 14:59, Matthew Wilcox (Oracle) wrote:
> Iterate over a folio_batch rather than a linked list. This is
> easier for the CPU to prefetch and has a batch count naturally
> built in so we don't need to track it.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/internal.h   |  5 +++--
>  mm/page_alloc.c | 59 ++++++++++++++++++++++++++++++-------------------
>  2 files changed, 39 insertions(+), 25 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 7499b5ea1cf6..5c6a53371aeb 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -441,8 +441,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
>                                          gfp_t gfp_flags);
>  extern int user_min_free_kbytes;
>
> -extern void free_unref_page(struct page *page, unsigned int order);
> -extern void free_unref_page_list(struct list_head *list);
> +void free_unref_page(struct page *page, unsigned int order);
> +void free_unref_folios(struct folio_batch *fbatch);
> +void free_unref_page_list(struct list_head *list);
>
>  extern void zone_pcp_reset(struct zone *zone);
>  extern void zone_pcp_disable(struct zone *zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f1ee96fd9bef..bca5c70b5576 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -32,6 +32,7 @@
>  #include <linux/sysctl.h>
>  #include <linux/cpu.h>
>  #include <linux/cpuset.h>
> +#include <linux/pagevec.h>
>  #include <linux/memory_hotplug.h>
>  #include <linux/nodemask.h>
>  #include <linux/vmstat.h>
> @@ -2464,57 +2465,51 @@ void free_unref_page(struct page *page, unsigned int order)
>  }
>
>  /*
> - * Free a list of 0-order pages
> + * Free a batch of 0-order pages
>   */
> -void free_unref_page_list(struct list_head *list)
> +void free_unref_folios(struct folio_batch *folios)
>  {
>          unsigned long __maybe_unused UP_flags;
> -        struct folio *folio, *next;
>          struct per_cpu_pages *pcp = NULL;
>          struct zone *locked_zone = NULL;
> -        int batch_count = 0;
> -        int migratetype;
> +        int i, j, migratetype;
>
> -        /* Prepare pages for freeing */
> -        list_for_each_entry_safe(folio, next, list, lru) {
> +        /* Prepare folios for freeing */
> +        for (i = 0, j = 0; i < folios->nr; i++) {
> +                struct folio *folio = folios->folios[i];
>                  unsigned long pfn = folio_pfn(folio);
> -                if (!free_unref_page_prepare(&folio->page, pfn, 0)) {
> -                        list_del(&folio->lru);
> +                if (!free_unref_page_prepare(&folio->page, pfn, 0))
>                          continue;
> -                }
>
>                  /*
> -                 * Free isolated pages directly to the allocator, see
> +                 * Free isolated folios directly to the allocator, see
>                   * comment in free_unref_page.
>                   */
>                  migratetype = get_pcppage_migratetype(&folio->page);
>                  if (unlikely(is_migrate_isolate(migratetype))) {
> -                        list_del(&folio->lru);
>                          free_one_page(folio_zone(folio), &folio->page, pfn,
>                                          0, migratetype, FPI_NONE);
>                          continue;
>                  }
> +                if (j != i)
> +                        folios->folios[j] = folio;
> +                j++;
>          }
> +        folios->nr = j;
>
> -        list_for_each_entry_safe(folio, next, list, lru) {
> +        for (i = 0; i < folios->nr; i++) {
> +                struct folio *folio = folios->folios[i];
>                  struct zone *zone = folio_zone(folio);
>
> -                list_del(&folio->lru);
>                  migratetype = get_pcppage_migratetype(&folio->page);
>
> -                /*
> -                 * Either different zone requiring a different pcp lock or
> -                 * excessive lock hold times when freeing a large list of
> -                 * folios.
> -                 */
> -                if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {

Same comment as for release_pages(): the batch count is effectively
halved. Does this have perf implications? I guess you'll need to
benchmark...
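
For reference on "effectively halved": the old code dropped and re-took the
pcp lock every SWAP_CLUSTER_MAX folios, while a folio_batch holds at most
PAGEVEC_SIZE entries. A standalone back-of-the-envelope sketch follows,
assuming the usual values of 32 and 15 from <linux/swap.h> and
<linux/pagevec.h>; this is illustrative userspace code, not part of the patch.

#include <stdio.h>

/* Assumed values, copied from v6.5-era headers -- double-check your tree. */
#define SWAP_CLUSTER_MAX  32    /* old cap on folios freed per pcp lock hold */
#define PAGEVEC_SIZE      15    /* capacity of a struct folio_batch */

/* pcp lock acquire/release cycles needed to free n same-zone folios */
static unsigned long lock_cycles(unsigned long n, unsigned long batch)
{
        return (n + batch - 1) / batch;         /* ceiling division */
}

int main(void)
{
        unsigned long n = 1000;

        printf("freeing %lu folios: ~%lu lock cycles before, ~%lu after\n",
               n, lock_cycles(n, SWAP_CLUSTER_MAX), lock_cycles(n, PAGEVEC_SIZE));
        return 0;
}

Roughly twice as many lock round trips for a long same-zone list, which is
why the benchmark question above matters.
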
> +                /* Different zone requires a different pcp lock */
> +                if (zone != locked_zone) {
>                          if (pcp) {
>                                  pcp_spin_unlock(pcp);
>                                  pcp_trylock_finish(UP_flags);
>                          }
>
> -                        batch_count = 0;
> -
>                          /*
>                           * trylock is necessary as folios may be getting freed
>                           * from IRQ or SoftIRQ context after an IO completion.
> @@ -2541,13 +2536,31 @@ void free_unref_page_list(struct list_head *list)
>
>                  trace_mm_page_free_batched(&folio->page);
>                  free_unref_page_commit(zone, pcp, &folio->page, migratetype, 0);
> -                batch_count++;
>          }
>
>          if (pcp) {
>                  pcp_spin_unlock(pcp);
>                  pcp_trylock_finish(UP_flags);
>          }
> +        folios->nr = 0;

Same nits as for previous patch: Better to use the APIs rather than
manipulate the internal folio_batch state directly?

> +}
> +
> +void free_unref_page_list(struct list_head *list)
> +{
> +        struct folio_batch fbatch;
> +
> +        folio_batch_init(&fbatch);
> +        while (!list_empty(list)) {
> +                struct folio *folio = list_first_entry(list, struct folio, lru);
> +
> +                list_del(&folio->lru);
> +                if (folio_batch_add(&fbatch, folio) > 0)
> +                        continue;
> +                free_unref_folios(&fbatch);
> +        }
> +
> +        if (fbatch.nr)
> +                free_unref_folios(&fbatch);
>  }
>
>  /*
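
To make that nit concrete, here is roughly what the accessor-based variant
could look like, sketched against the quoted patch rather than whatever was
eventually merged. It relies only on helpers that already exist in
<linux/pagevec.h>: folio_batch_init(), folio_batch_add() (which returns the
number of free slots remaining), folio_batch_count() and folio_batch_reinit().

/*
 * Illustrative sketch only. free_unref_folios() would likewise end with
 * folio_batch_reinit(folios) instead of writing folios->nr = 0 directly.
 */
void free_unref_page_list(struct list_head *list)
{
        struct folio_batch fbatch;

        folio_batch_init(&fbatch);
        while (!list_empty(list)) {
                struct folio *folio = list_first_entry(list, struct folio, lru);

                list_del(&folio->lru);
                /* folio_batch_add() returns how many slots are still free */
                if (folio_batch_add(&fbatch, folio) > 0)
                        continue;
                free_unref_folios(&fbatch);     /* batch is full: flush it */
        }

        if (folio_batch_count(&fbatch))         /* flush any leftover folios */
                free_unref_folios(&fbatch);
}

Behaviour is identical either way; it just keeps page_alloc.c out of
folio_batch's internals.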