
[RFC,03/14] mm: Add free_unref_folios()

Message ID 20230825135918.4164671-4-willy@infradead.org (mailing list archive)
State New
Series Rearrange batched folio freeing

Commit Message

Matthew Wilcox Aug. 25, 2023, 1:59 p.m. UTC
Iterate over a folio_batch rather than a linked list.  This is
easier for the CPU to prefetch and has a batch count naturally
built in so we don't need to track it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/internal.h   |  5 +++--
 mm/page_alloc.c | 59 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 39 insertions(+), 25 deletions(-)
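
(Illustrative only, not part of the patch: a minimal sketch of the
folio_batch pattern the commit message describes, using the existing
<linux/pagevec.h> helpers.  The caller example_free_folios() is
hypothetical and assumes the free_unref_folios() declaration from
mm/internal.h added below.)

        #include <linux/pagevec.h>

        /*
         * Collect folios into an on-stack folio_batch and hand whole
         * batches to free_unref_folios().  The batch carries its own
         * count, so no separate batch_count needs to be tracked.
         */
        static void example_free_folios(struct folio **array, unsigned int n)
        {
                struct folio_batch fbatch;
                unsigned int i;

                folio_batch_init(&fbatch);
                for (i = 0; i < n; i++) {
                        /* folio_batch_add() returns the space remaining */
                        if (folio_batch_add(&fbatch, array[i]) == 0)
                                free_unref_folios(&fbatch);
                        /* free_unref_folios() empties the batch for reuse */
                }
                if (folio_batch_count(&fbatch))
                        free_unref_folios(&fbatch);
        }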

Comments

Ryan Roberts Aug. 31, 2023, 2:39 p.m. UTC | #1
On 25/08/2023 14:59, Matthew Wilcox (Oracle) wrote:
> Iterate over a folio_batch rather than a linked list.  This is
> easier for the CPU to prefetch and has a batch count naturally
> built in so we don't need to track it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/internal.h   |  5 +++--
>  mm/page_alloc.c | 59 ++++++++++++++++++++++++++++++-------------------
>  2 files changed, 39 insertions(+), 25 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index 7499b5ea1cf6..5c6a53371aeb 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -441,8 +441,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
>  					gfp_t gfp_flags);
>  extern int user_min_free_kbytes;
>  
> -extern void free_unref_page(struct page *page, unsigned int order);
> -extern void free_unref_page_list(struct list_head *list);
> +void free_unref_page(struct page *page, unsigned int order);
> +void free_unref_folios(struct folio_batch *fbatch);
> +void free_unref_page_list(struct list_head *list);
>  
>  extern void zone_pcp_reset(struct zone *zone);
>  extern void zone_pcp_disable(struct zone *zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f1ee96fd9bef..bca5c70b5576 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -32,6 +32,7 @@
>  #include <linux/sysctl.h>
>  #include <linux/cpu.h>
>  #include <linux/cpuset.h>
> +#include <linux/pagevec.h>
>  #include <linux/memory_hotplug.h>
>  #include <linux/nodemask.h>
>  #include <linux/vmstat.h>
> @@ -2464,57 +2465,51 @@ void free_unref_page(struct page *page, unsigned int order)
>  }
>  
>  /*
> - * Free a list of 0-order pages
> + * Free a batch of 0-order pages
>   */
> -void free_unref_page_list(struct list_head *list)
> +void free_unref_folios(struct folio_batch *folios)
>  {
>  	unsigned long __maybe_unused UP_flags;
> -	struct folio *folio, *next;
>  	struct per_cpu_pages *pcp = NULL;
>  	struct zone *locked_zone = NULL;
> -	int batch_count = 0;
> -	int migratetype;
> +	int i, j, migratetype;
>  
> -	/* Prepare pages for freeing */
> -	list_for_each_entry_safe(folio, next, list, lru) {
> +	/* Prepare folios for freeing */
> +	for (i = 0, j = 0; i < folios->nr; i++) {
> +		struct folio *folio = folios->folios[i];
>  		unsigned long pfn = folio_pfn(folio);
> -		if (!free_unref_page_prepare(&folio->page, pfn, 0)) {
> -			list_del(&folio->lru);
> +		if (!free_unref_page_prepare(&folio->page, pfn, 0))
>  			continue;
> -		}
>  
>  		/*
> -		 * Free isolated pages directly to the allocator, see
> +		 * Free isolated folios directly to the allocator, see
>  		 * comment in free_unref_page.
>  		 */
>  		migratetype = get_pcppage_migratetype(&folio->page);
>  		if (unlikely(is_migrate_isolate(migratetype))) {
> -			list_del(&folio->lru);
>  			free_one_page(folio_zone(folio), &folio->page, pfn,
>  					0, migratetype, FPI_NONE);
>  			continue;
>  		}
> +		if (j != i)
> +			folios->folios[j] = folio;
> +		j++;
>  	}
> +	folios->nr = j;
>  
> -	list_for_each_entry_safe(folio, next, list, lru) {
> +	for (i = 0; i < folios->nr; i++) {
> +		struct folio *folio = folios->folios[i];
>  		struct zone *zone = folio_zone(folio);
>  
> -		list_del(&folio->lru);
>  		migratetype = get_pcppage_migratetype(&folio->page);
>  
> -		/*
> -		 * Either different zone requiring a different pcp lock or
> -		 * excessive lock hold times when freeing a large list of
> -		 * folios.
> -		 */
> -		if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {

Same comment as for release_pages(): the batch count is effectively halved. Does
this have perf implications? I guess you'll need to benchmark...
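
(For context, the sizes involved, assuming they are unchanged at this
point in the series:

        /* include/linux/pagevec.h */
        #define PAGEVEC_SIZE    15      /* capacity of a folio_batch */

        /* include/linux/swap.h */
        #define SWAP_CLUSTER_MAX 32UL   /* old per-lock-hold batch limit */

so the pcp lock would now be dropped roughly twice as often.)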

> +		/* Different zone requires a different pcp lock */
> +		if (zone != locked_zone) {
>  			if (pcp) {
>  				pcp_spin_unlock(pcp);
>  				pcp_trylock_finish(UP_flags);
>  			}
>  
> -			batch_count = 0;
> -
>  			/*
>  			 * trylock is necessary as folios may be getting freed
>  			 * from IRQ or SoftIRQ context after an IO completion.
> @@ -2541,13 +2536,31 @@ void free_unref_page_list(struct list_head *list)
>  
>  		trace_mm_page_free_batched(&folio->page);
>  		free_unref_page_commit(zone, pcp, &folio->page, migratetype, 0);
> -		batch_count++;
>  	}
>  
>  	if (pcp) {
>  		pcp_spin_unlock(pcp);
>  		pcp_trylock_finish(UP_flags);
>  	}
> +	folios->nr = 0;

Same nits as for previous patch: Better to use the APIs rather than manipulate
the internal folio_batch state directly?
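
Something like this at the tail, for example (untested), plus
folio_batch_count(folios) for the loop bounds:

        if (pcp) {
                pcp_spin_unlock(pcp);
                pcp_trylock_finish(UP_flags);
        }
        folio_batch_reinit(folios);     /* rather than folios->nr = 0 */

(The in-place compaction in the prepare loop would still need to set
->nr directly, or a small new helper.)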

> +}
> +
> +void free_unref_page_list(struct list_head *list)
> +{
> +	struct folio_batch fbatch;
> +
> +	folio_batch_init(&fbatch);
> +	while (!list_empty(list)) {
> +		struct folio *folio = list_first_entry(list, struct folio, lru);
> +
> +		list_del(&folio->lru);
> +		if (folio_batch_add(&fbatch, folio) > 0)
> +			continue;
> +		free_unref_folios(&fbatch);
> +	}
> +
> +	if (fbatch.nr)
> +		free_unref_folios(&fbatch);
>  }
>  
>  /*

Patch

diff --git a/mm/internal.h b/mm/internal.h
index 7499b5ea1cf6..5c6a53371aeb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -441,8 +441,9 @@  extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
 
-extern void free_unref_page(struct page *page, unsigned int order);
-extern void free_unref_page_list(struct list_head *list);
+void free_unref_page(struct page *page, unsigned int order);
+void free_unref_folios(struct folio_batch *fbatch);
+void free_unref_page_list(struct list_head *list);
 
 extern void zone_pcp_reset(struct zone *zone);
 extern void zone_pcp_disable(struct zone *zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f1ee96fd9bef..bca5c70b5576 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,6 +32,7 @@ 
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/pagevec.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmstat.h>
@@ -2464,57 +2465,51 @@  void free_unref_page(struct page *page, unsigned int order)
 }
 
 /*
- * Free a list of 0-order pages
+ * Free a batch of 0-order pages
  */
-void free_unref_page_list(struct list_head *list)
+void free_unref_folios(struct folio_batch *folios)
 {
 	unsigned long __maybe_unused UP_flags;
-	struct folio *folio, *next;
 	struct per_cpu_pages *pcp = NULL;
 	struct zone *locked_zone = NULL;
-	int batch_count = 0;
-	int migratetype;
+	int i, j, migratetype;
 
-	/* Prepare pages for freeing */
-	list_for_each_entry_safe(folio, next, list, lru) {
+	/* Prepare folios for freeing */
+	for (i = 0, j = 0; i < folios->nr; i++) {
+		struct folio *folio = folios->folios[i];
 		unsigned long pfn = folio_pfn(folio);
-		if (!free_unref_page_prepare(&folio->page, pfn, 0)) {
-			list_del(&folio->lru);
+		if (!free_unref_page_prepare(&folio->page, pfn, 0))
 			continue;
-		}
 
 		/*
-		 * Free isolated pages directly to the allocator, see
+		 * Free isolated folios directly to the allocator, see
 		 * comment in free_unref_page.
 		 */
 		migratetype = get_pcppage_migratetype(&folio->page);
 		if (unlikely(is_migrate_isolate(migratetype))) {
-			list_del(&folio->lru);
 			free_one_page(folio_zone(folio), &folio->page, pfn,
 					0, migratetype, FPI_NONE);
 			continue;
 		}
+		if (j != i)
+			folios->folios[j] = folio;
+		j++;
 	}
+	folios->nr = j;
 
-	list_for_each_entry_safe(folio, next, list, lru) {
+	for (i = 0; i < folios->nr; i++) {
+		struct folio *folio = folios->folios[i];
 		struct zone *zone = folio_zone(folio);
 
-		list_del(&folio->lru);
 		migratetype = get_pcppage_migratetype(&folio->page);
 
-		/*
-		 * Either different zone requiring a different pcp lock or
-		 * excessive lock hold times when freeing a large list of
-		 * folios.
-		 */
-		if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
+		/* Different zone requires a different pcp lock */
+		if (zone != locked_zone) {
 			if (pcp) {
 				pcp_spin_unlock(pcp);
 				pcp_trylock_finish(UP_flags);
 			}
 
-			batch_count = 0;
-
 			/*
 			 * trylock is necessary as folios may be getting freed
 			 * from IRQ or SoftIRQ context after an IO completion.
@@ -2541,13 +2536,31 @@  void free_unref_page_list(struct list_head *list)
 
 		trace_mm_page_free_batched(&folio->page);
 		free_unref_page_commit(zone, pcp, &folio->page, migratetype, 0);
-		batch_count++;
 	}
 
 	if (pcp) {
 		pcp_spin_unlock(pcp);
 		pcp_trylock_finish(UP_flags);
 	}
+	folios->nr = 0;
+}
+
+void free_unref_page_list(struct list_head *list)
+{
+	struct folio_batch fbatch;
+
+	folio_batch_init(&fbatch);
+	while (!list_empty(list)) {
+		struct folio *folio = list_first_entry(list, struct folio, lru);
+
+		list_del(&folio->lru);
+		if (folio_batch_add(&fbatch, folio) > 0)
+			continue;
+		free_unref_folios(&fbatch);
+	}
+
+	if (fbatch.nr)
+		free_unref_folios(&fbatch);
 }
 
 /*