
[v6,05/11] mm: split a folio in minimum folio order chunks

Message ID 20240529134509.120826-6-kernel@pankajraghav.com
State Superseded
Series enable bs > ps in XFS

Commit Message

Pankaj Raghav (Samsung) May 29, 2024, 1:45 p.m. UTC
From: Luis Chamberlain <mcgrof@kernel.org>

split_folio() and split_folio_to_list() assume order 0. To support
minorder we must expand these to check the folio's mapping order and
use that.

In split_huge_page_to_list_to_order(), set new_order to at least the
minimum folio order, if one is set, so that the minimum folio order
requirement of the page cache is maintained.

Update the debugfs write files used for testing to ensure the order
is respected as well. We simply enforce the min order when a file
mapping is used.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 include/linux/huge_mm.h | 14 ++++++++----
 mm/huge_memory.c        | 50 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 57 insertions(+), 7 deletions(-)
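
In effect, file-backed folios are now split to at least the mapping's
minimum order instead of unconditionally to order 0. A minimal sketch of
the new split_folio_to_list() logic (illustrative only;
mapping_min_folio_order() is introduced earlier in this series):

	unsigned int min_order = 0;

	/* Anonymous folios have no mapping-imposed minimum order. */
	if (!folio_test_anon(folio))
		min_order = mapping_min_folio_order(folio->mapping);

	/* Pass min_order instead of the hard-coded 0. */
	split_huge_page_to_list_to_order(&folio->page, list, min_order);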

Comments

Hannes Reinecke June 3, 2024, 6:34 a.m. UTC | #1
On 5/29/24 15:45, Pankaj Raghav (Samsung) wrote:
> From: Luis Chamberlain <mcgrof@kernel.org>
> 
> split_folio() and split_folio_to_list() assume order 0. To support
> minorder we must expand these to check the folio's mapping order and
> use that.
> 
> In split_huge_page_to_list_to_order(), set new_order to at least the
> minimum folio order, if one is set, so that the minimum folio order
> requirement of the page cache is maintained.
> 
> Update the debugfs write files used for testing to ensure the order
> is respected as well. We simply enforce the min order when a file
> mapping is used.
> 
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> ---
>   include/linux/huge_mm.h | 14 ++++++++----
>   mm/huge_memory.c        | 50 ++++++++++++++++++++++++++++++++++++++---
>   2 files changed, 57 insertions(+), 7 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
Matthew Wilcox June 3, 2024, 12:36 p.m. UTC | #2
On Wed, May 29, 2024 at 03:45:03PM +0200, Pankaj Raghav (Samsung) wrote:
> @@ -3572,14 +3600,19 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
>  
>  	for (index = off_start; index < off_end; index += nr_pages) {
>  		struct folio *folio = filemap_get_folio(mapping, index);
> +		unsigned int min_order, target_order = new_order;
>  
>  		nr_pages = 1;
>  		if (IS_ERR(folio))
>  			continue;
>  
> -		if (!folio_test_large(folio))
> +		if (!folio->mapping || !folio_test_large(folio))
>  			goto next;

This check is useless.  folio->mapping is set to NULL on truncate,
but you haven't done anything to prevent truncate yet.  That happens
later when you lock the folio.
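
The race-safe ordering is to take the folio lock first and only then
test the mapping. A minimal sketch of that pattern (illustrative only,
reusing the labels from the surrounding loop):

	if (!folio_trylock(folio))
		goto next;

	/* The folio lock blocks truncate, so folio->mapping is stable here. */
	if (folio->mapping != mapping)
		goto unlock;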

> +		min_order = mapping_min_folio_order(mapping);

You should hoist this out of the loop.

> +		if (new_order < min_order)
> +			target_order = min_order;
> +
>  		total++;
>  		nr_pages = folio_nr_pages(folio);
>  
> @@ -3589,7 +3622,18 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
>  		if (!folio_trylock(folio))
>  			goto next;
>  
> -		if (!split_folio_to_order(folio, new_order))
> +		if (!folio_test_anon(folio)) {

Please explain how a folio _in a file_ can be anon?

> +			unsigned int min_order;
> +
> +			if (!folio->mapping)
> +				goto next;
> +
> +			min_order = mapping_min_folio_order(folio->mapping);
> +			if (new_order < target_order)
> +				target_order = min_order;

Why is this being repeated?

> +		}
> +
> +		if (!split_folio_to_order(folio, target_order))
>  			split++;
>  
>  		folio_unlock(folio);
> -- 
> 2.34.1
>
Pankaj Raghav (Samsung) June 4, 2024, 10:29 a.m. UTC | #3
On Mon, Jun 03, 2024 at 01:36:46PM +0100, Matthew Wilcox wrote:
> On Wed, May 29, 2024 at 03:45:03PM +0200, Pankaj Raghav (Samsung) wrote:
> > @@ -3572,14 +3600,19 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
> >  
> >  	for (index = off_start; index < off_end; index += nr_pages) {
> >  		struct folio *folio = filemap_get_folio(mapping, index);
> > +		unsigned int min_order, target_order = new_order;
> >  
> >  		nr_pages = 1;
> >  		if (IS_ERR(folio))
> >  			continue;
> >  
> > -		if (!folio_test_large(folio))
> > +		if (!folio->mapping || !folio_test_large(folio))
> >  			goto next;
> 
> This check is useless.  folio->mapping is set to NULL on truncate,
> but you haven't done anything to prevent truncate yet.  That happens
> later when you lock the folio.
> 
> > +		min_order = mapping_min_folio_order(mapping);
> 
> You should hoist this out of the loop.
> 
> > +		if (new_order < min_order)
> > +			target_order = min_order;
> > +
> >  		total++;
> >  		nr_pages = folio_nr_pages(folio);
> >  
> > @@ -3589,7 +3622,18 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
> >  		if (!folio_trylock(folio))
> >  			goto next;
> >  
> > -		if (!split_folio_to_order(folio, new_order))
> > +		if (!folio_test_anon(folio)) {
> 
> Please explain how a folio _in a file_ can be anon?
> 
> > +			unsigned int min_order;
> > +
> > +			if (!folio->mapping)
> > +				goto next;
> > +
> > +			min_order = mapping_min_folio_order(folio->mapping);
> > +			if (new_order < target_order)
> > +				target_order = min_order;
> 
> Why is this being repeated?
> 
> > +		}

You are right. There is some repetition, and some of the checks are not
needed. I will clean up this function for the next revision.
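
For reference, a possible shape of the cleaned-up loop, folding in all
three comments above (a sketch of one option, not the actual next
revision):

	unsigned int min_order = mapping_min_folio_order(mapping);
	unsigned int target_order = max(new_order, min_order);

	for (index = off_start; index < off_end; index += nr_pages) {
		struct folio *folio = filemap_get_folio(mapping, index);

		nr_pages = 1;
		if (IS_ERR(folio))
			continue;

		if (!folio_test_large(folio))
			goto next;

		total++;
		nr_pages = folio_nr_pages(folio);

		if (!folio_trylock(folio))
			goto next;

		/* The folio lock blocks truncate; the mapping check belongs here. */
		if (folio->mapping != mapping)
			goto unlock;

		if (!split_folio_to_order(folio, target_order))
			split++;
unlock:
		folio_unlock(folio);
next:
		folio_put(folio);
		cond_resched();
	}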

Thanks.

--
Pankaj

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 87682498a5af..6a8e527b78a2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -88,6 +88,8 @@  extern struct kobj_attribute shmem_enabled_attr;
 #define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
 	(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))
 
+#define split_folio(f) split_folio_to_list(f, NULL)
+
 #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PUD_SHIFT PUD_SHIFT
@@ -307,9 +309,10 @@  unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add
 bool can_split_folio(struct folio *folio, int *pextra_pins);
 int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		unsigned int new_order);
+int split_folio_to_list(struct folio *folio, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
-	return split_huge_page_to_list_to_order(page, NULL, 0);
+	return split_folio(page_folio(page));
 }
 void deferred_split_folio(struct folio *folio);
 
@@ -474,6 +477,12 @@  static inline int split_huge_page(struct page *page)
 {
 	return 0;
 }
+
+static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+	return 0;
+}
+
 static inline void deferred_split_folio(struct folio *folio) {}
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
@@ -578,7 +587,4 @@  static inline int split_folio_to_order(struct folio *folio, int new_order)
 	return split_folio_to_list_to_order(folio, NULL, new_order);
 }
 
-#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
-#define split_folio(f) split_folio_to_order(f, 0)
-
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cf9ead052d2a..e4e0b3431dc6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3068,6 +3068,9 @@  bool can_split_folio(struct folio *folio, int *pextra_pins)
  * released, or if some unexpected race happened (e.g., anon VMA disappeared,
  * truncation).
  *
+ * Callers should ensure that the order respects the address space mapping
+ * min-order if one is set.
+ *
  * Returns -EINVAL when trying to split to an order that is incompatible
  * with the folio. Splitting to order 0 is compatible with all folios.
  */
@@ -3143,6 +3146,7 @@  int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
+		unsigned int min_order;
 		gfp_t gfp;
 
 		mapping = folio->mapping;
@@ -3153,6 +3157,14 @@  int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 			goto out;
 		}
 
+		min_order = mapping_min_folio_order(folio->mapping);
+		if (new_order < min_order) {
+			VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
+				     min_order);
+			ret = -EINVAL;
+			goto out;
+		}
+
 		gfp = current_gfp_context(mapping_gfp_mask(mapping) &
 							GFP_RECLAIM_MASK);
 
@@ -3264,6 +3276,21 @@  int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 	return ret;
 }
 
+int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+	unsigned int min_order = 0;
+
+	if (!folio_test_anon(folio)) {
+		if (!folio->mapping) {
+			count_vm_event(THP_SPLIT_PAGE_FAILED);
+			return -EBUSY;
+		}
+		min_order = mapping_min_folio_order(folio->mapping);
+	}
+
+	return split_huge_page_to_list_to_order(&folio->page, list, min_order);
+}
+
 void __folio_undo_large_rmappable(struct folio *folio)
 {
 	struct deferred_split *ds_queue;
@@ -3493,6 +3520,7 @@  static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		struct vm_area_struct *vma = vma_lookup(mm, addr);
 		struct page *page;
 		struct folio *folio;
+		unsigned int target_order = new_order;
 
 		if (!vma)
 			break;
@@ -3529,7 +3557,7 @@  static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio_to_order(folio, new_order))
+		if (!split_folio_to_order(folio, target_order))
 			split++;
 
 		folio_unlock(folio);
@@ -3572,14 +3600,19 @@  static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 
 	for (index = off_start; index < off_end; index += nr_pages) {
 		struct folio *folio = filemap_get_folio(mapping, index);
+		unsigned int min_order, target_order = new_order;
 
 		nr_pages = 1;
 		if (IS_ERR(folio))
 			continue;
 
-		if (!folio_test_large(folio))
+		if (!folio->mapping || !folio_test_large(folio))
 			goto next;
 
+		min_order = mapping_min_folio_order(mapping);
+		if (new_order < min_order)
+			target_order = min_order;
+
 		total++;
 		nr_pages = folio_nr_pages(folio);
 
@@ -3589,7 +3622,18 @@  static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio_to_order(folio, new_order))
+		if (!folio_test_anon(folio)) {
+			unsigned int min_order;
+
+			if (!folio->mapping)
+				goto next;
+
+			min_order = mapping_min_folio_order(folio->mapping);
+			if (new_order < target_order)
+				target_order = min_order;
+		}
+
+		if (!split_folio_to_order(folio, target_order))
 			split++;
 
 		folio_unlock(folio);