[RFC,2/3] mm: shmem: add large folio support to the write and fallocate paths

Message ID: 05a31373ee1aa40a6a85d4897324a400686e2fb1.1721720891.git.baolin.wang@linux.alibaba.com
State: New
Series: Support large folios for tmpfs

Commit Message

Baolin Wang July 24, 2024, 7:03 a.m. UTC
From: Daniel Gomez <da.gomez@samsung.com>

Add large folio support to the shmem write and fallocate paths, matching
the high-order preference mechanism used by the iomap buffered IO path,
as implemented in __filemap_get_folio().

Add shmem_mapping_size_order() to get a hint for the folio order based
on the file size, taking the mapping's alignment requirements into
account.

Swap does not support high-order folios for now, so fall back to
order 0 when swap is enabled.

If the top-level huge page option (controlled by
'/sys/kernel/mm/transparent_hugepage/shmem_enabled') is enabled, we only
allow PMD-sized THP, to keep the interface backward compatible.
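
As a rough userspace sketch of the intended size-to-order mapping
(assuming 4K pages; the helper name, the MAX_PAGECACHE_ORDER value and
the simplifications are illustrative only, shmem_mapping_size_order()
below is the real logic):

#include <stdio.h>

#define PAGE_SHIFT		12
#define MAX_PAGECACHE_ORDER	8	/* illustrative cap, not the kernel value */

/* ilog2(size) - PAGE_SHIFT, clamped by index alignment and the cap */
static unsigned int size_to_order_hint(size_t size, unsigned long index)
{
	unsigned int order;

	if (size <= (1UL << PAGE_SHIFT))
		return 0;

	/* ilog2(): position of the highest set bit, i.e. round down */
	order = (unsigned int)(8 * sizeof(size_t) - 1 - __builtin_clzl(size));
	order -= PAGE_SHIFT;

	/* If the index is not aligned to the order, allocate a smaller folio */
	if (index & ((1UL << order) - 1))
		order = (unsigned int)__builtin_ctzl(index);	/* __ffs() */

	return order < MAX_PAGECACHE_ORDER ? order : MAX_PAGECACHE_ORDER;
}

int main(void)
{
	printf("%u\n", size_to_order_hint(64 * 1024, 16));	/* 4: aligned 64K write */
	printf("%u\n", size_to_order_hint(64 * 1024, 3));	/* 0: unaligned index */
	return 0;
}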

Co-developed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Daniel Gomez <da.gomez@samsung.com>
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 include/linux/shmem_fs.h |  4 +--
 mm/huge_memory.c         |  2 +-
 mm/shmem.c               | 57 ++++++++++++++++++++++++++++++++++++----
 3 files changed, 55 insertions(+), 8 deletions(-)

Comments

Daniel Gomez Aug. 4, 2024, 6:46 p.m. UTC | #1
On Wed, Jul 24, 2024 at 03:03:59PM GMT, Baolin Wang wrote:
> [...]
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 92ed09527682..cc0c1b790267 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1630,10 +1630,47 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>  	return result;
>  }
>  
> +/**
> + * shmem_mapping_size_order - Get maximum folio order for the given file size.
> + * @mapping: Target address_space.
> + * @index: The page index.
> + * @size: The suggested size of the folio to create.
> + *
> + * This returns a high folio order (when supported) based on the file size,
> + * limited to what the mapping currently allows at the given index. The index
> + * is relevant due to alignment considerations the mapping might have. The
> + * returned order may correspond to a size smaller than the one passed.
> + *
> + * Like the __filemap_get_folio() order calculation.
> + *
> + * Return: The order.
> + */
> +static inline unsigned int
> +shmem_mapping_size_order(struct address_space *mapping, pgoff_t index,
> +			 size_t size, struct shmem_sb_info *sbinfo)
> +{
> +	unsigned int order = ilog2(size);
> +
> +	if ((order <= PAGE_SHIFT) ||
> +	    (!mapping_large_folio_support(mapping) || !sbinfo->noswap))
> +		return 0;
> +
> +	order -= PAGE_SHIFT;
> +
> +	/* If we're not aligned, allocate a smaller folio */
> +	if (index & ((1UL << order) - 1))
> +		order = __ffs(index);
> +
> +	order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
> +
> +	/* Order-1 not supported due to THP dependency */
> +	return (order == 1) ? 0 : order;
> +}

I have an updated version of shmem_mapping_size_order() that I haven't
posted yet; it uses get_order() instead, as suggested in [1]:

[1] https://lore.kernel.org/all/ZT7rd3CSr+VnKj7v@casper.infradead.org/

/**
 * shmem_mapping_size_order - Get maximum folio order for the given file size.
 * @mapping: Target address_space.
 * @index: The page index.
 * @size: The suggested size of the folio to create.
 *
 * This returns a high folio order (when supported) based on the file size,
 * limited to what the mapping currently allows at the given index. The index
 * is relevant due to alignment considerations the mapping might have. The
 * returned order may correspond to a size smaller than the one passed.
 *
 * Like the __filemap_get_folio() order calculation.
 *
 * Return: The order.
 */
static inline unsigned int
shmem_mapping_size_order(struct address_space *mapping, pgoff_t index,
                        size_t size)
{
       unsigned int order = get_order(max_t(size_t, size, PAGE_SIZE));

       if (!mapping_large_folio_support(mapping))
               return 0;

       /* If we're not aligned, allocate a smaller folio */
       if (index & ((1UL << order) - 1))
               order = __ffs(index);

       return min_t(size_t, order, MAX_PAGECACHE_ORDER);
}

Order-1 folios are already supported since commit [2], so I've removed
that condition as well.

[2] 8897277acfef7f70fdecc054073bea2542fc7a1b ("mm: support order-1 folios in the
page cache").

> [...]

Patch

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 34beaca2f853..fb0771218f1b 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -113,11 +113,11 @@  int shmem_unuse(unsigned int type);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force);
+				bool shmem_huge_force, size_t len);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force)
+				bool shmem_huge_force, size_t len)
 {
 	return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e555fcdd19d4..a8fc3b9e4034 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -162,7 +162,7 @@  unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 	if (!in_pf && shmem_file(vma->vm_file))
 		return shmem_allowable_huge_orders(file_inode(vma->vm_file),
 						   vma, vma->vm_pgoff,
-						   !enforce_sysfs);
+						   !enforce_sysfs, PAGE_SIZE);
 
 	if (!vma_is_anonymous(vma)) {
 		/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 92ed09527682..cc0c1b790267 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1630,10 +1630,47 @@  static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
 	return result;
 }
 
+/**
+ * shmem_mapping_size_order - Get maximum folio order for the given file size.
+ * @mapping: Target address_space.
+ * @index: The page index.
+ * @size: The suggested size of the folio to create.
+ *
+ * This returns a high folio order (when supported) based on the file size,
+ * limited to what the mapping currently allows at the given index. The index
+ * is relevant due to alignment considerations the mapping might have. The
+ * returned order may correspond to a size smaller than the one passed.
+ *
+ * Like the __filemap_get_folio() order calculation.
+ *
+ * Return: The order.
+ */
+static inline unsigned int
+shmem_mapping_size_order(struct address_space *mapping, pgoff_t index,
+			 size_t size, struct shmem_sb_info *sbinfo)
+{
+	unsigned int order = ilog2(size);
+
+	if ((order <= PAGE_SHIFT) ||
+	    (!mapping_large_folio_support(mapping) || !sbinfo->noswap))
+		return 0;
+
+	order -= PAGE_SHIFT;
+
+	/* If we're not aligned, allocate a smaller folio */
+	if (index & ((1UL << order) - 1))
+		order = __ffs(index);
+
+	order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
+
+	/* Order-1 not supported due to THP dependency */
+	return (order == 1) ? 0 : order;
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force)
+				bool shmem_huge_force, size_t len)
 {
 	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
 	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
@@ -1659,10 +1696,20 @@  unsigned long shmem_allowable_huge_orders(struct inode *inode,
 						vma, vm_flags);
 	if (!vma || !vma_is_anon_shmem(vma)) {
 		/*
-		 * For tmpfs, we now only support PMD sized THP if huge page
-		 * is enabled, otherwise fallback to order 0.
+		 * For tmpfs, if the top-level huge page option is enabled, we
+		 * just allow PMD-sized THP to keep the interface backward compatible.
+		 */
+		if (global_huge)
+			return BIT(HPAGE_PMD_ORDER);
+
+		/*
+		 * Otherwise, get the highest order hint based on the length of
+		 * the write or fallocate request, then try each allowable huge
+		 * order.
 		 */
-		return global_huge ? BIT(HPAGE_PMD_ORDER) : 0;
+		order = shmem_mapping_size_order(inode->i_mapping, index,
+						 len, SHMEM_SB(inode->i_sb));
+		return BIT(order + 1) - 1;
 	}
 
 	/*
@@ -2174,7 +2221,7 @@  static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	}
 
 	/* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
-	orders = shmem_allowable_huge_orders(inode, vma, index, false);
+	orders = shmem_allowable_huge_orders(inode, vma, index, false, len);
 	if (orders > 0) {
 		gfp_t huge_gfp;
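
A note on the return value in shmem_allowable_huge_orders() above:
BIT(order + 1) - 1 turns the single highest-order hint into a bitmap
with every order from 0 up to and including the hint set, so the caller
can try each allowable order in turn. A minimal sketch of the
arithmetic:

	unsigned int order = 4;	/* e.g. hint from shmem_mapping_size_order() */
	unsigned long orders = BIT(order + 1) - 1;	/* 0x1f: orders 0-4 allowed */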