[06/12] xfs: remove the kmalloc to page allocator fallback

Message ID 20250305140532.158563-7-hch@lst.de (mailing list archive)
State New
Series [01/12] xfs: unmapped buffer item size straddling mismatch

Commit Message

Christoph Hellwig March 5, 2025, 2:05 p.m. UTC
Since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment
for kmalloc(power-of-two)", kmalloc and friends guarantee that power of
two sized allocations are naturally aligned.  Limit our use of kmalloc
for buffers to these power of two sizes and remove the fallback to
the page allocator for this case, but keep a check in addition to
trusting the slab allocator to get the alignment right.

Also refactor the kmalloc path to reuse various calculations for the
size and gfp flags.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_buf.c | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)
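
A minimal userspace sketch of the alignment guarantee and check described
above. This is not the kernel code: aligned_alloc() stands in for kmalloc(),
and is_power_of_2()/IS_ALIGNED() are reimplemented locally for illustration;
the 2048-byte size is just an example of a sub-page, power-of-two buffer.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Local stand-in for the kernel's IS_ALIGNED() macro. */
#define IS_ALIGNED(x, a)	(((uintptr_t)(x) & ((uintptr_t)(a) - 1)) == 0)

/* Local stand-in for the kernel's is_power_of_2() helper. */
static bool is_power_of_2(size_t n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	size_t size = 2048;	/* an example sub-page buffer size */
	void *addr;

	if (!is_power_of_2(size))
		return 1;

	/*
	 * aligned_alloc() provides the same natural alignment that kmalloc()
	 * guarantees for power-of-two sizes since commit 59bb47985c1d.
	 */
	addr = aligned_alloc(size, size);
	if (!addr)
		return 1;

	/* The "canary in the coal mine" check kept by the patch. */
	if (!IS_ALIGNED(addr, size)) {
		fprintf(stderr, "allocation not naturally aligned\n");
		free(addr);
		return 1;
	}

	printf("%p is naturally aligned to %zu bytes\n", addr, size);
	free(addr);
	return 0;
}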

Comments

Darrick J. Wong March 5, 2025, 6:18 p.m. UTC | #1
On Wed, Mar 05, 2025 at 07:05:23AM -0700, Christoph Hellwig wrote:
> Since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment
> for kmalloc(power-of-two)", kmalloc and friends guarantee that power of
> two sized allocations are naturally aligned.  Limit our use of kmalloc
> for buffers to these power of two sizes and remove the fallback to
> the page allocator for this case, but keep a check in addition to
> trusting the slab allocator to get the alignment right.
> 
> Also refactor the kmalloc path to reuse various calculations for the
> size and gfp flags.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_buf.c | 48 ++++++++++++++++++++++++------------------------
>  1 file changed, 24 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 18ec1c1fbca1..073246d4352f 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -243,23 +243,23 @@ xfs_buf_free(
>  
>  static int
>  xfs_buf_alloc_kmem(
> -	struct xfs_buf	*bp,
> -	xfs_buf_flags_t	flags)
> +	struct xfs_buf		*bp,
> +	size_t			size,
> +	gfp_t			gfp_mask)
>  {
> -	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL;
> -	size_t		size = BBTOB(bp->b_length);
> -
> -	/* Assure zeroed buffer for non-read cases. */
> -	if (!(flags & XBF_READ))
> -		gfp_mask |= __GFP_ZERO;
> +	ASSERT(is_power_of_2(size));
> +	ASSERT(size < PAGE_SIZE);
>  
> -	bp->b_addr = kmalloc(size, gfp_mask);
> +	bp->b_addr = kmalloc(size, gfp_mask | __GFP_NOFAIL);
>  	if (!bp->b_addr)
>  		return -ENOMEM;
>  
> -	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
> -	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
> -		/* b_addr spans two pages - use alloc_page instead */
> +	/*
> +	 * Slab guarantees that we get back naturally aligned allocations for
> +	 * power of two sizes.  Keep this check as the canary in the coal mine
> +	 * if anything changes in slab.
> +	 */
> +	if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)bp->b_addr, size))) {
>  		kfree(bp->b_addr);
>  		bp->b_addr = NULL;
>  		return -ENOMEM;
> @@ -300,18 +300,22 @@ xfs_buf_alloc_backing_mem(
>  	if (xfs_buftarg_is_mem(bp->b_target))
>  		return xmbuf_map_page(bp);
>  
> -	/*
> -	 * For buffers that fit entirely within a single page, first attempt to
> -	 * allocate the memory from the heap to minimise memory usage.  If we
> -	 * can't get heap memory for these small buffers, we fall back to using
> -	 * the page allocator.
> -	 */
> -	if (size < PAGE_SIZE && xfs_buf_alloc_kmem(new_bp, flags) == 0)
> -		return 0;
> +	/* Assure zeroed buffer for non-read cases. */
> +	if (!(flags & XBF_READ))
> +		gfp_mask |= __GFP_ZERO;
>  
>  	if (flags & XBF_READ_AHEAD)
>  		gfp_mask |= __GFP_NORETRY;
>  
> +	/*
> +	 * For buffers smaller than PAGE_SIZE use a kmalloc allocation if that
> +	 * is properly aligned.  The slab allocator now guarantees an aligned
> +	 * allocation for all power of two sizes, we matches most of the smaller

Same suggestion:

"...which matches most of the smaller than PAGE_SIZE buffers..."

as last time.

with the comment fixed,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> +	 * than PAGE_SIZE buffers used by XFS.
> +	 */
> +	if (size < PAGE_SIZE && is_power_of_2(size))
> +		return xfs_buf_alloc_kmem(bp, size, gfp_mask);
> +
>  	/* Make sure that we have a page list */
>  	bp->b_page_count = DIV_ROUND_UP(size, PAGE_SIZE);
>  	if (bp->b_page_count <= XB_PAGES) {
> @@ -324,10 +328,6 @@ xfs_buf_alloc_backing_mem(
>  	}
>  	bp->b_flags |= _XBF_PAGES;
>  
> -	/* Assure zeroed buffer for non-read cases. */
> -	if (!(flags & XBF_READ))
> -		gfp_mask |= __GFP_ZERO;
> -
>  	/*
>  	 * Bulk filling of pages can take multiple calls. Not filling the entire
>  	 * array is not an allocation failure, so don't back off if we get at
> -- 
> 2.45.2
> 
>
Dave Chinner March 5, 2025, 9:02 p.m. UTC | #2
On Wed, Mar 05, 2025 at 07:05:23AM -0700, Christoph Hellwig wrote:
> Since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment
> for kmalloc(power-of-two)", kmalloc and friends guarantee that power of
> two sized allocations are naturally aligned.  Limit our use of kmalloc
> for buffers to these power of two sizes and remove the fallback to
> the page allocator for this case, but keep a check in addition to
> trusting the slab allocator to get the alignment right.
> 
> Also refactor the kmalloc path to reuse various calculations for the
> size and gfp flags.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
.....
> @@ -300,18 +300,22 @@ xfs_buf_alloc_backing_mem(
>  	if (xfs_buftarg_is_mem(bp->b_target))
>  		return xmbuf_map_page(bp);
>  
> -	/*
> -	 * For buffers that fit entirely within a single page, first attempt to
> -	 * allocate the memory from the heap to minimise memory usage.  If we
> -	 * can't get heap memory for these small buffers, we fall back to using
> -	 * the page allocator.
> -	 */
> -	if (size < PAGE_SIZE && xfs_buf_alloc_kmem(new_bp, flags) == 0)
> -		return 0;
> +	/* Assure zeroed buffer for non-read cases. */
> +	if (!(flags & XBF_READ))
> +		gfp_mask |= __GFP_ZERO;

We should probably drop this zeroing altogether.

The higher level code cannot assume that a buffer obtained for write
via the xfs_trans_get_buf() path contains zeros. e.g. if the buffer
was in cache when the get_buf() call occurs, it will contain whatever
was already in the buffer, not zeros. This occurs even if the buffer
was STALE in cache at the time of the get() operation.

Hence callers must always initialise the entire buffer themselves
(and they do!), so allocating zeroed buffers when we are going to
initialise them ourselves anyway is really unnecessary overhead...

This may not matter for 4kB block size filesystems, but it may make
a difference for 64kB block size filesystems, especially when we are
only doing a get() on the buffer to mark it stale in a transaction
and never actually use the contents of it...

-Dave.
Christoph Hellwig March 5, 2025, 11:32 p.m. UTC | #3
On Wed, Mar 05, 2025 at 10:18:12AM -0800, Darrick J. Wong wrote:
> > +	/*
> > +	 * For buffers smaller than PAGE_SIZE use a kmalloc allocation if that
> > +	 * is properly aligned.  The slab allocator now guarantees an aligned
> > +	 * allocation for all power of two sizes, we matches most of the smaller
> 
> Same suggestion:
> 
> "...which matches most of the smaller than PAGE_SIZE buffers..."

Hmm, I thought I had fixed that, but I for sure have now.
Christoph Hellwig March 5, 2025, 11:38 p.m. UTC | #4
On Thu, Mar 06, 2025 at 08:02:33AM +1100, Dave Chinner wrote:
> > +	if (!(flags & XBF_READ))
> > +		gfp_mask |= __GFP_ZERO;
> 
> We should probably drop this zeroing altogether.

Maybe.  Not in this patch or series, though, and whoever wants to submit
it needs to explain why the rationale used for adding it in commit
3219e8cf0dade9884d3c6cb432d433b4ca56875d doesn't apply any more.

Patch

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 18ec1c1fbca1..073246d4352f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -243,23 +243,23 @@  xfs_buf_free(
 
 static int
 xfs_buf_alloc_kmem(
-	struct xfs_buf	*bp,
-	xfs_buf_flags_t	flags)
+	struct xfs_buf		*bp,
+	size_t			size,
+	gfp_t			gfp_mask)
 {
-	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL;
-	size_t		size = BBTOB(bp->b_length);
-
-	/* Assure zeroed buffer for non-read cases. */
-	if (!(flags & XBF_READ))
-		gfp_mask |= __GFP_ZERO;
+	ASSERT(is_power_of_2(size));
+	ASSERT(size < PAGE_SIZE);
 
-	bp->b_addr = kmalloc(size, gfp_mask);
+	bp->b_addr = kmalloc(size, gfp_mask | __GFP_NOFAIL);
 	if (!bp->b_addr)
 		return -ENOMEM;
 
-	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
-	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
-		/* b_addr spans two pages - use alloc_page instead */
+	/*
+	 * Slab guarantees that we get back naturally aligned allocations for
+	 * power of two sizes.  Keep this check as the canary in the coal mine
+	 * if anything changes in slab.
+	 */
+	if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)bp->b_addr, size))) {
 		kfree(bp->b_addr);
 		bp->b_addr = NULL;
 		return -ENOMEM;
@@ -300,18 +300,22 @@  xfs_buf_alloc_backing_mem(
 	if (xfs_buftarg_is_mem(bp->b_target))
 		return xmbuf_map_page(bp);
 
-	/*
-	 * For buffers that fit entirely within a single page, first attempt to
-	 * allocate the memory from the heap to minimise memory usage.  If we
-	 * can't get heap memory for these small buffers, we fall back to using
-	 * the page allocator.
-	 */
-	if (size < PAGE_SIZE && xfs_buf_alloc_kmem(new_bp, flags) == 0)
-		return 0;
+	/* Assure zeroed buffer for non-read cases. */
+	if (!(flags & XBF_READ))
+		gfp_mask |= __GFP_ZERO;
 
 	if (flags & XBF_READ_AHEAD)
 		gfp_mask |= __GFP_NORETRY;
 
+	/*
+	 * For buffers smaller than PAGE_SIZE use a kmalloc allocation if that
+	 * is properly aligned.  The slab allocator now guarantees an aligned
+	 * allocation for all power of two sizes, we matches most of the smaller
+	 * than PAGE_SIZE buffers used by XFS.
+	 */
+	if (size < PAGE_SIZE && is_power_of_2(size))
+		return xfs_buf_alloc_kmem(bp, size, gfp_mask);
+
 	/* Make sure that we have a page list */
 	bp->b_page_count = DIV_ROUND_UP(size, PAGE_SIZE);
 	if (bp->b_page_count <= XB_PAGES) {
@@ -324,10 +328,6 @@  xfs_buf_alloc_backing_mem(
 	}
 	bp->b_flags |= _XBF_PAGES;
 
-	/* Assure zeroed buffer for non-read cases. */
-	if (!(flags & XBF_READ))
-		gfp_mask |= __GFP_ZERO;
-
 	/*
 	 * Bulk filling of pages can take multiple calls. Not filling the entire
 	 * array is not an allocation failure, so don't back off if we get at