
[07/10] xfs: only free posteof blocks on first close

Message ID 20240623053532.857496-8-hch@lst.de (mailing list archive)
State Superseded, archived
Series [01/10] xfs: fix freeing speculative preallocations for preallocated files

Commit Message

Christoph Hellwig June 23, 2024, 5:34 a.m. UTC
From: "Darrick J. Wong" <djwong@kernel.org>

Certain workloads fragment files on XFS very badly, such as a software
package that creates a number of threads, each of which repeatedly runs
the sequence: open a file, perform a synchronous write, and close the
file.  This pattern defeats the speculative preallocation mechanism.  We
work around this problem by only deleting posteof blocks the /first/
time a file is closed, which preserves the behavior that unpacking a
tarball lays out files one after the other with no gaps.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
[hch: rebased, updated comment, renamed the flag]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c  | 32 +++++++++++---------------------
 fs/xfs/xfs_inode.h |  4 ++--
 2 files changed, 13 insertions(+), 23 deletions(-)
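
As an illustration of the problematic pattern, here is a minimal userspace
sketch (hypothetical, not part of the patch; the path, chunk size and
iteration count are made up):

/*
 * Illustrative only -- each thread repeatedly appends to its own file
 * with an fsync and a close in between, the pattern that used to drop
 * the post-EOF preallocation on every close and fragment the file.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static void append_chunk(const char *path, const char *buf, size_t len)
{
	int fd = open(path, O_WRONLY | O_CREAT | O_APPEND, 0644);

	if (fd < 0)
		return;
	if (write(fd, buf, len) == (ssize_t)len)
		fsync(fd);		/* synchronous write */
	close(fd);			/* used to trim post-EOF blocks */
}

int main(void)
{
	char buf[4096];
	int i;

	memset(buf, 'x', sizeof(buf));
	for (i = 0; i < 1000; i++)
		append_chunk("/mnt/xfs/logfile", buf, sizeof(buf));
	return 0;
}

Before this change, every close() in the loop above trimmed the speculative
post-EOF preallocation, so each subsequent append had to allocate fresh
blocks and the file ended up badly fragmented; with the change, only the
first close trims it and later appends can keep extending the existing
preallocation.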

Comments

Darrick J. Wong June 24, 2024, 3:46 p.m. UTC | #1
On Sun, Jun 23, 2024 at 07:34:52AM +0200, Christoph Hellwig wrote:
> From: "Darrick J. Wong" <djwong@kernel.org>
> 
> Certain workloads fragment files on XFS very badly, such as a software
> package that creates a number of threads, each of which repeatedly runs
> the sequence: open a file, perform a synchronous write, and close the
> file.  This pattern defeats the speculative preallocation mechanism.  We
> work around this problem by only deleting posteof blocks the /first/
> time a file is closed, which preserves the behavior that unpacking a
> tarball lays out files one after the other with no gaps.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> [hch: rebased, updated comment, renamed the flag]
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Someone please review this?  The last person to try was Dave, five years
ago, and I do not know if he ever saw what it did to various workloads.

https://lore.kernel.org/linux-xfs/20190315034237.GL23020@dastard/

--D

> ---
>  fs/xfs/xfs_file.c  | 32 +++++++++++---------------------
>  fs/xfs/xfs_inode.h |  4 ++--
>  2 files changed, 13 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 8d70171678fe24..de52aceabebc27 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1215,15 +1215,21 @@ xfs_file_release(
>  	 * exposed to that problem.
>  	 */
>  	if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) {
> -		xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
> +		xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED);
>  		if (ip->i_delayed_blks > 0)
>  			filemap_flush(inode->i_mapping);
>  	}
>  
>  	/*
>  	 * XFS aggressively preallocates post-EOF space to generate contiguous
> -	 * allocations for writers that append to the end of the file and we
> -	 * try to free these when an open file context is released.
> +	 * allocations for writers that append to the end of the file.
> +	 *
> +	 * To support workloads that close and reopen the file frequently, these
> +	 * preallocations usually persist after a close unless it is the first
> +	 * close for the inode.  This is a tradeoff to generate tightly packed
> +	 * data layouts for unpacking tarballs or similar archives that write
> +	 * one file after another without going back to it while keeping the
> +	 * preallocation for files that have recurring open/write/close cycles.
>  	 *
>  	 * There is no point in freeing blocks here for open but unlinked files
>  	 * as they will be taken care of by the inactivation path soon.
> @@ -1241,25 +1247,9 @@ xfs_file_release(
>  	    (file->f_mode & FMODE_WRITE) &&
>  	    xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
>  		if (xfs_can_free_eofblocks(ip) &&
> -		    !xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) {
> -			/*
> -			 * Check if the inode is being opened, written and
> -			 * closed frequently and we have delayed allocation
> -			 * blocks outstanding (e.g. streaming writes from the
> -			 * NFS server), truncating the blocks past EOF will
> -			 * cause fragmentation to occur.
> -			 *
> -			 * In this case don't do the truncation, but we have to
> -			 * be careful how we detect this case. Blocks beyond EOF
> -			 * show up as i_delayed_blks even when the inode is
> -			 * clean, so we need to truncate them away first before
> -			 * checking for a dirty release. Hence on the first
> -			 * dirty close we will still remove the speculative
> -			 * allocation, but after that we will leave it in place.
> -			 */
> +		    !xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED)) {
>  			xfs_free_eofblocks(ip);
> -			if (ip->i_delayed_blks)
> -				xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
> +			xfs_iflags_set(ip, XFS_EOFBLOCKS_RELEASED);
>  		}
>  		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
>  	}
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index ae9851226f9913..548a4f00bcae1b 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -336,7 +336,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
>  #define XFS_INEW		(1 << 3) /* inode has just been allocated */
>  #define XFS_IPRESERVE_DM_FIELDS	(1 << 4) /* has legacy DMAPI fields set */
>  #define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
> -#define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
> +#define XFS_EOFBLOCKS_RELEASED	(1 << 6) /* eofblocks were freed in ->release */
>  #define XFS_IFLUSHING		(1 << 7) /* inode is being flushed */
>  #define __XFS_IPINNED_BIT	8	 /* wakeup key for zero pin count */
>  #define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
> @@ -383,7 +383,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
>   */
>  #define XFS_IRECLAIM_RESET_FLAGS	\
>  	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
> -	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
> +	 XFS_EOFBLOCKS_RELEASED | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
>  	 XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)
>  
>  /*
> -- 
> 2.43.0
> 
>
Christoph Hellwig June 24, 2024, 4:08 p.m. UTC | #2
On Mon, Jun 24, 2024 at 08:46:21AM -0700, Darrick J. Wong wrote:
> On Sun, Jun 23, 2024 at 07:34:52AM +0200, Christoph Hellwig wrote:
> > From: "Darrick J. Wong" <djwong@kernel.org>
> > 
> > Certain workloads fragment files on XFS very badly, such as a software
> > package that creates a number of threads, each of which repeatedly runs
> > the sequence: open a file, perform a synchronous write, and close the
> > file.  This pattern defeats the speculative preallocation mechanism.  We
> > work around this problem by only deleting posteof blocks the /first/
> > time a file is closed, which preserves the behavior that unpacking a
> > tarball lays out files one after the other with no gaps.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > [hch: rebased, updated comment, renamed the flag]
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Someone please review this?  The last person to try was Dave, five years
> ago, and I do not know if he ever saw what it did to various workloads.
> 
> https://lore.kernel.org/linux-xfs/20190315034237.GL23020@dastard/

Well, the read-only check Dave suggested is in the previous patch,
and the tests he sent cover the relevant synthetic workloads.  What
else are you looking for?
Darrick J. Wong June 24, 2024, 4:49 p.m. UTC | #3
On Mon, Jun 24, 2024 at 06:08:23PM +0200, Christoph Hellwig wrote:
> On Mon, Jun 24, 2024 at 08:46:21AM -0700, Darrick J. Wong wrote:
> > On Sun, Jun 23, 2024 at 07:34:52AM +0200, Christoph Hellwig wrote:
> > > From: "Darrick J. Wong" <djwong@kernel.org>
> > > 
> > > Certain workloads fragment files on XFS very badly, such as a software
> > > package that creates a number of threads, each of which repeatedly runs
> > > the sequence: open a file, perform a synchronous write, and close the
> > > file.  This pattern defeats the speculative preallocation mechanism.  We
> > > work around this problem by only deleting posteof blocks the /first/
> > > time a file is closed, which preserves the behavior that unpacking a
> > > tarball lays out files one after the other with no gaps.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > [hch: rebased, updated comment, renamed the flag]
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > 
> > Someone please review this?  The last person to try was Dave, five years
> > ago, and I do not know if he ever saw what it did to various workloads.
> > 
> > https://lore.kernel.org/linux-xfs/20190315034237.GL23020@dastard/
> 
> Well, the read-only check Dave suggested is in the previous patch,
> and the tests he sent cover the relevant synthetic workloads.  What
> else are you looking for?

Nothing -- it looks fine to me, but as it's authored by me, I can't
meaningfully slap an RVB tag on it, can I?

Eh I've done the rest of the series; let's try it anyway:
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8d70171678fe24..de52aceabebc27 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1215,15 +1215,21 @@  xfs_file_release(
 	 * exposed to that problem.
 	 */
 	if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) {
-		xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
+		xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED);
 		if (ip->i_delayed_blks > 0)
 			filemap_flush(inode->i_mapping);
 	}
 
 	/*
 	 * XFS aggressively preallocates post-EOF space to generate contiguous
-	 * allocations for writers that append to the end of the file and we
-	 * try to free these when an open file context is released.
+	 * allocations for writers that append to the end of the file.
+	 *
+	 * To support workloads that close and reopen the file frequently, these
+	 * preallocations usually persist after a close unless it is the first
+	 * close for the inode.  This is a tradeoff to generate tightly packed
+	 * data layouts for unpacking tarballs or similar archives that write
+	 * one file after another without going back to it while keeping the
+	 * preallocation for files that have recurring open/write/close cycles.
 	 *
 	 * There is no point in freeing blocks here for open but unlinked files
 	 * as they will be taken care of by the inactivation path soon.
@@ -1241,25 +1247,9 @@  xfs_file_release(
 	    (file->f_mode & FMODE_WRITE) &&
 	    xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
 		if (xfs_can_free_eofblocks(ip) &&
-		    !xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) {
-			/*
-			 * Check if the inode is being opened, written and
-			 * closed frequently and we have delayed allocation
-			 * blocks outstanding (e.g. streaming writes from the
-			 * NFS server), truncating the blocks past EOF will
-			 * cause fragmentation to occur.
-			 *
-			 * In this case don't do the truncation, but we have to
-			 * be careful how we detect this case. Blocks beyond EOF
-			 * show up as i_delayed_blks even when the inode is
-			 * clean, so we need to truncate them away first before
-			 * checking for a dirty release. Hence on the first
-			 * dirty close we will still remove the speculative
-			 * allocation, but after that we will leave it in place.
-			 */
+		    !xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED)) {
 			xfs_free_eofblocks(ip);
-			if (ip->i_delayed_blks)
-				xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
+			xfs_iflags_set(ip, XFS_EOFBLOCKS_RELEASED);
 		}
 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 	}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ae9851226f9913..548a4f00bcae1b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -336,7 +336,7 @@  static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
 #define XFS_INEW		(1 << 3) /* inode has just been allocated */
 #define XFS_IPRESERVE_DM_FIELDS	(1 << 4) /* has legacy DMAPI fields set */
 #define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
-#define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
+#define XFS_EOFBLOCKS_RELEASED	(1 << 6) /* eofblocks were freed in ->release */
 #define XFS_IFLUSHING		(1 << 7) /* inode is being flushed */
 #define __XFS_IPINNED_BIT	8	 /* wakeup key for zero pin count */
 #define XFS_IPINNED		(1 << __XFS_IPINNED_BIT)
@@ -383,7 +383,7 @@  static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
  */
 #define XFS_IRECLAIM_RESET_FLAGS	\
 	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
-	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
+	 XFS_EOFBLOCKS_RELEASED | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
 	 XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)
 
 /*
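
For reference, a simplified sketch of the release-time logic after both
hunks are applied (reconstructed from the diff above; other conditions in
xfs_file_release() are elided and the surrounding function context is
assumed):

	/* Simplified sketch, not verbatim kernel code. */
	if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) {
		/* A truncate re-arms the post-EOF trim for the next release. */
		xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED);
		if (ip->i_delayed_blks > 0)
			filemap_flush(inode->i_mapping);
	}

	if (/* ...elided checks... */ (file->f_mode & FMODE_WRITE) &&
	    xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		/* Free post-EOF blocks only on the first release of the inode. */
		if (xfs_can_free_eofblocks(ip) &&
		    !xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED)) {
			xfs_free_eofblocks(ip);
			xfs_iflags_set(ip, XFS_EOFBLOCKS_RELEASED);
		}
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}

Note that XFS_EOFBLOCKS_RELEASED is cleared both by a truncate (above) and
by inode reclaim via XFS_IRECLAIM_RESET_FLAGS, so a truncated or reused
inode gets one more post-EOF trim on its next release.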