diff mbox series

[2/7] xfs: refactor the predicate part of xfs_free_eofblocks

Message ID 161040740813.1582286.3253329052236449810.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfs: consolidate posteof and cowblocks cleanup | expand

Commit Message

Darrick J. Wong Jan. 11, 2021, 11:23 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Refactor the part of _free_eofblocks that decides if it's really going
to truncate post-EOF blocks into a separate helper function.  The
upcoming deferred inode inactivation patch requires us to be able to
decide this prior to actual inactivation.  No functionality changes.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_bmap_util.c |  109 +++++++++++++++++++++---------------------------
 fs/xfs/xfs_inode.c     |   36 ++++++++++++++++
 fs/xfs/xfs_inode.h     |    2 +
 3 files changed, 85 insertions(+), 62 deletions(-)

Comments

Christoph Hellwig Jan. 13, 2021, 2:57 p.m. UTC | #1
On Mon, Jan 11, 2021 at 03:23:28PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Refactor the part of _free_eofblocks that decides if it's really going
> to truncate post-EOF blocks into a separate helper function.  The
> upcoming deferred inode inactivation patch requires us to be able to
> decide this prior to actual inactivation.  No functionality changes.

Is there any specific reason why the new xfs_has_eofblocks helper is in
xfs_inode.c?  That just makes following the logic a little harder.

> +
> +/*
> + * Decide if this inode have post-EOF blocks.  The caller is responsible
> + * for knowing / caring about the PREALLOC/APPEND flags.
> + */
> +int
> +xfs_has_eofblocks(
> +	struct xfs_inode	*ip,
> +	bool			*has)
> +{
> +	struct xfs_bmbt_irec	imap;
> +	struct xfs_mount	*mp = ip->i_mount;
> +	xfs_fileoff_t		end_fsb;
> +	xfs_fileoff_t		last_fsb;
> +	xfs_filblks_t		map_len;
> +	int			nimaps;
> +	int			error;
> +
> +	*has = false;
> +	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
> +	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
> +	if (last_fsb <= end_fsb)
> +		return 0;

Where does this strange magic come from?

> +	map_len = last_fsb - end_fsb;
> +
> +	nimaps = 1;
> +	xfs_ilock(ip, XFS_ILOCK_SHARED);
> +	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
> +	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> +
> +	if (error || nimaps == 0)
> +		return error;
> +
> +	*has = imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
> +	return 0;

I think this logic could be simplified at lot by using xfs_iext_lookup
directly. Something like:

	*has = false;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (ip->i_delayed_blks) {
		*has = true;
		goto out_unlock;
	}
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			goto out_unlock;
        }
	if (xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap))
		*has = true;
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
Darrick J. Wong Jan. 14, 2021, 10:49 p.m. UTC | #2
On Wed, Jan 13, 2021 at 03:57:15PM +0100, Christoph Hellwig wrote:
> On Mon, Jan 11, 2021 at 03:23:28PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Refactor the part of _free_eofblocks that decides if it's really going
> > to truncate post-EOF blocks into a separate helper function.  The
> > upcoming deferred inode inactivation patch requires us to be able to
> > decide this prior to actual inactivation.  No functionality changes.
> 
> Is there any specific reason why the new xfs_has_eofblocks helper is in
> xfs_inode.c?  That just makes following the logic a little harder.

I ... have no idea.  This patch also isn't technically needed until we
get to the deferred inactivation patchset, but I'll reshuffle the deck
and move this function back to xfs_bmap_util.c.

> > +
> > +/*
> > + * Decide if this inode have post-EOF blocks.  The caller is responsible
> > + * for knowing / caring about the PREALLOC/APPEND flags.
> > + */
> > +int
> > +xfs_has_eofblocks(
> > +	struct xfs_inode	*ip,
> > +	bool			*has)
> > +{
> > +	struct xfs_bmbt_irec	imap;
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +	xfs_fileoff_t		end_fsb;
> > +	xfs_fileoff_t		last_fsb;
> > +	xfs_filblks_t		map_len;
> > +	int			nimaps;
> > +	int			error;
> > +
> > +	*has = false;
> > +	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
> > +	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
> > +	if (last_fsb <= end_fsb)
> > +		return 0;
> 
> Where does this strange magic come from?

It comes straight from xfs_free_eofblocks.

I /think/ the purpose of this is to avoid touching any file that's
larger than the page cache supports (i.e. 16T on 32-bit) because inode
inactivation causes pagecache invalidation, and trying to invalidate
with too high a pgoff causes weird integer truncation problems.

> > +	map_len = last_fsb - end_fsb;
> > +
> > +	nimaps = 1;
> > +	xfs_ilock(ip, XFS_ILOCK_SHARED);
> > +	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
> > +	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> > +
> > +	if (error || nimaps == 0)
> > +		return error;
> > +
> > +	*has = imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
> > +	return 0;
> 
> I think this logic could be simplified at lot by using xfs_iext_lookup
> directly. Something like:
> 
> 	*has = false;
> 
> 	xfs_ilock(ip, XFS_ILOCK_SHARED);
> 	if (ip->i_delayed_blks) {
> 		*has = true;
> 		goto out_unlock;
> 	}
> 	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
> 		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);

Is it even worth reading in the bmap btree to clear posteof blocks if we
haven't loaded it already?

--D

> 		if (error)
> 			goto out_unlock;
>         }
> 	if (xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap))
> 		*has = true;
> out_unlock:
> 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> 	return error;

--D
Christoph Hellwig Jan. 18, 2021, 5:38 p.m. UTC | #3
On Thu, Jan 14, 2021 at 02:49:53PM -0800, Darrick J. Wong wrote:
> > > +	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
> > > +	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
> > > +	if (last_fsb <= end_fsb)
> > > +		return 0;
> > 
> > Where does this strange magic come from?
> 
> It comes straight from xfs_free_eofblocks.
> 
> I /think/ the purpose of this is to avoid touching any file that's
> larger than the page cache supports (i.e. 16T on 32-bit) because inode
> inactivation causes pagecache invalidation, and trying to invalidate
> with too high a pgoff causes weird integer truncation problems.

Hmm.  At very least we need to document this, but we really should not
allow to even read in an inode successfully under this condition..

Screams for a nice little test case..

> > 	xfs_ilock(ip, XFS_ILOCK_SHARED);
> > 	if (ip->i_delayed_blks) {
> > 		*has = true;
> > 		goto out_unlock;
> > 	}
> > 	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
> > 		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> 
> Is it even worth reading in the bmap btree to clear posteof blocks if we
> haven't loaded it already?

True, we can return early in that case.  I'm also not sure what the
i_delayed_blks check is supposed to do, as delalloc extents do show
up in the iext tree just like normal extents.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 602011f06fb6..16e996ae0ff3 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -637,78 +637,63 @@  xfs_free_eofblocks(
 	struct xfs_inode	*ip)
 {
 	struct xfs_trans	*tp;
-	int			error;
-	xfs_fileoff_t		end_fsb;
-	xfs_fileoff_t		last_fsb;
-	xfs_filblks_t		map_len;
-	int			nimaps;
-	struct xfs_bmbt_irec	imap;
 	struct xfs_mount	*mp = ip->i_mount;
-
-	/*
-	 * Figure out if there are any blocks beyond the end
-	 * of the file.  If not, then there is nothing to do.
-	 */
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
-	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
-	if (last_fsb <= end_fsb)
-		return 0;
-	map_len = last_fsb - end_fsb;
-
-	nimaps = 1;
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	bool			has;
+	int			error;
 
 	/*
 	 * If there are blocks after the end of file, truncate the file to its
 	 * current size to free them up.
 	 */
-	if (!error && (nimaps != 0) &&
-	    (imap.br_startblock != HOLESTARTBLOCK ||
-	     ip->i_delayed_blks)) {
-		/*
-		 * Attach the dquots to the inode up front.
-		 */
-		error = xfs_qm_dqattach(ip);
-		if (error)
-			return error;
+	error = xfs_has_eofblocks(ip, &has);
+	if (error || !has)
+		return error;
 
-		/* wait on dio to ensure i_size has settled */
-		inode_dio_wait(VFS_I(ip));
+	/*
+	 * Attach the dquots to the inode up front.
+	 */
+	error = xfs_qm_dqattach(ip);
+	if (error)
+		return error;
 
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
-				&tp);
-		if (error) {
-			ASSERT(XFS_FORCED_SHUTDOWN(mp));
-			return error;
-		}
+	/* wait on dio to ensure i_size has settled */
+	inode_dio_wait(VFS_I(ip));
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
-
-		/*
-		 * Do not update the on-disk file size.  If we update the
-		 * on-disk file size and then the system crashes before the
-		 * contents of the file are flushed to disk then the files
-		 * may be full of holes (ie NULL files bug).
-		 */
-		error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
-					XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
-		if (error) {
-			/*
-			 * If we get an error at this point we simply don't
-			 * bother truncating the file.
-			 */
-			xfs_trans_cancel(tp);
-		} else {
-			error = xfs_trans_commit(tp);
-			if (!error)
-				xfs_inode_clear_eofblocks_tag(ip);
-		}
-
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+	if (error) {
+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		return error;
 	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	/*
+	 * Do not update the on-disk file size.  If we update the
+	 * on-disk file size and then the system crashes before the
+	 * contents of the file are flushed to disk then the files
+	 * may be full of holes (ie NULL files bug).
+	 */
+	error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
+				XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
+	if (error)
+		goto err_cancel;
+
+	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_unlock;
+
+	xfs_inode_clear_eofblocks_tag(ip);
+	goto out_unlock;
+
+err_cancel:
+	/*
+	 * If we get an error at this point we simply don't
+	 * bother truncating the file.
+	 */
+	xfs_trans_cancel(tp);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0e6cc33a33ad..d30b49cd60d7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3711,3 +3711,39 @@  xfs_iunlock2_io_mmap(
 	if (!same_inode)
 		inode_unlock(VFS_I(ip1));
 }
+
+/*
+ * Decide if this inode have post-EOF blocks.  The caller is responsible
+ * for knowing / caring about the PREALLOC/APPEND flags.
+ */
+int
+xfs_has_eofblocks(
+	struct xfs_inode	*ip,
+	bool			*has)
+{
+	struct xfs_bmbt_irec	imap;
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		end_fsb;
+	xfs_fileoff_t		last_fsb;
+	xfs_filblks_t		map_len;
+	int			nimaps;
+	int			error;
+
+	*has = false;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
+	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+	if (last_fsb <= end_fsb)
+		return 0;
+	map_len = last_fsb - end_fsb;
+
+	nimaps = 1;
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (error || nimaps == 0)
+		return error;
+
+	*has = imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
+	return 0;
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index eca333f5f715..5295791be4e2 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -468,6 +468,8 @@  extern struct kmem_zone	*xfs_inode_zone;
 /* The default CoW extent size hint. */
 #define XFS_DEFAULT_COWEXTSZ_HINT 32
 
+int xfs_has_eofblocks(struct xfs_inode *ip, bool *has);
+
 int xfs_iunlink_init(struct xfs_perag *pag);
 void xfs_iunlink_destroy(struct xfs_perag *pag);