diff mbox series

[2/3] xfs: truncate should remove all blocks, not just to the end of the page cache

Message ID 157845706502.82882.5903950627987445484.stgit@magnolia (mailing list archive)
State Superseded
Headers show
Series xfs: fix maxbytes problems on 32-bit systems | expand

Commit Message

Darrick J. Wong Jan. 8, 2020, 4:17 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

xfs_itruncate_extents_flags() is supposed to unmap every block in a file
from EOF onwards.  Oddly, it uses s_maxbytes as the upper limit to the
bunmapi range, even though s_maxbytes reflects the highest offset the
pagecache can support, not the highest offset that XFS supports.

The result of this confusion is that if you create a 20T file on a
64-bit machine, mount the filesystem on a 32-bit machine, and remove the
file, we leak everything above 16T.  Fix this by capping the bunmapi
request at the maximum possible block offset, not s_maxbytes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c |   23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

Comments

Christoph Hellwig Jan. 8, 2020, 8:11 a.m. UTC | #1
On Tue, Jan 07, 2020 at 08:17:45PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> xfs_itruncate_extents_flags() is supposed to unmap every block in a file
> from EOF onwards.  Oddly, it uses s_maxbytes as the upper limit to the
> bunmapi range, even though s_maxbytes reflects the highest offset the
> pagecache can support, not the highest offset that XFS supports.
> 
> The result of this confusion is that if you create a 20T file on a
> 64-bit machine, mount the filesystem on a 32-bit machine, and remove the
> file, we leak everything above 16T.  Fix this by capping the bunmapi
> request at the maximum possible block offset, not s_maxbytes.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_inode.c |   23 +++++++++++------------
>  1 file changed, 11 insertions(+), 12 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index fc3aec26ef87..79799ab30c93 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -1518,7 +1518,6 @@ xfs_itruncate_extents_flags(
>  	struct xfs_mount	*mp = ip->i_mount;
>  	struct xfs_trans	*tp = *tpp;
>  	xfs_fileoff_t		first_unmap_block;
> -	xfs_fileoff_t		last_block;
>  	xfs_filblks_t		unmap_len;
>  	int			error = 0;
>  
> @@ -1540,21 +1539,21 @@ xfs_itruncate_extents_flags(
>  	 * the end of the file (in a crash where the space is allocated
>  	 * but the inode size is not yet updated), simply remove any
>  	 * blocks which show up between the new EOF and the maximum
> -	 * possible file size.  If the first block to be removed is
> -	 * beyond the maximum file size (ie it is the same as last_block),
> -	 * then there is nothing to do.
> +	 * possible file size.
> +	 *
> +	 * We have to free all the blocks to the bmbt maximum offset, even if
> +	 * the page cache can't scale that far.
>  	 */
>  	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
> -	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
> -	if (first_unmap_block == last_block)
> +	if (first_unmap_block == XFS_MAX_FILEOFF)
>  		return 0;
>  
> -	ASSERT(first_unmap_block < last_block);
> -	unmap_len = last_block - first_unmap_block + 1;
> -	while (!done) {
> +	ASSERT(first_unmap_block < XFS_MAX_FILEOFF);

Instead of the assert we could just do the early return for

	first_unmap_block >= XFS_MAX_FILEOFF

and throw in a WARN_ON_ONCE, as that condition really should be nothing
but a sanity check.

Otherwise this looks good to me.
Darrick J. Wong Jan. 8, 2020, 4:37 p.m. UTC | #2
On Wed, Jan 08, 2020 at 12:11:57AM -0800, Christoph Hellwig wrote:
> On Tue, Jan 07, 2020 at 08:17:45PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > xfs_itruncate_extents_flags() is supposed to unmap every block in a file
> > from EOF onwards.  Oddly, it uses s_maxbytes as the upper limit to the
> > bunmapi range, even though s_maxbytes reflects the highest offset the
> > pagecache can support, not the highest offset that XFS supports.
> > 
> > The result of this confusion is that if you create a 20T file on a
> > 64-bit machine, mount the filesystem on a 32-bit machine, and remove the
> > file, we leak everything above 16T.  Fix this by capping the bunmapi
> > request at the maximum possible block offset, not s_maxbytes.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_inode.c |   23 +++++++++++------------
> >  1 file changed, 11 insertions(+), 12 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index fc3aec26ef87..79799ab30c93 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -1518,7 +1518,6 @@ xfs_itruncate_extents_flags(
> >  	struct xfs_mount	*mp = ip->i_mount;
> >  	struct xfs_trans	*tp = *tpp;
> >  	xfs_fileoff_t		first_unmap_block;
> > -	xfs_fileoff_t		last_block;
> >  	xfs_filblks_t		unmap_len;
> >  	int			error = 0;
> >  
> > @@ -1540,21 +1539,21 @@ xfs_itruncate_extents_flags(
> >  	 * the end of the file (in a crash where the space is allocated
> >  	 * but the inode size is not yet updated), simply remove any
> >  	 * blocks which show up between the new EOF and the maximum
> > -	 * possible file size.  If the first block to be removed is
> > -	 * beyond the maximum file size (ie it is the same as last_block),
> > -	 * then there is nothing to do.
> > +	 * possible file size.
> > +	 *
> > +	 * We have to free all the blocks to the bmbt maximum offset, even if
> > +	 * the page cache can't scale that far.
> >  	 */
> >  	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
> > -	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
> > -	if (first_unmap_block == last_block)
> > +	if (first_unmap_block == XFS_MAX_FILEOFF)
> >  		return 0;
> >  
> > -	ASSERT(first_unmap_block < last_block);
> > -	unmap_len = last_block - first_unmap_block + 1;
> > -	while (!done) {
> > +	ASSERT(first_unmap_block < XFS_MAX_FILEOFF);
> 
> Instead of the assert we could just do the early return for
> 
> 	first_unmap_block >= XFS_MAX_FILEOFF
> 
> and throw in a WARN_ON_ONCE, as that condition really should be nothing
> but a sanity check.
> 
> Otherwise this looks good to me.

Ok, done.

--D
diff mbox series

Patch

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index fc3aec26ef87..79799ab30c93 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1518,7 +1518,6 @@  xfs_itruncate_extents_flags(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp = *tpp;
 	xfs_fileoff_t		first_unmap_block;
-	xfs_fileoff_t		last_block;
 	xfs_filblks_t		unmap_len;
 	int			error = 0;
 
@@ -1540,21 +1539,21 @@  xfs_itruncate_extents_flags(
 	 * the end of the file (in a crash where the space is allocated
 	 * but the inode size is not yet updated), simply remove any
 	 * blocks which show up between the new EOF and the maximum
-	 * possible file size.  If the first block to be removed is
-	 * beyond the maximum file size (ie it is the same as last_block),
-	 * then there is nothing to do.
+	 * possible file size.
+	 *
+	 * We have to free all the blocks to the bmbt maximum offset, even if
+	 * the page cache can't scale that far.
 	 */
 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
-	if (first_unmap_block == last_block)
+	if (first_unmap_block == XFS_MAX_FILEOFF)
 		return 0;
 
-	ASSERT(first_unmap_block < last_block);
-	unmap_len = last_block - first_unmap_block + 1;
-	while (!done) {
+	ASSERT(first_unmap_block < XFS_MAX_FILEOFF);
+	unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
+	while (unmap_len > 0) {
 		ASSERT(tp->t_firstblock == NULLFSBLOCK);
-		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
-				    XFS_ITRUNC_MAX_EXTENTS, &done);
+		error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
+				flags, XFS_ITRUNC_MAX_EXTENTS);
 		if (error)
 			goto out;
 
@@ -1574,7 +1573,7 @@  xfs_itruncate_extents_flags(
 	if (whichfork == XFS_DATA_FORK) {
 		/* Remove all pending CoW reservations. */
 		error = xfs_reflink_cancel_cow_blocks(ip, &tp,
-				first_unmap_block, last_block, true);
+				first_unmap_block, XFS_MAX_FILEOFF, true);
 		if (error)
 			goto out;