[24/25] xfs: support returning partial reflink results
diff mbox series

Message ID 153938931226.8361.7365948775364411156.stgit@magnolia
State New
Headers show
Series
  • fs: fixes for serious clone/dedupe problems
Related show

Commit Message

Darrick J. Wong Oct. 13, 2018, 12:08 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Back when the XFS reflink code only supported clone_file_range, we were
only able to return zero or negative error codes to userspace.  However,
now that copy_file_range (which returns bytes copied) can use XFS'
clone_file_range, we have the opportunity to return partial results.
For example, if userspace sends a 1GB clone request and we run out of
space halfway through, we at least can tell userspace that we completed
512M of that request like a regular write.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c    |    5 +----
 fs/xfs/xfs_reflink.c |   20 +++++++++++++++-----
 fs/xfs/xfs_reflink.h |    2 +-
 3 files changed, 17 insertions(+), 10 deletions(-)

Comments

Christoph Hellwig Oct. 14, 2018, 5:35 p.m. UTC | #1
On Fri, Oct 12, 2018 at 05:08:32PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Back when the XFS reflink code only supported clone_file_range, we were
> only able to return zero or negative error codes to userspace.  However,
> now that copy_file_range (which returns bytes copied) can use XFS'
> clone_file_range, we have the opportunity to return partial results.
> For example, if userspace sends a 1GB clone request and we run out of
> space halfway through, we at least can tell userspace that we completed
> 512M of that request like a regular write.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_file.c    |    5 +----
>  fs/xfs/xfs_reflink.c |   20 +++++++++++++++-----
>  fs/xfs/xfs_reflink.h |    2 +-
>  3 files changed, 17 insertions(+), 10 deletions(-)
> 
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index bc9e94bcb7a3..b2b15b8dc4a1 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -928,14 +928,11 @@ xfs_file_remap_range(
>  	loff_t		len,
>  	unsigned int	remap_flags)
>  {
> -	int		ret;
> -
>  	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
>  		return -EINVAL;
>  
> -	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
> +	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
>  			len, remap_flags);

Is there any reason not to merge xfs_file_remap_range and
xfs_reflink_remap_range at this point?

>  STATIC int
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index e1592e751cc2..66a8ddb9c058 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1123,6 +1123,7 @@ xfs_reflink_remap_blocks(
>  	struct xfs_inode	*dest,
>  	xfs_fileoff_t		destoff,
>  	xfs_filblks_t		len,
> +	xfs_filblks_t		*remapped_len,
>  	xfs_off_t		new_isize)
>  {
>  	struct xfs_bmbt_irec	imap;
> @@ -1130,6 +1131,7 @@ xfs_reflink_remap_blocks(
>  	int			error = 0;
>  	xfs_filblks_t		range_len;
>  
> +	*remapped_len = 0;
>  	/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
>  	while (len) {
>  		uint		lock_mode;
> @@ -1168,6 +1170,7 @@ xfs_reflink_remap_blocks(
>  		srcoff += range_len;
>  		destoff += range_len;
>  		len -= range_len;
> +		*remapped_len += range_len;
>  	}
>  
>  	return 0;
> @@ -1391,7 +1394,7 @@ xfs_reflink_remap_prep(
>  /*
>   * Link a range of blocks from one file to another.
>   */
> -int
> +loff_t
>  xfs_reflink_remap_range(
>  	struct file		*file_in,
>  	loff_t			pos_in,
> @@ -1406,9 +1409,10 @@ xfs_reflink_remap_range(
>  	struct xfs_inode	*dest = XFS_I(inode_out);
>  	struct xfs_mount	*mp = src->i_mount;
>  	xfs_fileoff_t		sfsbno, dfsbno;
> -	xfs_filblks_t		fsblen;
> +	xfs_filblks_t		fsblen, remappedfsb = 0;
> +	loff_t			remapped_bytes = 0;
>  	xfs_extlen_t		cowextsize;
> -	ssize_t			ret;
> +	int			ret;
>  
>  	if (!xfs_sb_version_hasreflink(&mp->m_sb))
>  		return -EOPNOTSUPP;
> @@ -1424,11 +1428,17 @@ xfs_reflink_remap_range(
>  
>  	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
>  
> +	if (len == 0) {
> +		ret = 0;
> +		goto out_unlock;
> +	}

Looking at the final tree this looks like dead (and bogus) code:

	if (ret <= 0)
	        return ret;

        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

        if (len == 0) {
                ret = 0;
                goto out_unlock;
        }


> +
>  	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
>  	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
>  	fsblen = XFS_B_TO_FSB(mp, len);
>  	ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
> +			&remappedfsb, pos_out + len);
> +	remapped_bytes = min_t(int64_t, len, XFS_FSB_TO_B(mp, remappedfsb));
>  	if (ret)
>  		goto out_unlock;

Shouldn't we just follow the calling convention of the method here:

 negative return value:	error
 positive:		number of bytes handled
 
 Something like:

	done = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno,
			fsblen, pos_out + len);
	if (done < 0) {
		xfs_reflink_remap_unlock(file_in, file_out);
		trace_xfs_reflink_remap_range_error(dest, done, _RET_IP_);
		return done;
	}

>  
> @@ -1451,7 +1461,7 @@ xfs_reflink_remap_range(
>  	xfs_reflink_remap_unlock(file_in, file_out);
>  	if (ret)
>  		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);

And then we can drop this conditional here.
Dave Chinner Oct. 14, 2018, 11:05 p.m. UTC | #2
On Sun, Oct 14, 2018 at 10:35:46AM -0700, Christoph Hellwig wrote:
> On Fri, Oct 12, 2018 at 05:08:32PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Back when the XFS reflink code only supported clone_file_range, we were
> > only able to return zero or negative error codes to userspace.  However,
> > now that copy_file_range (which returns bytes copied) can use XFS'
> > clone_file_range, we have the opportunity to return partial results.
> > For example, if userspace sends a 1GB clone request and we run out of
> > space halfway through, we at least can tell userspace that we completed
> > 512M of that request like a regular write.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_file.c    |    5 +----
> >  fs/xfs/xfs_reflink.c |   20 +++++++++++++++-----
> >  fs/xfs/xfs_reflink.h |    2 +-
> >  3 files changed, 17 insertions(+), 10 deletions(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index bc9e94bcb7a3..b2b15b8dc4a1 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -928,14 +928,11 @@ xfs_file_remap_range(
> >  	loff_t		len,
> >  	unsigned int	remap_flags)
> >  {
> > -	int		ret;
> > -
> >  	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
> >  		return -EINVAL;
> >  
> > -	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
> > +	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
> >  			len, remap_flags);
> 
> Is there any reason not to merge xfs_file_remap_range and
> xfs_reflink_remap_range at this point?

Yeah, that seems like a good idea to me - pulling all the
vfs/generic code interactions back up into xfs_file.c would match
how the rest of the file operations are layered w.r.t. external and
internal XFS code...

Cheers,

Dave.
Darrick J. Wong Oct. 15, 2018, 3:49 p.m. UTC | #3
On Mon, Oct 15, 2018 at 10:05:36AM +1100, Dave Chinner wrote:
> On Sun, Oct 14, 2018 at 10:35:46AM -0700, Christoph Hellwig wrote:
> > On Fri, Oct 12, 2018 at 05:08:32PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Back when the XFS reflink code only supported clone_file_range, we were
> > > only able to return zero or negative error codes to userspace.  However,
> > > now that copy_file_range (which returns bytes copied) can use XFS'
> > > clone_file_range, we have the opportunity to return partial results.
> > > For example, if userspace sends a 1GB clone request and we run out of
> > > space halfway through, we at least can tell userspace that we completed
> > > 512M of that request like a regular write.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/xfs_file.c    |    5 +----
> > >  fs/xfs/xfs_reflink.c |   20 +++++++++++++++-----
> > >  fs/xfs/xfs_reflink.h |    2 +-
> > >  3 files changed, 17 insertions(+), 10 deletions(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > index bc9e94bcb7a3..b2b15b8dc4a1 100644
> > > --- a/fs/xfs/xfs_file.c
> > > +++ b/fs/xfs/xfs_file.c
> > > @@ -928,14 +928,11 @@ xfs_file_remap_range(
> > >  	loff_t		len,
> > >  	unsigned int	remap_flags)
> > >  {
> > > -	int		ret;
> > > -
> > >  	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
> > >  		return -EINVAL;
> > >  
> > > -	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
> > > +	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
> > >  			len, remap_flags);
> > 
> > Is there any reason not to merge xfs_file_remap_range and
> > xfs_reflink_remap_range at this point?
> 
> Yeah, that seems like a good idea to me - pulling all the
> vfs/generic code interactions back up into xfs_file.c would match
> how the rest of the file operations are layered w.r.t. external and
> internal XFS code...

Yeah, ditto ocfs2.  I'll look into throwing a few more refactors onto
the end of this series... 8-D

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

Patch
diff mbox series

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bc9e94bcb7a3..b2b15b8dc4a1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -928,14 +928,11 @@  xfs_file_remap_range(
 	loff_t		len,
 	unsigned int	remap_flags)
 {
-	int		ret;
-
 	if (!remap_check_flags(remap_flags, RFR_SAME_DATA))
 		return -EINVAL;
 
-	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 			len, remap_flags);
-	return ret < 0 ? ret : len;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e1592e751cc2..66a8ddb9c058 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1123,6 +1123,7 @@  xfs_reflink_remap_blocks(
 	struct xfs_inode	*dest,
 	xfs_fileoff_t		destoff,
 	xfs_filblks_t		len,
+	xfs_filblks_t		*remapped_len,
 	xfs_off_t		new_isize)
 {
 	struct xfs_bmbt_irec	imap;
@@ -1130,6 +1131,7 @@  xfs_reflink_remap_blocks(
 	int			error = 0;
 	xfs_filblks_t		range_len;
 
+	*remapped_len = 0;
 	/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
 	while (len) {
 		uint		lock_mode;
@@ -1168,6 +1170,7 @@  xfs_reflink_remap_blocks(
 		srcoff += range_len;
 		destoff += range_len;
 		len -= range_len;
+		*remapped_len += range_len;
 	}
 
 	return 0;
@@ -1391,7 +1394,7 @@  xfs_reflink_remap_prep(
 /*
  * Link a range of blocks from one file to another.
  */
-int
+loff_t
 xfs_reflink_remap_range(
 	struct file		*file_in,
 	loff_t			pos_in,
@@ -1406,9 +1409,10 @@  xfs_reflink_remap_range(
 	struct xfs_inode	*dest = XFS_I(inode_out);
 	struct xfs_mount	*mp = src->i_mount;
 	xfs_fileoff_t		sfsbno, dfsbno;
-	xfs_filblks_t		fsblen;
+	xfs_filblks_t		fsblen, remappedfsb = 0;
+	loff_t			remapped_bytes = 0;
 	xfs_extlen_t		cowextsize;
-	ssize_t			ret;
+	int			ret;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return -EOPNOTSUPP;
@@ -1424,11 +1428,17 @@  xfs_reflink_remap_range(
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
+	if (len == 0) {
+		ret = 0;
+		goto out_unlock;
+	}
+
 	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
 	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
 	fsblen = XFS_B_TO_FSB(mp, len);
 	ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-			pos_out + len);
+			&remappedfsb, pos_out + len);
+	remapped_bytes = min_t(int64_t, len, XFS_FSB_TO_B(mp, remappedfsb));
 	if (ret)
 		goto out_unlock;
 
@@ -1451,7 +1461,7 @@  xfs_reflink_remap_range(
 	xfs_reflink_remap_unlock(file_in, file_out);
 	if (ret)
 		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
-	return ret;
+	return remapped_bytes > 0 ? remapped_bytes : ret;
 }
 
 /*
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index c3c46c276fe1..cbc26ff79a8f 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -27,7 +27,7 @@  extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, loff_t len,
 		unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,