[01/11] xfs: reflink should break pnfs leases before sharing blocks
diff mbox

Message ID 151676028366.12349.7219306586282315379.stgit@magnolia
State Accepted
Headers show

Commit Message

Darrick J. Wong Jan. 24, 2018, 2:18 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

Before we share blocks between files, we need to break the pnfs leases
on the layout before we start slicing and dicing the block map.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_reflink.c |   48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)



--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Brian Foster Jan. 24, 2018, 2:16 p.m. UTC | #1
On Tue, Jan 23, 2018 at 06:18:03PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Before we share blocks between files, we need to break the pnfs leases
> on the layout before we start slicing and dicing the block map.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_reflink.c |   48 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 47aea2e..f89a725 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1245,6 +1245,50 @@ xfs_reflink_remap_blocks(
>  }
>  
>  /*
> + * Grab the exclusive iolock for a data copy from src to dest, making
> + * sure to abide vfs locking order (lowest pointer value goes first) and
> + * breaking the pnfs layout leases on dest before proceeding.  The loop
> + * is needed because we cannot call the blocking break_layout() with the
> + * src iolock held, and therefore have to back out both locks.
> + */
> +static int
> +xfs_iolock_two_inodes_and_break_layout(
> +	struct inode		*src,
> +	struct inode		*dest)
> +{
> +	bool			src_first = src < dest;
> +	bool			src_last = src > dest;
> +	int			error;
> +
> +retry:
> +	if (src_first) {
> +		inode_lock(src);
> +		inode_lock_nested(dest, I_MUTEX_NONDIR2);
> +	} else {
> +		inode_lock(dest);
> +	}
> +
> +	error = break_layout(dest, false);
> +	if (error == -EWOULDBLOCK) {
> +		inode_unlock(dest);
> +		if (src_first)
> +			inode_unlock(src);
> +		error = break_layout(dest, true);
> +		if (error)
> +			return error;
> +		goto retry;
> +	} else if (error) {
> +		inode_unlock(dest);
> +		if (src_first)
> +			inode_unlock(src);
> +		return error;
> +	}
> +	if (src_last)
> +		inode_lock_nested(src, I_MUTEX_NONDIR2);
> +	return 0;
> +}
> +
> +/*
>   * Link a range of blocks from one file to another.
>   */
>  int
> @@ -1274,7 +1318,9 @@ xfs_reflink_remap_range(
>  		return -EIO;
>  
>  	/* Lock both files against IO */
> -	lock_two_nondirectories(inode_in, inode_out);
> +	ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
> +	if (ret)
> +		return ret;
>  	if (same_inode)
>  		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
>  	else
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
hch@infradead.org Jan. 26, 2018, 9:06 a.m. UTC | #2
On Tue, Jan 23, 2018 at 06:18:03PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Before we share blocks between files, we need to break the pnfs leases
> on the layout before we start slicing and dicing the block map.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_reflink.c |   48 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 47 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 47aea2e..f89a725 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1245,6 +1245,50 @@ xfs_reflink_remap_blocks(
>  }
>  
>  /*
> + * Grab the exclusive iolock for a data copy from src to dest, making
> + * sure to abide vfs locking order (lowest pointer value goes first) and
> + * breaking the pnfs layout leases on dest before proceeding.  The loop
> + * is needed because we cannot call the blocking break_layout() with the
> + * src iolock held, and therefore have to back out both locks.
> + */
> +static int
> +xfs_iolock_two_inodes_and_break_layout(
> +	struct inode		*src,
> +	struct inode		*dest)
> +{
> +	bool			src_first = src < dest;
> +	bool			src_last = src > dest;

I find the double predicates here highly confusing.

Also the code doesn't seem to handle the src == dest case as
far as I can tell.

> +retry:
> +	if (src_first) {
> +		inode_lock(src);
> +		inode_lock_nested(dest, I_MUTEX_NONDIR2);
> +	} else {
> +		inode_lock(dest);
> +	}

Shouldn't this be replaced by a call to lock_two_nondirectories?
Even if that holds both locks over the non-blocking break_layout
it makes things a lot simpler and only does an additional roundtrip
for the layouts outstanding slow path.

> +	error = break_layout(dest, false);
> +	if (error == -EWOULDBLOCK) {
> +		inode_unlock(dest);
> +		if (src_first)
> +			inode_unlock(src);

unlock_two_nondirectories?

> +		error = break_layout(dest, true);
> +		if (error)
> +			return error;
> +		goto retry;
> +	} else if (error) {

no need for an else after a goto.

> +		inode_unlock(dest);
> +		if (src_first)
> +			inode_unlock(src);

unlock_two_nondirectories?

Also seems like this could be simplified to:

	if (error) {
		unlock_two_nondirectories()
		if (error == -EWOULDBLOCK)
			goto retry;
		return error;
	}

So I guess the whole thing could simply become something like:

retry:
	lock_two_nondirectories(src, dest);
	error = break_layout(dest, false);
	if (error) {
		unlock_two_nondirectories(src, dest);
		if (error == -EWOULDBLOCK)
			goto retry;
		return error;
	}

and could probably just be inlined into the caller..
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Jan. 26, 2018, 6:26 p.m. UTC | #3
On Fri, Jan 26, 2018 at 01:06:52AM -0800, Christoph Hellwig wrote:
> On Tue, Jan 23, 2018 at 06:18:03PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Before we share blocks between files, we need to break the pnfs leases
> > on the layout before we start slicing and dicing the block map.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_reflink.c |   48 +++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 47 insertions(+), 1 deletion(-)
> > 
> > 
> > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > index 47aea2e..f89a725 100644
> > --- a/fs/xfs/xfs_reflink.c
> > +++ b/fs/xfs/xfs_reflink.c
> > @@ -1245,6 +1245,50 @@ xfs_reflink_remap_blocks(
> >  }
> >  
> >  /*
> > + * Grab the exclusive iolock for a data copy from src to dest, making
> > + * sure to abide vfs locking order (lowest pointer value goes first) and
> > + * breaking the pnfs layout leases on dest before proceeding.  The loop
> > + * is needed because we cannot call the blocking break_layout() with the
> > + * src iolock held, and therefore have to back out both locks.
> > + */
> > +static int
> > +xfs_iolock_two_inodes_and_break_layout(
> > +	struct inode		*src,
> > +	struct inode		*dest)
> > +{
> > +	bool			src_first = src < dest;
> > +	bool			src_last = src > dest;
> 
> I find the double predicates here highly confusing.
> 
> Also the code doesn't seem to handle the src == dest case as
> far as I can tell.

I guess they are confusing; when src == dest, src_first and src_last are
both false.

> > +retry:
> > +	if (src_first) {
> > +		inode_lock(src);
> > +		inode_lock_nested(dest, I_MUTEX_NONDIR2);
> > +	} else {
> > +		inode_lock(dest);
> > +	}
> 
> Shouldn't this be replaced by a call to lock_two_nondirectories?
> Even if that holds both locks over the non-blocking break_layout
> it makes things a lot simpler and only does an additional roundtrip
> for the layouts outstanding slow path.
> 
> > +	error = break_layout(dest, false);
> > +	if (error == -EWOULDBLOCK) {
> > +		inode_unlock(dest);
> > +		if (src_first)
> > +			inode_unlock(src);
> 
> unlock_two_nondirectories?
> 
> > +		error = break_layout(dest, true);
> > +		if (error)
> > +			return error;
> > +		goto retry;
> > +	} else if (error) {
> 
> no need for an else after a goto.
> 
> > +		inode_unlock(dest);
> > +		if (src_first)
> > +			inode_unlock(src);
> 
> unlock_two_nondirectories?
> 
> Also seems like this could be simplified to:
> 
> 	if (error) {
> 		unlock_two_nondirectories()
> 		if (error == -EWOULDBLOCK)
> 			goto retry;
> 		return error;
> 	}
> 
> So I guess the whole thing could simply become something like:
> 
> retry:
> 	lock_two_nondirectories(src, dest);
> 	error = break_layout(dest, false);
> 	if (error) {
> 		unlock_two_nondirectories(src, dest);
> 		if (error == -EWOULDBLOCK)
> 			goto retry;
> 		return error;
> 	}
> 
> and could probably just be inlined into the caller..

Yeah, that's simpler... though at this point I'll have to put all this
into a new series having already pushed to for-next.  :/

(Sorry, kinda overburdened this week)

--D

> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 47aea2e..f89a725 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1245,6 +1245,50 @@  xfs_reflink_remap_blocks(
 }
 
 /*
+ * Grab the exclusive iolock for a data copy from src to dest, making
+ * sure to abide vfs locking order (lowest pointer value goes first) and
+ * breaking the pnfs layout leases on dest before proceeding.  The loop
+ * is needed because we cannot call the blocking break_layout() with the
+ * src iolock held, and therefore have to back out both locks.
+ */
+static int
+xfs_iolock_two_inodes_and_break_layout(
+	struct inode		*src,
+	struct inode		*dest)
+{
+	bool			src_first = src < dest;
+	bool			src_last = src > dest;
+	int			error;
+
+retry:
+	if (src_first) {
+		inode_lock(src);
+		inode_lock_nested(dest, I_MUTEX_NONDIR2);
+	} else {
+		inode_lock(dest);
+	}
+
+	error = break_layout(dest, false);
+	if (error == -EWOULDBLOCK) {
+		inode_unlock(dest);
+		if (src_first)
+			inode_unlock(src);
+		error = break_layout(dest, true);
+		if (error)
+			return error;
+		goto retry;
+	} else if (error) {
+		inode_unlock(dest);
+		if (src_first)
+			inode_unlock(src);
+		return error;
+	}
+	if (src_last)
+		inode_lock_nested(src, I_MUTEX_NONDIR2);
+	return 0;
+}
+
+/*
  * Link a range of blocks from one file to another.
  */
 int
@@ -1274,7 +1318,9 @@  xfs_reflink_remap_range(
 		return -EIO;
 
 	/* Lock both files against IO */
-	lock_two_nondirectories(inode_in, inode_out);
+	ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
+	if (ret)
+		return ret;
 	if (same_inode)
 		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
 	else