NFSv4: Fix _nfs4_do_setlk()

Message ID	20180730024052.28026-1-trond.myklebust@hammerspace.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-nfs-owner@kernel.org> From: Trond Myklebust <trondmy@gmail.com> To: linux-nfs@vger.kernel.org Cc: Kenneth Johansson <ken@kenjo.org>, Benjamin Coddington <bcodding@redhat.com>, Jeff Layton <jlayton@kernel.org> Subject: [PATCH] NFSv4: Fix _nfs4_do_setlk() Date: Sun, 29 Jul 2018 22:40:52 -0400 Message-Id: <20180730024052.28026-1-trond.myklebust@hammerspace.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-nfs-owner@vger.kernel.org Precedence: bulk
Series	NFSv4: Fix _nfs4_do_setlk() \| expand NFSv4: Fix _nfs4_do_setlk()

Message ID

20180730024052.28026-1-trond.myklebust@hammerspace.com (mailing list archive)

State

New, archived

Headers

From: Trond Myklebust <trondmy@gmail.com>
To: linux-nfs@vger.kernel.org
Cc: Kenneth Johansson <ken@kenjo.org>,
        Benjamin Coddington <bcodding@redhat.com>,
        Jeff Layton <jlayton@kernel.org>
Subject: [PATCH] NFSv4: Fix _nfs4_do_setlk()
Date: Sun, 29 Jul 2018 22:40:52 -0400
Message-Id: <20180730024052.28026-1-trond.myklebust@hammerspace.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Sender: linux-nfs-owner@vger.kernel.org
Precedence: bulk

Series

NFSv4: Fix _nfs4_do_setlk() | expand

Commit Message

Trond Myklebust July 30, 2018, 2:40 a.m. UTC

The patch to fix the case where a lock request was interrupted ended up
changing default handling of errors such as NFS4ERR_DENIED and caused the
client to immediately resend the lock request. Let's do a partial revert
of that request so that the default is now to exit, but change the way
we handle resends to take into account the fact that the user may have
interrupted the request.

Reported-by: Kenneth Johansson <ken@kenjo.org>
Fixes: a3cf9bca2ace ("NFSv4: Don't add a new lock on an interrupted wait..")
Cc: Benjamin Coddington <bcodding@redhat.com>
Cc: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

Comments

Jeff Layton July 30, 2018, 7:06 p.m. UTC | #1

On Sun, 2018-07-29 at 22:40 -0400, Trond Myklebust wrote:
> The patch to fix the case where a lock request was interrupted ended up
> changing default handling of errors such as NFS4ERR_DENIED and caused the
> client to immediately resend the lock request. Let's do a partial revert
> of that request so that the default is now to exit, but change the way
> we handle resends to take into account the fact that the user may have
> interrupted the request.
> 
> Reported-by: Kenneth Johansson <ken@kenjo.org>
> Fixes: a3cf9bca2ace ("NFSv4: Don't add a new lock on an interrupted wait..")
> Cc: Benjamin Coddington <bcodding@redhat.com>
> Cc: Jeff Layton <jlayton@kernel.org>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/nfs4proc.c | 26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index f73a8315933f..8e482f634d60 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6501,34 +6501,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>  		if (data->arg.new_lock && !data->cancelled) {

Not specific to your patch, but I wonder if avoiding setting a lock
record after we've successfully issued a LOCK is the right thing to do
here.

Suppose we issue a LOCK request and it's successful, but the wait for it
is canceled before the reply comes in. The reply then comes in and
data->cancelled is now true and now we don't set the lock.

Eventually we end up calling locks_remove_posix but now there's not a
lock on the local list so we skip sending a LOCKU. Is that a potential
problem?

>  			data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
>  			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> -				break;
> +				goto out_restart;
>  		}
> -
>  		if (data->arg.new_lock_owner != 0) {
>  			nfs_confirm_seqid(&lsp->ls_seqid, 0);
>  			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
>  			set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
> -			goto out_done;
> -		} else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
> -			goto out_done;
> -
> +		} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
> +			goto out_restart;
>  		break;
>  	case -NFS4ERR_BAD_STATEID:
>  	case -NFS4ERR_OLD_STATEID:
>  	case -NFS4ERR_STALE_STATEID:
>  	case -NFS4ERR_EXPIRED:
>  		if (data->arg.new_lock_owner != 0) {
> -			if (nfs4_stateid_match(&data->arg.open_stateid,
> +			if (!nfs4_stateid_match(&data->arg.open_stateid,
>  						&lsp->ls_state->open_stateid))
> -				goto out_done;
> -		} else if (nfs4_stateid_match(&data->arg.lock_stateid,
> +				goto out_restart;
> +		} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
>  						&lsp->ls_stateid))
> -				goto out_done;
> +				goto out_restart;
>  	}
> -	if (!data->cancelled)
> -		rpc_restart_call_prepare(task);
>  out_done:
>  	dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
> +	return;
> +out_restart:
> +	if (!data->cancelled)
> +		rpc_restart_call_prepare(task);
> +	goto out_done;
>  }
>  
>  static void nfs4_lock_release(void *calldata)
> @@ -6537,7 +6537,7 @@ static void nfs4_lock_release(void *calldata)
>  
>  	dprintk("%s: begin!\n", __func__);
>  	nfs_free_seqid(data->arg.open_seqid);
> -	if (data->cancelled) {
> +	if (data->cancelled && data->rpc_status == 0) {
>  		struct rpc_task *task;
>  		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
>  				data->arg.lock_seqid);

Regardless of the question above, this should fix the most recent
regression, so let's take it for now and we can look at that bit more
closely later.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Trond Myklebust July 30, 2018, 7:19 p.m. UTC | #2

On Mon, 2018-07-30 at 15:06 -0400, Jeff Layton wrote:
> On Sun, 2018-07-29 at 22:40 -0400, Trond Myklebust wrote:
> > The patch to fix the case where a lock request was interrupted
> > ended up
> > changing default handling of errors such as NFS4ERR_DENIED and
> > caused the
> > client to immediately resend the lock request. Let's do a partial
> > revert
> > of that request so that the default is now to exit, but change the
> > way
> > we handle resends to take into account the fact that the user may
> > have
> > interrupted the request.
> > 
> > Reported-by: Kenneth Johansson <ken@kenjo.org>
> > Fixes: a3cf9bca2ace ("NFSv4: Don't add a new lock on an interrupted
> > wait..")
> > Cc: Benjamin Coddington <bcodding@redhat.com>
> > Cc: Jeff Layton <jlayton@kernel.org>
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  fs/nfs/nfs4proc.c | 26 +++++++++++++-------------
> >  1 file changed, 13 insertions(+), 13 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index f73a8315933f..8e482f634d60 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -6501,34 +6501,34 @@ static void nfs4_lock_done(struct rpc_task
> > *task, void *calldata)
> >  		if (data->arg.new_lock && !data->cancelled) {
> 
> Not specific to your patch, but I wonder if avoiding setting a lock
> record after we've successfully issued a LOCK is the right thing to
> do
> here.
> 
> Suppose we issue a LOCK request and it's successful, but the wait for
> it
> is canceled before the reply comes in. The reply then comes in and
> data->cancelled is now true and now we don't set the lock.
> 
> Eventually we end up calling locks_remove_posix but now there's not a
> lock on the local list so we skip sending a LOCKU. Is that a
> potential
> problem?

See below: nfs4_lock_release() will call nfs4_do_unlck() and undo the
lock in this case.

> 
> >  			data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
> >  			if (locks_lock_inode_wait(lsp->ls_state->inode, 
> > &data->fl) < 0)
> > -				break;
> > +				goto out_restart;
> >  		}
> > -
> >  		if (data->arg.new_lock_owner != 0) {
> >  			nfs_confirm_seqid(&lsp->ls_seqid, 0);
> >  			nfs4_stateid_copy(&lsp->ls_stateid, &data-
> > >res.stateid);
> >  			set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
> > -			goto out_done;
> > -		} else if (nfs4_update_lock_stateid(lsp, &data-
> > >res.stateid))
> > -			goto out_done;
> > -
> > +		} else if (!nfs4_update_lock_stateid(lsp, &data-
> > >res.stateid))
> > +			goto out_restart;
> >  		break;
> >  	case -NFS4ERR_BAD_STATEID:
> >  	case -NFS4ERR_OLD_STATEID:
> >  	case -NFS4ERR_STALE_STATEID:
> >  	case -NFS4ERR_EXPIRED:
> >  		if (data->arg.new_lock_owner != 0) {
> > -			if (nfs4_stateid_match(&data->arg.open_stateid,
> > +			if (!nfs4_stateid_match(&data-
> > >arg.open_stateid,
> >  						&lsp->ls_state-
> > >open_stateid))
> > -				goto out_done;
> > -		} else if (nfs4_stateid_match(&data->arg.lock_stateid,
> > +				goto out_restart;
> > +		} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
> >  						&lsp->ls_stateid))
> > -				goto out_done;
> > +				goto out_restart;
> >  	}
> > -	if (!data->cancelled)
> > -		rpc_restart_call_prepare(task);
> >  out_done:
> >  	dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
> > +	return;
> > +out_restart:
> > +	if (!data->cancelled)
> > +		rpc_restart_call_prepare(task);
> > +	goto out_done;
> >  }
> >  
> >  static void nfs4_lock_release(void *calldata)
> > @@ -6537,7 +6537,7 @@ static void nfs4_lock_release(void *calldata)
> >  
> >  	dprintk("%s: begin!\n", __func__);
> >  	nfs_free_seqid(data->arg.open_seqid);
> > -	if (data->cancelled) {
> > +	if (data->cancelled && data->rpc_status == 0) {
> >  		struct rpc_task *task;
> >  		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
> >  				data->arg.lock_seqid);
> 
> Regardless of the question above, this should fix the most recent
> regression, so let's take it for now and we can look at that bit more
> closely later.
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Benjamin Coddington Aug. 9, 2018, 10:15 a.m. UTC | #3

Hi Jeff and Trond, thanks for the fix-up and apologies for the 
fire-drill.

I ran this through my test and want to report that it still fixes up my
leftover lock problem, and looks right to me.

Ben

On 29 Jul 2018, at 22:40, Trond Myklebust wrote:

> The patch to fix the case where a lock request was interrupted ended 
> up
> changing default handling of errors such as NFS4ERR_DENIED and caused 
> the
> client to immediately resend the lock request. Let's do a partial 
> revert
> of that request so that the default is now to exit, but change the way
> we handle resends to take into account the fact that the user may have
> interrupted the request.
>
> Reported-by: Kenneth Johansson <ken@kenjo.org>
> Fixes: a3cf9bca2ace ("NFSv4: Don't add a new lock on an interrupted 
> wait..")
> Cc: Benjamin Coddington <bcodding@redhat.com>
> Cc: Jeff Layton <jlayton@kernel.org>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/nfs4proc.c | 26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index f73a8315933f..8e482f634d60 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6501,34 +6501,34 @@ static void nfs4_lock_done(struct rpc_task 
> *task, void *calldata)
>  		if (data->arg.new_lock && !data->cancelled) {
>  			data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
>  			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> -				break;
> +				goto out_restart;
>  		}
> -
>  		if (data->arg.new_lock_owner != 0) {
>  			nfs_confirm_seqid(&lsp->ls_seqid, 0);
>  			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
>  			set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
> -			goto out_done;
> -		} else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
> -			goto out_done;
> -
> +		} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
> +			goto out_restart;
>  		break;
>  	case -NFS4ERR_BAD_STATEID:
>  	case -NFS4ERR_OLD_STATEID:
>  	case -NFS4ERR_STALE_STATEID:
>  	case -NFS4ERR_EXPIRED:
>  		if (data->arg.new_lock_owner != 0) {
> -			if (nfs4_stateid_match(&data->arg.open_stateid,
> +			if (!nfs4_stateid_match(&data->arg.open_stateid,
>  						&lsp->ls_state->open_stateid))
> -				goto out_done;
> -		} else if (nfs4_stateid_match(&data->arg.lock_stateid,
> +				goto out_restart;
> +		} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
>  						&lsp->ls_stateid))
> -				goto out_done;
> +				goto out_restart;
>  	}
> -	if (!data->cancelled)
> -		rpc_restart_call_prepare(task);
>  out_done:
>  	dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
> +	return;
> +out_restart:
> +	if (!data->cancelled)
> +		rpc_restart_call_prepare(task);
> +	goto out_done;
>  }
>
>  static void nfs4_lock_release(void *calldata)
> @@ -6537,7 +6537,7 @@ static void nfs4_lock_release(void *calldata)
>
>  	dprintk("%s: begin!\n", __func__);
>  	nfs_free_seqid(data->arg.open_seqid);
> -	if (data->cancelled) {
> +	if (data->cancelled && data->rpc_status == 0) {
>  		struct rpc_task *task;
>  		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
>  				data->arg.lock_seqid);
> -- 
> 2.17.1
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f73a8315933f..8e482f634d60 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6501,34 +6501,34 @@  static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 		if (data->arg.new_lock && !data->cancelled) {
 			data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
 			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
-				break;
+				goto out_restart;
 		}
-
 		if (data->arg.new_lock_owner != 0) {
 			nfs_confirm_seqid(&lsp->ls_seqid, 0);
 			nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
 			set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
-			goto out_done;
-		} else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
-			goto out_done;
-
+		} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
+			goto out_restart;
 		break;
 	case -NFS4ERR_BAD_STATEID:
 	case -NFS4ERR_OLD_STATEID:
 	case -NFS4ERR_STALE_STATEID:
 	case -NFS4ERR_EXPIRED:
 		if (data->arg.new_lock_owner != 0) {
-			if (nfs4_stateid_match(&data->arg.open_stateid,
+			if (!nfs4_stateid_match(&data->arg.open_stateid,
 						&lsp->ls_state->open_stateid))
-				goto out_done;
-		} else if (nfs4_stateid_match(&data->arg.lock_stateid,
+				goto out_restart;
+		} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
 						&lsp->ls_stateid))
-				goto out_done;
+				goto out_restart;
 	}
-	if (!data->cancelled)
-		rpc_restart_call_prepare(task);
 out_done:
 	dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
+	return;
+out_restart:
+	if (!data->cancelled)
+		rpc_restart_call_prepare(task);
+	goto out_done;
 }
 
 static void nfs4_lock_release(void *calldata)
@@ -6537,7 +6537,7 @@  static void nfs4_lock_release(void *calldata)
 
 	dprintk("%s: begin!\n", __func__);
 	nfs_free_seqid(data->arg.open_seqid);
-	if (data->cancelled) {
+	if (data->cancelled && data->rpc_status == 0) {
 		struct rpc_task *task;
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
 				data->arg.lock_seqid);

NFSv4: Fix _nfs4_do_setlk()

Commit Message

Comments

Patch