[1/1] NFSv4.x recover from pre-mature loss of openstateid
diff mbox series

Message ID 20191216221304.25782-1-olga.kornievskaia@gmail.com
State New
Headers show
Series
  • [1/1] NFSv4.x recover from pre-mature loss of openstateid
Related show

Commit Message

Olga Kornievskaia Dec. 16, 2019, 10:13 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

Ever since the commit 0e0cb35b417f, it's possible to lose an open stateid
while retrying a CLOSE due to ERR_OLD_STATEID. Once that happens,
operations that require openstateid fail with EAGAIN which is propagated
to the application then tests like generic/446 and generic/168 fail with
"Resource temporarily unavailable".

Instead of returning this error, initiate state recovery when possible to
recover the open stateid and then try calling nfs4_select_rw_stateid()
again.

Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE")
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/nfs4proc.c  | 3 +++
 fs/nfs/nfs4state.c | 2 +-
 fs/nfs/pnfs.c      | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

Comments

Trond Myklebust Dec. 16, 2019, 10:41 p.m. UTC | #1
Hi Olga

On Mon, 2019-12-16 at 17:13 -0500, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> Ever since the commit 0e0cb35b417f, it's possible to lose an open
> stateid
> while retrying a CLOSE due to ERR_OLD_STATEID. Once that happens,
> operations that require openstateid fail with EAGAIN which is
> propagated
> to the application then tests like generic/446 and generic/168 fail
> with
> "Resource temporarily unavailable".
> 
> Instead of returning this error, initiate state recovery when
> possible to
> recover the open stateid and then try calling
> nfs4_select_rw_stateid()
> again.
> 
> Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in
> CLOSE/OPEN_DOWNGRADE")
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfs/nfs4proc.c  | 3 +++
>  fs/nfs/nfs4state.c | 2 +-
>  fs/nfs/pnfs.c      | 2 +-
>  3 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 76d37161409a..66f9631ba012 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -3239,6 +3239,8 @@ static int _nfs4_do_setattr(struct inode
> *inode,
>  		nfs_put_lock_context(l_ctx);
>  		if (status == -EIO)
>  			return -EBADF;
> +		else if (status)
> +			goto out;
>  	} else {
>  zero_stateid:
>  		nfs4_stateid_copy(&arg->stateid, &zero_stateid);
> @@ -3251,6 +3253,7 @@ static int _nfs4_do_setattr(struct inode
> *inode,
>  	put_cred(delegation_cred);
>  	if (status == 0 && ctx != NULL)
>  		renew_lease(server, timestamp);
> +out:
>  	trace_nfs4_setattr(inode, &arg->stateid, status);
>  	return status;
>  }
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 34552329233d..8686451893a6 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -1064,7 +1064,7 @@ int nfs4_select_rw_stateid(struct nfs4_state
> *state,
>  		 * choose to use.
>  		 */
>  		goto out;
> -	ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN;
> +	ret = nfs4_copy_open_stateid(dst, state) ? 0 :
> -NFS4ERR_BAD_STATEID;

This is not acceptable. We can't return NFSv4 error values to functions
that expect POSIX errors.

For instance pnfs_update_layout() tries to apply ERR_PTR() to this
return value, which breaks badly for non-POSIX errors (it returns an
invalid pointer that fails the IS_ERR() test).

>  out:
>  	if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
>  		dst->seqid = 0;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index cec3070ab577..11d646bbd2cb 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1998,7 +1998,7 @@ pnfs_update_layout(struct inode *ino,
>  			trace_pnfs_update_layout(ino, pos, count,
>  					iomode, lo, lseg,
>  					PNFS_UPDATE_LAYOUT_INVALID_OPEN
> );
> -			if (status != -EAGAIN)
> +			if (status != -EAGAIN && status !=
> -NFS4ERR_BAD_STATEID)
>  				goto out_unlock;
>  			spin_unlock(&ino->i_lock);
>  			nfs4_schedule_stateid_recovery(server, ctx-
> >state);

Patch
diff mbox series

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 76d37161409a..66f9631ba012 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3239,6 +3239,8 @@  static int _nfs4_do_setattr(struct inode *inode,
 		nfs_put_lock_context(l_ctx);
 		if (status == -EIO)
 			return -EBADF;
+		else if (status)
+			goto out;
 	} else {
 zero_stateid:
 		nfs4_stateid_copy(&arg->stateid, &zero_stateid);
@@ -3251,6 +3253,7 @@  static int _nfs4_do_setattr(struct inode *inode,
 	put_cred(delegation_cred);
 	if (status == 0 && ctx != NULL)
 		renew_lease(server, timestamp);
+out:
 	trace_nfs4_setattr(inode, &arg->stateid, status);
 	return status;
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 34552329233d..8686451893a6 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1064,7 +1064,7 @@  int nfs4_select_rw_stateid(struct nfs4_state *state,
 		 * choose to use.
 		 */
 		goto out;
-	ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN;
+	ret = nfs4_copy_open_stateid(dst, state) ? 0 : -NFS4ERR_BAD_STATEID;
 out:
 	if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
 		dst->seqid = 0;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cec3070ab577..11d646bbd2cb 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1998,7 +1998,7 @@  pnfs_update_layout(struct inode *ino,
 			trace_pnfs_update_layout(ino, pos, count,
 					iomode, lo, lseg,
 					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
-			if (status != -EAGAIN)
+			if (status != -EAGAIN && status != -NFS4ERR_BAD_STATEID)
 				goto out_unlock;
 			spin_unlock(&ino->i_lock);
 			nfs4_schedule_stateid_recovery(server, ctx->state);