diff mbox

NFS: Don't disconnect open-owner on NFS4ERR_BAD_SEQID

Message ID 87bmyx3q3u.fsf@notabene.neil.brown.name
State New
Headers show

Commit Message

NeilBrown Oct. 7, 2016, 1:27 a.m. UTC
Hi again,
 I posted a version of this patch 4 months and got no reply, so
 thought it might be time to try again.
 This version includes a small change to handle the case when a
 delegation stateid gets a BAD_STATEID, in the context of the open-owner
 getting a BAD_SEQID.
 Obviously this whole issue can only happen if the server is buggy (or
 if the client is buggy, but I don't think it is), but it would be best
 to handle that case gracefully.  Currently it spins indefinitely.

Thanks,
NeilBrown

From: NeilBrown <neilb@suse.com>
Subject: [PATCH] NFS: Don't disconnect open-owner on NFS4ERR_BAD_SEQID

When an NFS4ERR_BAD_SEQID is received the open-owner is removed from
the ->state_owners rbtree so that it will no longer be used.

If any stateids attached to this open-owner are still in use, and if a
request using one get an NFS4ERR_BAD_STATEID reply, this can for bad.

The state is marked as needing recovery and the nfs4_state_manager()
is scheduled to clean up.  nfs4_state_manager() finds states to be
recovered by walking the state_owners rbtree.  As the open-owner is
not in the rbtree, the bad state is not found so nfs4_state_manager()
completes having done nothing.  The request is then retried, with a
predicatable result (indefinite retries).

This patch changes NFS4ERR_BAD_SEQID handling to leave the open-owner
in the rbtree but mark it a 'stale'.  With this the indefinite retries
no longer happen.  Errors get to user-space instead if recovery
doesn't work.

If the stateid is for a delegation, the result is more complex.
nfs4_state_manager() tries to return the delegation but uses the
open-owner with the bad seqid to open files on the server, and this
fails with more BAD_SEQID errors.  To avoid this we update the
so_seqid.create_time of the bad open-owner so that it looks to the server
like a new open-owner and an OPEN_CONFIRM is requested.  This allows
the return of the delagation to complete.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 fs/nfs/nfs4_fs.h   |  3 ++-
 fs/nfs/nfs4state.c | 22 +++++++++-------------
 2 files changed, 11 insertions(+), 14 deletions(-)

Comments

Trond Myklebust Oct. 7, 2016, 4:21 p.m. UTC | #1
> On Oct 6, 2016, at 21:27, NeilBrown <neilb@suse.com> wrote:

> 

> 

> Hi again,

> I posted a version of this patch 4 months and got no reply, so

> thought it might be time to try again.

> This version includes a small change to handle the case when a

> delegation stateid gets a BAD_STATEID, in the context of the open-owner

> getting a BAD_SEQID.

> Obviously this whole issue can only happen if the server is buggy (or

> if the client is buggy, but I don't think it is), but it would be best

> to handle that case gracefully.  Currently it spins indefinitely.

> 

> Thanks,

> NeilBrown

> 

> From: NeilBrown <neilb@suse.com>

> Subject: [PATCH] NFS: Don't disconnect open-owner on NFS4ERR_BAD_SEQID

> 

> When an NFS4ERR_BAD_SEQID is received the open-owner is removed from

> the ->state_owners rbtree so that it will no longer be used.

> 

> If any stateids attached to this open-owner are still in use, and if a

> request using one get an NFS4ERR_BAD_STATEID reply, this can for bad.

> 

> The state is marked as needing recovery and the nfs4_state_manager()

> is scheduled to clean up.  nfs4_state_manager() finds states to be

> recovered by walking the state_owners rbtree.  As the open-owner is

> not in the rbtree, the bad state is not found so nfs4_state_manager()

> completes having done nothing.  The request is then retried, with a

> predicatable result (indefinite retries).

> 

> This patch changes NFS4ERR_BAD_SEQID handling to leave the open-owner

> in the rbtree but mark it a 'stale'.  With this the indefinite retries

> no longer happen.  Errors get to user-space instead if recovery

> doesn't work.

> 

> If the stateid is for a delegation, the result is more complex.

> nfs4_state_manager() tries to return the delegation but uses the

> open-owner with the bad seqid to open files on the server, and this

> fails with more BAD_SEQID errors.  To avoid this we update the

> so_seqid.create_time of the bad open-owner so that it looks to the server

> like a new open-owner and an OPEN_CONFIRM is requested.  This allows

> the return of the delagation to complete.

> 

> Signed-off-by: NeilBrown <neilb@suse.com>

> ---

> fs/nfs/nfs4_fs.h   |  3 ++-

> fs/nfs/nfs4state.c | 22 +++++++++-------------

> 2 files changed, 11 insertions(+), 14 deletions(-)

> 

> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h

> index 3f0e459f2499..6be19814553f 100644

> --- a/fs/nfs/nfs4_fs.h

> +++ b/fs/nfs/nfs4_fs.h

> @@ -113,7 +113,8 @@ struct nfs4_state_owner {

> 

> enum {

> 	NFS_OWNER_RECLAIM_REBOOT,

> -	NFS_OWNER_RECLAIM_NOGRACE

> +	NFS_OWNER_RECLAIM_NOGRACE,

> +	NFS_OWNER_STALE,

> };

> 

> #define NFS_LOCK_NEW		0

> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

> index 74cc32490c7a..8ed2285fc527 100644

> --- a/fs/nfs/nfs4state.c

> +++ b/fs/nfs/nfs4state.c

> @@ -397,6 +397,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)

> 			p = &parent->rb_left;

> 		else if (cred > sp->so_cred)

> 			p = &parent->rb_right;

> +		else if (test_bit(NFS_OWNER_STALE, &sp->so_flags))

> +			p = &parent->rb_left;

> 		else {

> 			if (!list_empty(&sp->so_lru))

> 				list_del_init(&sp->so_lru);

> @@ -424,6 +426,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)

> 			p = &parent->rb_left;

> 		else if (new->so_cred > sp->so_cred)

> 			p = &parent->rb_right;

> +		else if (test_bit(NFS_OWNER_STALE, &sp->so_flags))

> +			p = &parent->rb_left;

> 		else {

> 			if (!list_empty(&sp->so_lru))

> 				list_del_init(&sp->so_lru);

> @@ -496,19 +500,11 @@ nfs4_alloc_state_owner(struct nfs_server *server,

> static void

> nfs4_drop_state_owner(struct nfs4_state_owner *sp)

> {

> -	struct rb_node *rb_node = &sp->so_server_node;

> -

> -	if (!RB_EMPTY_NODE(rb_node)) {

> -		struct nfs_server *server = sp->so_server;

> -		struct nfs_client *clp = server->nfs_client;

> -

> -		spin_lock(&clp->cl_lock);

> -		if (!RB_EMPTY_NODE(rb_node)) {

> -			rb_erase(rb_node, &server->state_owners);

> -			RB_CLEAR_NODE(rb_node);

> -		}

> -		spin_unlock(&clp->cl_lock);

> -	}

> +	set_bit(NFS_OWNER_STALE, &sp->so_flags);

> +	/* Delegation recall might insist on using this open_owner

> +	 * so reset it to force a new 'confirm' stage to be initiated.

> +	 */

> +	sp->so_seqid.create_time = ktime_get();


Hmm…. If you’re going to do this, then why mark the state_owner as stale at all? Why not just reset sp->so_seqid.flags and call nfs4_clear_open_state() to let the state recovery try to do what it can.
diff mbox

Patch

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3f0e459f2499..6be19814553f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -113,7 +113,8 @@  struct nfs4_state_owner {
 
 enum {
 	NFS_OWNER_RECLAIM_REBOOT,
-	NFS_OWNER_RECLAIM_NOGRACE
+	NFS_OWNER_RECLAIM_NOGRACE,
+	NFS_OWNER_STALE,
 };
 
 #define NFS_LOCK_NEW		0
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 74cc32490c7a..8ed2285fc527 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -397,6 +397,8 @@  nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 			p = &parent->rb_left;
 		else if (cred > sp->so_cred)
 			p = &parent->rb_right;
+		else if (test_bit(NFS_OWNER_STALE, &sp->so_flags))
+			p = &parent->rb_left;
 		else {
 			if (!list_empty(&sp->so_lru))
 				list_del_init(&sp->so_lru);
@@ -424,6 +426,8 @@  nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 			p = &parent->rb_left;
 		else if (new->so_cred > sp->so_cred)
 			p = &parent->rb_right;
+		else if (test_bit(NFS_OWNER_STALE, &sp->so_flags))
+			p = &parent->rb_left;
 		else {
 			if (!list_empty(&sp->so_lru))
 				list_del_init(&sp->so_lru);
@@ -496,19 +500,11 @@  nfs4_alloc_state_owner(struct nfs_server *server,
 static void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct rb_node *rb_node = &sp->so_server_node;
-
-	if (!RB_EMPTY_NODE(rb_node)) {
-		struct nfs_server *server = sp->so_server;
-		struct nfs_client *clp = server->nfs_client;
-
-		spin_lock(&clp->cl_lock);
-		if (!RB_EMPTY_NODE(rb_node)) {
-			rb_erase(rb_node, &server->state_owners);
-			RB_CLEAR_NODE(rb_node);
-		}
-		spin_unlock(&clp->cl_lock);
-	}
+	set_bit(NFS_OWNER_STALE, &sp->so_flags);
+	/* Delegation recall might insist on using this open_owner
+	 * so reset it to force a new 'confirm' stage to be initiated.
+	 */
+	sp->so_seqid.create_time = ktime_get();
 }
 
 static void nfs4_free_state_owner(struct nfs4_state_owner *sp)