diff mbox series

[v3,3/6] NFS: Shut down the nfs_client only after all the superblocks

Message ID 1cefd7cbadf0eaec5bae66e0870cdb89c7120070.1742941932.git.trond.myklebust@hammerspace.com (mailing list archive)
State New
Headers show
Series Ensure that ENETUNREACH terminates state recovery | expand

Commit Message

Trond Myklebust March 25, 2025, 10:35 p.m. UTC
From: Trond Myklebust <trond.myklebust@hammerspace.com>

The nfs_client manages state for all the superblocks in the
"cl_superblocks" list, so it must not be shut down until all of them are
gone.

Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when shutdown")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/sysfs.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

Comments

Jeff Layton March 26, 2025, 12:15 a.m. UTC | #1
On Tue, 2025-03-25 at 18:35 -0400, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> The nfs_client manages state for all the superblocks in the
> "cl_superblocks" list, so it must not be shut down until all of them are
> gone.
> 
> Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when shutdown")
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/sysfs.c | 22 +++++++++++++++++++++-
>  1 file changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
> index b30401b2c939..37cb2b776435 100644
> --- a/fs/nfs/sysfs.c
> +++ b/fs/nfs/sysfs.c
> @@ -14,6 +14,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/lockd/lockd.h>
>  
> +#include "internal.h"
>  #include "nfs4_fs.h"
>  #include "netns.h"
>  #include "sysfs.h"
> @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt)
>  	rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
>  }
>  
> +/*
> + * Shut down the nfs_client only once all the superblocks
> + * have been shut down.
> + */
> +static void shutdown_nfs_client(struct nfs_client *clp)
> +{
> +	struct nfs_server *server;
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
> +		if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
> +			rcu_read_unlock();
> +			return;
> +		}
> +	}
> +	rcu_read_unlock();
> +	nfs_mark_client_ready(clp, -EIO);
> +	shutdown_client(clp->cl_rpcclient);
> +}
> +

Isn't the upshot of this that a mount won't actually get shutdown until
you shutdown all of the mounts to the same server? Personally, I find
that acceptable, but we should note that it is a change in behavior.


>  static ssize_t
>  shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
>  				char *buf)
> @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
>  
>  	server->flags |= NFS_MOUNT_SHUTDOWN;
>  	shutdown_client(server->client);
> -	shutdown_client(server->nfs_client->cl_rpcclient);
>  
>  	if (!IS_ERR(server->client_acl))
>  		shutdown_client(server->client_acl);
> @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
>  	if (server->nlm_host)
>  		shutdown_client(server->nlm_host->h_rpcclnt);
>  out:
> +	shutdown_nfs_client(server->nfs_client);
>  	return count;
>  }
>
Trond Myklebust March 26, 2025, 12:37 a.m. UTC | #2
On Tue, 2025-03-25 at 20:15 -0400, Jeff Layton wrote:
> On Tue, 2025-03-25 at 18:35 -0400, trondmy@kernel.org wrote:
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > The nfs_client manages state for all the superblocks in the
> > "cl_superblocks" list, so it must not be shut down until all of
> > them are
> > gone.
> > 
> > Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when
> > shutdown")
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  fs/nfs/sysfs.c | 22 +++++++++++++++++++++-
> >  1 file changed, 21 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
> > index b30401b2c939..37cb2b776435 100644
> > --- a/fs/nfs/sysfs.c
> > +++ b/fs/nfs/sysfs.c
> > @@ -14,6 +14,7 @@
> >  #include <linux/rcupdate.h>
> >  #include <linux/lockd/lockd.h>
> >  
> > +#include "internal.h"
> >  #include "nfs4_fs.h"
> >  #include "netns.h"
> >  #include "sysfs.h"
> > @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt
> > *clnt)
> >  	rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
> >  }
> >  
> > +/*
> > + * Shut down the nfs_client only once all the superblocks
> > + * have been shut down.
> > + */
> > +static void shutdown_nfs_client(struct nfs_client *clp)
> > +{
> > +	struct nfs_server *server;
> > +	rcu_read_lock();
> > +	list_for_each_entry_rcu(server, &clp->cl_superblocks,
> > client_link) {
> > +		if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
> > +			rcu_read_unlock();
> > +			return;
> > +		}
> > +	}
> > +	rcu_read_unlock();
> > +	nfs_mark_client_ready(clp, -EIO);
> > +	shutdown_client(clp->cl_rpcclient);
> > +}
> > +
> 
> Isn't the upshot of this that a mount won't actually get shutdown
> until
> you shutdown all of the mounts to the same server? Personally, I find
> that acceptable, but we should note that it is a change in behavior.

It means that the other mounts to the same server won't inevitably and
irrevocably lock up. I'm unhappy that I missed this bug when I applied
the patch, but that's not a reason to not fix it.

As I said in the above changelog, the nfs_client's RPC client is there
to manage state for all mounts to the same server in the same network
namespace. If you shut down that client while there are still mounts
that depend on it, then not only have you shot yourself in the foot,
but you're going to spend a lot of electrons to just loop madly when
those other mounts need to recover state but can't because you've
permanently shut down the only way for recovery threads to communicate
with the server.

> 
> 
> >  static ssize_t
> >  shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
> >  				char *buf)
> > @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct
> > kobj_attribute *attr,
> >  
> >  	server->flags |= NFS_MOUNT_SHUTDOWN;
> >  	shutdown_client(server->client);
> > -	shutdown_client(server->nfs_client->cl_rpcclient);
> >  
> >  	if (!IS_ERR(server->client_acl))
> >  		shutdown_client(server->client_acl);
> > @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct
> > kobj_attribute *attr,
> >  	if (server->nlm_host)
> >  		shutdown_client(server->nlm_host->h_rpcclnt);
> >  out:
> > +	shutdown_nfs_client(server->nfs_client);
> >  	return count;
> >  }
> >  
>
Benjamin Coddington March 26, 2025, 10:07 a.m. UTC | #3
On 25 Mar 2025, at 20:37, Trond Myklebust wrote:

> On Tue, 2025-03-25 at 20:15 -0400, Jeff Layton wrote:
>> On Tue, 2025-03-25 at 18:35 -0400, trondmy@kernel.org wrote:
>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>
>>> The nfs_client manages state for all the superblocks in the
>>> "cl_superblocks" list, so it must not be shut down until all of
>>> them are
>>> gone.
>>>
>>> Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when
>>> shutdown")
>>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> ---
>>>  fs/nfs/sysfs.c | 22 +++++++++++++++++++++-
>>>  1 file changed, 21 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
>>> index b30401b2c939..37cb2b776435 100644
>>> --- a/fs/nfs/sysfs.c
>>> +++ b/fs/nfs/sysfs.c
>>> @@ -14,6 +14,7 @@
>>>  #include <linux/rcupdate.h>
>>>  #include <linux/lockd/lockd.h>
>>>  
>>> +#include "internal.h"
>>>  #include "nfs4_fs.h"
>>>  #include "netns.h"
>>>  #include "sysfs.h"
>>> @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt
>>> *clnt)
>>>  	rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
>>>  }
>>>  
>>> +/*
>>> + * Shut down the nfs_client only once all the superblocks
>>> + * have been shut down.
>>> + */
>>> +static void shutdown_nfs_client(struct nfs_client *clp)
>>> +{
>>> +	struct nfs_server *server;
>>> +	rcu_read_lock();
>>> +	list_for_each_entry_rcu(server, &clp->cl_superblocks,
>>> client_link) {
>>> +		if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
>>> +			rcu_read_unlock();
>>> +			return;
>>> +		}
>>> +	}
>>> +	rcu_read_unlock();
>>> +	nfs_mark_client_ready(clp, -EIO);
>>> +	shutdown_client(clp->cl_rpcclient);
>>> +}
>>> +
>>
>> Isn't the upshot of this that a mount won't actually get shutdown
>> until
>> you shutdown all of the mounts to the same server? Personally, I find
>> that acceptable, but we should note that it is a change in behavior.
>
> It means that the other mounts to the same server won't inevitably and
> irrevocably lock up. I'm unhappy that I missed this bug when I applied
> the patch, but that's not a reason to not fix it.
>
> As I said in the above changelog, the nfs_client's RPC client is there
> to manage state for all mounts to the same server in the same network
> namespace. If you shut down that client while there are still mounts
> that depend on it, then not only have you shot yourself in the foot,
> but you're going to spend a lot of electrons to just loop madly when
> those other mounts need to recover state but can't because you've
> permanently shut down the only way for recovery threads to communicate
> with the server.

The only use for shutdown so far had been when the server is permanently
unavailable, so breaking mounts to the same server didn't matter.  I agree
this fix is the right thing to do now.

Reviewed-by: Benjamin Coddington <bcodding@redhat.com>

Ben
diff mbox series

Patch

diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index b30401b2c939..37cb2b776435 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -14,6 +14,7 @@ 
 #include <linux/rcupdate.h>
 #include <linux/lockd/lockd.h>
 
+#include "internal.h"
 #include "nfs4_fs.h"
 #include "netns.h"
 #include "sysfs.h"
@@ -228,6 +229,25 @@  static void shutdown_client(struct rpc_clnt *clnt)
 	rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
 }
 
+/*
+ * Shut down the nfs_client only once all the superblocks
+ * have been shut down.
+ */
+static void shutdown_nfs_client(struct nfs_client *clp)
+{
+	struct nfs_server *server;
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		if (!(server->flags & NFS_MOUNT_SHUTDOWN)) {
+			rcu_read_unlock();
+			return;
+		}
+	}
+	rcu_read_unlock();
+	nfs_mark_client_ready(clp, -EIO);
+	shutdown_client(clp->cl_rpcclient);
+}
+
 static ssize_t
 shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
 				char *buf)
@@ -259,7 +279,6 @@  shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 	server->flags |= NFS_MOUNT_SHUTDOWN;
 	shutdown_client(server->client);
-	shutdown_client(server->nfs_client->cl_rpcclient);
 
 	if (!IS_ERR(server->client_acl))
 		shutdown_client(server->client_acl);
@@ -267,6 +286,7 @@  shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (server->nlm_host)
 		shutdown_client(server->nlm_host->h_rpcclnt);
 out:
+	shutdown_nfs_client(server->nfs_client);
 	return count;
 }