Message ID | 1cefd7cbadf0eaec5bae66e0870cdb89c7120070.1742941932.git.trond.myklebust@hammerspace.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Ensure that ENETUNREACH terminates state recovery | expand |
On Tue, 2025-03-25 at 18:35 -0400, trondmy@kernel.org wrote: > From: Trond Myklebust <trond.myklebust@hammerspace.com> > > The nfs_client manages state for all the superblocks in the > "cl_superblocks" list, so it must not be shut down until all of them are > gone. > > Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when shutdown") > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > --- > fs/nfs/sysfs.c | 22 +++++++++++++++++++++- > 1 file changed, 21 insertions(+), 1 deletion(-) > > diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c > index b30401b2c939..37cb2b776435 100644 > --- a/fs/nfs/sysfs.c > +++ b/fs/nfs/sysfs.c > @@ -14,6 +14,7 @@ > #include <linux/rcupdate.h> > #include <linux/lockd/lockd.h> > > +#include "internal.h" > #include "nfs4_fs.h" > #include "netns.h" > #include "sysfs.h" > @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt) > rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); > } > > +/* > + * Shut down the nfs_client only once all the superblocks > + * have been shut down. > + */ > +static void shutdown_nfs_client(struct nfs_client *clp) > +{ > + struct nfs_server *server; > + rcu_read_lock(); > + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { > + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { > + rcu_read_unlock(); > + return; > + } > + } > + rcu_read_unlock(); > + nfs_mark_client_ready(clp, -EIO); > + shutdown_client(clp->cl_rpcclient); > +} > + Isn't the upshot of this that a mount won't actually get shutdown until you shutdown all of the mounts to the same server? Personally, I find that acceptable, but we should note that it is a change in behavior. > static ssize_t > shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, > char *buf) > @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, > > server->flags |= NFS_MOUNT_SHUTDOWN; > shutdown_client(server->client); > - shutdown_client(server->nfs_client->cl_rpcclient); > > if (!IS_ERR(server->client_acl)) > shutdown_client(server->client_acl); > @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, > if (server->nlm_host) > shutdown_client(server->nlm_host->h_rpcclnt); > out: > + shutdown_nfs_client(server->nfs_client); > return count; > } >
On Tue, 2025-03-25 at 20:15 -0400, Jeff Layton wrote: > On Tue, 2025-03-25 at 18:35 -0400, trondmy@kernel.org wrote: > > From: Trond Myklebust <trond.myklebust@hammerspace.com> > > > > The nfs_client manages state for all the superblocks in the > > "cl_superblocks" list, so it must not be shut down until all of > > them are > > gone. > > > > Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when > > shutdown") > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > > --- > > fs/nfs/sysfs.c | 22 +++++++++++++++++++++- > > 1 file changed, 21 insertions(+), 1 deletion(-) > > > > diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c > > index b30401b2c939..37cb2b776435 100644 > > --- a/fs/nfs/sysfs.c > > +++ b/fs/nfs/sysfs.c > > @@ -14,6 +14,7 @@ > > #include <linux/rcupdate.h> > > #include <linux/lockd/lockd.h> > > > > +#include "internal.h" > > #include "nfs4_fs.h" > > #include "netns.h" > > #include "sysfs.h" > > @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt > > *clnt) > > rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); > > } > > > > +/* > > + * Shut down the nfs_client only once all the superblocks > > + * have been shut down. > > + */ > > +static void shutdown_nfs_client(struct nfs_client *clp) > > +{ > > + struct nfs_server *server; > > + rcu_read_lock(); > > + list_for_each_entry_rcu(server, &clp->cl_superblocks, > > client_link) { > > + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { > > + rcu_read_unlock(); > > + return; > > + } > > + } > > + rcu_read_unlock(); > > + nfs_mark_client_ready(clp, -EIO); > > + shutdown_client(clp->cl_rpcclient); > > +} > > + > > Isn't the upshot of this that a mount won't actually get shutdown > until > you shutdown all of the mounts to the same server? Personally, I find > that acceptable, but we should note that it is a change in behavior. It means that the other mounts to the same server won't inevitably and irrevocably lock up. I'm unhappy that I missed this bug when I applied the patch, but that's not a reason to not fix it. As I said in the above changelog, the nfs_client's RPC client is there to manage state for all mounts to the same server in the same network namespace. If you shut down that client while there are still mounts that depend on it, then not only have you shot yourself in the foot, but you're going to spend a lot of electrons to just loop madly when those other mounts need to recover state but can't because you've permanently shut down the only way for recovery threads to communicate with the server. > > > > static ssize_t > > shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, > > char *buf) > > @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct > > kobj_attribute *attr, > > > > server->flags |= NFS_MOUNT_SHUTDOWN; > > shutdown_client(server->client); > > - shutdown_client(server->nfs_client->cl_rpcclient); > > > > if (!IS_ERR(server->client_acl)) > > shutdown_client(server->client_acl); > > @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct > > kobj_attribute *attr, > > if (server->nlm_host) > > shutdown_client(server->nlm_host->h_rpcclnt); > > out: > > + shutdown_nfs_client(server->nfs_client); > > return count; > > } > > >
On 25 Mar 2025, at 20:37, Trond Myklebust wrote: > On Tue, 2025-03-25 at 20:15 -0400, Jeff Layton wrote: >> On Tue, 2025-03-25 at 18:35 -0400, trondmy@kernel.org wrote: >>> From: Trond Myklebust <trond.myklebust@hammerspace.com> >>> >>> The nfs_client manages state for all the superblocks in the >>> "cl_superblocks" list, so it must not be shut down until all of >>> them are >>> gone. >>> >>> Fixes: 7d3e26a054c8 ("NFS: Cancel all existing RPC tasks when >>> shutdown") >>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> >>> --- >>> fs/nfs/sysfs.c | 22 +++++++++++++++++++++- >>> 1 file changed, 21 insertions(+), 1 deletion(-) >>> >>> diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c >>> index b30401b2c939..37cb2b776435 100644 >>> --- a/fs/nfs/sysfs.c >>> +++ b/fs/nfs/sysfs.c >>> @@ -14,6 +14,7 @@ >>> #include <linux/rcupdate.h> >>> #include <linux/lockd/lockd.h> >>> >>> +#include "internal.h" >>> #include "nfs4_fs.h" >>> #include "netns.h" >>> #include "sysfs.h" >>> @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt >>> *clnt) >>> rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); >>> } >>> >>> +/* >>> + * Shut down the nfs_client only once all the superblocks >>> + * have been shut down. >>> + */ >>> +static void shutdown_nfs_client(struct nfs_client *clp) >>> +{ >>> + struct nfs_server *server; >>> + rcu_read_lock(); >>> + list_for_each_entry_rcu(server, &clp->cl_superblocks, >>> client_link) { >>> + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { >>> + rcu_read_unlock(); >>> + return; >>> + } >>> + } >>> + rcu_read_unlock(); >>> + nfs_mark_client_ready(clp, -EIO); >>> + shutdown_client(clp->cl_rpcclient); >>> +} >>> + >> >> Isn't the upshot of this that a mount won't actually get shutdown >> until >> you shutdown all of the mounts to the same server? Personally, I find >> that acceptable, but we should note that it is a change in behavior. > > It means that the other mounts to the same server won't inevitably and > irrevocably lock up. I'm unhappy that I missed this bug when I applied > the patch, but that's not a reason to not fix it. > > As I said in the above changelog, the nfs_client's RPC client is there > to manage state for all mounts to the same server in the same network > namespace. If you shut down that client while there are still mounts > that depend on it, then not only have you shot yourself in the foot, > but you're going to spend a lot of electrons to just loop madly when > those other mounts need to recover state but can't because you've > permanently shut down the only way for recovery threads to communicate > with the server. The only use for shutdown so far had been when the server is permanently unavailable, so breaking mounts to the same server didn't matter. I agree this fix is the right thing to do now. Reviewed-by: Benjamin Coddington <bcodding@redhat.com> Ben
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index b30401b2c939..37cb2b776435 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -14,6 +14,7 @@ #include <linux/rcupdate.h> #include <linux/lockd/lockd.h> +#include "internal.h" #include "nfs4_fs.h" #include "netns.h" #include "sysfs.h" @@ -228,6 +229,25 @@ static void shutdown_client(struct rpc_clnt *clnt) rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); } +/* + * Shut down the nfs_client only once all the superblocks + * have been shut down. + */ +static void shutdown_nfs_client(struct nfs_client *clp) +{ + struct nfs_server *server; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + nfs_mark_client_ready(clp, -EIO); + shutdown_client(clp->cl_rpcclient); +} + static ssize_t shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -259,7 +279,6 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, server->flags |= NFS_MOUNT_SHUTDOWN; shutdown_client(server->client); - shutdown_client(server->nfs_client->cl_rpcclient); if (!IS_ERR(server->client_acl)) shutdown_client(server->client_acl); @@ -267,6 +286,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, if (server->nlm_host) shutdown_client(server->nlm_host->h_rpcclnt); out: + shutdown_nfs_client(server->nfs_client); return count; }