diff mbox series

nfsd: Ensure knfsd shuts down when the "nfsd" pseudofs is unmounted

Message ID 20210313210847.569041-1-trondmy@kernel.org (mailing list archive)
State New, archived
Headers show
Series nfsd: Ensure knfsd shuts down when the "nfsd" pseudofs is unmounted | expand

Commit Message

Trond Myklebust March 13, 2021, 9:08 p.m. UTC
From: Trond Myklebust <trond.myklebust@hammerspace.com>

In order to ensure that knfsd threads don't linger once the nfsd
pseudofs is unmounted (e.g. when the container is killed) we let
nfsd_umount() shut down those threads and wait for them to exit.

This also should ensure that we don't need to do a kernel mount of
the pseudofs, since the thread lifetime is now limited by the
lifetime of the filesystem.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfsd/netns.h     |  6 +++---
 fs/nfsd/nfs4state.c |  8 +-------
 fs/nfsd/nfsctl.c    | 14 ++------------
 fs/nfsd/nfsd.h      |  3 +--
 fs/nfsd/nfssvc.c    | 35 ++++++++++++++++++++++++++++++++++-
 5 files changed, 41 insertions(+), 25 deletions(-)

Comments

Chuck Lever March 15, 2021, 2:30 p.m. UTC | #1
> On Mar 13, 2021, at 4:08 PM, trondmy@kernel.org wrote:
> 
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> In order to ensure that knfsd threads don't linger once the nfsd
> pseudofs is unmounted (e.g. when the container is killed) we let
> nfsd_umount() shut down those threads and wait for them to exit.
> 
> This also should ensure that we don't need to do a kernel mount of
> the pseudofs, since the thread lifetime is now limited by the
> lifetime of the filesystem.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>

Hey Trond, I've included your patch in the for-next topic branch at

git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git


> ---
> fs/nfsd/netns.h     |  6 +++---
> fs/nfsd/nfs4state.c |  8 +-------
> fs/nfsd/nfsctl.c    | 14 ++------------
> fs/nfsd/nfsd.h      |  3 +--
> fs/nfsd/nfssvc.c    | 35 ++++++++++++++++++++++++++++++++++-
> 5 files changed, 41 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index c330f5bd0cf3..a75abeb1e698 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -51,9 +51,6 @@ struct nfsd_net {
> 	bool grace_ended;
> 	time64_t boot_time;
> 
> -	/* internal mount of the "nfsd" pseudofilesystem: */
> -	struct vfsmount *nfsd_mnt;
> -
> 	struct dentry *nfsd_client_dir;
> 
> 	/*
> @@ -130,6 +127,9 @@ struct nfsd_net {
> 	wait_queue_head_t ntf_wq;
> 	atomic_t ntf_refcnt;
> 
> +	/* Allow umount to wait for nfsd state cleanup */
> +	struct completion nfsd_shutdown_complete;
> +
> 	/*
> 	 * clientid and stateid data for construction of net unique COPY
> 	 * stateids.
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 423fd6683f3a..8bf840661d67 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -7346,14 +7346,9 @@ nfs4_state_start_net(struct net *net)
> 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> 	int ret;
> 
> -	ret = get_nfsdfs(net);
> -	if (ret)
> -		return ret;
> 	ret = nfs4_state_create_net(net);
> -	if (ret) {
> -		mntput(nn->nfsd_mnt);
> +	if (ret)
> 		return ret;
> -	}
> 	locks_start_grace(net, &nn->nfsd4_manager);
> 	nfsd4_client_tracking_init(net);
> 	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
> @@ -7423,7 +7418,6 @@ nfs4_state_shutdown_net(struct net *net)
> 
> 	nfsd4_client_tracking_exit(net);
> 	nfs4_state_destroy_net(net);
> -	mntput(nn->nfsd_mnt);
> }
> 
> void
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index ef86ed23af82..02ff7f762e2d 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1416,6 +1416,8 @@ static void nfsd_umount(struct super_block *sb)
> {
> 	struct net *net = sb->s_fs_info;
> 
> +	nfsd_shutdown_threads(net);
> +
> 	kill_litter_super(sb);
> 	put_net(net);
> }
> @@ -1428,18 +1430,6 @@ static struct file_system_type nfsd_fs_type = {
> };
> MODULE_ALIAS_FS("nfsd");
> 
> -int get_nfsdfs(struct net *net)
> -{
> -	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> -	struct vfsmount *mnt;
> -
> -	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
> -	if (IS_ERR(mnt))
> -		return PTR_ERR(mnt);
> -	nn->nfsd_mnt = mnt;
> -	return 0;
> -}
> -
> #ifdef CONFIG_PROC_FS
> static int create_proc_exports_entry(void)
> {
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 8bdc37aa2c2e..27c1308ffc2b 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -93,13 +93,12 @@ int		nfsd_get_nrthreads(int n, int *, struct net *);
> int		nfsd_set_nrthreads(int n, int *, struct net *);
> int		nfsd_pool_stats_open(struct inode *, struct file *);
> int		nfsd_pool_stats_release(struct inode *, struct file *);
> +void		nfsd_shutdown_threads(struct net *net);
> 
> void		nfsd_destroy(struct net *net);
> 
> bool		i_am_nfsd(void);
> 
> -int get_nfsdfs(struct net *);
> -
> struct nfsdfs_client {
> 	struct kref cl_ref;
> 	void (*cl_release)(struct kref *kref);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6de406322106..f014b7aa0726 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -596,6 +596,37 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
> 	.svo_module		= THIS_MODULE,
> };
> 
> +static void nfsd_complete_shutdown(struct net *net)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +
> +	WARN_ON(!mutex_is_locked(&nfsd_mutex));
> +
> +	nn->nfsd_serv = NULL;
> +	complete(&nn->nfsd_shutdown_complete);
> +}
> +
> +void nfsd_shutdown_threads(struct net *net)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	struct svc_serv *serv;
> +
> +	mutex_lock(&nfsd_mutex);
> +	serv = nn->nfsd_serv;
> +	if (serv == NULL) {
> +		mutex_unlock(&nfsd_mutex);
> +		return;
> +	}
> +
> +	svc_get(serv);
> +	/* Kill outstanding nfsd threads */
> +	serv->sv_ops->svo_setup(serv, NULL, 0);
> +	nfsd_destroy(net);
> +	mutex_unlock(&nfsd_mutex);
> +	/* Wait for shutdown of nfsd_serv to complete */
> +	wait_for_completion(&nn->nfsd_shutdown_complete);
> +}
> +
> bool i_am_nfsd(void)
> {
> 	return kthread_func(current) == nfsd;
> @@ -618,11 +649,13 @@ int nfsd_create_serv(struct net *net)
> 						&nfsd_thread_sv_ops);
> 	if (nn->nfsd_serv == NULL)
> 		return -ENOMEM;
> +	init_completion(&nn->nfsd_shutdown_complete);
> 
> 	nn->nfsd_serv->sv_maxconn = nn->max_connections;
> 	error = svc_bind(nn->nfsd_serv, net);
> 	if (error < 0) {
> 		svc_destroy(nn->nfsd_serv);
> +		nfsd_complete_shutdown(net);
> 		return error;
> 	}
> 
> @@ -671,7 +704,7 @@ void nfsd_destroy(struct net *net)
> 		svc_shutdown_net(nn->nfsd_serv, net);
> 	svc_destroy(nn->nfsd_serv);
> 	if (destroy)
> -		nn->nfsd_serv = NULL;
> +		nfsd_complete_shutdown(net);
> }
> 
> int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
> -- 
> 2.30.2
> 

--
Chuck Lever
Bruce Fields March 15, 2021, 2:46 p.m. UTC | #2
On Sat, Mar 13, 2021 at 04:08:47PM -0500, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> In order to ensure that knfsd threads don't linger once the nfsd
> pseudofs is unmounted (e.g. when the container is killed) we let
> nfsd_umount() shut down those threads and wait for them to exit.
> 
> This also should ensure that we don't need to do a kernel mount of
> the pseudofs, since the thread lifetime is now limited by the
> lifetime of the filesystem.

The nfsd filesystem is per-container, and threads are global, so I don't
understand how this works.

--b.

> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfsd/netns.h     |  6 +++---
>  fs/nfsd/nfs4state.c |  8 +-------
>  fs/nfsd/nfsctl.c    | 14 ++------------
>  fs/nfsd/nfsd.h      |  3 +--
>  fs/nfsd/nfssvc.c    | 35 ++++++++++++++++++++++++++++++++++-
>  5 files changed, 41 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index c330f5bd0cf3..a75abeb1e698 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -51,9 +51,6 @@ struct nfsd_net {
>  	bool grace_ended;
>  	time64_t boot_time;
>  
> -	/* internal mount of the "nfsd" pseudofilesystem: */
> -	struct vfsmount *nfsd_mnt;
> -
>  	struct dentry *nfsd_client_dir;
>  
>  	/*
> @@ -130,6 +127,9 @@ struct nfsd_net {
>  	wait_queue_head_t ntf_wq;
>  	atomic_t ntf_refcnt;
>  
> +	/* Allow umount to wait for nfsd state cleanup */
> +	struct completion nfsd_shutdown_complete;
> +
>  	/*
>  	 * clientid and stateid data for construction of net unique COPY
>  	 * stateids.
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 423fd6683f3a..8bf840661d67 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -7346,14 +7346,9 @@ nfs4_state_start_net(struct net *net)
>  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  	int ret;
>  
> -	ret = get_nfsdfs(net);
> -	if (ret)
> -		return ret;
>  	ret = nfs4_state_create_net(net);
> -	if (ret) {
> -		mntput(nn->nfsd_mnt);
> +	if (ret)
>  		return ret;
> -	}
>  	locks_start_grace(net, &nn->nfsd4_manager);
>  	nfsd4_client_tracking_init(net);
>  	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
> @@ -7423,7 +7418,6 @@ nfs4_state_shutdown_net(struct net *net)
>  
>  	nfsd4_client_tracking_exit(net);
>  	nfs4_state_destroy_net(net);
> -	mntput(nn->nfsd_mnt);
>  }
>  
>  void
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index ef86ed23af82..02ff7f762e2d 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1416,6 +1416,8 @@ static void nfsd_umount(struct super_block *sb)
>  {
>  	struct net *net = sb->s_fs_info;
>  
> +	nfsd_shutdown_threads(net);
> +
>  	kill_litter_super(sb);
>  	put_net(net);
>  }
> @@ -1428,18 +1430,6 @@ static struct file_system_type nfsd_fs_type = {
>  };
>  MODULE_ALIAS_FS("nfsd");
>  
> -int get_nfsdfs(struct net *net)
> -{
> -	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> -	struct vfsmount *mnt;
> -
> -	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
> -	if (IS_ERR(mnt))
> -		return PTR_ERR(mnt);
> -	nn->nfsd_mnt = mnt;
> -	return 0;
> -}
> -
>  #ifdef CONFIG_PROC_FS
>  static int create_proc_exports_entry(void)
>  {
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 8bdc37aa2c2e..27c1308ffc2b 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -93,13 +93,12 @@ int		nfsd_get_nrthreads(int n, int *, struct net *);
>  int		nfsd_set_nrthreads(int n, int *, struct net *);
>  int		nfsd_pool_stats_open(struct inode *, struct file *);
>  int		nfsd_pool_stats_release(struct inode *, struct file *);
> +void		nfsd_shutdown_threads(struct net *net);
>  
>  void		nfsd_destroy(struct net *net);
>  
>  bool		i_am_nfsd(void);
>  
> -int get_nfsdfs(struct net *);
> -
>  struct nfsdfs_client {
>  	struct kref cl_ref;
>  	void (*cl_release)(struct kref *kref);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6de406322106..f014b7aa0726 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -596,6 +596,37 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
>  	.svo_module		= THIS_MODULE,
>  };
>  
> +static void nfsd_complete_shutdown(struct net *net)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +
> +	WARN_ON(!mutex_is_locked(&nfsd_mutex));
> +
> +	nn->nfsd_serv = NULL;
> +	complete(&nn->nfsd_shutdown_complete);
> +}
> +
> +void nfsd_shutdown_threads(struct net *net)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	struct svc_serv *serv;
> +
> +	mutex_lock(&nfsd_mutex);
> +	serv = nn->nfsd_serv;
> +	if (serv == NULL) {
> +		mutex_unlock(&nfsd_mutex);
> +		return;
> +	}
> +
> +	svc_get(serv);
> +	/* Kill outstanding nfsd threads */
> +	serv->sv_ops->svo_setup(serv, NULL, 0);
> +	nfsd_destroy(net);
> +	mutex_unlock(&nfsd_mutex);
> +	/* Wait for shutdown of nfsd_serv to complete */
> +	wait_for_completion(&nn->nfsd_shutdown_complete);
> +}
> +
>  bool i_am_nfsd(void)
>  {
>  	return kthread_func(current) == nfsd;
> @@ -618,11 +649,13 @@ int nfsd_create_serv(struct net *net)
>  						&nfsd_thread_sv_ops);
>  	if (nn->nfsd_serv == NULL)
>  		return -ENOMEM;
> +	init_completion(&nn->nfsd_shutdown_complete);
>  
>  	nn->nfsd_serv->sv_maxconn = nn->max_connections;
>  	error = svc_bind(nn->nfsd_serv, net);
>  	if (error < 0) {
>  		svc_destroy(nn->nfsd_serv);
> +		nfsd_complete_shutdown(net);
>  		return error;
>  	}
>  
> @@ -671,7 +704,7 @@ void nfsd_destroy(struct net *net)
>  		svc_shutdown_net(nn->nfsd_serv, net);
>  	svc_destroy(nn->nfsd_serv);
>  	if (destroy)
> -		nn->nfsd_serv = NULL;
> +		nfsd_complete_shutdown(net);
>  }
>  
>  int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
> -- 
> 2.30.2
>
Trond Myklebust March 15, 2021, 4:04 p.m. UTC | #3
On Mon, 2021-03-15 at 10:46 -0400, J. Bruce Fields wrote:
> On Sat, Mar 13, 2021 at 04:08:47PM -0500, trondmy@kernel.org wrote:
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > In order to ensure that knfsd threads don't linger once the nfsd
> > pseudofs is unmounted (e.g. when the container is killed) we let
> > nfsd_umount() shut down those threads and wait for them to exit.
> > 
> > This also should ensure that we don't need to do a kernel mount of
> > the pseudofs, since the thread lifetime is now limited by the
> > lifetime of the filesystem.
> 
> The nfsd filesystem is per-container, and threads are global, so I
> don't
> understand how this works.
> 

The knfsd threads are not global.

They are assigned at creation time to a struct svc_serv, which is a
per-container object. As you say above, all the control structures in
the nfsd filesystem are per-container.

Without this failsafe that shuts down those threads when the container
is killed, then you end up with orphaned threads. This is a real
problem when docker crashes and gets restarted (or if you do a 'kill -
9' on the knfsd container's init process).
Bruce Fields March 15, 2021, 7:18 p.m. UTC | #4
On Mon, Mar 15, 2021 at 04:04:05PM +0000, Trond Myklebust wrote:
> On Mon, 2021-03-15 at 10:46 -0400, J. Bruce Fields wrote:
> > On Sat, Mar 13, 2021 at 04:08:47PM -0500, trondmy@kernel.org wrote:
> > > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > 
> > > In order to ensure that knfsd threads don't linger once the nfsd
> > > pseudofs is unmounted (e.g. when the container is killed) we let
> > > nfsd_umount() shut down those threads and wait for them to exit.
> > > 
> > > This also should ensure that we don't need to do a kernel mount of
> > > the pseudofs, since the thread lifetime is now limited by the
> > > lifetime of the filesystem.
> > 
> > The nfsd filesystem is per-container, and threads are global, so I
> > don't
> > understand how this works.
> > 
> 
> The knfsd threads are not global.
> 
> They are assigned at creation time to a struct svc_serv, which is a
> per-container object. As you say above, all the control structures in
> the nfsd filesystem are per-container.

(Looks.)  And it's been that way from the start.  I don't know why I
thought otherwise.  Thanks!

> Without this failsafe that shuts down those threads when the container
> is killed, then you end up with orphaned threads. This is a real
> problem when docker crashes and gets restarted (or if you do a 'kill -
> 9' on the knfsd container's init process).

Makes sense, thanks again!

--b.
Nikos Tsironis Dec. 20, 2022, 1:43 p.m. UTC | #5
On 3/13/21 23:08, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> In order to ensure that knfsd threads don't linger once the nfsd
> pseudofs is unmounted (e.g. when the container is killed) we let
> nfsd_umount() shut down those threads and wait for them to exit.
> 
> This also should ensure that we don't need to do a kernel mount of
> the pseudofs, since the thread lifetime is now limited by the
> lifetime of the filesystem.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---

Hello,

This patch was merged in kernel v5.13, but the issue exists in older
kernels too.

Is there a reason that the patch was never backported to older stable
kernels?

Thanks,
Nikos.
Chuck Lever Dec. 22, 2022, 2:48 p.m. UTC | #6
[ Cc: Bruce removed because that address no longer works ]

> On Dec 20, 2022, at 8:43 AM, Nikos Tsironis <ntsironis@arrikto.com> wrote:
> 
> On 3/13/21 23:08, trondmy@kernel.org wrote:
>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>> In order to ensure that knfsd threads don't linger once the nfsd
>> pseudofs is unmounted (e.g. when the container is killed) we let
>> nfsd_umount() shut down those threads and wait for them to exit.
>> This also should ensure that we don't need to do a kernel mount of
>> the pseudofs, since the thread lifetime is now limited by the
>> lifetime of the filesystem.
>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>> ---
> 
> Hello,
> 
> This patch was merged in kernel v5.13, but the issue exists in older
> kernels too.
> 
> Is there a reason that the patch was never backported to older stable
> kernels?

Hello Nikos-

A probable reason is there is no Fixes: or Cc: stable@vger tag in the
merged commit, so it will not be cherry-picked by AUTOSEL. Another
reason might be that the patch does not apply cleanly to LTS kernels.

You can make a request to stable@ for this patch to be backported, but
I would prefer if you apply the patch and test it on each target kernel
before making such a request. Or, you can pick which LTS kernel(s) are
most relevant to you and ask for backport to only those.

A good test will have three parts:

- Make a positive confirmation the issue exists in that kernel

- Make sure the patch applies cleanly verbatim and causes no regression

- Make sure the patch fixes the issue in that kernel

It's a bit of (albeit mechanical) effort, and the Linux NFS community
doesn't have the resources to manage it for all patches going into
mainline to six different LTS kernels.


--
Chuck Lever
Nikos Tsironis Jan. 18, 2023, 2:05 p.m. UTC | #7
On 12/22/22 16:48, Chuck Lever III wrote:
> [ Cc: Bruce removed because that address no longer works ]
> 
>> On Dec 20, 2022, at 8:43 AM, Nikos Tsironis <ntsironis@arrikto.com> wrote:
>>
>> On 3/13/21 23:08, trondmy@kernel.org wrote:
>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> In order to ensure that knfsd threads don't linger once the nfsd
>>> pseudofs is unmounted (e.g. when the container is killed) we let
>>> nfsd_umount() shut down those threads and wait for them to exit.
>>> This also should ensure that we don't need to do a kernel mount of
>>> the pseudofs, since the thread lifetime is now limited by the
>>> lifetime of the filesystem.
>>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> ---
>>
>> Hello,
>>
>> This patch was merged in kernel v5.13, but the issue exists in older
>> kernels too.
>>
>> Is there a reason that the patch was never backported to older stable
>> kernels?
> 
> Hello Nikos-
> 
> A probable reason is there is no Fixes: or Cc: stable@vger tag in the
> merged commit, so it will not be cherry-picked by AUTOSEL. Another
> reason might be that the patch does not apply cleanly to LTS kernels.
> 
> You can make a request to stable@ for this patch to be backported, but
> I would prefer if you apply the patch and test it on each target kernel
> before making such a request. Or, you can pick which LTS kernel(s) are
> most relevant to you and ask for backport to only those.
> 
> A good test will have three parts:
> 
> - Make a positive confirmation the issue exists in that kernel
> 
> - Make sure the patch applies cleanly verbatim and causes no regression
> 
> - Make sure the patch fixes the issue in that kernel
> 
> It's a bit of (albeit mechanical) effort, and the Linux NFS community
> doesn't have the resources to manage it for all patches going into
> mainline to six different LTS kernels.
> 

Hello Chuck,

Thanks for the feedback, and sorry for the late reply.

I will backport the patch myself to LTS kernels 5.4 and 5.10, test it,
and send it to stable@ to request inclusion in these two kernels.

Nikos
diff mbox series

Patch

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index c330f5bd0cf3..a75abeb1e698 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -51,9 +51,6 @@  struct nfsd_net {
 	bool grace_ended;
 	time64_t boot_time;
 
-	/* internal mount of the "nfsd" pseudofilesystem: */
-	struct vfsmount *nfsd_mnt;
-
 	struct dentry *nfsd_client_dir;
 
 	/*
@@ -130,6 +127,9 @@  struct nfsd_net {
 	wait_queue_head_t ntf_wq;
 	atomic_t ntf_refcnt;
 
+	/* Allow umount to wait for nfsd state cleanup */
+	struct completion nfsd_shutdown_complete;
+
 	/*
 	 * clientid and stateid data for construction of net unique COPY
 	 * stateids.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 423fd6683f3a..8bf840661d67 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7346,14 +7346,9 @@  nfs4_state_start_net(struct net *net)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
 
-	ret = get_nfsdfs(net);
-	if (ret)
-		return ret;
 	ret = nfs4_state_create_net(net);
-	if (ret) {
-		mntput(nn->nfsd_mnt);
+	if (ret)
 		return ret;
-	}
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nfsd4_client_tracking_init(net);
 	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
@@ -7423,7 +7418,6 @@  nfs4_state_shutdown_net(struct net *net)
 
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
-	mntput(nn->nfsd_mnt);
 }
 
 void
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ef86ed23af82..02ff7f762e2d 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1416,6 +1416,8 @@  static void nfsd_umount(struct super_block *sb)
 {
 	struct net *net = sb->s_fs_info;
 
+	nfsd_shutdown_threads(net);
+
 	kill_litter_super(sb);
 	put_net(net);
 }
@@ -1428,18 +1430,6 @@  static struct file_system_type nfsd_fs_type = {
 };
 MODULE_ALIAS_FS("nfsd");
 
-int get_nfsdfs(struct net *net)
-{
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct vfsmount *mnt;
-
-	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
-	if (IS_ERR(mnt))
-		return PTR_ERR(mnt);
-	nn->nfsd_mnt = mnt;
-	return 0;
-}
-
 #ifdef CONFIG_PROC_FS
 static int create_proc_exports_entry(void)
 {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 8bdc37aa2c2e..27c1308ffc2b 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -93,13 +93,12 @@  int		nfsd_get_nrthreads(int n, int *, struct net *);
 int		nfsd_set_nrthreads(int n, int *, struct net *);
 int		nfsd_pool_stats_open(struct inode *, struct file *);
 int		nfsd_pool_stats_release(struct inode *, struct file *);
+void		nfsd_shutdown_threads(struct net *net);
 
 void		nfsd_destroy(struct net *net);
 
 bool		i_am_nfsd(void);
 
-int get_nfsdfs(struct net *);
-
 struct nfsdfs_client {
 	struct kref cl_ref;
 	void (*cl_release)(struct kref *kref);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6de406322106..f014b7aa0726 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -596,6 +596,37 @@  static const struct svc_serv_ops nfsd_thread_sv_ops = {
 	.svo_module		= THIS_MODULE,
 };
 
+static void nfsd_complete_shutdown(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	WARN_ON(!mutex_is_locked(&nfsd_mutex));
+
+	nn->nfsd_serv = NULL;
+	complete(&nn->nfsd_shutdown_complete);
+}
+
+void nfsd_shutdown_threads(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct svc_serv *serv;
+
+	mutex_lock(&nfsd_mutex);
+	serv = nn->nfsd_serv;
+	if (serv == NULL) {
+		mutex_unlock(&nfsd_mutex);
+		return;
+	}
+
+	svc_get(serv);
+	/* Kill outstanding nfsd threads */
+	serv->sv_ops->svo_setup(serv, NULL, 0);
+	nfsd_destroy(net);
+	mutex_unlock(&nfsd_mutex);
+	/* Wait for shutdown of nfsd_serv to complete */
+	wait_for_completion(&nn->nfsd_shutdown_complete);
+}
+
 bool i_am_nfsd(void)
 {
 	return kthread_func(current) == nfsd;
@@ -618,11 +649,13 @@  int nfsd_create_serv(struct net *net)
 						&nfsd_thread_sv_ops);
 	if (nn->nfsd_serv == NULL)
 		return -ENOMEM;
+	init_completion(&nn->nfsd_shutdown_complete);
 
 	nn->nfsd_serv->sv_maxconn = nn->max_connections;
 	error = svc_bind(nn->nfsd_serv, net);
 	if (error < 0) {
 		svc_destroy(nn->nfsd_serv);
+		nfsd_complete_shutdown(net);
 		return error;
 	}
 
@@ -671,7 +704,7 @@  void nfsd_destroy(struct net *net)
 		svc_shutdown_net(nn->nfsd_serv, net);
 	svc_destroy(nn->nfsd_serv);
 	if (destroy)
-		nn->nfsd_serv = NULL;
+		nfsd_complete_shutdown(net);
 }
 
 int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)