diff mbox series

[v8,09/18] nfsd: use percpu_ref to interlock nfsd_destroy_serv and nfsd_open_local_fh

Message ID 20240626182438.69539-10-snitzer@kernel.org (mailing list archive)
State New
Headers show
Series nfs/nfsd: add support for localio | expand

Commit Message

Mike Snitzer June 26, 2024, 6:24 p.m. UTC
Introduce nfsd_serv_get and nfsd_serv_put and update the nfsd code to
prevent nfsd_destroy_serv from destroying nn->nfsd_serv until any
client initiated localio calls to nfsd (that are _not_ in the context
of nfsd) are complete.

nfsd_open_local_fh is updated to nfsd_serv_get before opening its file
handle and then drop the reference using nfsd_serv_put at the end of
nfsd_open_local_fh.

This "interlock" working relies heavily on nfsd_open_local_fh()'s
maybe_get_net() safely dealing with the possibility that the struct
net (and nfsd_net by association) may have been destroyed by
nfsd_destroy_serv() via nfsd_shutdown_net().

Verified to fix an easy to hit crash that would occur if an nfsd
instance running in a container, with a localio client mounted, is
shutdown. Upon restart of the container and associated nfsd the client
would go on to crash due to NULL pointer dereference that occuured due
to the nfs client's localio attempting to nfsd_open_local_fh(), using
nn->nfsd_serv, without having a proper reference on nn->nfsd_serv.

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfsd/localio.c |  2 ++
 fs/nfsd/netns.h   |  8 +++++++-
 fs/nfsd/nfssvc.c  | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)

Comments

Jeff Layton June 27, 2024, 11:24 a.m. UTC | #1
On Wed, 2024-06-26 at 14:24 -0400, Mike Snitzer wrote:
> Introduce nfsd_serv_get and nfsd_serv_put and update the nfsd code to
> prevent nfsd_destroy_serv from destroying nn->nfsd_serv until any
> client initiated localio calls to nfsd (that are _not_ in the context
> of nfsd) are complete.
> 
> nfsd_open_local_fh is updated to nfsd_serv_get before opening its file
> handle and then drop the reference using nfsd_serv_put at the end of
> nfsd_open_local_fh.
> 
> This "interlock" working relies heavily on nfsd_open_local_fh()'s
> maybe_get_net() safely dealing with the possibility that the struct
> net (and nfsd_net by association) may have been destroyed by
> nfsd_destroy_serv() via nfsd_shutdown_net().
> 
> Verified to fix an easy to hit crash that would occur if an nfsd
> instance running in a container, with a localio client mounted, is
> shutdown. Upon restart of the container and associated nfsd the client
> would go on to crash due to NULL pointer dereference that occuured due
> to the nfs client's localio attempting to nfsd_open_local_fh(), using
> nn->nfsd_serv, without having a proper reference on nn->nfsd_serv.
> 
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  fs/nfsd/localio.c |  2 ++
>  fs/nfsd/netns.h   |  8 +++++++-
>  fs/nfsd/nfssvc.c  | 39 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
> index 48118db801b5..819589ae2008 100644
> --- a/fs/nfsd/localio.c
> +++ b/fs/nfsd/localio.c
> @@ -204,6 +204,7 @@ int nfsd_open_local_fh(struct net *net,
>  	}
>  	nn = net_generic(net, nfsd_net_id);
>  
> +	nfsd_serv_get(nn);
>  	serv = READ_ONCE(nn->nfsd_serv);
>  	if (unlikely(!serv)) {
>  		dprintk("%s: localio denied. Server not running\n", __func__);
> @@ -244,6 +245,7 @@ int nfsd_open_local_fh(struct net *net,
>  out_revertcred:
>  	revert_creds(save_cred);
>  out_net:
> +	nfsd_serv_put(nn);
>  	put_net(net);
>  	return status;
>  }
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 0c5a1d97e4ac..17a73c780ca1 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -13,6 +13,7 @@
>  #include <linux/filelock.h>
>  #include <linux/nfs4.h>
>  #include <linux/percpu_counter.h>
> +#include <linux/percpu-refcount.h>
>  #include <linux/siphash.h>
>  #include <linux/sunrpc/stats.h>
>  #include <linux/nfslocalio.h>
> @@ -140,7 +141,9 @@ struct nfsd_net {
>  
>  	struct svc_info nfsd_info;
>  #define nfsd_serv nfsd_info.serv
> -
> +	struct percpu_ref nfsd_serv_ref;
> +	struct completion nfsd_serv_confirm_done;
> +	struct completion nfsd_serv_free_done;
>  
>  	/*
>  	 * clientid and stateid data for construction of net unique COPY
> @@ -225,6 +228,9 @@ struct nfsd_net {
>  extern bool nfsd_support_version(int vers);
>  extern void nfsd_netns_free_versions(struct nfsd_net *nn);
>  
> +void nfsd_serv_get(struct nfsd_net *nn);
> +void nfsd_serv_put(struct nfsd_net *nn);
> +
>  extern unsigned int nfsd_net_id;
>  
>  void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index a477d2c5088a..be5acb7a4057 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -258,6 +258,30 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
>  	return 0;
>  }
>  
> +void nfsd_serv_get(struct nfsd_net *nn)
> +{
> +	percpu_ref_get(&nn->nfsd_serv_ref);
> +}
> +
> +void nfsd_serv_put(struct nfsd_net *nn)
> +{
> +	percpu_ref_put(&nn->nfsd_serv_ref);
> +}
> +
> +static void nfsd_serv_done(struct percpu_ref *ref)
> +{
> +	struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
> +
> +	complete(&nn->nfsd_serv_confirm_done);
> +}
> +
> +static void nfsd_serv_free(struct percpu_ref *ref)
> +{
> +	struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
> +
> +	complete(&nn->nfsd_serv_free_done);
> +}
> +
>  /*
>   * Maximum number of nfsd processes
>   */
> @@ -462,6 +486,7 @@ static void nfsd_shutdown_net(struct net *net)
>  		lockd_down(net);
>  		nn->lockd_up = false;
>  	}
> +	percpu_ref_exit(&nn->nfsd_serv_ref);
>  #if IS_ENABLED(CONFIG_NFSD_LOCALIO)
>  	list_del_rcu(&nn->nfsd_uuid.list);
>  #endif
> @@ -544,6 +569,13 @@ void nfsd_destroy_serv(struct net *net)
>  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  	struct svc_serv *serv = nn->nfsd_serv;
>  
> +	lockdep_assert_held(&nfsd_mutex);
> +
> +	percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done);
> +	wait_for_completion(&nn->nfsd_serv_confirm_done);
> +	wait_for_completion(&nn->nfsd_serv_free_done);
> +	/* percpu_ref_exit is called in nfsd_shutdown_net */
> +

At this point where you're waiting on these completion vars, what stops
the client from issuing new localio requests?


>  	spin_lock(&nfsd_notifier_lock);
>  	nn->nfsd_serv = NULL;
>  	spin_unlock(&nfsd_notifier_lock);
> @@ -666,6 +698,13 @@ int nfsd_create_serv(struct net *net)
>  	if (nn->nfsd_serv)
>  		return 0;
>  
> +	error = percpu_ref_init(&nn->nfsd_serv_ref, nfsd_serv_free,
> +				0, GFP_KERNEL);
> +	if (error)
> +		return error;
> +	init_completion(&nn->nfsd_serv_free_done);
> +	init_completion(&nn->nfsd_serv_confirm_done);
> +
>  	if (nfsd_max_blksize == 0)
>  		nfsd_max_blksize = nfsd_get_default_max_blksize();
>  	nfsd_reset_versions(nn);
Mike Snitzer June 27, 2024, 5:14 p.m. UTC | #2
On Thu, Jun 27, 2024 at 07:24:54AM -0400, Jeff Layton wrote:
> On Wed, 2024-06-26 at 14:24 -0400, Mike Snitzer wrote:
> > Introduce nfsd_serv_get and nfsd_serv_put and update the nfsd code to
> > prevent nfsd_destroy_serv from destroying nn->nfsd_serv until any
> > client initiated localio calls to nfsd (that are _not_ in the context
> > of nfsd) are complete.
> > 
> > nfsd_open_local_fh is updated to nfsd_serv_get before opening its file
> > handle and then drop the reference using nfsd_serv_put at the end of
> > nfsd_open_local_fh.
> > 
> > This "interlock" working relies heavily on nfsd_open_local_fh()'s
> > maybe_get_net() safely dealing with the possibility that the struct
> > net (and nfsd_net by association) may have been destroyed by
> > nfsd_destroy_serv() via nfsd_shutdown_net().
> > 
> > Verified to fix an easy to hit crash that would occur if an nfsd
> > instance running in a container, with a localio client mounted, is
> > shutdown. Upon restart of the container and associated nfsd the client
> > would go on to crash due to NULL pointer dereference that occuured due
> > to the nfs client's localio attempting to nfsd_open_local_fh(), using
> > nn->nfsd_serv, without having a proper reference on nn->nfsd_serv.
> > 
> > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > ---
> >  fs/nfsd/localio.c |  2 ++
> >  fs/nfsd/netns.h   |  8 +++++++-
> >  fs/nfsd/nfssvc.c  | 39 +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 48 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
> > index 48118db801b5..819589ae2008 100644
> > --- a/fs/nfsd/localio.c
> > +++ b/fs/nfsd/localio.c
> > @@ -204,6 +204,7 @@ int nfsd_open_local_fh(struct net *net,
> >  	}
> >  	nn = net_generic(net, nfsd_net_id);
> >  
> > +	nfsd_serv_get(nn);
> >  	serv = READ_ONCE(nn->nfsd_serv);
> >  	if (unlikely(!serv)) {
> >  		dprintk("%s: localio denied. Server not running\n", __func__);
> > @@ -244,6 +245,7 @@ int nfsd_open_local_fh(struct net *net,
> >  out_revertcred:
> >  	revert_creds(save_cred);
> >  out_net:
> > +	nfsd_serv_put(nn);
> >  	put_net(net);
> >  	return status;
> >  }
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 0c5a1d97e4ac..17a73c780ca1 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -13,6 +13,7 @@
> >  #include <linux/filelock.h>
> >  #include <linux/nfs4.h>
> >  #include <linux/percpu_counter.h>
> > +#include <linux/percpu-refcount.h>
> >  #include <linux/siphash.h>
> >  #include <linux/sunrpc/stats.h>
> >  #include <linux/nfslocalio.h>
> > @@ -140,7 +141,9 @@ struct nfsd_net {
> >  
> >  	struct svc_info nfsd_info;
> >  #define nfsd_serv nfsd_info.serv
> > -
> > +	struct percpu_ref nfsd_serv_ref;
> > +	struct completion nfsd_serv_confirm_done;
> > +	struct completion nfsd_serv_free_done;
> >  
> >  	/*
> >  	 * clientid and stateid data for construction of net unique COPY
> > @@ -225,6 +228,9 @@ struct nfsd_net {
> >  extern bool nfsd_support_version(int vers);
> >  extern void nfsd_netns_free_versions(struct nfsd_net *nn);
> >  
> > +void nfsd_serv_get(struct nfsd_net *nn);
> > +void nfsd_serv_put(struct nfsd_net *nn);
> > +
> >  extern unsigned int nfsd_net_id;
> >  
> >  void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index a477d2c5088a..be5acb7a4057 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -258,6 +258,30 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
> >  	return 0;
> >  }
> >  
> > +void nfsd_serv_get(struct nfsd_net *nn)
> > +{
> > +	percpu_ref_get(&nn->nfsd_serv_ref);
> > +}
> > +
> > +void nfsd_serv_put(struct nfsd_net *nn)
> > +{
> > +	percpu_ref_put(&nn->nfsd_serv_ref);
> > +}
> > +
> > +static void nfsd_serv_done(struct percpu_ref *ref)
> > +{
> > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
> > +
> > +	complete(&nn->nfsd_serv_confirm_done);
> > +}
> > +
> > +static void nfsd_serv_free(struct percpu_ref *ref)
> > +{
> > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
> > +
> > +	complete(&nn->nfsd_serv_free_done);
> > +}
> > +
> >  /*
> >   * Maximum number of nfsd processes
> >   */
> > @@ -462,6 +486,7 @@ static void nfsd_shutdown_net(struct net *net)
> >  		lockd_down(net);
> >  		nn->lockd_up = false;
> >  	}
> > +	percpu_ref_exit(&nn->nfsd_serv_ref);
> >  #if IS_ENABLED(CONFIG_NFSD_LOCALIO)
> >  	list_del_rcu(&nn->nfsd_uuid.list);
> >  #endif
> > @@ -544,6 +569,13 @@ void nfsd_destroy_serv(struct net *net)
> >  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> >  	struct svc_serv *serv = nn->nfsd_serv;
> >  
> > +	lockdep_assert_held(&nfsd_mutex);
> > +
> > +	percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done);
> > +	wait_for_completion(&nn->nfsd_serv_confirm_done);
> > +	wait_for_completion(&nn->nfsd_serv_free_done);
> > +	/* percpu_ref_exit is called in nfsd_shutdown_net */
> > +
> 
> At this point where you're waiting on these completion vars, what stops
> the client from issuing new localio requests?

The server going down.  The interlock is with nfsd_open_local_fh, so
once any active file opens complete the server will get these
completions and the server will go down.  The client will reconnect
if/when the server comes back up.

Maybe I'm not imaginative enough, but is there risk of never-ending
overlapping file opens that prevent nfsd_destroy_serv() from
progressing?

Mike
Jeff Layton June 27, 2024, 5:45 p.m. UTC | #3
On Thu, 2024-06-27 at 13:14 -0400, Mike Snitzer wrote:
> On Thu, Jun 27, 2024 at 07:24:54AM -0400, Jeff Layton wrote:
> > On Wed, 2024-06-26 at 14:24 -0400, Mike Snitzer wrote:
> > > Introduce nfsd_serv_get and nfsd_serv_put and update the nfsd
> > > code to
> > > prevent nfsd_destroy_serv from destroying nn->nfsd_serv until any
> > > client initiated localio calls to nfsd (that are _not_ in the
> > > context
> > > of nfsd) are complete.
> > > 
> > > nfsd_open_local_fh is updated to nfsd_serv_get before opening its
> > > file
> > > handle and then drop the reference using nfsd_serv_put at the end
> > > of
> > > nfsd_open_local_fh.
> > > 
> > > This "interlock" working relies heavily on nfsd_open_local_fh()'s
> > > maybe_get_net() safely dealing with the possibility that the
> > > struct
> > > net (and nfsd_net by association) may have been destroyed by
> > > nfsd_destroy_serv() via nfsd_shutdown_net().
> > > 
> > > Verified to fix an easy to hit crash that would occur if an nfsd
> > > instance running in a container, with a localio client mounted,
> > > is
> > > shutdown. Upon restart of the container and associated nfsd the
> > > client
> > > would go on to crash due to NULL pointer dereference that
> > > occuured due
> > > to the nfs client's localio attempting to nfsd_open_local_fh(),
> > > using
> > > nn->nfsd_serv, without having a proper reference on nn-
> > > >nfsd_serv.
> > > 
> > > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > > ---
> > >  fs/nfsd/localio.c |  2 ++
> > >  fs/nfsd/netns.h   |  8 +++++++-
> > >  fs/nfsd/nfssvc.c  | 39 +++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 48 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
> > > index 48118db801b5..819589ae2008 100644
> > > --- a/fs/nfsd/localio.c
> > > +++ b/fs/nfsd/localio.c
> > > @@ -204,6 +204,7 @@ int nfsd_open_local_fh(struct net *net,
> > >  	}
> > >  	nn = net_generic(net, nfsd_net_id);
> > >  
> > > +	nfsd_serv_get(nn);
> > >  	serv = READ_ONCE(nn->nfsd_serv);
> > >  	if (unlikely(!serv)) {
> > >  		dprintk("%s: localio denied. Server not
> > > running\n", __func__);
> > > @@ -244,6 +245,7 @@ int nfsd_open_local_fh(struct net *net,
> > >  out_revertcred:
> > >  	revert_creds(save_cred);
> > >  out_net:
> > > +	nfsd_serv_put(nn);
> > >  	put_net(net);
> > >  	return status;
> > >  }
> > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > > index 0c5a1d97e4ac..17a73c780ca1 100644
> > > --- a/fs/nfsd/netns.h
> > > +++ b/fs/nfsd/netns.h
> > > @@ -13,6 +13,7 @@
> > >  #include <linux/filelock.h>
> > >  #include <linux/nfs4.h>
> > >  #include <linux/percpu_counter.h>
> > > +#include <linux/percpu-refcount.h>
> > >  #include <linux/siphash.h>
> > >  #include <linux/sunrpc/stats.h>
> > >  #include <linux/nfslocalio.h>
> > > @@ -140,7 +141,9 @@ struct nfsd_net {
> > >  
> > >  	struct svc_info nfsd_info;
> > >  #define nfsd_serv nfsd_info.serv
> > > -
> > > +	struct percpu_ref nfsd_serv_ref;
> > > +	struct completion nfsd_serv_confirm_done;
> > > +	struct completion nfsd_serv_free_done;
> > >  
> > >  	/*
> > >  	 * clientid and stateid data for construction of net
> > > unique COPY
> > > @@ -225,6 +228,9 @@ struct nfsd_net {
> > >  extern bool nfsd_support_version(int vers);
> > >  extern void nfsd_netns_free_versions(struct nfsd_net *nn);
> > >  
> > > +void nfsd_serv_get(struct nfsd_net *nn);
> > > +void nfsd_serv_put(struct nfsd_net *nn);
> > > +
> > >  extern unsigned int nfsd_net_id;
> > >  
> > >  void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net
> > > *nn);
> > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > > index a477d2c5088a..be5acb7a4057 100644
> > > --- a/fs/nfsd/nfssvc.c
> > > +++ b/fs/nfsd/nfssvc.c
> > > @@ -258,6 +258,30 @@ int nfsd_minorversion(struct nfsd_net *nn,
> > > u32 minorversion, enum vers_op change
> > >  	return 0;
> > >  }
> > >  
> > > +void nfsd_serv_get(struct nfsd_net *nn)
> > > +{
> > > +	percpu_ref_get(&nn->nfsd_serv_ref);
> > > +}
> > > +
> > > +void nfsd_serv_put(struct nfsd_net *nn)
> > > +{
> > > +	percpu_ref_put(&nn->nfsd_serv_ref);
> > > +}
> > > +
> > > +static void nfsd_serv_done(struct percpu_ref *ref)
> > > +{
> > > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net,
> > > nfsd_serv_ref);
> > > +
> > > +	complete(&nn->nfsd_serv_confirm_done);
> > > +}
> > > +
> > > +static void nfsd_serv_free(struct percpu_ref *ref)
> > > +{
> > > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net,
> > > nfsd_serv_ref);
> > > +
> > > +	complete(&nn->nfsd_serv_free_done);
> > > +}
> > > +
> > >  /*
> > >   * Maximum number of nfsd processes
> > >   */
> > > @@ -462,6 +486,7 @@ static void nfsd_shutdown_net(struct net
> > > *net)
> > >  		lockd_down(net);
> > >  		nn->lockd_up = false;
> > >  	}
> > > +	percpu_ref_exit(&nn->nfsd_serv_ref);
> > >  #if IS_ENABLED(CONFIG_NFSD_LOCALIO)
> > >  	list_del_rcu(&nn->nfsd_uuid.list);
> > >  #endif
> > > @@ -544,6 +569,13 @@ void nfsd_destroy_serv(struct net *net)
> > >  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > >  	struct svc_serv *serv = nn->nfsd_serv;
> > >  
> > > +	lockdep_assert_held(&nfsd_mutex);
> > > +
> > > +	percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref,
> > > nfsd_serv_done);
> > > +	wait_for_completion(&nn->nfsd_serv_confirm_done);
> > > +	wait_for_completion(&nn->nfsd_serv_free_done);
> > > +	/* percpu_ref_exit is called in nfsd_shutdown_net */
> > > +
> > 
> > At this point where you're waiting on these completion vars, what
> > stops
> > the client from issuing new localio requests?
> 
> The server going down.  The interlock is with nfsd_open_local_fh, so
> once any active file opens complete the server will get these
> completions and the server will go down.  The client will reconnect
> if/when the server comes back up.
> 
> Maybe I'm not imaginative enough, but is there risk of never-ending
> overlapping file opens that prevent nfsd_destroy_serv() from
> progressing?
> 

Yeah, I think it's a possibility, especially if the underlying
filesystem is particularly slow. You can probably solve that by just
not handing out new references somehow once you're going to call
percpu_ref_kill_and_confirm.
NeilBrown June 28, 2024, 3:40 a.m. UTC | #4
On Fri, 28 Jun 2024, Jeff Layton wrote:
> On Thu, 2024-06-27 at 13:14 -0400, Mike Snitzer wrote:
> > On Thu, Jun 27, 2024 at 07:24:54AM -0400, Jeff Layton wrote:
> > > On Wed, 2024-06-26 at 14:24 -0400, Mike Snitzer wrote:
> > > > Introduce nfsd_serv_get and nfsd_serv_put and update the nfsd
> > > > code to
> > > > prevent nfsd_destroy_serv from destroying nn->nfsd_serv until any
> > > > client initiated localio calls to nfsd (that are _not_ in the
> > > > context
> > > > of nfsd) are complete.
> > > > 
> > > > nfsd_open_local_fh is updated to nfsd_serv_get before opening its
> > > > file
> > > > handle and then drop the reference using nfsd_serv_put at the end
> > > > of
> > > > nfsd_open_local_fh.
> > > > 
> > > > This "interlock" working relies heavily on nfsd_open_local_fh()'s
> > > > maybe_get_net() safely dealing with the possibility that the
> > > > struct
> > > > net (and nfsd_net by association) may have been destroyed by
> > > > nfsd_destroy_serv() via nfsd_shutdown_net().
> > > > 
> > > > Verified to fix an easy to hit crash that would occur if an nfsd
> > > > instance running in a container, with a localio client mounted,
> > > > is
> > > > shutdown. Upon restart of the container and associated nfsd the
> > > > client
> > > > would go on to crash due to NULL pointer dereference that
> > > > occuured due
> > > > to the nfs client's localio attempting to nfsd_open_local_fh(),
> > > > using
> > > > nn->nfsd_serv, without having a proper reference on nn-
> > > > >nfsd_serv.
> > > > 
> > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > > > ---
> > > >  fs/nfsd/localio.c |  2 ++
> > > >  fs/nfsd/netns.h   |  8 +++++++-
> > > >  fs/nfsd/nfssvc.c  | 39 +++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 48 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
> > > > index 48118db801b5..819589ae2008 100644
> > > > --- a/fs/nfsd/localio.c
> > > > +++ b/fs/nfsd/localio.c
> > > > @@ -204,6 +204,7 @@ int nfsd_open_local_fh(struct net *net,
> > > >  	}
> > > >  	nn = net_generic(net, nfsd_net_id);
> > > >  
> > > > +	nfsd_serv_get(nn);
> > > >  	serv = READ_ONCE(nn->nfsd_serv);
> > > >  	if (unlikely(!serv)) {
> > > >  		dprintk("%s: localio denied. Server not
> > > > running\n", __func__);
> > > > @@ -244,6 +245,7 @@ int nfsd_open_local_fh(struct net *net,
> > > >  out_revertcred:
> > > >  	revert_creds(save_cred);
> > > >  out_net:
> > > > +	nfsd_serv_put(nn);
> > > >  	put_net(net);
> > > >  	return status;
> > > >  }
> > > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > > > index 0c5a1d97e4ac..17a73c780ca1 100644
> > > > --- a/fs/nfsd/netns.h
> > > > +++ b/fs/nfsd/netns.h
> > > > @@ -13,6 +13,7 @@
> > > >  #include <linux/filelock.h>
> > > >  #include <linux/nfs4.h>
> > > >  #include <linux/percpu_counter.h>
> > > > +#include <linux/percpu-refcount.h>
> > > >  #include <linux/siphash.h>
> > > >  #include <linux/sunrpc/stats.h>
> > > >  #include <linux/nfslocalio.h>
> > > > @@ -140,7 +141,9 @@ struct nfsd_net {
> > > >  
> > > >  	struct svc_info nfsd_info;
> > > >  #define nfsd_serv nfsd_info.serv
> > > > -
> > > > +	struct percpu_ref nfsd_serv_ref;
> > > > +	struct completion nfsd_serv_confirm_done;
> > > > +	struct completion nfsd_serv_free_done;
> > > >  
> > > >  	/*
> > > >  	 * clientid and stateid data for construction of net
> > > > unique COPY
> > > > @@ -225,6 +228,9 @@ struct nfsd_net {
> > > >  extern bool nfsd_support_version(int vers);
> > > >  extern void nfsd_netns_free_versions(struct nfsd_net *nn);
> > > >  
> > > > +void nfsd_serv_get(struct nfsd_net *nn);
> > > > +void nfsd_serv_put(struct nfsd_net *nn);
> > > > +
> > > >  extern unsigned int nfsd_net_id;
> > > >  
> > > >  void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net
> > > > *nn);
> > > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > > > index a477d2c5088a..be5acb7a4057 100644
> > > > --- a/fs/nfsd/nfssvc.c
> > > > +++ b/fs/nfsd/nfssvc.c
> > > > @@ -258,6 +258,30 @@ int nfsd_minorversion(struct nfsd_net *nn,
> > > > u32 minorversion, enum vers_op change
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +void nfsd_serv_get(struct nfsd_net *nn)
> > > > +{
> > > > +	percpu_ref_get(&nn->nfsd_serv_ref);
> > > > +}
> > > > +
> > > > +void nfsd_serv_put(struct nfsd_net *nn)
> > > > +{
> > > > +	percpu_ref_put(&nn->nfsd_serv_ref);
> > > > +}
> > > > +
> > > > +static void nfsd_serv_done(struct percpu_ref *ref)
> > > > +{
> > > > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net,
> > > > nfsd_serv_ref);
> > > > +
> > > > +	complete(&nn->nfsd_serv_confirm_done);
> > > > +}
> > > > +
> > > > +static void nfsd_serv_free(struct percpu_ref *ref)
> > > > +{
> > > > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net,
> > > > nfsd_serv_ref);
> > > > +
> > > > +	complete(&nn->nfsd_serv_free_done);
> > > > +}
> > > > +
> > > >  /*
> > > >   * Maximum number of nfsd processes
> > > >   */
> > > > @@ -462,6 +486,7 @@ static void nfsd_shutdown_net(struct net
> > > > *net)
> > > >  		lockd_down(net);
> > > >  		nn->lockd_up = false;
> > > >  	}
> > > > +	percpu_ref_exit(&nn->nfsd_serv_ref);
> > > >  #if IS_ENABLED(CONFIG_NFSD_LOCALIO)
> > > >  	list_del_rcu(&nn->nfsd_uuid.list);
> > > >  #endif
> > > > @@ -544,6 +569,13 @@ void nfsd_destroy_serv(struct net *net)
> > > >  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > > >  	struct svc_serv *serv = nn->nfsd_serv;
> > > >  
> > > > +	lockdep_assert_held(&nfsd_mutex);
> > > > +
> > > > +	percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref,
> > > > nfsd_serv_done);
> > > > +	wait_for_completion(&nn->nfsd_serv_confirm_done);
> > > > +	wait_for_completion(&nn->nfsd_serv_free_done);
> > > > +	/* percpu_ref_exit is called in nfsd_shutdown_net */
> > > > +
> > > 
> > > At this point where you're waiting on these completion vars, what
> > > stops
> > > the client from issuing new localio requests?
> > 
> > The server going down.  The interlock is with nfsd_open_local_fh, so
> > once any active file opens complete the server will get these
> > completions and the server will go down.  The client will reconnect
> > if/when the server comes back up.
> > 
> > Maybe I'm not imaginative enough, but is there risk of never-ending
> > overlapping file opens that prevent nfsd_destroy_serv() from
> > progressing?
> > 
> 
> Yeah, I think it's a possibility, especially if the underlying
> filesystem is particularly slow. You can probably solve that by just
> not handing out new references somehow once you're going to call
> percpu_ref_kill_and_confirm.

percpu-refcount has just to tool you need.

Use percpu_ref_tryget_live().  nfsd_serv_get() must be able to fail, and
so it must use this.  If it does then it can safely read ->nfsd_serv
without checking for NULL.

NeilBrown
Mike Snitzer June 28, 2024, 3:49 a.m. UTC | #5
On Fri, Jun 28, 2024 at 01:40:16PM +1000, NeilBrown wrote:
> On Fri, 28 Jun 2024, Jeff Layton wrote:
> > On Thu, 2024-06-27 at 13:14 -0400, Mike Snitzer wrote:
> > > On Thu, Jun 27, 2024 at 07:24:54AM -0400, Jeff Layton wrote:
> > > > On Wed, 2024-06-26 at 14:24 -0400, Mike Snitzer wrote:
> > > > > Introduce nfsd_serv_get and nfsd_serv_put and update the nfsd
> > > > > code to
> > > > > prevent nfsd_destroy_serv from destroying nn->nfsd_serv until any
> > > > > client initiated localio calls to nfsd (that are _not_ in the
> > > > > context
> > > > > of nfsd) are complete.
> > > > > 
> > > > > nfsd_open_local_fh is updated to nfsd_serv_get before opening its
> > > > > file
> > > > > handle and then drop the reference using nfsd_serv_put at the end
> > > > > of
> > > > > nfsd_open_local_fh.
> > > > > 
> > > > > This "interlock" working relies heavily on nfsd_open_local_fh()'s
> > > > > maybe_get_net() safely dealing with the possibility that the
> > > > > struct
> > > > > net (and nfsd_net by association) may have been destroyed by
> > > > > nfsd_destroy_serv() via nfsd_shutdown_net().
> > > > > 
> > > > > Verified to fix an easy to hit crash that would occur if an nfsd
> > > > > instance running in a container, with a localio client mounted,
> > > > > is
> > > > > shutdown. Upon restart of the container and associated nfsd the
> > > > > client
> > > > > would go on to crash due to NULL pointer dereference that
> > > > > occuured due
> > > > > to the nfs client's localio attempting to nfsd_open_local_fh(),
> > > > > using
> > > > > nn->nfsd_serv, without having a proper reference on nn-
> > > > > >nfsd_serv.
> > > > > 
> > > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > > > > ---
> > > > >  fs/nfsd/localio.c |  2 ++
> > > > >  fs/nfsd/netns.h   |  8 +++++++-
> > > > >  fs/nfsd/nfssvc.c  | 39 +++++++++++++++++++++++++++++++++++++++
> > > > >  3 files changed, 48 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
> > > > > index 48118db801b5..819589ae2008 100644
> > > > > --- a/fs/nfsd/localio.c
> > > > > +++ b/fs/nfsd/localio.c
> > > > > @@ -204,6 +204,7 @@ int nfsd_open_local_fh(struct net *net,
> > > > >  	}
> > > > >  	nn = net_generic(net, nfsd_net_id);
> > > > >  
> > > > > +	nfsd_serv_get(nn);
> > > > >  	serv = READ_ONCE(nn->nfsd_serv);
> > > > >  	if (unlikely(!serv)) {
> > > > >  		dprintk("%s: localio denied. Server not
> > > > > running\n", __func__);
> > > > > @@ -244,6 +245,7 @@ int nfsd_open_local_fh(struct net *net,
> > > > >  out_revertcred:
> > > > >  	revert_creds(save_cred);
> > > > >  out_net:
> > > > > +	nfsd_serv_put(nn);
> > > > >  	put_net(net);
> > > > >  	return status;
> > > > >  }
> > > > > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > > > > index 0c5a1d97e4ac..17a73c780ca1 100644
> > > > > --- a/fs/nfsd/netns.h
> > > > > +++ b/fs/nfsd/netns.h
> > > > > @@ -13,6 +13,7 @@
> > > > >  #include <linux/filelock.h>
> > > > >  #include <linux/nfs4.h>
> > > > >  #include <linux/percpu_counter.h>
> > > > > +#include <linux/percpu-refcount.h>
> > > > >  #include <linux/siphash.h>
> > > > >  #include <linux/sunrpc/stats.h>
> > > > >  #include <linux/nfslocalio.h>
> > > > > @@ -140,7 +141,9 @@ struct nfsd_net {
> > > > >  
> > > > >  	struct svc_info nfsd_info;
> > > > >  #define nfsd_serv nfsd_info.serv
> > > > > -
> > > > > +	struct percpu_ref nfsd_serv_ref;
> > > > > +	struct completion nfsd_serv_confirm_done;
> > > > > +	struct completion nfsd_serv_free_done;
> > > > >  
> > > > >  	/*
> > > > >  	 * clientid and stateid data for construction of net
> > > > > unique COPY
> > > > > @@ -225,6 +228,9 @@ struct nfsd_net {
> > > > >  extern bool nfsd_support_version(int vers);
> > > > >  extern void nfsd_netns_free_versions(struct nfsd_net *nn);
> > > > >  
> > > > > +void nfsd_serv_get(struct nfsd_net *nn);
> > > > > +void nfsd_serv_put(struct nfsd_net *nn);
> > > > > +
> > > > >  extern unsigned int nfsd_net_id;
> > > > >  
> > > > >  void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net
> > > > > *nn);
> > > > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > > > > index a477d2c5088a..be5acb7a4057 100644
> > > > > --- a/fs/nfsd/nfssvc.c
> > > > > +++ b/fs/nfsd/nfssvc.c
> > > > > @@ -258,6 +258,30 @@ int nfsd_minorversion(struct nfsd_net *nn,
> > > > > u32 minorversion, enum vers_op change
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +void nfsd_serv_get(struct nfsd_net *nn)
> > > > > +{
> > > > > +	percpu_ref_get(&nn->nfsd_serv_ref);
> > > > > +}
> > > > > +
> > > > > +void nfsd_serv_put(struct nfsd_net *nn)
> > > > > +{
> > > > > +	percpu_ref_put(&nn->nfsd_serv_ref);
> > > > > +}
> > > > > +
> > > > > +static void nfsd_serv_done(struct percpu_ref *ref)
> > > > > +{
> > > > > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net,
> > > > > nfsd_serv_ref);
> > > > > +
> > > > > +	complete(&nn->nfsd_serv_confirm_done);
> > > > > +}
> > > > > +
> > > > > +static void nfsd_serv_free(struct percpu_ref *ref)
> > > > > +{
> > > > > +	struct nfsd_net *nn = container_of(ref, struct nfsd_net,
> > > > > nfsd_serv_ref);
> > > > > +
> > > > > +	complete(&nn->nfsd_serv_free_done);
> > > > > +}
> > > > > +
> > > > >  /*
> > > > >   * Maximum number of nfsd processes
> > > > >   */
> > > > > @@ -462,6 +486,7 @@ static void nfsd_shutdown_net(struct net
> > > > > *net)
> > > > >  		lockd_down(net);
> > > > >  		nn->lockd_up = false;
> > > > >  	}
> > > > > +	percpu_ref_exit(&nn->nfsd_serv_ref);
> > > > >  #if IS_ENABLED(CONFIG_NFSD_LOCALIO)
> > > > >  	list_del_rcu(&nn->nfsd_uuid.list);
> > > > >  #endif
> > > > > @@ -544,6 +569,13 @@ void nfsd_destroy_serv(struct net *net)
> > > > >  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > > > >  	struct svc_serv *serv = nn->nfsd_serv;
> > > > >  
> > > > > +	lockdep_assert_held(&nfsd_mutex);
> > > > > +
> > > > > +	percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref,
> > > > > nfsd_serv_done);
> > > > > +	wait_for_completion(&nn->nfsd_serv_confirm_done);
> > > > > +	wait_for_completion(&nn->nfsd_serv_free_done);
> > > > > +	/* percpu_ref_exit is called in nfsd_shutdown_net */
> > > > > +
> > > > 
> > > > At this point where you're waiting on these completion vars, what
> > > > stops
> > > > the client from issuing new localio requests?
> > > 
> > > The server going down.  The interlock is with nfsd_open_local_fh, so
> > > once any active file opens complete the server will get these
> > > completions and the server will go down.  The client will reconnect
> > > if/when the server comes back up.
> > > 
> > > Maybe I'm not imaginative enough, but is there risk of never-ending
> > > overlapping file opens that prevent nfsd_destroy_serv() from
> > > progressing?
> > > 
> > 
> > Yeah, I think it's a possibility, especially if the underlying
> > filesystem is particularly slow. You can probably solve that by just
> > not handing out new references somehow once you're going to call
> > percpu_ref_kill_and_confirm.
> 
> percpu-refcount has just to tool you need.
> 
> Use percpu_ref_tryget_live().  nfsd_serv_get() must be able to fail, and
> so it must use this.  If it does then it can safely read ->nfsd_serv
> without checking for NULL.

Indeed, I am actively coding the switch to using
percpu_ref_tryget_live, so your message confirming I was on the right
track is both timely and helpful.
diff mbox series

Patch

diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 48118db801b5..819589ae2008 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -204,6 +204,7 @@  int nfsd_open_local_fh(struct net *net,
 	}
 	nn = net_generic(net, nfsd_net_id);
 
+	nfsd_serv_get(nn);
 	serv = READ_ONCE(nn->nfsd_serv);
 	if (unlikely(!serv)) {
 		dprintk("%s: localio denied. Server not running\n", __func__);
@@ -244,6 +245,7 @@  int nfsd_open_local_fh(struct net *net,
 out_revertcred:
 	revert_creds(save_cred);
 out_net:
+	nfsd_serv_put(nn);
 	put_net(net);
 	return status;
 }
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 0c5a1d97e4ac..17a73c780ca1 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -13,6 +13,7 @@ 
 #include <linux/filelock.h>
 #include <linux/nfs4.h>
 #include <linux/percpu_counter.h>
+#include <linux/percpu-refcount.h>
 #include <linux/siphash.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/nfslocalio.h>
@@ -140,7 +141,9 @@  struct nfsd_net {
 
 	struct svc_info nfsd_info;
 #define nfsd_serv nfsd_info.serv
-
+	struct percpu_ref nfsd_serv_ref;
+	struct completion nfsd_serv_confirm_done;
+	struct completion nfsd_serv_free_done;
 
 	/*
 	 * clientid and stateid data for construction of net unique COPY
@@ -225,6 +228,9 @@  struct nfsd_net {
 extern bool nfsd_support_version(int vers);
 extern void nfsd_netns_free_versions(struct nfsd_net *nn);
 
+void nfsd_serv_get(struct nfsd_net *nn);
+void nfsd_serv_put(struct nfsd_net *nn);
+
 extern unsigned int nfsd_net_id;
 
 void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a477d2c5088a..be5acb7a4057 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -258,6 +258,30 @@  int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
 	return 0;
 }
 
+void nfsd_serv_get(struct nfsd_net *nn)
+{
+	percpu_ref_get(&nn->nfsd_serv_ref);
+}
+
+void nfsd_serv_put(struct nfsd_net *nn)
+{
+	percpu_ref_put(&nn->nfsd_serv_ref);
+}
+
+static void nfsd_serv_done(struct percpu_ref *ref)
+{
+	struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
+
+	complete(&nn->nfsd_serv_confirm_done);
+}
+
+static void nfsd_serv_free(struct percpu_ref *ref)
+{
+	struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_serv_ref);
+
+	complete(&nn->nfsd_serv_free_done);
+}
+
 /*
  * Maximum number of nfsd processes
  */
@@ -462,6 +486,7 @@  static void nfsd_shutdown_net(struct net *net)
 		lockd_down(net);
 		nn->lockd_up = false;
 	}
+	percpu_ref_exit(&nn->nfsd_serv_ref);
 #if IS_ENABLED(CONFIG_NFSD_LOCALIO)
 	list_del_rcu(&nn->nfsd_uuid.list);
 #endif
@@ -544,6 +569,13 @@  void nfsd_destroy_serv(struct net *net)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	struct svc_serv *serv = nn->nfsd_serv;
 
+	lockdep_assert_held(&nfsd_mutex);
+
+	percpu_ref_kill_and_confirm(&nn->nfsd_serv_ref, nfsd_serv_done);
+	wait_for_completion(&nn->nfsd_serv_confirm_done);
+	wait_for_completion(&nn->nfsd_serv_free_done);
+	/* percpu_ref_exit is called in nfsd_shutdown_net */
+
 	spin_lock(&nfsd_notifier_lock);
 	nn->nfsd_serv = NULL;
 	spin_unlock(&nfsd_notifier_lock);
@@ -666,6 +698,13 @@  int nfsd_create_serv(struct net *net)
 	if (nn->nfsd_serv)
 		return 0;
 
+	error = percpu_ref_init(&nn->nfsd_serv_ref, nfsd_serv_free,
+				0, GFP_KERNEL);
+	if (error)
+		return error;
+	init_completion(&nn->nfsd_serv_free_done);
+	init_completion(&nn->nfsd_serv_confirm_done);
+
 	if (nfsd_max_blksize == 0)
 		nfsd_max_blksize = nfsd_get_default_max_blksize();
 	nfsd_reset_versions(nn);