
[05/14] sunrpc: change sp_nrthreads from atomic_t to unsigned int.

Message ID 20240715074657.18174-6-neilb@suse.de (mailing list archive)
State New
Series support automatic changes to nfsd thread count

Commit Message

NeilBrown July 15, 2024, 7:14 a.m. UTC
sp_nrthreads is only ever accessed under the service mutex
  nlmsvc_mutex nfs_callback_mutex nfsd_mutex
so there is no need for it to be an atomic_t.

The fact that all code using it is single-threaded means that we can
simplify svc_pool_victim and remove the temporary elevation of
sp_nrthreads.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfsctl.c           |  2 +-
 fs/nfsd/nfssvc.c           |  2 +-
 include/linux/sunrpc/svc.h |  4 ++--
 net/sunrpc/svc.c           | 31 +++++++++++--------------------
 4 files changed, 15 insertions(+), 24 deletions(-)

Comments

Jeff Layton July 15, 2024, 2:12 p.m. UTC | #1
On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote:
> sp_nrthreads is only ever accessed under the service mutex
>   nlmsvc_mutex nfs_callback_mutex nfsd_mutex
> so there is no need for it to be an atomic_t.
> 
> The fact that all code using it is single-threaded means that we can
> simplify svc_pool_victim and remove the temporary elevation of
> sp_nrthreads.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfsctl.c           |  2 +-
>  fs/nfsd/nfssvc.c           |  2 +-
>  include/linux/sunrpc/svc.h |  4 ++--
>  net/sunrpc/svc.c           | 31 +++++++++++--------------------
>  4 files changed, 15 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 5b0f2e0d7ccf..d85b6d1fa31f 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
>  			struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
>  
>  			err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
> -					  atomic_read(&sp->sp_nrthreads));
> +					  sp->sp_nrthreads);
>  			if (err)
>  				goto err_unlock;
>  		}
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 4438cdcd4873..7377422a34df 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
>  
>  	if (serv)
>  		for (i = 0; i < serv->sv_nrpools && i < n; i++)
> -			nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
> +			nthreads[i] = serv->sv_pools[i].sp_nrthreads;
>  	return 0;
>  }
>  
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index e4fa25fafa97..99e9345d829e 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -33,9 +33,9 @@
>   * node traffic on multi-node NUMA NFS servers.
>   */
>  struct svc_pool {
> -	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
> +	unsigned int		sp_id;		/* pool id; also node id on NUMA */
>  	struct lwq		sp_xprts;	/* pending transports */
> -	atomic_t		sp_nrthreads;	/* # of threads in pool */
> +	unsigned int		sp_nrthreads;	/* # of threads in pool */
>  	struct list_head	sp_all_threads;	/* all server threads */
>  	struct llist_head	sp_idle_threads; /* idle server threads */
>  
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 072ad115ae3d..0d8588bc693c 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>  	serv->sv_nrthreads += 1;
>  	spin_unlock_bh(&serv->sv_lock);
>  
> -	atomic_inc(&pool->sp_nrthreads);
> +	pool->sp_nrthreads += 1;
>  
>  	/* Protected by whatever lock the service uses when calling
>  	 * svc_set_num_threads()
> @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
>  	struct svc_pool *pool;
>  	unsigned int i;
>  
> -retry:
>  	pool = target_pool;
>  
> -	if (pool != NULL) {
> -		if (atomic_inc_not_zero(&pool->sp_nrthreads))
> -			goto found_pool;
> -		return NULL;
> -	} else {
> +	if (!pool) {
>  		for (i = 0; i < serv->sv_nrpools; i++) {
>  			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
> -			if (atomic_inc_not_zero(&pool->sp_nrthreads))
> -				goto found_pool;
> +			if (pool->sp_nrthreads)
> +				break;
>  		}
> -		return NULL;
>  	}
>  
> -found_pool:
> -	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> -	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> -	if (!atomic_dec_and_test(&pool->sp_nrthreads))
> +	if (pool && pool->sp_nrthreads) {
> +		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> +		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
>  		return pool;
> -	/* Nothing left in this pool any more */
> -	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
> -	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> -	goto retry;
> +	}
> +	return NULL;
>  }
>  
>  static int
> @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  	if (!pool)
>  		nrservs -= serv->sv_nrthreads;
>  	else
> -		nrservs -= atomic_read(&pool->sp_nrthreads);
> +		nrservs -= pool->sp_nrthreads;
>  
>  	if (nrservs > 0)
>  		return svc_start_kthreads(serv, pool, nrservs);
> @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
>  
>  	list_del_rcu(&rqstp->rq_all);
>  
> -	atomic_dec(&pool->sp_nrthreads);
> +	pool->sp_nrthreads -= 1;
>  
>  	spin_lock_bh(&serv->sv_lock);
>  	serv->sv_nrthreads -= 1;

I don't think svc_exit_thread is called with the nfsd_mutex held, so if
several threads were exiting at the same time, they could race here.
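
For background on why an unlocked decrement would be a problem if it really
could run concurrently: "sp_nrthreads -= 1" is a read-modify-write, so two
racing threads can lose an update. A minimal userspace sketch of that failure
mode (pthreads; deliberately racy, illustrative names only, not the kernel
code):

#include <pthread.h>
#include <stdio.h>

static unsigned int nrthreads = 2;	/* stands in for sp_nrthreads */

/* Each "exiting thread" performs the unlocked read-modify-write. */
static void *exit_path(void *unused)
{
	(void)unused;
	unsigned int tmp = nrthreads;	/* both threads may read 2 ... */
	tmp -= 1;
	nrthreads = tmp;		/* ... and both store 1: one decrement is lost */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, exit_path, NULL);
	pthread_create(&b, NULL, exit_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* Usually prints 0, but 1 is possible when the updates overlap. */
	printf("nrthreads = %u\n", nrthreads);
	return 0;
}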
Jeff Layton July 15, 2024, 2:33 p.m. UTC | #2
On Mon, 2024-07-15 at 10:12 -0400, Jeff Layton wrote:
> 
> I don't think svc_exit_thread is called with the nfsd_mutex held, so if
> several threads were exiting at the same time, they could race here.
> 


Ok, the changelog on #7 might point out why I'm wrong here.

nfsd() calls svc_exit_thread() when exiting, but I missed that this
implies svc_stop_kthreads() is running in another task (and that the
nfsd_mutex is actually held). It also looks like the threads have to be
torn down serially, so there should be no race there after all.

Either way, could I trouble you to add a comment about this above
svc_exit_thread? That's a really subtle interaction and it would be
good to document it.
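
For reference, a sketch of what such a comment might look like, based on the
explanation NeilBrown gives later in this thread (illustrative wording only,
not actual kernel code):

/*
 * svc_exit_thread() runs after svc_thread_should_stop() has returned
 * true.  Normally that means svc_stop_kthreads() marked this thread as
 * a victim while holding the service mutex, and it keeps that mutex
 * held while waiting for SP_VICTIM_REMAINS to be cleared - the last
 * thing done here - so the unlocked updates below cannot race.  In the
 * remaining case (nfsd()'s ETIMEDOUT exit) the caller takes the mutex
 * itself.
 */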

Thanks,
NeilBrown July 16, 2024, 1:33 a.m. UTC | #3
On Tue, 16 Jul 2024, Jeff Layton wrote:
> 
> I don't think svc_exit_thread is called with the nfsd_mutex held, so if
> several threads were exiting at the same time, they could race here.

This is subtle and deserves explanation in the commit.
svc_exit_thread() is called in a thread *after* svc_thread_should_stop()
has returned true.  That means RQ_VICTIM is set and, most likely,
SP_NEED_VICTIM was set too.

SP_NEED_VICTIM is set in svc_pool_victim(), which is called from
svc_stop_kthreads(), which requires that the mutex is held.
svc_stop_kthreads() waits for SP_VICTIM_REMAINS to be cleared, which is
the last thing that svc_exit_thread() does.
So while svc_exit_thread() runs, the mutex is held by some other
thread that is calling svc_set_num_threads().

This is also why the list_del_rcu() in svc_exit_thread() is safe.

The case where svc_exit_thread() is called but SP_NEED_VICTIM wasn't set
(only RQ_VICTIM) is the ETIMEDOUT case in nfsd(), and there nfsd()
itself ensures that the mutex is held.

This was why
 [PATCH 07/14] Change unshare_fs_struct() to never fail.
was needed.  If unshare_fs_struct() fails in the current code,
svc_exit_thread() can be called without the mutex - which is already a
theoretical problem for the list_del_rcu().

Thanks,
NeilBrown
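
A minimal userspace model of the handshake described above (pthreads;
need_victim, victim_remains and the other names are illustrative stand-ins for
SP_NEED_VICTIM, SP_VICTIM_REMAINS and friends, not the kernel code). The point
it demonstrates: the controller holds the service mutex from marking the
victim until the victim announces completion, so the victim's unlocked
decrement cannot race with anything.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t service_mutex = PTHREAD_MUTEX_INITIALIZER; /* like nfsd_mutex */
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t victim_done = PTHREAD_COND_INITIALIZER;

static unsigned int pool_nrthreads = 1;	/* like sp_nrthreads */
static bool need_victim;		/* like SP_NEED_VICTIM */
static bool victim_remains;		/* like SP_VICTIM_REMAINS */

/* The victim: runs its exit path only after "should stop" is true. */
static void *victim_thread(void *unused)
{
	(void)unused;
	if (!need_victim)		/* visible: set before pthread_create() */
		return NULL;

	/*
	 * Safe without service_mutex: the controller holds that mutex and
	 * is blocked until we announce completion, so nothing else can
	 * touch pool_nrthreads concurrently.
	 */
	pool_nrthreads -= 1;

	pthread_mutex_lock(&wait_lock);
	victim_remains = false;		/* last act, like svc_exit_thread() */
	pthread_cond_signal(&victim_done);
	pthread_mutex_unlock(&wait_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&service_mutex);	/* like holding nfsd_mutex */

	/* Controller, like svc_stop_kthreads(): mark a victim ... */
	victim_remains = true;
	need_victim = true;
	pthread_create(&t, NULL, victim_thread, NULL);

	/* ... and wait for it to finish before touching the pool again. */
	pthread_mutex_lock(&wait_lock);
	while (victim_remains)
		pthread_cond_wait(&victim_done, &wait_lock);
	pthread_mutex_unlock(&wait_lock);

	pthread_mutex_unlock(&service_mutex);
	pthread_join(t, NULL);
	printf("threads left in pool: %u\n", pool_nrthreads);
	return 0;
}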
Chuck Lever July 24, 2024, 7:36 p.m. UTC | #4
On Tue, Jul 16, 2024 at 11:33:40AM +1000, NeilBrown wrote:
> On Tue, 16 Jul 2024, Jeff Layton wrote:
> > I don't think svc_exit_thread is called with the nfsd_mutex held, so if
> > several threads were exiting at the same time, they could race here.
> 
> This is subtle and deserves explanation in the commit.

Hi Neil, assuming you mean "commit message" here, are you planning
to resend 5/14 with this update?



Patch

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5b0f2e0d7ccf..d85b6d1fa31f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1769,7 +1769,7 @@  int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
 			struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
 
 			err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
-					  atomic_read(&sp->sp_nrthreads));
+					  sp->sp_nrthreads);
 			if (err)
 				goto err_unlock;
 		}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4438cdcd4873..7377422a34df 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -641,7 +641,7 @@  int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
 
 	if (serv)
 		for (i = 0; i < serv->sv_nrpools && i < n; i++)
-			nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
+			nthreads[i] = serv->sv_pools[i].sp_nrthreads;
 	return 0;
 }
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e4fa25fafa97..99e9345d829e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -33,9 +33,9 @@ 
  * node traffic on multi-node NUMA NFS servers.
  */
 struct svc_pool {
-	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
+	unsigned int		sp_id;		/* pool id; also node id on NUMA */
 	struct lwq		sp_xprts;	/* pending transports */
-	atomic_t		sp_nrthreads;	/* # of threads in pool */
+	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
 	struct llist_head	sp_idle_threads; /* idle server threads */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 072ad115ae3d..0d8588bc693c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -725,7 +725,7 @@  svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	serv->sv_nrthreads += 1;
 	spin_unlock_bh(&serv->sv_lock);
 
-	atomic_inc(&pool->sp_nrthreads);
+	pool->sp_nrthreads += 1;
 
 	/* Protected by whatever lock the service uses when calling
 	 * svc_set_num_threads()
@@ -780,31 +780,22 @@  svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
 	struct svc_pool *pool;
 	unsigned int i;
 
-retry:
 	pool = target_pool;
 
-	if (pool != NULL) {
-		if (atomic_inc_not_zero(&pool->sp_nrthreads))
-			goto found_pool;
-		return NULL;
-	} else {
+	if (!pool) {
 		for (i = 0; i < serv->sv_nrpools; i++) {
 			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
-			if (atomic_inc_not_zero(&pool->sp_nrthreads))
-				goto found_pool;
+			if (pool->sp_nrthreads)
+				break;
 		}
-		return NULL;
 	}
 
-found_pool:
-	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
-	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
-	if (!atomic_dec_and_test(&pool->sp_nrthreads))
+	if (pool && pool->sp_nrthreads) {
+		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
 		return pool;
-	/* Nothing left in this pool any more */
-	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
-	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
-	goto retry;
+	}
+	return NULL;
 }
 
 static int
@@ -883,7 +874,7 @@  svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	if (!pool)
 		nrservs -= serv->sv_nrthreads;
 	else
-		nrservs -= atomic_read(&pool->sp_nrthreads);
+		nrservs -= pool->sp_nrthreads;
 
 	if (nrservs > 0)
 		return svc_start_kthreads(serv, pool, nrservs);
@@ -953,7 +944,7 @@  svc_exit_thread(struct svc_rqst *rqstp)
 
 	list_del_rcu(&rqstp->rq_all);
 
-	atomic_dec(&pool->sp_nrthreads);
+	pool->sp_nrthreads -= 1;
 
 	spin_lock_bh(&serv->sv_lock);
 	serv->sv_nrthreads -= 1;