[09/14] SUNRPC: change how svc threads are asked to exit.

Message ID 168966228865.11075.10638610963240650169.stgit@noble.brown (mailing list archive)
State New, archived
Series Refactor SUNRPC svc thread code, and use llist

Commit Message

NeilBrown July 18, 2023, 6:38 a.m. UTC
svc threads are currently stopped using kthread_stop().  This requires
identifying a specific thread.  However, we don't care which thread
stops, just as long as one does.

So instead, set a flag in the svc_pool to say that a thread needs to
die, and have each thread check this flag instead of calling
kthread_should_stop().  The first to find it clears the flag and moves
towards shutting down.

This removes an explicit dependency on sp_all_threads, which will make a
future patch simpler.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/lockd/svc.c                |    4 ++--
 fs/lockd/svclock.c            |    4 ++--
 fs/nfs/callback.c             |    2 +-
 fs/nfsd/nfs4proc.c            |    8 +++++---
 fs/nfsd/nfssvc.c              |    2 +-
 include/linux/lockd/lockd.h   |    2 +-
 include/linux/sunrpc/svc.h    |   22 +++++++++++++++++++++-
 include/trace/events/sunrpc.h |    5 +++--
 net/sunrpc/svc.c              |   39 +++++++++++++++++----------------------
 net/sunrpc/svc_xprt.c         |    8 +++-----
 10 files changed, 56 insertions(+), 40 deletions(-)
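
The handshake, condensed from the diff (a sketch only; locking, thread
accounting and pool selection elided; all names are taken from the patch):

    /* Controller (svc_stop_kthreads): ask any one thread in the pool to exit. */
    set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
    set_bit(SP_NEED_VICTIM, &pool->sp_flags);
    svc_pool_wake_idle_thread(serv, pool);
    wait_on_bit(&pool->sp_flags, SP_VICTIM_REMAINS, TASK_IDLE);

    /* Each thread (svc_thread_should_stop): the first to see the flag claims it. */
    if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
            set_bit(RQ_VICTIM, &rqstp->rq_flags);

    /* The victim (svc_exit_thread): acknowledge so the controller can continue. */
    if (svc_thread_should_stop(rqstp))
            clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);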

Comments

Jeff Layton July 20, 2023, 8:30 p.m. UTC | #1
On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> svc threads are currently stopped using kthread_stop().  This requires
> identifying a specific thread.  However, we don't care which thread
> stops, just as long as one does.
> 
> So instead, set a flag in the svc_pool to say that a thread needs to
> die, and have each thread check this flag instead of calling
> kthread_should_stop().  The first to find it clears the flag and moves
> towards shutting down.
> 
> This removes an explicit dependency on sp_all_threads, which will make a
> future patch simpler.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/lockd/svc.c                |    4 ++--
>  fs/lockd/svclock.c            |    4 ++--
>  fs/nfs/callback.c             |    2 +-
>  fs/nfsd/nfs4proc.c            |    8 +++++---
>  fs/nfsd/nfssvc.c              |    2 +-
>  include/linux/lockd/lockd.h   |    2 +-
>  include/linux/sunrpc/svc.h    |   22 +++++++++++++++++++++-
>  include/trace/events/sunrpc.h |    5 +++--
>  net/sunrpc/svc.c              |   39 +++++++++++++++++----------------------
>  net/sunrpc/svc_xprt.c         |    8 +++-----
>  10 files changed, 56 insertions(+), 40 deletions(-)
> 
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index 4f55cd42c2e6..30543edd2309 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -134,12 +134,12 @@ lockd(void *vrqstp)
>  	 * The main request loop. We don't terminate until the last
>  	 * NFS mount or NFS daemon has gone away.
>  	 */
> -	while (!kthread_should_stop()) {
> +	while (!svc_thread_should_stop(rqstp)) {
>  
>  		/* update sv_maxconn if it has changed */
>  		rqstp->rq_server->sv_maxconn = nlm_max_connections;
>  
> -		nlmsvc_retry_blocked();
> +		nlmsvc_retry_blocked(rqstp);
>  
>  		/*
>  		 * Find any work to do, such as a socket with data available,
> diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
> index 3d7bd5c04b36..fd399c9bea5c 100644
> --- a/fs/lockd/svclock.c
> +++ b/fs/lockd/svclock.c
> @@ -1009,13 +1009,13 @@ retry_deferred_block(struct nlm_block *block)
>   * be retransmitted.
>   */
>  void
> -nlmsvc_retry_blocked(void)
> +nlmsvc_retry_blocked(struct svc_rqst *rqstp)
>  {
>  	unsigned long	timeout = MAX_SCHEDULE_TIMEOUT;
>  	struct nlm_block *block;
>  
>  	spin_lock(&nlm_blocked_lock);
> -	while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
> +	while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) {
>  		block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
>  
>  		if (block->b_when == NLM_NEVER)
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 660cec36c45c..c58ec2e66aaf 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
>  
>  	set_freezable();
>  
> -	while (!kthread_should_stop()) {
> +	while (!svc_thread_should_stop(rqstp)) {
>  		/*
>  		 * Listen for a request on the socket
>  		 */
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 157488290676..66024ed06181 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1306,7 +1306,8 @@ extern void nfs_sb_deactive(struct super_block *sb);
>   * setup a work entry in the ssc delayed unmount list.
>   */
>  static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> -				  struct nfsd4_ssc_umount_item **nsui)
> +				  struct nfsd4_ssc_umount_item **nsui,
> +				  struct svc_rqst *rqstp)
>  {
>  	struct nfsd4_ssc_umount_item *ni = NULL;
>  	struct nfsd4_ssc_umount_item *work = NULL;
> @@ -1329,7 +1330,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
>  			spin_unlock(&nn->nfsd_ssc_lock);
>  
>  			/* allow 20secs for mount/unmount for now - revisit */
> -			if (kthread_should_stop() ||
> +			if (svc_thread_should_stop(rqstp) ||
>  					(schedule_timeout(20*HZ) == 0)) {
>  				finish_wait(&nn->nfsd_ssc_waitq, &wait);
>  				kfree(work);
> @@ -1445,7 +1446,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
>  		goto out_free_rawdata;
>  	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>  
> -	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
> +	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
>  	if (status)
>  		goto out_free_devname;
>  	if ((*nsui)->nsui_vfsmount)
> @@ -1620,6 +1621,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
>  	if (bytes_total == 0)
>  		bytes_total = ULLONG_MAX;
>  	do {
> +		/* Only async copies can be stopped here */
>  		if (kthread_should_stop())
>  			break;
>  		bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index b536b254c59e..9b282c89ea87 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -971,7 +971,7 @@ nfsd(void *vrqstp)
>  	/*
>  	 * The main request loop
>  	 */
> -	while (!kthread_should_stop()) {
> +	while (!svc_thread_should_stop(rqstp)) {
>  		/* Update sv_maxconn if it has changed */
>  		rqstp->rq_server->sv_maxconn = nn->max_connections;
>  
> diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
> index 0f016d69c996..9f565416d186 100644
> --- a/include/linux/lockd/lockd.h
> +++ b/include/linux/lockd/lockd.h
> @@ -282,7 +282,7 @@ __be32		  nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
>  			struct nlm_host *, struct nlm_lock *,
>  			struct nlm_lock *, struct nlm_cookie *);
>  __be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
> -void		  nlmsvc_retry_blocked(void);
> +void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
>  void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
>  					nlm_host_match_fn_t match);
>  void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 15d468d852b5..ea3ce1315416 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -51,6 +51,8 @@ struct svc_pool {
>  enum {
>  	SP_TASK_PENDING,	/* still work to do even if no xprt is queued */
>  	SP_CONGESTED,		/* all threads are busy, none idle */
> +	SP_NEED_VICTIM,		/* One thread needs to agree to exit */
> +	SP_VICTIM_REMAINS,	/* One thread needs to actually exit */
>  };
>  
>  /*
> @@ -275,7 +277,7 @@ enum {
>  	RQ_DROPME,	/* drop current reply */
>  	RQ_SPLICE_OK,	/* turned off in gss privacy to prevent encrypting page
>  			 * cache pages */
> -	RQ_VICTIM,	/* about to be shut down */
> +	RQ_VICTIM,	/* Have agreed to shut down */
>  	RQ_BUSY,	/* request is busy */
>  	RQ_DATA,	/* request has data */
>  };
> @@ -315,6 +317,24 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
>  	return (struct sockaddr *) &rqst->rq_daddr;
>  }
>  
> +/**
> + * svc_thread_should_stop - check if this thread should stop
> + * @rqstp: the thread that might need to stop
> + *
> + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
> + * are set.  The first thread which sees SP_NEED_VICTIM clears it, becoming
> + * the victim, using this function.  It should then promptly call
> + * svc_exit_thread() which completes the process, clearing SP_VICTIM_REMAINS
> + * so the task waiting for a thread to exit can wake and continue.
> + */
> +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
> +{
> +	if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
> +		set_bit(RQ_VICTIM, &rqstp->rq_flags);
> +
> +	return test_bit(RQ_VICTIM, &rqstp->rq_flags);
> +}
> +
>  struct svc_deferred_req {
>  	u32			prot;	/* protocol (UDP or TCP) */
>  	struct svc_xprt		*xprt;
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index 60c8e03268d4..c79375e37dc2 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -2041,8 +2041,9 @@ TRACE_EVENT(svc_xprt_enqueue,
>  #define show_svc_pool_flags(x)						\
>  	__print_flags(x, "|",						\
>  		{ BIT(SP_TASK_PENDING),		"TASK_PENDING" },	\
> -		{ BIT(SP_CONGESTED),		"CONGESTED" })
> -
> +		{ BIT(SP_CONGESTED),		"CONGESTED" },		\
> +		{ BIT(SP_NEED_VICTIM),		"NEED_VICTIM" },	\
> +		{ BIT(SP_VICTIM_REMAINS),	"VICTIM_REMAINS" })
>  DECLARE_EVENT_CLASS(svc_pool_scheduler_class,
>  	TP_PROTO(
>  		const struct svc_rqst *rqstp
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 56b4a88776d5..b18175ef74ec 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -731,19 +731,22 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
>  	return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
>  }
>  
> -static struct task_struct *
> +static struct svc_pool *
>  svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
>  {
>  	unsigned int i;
> -	struct task_struct *task = NULL;
>  
>  	if (pool != NULL) {
>  		spin_lock_bh(&pool->sp_lock);
> +		if (pool->sp_nrthreads)
> +			goto found_pool;
> +		spin_unlock_bh(&pool->sp_lock);
> +		return NULL;
>  	} else {
>  		for (i = 0; i < serv->sv_nrpools; i++) {
>  			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
>  			spin_lock_bh(&pool->sp_lock);
> -			if (!list_empty(&pool->sp_all_threads))
> +			if (pool->sp_nrthreads)
>  				goto found_pool;
>  			spin_unlock_bh(&pool->sp_lock);
>  		}
> @@ -751,16 +754,10 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
>  	}
>  
>  found_pool:
> -	if (!list_empty(&pool->sp_all_threads)) {
> -		struct svc_rqst *rqstp;
> -
> -		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
> -		set_bit(RQ_VICTIM, &rqstp->rq_flags);
> -		list_del_rcu(&rqstp->rq_all);
> -		task = rqstp->rq_task;
> -	}
> +	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> +	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
>  	spin_unlock_bh(&pool->sp_lock);
> -	return task;
> +	return pool;
>  }
>  
>  static int
> @@ -801,18 +798,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  static int
>  svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  {
> -	struct svc_rqst	*rqstp;
> -	struct task_struct *task;
>  	unsigned int state = serv->sv_nrthreads-1;
> +	struct svc_pool *vpool;
>  
>  	do {
> -		task = svc_pool_victim(serv, pool, &state);
> -		if (task == NULL)
> +		vpool = svc_pool_victim(serv, pool, &state);
> +		if (!vpool)
>  			break;
> -		rqstp = kthread_data(task);
> -		/* Did we lose a race to svo_function threadfn? */
> -		if (kthread_stop(task) == -EINTR)
> -			svc_exit_thread(rqstp);
> +		svc_pool_wake_idle_thread(serv, vpool);
> +		wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
> +			    TASK_IDLE);

I'm not sure about this bit. Previously (AFAICT) we didn't shut down the
threads serially. With this change, we will be. Granted we only have to
wait until SP_VICTIM_REMAINS is clear. Does that happen early enough in
the shutdown process that you aren't worried about this?

>  		nrservs++;
>  	} while (nrservs < 0);
>  	return 0;
> @@ -932,8 +927,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
>  
>  	spin_lock_bh(&pool->sp_lock);
>  	pool->sp_nrthreads--;
> -	if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
> -		list_del_rcu(&rqstp->rq_all);
>  	spin_unlock_bh(&pool->sp_lock);
>  
>  	spin_lock_bh(&serv->sv_lock);
> @@ -941,6 +934,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
>  	spin_unlock_bh(&serv->sv_lock);
>  	svc_sock_update_bufs(serv);
>  
> +	if (svc_thread_should_stop(rqstp))
> +		clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
>  	svc_rqst_free(rqstp);
>  
>  	svc_put(serv);
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 4fdf1aaa514b..948605e7043b 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -595,8 +595,6 @@ void svc_wake_up(struct svc_serv *serv)
>  		smp_wmb();
>  		return;
>  	}
> -
> -	trace_svc_wake_up(rqstp);
>  }
>  EXPORT_SYMBOL_GPL(svc_wake_up);
>  
> @@ -688,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
>  			continue;
>  
>  		set_current_state(TASK_IDLE);
> -		if (kthread_should_stop()) {
> +		if (svc_thread_should_stop(rqstp)) {
>  			set_current_state(TASK_RUNNING);
>  			return false;
>  		}
> @@ -726,7 +724,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
>  		return false;
>  
>  	/* are we shutting down? */
> -	if (kthread_should_stop())
> +	if (svc_thread_should_stop(rqstp))
>  		return false;
>  
>  	/* are we freezing? */
> @@ -787,7 +785,7 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
>  	}
>  #endif
>  
> -	if (!kthread_should_stop())
> +	if (!svc_thread_should_stop(rqstp))
>  		percpu_counter_inc(&pool->sp_threads_no_work);
>  	return;
>  out_found:
> 
>
NeilBrown July 20, 2023, 9:32 p.m. UTC | #2
On Fri, 21 Jul 2023, Jeff Layton wrote:
> On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> > [...]
> > @@ -801,18 +798,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> >  static int
> >  svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> >  {
> > -	struct svc_rqst	*rqstp;
> > -	struct task_struct *task;
> >  	unsigned int state = serv->sv_nrthreads-1;
> > +	struct svc_pool *vpool;
> >  
> >  	do {
> > -		task = svc_pool_victim(serv, pool, &state);
> > -		if (task == NULL)
> > +		vpool = svc_pool_victim(serv, pool, &state);
> > +		if (!vpool)
> >  			break;
> > -		rqstp = kthread_data(task);
> > -		/* Did we lose a race to svo_function threadfn? */
> > -		if (kthread_stop(task) == -EINTR)
> > -			svc_exit_thread(rqstp);
> > +		svc_pool_wake_idle_thread(serv, vpool);
> > +		wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
> > +			    TASK_IDLE);
> 
> I'm not sure about this bit. Previously (AFAICT) we didn't shut down the
> threads serially. With this change, we will be. Granted we only have to
> wait until SP_VICTIM_REMAINS is clear. Does that happen early enough in
> the shutdown process that you aren't worried about this?

If by "previously" you mean "before v5.17-rc1~75^2~42", then you are
correct.
When we used send_sig to stop nfsd threads we sent the signals in a
tight loop and didn't wait.

Since then we have used kthread_stop(), which is synchronous.  It sets a flag,
wakes the threads, and waits on a completion.

With this patch the clear_and_wake_up_bit() happens very slightly
*before* the complete(), which we currently wait for rather late in
exit() handling (complete_vfork_done() I think).
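
Roughly (a sketch of the ordering, not a trace):

    victim thread                          svc_stop_kthreads()
    -------------                          -------------------
    svc_thread_should_stop() returns true  wait_on_bit(SP_VICTIM_REMAINS, ...)
    svc_exit_thread()
      clear_and_wake_up_bit()  --------->    ...returns; next victim chosen
      svc_rqst_free(); svc_put()
    do_exit()
      complete_vfork_done()                (kthread_stop() only returned
                                            at this point)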

Thanks for the review - here and elsewhere.

NeilBrown

Patch

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 4f55cd42c2e6..30543edd2309 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -134,12 +134,12 @@  lockd(void *vrqstp)
 	 * The main request loop. We don't terminate until the last
 	 * NFS mount or NFS daemon has gone away.
 	 */
-	while (!kthread_should_stop()) {
+	while (!svc_thread_should_stop(rqstp)) {
 
 		/* update sv_maxconn if it has changed */
 		rqstp->rq_server->sv_maxconn = nlm_max_connections;
 
-		nlmsvc_retry_blocked();
+		nlmsvc_retry_blocked(rqstp);
 
 		/*
 		 * Find any work to do, such as a socket with data available,
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3d7bd5c04b36..fd399c9bea5c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -1009,13 +1009,13 @@  retry_deferred_block(struct nlm_block *block)
  * be retransmitted.
  */
 void
-nlmsvc_retry_blocked(void)
+nlmsvc_retry_blocked(struct svc_rqst *rqstp)
 {
 	unsigned long	timeout = MAX_SCHEDULE_TIMEOUT;
 	struct nlm_block *block;
 
 	spin_lock(&nlm_blocked_lock);
-	while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
+	while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) {
 		block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
 
 		if (block->b_when == NLM_NEVER)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 660cec36c45c..c58ec2e66aaf 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -78,7 +78,7 @@  nfs4_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	while (!kthread_should_stop()) {
+	while (!svc_thread_should_stop(rqstp)) {
 		/*
 		 * Listen for a request on the socket
 		 */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 157488290676..66024ed06181 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1306,7 +1306,8 @@  extern void nfs_sb_deactive(struct super_block *sb);
  * setup a work entry in the ssc delayed unmount list.
  */
 static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
-				  struct nfsd4_ssc_umount_item **nsui)
+				  struct nfsd4_ssc_umount_item **nsui,
+				  struct svc_rqst *rqstp)
 {
 	struct nfsd4_ssc_umount_item *ni = NULL;
 	struct nfsd4_ssc_umount_item *work = NULL;
@@ -1329,7 +1330,7 @@  static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
 			spin_unlock(&nn->nfsd_ssc_lock);
 
 			/* allow 20secs for mount/unmount for now - revisit */
-			if (kthread_should_stop() ||
+			if (svc_thread_should_stop(rqstp) ||
 					(schedule_timeout(20*HZ) == 0)) {
 				finish_wait(&nn->nfsd_ssc_waitq, &wait);
 				kfree(work);
@@ -1445,7 +1446,7 @@  nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 		goto out_free_rawdata;
 	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
 
-	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
+	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
 	if (status)
 		goto out_free_devname;
 	if ((*nsui)->nsui_vfsmount)
@@ -1620,6 +1621,7 @@  static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
 	if (bytes_total == 0)
 		bytes_total = ULLONG_MAX;
 	do {
+		/* Only async copies can be stopped here */
 		if (kthread_should_stop())
 			break;
 		bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b536b254c59e..9b282c89ea87 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -971,7 +971,7 @@  nfsd(void *vrqstp)
 	/*
 	 * The main request loop
 	 */
-	while (!kthread_should_stop()) {
+	while (!svc_thread_should_stop(rqstp)) {
 		/* Update sv_maxconn if it has changed */
 		rqstp->rq_server->sv_maxconn = nn->max_connections;
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0f016d69c996..9f565416d186 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -282,7 +282,7 @@  __be32		  nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
 			struct nlm_host *, struct nlm_lock *,
 			struct nlm_lock *, struct nlm_cookie *);
 __be32		  nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-void		  nlmsvc_retry_blocked(void);
+void		  nlmsvc_retry_blocked(struct svc_rqst *rqstp);
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
 					nlm_host_match_fn_t match);
 void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 15d468d852b5..ea3ce1315416 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -51,6 +51,8 @@  struct svc_pool {
 enum {
 	SP_TASK_PENDING,	/* still work to do even if no xprt is queued */
 	SP_CONGESTED,		/* all threads are busy, none idle */
+	SP_NEED_VICTIM,		/* One thread needs to agree to exit */
+	SP_VICTIM_REMAINS,	/* One thread needs to actually exit */
 };
 
 /*
@@ -275,7 +277,7 @@  enum {
 	RQ_DROPME,	/* drop current reply */
 	RQ_SPLICE_OK,	/* turned off in gss privacy to prevent encrypting page
 			 * cache pages */
-	RQ_VICTIM,	/* about to be shut down */
+	RQ_VICTIM,	/* Have agreed to shut down */
 	RQ_BUSY,	/* request is busy */
 	RQ_DATA,	/* request has data */
 };
@@ -315,6 +317,24 @@  static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
 	return (struct sockaddr *) &rqst->rq_daddr;
 }
 
+/**
+ * svc_thread_should_stop - check if this thread should stop
+ * @rqstp: the thread that might need to stop
+ *
+ * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
+ * are set.  The first thread which sees SP_NEED_VICTIM clears it, becoming
+ * the victim, using this function.  It should then promptly call
+ * svc_exit_thread() which completes the process, clearing SP_VICTIM_REMAINS
+ * so the task waiting for a thread to exit can wake and continue.
+ */
+static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
+{
+	if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
+		set_bit(RQ_VICTIM, &rqstp->rq_flags);
+
+	return test_bit(RQ_VICTIM, &rqstp->rq_flags);
+}
+
 struct svc_deferred_req {
 	u32			prot;	/* protocol (UDP or TCP) */
 	struct svc_xprt		*xprt;
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 60c8e03268d4..c79375e37dc2 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -2041,8 +2041,9 @@  TRACE_EVENT(svc_xprt_enqueue,
 #define show_svc_pool_flags(x)						\
 	__print_flags(x, "|",						\
 		{ BIT(SP_TASK_PENDING),		"TASK_PENDING" },	\
-		{ BIT(SP_CONGESTED),		"CONGESTED" })
-
+		{ BIT(SP_CONGESTED),		"CONGESTED" },		\
+		{ BIT(SP_NEED_VICTIM),		"NEED_VICTIM" },	\
+		{ BIT(SP_VICTIM_REMAINS),	"VICTIM_REMAINS" })
 DECLARE_EVENT_CLASS(svc_pool_scheduler_class,
 	TP_PROTO(
 		const struct svc_rqst *rqstp
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 56b4a88776d5..b18175ef74ec 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -731,19 +731,22 @@  svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 	return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
 }
 
-static struct task_struct *
+static struct svc_pool *
 svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 {
 	unsigned int i;
-	struct task_struct *task = NULL;
 
 	if (pool != NULL) {
 		spin_lock_bh(&pool->sp_lock);
+		if (pool->sp_nrthreads)
+			goto found_pool;
+		spin_unlock_bh(&pool->sp_lock);
+		return NULL;
 	} else {
 		for (i = 0; i < serv->sv_nrpools; i++) {
 			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
 			spin_lock_bh(&pool->sp_lock);
-			if (!list_empty(&pool->sp_all_threads))
+			if (pool->sp_nrthreads)
 				goto found_pool;
 			spin_unlock_bh(&pool->sp_lock);
 		}
@@ -751,16 +754,10 @@  svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
 	}
 
 found_pool:
-	if (!list_empty(&pool->sp_all_threads)) {
-		struct svc_rqst *rqstp;
-
-		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
-		set_bit(RQ_VICTIM, &rqstp->rq_flags);
-		list_del_rcu(&rqstp->rq_all);
-		task = rqstp->rq_task;
-	}
+	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
 	spin_unlock_bh(&pool->sp_lock);
-	return task;
+	return pool;
 }
 
 static int
@@ -801,18 +798,16 @@  svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 static int
 svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 {
-	struct svc_rqst	*rqstp;
-	struct task_struct *task;
 	unsigned int state = serv->sv_nrthreads-1;
+	struct svc_pool *vpool;
 
 	do {
-		task = svc_pool_victim(serv, pool, &state);
-		if (task == NULL)
+		vpool = svc_pool_victim(serv, pool, &state);
+		if (!vpool)
 			break;
-		rqstp = kthread_data(task);
-		/* Did we lose a race to svo_function threadfn? */
-		if (kthread_stop(task) == -EINTR)
-			svc_exit_thread(rqstp);
+		svc_pool_wake_idle_thread(serv, vpool);
+		wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
+			    TASK_IDLE);
 		nrservs++;
 	} while (nrservs < 0);
 	return 0;
@@ -932,8 +927,6 @@  svc_exit_thread(struct svc_rqst *rqstp)
 
 	spin_lock_bh(&pool->sp_lock);
 	pool->sp_nrthreads--;
-	if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
-		list_del_rcu(&rqstp->rq_all);
 	spin_unlock_bh(&pool->sp_lock);
 
 	spin_lock_bh(&serv->sv_lock);
@@ -941,6 +934,8 @@  svc_exit_thread(struct svc_rqst *rqstp)
 	spin_unlock_bh(&serv->sv_lock);
 	svc_sock_update_bufs(serv);
 
+	if (svc_thread_should_stop(rqstp))
+		clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
 	svc_rqst_free(rqstp);
 
 	svc_put(serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4fdf1aaa514b..948605e7043b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -595,8 +595,6 @@  void svc_wake_up(struct svc_serv *serv)
 		smp_wmb();
 		return;
 	}
-
-	trace_svc_wake_up(rqstp);
 }
 EXPORT_SYMBOL_GPL(svc_wake_up);
 
@@ -688,7 +686,7 @@  static bool svc_alloc_arg(struct svc_rqst *rqstp)
 			continue;
 
 		set_current_state(TASK_IDLE);
-		if (kthread_should_stop()) {
+		if (svc_thread_should_stop(rqstp)) {
 			set_current_state(TASK_RUNNING);
 			return false;
 		}
@@ -726,7 +724,7 @@  rqst_should_sleep(struct svc_rqst *rqstp)
 		return false;
 
 	/* are we shutting down? */
-	if (kthread_should_stop())
+	if (svc_thread_should_stop(rqstp))
 		return false;
 
 	/* are we freezing? */
@@ -787,7 +785,7 @@  static void svc_wait_for_work(struct svc_rqst *rqstp)
 	}
 #endif
 
-	if (!kthread_should_stop())
+	if (!svc_thread_should_stop(rqstp))
 		percpu_counter_inc(&pool->sp_threads_no_work);
 	return;
 out_found:
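
For experimenting with the handshake outside the kernel, here is a minimal
userspace analogue: pthreads and C11 atomics stand in for the kernel's
bitops and for wait_on_bit()/clear_and_wake_up_bit().  The names mirror the
patch, but none of this is SUNRPC code.

    /* victim_demo.c: build with  cc -pthread victim_demo.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_bool need_victim;            /* ~ SP_NEED_VICTIM */
    static atomic_bool victim_remains;         /* ~ SP_VICTIM_REMAINS */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  ack  = PTHREAD_COND_INITIALIZER;

    /* Worker side: the first thread to see need_victim claims the exit,
     * like test_and_clear_bit(SP_NEED_VICTIM) in svc_thread_should_stop(). */
    static bool thread_should_stop(void)
    {
            bool expected = true;
            return atomic_compare_exchange_strong(&need_victim, &expected, false);
    }

    static void *worker(void *arg)
    {
            long id = (long)arg;

            while (!thread_should_stop())
                    usleep(1000);              /* stand-in for serving a request */

            /* svc_exit_thread() analogue: acknowledge, then wake the controller. */
            pthread_mutex_lock(&lock);
            atomic_store(&victim_remains, false);
            pthread_cond_signal(&ack);
            pthread_mutex_unlock(&lock);
            printf("worker %ld exiting\n", id);
            return NULL;
    }

    /* Controller side: ask for one exit and wait for the acknowledgement,
     * like svc_stop_kthreads() waiting on SP_VICTIM_REMAINS. */
    static void stop_one_worker(void)
    {
            pthread_mutex_lock(&lock);
            atomic_store(&victim_remains, true);
            atomic_store(&need_victim, true);
            while (atomic_load(&victim_remains))
                    pthread_cond_wait(&ack, &lock);
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            enum { NR = 4 };
            pthread_t tids[NR];

            for (long i = 0; i < NR; i++)
                    pthread_create(&tids[i], NULL, worker, (void *)i);

            for (int i = 0; i < NR; i++)
                    stop_one_worker();         /* serial, like the patch */

            for (int i = 0; i < NR; i++)
                    pthread_join(tids[i], NULL);
            return 0;
    }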