diff mbox series

[2/2] sunrpc: allow svc threads to fail initialisation cleanly

Message ID 20240729212217.30747-3-neilb@suse.de (mailing list archive)
State New
Headers show
Series nfsd: fix handling of error from unshare_fs_struct() | expand

Commit Message

NeilBrown July 29, 2024, 9:19 p.m. UTC
If an svc thread needs to perform some initialisation that might fail,
it has no good way to handle the failure.

Before the thread can exit it must call svc_exit_thread(), but that
requires the service mutex to be held.  The thread cannot simply take
the mutex as that could deadlock if there is a concurrent attempt to
shut down all threads (which is unlikely, but not impossible).

nfsd currently call svc_exit_thread() unprotected in the unlikely event
that unshare_fs_struct() fails.

We can clean this up by introducing svc_thread_init_status() by which an
svc thread can report whether initialisation has succeeded.  If it has,
it continues normally into the action loop.  If it has not,
svc_thread_init_status() immediately aborts the thread.
svc_start_kthread() waits for either of these to happen, and calls
svc_exit_thread() (under the mutex) if the thread aborted.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/lockd/svc.c             |  2 ++
 fs/nfs/callback.c          |  2 ++
 fs/nfsd/nfssvc.c           |  9 +++------
 include/linux/sunrpc/svc.h | 28 ++++++++++++++++++++++++++++
 net/sunrpc/svc.c           | 11 +++++++++++
 5 files changed, 46 insertions(+), 6 deletions(-)

Comments

Jeff Layton Sept. 11, 2024, 6:33 p.m. UTC | #1
On Tue, 2024-07-30 at 07:19 +1000, NeilBrown wrote:
> If an svc thread needs to perform some initialisation that might fail,
> it has no good way to handle the failure.
> 
> Before the thread can exit it must call svc_exit_thread(), but that
> requires the service mutex to be held.  The thread cannot simply take
> the mutex as that could deadlock if there is a concurrent attempt to
> shut down all threads (which is unlikely, but not impossible).
> 
> nfsd currently call svc_exit_thread() unprotected in the unlikely event
> that unshare_fs_struct() fails.
> 
> We can clean this up by introducing svc_thread_init_status() by which an
> svc thread can report whether initialisation has succeeded.  If it has,
> it continues normally into the action loop.  If it has not,
> svc_thread_init_status() immediately aborts the thread.
> svc_start_kthread() waits for either of these to happen, and calls
> svc_exit_thread() (under the mutex) if the thread aborted.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/lockd/svc.c             |  2 ++
>  fs/nfs/callback.c          |  2 ++
>  fs/nfsd/nfssvc.c           |  9 +++------
>  include/linux/sunrpc/svc.h | 28 ++++++++++++++++++++++++++++
>  net/sunrpc/svc.c           | 11 +++++++++++
>  5 files changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index 71713309967d..4ec22c2f2ea3 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -124,6 +124,8 @@ lockd(void *vrqstp)
>  	struct net *net = &init_net;
>  	struct lockd_net *ln = net_generic(net, lockd_net_id);
>  
> +	svc_thread_init_status(rqstp, 0);
> +
>  	/* try_to_freeze() is called from svc_recv() */
>  	set_freezable();
>  
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 8adfcd4c8c1a..6cf92498a5ac 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -76,6 +76,8 @@ nfs4_callback_svc(void *vrqstp)
>  {
>  	struct svc_rqst *rqstp = vrqstp;
>  
> +	svc_thread_init_status(rqstp, 0);
> +
>  	set_freezable();
>  
>  	while (!svc_thread_should_stop(rqstp))
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index fca340b3ee91..1cef09a3c78e 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -875,11 +875,9 @@ nfsd(void *vrqstp)
>  
>  	/* At this point, the thread shares current->fs
>  	 * with the init process. We need to create files with the
> -	 * umask as defined by the client instead of init's umask. */
> -	if (unshare_fs_struct() < 0) {
> -		printk("Unable to start nfsd thread: out of memory\n");
> -		goto out;
> -	}
> +	 * umask as defined by the client instead of init's umask.
> +	 */
> +	svc_thread_init_status(rqstp, unshare_fs_struct());
>  
>  	current->fs->umask = 0;
>  
> @@ -901,7 +899,6 @@ nfsd(void *vrqstp)
>  
>  	atomic_dec(&nfsd_th_cnt);
>  
> -out:
>  	/* Release the thread */
>  	svc_exit_thread(rqstp);
>  	return 0;
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 99e9345d829e..437672bcaa22 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -21,6 +21,7 @@
>  #include <linux/wait.h>
>  #include <linux/mm.h>
>  #include <linux/pagevec.h>
> +#include <linux/kthread.h>
>  
>  /*
>   *
> @@ -232,6 +233,11 @@ struct svc_rqst {
>  	struct net		*rq_bc_net;	/* pointer to backchannel's
>  						 * net namespace
>  						 */
> +
> +	int			rq_err;		/* Thread sets this to inidicate
> +						 * initialisation success.
> +						 */
> +
>  	unsigned long	bc_to_initval;
>  	unsigned int	bc_to_retries;
>  	void **			rq_lease_breaker; /* The v4 client breaking a lease */
> @@ -305,6 +311,28 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
>  	return test_bit(RQ_VICTIM, &rqstp->rq_flags);
>  }
>  
> +/**
> + * svc_thread_init_status - report whether thread has initialised successfully
> + * @rqstp: the thread in question
> + * @err: errno code
> + *
> + * After performing any initialisation that could fail, and before starting
> + * normal work, each sunrpc svc_thread must call svc_thread_init_status()
> + * with an appropriate error, or zero.
> + *
> + * If zero is passed, the thread is ready and must continue until
> + * svc_thread_should_stop() returns true.  If a non-zero error is passed
> + * the call will not return - the thread will exit.
> + */
> +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
> +{
> +	/* store_release ensures svc_start_kthreads() sees the error */
> +	smp_store_release(&rqstp->rq_err, err);
> +	wake_up_var(&rqstp->rq_err);
> +	if (err)
> +		kthread_exit(1);
> +}
> +
>  struct svc_deferred_req {
>  	u32			prot;	/* protocol (UDP or TCP) */
>  	struct svc_xprt		*xprt;
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index ae31ffea7a14..1e80fa67d8b7 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -706,6 +706,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>  	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
>  		goto out_enomem;
>  
> +	rqstp->rq_err = -EAGAIN; /* No error yet */
> +
>  	serv->sv_nrthreads += 1;
>  	pool->sp_nrthreads += 1;
>  
> @@ -792,6 +794,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  	struct svc_pool *chosen_pool;
>  	unsigned int state = serv->sv_nrthreads-1;
>  	int node;
> +	int err;
>  
>  	do {
>  		nrservs--;
> @@ -814,6 +817,14 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  
>  		svc_sock_update_bufs(serv);
>  		wake_up_process(task);
> +
> +		/* load_acquire ensures we get value stored in svc_thread_init_status() */
> +		wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
> +		err = rqstp->rq_err;
> +		if (err) {
> +			svc_exit_thread(rqstp);
> +			return err;
> +		}
>  	} while (nrservs > 0);
>  
>  	return 0;


I hit a hang today on the client while running the nfs-interop test
under kdevops. The client is stuck in mount syscall, while trying to
set up the backchannel:

[ 1693.653257] INFO: task mount.nfs:13243 blocked for more than 120 seconds.
[ 1693.655827]       Not tainted 6.11.0-rc7-gffcadb41b696 #166
[ 1693.657858] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1693.661442] task:mount.nfs       state:D stack:0     pid:13243 tgid:13243 ppid:13242  flags:0x00004002
[ 1693.664648] Call Trace:
[ 1693.665639]  <TASK>
[ 1693.666482]  __schedule+0xc04/0x2750
[ 1693.668021]  ? __pfx___schedule+0x10/0x10
[ 1693.669562]  ? _raw_spin_lock_irqsave+0x98/0xf0
[ 1693.671213]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[ 1693.673109]  ? try_to_wake_up+0x141/0x1210
[ 1693.674763]  ? __pfx_try_to_wake_up+0x10/0x10
[ 1693.676612]  ? _raw_spin_unlock_irqrestore+0x23/0x40
[ 1693.678429]  schedule+0x73/0x2f0
[ 1693.679662]  svc_set_num_threads+0xbc8/0xf80 [sunrpc]
[ 1693.682007]  ? __pfx_svc_set_num_threads+0x10/0x10 [sunrpc]
[ 1693.684163]  ? __pfx_var_wake_function+0x10/0x10
[ 1693.685849]  ? __svc_create+0x6c0/0x980 [sunrpc]
[ 1693.687777]  nfs_callback_up+0x314/0xae0 [nfsv4]
[ 1693.689630]  nfs4_init_client+0x203/0x400 [nfsv4]
[ 1693.691468]  ? __pfx_nfs4_init_client+0x10/0x10 [nfsv4]
[ 1693.693502]  ? _raw_spin_lock_irqsave+0x98/0xf0
[ 1693.695141]  nfs4_set_client+0x2f4/0x520 [nfsv4]
[ 1693.696967]  ? __pfx_nfs4_set_client+0x10/0x10 [nfsv4]
[ 1693.699230]  nfs4_create_server+0x5f2/0xef0 [nfsv4]
[ 1693.701357]  ? _raw_spin_lock+0x85/0xe0
[ 1693.702758]  ? __pfx__raw_spin_lock+0x10/0x10
[ 1693.704344]  ? nfs_get_tree+0x61f/0x16a0 [nfs]
[ 1693.706160]  ? __pfx_nfs4_create_server+0x10/0x10 [nfsv4]
[ 1693.707376]  ? __module_get+0x26/0xc0
[ 1693.708061]  nfs4_try_get_tree+0xcd/0x250 [nfsv4]
[ 1693.708893]  vfs_get_tree+0x83/0x2d0
[ 1693.709534]  path_mount+0x88d/0x19a0
[ 1693.710100]  ? __pfx_path_mount+0x10/0x10
[ 1693.710718]  ? user_path_at+0xa4/0xe0
[ 1693.711303]  ? kmem_cache_free+0x143/0x3e0
[ 1693.711936]  __x64_sys_mount+0x1fb/0x270
[ 1693.712606]  ? __pfx___x64_sys_mount+0x10/0x10
[ 1693.713315]  do_syscall_64+0x4b/0x110
[ 1693.713889]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 1693.714660] RIP: 0033:0x7f05050418fe
[ 1693.715233] RSP: 002b:00007fff4e0f5728 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
[ 1693.716417] RAX: ffffffffffffffda RBX: 00007fff4e0f5900 RCX: 00007f05050418fe
[ 1693.717492] RDX: 00005559cea3ea70 RSI: 00005559cea3fe60 RDI: 00005559cea40140
[ 1693.718558] RBP: 00007fff4e0f5760 R08: 00005559cea41e60 R09: 00007f050510cb20
[ 1693.719620] R10: 0000000000000000 R11: 0000000000000246 R12: 00005559cea41e60
[ 1693.720735] R13: 00007fff4e0f5900 R14: 00005559cea41df0 R15: 0000000000000004


Looking at faddr2line:

$ ./scripts/faddr2line --list net/sunrpc/sunrpc.ko svc_set_num_threads+0xbc8/0xf80
svc_set_num_threads+0xbc8/0xf80:

svc_start_kthreads at /home/jlayton/git/kdevops/linux/net/sunrpc/svc.c:822 (discriminator 17)
 817 
 818                    svc_sock_update_bufs(serv);
 819                    wake_up_process(task);
 820 
 821                    /* load_acquire ensures we get value stored in svc_thread_init_status() */
>822<                   wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
 823                    err = rqstp->rq_err;
 824                    if (err) {
 825                            svc_exit_thread(rqstp);
 826                            return err;
 827                    }

(inlined by) svc_set_num_threads at /home/jlayton/git/kdevops/linux/net/sunrpc/svc.c:877 (discriminator 17)
 872                    nrservs -= serv->sv_nrthreads;
 873            else
 874                    nrservs -= pool->sp_nrthreads;
 875 
 876            if (nrservs > 0)
>877<                   return svc_start_kthreads(serv, pool, nrservs);
 878            if (nrservs < 0)
 879                    return svc_stop_kthreads(serv, pool, nrservs);
 880            return 0;
 881    }
 882    EXPORT_SYMBOL_GPL(svc_set_num_threads);

It looks like the callback thread started up properly and is past the svc_thread_init_status call.

$ sudo cat /proc/13246/stack
[<0>] svc_recv+0xcef/0x2020 [sunrpc]
[<0>] nfs4_callback_svc+0xb0/0x140 [nfsv4]
[<0>] kthread+0x29b/0x360
[<0>] ret_from_fork+0x30/0x70
[<0>] ret_from_fork_asm+0x1a/0x30

...so the status should have been updated properly. Barrier problem?
Chuck Lever III Sept. 13, 2024, 4:14 p.m. UTC | #2
> On Sep 11, 2024, at 2:33 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Tue, 2024-07-30 at 07:19 +1000, NeilBrown wrote:
>> If an svc thread needs to perform some initialisation that might fail,
>> it has no good way to handle the failure.
>> 
>> Before the thread can exit it must call svc_exit_thread(), but that
>> requires the service mutex to be held.  The thread cannot simply take
>> the mutex as that could deadlock if there is a concurrent attempt to
>> shut down all threads (which is unlikely, but not impossible).
>> 
>> nfsd currently call svc_exit_thread() unprotected in the unlikely event
>> that unshare_fs_struct() fails.
>> 
>> We can clean this up by introducing svc_thread_init_status() by which an
>> svc thread can report whether initialisation has succeeded.  If it has,
>> it continues normally into the action loop.  If it has not,
>> svc_thread_init_status() immediately aborts the thread.
>> svc_start_kthread() waits for either of these to happen, and calls
>> svc_exit_thread() (under the mutex) if the thread aborted.
>> 
>> Signed-off-by: NeilBrown <neilb@suse.de>
>> ---
>> fs/lockd/svc.c             |  2 ++
>> fs/nfs/callback.c          |  2 ++
>> fs/nfsd/nfssvc.c           |  9 +++------
>> include/linux/sunrpc/svc.h | 28 ++++++++++++++++++++++++++++
>> net/sunrpc/svc.c           | 11 +++++++++++
>> 5 files changed, 46 insertions(+), 6 deletions(-)
>> 
>> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
>> index 71713309967d..4ec22c2f2ea3 100644
>> --- a/fs/lockd/svc.c
>> +++ b/fs/lockd/svc.c
>> @@ -124,6 +124,8 @@ lockd(void *vrqstp)
>> struct net *net = &init_net;
>> struct lockd_net *ln = net_generic(net, lockd_net_id);
>> 
>> + svc_thread_init_status(rqstp, 0);
>> +
>> /* try_to_freeze() is called from svc_recv() */
>> set_freezable();
>> 
>> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
>> index 8adfcd4c8c1a..6cf92498a5ac 100644
>> --- a/fs/nfs/callback.c
>> +++ b/fs/nfs/callback.c
>> @@ -76,6 +76,8 @@ nfs4_callback_svc(void *vrqstp)
>> {
>> struct svc_rqst *rqstp = vrqstp;
>> 
>> + svc_thread_init_status(rqstp, 0);
>> +
>> set_freezable();
>> 
>> while (!svc_thread_should_stop(rqstp))
>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>> index fca340b3ee91..1cef09a3c78e 100644
>> --- a/fs/nfsd/nfssvc.c
>> +++ b/fs/nfsd/nfssvc.c
>> @@ -875,11 +875,9 @@ nfsd(void *vrqstp)
>> 
>> /* At this point, the thread shares current->fs
>>  * with the init process. We need to create files with the
>> -  * umask as defined by the client instead of init's umask. */
>> - if (unshare_fs_struct() < 0) {
>> - printk("Unable to start nfsd thread: out of memory\n");
>> - goto out;
>> - }
>> +  * umask as defined by the client instead of init's umask.
>> +  */
>> + svc_thread_init_status(rqstp, unshare_fs_struct());
>> 
>> current->fs->umask = 0;
>> 
>> @@ -901,7 +899,6 @@ nfsd(void *vrqstp)
>> 
>> atomic_dec(&nfsd_th_cnt);
>> 
>> -out:
>> /* Release the thread */
>> svc_exit_thread(rqstp);
>> return 0;
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index 99e9345d829e..437672bcaa22 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -21,6 +21,7 @@
>> #include <linux/wait.h>
>> #include <linux/mm.h>
>> #include <linux/pagevec.h>
>> +#include <linux/kthread.h>
>> 
>> /*
>>  *
>> @@ -232,6 +233,11 @@ struct svc_rqst {
>> struct net *rq_bc_net; /* pointer to backchannel's
>>  * net namespace
>>  */
>> +
>> + int rq_err; /* Thread sets this to inidicate
>> +  * initialisation success.
>> +  */
>> +
>> unsigned long bc_to_initval;
>> unsigned int bc_to_retries;
>> void ** rq_lease_breaker; /* The v4 client breaking a lease */
>> @@ -305,6 +311,28 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
>> return test_bit(RQ_VICTIM, &rqstp->rq_flags);
>> }
>> 
>> +/**
>> + * svc_thread_init_status - report whether thread has initialised successfully
>> + * @rqstp: the thread in question
>> + * @err: errno code
>> + *
>> + * After performing any initialisation that could fail, and before starting
>> + * normal work, each sunrpc svc_thread must call svc_thread_init_status()
>> + * with an appropriate error, or zero.
>> + *
>> + * If zero is passed, the thread is ready and must continue until
>> + * svc_thread_should_stop() returns true.  If a non-zero error is passed
>> + * the call will not return - the thread will exit.
>> + */
>> +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
>> +{
>> + /* store_release ensures svc_start_kthreads() sees the error */
>> + smp_store_release(&rqstp->rq_err, err);
>> + wake_up_var(&rqstp->rq_err);
>> + if (err)
>> + kthread_exit(1);
>> +}
>> +
>> struct svc_deferred_req {
>> u32 prot; /* protocol (UDP or TCP) */
>> struct svc_xprt *xprt;
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index ae31ffea7a14..1e80fa67d8b7 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -706,6 +706,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
>> if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
>> goto out_enomem;
>> 
>> + rqstp->rq_err = -EAGAIN; /* No error yet */
>> +
>> serv->sv_nrthreads += 1;
>> pool->sp_nrthreads += 1;
>> 
>> @@ -792,6 +794,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>> struct svc_pool *chosen_pool;
>> unsigned int state = serv->sv_nrthreads-1;
>> int node;
>> + int err;
>> 
>> do {
>> nrservs--;
>> @@ -814,6 +817,14 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>> 
>> svc_sock_update_bufs(serv);
>> wake_up_process(task);
>> +
>> + /* load_acquire ensures we get value stored in svc_thread_init_status() */
>> + wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
>> + err = rqstp->rq_err;
>> + if (err) {
>> + svc_exit_thread(rqstp);
>> + return err;
>> + }
>> } while (nrservs > 0);
>> 
>> return 0;
> 
> 
> I hit a hang today on the client while running the nfs-interop test
> under kdevops. The client is stuck in mount syscall, while trying to
> set up the backchannel:
> 
> [ 1693.653257] INFO: task mount.nfs:13243 blocked for more than 120 seconds.
> [ 1693.655827]       Not tainted 6.11.0-rc7-gffcadb41b696 #166
> [ 1693.657858] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 1693.661442] task:mount.nfs       state:D stack:0     pid:13243 tgid:13243 ppid:13242  flags:0x00004002
> [ 1693.664648] Call Trace:
> [ 1693.665639]  <TASK>
> [ 1693.666482]  __schedule+0xc04/0x2750
> [ 1693.668021]  ? __pfx___schedule+0x10/0x10
> [ 1693.669562]  ? _raw_spin_lock_irqsave+0x98/0xf0
> [ 1693.671213]  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
> [ 1693.673109]  ? try_to_wake_up+0x141/0x1210
> [ 1693.674763]  ? __pfx_try_to_wake_up+0x10/0x10
> [ 1693.676612]  ? _raw_spin_unlock_irqrestore+0x23/0x40
> [ 1693.678429]  schedule+0x73/0x2f0
> [ 1693.679662]  svc_set_num_threads+0xbc8/0xf80 [sunrpc]
> [ 1693.682007]  ? __pfx_svc_set_num_threads+0x10/0x10 [sunrpc]
> [ 1693.684163]  ? __pfx_var_wake_function+0x10/0x10
> [ 1693.685849]  ? __svc_create+0x6c0/0x980 [sunrpc]
> [ 1693.687777]  nfs_callback_up+0x314/0xae0 [nfsv4]
> [ 1693.689630]  nfs4_init_client+0x203/0x400 [nfsv4]
> [ 1693.691468]  ? __pfx_nfs4_init_client+0x10/0x10 [nfsv4]
> [ 1693.693502]  ? _raw_spin_lock_irqsave+0x98/0xf0
> [ 1693.695141]  nfs4_set_client+0x2f4/0x520 [nfsv4]
> [ 1693.696967]  ? __pfx_nfs4_set_client+0x10/0x10 [nfsv4]
> [ 1693.699230]  nfs4_create_server+0x5f2/0xef0 [nfsv4]
> [ 1693.701357]  ? _raw_spin_lock+0x85/0xe0
> [ 1693.702758]  ? __pfx__raw_spin_lock+0x10/0x10
> [ 1693.704344]  ? nfs_get_tree+0x61f/0x16a0 [nfs]
> [ 1693.706160]  ? __pfx_nfs4_create_server+0x10/0x10 [nfsv4]
> [ 1693.707376]  ? __module_get+0x26/0xc0
> [ 1693.708061]  nfs4_try_get_tree+0xcd/0x250 [nfsv4]
> [ 1693.708893]  vfs_get_tree+0x83/0x2d0
> [ 1693.709534]  path_mount+0x88d/0x19a0
> [ 1693.710100]  ? __pfx_path_mount+0x10/0x10
> [ 1693.710718]  ? user_path_at+0xa4/0xe0
> [ 1693.711303]  ? kmem_cache_free+0x143/0x3e0
> [ 1693.711936]  __x64_sys_mount+0x1fb/0x270
> [ 1693.712606]  ? __pfx___x64_sys_mount+0x10/0x10
> [ 1693.713315]  do_syscall_64+0x4b/0x110
> [ 1693.713889]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [ 1693.714660] RIP: 0033:0x7f05050418fe
> [ 1693.715233] RSP: 002b:00007fff4e0f5728 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
> [ 1693.716417] RAX: ffffffffffffffda RBX: 00007fff4e0f5900 RCX: 00007f05050418fe
> [ 1693.717492] RDX: 00005559cea3ea70 RSI: 00005559cea3fe60 RDI: 00005559cea40140
> [ 1693.718558] RBP: 00007fff4e0f5760 R08: 00005559cea41e60 R09: 00007f050510cb20
> [ 1693.719620] R10: 0000000000000000 R11: 0000000000000246 R12: 00005559cea41e60
> [ 1693.720735] R13: 00007fff4e0f5900 R14: 00005559cea41df0 R15: 0000000000000004
> 
> 
> Looking at faddr2line:
> 
> $ ./scripts/faddr2line --list net/sunrpc/sunrpc.ko svc_set_num_threads+0xbc8/0xf80
> svc_set_num_threads+0xbc8/0xf80:
> 
> svc_start_kthreads at /home/jlayton/git/kdevops/linux/net/sunrpc/svc.c:822 (discriminator 17)
> 817 
> 818                    svc_sock_update_bufs(serv);
> 819                    wake_up_process(task);
> 820 
> 821                    /* load_acquire ensures we get value stored in svc_thread_init_status() */
>> 822<                   wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
> 823                    err = rqstp->rq_err;
> 824                    if (err) {
> 825                            svc_exit_thread(rqstp);
> 826                            return err;
> 827                    }
> 
> (inlined by) svc_set_num_threads at /home/jlayton/git/kdevops/linux/net/sunrpc/svc.c:877 (discriminator 17)
> 872                    nrservs -= serv->sv_nrthreads;
> 873            else
> 874                    nrservs -= pool->sp_nrthreads;
> 875 
> 876            if (nrservs > 0)
>> 877<                   return svc_start_kthreads(serv, pool, nrservs);
> 878            if (nrservs < 0)
> 879                    return svc_stop_kthreads(serv, pool, nrservs);
> 880            return 0;
> 881    }
> 882    EXPORT_SYMBOL_GPL(svc_set_num_threads);
> 
> It looks like the callback thread started up properly and is past the svc_thread_init_status call.
> 
> $ sudo cat /proc/13246/stack
> [<0>] svc_recv+0xcef/0x2020 [sunrpc]
> [<0>] nfs4_callback_svc+0xb0/0x140 [nfsv4]
> [<0>] kthread+0x29b/0x360
> [<0>] ret_from_fork+0x30/0x70
> [<0>] ret_from_fork_asm+0x1a/0x30
> 
> ...so the status should have been updated properly. Barrier problem?

Speaking as NFSD release manager: I don't intend to send the
NFSD v6.12 PR until week of September 23rd.

I will either drop these patches before sending my PR, or I
can squash in a fix if one is found.


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 71713309967d..4ec22c2f2ea3 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -124,6 +124,8 @@  lockd(void *vrqstp)
 	struct net *net = &init_net;
 	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
+	svc_thread_init_status(rqstp, 0);
+
 	/* try_to_freeze() is called from svc_recv() */
 	set_freezable();
 
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 8adfcd4c8c1a..6cf92498a5ac 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -76,6 +76,8 @@  nfs4_callback_svc(void *vrqstp)
 {
 	struct svc_rqst *rqstp = vrqstp;
 
+	svc_thread_init_status(rqstp, 0);
+
 	set_freezable();
 
 	while (!svc_thread_should_stop(rqstp))
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fca340b3ee91..1cef09a3c78e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -875,11 +875,9 @@  nfsd(void *vrqstp)
 
 	/* At this point, the thread shares current->fs
 	 * with the init process. We need to create files with the
-	 * umask as defined by the client instead of init's umask. */
-	if (unshare_fs_struct() < 0) {
-		printk("Unable to start nfsd thread: out of memory\n");
-		goto out;
-	}
+	 * umask as defined by the client instead of init's umask.
+	 */
+	svc_thread_init_status(rqstp, unshare_fs_struct());
 
 	current->fs->umask = 0;
 
@@ -901,7 +899,6 @@  nfsd(void *vrqstp)
 
 	atomic_dec(&nfsd_th_cnt);
 
-out:
 	/* Release the thread */
 	svc_exit_thread(rqstp);
 	return 0;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 99e9345d829e..437672bcaa22 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -21,6 +21,7 @@ 
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/pagevec.h>
+#include <linux/kthread.h>
 
 /*
  *
@@ -232,6 +233,11 @@  struct svc_rqst {
 	struct net		*rq_bc_net;	/* pointer to backchannel's
 						 * net namespace
 						 */
+
+	int			rq_err;		/* Thread sets this to inidicate
+						 * initialisation success.
+						 */
+
 	unsigned long	bc_to_initval;
 	unsigned int	bc_to_retries;
 	void **			rq_lease_breaker; /* The v4 client breaking a lease */
@@ -305,6 +311,28 @@  static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
 	return test_bit(RQ_VICTIM, &rqstp->rq_flags);
 }
 
+/**
+ * svc_thread_init_status - report whether thread has initialised successfully
+ * @rqstp: the thread in question
+ * @err: errno code
+ *
+ * After performing any initialisation that could fail, and before starting
+ * normal work, each sunrpc svc_thread must call svc_thread_init_status()
+ * with an appropriate error, or zero.
+ *
+ * If zero is passed, the thread is ready and must continue until
+ * svc_thread_should_stop() returns true.  If a non-zero error is passed
+ * the call will not return - the thread will exit.
+ */
+static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
+{
+	/* store_release ensures svc_start_kthreads() sees the error */
+	smp_store_release(&rqstp->rq_err, err);
+	wake_up_var(&rqstp->rq_err);
+	if (err)
+		kthread_exit(1);
+}
+
 struct svc_deferred_req {
 	u32			prot;	/* protocol (UDP or TCP) */
 	struct svc_xprt		*xprt;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index ae31ffea7a14..1e80fa67d8b7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -706,6 +706,8 @@  svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
 		goto out_enomem;
 
+	rqstp->rq_err = -EAGAIN; /* No error yet */
+
 	serv->sv_nrthreads += 1;
 	pool->sp_nrthreads += 1;
 
@@ -792,6 +794,7 @@  svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	struct svc_pool *chosen_pool;
 	unsigned int state = serv->sv_nrthreads-1;
 	int node;
+	int err;
 
 	do {
 		nrservs--;
@@ -814,6 +817,14 @@  svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 
 		svc_sock_update_bufs(serv);
 		wake_up_process(task);
+
+		/* load_acquire ensures we get value stored in svc_thread_init_status() */
+		wait_var_event(&rqstp->rq_err, smp_load_acquire(&rqstp->rq_err) != -EAGAIN);
+		err = rqstp->rq_err;
+		if (err) {
+			svc_exit_thread(rqstp);
+			return err;
+		}
 	} while (nrservs > 0);
 
 	return 0;