diff mbox series

SUNRPC: increase max timeout for rebind to handle NFS server restart

Message ID 1680924600-11171-1-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series SUNRPC: increase max timeout for rebind to handle NFS server restart | expand

Commit Message

Dai Ngo April 8, 2023, 3:30 a.m. UTC
Currently call_bind_status places a hard limit of 9 seconds for retries
on EACCES error. This limit was done to prevent the RPC request from
being retried forever if the remote server has problem and never comes
up

However this 9 seconds timeout is too short, comparing to other RPC
timeouts which are generally in minutes. This causes intermittent failure
with EIO on the client side when there are lots of NLM activity and the
NFS server is restarted.

Instead of removing the max timeout for retry and relying on the RPC
timeout mechanism to handle the retry, which can lead to the RPC being
retried forever if the remote NLM service fails to come up. This patch
simply increases the max timeout of call_bind_status from 9 to 90 seconds
which should allow enough time for NLM to register after a restart, and
not retrying forever if there is real problem with the remote system.

Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock requests")
Reported-by: Helen Chao <helen.chao@oracle.com>
Tested-by: Helen Chao <helen.chao@oracle.com>
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 include/linux/sunrpc/clnt.h  | 3 +++
 include/linux/sunrpc/sched.h | 4 ++--
 net/sunrpc/clnt.c            | 2 +-
 net/sunrpc/sched.c           | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

Comments

Jeff Layton April 12, 2023, 5:50 p.m. UTC | #1
On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
> Currently call_bind_status places a hard limit of 9 seconds for retries
> on EACCES error. This limit was done to prevent the RPC request from
> being retried forever if the remote server has problem and never comes
> up
> 
> However this 9 seconds timeout is too short, comparing to other RPC
> timeouts which are generally in minutes. This causes intermittent failure
> with EIO on the client side when there are lots of NLM activity and the
> NFS server is restarted.
> 
> Instead of removing the max timeout for retry and relying on the RPC
> timeout mechanism to handle the retry, which can lead to the RPC being
> retried forever if the remote NLM service fails to come up. This patch
> simply increases the max timeout of call_bind_status from 9 to 90 seconds
> which should allow enough time for NLM to register after a restart, and
> not retrying forever if there is real problem with the remote system.
> 
> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock requests")
> Reported-by: Helen Chao <helen.chao@oracle.com>
> Tested-by: Helen Chao <helen.chao@oracle.com>
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  include/linux/sunrpc/clnt.h  | 3 +++
>  include/linux/sunrpc/sched.h | 4 ++--
>  net/sunrpc/clnt.c            | 2 +-
>  net/sunrpc/sched.c           | 3 ++-
>  4 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index 770ef2cb5775..81afc5ea2665 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>  #define RPC_CLNT_CREATE_REUSEPORT	(1UL << 11)
>  #define RPC_CLNT_CREATE_CONNECTED	(1UL << 12)
>  
> +#define	RPC_CLNT_REBIND_DELAY		3
> +#define	RPC_CLNT_REBIND_MAX_TIMEOUT	90
> +
>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>  struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
>  				const struct rpc_program *, u32);
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index b8ca3ecaf8d7..e9dc142f10bb 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -90,8 +90,8 @@ struct rpc_task {
>  #endif
>  	unsigned char		tk_priority : 2,/* Task priority */
>  				tk_garb_retry : 2,
> -				tk_cred_retry : 2,
> -				tk_rebind_retry : 2;
> +				tk_cred_retry : 2;
> +	unsigned char		tk_rebind_retry;
>  };
>  
>  typedef void			(*rpc_action)(struct rpc_task *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index fd7e1c630493..222578af6b01 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>  		if (task->tk_rebind_retry == 0)
>  			break;
>  		task->tk_rebind_retry--;
> -		rpc_delay(task, 3*HZ);
> +		rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>  		goto retry_timeout;
>  	case -ENOBUFS:
>  		rpc_delay(task, HZ >> 2);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index be587a308e05..5c18a35752aa 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>  	/* Initialize retry counters */
>  	task->tk_garb_retry = 2;
>  	task->tk_cred_retry = 2;
> -	task->tk_rebind_retry = 2;
> +	task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> +					RPC_CLNT_REBIND_DELAY;
>  
>  	/* starting timestamp */
>  	task->tk_start = ktime_get();

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Chuck Lever April 12, 2023, 5:55 p.m. UTC | #2
> On Apr 12, 2023, at 1:50 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>> Currently call_bind_status places a hard limit of 9 seconds for retries
>> on EACCES error. This limit was done to prevent the RPC request from
>> being retried forever if the remote server has problem and never comes
>> up
>> 
>> However this 9 seconds timeout is too short, comparing to other RPC
>> timeouts which are generally in minutes. This causes intermittent failure
>> with EIO on the client side when there are lots of NLM activity and the
>> NFS server is restarted.
>> 
>> Instead of removing the max timeout for retry and relying on the RPC
>> timeout mechanism to handle the retry, which can lead to the RPC being
>> retried forever if the remote NLM service fails to come up. This patch
>> simply increases the max timeout of call_bind_status from 9 to 90 seconds
>> which should allow enough time for NLM to register after a restart, and
>> not retrying forever if there is real problem with the remote system.
>> 
>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock requests")
>> Reported-by: Helen Chao <helen.chao@oracle.com>
>> Tested-by: Helen Chao <helen.chao@oracle.com>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> include/linux/sunrpc/clnt.h  | 3 +++
>> include/linux/sunrpc/sched.h | 4 ++--
>> net/sunrpc/clnt.c            | 2 +-
>> net/sunrpc/sched.c           | 3 ++-
>> 4 files changed, 8 insertions(+), 4 deletions(-)
>> 
>> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
>> index 770ef2cb5775..81afc5ea2665 100644
>> --- a/include/linux/sunrpc/clnt.h
>> +++ b/include/linux/sunrpc/clnt.h
>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>> #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
>> #define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
>> 
>> +#define RPC_CLNT_REBIND_DELAY 3
>> +#define RPC_CLNT_REBIND_MAX_TIMEOUT 90
>> +
>> struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
>> const struct rpc_program *, u32);
>> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>> --- a/include/linux/sunrpc/sched.h
>> +++ b/include/linux/sunrpc/sched.h
>> @@ -90,8 +90,8 @@ struct rpc_task {
>> #endif
>> unsigned char tk_priority : 2,/* Task priority */
>> tk_garb_retry : 2,
>> - tk_cred_retry : 2,
>> - tk_rebind_retry : 2;
>> + tk_cred_retry : 2;
>> + unsigned char tk_rebind_retry;
>> };
>> 
>> typedef void (*rpc_action)(struct rpc_task *);
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index fd7e1c630493..222578af6b01 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>> if (task->tk_rebind_retry == 0)
>> break;
>> task->tk_rebind_retry--;
>> - rpc_delay(task, 3*HZ);
>> + rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>> goto retry_timeout;
>> case -ENOBUFS:
>> rpc_delay(task, HZ >> 2);
>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>> index be587a308e05..5c18a35752aa 100644
>> --- a/net/sunrpc/sched.c
>> +++ b/net/sunrpc/sched.c
>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>> /* Initialize retry counters */
>> task->tk_garb_retry = 2;
>> task->tk_cred_retry = 2;
>> - task->tk_rebind_retry = 2;
>> + task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>> + RPC_CLNT_REBIND_DELAY;
>> 
>> /* starting timestamp */
>> task->tk_start = ktime_get();
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>

Thanks!

Since we still haven't heard from the client folks on this,
I can take this fix through the nfsd tree.


--
Chuck Lever
Dai Ngo April 12, 2023, 6:09 p.m. UTC | #3
On 4/12/23 10:55 AM, Chuck Lever III wrote:
>
>> On Apr 12, 2023, at 1:50 PM, Jeff Layton <jlayton@kernel.org> wrote:
>>
>> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>>> Currently call_bind_status places a hard limit of 9 seconds for retries
>>> on EACCES error. This limit was done to prevent the RPC request from
>>> being retried forever if the remote server has problem and never comes
>>> up
>>>
>>> However this 9 seconds timeout is too short, comparing to other RPC
>>> timeouts which are generally in minutes. This causes intermittent failure
>>> with EIO on the client side when there are lots of NLM activity and the
>>> NFS server is restarted.
>>>
>>> Instead of removing the max timeout for retry and relying on the RPC
>>> timeout mechanism to handle the retry, which can lead to the RPC being
>>> retried forever if the remote NLM service fails to come up. This patch
>>> simply increases the max timeout of call_bind_status from 9 to 90 seconds
>>> which should allow enough time for NLM to register after a restart, and
>>> not retrying forever if there is real problem with the remote system.
>>>
>>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock requests")
>>> Reported-by: Helen Chao <helen.chao@oracle.com>
>>> Tested-by: Helen Chao <helen.chao@oracle.com>
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>> include/linux/sunrpc/clnt.h  | 3 +++
>>> include/linux/sunrpc/sched.h | 4 ++--
>>> net/sunrpc/clnt.c            | 2 +-
>>> net/sunrpc/sched.c           | 3 ++-
>>> 4 files changed, 8 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
>>> index 770ef2cb5775..81afc5ea2665 100644
>>> --- a/include/linux/sunrpc/clnt.h
>>> +++ b/include/linux/sunrpc/clnt.h
>>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>> #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
>>> #define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
>>>
>>> +#define RPC_CLNT_REBIND_DELAY 3
>>> +#define RPC_CLNT_REBIND_MAX_TIMEOUT 90
>>> +
>>> struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
>>> const struct rpc_program *, u32);
>>> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
>>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>>> --- a/include/linux/sunrpc/sched.h
>>> +++ b/include/linux/sunrpc/sched.h
>>> @@ -90,8 +90,8 @@ struct rpc_task {
>>> #endif
>>> unsigned char tk_priority : 2,/* Task priority */
>>> tk_garb_retry : 2,
>>> - tk_cred_retry : 2,
>>> - tk_rebind_retry : 2;
>>> + tk_cred_retry : 2;
>>> + unsigned char tk_rebind_retry;
>>> };
>>>
>>> typedef void (*rpc_action)(struct rpc_task *);
>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>> index fd7e1c630493..222578af6b01 100644
>>> --- a/net/sunrpc/clnt.c
>>> +++ b/net/sunrpc/clnt.c
>>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>> if (task->tk_rebind_retry == 0)
>>> break;
>>> task->tk_rebind_retry--;
>>> - rpc_delay(task, 3*HZ);
>>> + rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>> goto retry_timeout;
>>> case -ENOBUFS:
>>> rpc_delay(task, HZ >> 2);
>>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>>> index be587a308e05..5c18a35752aa 100644
>>> --- a/net/sunrpc/sched.c
>>> +++ b/net/sunrpc/sched.c
>>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>>> /* Initialize retry counters */
>>> task->tk_garb_retry = 2;
>>> task->tk_cred_retry = 2;
>>> - task->tk_rebind_retry = 2;
>>> + task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>>> + RPC_CLNT_REBIND_DELAY;
>>>
>>> /* starting timestamp */
>>> task->tk_start = ktime_get();
>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> Thanks!
>
> Since we still haven't heard from the client folks on this,
> I can take this fix through the nfsd tree.

Thank you Jeff and Chuck!

-Dai
Trond Myklebust April 17, 2023, 8:49 p.m. UTC | #4
On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
> Currently call_bind_status places a hard limit of 9 seconds for
> retries
> on EACCES error. This limit was done to prevent the RPC request from
> being retried forever if the remote server has problem and never
> comes
> up
> 
> However this 9 seconds timeout is too short, comparing to other RPC
> timeouts which are generally in minutes. This causes intermittent
> failure
> with EIO on the client side when there are lots of NLM activity and
> the
> NFS server is restarted.
> 
> Instead of removing the max timeout for retry and relying on the RPC
> timeout mechanism to handle the retry, which can lead to the RPC
> being
> retried forever if the remote NLM service fails to come up. This
> patch
> simply increases the max timeout of call_bind_status from 9 to 90
> seconds
> which should allow enough time for NLM to register after a restart,
> and
> not retrying forever if there is real problem with the remote system.
> 
> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
> requests")
> Reported-by: Helen Chao <helen.chao@oracle.com>
> Tested-by: Helen Chao <helen.chao@oracle.com>
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  include/linux/sunrpc/clnt.h  | 3 +++
>  include/linux/sunrpc/sched.h | 4 ++--
>  net/sunrpc/clnt.c            | 2 +-
>  net/sunrpc/sched.c           | 3 ++-
>  4 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/sunrpc/clnt.h
> b/include/linux/sunrpc/clnt.h
> index 770ef2cb5775..81afc5ea2665 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>  #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>  #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>  
> +#define        RPC_CLNT_REBIND_DELAY           3
> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
> +
>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
>                                 const struct rpc_program *, u32);
> diff --git a/include/linux/sunrpc/sched.h
> b/include/linux/sunrpc/sched.h
> index b8ca3ecaf8d7..e9dc142f10bb 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -90,8 +90,8 @@ struct rpc_task {
>  #endif
>         unsigned char           tk_priority : 2,/* Task priority */
>                                 tk_garb_retry : 2,
> -                               tk_cred_retry : 2,
> -                               tk_rebind_retry : 2;
> +                               tk_cred_retry : 2;
> +       unsigned char           tk_rebind_retry;
>  };
>  
>  typedef void                   (*rpc_action)(struct rpc_task *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index fd7e1c630493..222578af6b01 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>                 if (task->tk_rebind_retry == 0)
>                         break;
>                 task->tk_rebind_retry--;
> -               rpc_delay(task, 3*HZ);
> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>                 goto retry_timeout;
>         case -ENOBUFS:
>                 rpc_delay(task, HZ >> 2);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index be587a308e05..5c18a35752aa 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>         /* Initialize retry counters */
>         task->tk_garb_retry = 2;
>         task->tk_cred_retry = 2;
> -       task->tk_rebind_retry = 2;
> +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> +                                       RPC_CLNT_REBIND_DELAY;

Why not just implement an exponential back off? If the server is slow
to come up, then pounding the rpcbind service every 3 seconds isn't
going to help.

>  
>         /* starting timestamp */
>         task->tk_start = ktime_get();
Chuck Lever April 17, 2023, 9:41 p.m. UTC | #5
> On Apr 17, 2023, at 4:49 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>> Currently call_bind_status places a hard limit of 9 seconds for
>> retries
>> on EACCES error. This limit was done to prevent the RPC request from
>> being retried forever if the remote server has problem and never
>> comes
>> up
>> 
>> However this 9 seconds timeout is too short, comparing to other RPC
>> timeouts which are generally in minutes. This causes intermittent
>> failure
>> with EIO on the client side when there are lots of NLM activity and
>> the
>> NFS server is restarted.
>> 
>> Instead of removing the max timeout for retry and relying on the RPC
>> timeout mechanism to handle the retry, which can lead to the RPC
>> being
>> retried forever if the remote NLM service fails to come up. This
>> patch
>> simply increases the max timeout of call_bind_status from 9 to 90
>> seconds
>> which should allow enough time for NLM to register after a restart,
>> and
>> not retrying forever if there is real problem with the remote system.
>> 
>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
>> requests")
>> Reported-by: Helen Chao <helen.chao@oracle.com>
>> Tested-by: Helen Chao <helen.chao@oracle.com>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>  include/linux/sunrpc/clnt.h  | 3 +++
>>  include/linux/sunrpc/sched.h | 4 ++--
>>  net/sunrpc/clnt.c            | 2 +-
>>  net/sunrpc/sched.c           | 3 ++-
>>  4 files changed, 8 insertions(+), 4 deletions(-)
>> 
>> diff --git a/include/linux/sunrpc/clnt.h
>> b/include/linux/sunrpc/clnt.h
>> index 770ef2cb5775..81afc5ea2665 100644
>> --- a/include/linux/sunrpc/clnt.h
>> +++ b/include/linux/sunrpc/clnt.h
>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>  #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>>  #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>>  
>> +#define        RPC_CLNT_REBIND_DELAY           3
>> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
>> +
>>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
>>                                 const struct rpc_program *, u32);
>> diff --git a/include/linux/sunrpc/sched.h
>> b/include/linux/sunrpc/sched.h
>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>> --- a/include/linux/sunrpc/sched.h
>> +++ b/include/linux/sunrpc/sched.h
>> @@ -90,8 +90,8 @@ struct rpc_task {
>>  #endif
>>         unsigned char           tk_priority : 2,/* Task priority */
>>                                 tk_garb_retry : 2,
>> -                               tk_cred_retry : 2,
>> -                               tk_rebind_retry : 2;
>> +                               tk_cred_retry : 2;
>> +       unsigned char           tk_rebind_retry;
>>  };
>>  
>>  typedef void                   (*rpc_action)(struct rpc_task *);
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index fd7e1c630493..222578af6b01 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>                 if (task->tk_rebind_retry == 0)
>>                         break;
>>                 task->tk_rebind_retry--;
>> -               rpc_delay(task, 3*HZ);
>> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>                 goto retry_timeout;
>>         case -ENOBUFS:
>>                 rpc_delay(task, HZ >> 2);
>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>> index be587a308e05..5c18a35752aa 100644
>> --- a/net/sunrpc/sched.c
>> +++ b/net/sunrpc/sched.c
>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>>         /* Initialize retry counters */
>>         task->tk_garb_retry = 2;
>>         task->tk_cred_retry = 2;
>> -       task->tk_rebind_retry = 2;
>> +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>> +                                       RPC_CLNT_REBIND_DELAY;
> 
> Why not just implement an exponential back off? If the server is slow
> to come up, then pounding the rpcbind service every 3 seconds isn't
> going to help.

Exponential backoff has the disadvantage that once we've gotten
to the longer retry times, it's easy for a client to wait quite
some time before it notices the rebind service is available.

But I have to wonder if retrying every 3 seconds is useful if
the request is going over TCP.


>>  
>>         /* starting timestamp */
>>         task->tk_start = ktime_get();

--
Chuck Lever
Trond Myklebust April 17, 2023, 9:48 p.m. UTC | #6
On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
> 
> 
> > On Apr 17, 2023, at 4:49 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
> > > Currently call_bind_status places a hard limit of 9 seconds for
> > > retries
> > > on EACCES error. This limit was done to prevent the RPC request
> > > from
> > > being retried forever if the remote server has problem and never
> > > comes
> > > up
> > > 
> > > However this 9 seconds timeout is too short, comparing to other
> > > RPC
> > > timeouts which are generally in minutes. This causes intermittent
> > > failure
> > > with EIO on the client side when there are lots of NLM activity
> > > and
> > > the
> > > NFS server is restarted.
> > > 
> > > Instead of removing the max timeout for retry and relying on the
> > > RPC
> > > timeout mechanism to handle the retry, which can lead to the RPC
> > > being
> > > retried forever if the remote NLM service fails to come up. This
> > > patch
> > > simply increases the max timeout of call_bind_status from 9 to 90
> > > seconds
> > > which should allow enough time for NLM to register after a
> > > restart,
> > > and
> > > not retrying forever if there is real problem with the remote
> > > system.
> > > 
> > > Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
> > > requests")
> > > Reported-by: Helen Chao <helen.chao@oracle.com>
> > > Tested-by: Helen Chao <helen.chao@oracle.com>
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > ---
> > >  include/linux/sunrpc/clnt.h  | 3 +++
> > >  include/linux/sunrpc/sched.h | 4 ++--
> > >  net/sunrpc/clnt.c            | 2 +-
> > >  net/sunrpc/sched.c           | 3 ++-
> > >  4 files changed, 8 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/include/linux/sunrpc/clnt.h
> > > b/include/linux/sunrpc/clnt.h
> > > index 770ef2cb5775..81afc5ea2665 100644
> > > --- a/include/linux/sunrpc/clnt.h
> > > +++ b/include/linux/sunrpc/clnt.h
> > > @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
> > >  #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
> > >  #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
> > >  
> > > +#define        RPC_CLNT_REBIND_DELAY           3
> > > +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
> > > +
> > >  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
> > >  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
> > >                                 const struct rpc_program *, u32);
> > > diff --git a/include/linux/sunrpc/sched.h
> > > b/include/linux/sunrpc/sched.h
> > > index b8ca3ecaf8d7..e9dc142f10bb 100644
> > > --- a/include/linux/sunrpc/sched.h
> > > +++ b/include/linux/sunrpc/sched.h
> > > @@ -90,8 +90,8 @@ struct rpc_task {
> > >  #endif
> > >         unsigned char           tk_priority : 2,/* Task priority
> > > */
> > >                                 tk_garb_retry : 2,
> > > -                               tk_cred_retry : 2,
> > > -                               tk_rebind_retry : 2;
> > > +                               tk_cred_retry : 2;
> > > +       unsigned char           tk_rebind_retry;
> > >  };
> > >  
> > >  typedef void                   (*rpc_action)(struct rpc_task *);
> > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > index fd7e1c630493..222578af6b01 100644
> > > --- a/net/sunrpc/clnt.c
> > > +++ b/net/sunrpc/clnt.c
> > > @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
> > >                 if (task->tk_rebind_retry == 0)
> > >                         break;
> > >                 task->tk_rebind_retry--;
> > > -               rpc_delay(task, 3*HZ);
> > > +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
> > >                 goto retry_timeout;
> > >         case -ENOBUFS:
> > >                 rpc_delay(task, HZ >> 2);
> > > diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> > > index be587a308e05..5c18a35752aa 100644
> > > --- a/net/sunrpc/sched.c
> > > +++ b/net/sunrpc/sched.c
> > > @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
> > > *task)
> > >         /* Initialize retry counters */
> > >         task->tk_garb_retry = 2;
> > >         task->tk_cred_retry = 2;
> > > -       task->tk_rebind_retry = 2;
> > > +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> > > +                                       RPC_CLNT_REBIND_DELAY;
> > 
> > Why not just implement an exponential back off? If the server is
> > slow
> > to come up, then pounding the rpcbind service every 3 seconds isn't
> > going to help.
> 
> Exponential backoff has the disadvantage that once we've gotten
> to the longer retry times, it's easy for a client to wait quite
> some time before it notices the rebind service is available.
> 
> But I have to wonder if retrying every 3 seconds is useful if
> the request is going over TCP.
> 

The problem isn't reliability: this is handling a case where we _are_
getting a reply from the server, just not one we wanted. EACCES here
means that the rpcbind server didn't return a valid entry for the
service we requested, presumably because the service hasn't been
registered yet.

So yes, an exponential back off is appropriate here.
Dai Ngo April 17, 2023, 9:51 p.m. UTC | #7
On 4/17/23 2:48 PM, Trond Myklebust wrote:
> On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
>>
>>> On Apr 17, 2023, at 4:49 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>>
>>> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>>>> Currently call_bind_status places a hard limit of 9 seconds for
>>>> retries
>>>> on EACCES error. This limit was done to prevent the RPC request
>>>> from
>>>> being retried forever if the remote server has problem and never
>>>> comes
>>>> up
>>>>
>>>> However this 9 seconds timeout is too short, comparing to other
>>>> RPC
>>>> timeouts which are generally in minutes. This causes intermittent
>>>> failure
>>>> with EIO on the client side when there are lots of NLM activity
>>>> and
>>>> the
>>>> NFS server is restarted.
>>>>
>>>> Instead of removing the max timeout for retry and relying on the
>>>> RPC
>>>> timeout mechanism to handle the retry, which can lead to the RPC
>>>> being
>>>> retried forever if the remote NLM service fails to come up. This
>>>> patch
>>>> simply increases the max timeout of call_bind_status from 9 to 90
>>>> seconds
>>>> which should allow enough time for NLM to register after a
>>>> restart,
>>>> and
>>>> not retrying forever if there is real problem with the remote
>>>> system.
>>>>
>>>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
>>>> requests")
>>>> Reported-by: Helen Chao <helen.chao@oracle.com>
>>>> Tested-by: Helen Chao <helen.chao@oracle.com>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>   include/linux/sunrpc/clnt.h  | 3 +++
>>>>   include/linux/sunrpc/sched.h | 4 ++--
>>>>   net/sunrpc/clnt.c            | 2 +-
>>>>   net/sunrpc/sched.c           | 3 ++-
>>>>   4 files changed, 8 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/include/linux/sunrpc/clnt.h
>>>> b/include/linux/sunrpc/clnt.h
>>>> index 770ef2cb5775..81afc5ea2665 100644
>>>> --- a/include/linux/sunrpc/clnt.h
>>>> +++ b/include/linux/sunrpc/clnt.h
>>>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>>>   #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>>>>   #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>>>>   
>>>> +#define        RPC_CLNT_REBIND_DELAY           3
>>>> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
>>>> +
>>>>   struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>>>   struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
>>>>                                  const struct rpc_program *, u32);
>>>> diff --git a/include/linux/sunrpc/sched.h
>>>> b/include/linux/sunrpc/sched.h
>>>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>>>> --- a/include/linux/sunrpc/sched.h
>>>> +++ b/include/linux/sunrpc/sched.h
>>>> @@ -90,8 +90,8 @@ struct rpc_task {
>>>>   #endif
>>>>          unsigned char           tk_priority : 2,/* Task priority
>>>> */
>>>>                                  tk_garb_retry : 2,
>>>> -                               tk_cred_retry : 2,
>>>> -                               tk_rebind_retry : 2;
>>>> +                               tk_cred_retry : 2;
>>>> +       unsigned char           tk_rebind_retry;
>>>>   };
>>>>   
>>>>   typedef void                   (*rpc_action)(struct rpc_task *);
>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>> index fd7e1c630493..222578af6b01 100644
>>>> --- a/net/sunrpc/clnt.c
>>>> +++ b/net/sunrpc/clnt.c
>>>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>>>                  if (task->tk_rebind_retry == 0)
>>>>                          break;
>>>>                  task->tk_rebind_retry--;
>>>> -               rpc_delay(task, 3*HZ);
>>>> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>>>                  goto retry_timeout;
>>>>          case -ENOBUFS:
>>>>                  rpc_delay(task, HZ >> 2);
>>>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>>>> index be587a308e05..5c18a35752aa 100644
>>>> --- a/net/sunrpc/sched.c
>>>> +++ b/net/sunrpc/sched.c
>>>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
>>>> *task)
>>>>          /* Initialize retry counters */
>>>>          task->tk_garb_retry = 2;
>>>>          task->tk_cred_retry = 2;
>>>> -       task->tk_rebind_retry = 2;
>>>> +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>>>> +                                       RPC_CLNT_REBIND_DELAY;
>>> Why not just implement an exponential back off? If the server is
>>> slow
>>> to come up, then pounding the rpcbind service every 3 seconds isn't
>>> going to help.
>> Exponential backoff has the disadvantage that once we've gotten
>> to the longer retry times, it's easy for a client to wait quite
>> some time before it notices the rebind service is available.
>>
>> But I have to wonder if retrying every 3 seconds is useful if
>> the request is going over TCP.
>>
> The problem isn't reliability: this is handling a case where we _are_
> getting a reply from the server, just not one we wanted. EACCES here
> means that the rpcbind server didn't return a valid entry for the
> service we requested, presumably because the service hasn't been
> registered yet.

That's correct.

>
> So yes, an exponential back off is appropriate here.

I think Chuck's point is still valid. It makes the client a little more
responsive; does not have to wait that long, and the overhead of a RPC
request every 3 seconds is not that significant.

-Dai

>
Trond Myklebust April 17, 2023, 9:53 p.m. UTC | #8
On Mon, 2023-04-17 at 14:51 -0700, dai.ngo@oracle.com wrote:
> 
> On 4/17/23 2:48 PM, Trond Myklebust wrote:
> > On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
> > > 
> > > > On Apr 17, 2023, at 4:49 PM, Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > 
> > > > On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
> > > > > Currently call_bind_status places a hard limit of 9 seconds
> > > > > for
> > > > > retries
> > > > > on EACCES error. This limit was done to prevent the RPC
> > > > > request
> > > > > from
> > > > > being retried forever if the remote server has problem and
> > > > > never
> > > > > comes
> > > > > up
> > > > > 
> > > > > However this 9 seconds timeout is too short, comparing to
> > > > > other
> > > > > RPC
> > > > > timeouts which are generally in minutes. This causes
> > > > > intermittent
> > > > > failure
> > > > > with EIO on the client side when there are lots of NLM
> > > > > activity
> > > > > and
> > > > > the
> > > > > NFS server is restarted.
> > > > > 
> > > > > Instead of removing the max timeout for retry and relying on
> > > > > the
> > > > > RPC
> > > > > timeout mechanism to handle the retry, which can lead to the
> > > > > RPC
> > > > > being
> > > > > retried forever if the remote NLM service fails to come up.
> > > > > This
> > > > > patch
> > > > > simply increases the max timeout of call_bind_status from 9
> > > > > to 90
> > > > > seconds
> > > > > which should allow enough time for NLM to register after a
> > > > > restart,
> > > > > and
> > > > > not retrying forever if there is real problem with the remote
> > > > > system.
> > > > > 
> > > > > Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
> > > > > requests")
> > > > > Reported-by: Helen Chao <helen.chao@oracle.com>
> > > > > Tested-by: Helen Chao <helen.chao@oracle.com>
> > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > ---
> > > > >   include/linux/sunrpc/clnt.h  | 3 +++
> > > > >   include/linux/sunrpc/sched.h | 4 ++--
> > > > >   net/sunrpc/clnt.c            | 2 +-
> > > > >   net/sunrpc/sched.c           | 3 ++-
> > > > >   4 files changed, 8 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/include/linux/sunrpc/clnt.h
> > > > > b/include/linux/sunrpc/clnt.h
> > > > > index 770ef2cb5775..81afc5ea2665 100644
> > > > > --- a/include/linux/sunrpc/clnt.h
> > > > > +++ b/include/linux/sunrpc/clnt.h
> > > > > @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
> > > > >   #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
> > > > >   #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
> > > > >   
> > > > > +#define        RPC_CLNT_REBIND_DELAY           3
> > > > > +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
> > > > > +
> > > > >   struct rpc_clnt *rpc_create(struct rpc_create_args *args);
> > > > >   struct rpc_clnt        *rpc_bind_new_program(struct
> > > > > rpc_clnt *,
> > > > >                                  const struct rpc_program *,
> > > > > u32);
> > > > > diff --git a/include/linux/sunrpc/sched.h
> > > > > b/include/linux/sunrpc/sched.h
> > > > > index b8ca3ecaf8d7..e9dc142f10bb 100644
> > > > > --- a/include/linux/sunrpc/sched.h
> > > > > +++ b/include/linux/sunrpc/sched.h
> > > > > @@ -90,8 +90,8 @@ struct rpc_task {
> > > > >   #endif
> > > > >          unsigned char           tk_priority : 2,/* Task
> > > > > priority
> > > > > */
> > > > >                                  tk_garb_retry : 2,
> > > > > -                               tk_cred_retry : 2,
> > > > > -                               tk_rebind_retry : 2;
> > > > > +                               tk_cred_retry : 2;
> > > > > +       unsigned char           tk_rebind_retry;
> > > > >   };
> > > > >   
> > > > >   typedef void                   (*rpc_action)(struct
> > > > > rpc_task *);
> > > > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > > > index fd7e1c630493..222578af6b01 100644
> > > > > --- a/net/sunrpc/clnt.c
> > > > > +++ b/net/sunrpc/clnt.c
> > > > > @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
> > > > >                  if (task->tk_rebind_retry == 0)
> > > > >                          break;
> > > > >                  task->tk_rebind_retry--;
> > > > > -               rpc_delay(task, 3*HZ);
> > > > > +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
> > > > >                  goto retry_timeout;
> > > > >          case -ENOBUFS:
> > > > >                  rpc_delay(task, HZ >> 2);
> > > > > diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> > > > > index be587a308e05..5c18a35752aa 100644
> > > > > --- a/net/sunrpc/sched.c
> > > > > +++ b/net/sunrpc/sched.c
> > > > > @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
> > > > > *task)
> > > > >          /* Initialize retry counters */
> > > > >          task->tk_garb_retry = 2;
> > > > >          task->tk_cred_retry = 2;
> > > > > -       task->tk_rebind_retry = 2;
> > > > > +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> > > > > +                                      
> > > > > RPC_CLNT_REBIND_DELAY;
> > > > Why not just implement an exponential back off? If the server
> > > > is
> > > > slow
> > > > to come up, then pounding the rpcbind service every 3 seconds
> > > > isn't
> > > > going to help.
> > > Exponential backoff has the disadvantage that once we've gotten
> > > to the longer retry times, it's easy for a client to wait quite
> > > some time before it notices the rebind service is available.
> > > 
> > > But I have to wonder if retrying every 3 seconds is useful if
> > > the request is going over TCP.
> > > 
> > The problem isn't reliability: this is handling a case where we
> > _are_
> > getting a reply from the server, just not one we wanted. EACCES
> > here
> > means that the rpcbind server didn't return a valid entry for the
> > service we requested, presumably because the service hasn't been
> > registered yet.
> 
> That's correct.
> 
> > 
> > So yes, an exponential back off is appropriate here.
> 
> I think Chuck's point is still valid. It makes the client a little
> more
> responsive; does not have to wait that long, and the overhead of a
> RPC
> request every 3 seconds is not that significant.
> > 

It is when you do it 30 times before giving up.
Chuck Lever April 17, 2023, 9:53 p.m. UTC | #9
> On Apr 17, 2023, at 5:48 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Apr 17, 2023, at 4:49 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>> 
>>> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>>>> Currently call_bind_status places a hard limit of 9 seconds for
>>>> retries
>>>> on EACCES error. This limit was done to prevent the RPC request
>>>> from
>>>> being retried forever if the remote server has problem and never
>>>> comes
>>>> up
>>>> 
>>>> However this 9 seconds timeout is too short, comparing to other
>>>> RPC
>>>> timeouts which are generally in minutes. This causes intermittent
>>>> failure
>>>> with EIO on the client side when there are lots of NLM activity
>>>> and
>>>> the
>>>> NFS server is restarted.
>>>> 
>>>> Instead of removing the max timeout for retry and relying on the
>>>> RPC
>>>> timeout mechanism to handle the retry, which can lead to the RPC
>>>> being
>>>> retried forever if the remote NLM service fails to come up. This
>>>> patch
>>>> simply increases the max timeout of call_bind_status from 9 to 90
>>>> seconds
>>>> which should allow enough time for NLM to register after a
>>>> restart,
>>>> and
>>>> not retrying forever if there is real problem with the remote
>>>> system.
>>>> 
>>>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
>>>> requests")
>>>> Reported-by: Helen Chao <helen.chao@oracle.com>
>>>> Tested-by: Helen Chao <helen.chao@oracle.com>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>  include/linux/sunrpc/clnt.h  | 3 +++
>>>>  include/linux/sunrpc/sched.h | 4 ++--
>>>>  net/sunrpc/clnt.c            | 2 +-
>>>>  net/sunrpc/sched.c           | 3 ++-
>>>>  4 files changed, 8 insertions(+), 4 deletions(-)
>>>> 
>>>> diff --git a/include/linux/sunrpc/clnt.h
>>>> b/include/linux/sunrpc/clnt.h
>>>> index 770ef2cb5775..81afc5ea2665 100644
>>>> --- a/include/linux/sunrpc/clnt.h
>>>> +++ b/include/linux/sunrpc/clnt.h
>>>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>>>  #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>>>>  #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>>>>  
>>>> +#define        RPC_CLNT_REBIND_DELAY           3
>>>> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
>>>> +
>>>>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>>>  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
>>>>                                 const struct rpc_program *, u32);
>>>> diff --git a/include/linux/sunrpc/sched.h
>>>> b/include/linux/sunrpc/sched.h
>>>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>>>> --- a/include/linux/sunrpc/sched.h
>>>> +++ b/include/linux/sunrpc/sched.h
>>>> @@ -90,8 +90,8 @@ struct rpc_task {
>>>>  #endif
>>>>         unsigned char           tk_priority : 2,/* Task priority
>>>> */
>>>>                                 tk_garb_retry : 2,
>>>> -                               tk_cred_retry : 2,
>>>> -                               tk_rebind_retry : 2;
>>>> +                               tk_cred_retry : 2;
>>>> +       unsigned char           tk_rebind_retry;
>>>>  };
>>>>  
>>>>  typedef void                   (*rpc_action)(struct rpc_task *);
>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>> index fd7e1c630493..222578af6b01 100644
>>>> --- a/net/sunrpc/clnt.c
>>>> +++ b/net/sunrpc/clnt.c
>>>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>>>                 if (task->tk_rebind_retry == 0)
>>>>                         break;
>>>>                 task->tk_rebind_retry--;
>>>> -               rpc_delay(task, 3*HZ);
>>>> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>>>                 goto retry_timeout;
>>>>         case -ENOBUFS:
>>>>                 rpc_delay(task, HZ >> 2);
>>>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>>>> index be587a308e05..5c18a35752aa 100644
>>>> --- a/net/sunrpc/sched.c
>>>> +++ b/net/sunrpc/sched.c
>>>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
>>>> *task)
>>>>         /* Initialize retry counters */
>>>>         task->tk_garb_retry = 2;
>>>>         task->tk_cred_retry = 2;
>>>> -       task->tk_rebind_retry = 2;
>>>> +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>>>> +                                       RPC_CLNT_REBIND_DELAY;
>>> 
>>> Why not just implement an exponential back off? If the server is
>>> slow
>>> to come up, then pounding the rpcbind service every 3 seconds isn't
>>> going to help.
>> 
>> Exponential backoff has the disadvantage that once we've gotten
>> to the longer retry times, it's easy for a client to wait quite
>> some time before it notices the rebind service is available.
>> 
>> But I have to wonder if retrying every 3 seconds is useful if
>> the request is going over TCP.
>> 
> 
> The problem isn't reliability: this is handling a case where we _are_
> getting a reply from the server, just not one we wanted. EACCES here
> means that the rpcbind server didn't return a valid entry for the
> service we requested, presumably because the service hasn't been
> registered yet.
> 
> So yes, an exponential back off is appropriate here.

OK, rpcbind is responding, but the registered service is not
ready yet.

But you've missed my point: exponential backoff is not desirable
if we want clients to recover quickly from a service restart.

If we keep the longest retry time short enough to keep recovery
responsive, it won't be all that much longer than 3 seconds.
Say, it might be 10 seconds, or 15 seconds. I don't see any
value in complicating the client's logic for that. Just make
RPC_CLNT_REBIND_DELAY longer.


--
Chuck Lever
Trond Myklebust April 17, 2023, 10:10 p.m. UTC | #10
On Mon, 2023-04-17 at 21:53 +0000, Chuck Lever III wrote:
> 
> 
> > On Apr 17, 2023, at 5:48 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
> > > 
> > > 
> > > > On Apr 17, 2023, at 4:49 PM, Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > 
> > > > On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
> > > > > Currently call_bind_status places a hard limit of 9 seconds
> > > > > for
> > > > > retries
> > > > > on EACCES error. This limit was done to prevent the RPC
> > > > > request
> > > > > from
> > > > > being retried forever if the remote server has problem and
> > > > > never
> > > > > comes
> > > > > up
> > > > > 
> > > > > However this 9 seconds timeout is too short, comparing to
> > > > > other
> > > > > RPC
> > > > > timeouts which are generally in minutes. This causes
> > > > > intermittent
> > > > > failure
> > > > > with EIO on the client side when there are lots of NLM
> > > > > activity
> > > > > and
> > > > > the
> > > > > NFS server is restarted.
> > > > > 
> > > > > Instead of removing the max timeout for retry and relying on
> > > > > the
> > > > > RPC
> > > > > timeout mechanism to handle the retry, which can lead to the
> > > > > RPC
> > > > > being
> > > > > retried forever if the remote NLM service fails to come up.
> > > > > This
> > > > > patch
> > > > > simply increases the max timeout of call_bind_status from 9
> > > > > to 90
> > > > > seconds
> > > > > which should allow enough time for NLM to register after a
> > > > > restart,
> > > > > and
> > > > > not retrying forever if there is real problem with the remote
> > > > > system.
> > > > > 
> > > > > Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
> > > > > requests")
> > > > > Reported-by: Helen Chao <helen.chao@oracle.com>
> > > > > Tested-by: Helen Chao <helen.chao@oracle.com>
> > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > ---
> > > > >  include/linux/sunrpc/clnt.h  | 3 +++
> > > > >  include/linux/sunrpc/sched.h | 4 ++--
> > > > >  net/sunrpc/clnt.c            | 2 +-
> > > > >  net/sunrpc/sched.c           | 3 ++-
> > > > >  4 files changed, 8 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/include/linux/sunrpc/clnt.h
> > > > > b/include/linux/sunrpc/clnt.h
> > > > > index 770ef2cb5775..81afc5ea2665 100644
> > > > > --- a/include/linux/sunrpc/clnt.h
> > > > > +++ b/include/linux/sunrpc/clnt.h
> > > > > @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
> > > > >  #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
> > > > >  #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
> > > > >  
> > > > > +#define        RPC_CLNT_REBIND_DELAY           3
> > > > > +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
> > > > > +
> > > > >  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
> > > > >  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt
> > > > > *,
> > > > >                                 const struct rpc_program *,
> > > > > u32);
> > > > > diff --git a/include/linux/sunrpc/sched.h
> > > > > b/include/linux/sunrpc/sched.h
> > > > > index b8ca3ecaf8d7..e9dc142f10bb 100644
> > > > > --- a/include/linux/sunrpc/sched.h
> > > > > +++ b/include/linux/sunrpc/sched.h
> > > > > @@ -90,8 +90,8 @@ struct rpc_task {
> > > > >  #endif
> > > > >         unsigned char           tk_priority : 2,/* Task
> > > > > priority
> > > > > */
> > > > >                                 tk_garb_retry : 2,
> > > > > -                               tk_cred_retry : 2,
> > > > > -                               tk_rebind_retry : 2;
> > > > > +                               tk_cred_retry : 2;
> > > > > +       unsigned char           tk_rebind_retry;
> > > > >  };
> > > > >  
> > > > >  typedef void                   (*rpc_action)(struct rpc_task
> > > > > *);
> > > > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > > > index fd7e1c630493..222578af6b01 100644
> > > > > --- a/net/sunrpc/clnt.c
> > > > > +++ b/net/sunrpc/clnt.c
> > > > > @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
> > > > >                 if (task->tk_rebind_retry == 0)
> > > > >                         break;
> > > > >                 task->tk_rebind_retry--;
> > > > > -               rpc_delay(task, 3*HZ);
> > > > > +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
> > > > >                 goto retry_timeout;
> > > > >         case -ENOBUFS:
> > > > >                 rpc_delay(task, HZ >> 2);
> > > > > diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> > > > > index be587a308e05..5c18a35752aa 100644
> > > > > --- a/net/sunrpc/sched.c
> > > > > +++ b/net/sunrpc/sched.c
> > > > > @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
> > > > > *task)
> > > > >         /* Initialize retry counters */
> > > > >         task->tk_garb_retry = 2;
> > > > >         task->tk_cred_retry = 2;
> > > > > -       task->tk_rebind_retry = 2;
> > > > > +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> > > > > +                                      
> > > > > RPC_CLNT_REBIND_DELAY;
> > > > 
> > > > Why not just implement an exponential back off? If the server
> > > > is
> > > > slow
> > > > to come up, then pounding the rpcbind service every 3 seconds
> > > > isn't
> > > > going to help.
> > > 
> > > Exponential backoff has the disadvantage that once we've gotten
> > > to the longer retry times, it's easy for a client to wait quite
> > > some time before it notices the rebind service is available.
> > > 
> > > But I have to wonder if retrying every 3 seconds is useful if
> > > the request is going over TCP.
> > > 
> > 
> > The problem isn't reliability: this is handling a case where we
> > _are_
> > getting a reply from the server, just not one we wanted. EACCES
> > here
> > means that the rpcbind server didn't return a valid entry for the
> > service we requested, presumably because the service hasn't been
> > registered yet.
> > 
> > So yes, an exponential back off is appropriate here.
> 
> OK, rpcbind is responding, but the registered service is not
> ready yet.
> 
> But you've missed my point: exponential backoff is not desirable
> if we want clients to recover quickly from a service restart.
> 
> If we keep the longest retry time short enough to keep recovery
> responsive, it won't be all that much longer than 3 seconds.
> Say, it might be 10 seconds, or 15 seconds. I don't see any
> value in complicating the client's logic for that. Just make
> RPC_CLNT_REBIND_DELAY longer.
> 

What's so complicated about exponential back off?

delay = RPC_CLNT_REBIND_DELAY * (1U << nretries);

...and then limit nretries to take values between 0 and 4. That gives
you a 93 second total wait, with the largest wait between retries being
48 seconds.
Dai Ngo April 17, 2023, 11:14 p.m. UTC | #11
On 4/17/23 2:53 PM, Trond Myklebust wrote:
> On Mon, 2023-04-17 at 14:51 -0700, dai.ngo@oracle.com wrote:
>> On 4/17/23 2:48 PM, Trond Myklebust wrote:
>>> On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
>>>>> On Apr 17, 2023, at 4:49 PM, Trond Myklebust
>>>>> <trondmy@hammerspace.com> wrote:
>>>>>
>>>>> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>>>>>> Currently call_bind_status places a hard limit of 9 seconds
>>>>>> for
>>>>>> retries
>>>>>> on EACCES error. This limit was done to prevent the RPC
>>>>>> request
>>>>>> from
>>>>>> being retried forever if the remote server has problem and
>>>>>> never
>>>>>> comes
>>>>>> up
>>>>>>
>>>>>> However this 9 seconds timeout is too short, comparing to
>>>>>> other
>>>>>> RPC
>>>>>> timeouts which are generally in minutes. This causes
>>>>>> intermittent
>>>>>> failure
>>>>>> with EIO on the client side when there are lots of NLM
>>>>>> activity
>>>>>> and
>>>>>> the
>>>>>> NFS server is restarted.
>>>>>>
>>>>>> Instead of removing the max timeout for retry and relying on
>>>>>> the
>>>>>> RPC
>>>>>> timeout mechanism to handle the retry, which can lead to the
>>>>>> RPC
>>>>>> being
>>>>>> retried forever if the remote NLM service fails to come up.
>>>>>> This
>>>>>> patch
>>>>>> simply increases the max timeout of call_bind_status from 9
>>>>>> to 90
>>>>>> seconds
>>>>>> which should allow enough time for NLM to register after a
>>>>>> restart,
>>>>>> and
>>>>>> not retrying forever if there is real problem with the remote
>>>>>> system.
>>>>>>
>>>>>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
>>>>>> requests")
>>>>>> Reported-by: Helen Chao <helen.chao@oracle.com>
>>>>>> Tested-by: Helen Chao <helen.chao@oracle.com>
>>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>>> ---
>>>>>>    include/linux/sunrpc/clnt.h  | 3 +++
>>>>>>    include/linux/sunrpc/sched.h | 4 ++--
>>>>>>    net/sunrpc/clnt.c            | 2 +-
>>>>>>    net/sunrpc/sched.c           | 3 ++-
>>>>>>    4 files changed, 8 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/include/linux/sunrpc/clnt.h
>>>>>> b/include/linux/sunrpc/clnt.h
>>>>>> index 770ef2cb5775..81afc5ea2665 100644
>>>>>> --- a/include/linux/sunrpc/clnt.h
>>>>>> +++ b/include/linux/sunrpc/clnt.h
>>>>>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>>>>>    #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>>>>>>    #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>>>>>>    
>>>>>> +#define        RPC_CLNT_REBIND_DELAY           3
>>>>>> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
>>>>>> +
>>>>>>    struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>>>>>    struct rpc_clnt        *rpc_bind_new_program(struct
>>>>>> rpc_clnt *,
>>>>>>                                   const struct rpc_program *,
>>>>>> u32);
>>>>>> diff --git a/include/linux/sunrpc/sched.h
>>>>>> b/include/linux/sunrpc/sched.h
>>>>>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>>>>>> --- a/include/linux/sunrpc/sched.h
>>>>>> +++ b/include/linux/sunrpc/sched.h
>>>>>> @@ -90,8 +90,8 @@ struct rpc_task {
>>>>>>    #endif
>>>>>>           unsigned char           tk_priority : 2,/* Task
>>>>>> priority
>>>>>> */
>>>>>>                                   tk_garb_retry : 2,
>>>>>> -                               tk_cred_retry : 2,
>>>>>> -                               tk_rebind_retry : 2;
>>>>>> +                               tk_cred_retry : 2;
>>>>>> +       unsigned char           tk_rebind_retry;
>>>>>>    };
>>>>>>    
>>>>>>    typedef void                   (*rpc_action)(struct
>>>>>> rpc_task *);
>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>>>> index fd7e1c630493..222578af6b01 100644
>>>>>> --- a/net/sunrpc/clnt.c
>>>>>> +++ b/net/sunrpc/clnt.c
>>>>>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>>>>>                   if (task->tk_rebind_retry == 0)
>>>>>>                           break;
>>>>>>                   task->tk_rebind_retry--;
>>>>>> -               rpc_delay(task, 3*HZ);
>>>>>> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>>>>>                   goto retry_timeout;
>>>>>>           case -ENOBUFS:
>>>>>>                   rpc_delay(task, HZ >> 2);
>>>>>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>>>>>> index be587a308e05..5c18a35752aa 100644
>>>>>> --- a/net/sunrpc/sched.c
>>>>>> +++ b/net/sunrpc/sched.c
>>>>>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
>>>>>> *task)
>>>>>>           /* Initialize retry counters */
>>>>>>           task->tk_garb_retry = 2;
>>>>>>           task->tk_cred_retry = 2;
>>>>>> -       task->tk_rebind_retry = 2;
>>>>>> +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>>>>>> +
>>>>>> RPC_CLNT_REBIND_DELAY;
>>>>> Why not just implement an exponential back off? If the server
>>>>> is
>>>>> slow
>>>>> to come up, then pounding the rpcbind service every 3 seconds
>>>>> isn't
>>>>> going to help.
>>>> Exponential backoff has the disadvantage that once we've gotten
>>>> to the longer retry times, it's easy for a client to wait quite
>>>> some time before it notices the rebind service is available.
>>>>
>>>> But I have to wonder if retrying every 3 seconds is useful if
>>>> the request is going over TCP.
>>>>
>>> The problem isn't reliability: this is handling a case where we
>>> _are_
>>> getting a reply from the server, just not one we wanted. EACCES
>>> here
>>> means that the rpcbind server didn't return a valid entry for the
>>> service we requested, presumably because the service hasn't been
>>> registered yet.
>> That's correct.
>>
>>> So yes, an exponential back off is appropriate here.
>> I think Chuck's point is still valid. It makes the client a little
>> more
>> responsive; does not have to wait that long, and the overhead of a
>> RPC
>> request every 3 seconds is not that significant.
> It is when you do it 30 times before giving up.

This is true for a dead NFS server, we save 25 RPC calls in a period
of ~90 seconds.

In the case of a head failover and the NFS service on the takeover
head can take up to 15 seconds to restart to pick up the new exports
from the fail head, then the client can potentially wait up to ~20
seconds, after the NFS server is ready, to resume normal operation.
IMHO, it's a 'long' time and I'd prioritize speed over saving some
overhead.

Having said that, I will make the change that Trond suggested if
Chuck and Jeff don't have any objection.

-Dai

>
Chuck Lever April 18, 2023, 12:06 a.m. UTC | #12
> On Apr 17, 2023, at 6:10 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Mon, 2023-04-17 at 21:53 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Apr 17, 2023, at 5:48 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>> 
>>> On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
>>>> 
>>>> 
>>>>> On Apr 17, 2023, at 4:49 PM, Trond Myklebust
>>>>> <trondmy@hammerspace.com> wrote:
>>>>> 
>>>>> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>>>>>> Currently call_bind_status places a hard limit of 9 seconds
>>>>>> for
>>>>>> retries
>>>>>> on EACCES error. This limit was done to prevent the RPC
>>>>>> request
>>>>>> from
>>>>>> being retried forever if the remote server has problem and
>>>>>> never
>>>>>> comes
>>>>>> up
>>>>>> 
>>>>>> However this 9 seconds timeout is too short, comparing to
>>>>>> other
>>>>>> RPC
>>>>>> timeouts which are generally in minutes. This causes
>>>>>> intermittent
>>>>>> failure
>>>>>> with EIO on the client side when there are lots of NLM
>>>>>> activity
>>>>>> and
>>>>>> the
>>>>>> NFS server is restarted.
>>>>>> 
>>>>>> Instead of removing the max timeout for retry and relying on
>>>>>> the
>>>>>> RPC
>>>>>> timeout mechanism to handle the retry, which can lead to the
>>>>>> RPC
>>>>>> being
>>>>>> retried forever if the remote NLM service fails to come up.
>>>>>> This
>>>>>> patch
>>>>>> simply increases the max timeout of call_bind_status from 9
>>>>>> to 90
>>>>>> seconds
>>>>>> which should allow enough time for NLM to register after a
>>>>>> restart,
>>>>>> and
>>>>>> not retrying forever if there is real problem with the remote
>>>>>> system.
>>>>>> 
>>>>>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock
>>>>>> requests")
>>>>>> Reported-by: Helen Chao <helen.chao@oracle.com>
>>>>>> Tested-by: Helen Chao <helen.chao@oracle.com>
>>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>>> ---
>>>>>>  include/linux/sunrpc/clnt.h  | 3 +++
>>>>>>  include/linux/sunrpc/sched.h | 4 ++--
>>>>>>  net/sunrpc/clnt.c            | 2 +-
>>>>>>  net/sunrpc/sched.c           | 3 ++-
>>>>>>  4 files changed, 8 insertions(+), 4 deletions(-)
>>>>>> 
>>>>>> diff --git a/include/linux/sunrpc/clnt.h
>>>>>> b/include/linux/sunrpc/clnt.h
>>>>>> index 770ef2cb5775..81afc5ea2665 100644
>>>>>> --- a/include/linux/sunrpc/clnt.h
>>>>>> +++ b/include/linux/sunrpc/clnt.h
>>>>>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>>>>>  #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>>>>>>  #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>>>>>>  
>>>>>> +#define        RPC_CLNT_REBIND_DELAY           3
>>>>>> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
>>>>>> +
>>>>>>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>>>>>  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt
>>>>>> *,
>>>>>>                                 const struct rpc_program *,
>>>>>> u32);
>>>>>> diff --git a/include/linux/sunrpc/sched.h
>>>>>> b/include/linux/sunrpc/sched.h
>>>>>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>>>>>> --- a/include/linux/sunrpc/sched.h
>>>>>> +++ b/include/linux/sunrpc/sched.h
>>>>>> @@ -90,8 +90,8 @@ struct rpc_task {
>>>>>>  #endif
>>>>>>         unsigned char           tk_priority : 2,/* Task
>>>>>> priority
>>>>>> */
>>>>>>                                 tk_garb_retry : 2,
>>>>>> -                               tk_cred_retry : 2,
>>>>>> -                               tk_rebind_retry : 2;
>>>>>> +                               tk_cred_retry : 2;
>>>>>> +       unsigned char           tk_rebind_retry;
>>>>>>  };
>>>>>>  
>>>>>>  typedef void                   (*rpc_action)(struct rpc_task
>>>>>> *);
>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>>>> index fd7e1c630493..222578af6b01 100644
>>>>>> --- a/net/sunrpc/clnt.c
>>>>>> +++ b/net/sunrpc/clnt.c
>>>>>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>>>>>                 if (task->tk_rebind_retry == 0)
>>>>>>                         break;
>>>>>>                 task->tk_rebind_retry--;
>>>>>> -               rpc_delay(task, 3*HZ);
>>>>>> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>>>>>                 goto retry_timeout;
>>>>>>         case -ENOBUFS:
>>>>>>                 rpc_delay(task, HZ >> 2);
>>>>>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>>>>>> index be587a308e05..5c18a35752aa 100644
>>>>>> --- a/net/sunrpc/sched.c
>>>>>> +++ b/net/sunrpc/sched.c
>>>>>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task
>>>>>> *task)
>>>>>>         /* Initialize retry counters */
>>>>>>         task->tk_garb_retry = 2;
>>>>>>         task->tk_cred_retry = 2;
>>>>>> -       task->tk_rebind_retry = 2;
>>>>>> +       task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>>>>>> +                                      
>>>>>> RPC_CLNT_REBIND_DELAY;
>>>>> 
>>>>> Why not just implement an exponential back off? If the server
>>>>> is
>>>>> slow
>>>>> to come up, then pounding the rpcbind service every 3 seconds
>>>>> isn't
>>>>> going to help.
>>>> 
>>>> Exponential backoff has the disadvantage that once we've gotten
>>>> to the longer retry times, it's easy for a client to wait quite
>>>> some time before it notices the rebind service is available.
>>>> 
>>>> But I have to wonder if retrying every 3 seconds is useful if
>>>> the request is going over TCP.
>>>> 
>>> 
>>> The problem isn't reliability: this is handling a case where we
>>> _are_
>>> getting a reply from the server, just not one we wanted. EACCES
>>> here
>>> means that the rpcbind server didn't return a valid entry for the
>>> service we requested, presumably because the service hasn't been
>>> registered yet.
>>> 
>>> So yes, an exponential back off is appropriate here.
>> 
>> OK, rpcbind is responding, but the registered service is not
>> ready yet.
>> 
>> But you've missed my point: exponential backoff is not desirable
>> if we want clients to recover quickly from a service restart.
>> 
>> If we keep the longest retry time short enough to keep recovery
>> responsive, it won't be all that much longer than 3 seconds.
>> Say, it might be 10 seconds, or 15 seconds. I don't see any
>> value in complicating the client's logic for that. Just make
>> RPC_CLNT_REBIND_DELAY longer.
>> 
> 
> What's so complicated about exponential back off?

I didn't claim that exponential backoff is /impossibly/
complicated. My point is that it is /unnecessarily/
complicated.

That extra bit of logic there doesn't buy you anything...
and the worst case behavior of exp. backoff is noticeably
worse than a fixed backoff, as Dai has pointed out twice
already.


> delay = RPC_CLNT_REBIND_DELAY * (1U << nretries);
> 
> ...and then limit nretries to take values between 0 and 4. That gives
> you a 93 second total wait, with the largest wait between retries being
> 48 seconds.

48 seconds is actually quite a long time. I know some
customers who would find that amount of recovery time
to be unacceptable.

This is not a hill I care to die on, however. Dai should
implement whatever you prefer. Thanks for your input.


--
Chuck Lever
Trond Myklebust April 18, 2023, 12:23 a.m. UTC | #13
On Mon, 2023-04-17 at 16:14 -0700, dai.ngo@oracle.com wrote:
> 
> On 4/17/23 2:53 PM, Trond Myklebust wrote:
> > On Mon, 2023-04-17 at 14:51 -0700, dai.ngo@oracle.com wrote:
> > > On 4/17/23 2:48 PM, Trond Myklebust wrote:
> > > > On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
> > > > > > On Apr 17, 2023, at 4:49 PM, Trond Myklebust
> > > > > > <trondmy@hammerspace.com> wrote:
> > > > > > 
> > > > > > On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
> > > > > > > Currently call_bind_status places a hard limit of 9
> > > > > > > seconds
> > > > > > > for
> > > > > > > retries
> > > > > > > on EACCES error. This limit was done to prevent the RPC
> > > > > > > request
> > > > > > > from
> > > > > > > being retried forever if the remote server has problem
> > > > > > > and
> > > > > > > never
> > > > > > > comes
> > > > > > > up
> > > > > > > 
> > > > > > > However this 9 seconds timeout is too short, comparing to
> > > > > > > other
> > > > > > > RPC
> > > > > > > timeouts which are generally in minutes. This causes
> > > > > > > intermittent
> > > > > > > failure
> > > > > > > with EIO on the client side when there are lots of NLM
> > > > > > > activity
> > > > > > > and
> > > > > > > the
> > > > > > > NFS server is restarted.
> > > > > > > 
> > > > > > > Instead of removing the max timeout for retry and relying
> > > > > > > on
> > > > > > > the
> > > > > > > RPC
> > > > > > > timeout mechanism to handle the retry, which can lead to
> > > > > > > the
> > > > > > > RPC
> > > > > > > being
> > > > > > > retried forever if the remote NLM service fails to come
> > > > > > > up.
> > > > > > > This
> > > > > > > patch
> > > > > > > simply increases the max timeout of call_bind_status from
> > > > > > > 9
> > > > > > > to 90
> > > > > > > seconds
> > > > > > > which should allow enough time for NLM to register after
> > > > > > > a
> > > > > > > restart,
> > > > > > > and
> > > > > > > not retrying forever if there is real problem with the
> > > > > > > remote
> > > > > > > system.
> > > > > > > 
> > > > > > > Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM
> > > > > > > unlock
> > > > > > > requests")
> > > > > > > Reported-by: Helen Chao <helen.chao@oracle.com>
> > > > > > > Tested-by: Helen Chao <helen.chao@oracle.com>
> > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > > > ---
> > > > > > >    include/linux/sunrpc/clnt.h  | 3 +++
> > > > > > >    include/linux/sunrpc/sched.h | 4 ++--
> > > > > > >    net/sunrpc/clnt.c            | 2 +-
> > > > > > >    net/sunrpc/sched.c           | 3 ++-
> > > > > > >    4 files changed, 8 insertions(+), 4 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/include/linux/sunrpc/clnt.h
> > > > > > > b/include/linux/sunrpc/clnt.h
> > > > > > > index 770ef2cb5775..81afc5ea2665 100644
> > > > > > > --- a/include/linux/sunrpc/clnt.h
> > > > > > > +++ b/include/linux/sunrpc/clnt.h
> > > > > > > @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
> > > > > > >    #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
> > > > > > >    #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
> > > > > > >    
> > > > > > > +#define        RPC_CLNT_REBIND_DELAY           3
> > > > > > > +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
> > > > > > > +
> > > > > > >    struct rpc_clnt *rpc_create(struct rpc_create_args
> > > > > > > *args);
> > > > > > >    struct rpc_clnt        *rpc_bind_new_program(struct
> > > > > > > rpc_clnt *,
> > > > > > >                                   const struct
> > > > > > > rpc_program *,
> > > > > > > u32);
> > > > > > > diff --git a/include/linux/sunrpc/sched.h
> > > > > > > b/include/linux/sunrpc/sched.h
> > > > > > > index b8ca3ecaf8d7..e9dc142f10bb 100644
> > > > > > > --- a/include/linux/sunrpc/sched.h
> > > > > > > +++ b/include/linux/sunrpc/sched.h
> > > > > > > @@ -90,8 +90,8 @@ struct rpc_task {
> > > > > > >    #endif
> > > > > > >           unsigned char           tk_priority : 2,/* Task
> > > > > > > priority
> > > > > > > */
> > > > > > >                                   tk_garb_retry : 2,
> > > > > > > -                               tk_cred_retry : 2,
> > > > > > > -                               tk_rebind_retry : 2;
> > > > > > > +                               tk_cred_retry : 2;
> > > > > > > +       unsigned char           tk_rebind_retry;
> > > > > > >    };
> > > > > > >    
> > > > > > >    typedef void                   (*rpc_action)(struct
> > > > > > > rpc_task *);
> > > > > > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > > > > > index fd7e1c630493..222578af6b01 100644
> > > > > > > --- a/net/sunrpc/clnt.c
> > > > > > > +++ b/net/sunrpc/clnt.c
> > > > > > > @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task
> > > > > > > *task)
> > > > > > >                   if (task->tk_rebind_retry == 0)
> > > > > > >                           break;
> > > > > > >                   task->tk_rebind_retry--;
> > > > > > > -               rpc_delay(task, 3*HZ);
> > > > > > > +               rpc_delay(task, RPC_CLNT_REBIND_DELAY *
> > > > > > > HZ);
> > > > > > >                   goto retry_timeout;
> > > > > > >           case -ENOBUFS:
> > > > > > >                   rpc_delay(task, HZ >> 2);
> > > > > > > diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> > > > > > > index be587a308e05..5c18a35752aa 100644
> > > > > > > --- a/net/sunrpc/sched.c
> > > > > > > +++ b/net/sunrpc/sched.c
> > > > > > > @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct
> > > > > > > rpc_task
> > > > > > > *task)
> > > > > > >           /* Initialize retry counters */
> > > > > > >           task->tk_garb_retry = 2;
> > > > > > >           task->tk_cred_retry = 2;
> > > > > > > -       task->tk_rebind_retry = 2;
> > > > > > > +       task->tk_rebind_retry =
> > > > > > > RPC_CLNT_REBIND_MAX_TIMEOUT /
> > > > > > > +
> > > > > > > RPC_CLNT_REBIND_DELAY;
> > > > > > Why not just implement an exponential back off? If the
> > > > > > server
> > > > > > is
> > > > > > slow
> > > > > > to come up, then pounding the rpcbind service every 3
> > > > > > seconds
> > > > > > isn't
> > > > > > going to help.
> > > > > Exponential backoff has the disadvantage that once we've
> > > > > gotten
> > > > > to the longer retry times, it's easy for a client to wait
> > > > > quite
> > > > > some time before it notices the rebind service is available.
> > > > > 
> > > > > But I have to wonder if retrying every 3 seconds is useful if
> > > > > the request is going over TCP.
> > > > > 
> > > > The problem isn't reliability: this is handling a case where we
> > > > _are_
> > > > getting a reply from the server, just not one we wanted. EACCES
> > > > here
> > > > means that the rpcbind server didn't return a valid entry for
> > > > the
> > > > service we requested, presumably because the service hasn't
> > > > been
> > > > registered yet.
> > > That's correct.
> > > 
> > > > So yes, an exponential back off is appropriate here.
> > > I think Chuck's point is still valid. It makes the client a
> > > little
> > > more
> > > responsive; does not have to wait that long, and the overhead of
> > > a
> > > RPC
> > > request every 3 seconds is not that significant.
> > It is when you do it 30 times before giving up.
> 
> This is true for a dead NFS server, we save 25 RPC calls in a period
> of ~90 seconds.
> 
> In the case of a head failover and the NFS service on the takeover
> head can take up to 15 seconds to restart to pick up the new exports
> from the fail head, then the client can potentially wait up to ~20
> seconds, after the NFS server is ready, to resume normal operation.
> IMHO, it's a 'long' time and I'd prioritize speed over saving some
> overhead.
> 

task->tk_rebind_retry is _only_ changed if the rpcbind server is up and
running, and returns an empty reply because the service we're looking
up isn't registered.
task->tk_rebind_retry isn't changed on any request timeout. It isn't
changed on any connection failure. It isn't changed by any other code
path in the RPC client.

So none of this applies to the case of a dead server.

It applies to the case of a live server, where rpcbind is running and
accessible to the client and where, for some reason or another, it is
taking an exceptionally long time to register the service we are
looking up the port for (either NLM or NFSv3).

So where are you seeing this process take 90 seconds? Why do we need to
wait for that long before we can finally conclude that the particular
service in question is not going to come back up?
Dai Ngo April 18, 2023, 1:04 a.m. UTC | #14
On 4/17/23 5:23 PM, Trond Myklebust wrote:
> On Mon, 2023-04-17 at 16:14 -0700, dai.ngo@oracle.com wrote:
>> On 4/17/23 2:53 PM, Trond Myklebust wrote:
>>> On Mon, 2023-04-17 at 14:51 -0700, dai.ngo@oracle.com wrote:
>>>> On 4/17/23 2:48 PM, Trond Myklebust wrote:
>>>>> On Mon, 2023-04-17 at 21:41 +0000, Chuck Lever III wrote:
>>>>>>> On Apr 17, 2023, at 4:49 PM, Trond Myklebust
>>>>>>> <trondmy@hammerspace.com> wrote:
>>>>>>>
>>>>>>> On Fri, 2023-04-07 at 20:30 -0700, Dai Ngo wrote:
>>>>>>>> Currently call_bind_status places a hard limit of 9
>>>>>>>> seconds
>>>>>>>> for
>>>>>>>> retries
>>>>>>>> on EACCES error. This limit was done to prevent the RPC
>>>>>>>> request
>>>>>>>> from
>>>>>>>> being retried forever if the remote server has problem
>>>>>>>> and
>>>>>>>> never
>>>>>>>> comes
>>>>>>>> up
>>>>>>>>
>>>>>>>> However this 9 seconds timeout is too short, comparing to
>>>>>>>> other
>>>>>>>> RPC
>>>>>>>> timeouts which are generally in minutes. This causes
>>>>>>>> intermittent
>>>>>>>> failure
>>>>>>>> with EIO on the client side when there are lots of NLM
>>>>>>>> activity
>>>>>>>> and
>>>>>>>> the
>>>>>>>> NFS server is restarted.
>>>>>>>>
>>>>>>>> Instead of removing the max timeout for retry and relying
>>>>>>>> on
>>>>>>>> the
>>>>>>>> RPC
>>>>>>>> timeout mechanism to handle the retry, which can lead to
>>>>>>>> the
>>>>>>>> RPC
>>>>>>>> being
>>>>>>>> retried forever if the remote NLM service fails to come
>>>>>>>> up.
>>>>>>>> This
>>>>>>>> patch
>>>>>>>> simply increases the max timeout of call_bind_status from
>>>>>>>> 9
>>>>>>>> to 90
>>>>>>>> seconds
>>>>>>>> which should allow enough time for NLM to register after
>>>>>>>> a
>>>>>>>> restart,
>>>>>>>> and
>>>>>>>> not retrying forever if there is real problem with the
>>>>>>>> remote
>>>>>>>> system.
>>>>>>>>
>>>>>>>> Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM
>>>>>>>> unlock
>>>>>>>> requests")
>>>>>>>> Reported-by: Helen Chao <helen.chao@oracle.com>
>>>>>>>> Tested-by: Helen Chao <helen.chao@oracle.com>
>>>>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>>>>> ---
>>>>>>>>     include/linux/sunrpc/clnt.h  | 3 +++
>>>>>>>>     include/linux/sunrpc/sched.h | 4 ++--
>>>>>>>>     net/sunrpc/clnt.c            | 2 +-
>>>>>>>>     net/sunrpc/sched.c           | 3 ++-
>>>>>>>>     4 files changed, 8 insertions(+), 4 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/include/linux/sunrpc/clnt.h
>>>>>>>> b/include/linux/sunrpc/clnt.h
>>>>>>>> index 770ef2cb5775..81afc5ea2665 100644
>>>>>>>> --- a/include/linux/sunrpc/clnt.h
>>>>>>>> +++ b/include/linux/sunrpc/clnt.h
>>>>>>>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>>>>>>>     #define RPC_CLNT_CREATE_REUSEPORT      (1UL << 11)
>>>>>>>>     #define RPC_CLNT_CREATE_CONNECTED      (1UL << 12)
>>>>>>>>     
>>>>>>>> +#define        RPC_CLNT_REBIND_DELAY           3
>>>>>>>> +#define        RPC_CLNT_REBIND_MAX_TIMEOUT     90
>>>>>>>> +
>>>>>>>>     struct rpc_clnt *rpc_create(struct rpc_create_args
>>>>>>>> *args);
>>>>>>>>     struct rpc_clnt        *rpc_bind_new_program(struct
>>>>>>>> rpc_clnt *,
>>>>>>>>                                    const struct
>>>>>>>> rpc_program *,
>>>>>>>> u32);
>>>>>>>> diff --git a/include/linux/sunrpc/sched.h
>>>>>>>> b/include/linux/sunrpc/sched.h
>>>>>>>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>>>>>>>> --- a/include/linux/sunrpc/sched.h
>>>>>>>> +++ b/include/linux/sunrpc/sched.h
>>>>>>>> @@ -90,8 +90,8 @@ struct rpc_task {
>>>>>>>>     #endif
>>>>>>>>            unsigned char           tk_priority : 2,/* Task
>>>>>>>> priority
>>>>>>>> */
>>>>>>>>                                    tk_garb_retry : 2,
>>>>>>>> -                               tk_cred_retry : 2,
>>>>>>>> -                               tk_rebind_retry : 2;
>>>>>>>> +                               tk_cred_retry : 2;
>>>>>>>> +       unsigned char           tk_rebind_retry;
>>>>>>>>     };
>>>>>>>>     
>>>>>>>>     typedef void                   (*rpc_action)(struct
>>>>>>>> rpc_task *);
>>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>>>>>> index fd7e1c630493..222578af6b01 100644
>>>>>>>> --- a/net/sunrpc/clnt.c
>>>>>>>> +++ b/net/sunrpc/clnt.c
>>>>>>>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task
>>>>>>>> *task)
>>>>>>>>                    if (task->tk_rebind_retry == 0)
>>>>>>>>                            break;
>>>>>>>>                    task->tk_rebind_retry--;
>>>>>>>> -               rpc_delay(task, 3*HZ);
>>>>>>>> +               rpc_delay(task, RPC_CLNT_REBIND_DELAY *
>>>>>>>> HZ);
>>>>>>>>                    goto retry_timeout;
>>>>>>>>            case -ENOBUFS:
>>>>>>>>                    rpc_delay(task, HZ >> 2);
>>>>>>>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>>>>>>>> index be587a308e05..5c18a35752aa 100644
>>>>>>>> --- a/net/sunrpc/sched.c
>>>>>>>> +++ b/net/sunrpc/sched.c
>>>>>>>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct
>>>>>>>> rpc_task
>>>>>>>> *task)
>>>>>>>>            /* Initialize retry counters */
>>>>>>>>            task->tk_garb_retry = 2;
>>>>>>>>            task->tk_cred_retry = 2;
>>>>>>>> -       task->tk_rebind_retry = 2;
>>>>>>>> +       task->tk_rebind_retry =
>>>>>>>> RPC_CLNT_REBIND_MAX_TIMEOUT /
>>>>>>>> +
>>>>>>>> RPC_CLNT_REBIND_DELAY;
>>>>>>> Why not just implement an exponential back off? If the
>>>>>>> server
>>>>>>> is
>>>>>>> slow
>>>>>>> to come up, then pounding the rpcbind service every 3
>>>>>>> seconds
>>>>>>> isn't
>>>>>>> going to help.
>>>>>> Exponential backoff has the disadvantage that once we've
>>>>>> gotten
>>>>>> to the longer retry times, it's easy for a client to wait
>>>>>> quite
>>>>>> some time before it notices the rebind service is available.
>>>>>>
>>>>>> But I have to wonder if retrying every 3 seconds is useful if
>>>>>> the request is going over TCP.
>>>>>>
>>>>> The problem isn't reliability: this is handling a case where we
>>>>> _are_
>>>>> getting a reply from the server, just not one we wanted. EACCES
>>>>> here
>>>>> means that the rpcbind server didn't return a valid entry for
>>>>> the
>>>>> service we requested, presumably because the service hasn't
>>>>> been
>>>>> registered yet.
>>>> That's correct.
>>>>
>>>>> So yes, an exponential back off is appropriate here.
>>>> I think Chuck's point is still valid. It makes the client a
>>>> little
>>>> more
>>>> responsive; does not have to wait that long, and the overhead of
>>>> a
>>>> RPC
>>>> request every 3 seconds is not that significant.
>>> It is when you do it 30 times before giving up.
>> This is true for a dead NFS server, we save 25 RPC calls in a period
>> of ~90 seconds.
>>
>> In the case of a head failover and the NFS service on the takeover
>> head can take up to 15 seconds to restart to pick up the new exports
>> from the fail head, then the client can potentially wait up to ~20
>> seconds, after the NFS server is ready, to resume normal operation.
>> IMHO, it's a 'long' time and I'd prioritize speed over saving some
>> overhead.
>>
> task->tk_rebind_retry is _only_ changed if the rpcbind server is up and
> running, and returns an empty reply because the service we're looking
> up isn't registered.
> task->tk_rebind_retry isn't changed on any request timeout. It isn't
> changed on any connection failure. It isn't changed by any other code
> path in the RPC client.
>
> So none of this applies to the case of a dead server.

Sorry if I'm not clear. What I meant by a dead server is a dead NFS
server and not rpcbind service. So in this case we get EACCES from
rpcbind and we retry.

>
> It applies to the case of a live server, where rpcbind is running and
> accessible to the client and where, for some reason or another, it is
> taking an exceptionally long time to register the service we are
> looking up the port for (either NLM or NFSv3).

Yes, this is the problem that I'm facing.

>
> So where are you seeing this process take 90 seconds? Why do we need to
> wait for that long before we can finally conclude that the particular
> service in question is not going to come back up?

90 secs wait is for when the NFS server never come up and we keep getting
EACCES from rpcbind for this whole time.

-Dai
Trond Myklebust April 18, 2023, 4:02 p.m. UTC | #15
On Mon, 2023-04-17 at 18:04 -0700, dai.ngo@oracle.com wrote:
> 
> On 4/17/23 5:23 PM, Trond Myklebust wrote:
> > task->tk_rebind_retry is _only_ changed if the rpcbind server is up
> > and
> > running, and returns an empty reply because the service we're
> > looking
> > up isn't registered.
> > task->tk_rebind_retry isn't changed on any request timeout. It
> > isn't
> > changed on any connection failure. It isn't changed by any other
> > code
> > path in the RPC client.
> > 
> > So none of this applies to the case of a dead server.
> 
> Sorry if I'm not clear. What I meant by a dead server is a dead NFS
> server and not rpcbind service. So in this case we get EACCES from
> rpcbind and we retry.
> 
> > 
> > It applies to the case of a live server, where rpcbind is running
> > and
> > accessible to the client and where, for some reason or another, it
> > is
> > taking an exceptionally long time to register the service we are
> > looking up the port for (either NLM or NFSv3).
> 
> Yes, this is the problem that I'm facing.
> 
> > 
> > So where are you seeing this process take 90 seconds? Why do we
> > need to
> > wait for that long before we can finally conclude that the
> > particular
> > service in question is not going to come back up?
> 
> 90 secs wait is for when the NFS server never come up and we keep
> getting
> EACCES from rpcbind for this whole time.
> 

OK, so the 90s is completely arbitrary then, and was only chosen
because it fits your particular server?

To me, that appears to invalidate the entire premise of commit
0b760113a3a1 that we can rely on rpcbind to tell us if the service is
present or not.
In that case, I'd rather rip out the task->tk_rebind_retry counter, and
just rely on standard hard/soft task semantics.
Dai Ngo April 18, 2023, 5:40 p.m. UTC | #16
On 4/18/23 9:02 AM, Trond Myklebust wrote:
> On Mon, 2023-04-17 at 18:04 -0700, dai.ngo@oracle.com wrote:
>> On 4/17/23 5:23 PM, Trond Myklebust wrote:
>>> task->tk_rebind_retry is _only_ changed if the rpcbind server is up
>>> and
>>> running, and returns an empty reply because the service we're
>>> looking
>>> up isn't registered.
>>> task->tk_rebind_retry isn't changed on any request timeout. It
>>> isn't
>>> changed on any connection failure. It isn't changed by any other
>>> code
>>> path in the RPC client.
>>>
>>> So none of this applies to the case of a dead server.
>> Sorry if I'm not clear. What I meant by a dead server is a dead NFS
>> server and not rpcbind service. So in this case we get EACCES from
>> rpcbind and we retry.
>>
>>> It applies to the case of a live server, where rpcbind is running
>>> and
>>> accessible to the client and where, for some reason or another, it
>>> is
>>> taking an exceptionally long time to register the service we are
>>> looking up the port for (either NLM or NFSv3).
>> Yes, this is the problem that I'm facing.
>>
>>> So where are you seeing this process take 90 seconds? Why do we
>>> need to
>>> wait for that long before we can finally conclude that the
>>> particular
>>> service in question is not going to come back up?
>> 90 secs wait is for when the NFS server never come up and we keep
>> getting
>> EACCES from rpcbind for this whole time.
>>
> OK, so the 90s is completely arbitrary then, and was only chosen
> because it fits your particular server?
>
> To me, that appears to invalidate the entire premise of commit
> 0b760113a3a1 that we can rely on rpcbind to tell us if the service is
> present or not.
> In that case, I'd rather rip out the task->tk_rebind_retry counter, and
> just rely on standard hard/soft task semantics.

Okay, I will resend the patch that relies on standard hard/soft task semantics.

Thanks,
-Dai
diff mbox series

Patch

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 770ef2cb5775..81afc5ea2665 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -162,6 +162,9 @@  struct rpc_add_xprt_test {
 #define RPC_CLNT_CREATE_REUSEPORT	(1UL << 11)
 #define RPC_CLNT_CREATE_CONNECTED	(1UL << 12)
 
+#define	RPC_CLNT_REBIND_DELAY		3
+#define	RPC_CLNT_REBIND_MAX_TIMEOUT	90
+
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				const struct rpc_program *, u32);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index b8ca3ecaf8d7..e9dc142f10bb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -90,8 +90,8 @@  struct rpc_task {
 #endif
 	unsigned char		tk_priority : 2,/* Task priority */
 				tk_garb_retry : 2,
-				tk_cred_retry : 2,
-				tk_rebind_retry : 2;
+				tk_cred_retry : 2;
+	unsigned char		tk_rebind_retry;
 };
 
 typedef void			(*rpc_action)(struct rpc_task *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fd7e1c630493..222578af6b01 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2053,7 +2053,7 @@  call_bind_status(struct rpc_task *task)
 		if (task->tk_rebind_retry == 0)
 			break;
 		task->tk_rebind_retry--;
-		rpc_delay(task, 3*HZ);
+		rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
 		goto retry_timeout;
 	case -ENOBUFS:
 		rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index be587a308e05..5c18a35752aa 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -817,7 +817,8 @@  rpc_init_task_statistics(struct rpc_task *task)
 	/* Initialize retry counters */
 	task->tk_garb_retry = 2;
 	task->tk_cred_retry = 2;
-	task->tk_rebind_retry = 2;
+	task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
+					RPC_CLNT_REBIND_DELAY;
 
 	/* starting timestamp */
 	task->tk_start = ktime_get();