[v2,1/1] SUNRPC dont update timeout value on connection reset
diff mbox series

Message ID 20200713161842.90553-1-olga.kornievskaia@gmail.com
State New
Headers show
Series
  • [v2,1/1] SUNRPC dont update timeout value on connection reset
Related show

Commit Message

Olga Kornievskaia July 13, 2020, 4:18 p.m. UTC
Current behaviour: every time a v3 operation is re-sent to the server
we update (double) the timeout. There is no distinction between whether
or not the previous timer had expired before the re-sent happened.

Here's the scenario:
1. Client sends a v3 operation
2. Server RST-s the connection (prior to the timeout) (eg., connection
is immediately reset)
3. Client re-sends a v3 operation but the timeout is now 120sec.

As a result, an application sees 2mins pause before a retry in case
server again does not reply.

Instead, this patch proposes to keep track off when the minor timeout
should happen and if it didn't, then don't update the new timeout.

v2: using the suggested approach by Trond Myklebust and not use the 
sliding timeout. 

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 include/linux/sunrpc/xprt.h | 1 +
 net/sunrpc/xprt.c           | 9 +++++++++
 2 files changed, 10 insertions(+)

Comments

Trond Myklebust July 13, 2020, 5:10 p.m. UTC | #1
Hi Olga,

On Mon, 2020-07-13 at 12:18 -0400, Olga Kornievskaia wrote:
> Current behaviour: every time a v3 operation is re-sent to the server
> we update (double) the timeout. There is no distinction between
> whether
> or not the previous timer had expired before the re-sent happened.
> 
> Here's the scenario:
> 1. Client sends a v3 operation
> 2. Server RST-s the connection (prior to the timeout) (eg.,
> connection
> is immediately reset)
> 3. Client re-sends a v3 operation but the timeout is now 120sec.
> 
> As a result, an application sees 2mins pause before a retry in case
> server again does not reply.
> 
> Instead, this patch proposes to keep track off when the minor timeout
> should happen and if it didn't, then don't update the new timeout.
> 
> v2: using the suggested approach by Trond Myklebust and not use the 
> sliding timeout. 
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  include/linux/sunrpc/xprt.h | 1 +
>  net/sunrpc/xprt.c           | 9 +++++++++
>  2 files changed, 10 insertions(+)
> 
> diff --git a/include/linux/sunrpc/xprt.h
> b/include/linux/sunrpc/xprt.h
> index e64bd82..a603d48 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -101,6 +101,7 @@ struct rpc_rqst {
>  							 * used in the
> softirq.
>  							 */
>  	unsigned long		rq_majortimeo;	/* major timeout
> alarm */
> +	unsigned long		rq_minortimeo;	/* minor timeout
> alarm */
>  	unsigned long		rq_timeout;	/* Current timeout
> value */
>  	ktime_t			rq_rtt;		/* round-trip time */
>  	unsigned int		rq_retries;	/* # of retries */
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index d5cc5db..92fc9fd 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -607,6 +607,11 @@ static void xprt_reset_majortimeo(struct
> rpc_rqst *req)
>  	req->rq_majortimeo += xprt_calc_majortimeo(req);
>  }
>  
> +static void xprt_reset_minortimeo(struct rpc_rqst *req)
> +{
> +	req->rq_minortimeo = jiffies + req->rq_timeout;

Shouldn't this just be: req->rq_minortimeo += req->rq_timeout; ?

> +}
> +
>  static void xprt_init_majortimeo(struct rpc_task *task, struct
> rpc_rqst *req)
>  {
>  	unsigned long time_init;
> @@ -618,6 +623,7 @@ static void xprt_init_majortimeo(struct rpc_task
> *task, struct rpc_rqst *req)
>  		time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
>  	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
>  	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
> +	req->rq_minortimeo = time_init + req->rq_timeout;
>  }
>  
>  /**
> @@ -631,6 +637,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
>  	const struct rpc_timeout *to = req->rq_task->tk_client-
> >cl_timeout;
>  	int status = 0;
>  
> +	if (time_before(jiffies, req->rq_minortimeo))
> +		return status;
>  	if (time_before(jiffies, req->rq_majortimeo)) {
>  		if (to->to_exponential)
>  			req->rq_timeout <<= 1;
> @@ -649,6 +657,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
>  		spin_unlock(&xprt->transport_lock);
>  		status = -ETIMEDOUT;
>  	}
> +	xprt_reset_minortimeo(req);
>  
>  	if (req->rq_timeout == 0) {
>  		printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout =
> 0!\n");

Otherwise it looks good to me.

Patch
diff mbox series

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index e64bd82..a603d48 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -101,6 +101,7 @@  struct rpc_rqst {
 							 * used in the softirq.
 							 */
 	unsigned long		rq_majortimeo;	/* major timeout alarm */
+	unsigned long		rq_minortimeo;	/* minor timeout alarm */
 	unsigned long		rq_timeout;	/* Current timeout value */
 	ktime_t			rq_rtt;		/* round-trip time */
 	unsigned int		rq_retries;	/* # of retries */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d5cc5db..92fc9fd 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -607,6 +607,11 @@  static void xprt_reset_majortimeo(struct rpc_rqst *req)
 	req->rq_majortimeo += xprt_calc_majortimeo(req);
 }
 
+static void xprt_reset_minortimeo(struct rpc_rqst *req)
+{
+	req->rq_minortimeo = jiffies + req->rq_timeout;
+}
+
 static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
 {
 	unsigned long time_init;
@@ -618,6 +623,7 @@  static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
 		time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
 	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
 	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+	req->rq_minortimeo = time_init + req->rq_timeout;
 }
 
 /**
@@ -631,6 +637,8 @@  int xprt_adjust_timeout(struct rpc_rqst *req)
 	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
 	int status = 0;
 
+	if (time_before(jiffies, req->rq_minortimeo))
+		return status;
 	if (time_before(jiffies, req->rq_majortimeo)) {
 		if (to->to_exponential)
 			req->rq_timeout <<= 1;
@@ -649,6 +657,7 @@  int xprt_adjust_timeout(struct rpc_rqst *req)
 		spin_unlock(&xprt->transport_lock);
 		status = -ETIMEDOUT;
 	}
+	xprt_reset_minortimeo(req);
 
 	if (req->rq_timeout == 0) {
 		printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");