diff mbox series

[v3,1/2] SUNRPC: Fixup v4.1 backchannel request timeouts

Message ID e28038fba1243f00b0dd66b7c5296a1e181645ea.1702496910.git.bcodding@redhat.com (mailing list archive)
State New
Headers show
Series [v3,1/2] SUNRPC: Fixup v4.1 backchannel request timeouts | expand

Commit Message

Benjamin Coddington Dec. 13, 2023, 7:49 p.m. UTC
After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on
the sending list"), any NFSv4.1 backchannel tasks placed on the sending queue
would immediately return with -ETIMEDOUT since their rpc_rqst timeout values
are zero.

Initialize the backchannel's rpc_rqst timeout parameters from the xprt's
default timeout settings.

Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on the sending list")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 net/sunrpc/xprt.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

Comments

Jeffrey Layton Jan. 4, 2024, 1:37 p.m. UTC | #1
On Wed, 2023-12-13 at 14:49 -0500, Benjamin Coddington wrote:
> After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on
> the sending list"), any 4.1 backchannel tasks placed on the sending queue
> would immediately return with -ETIMEDOUT since their req timers are zero.
> 
> Initialize the backchannel's rpc_rqst timeout parameters from the xprt's
> default timeout settings.
> 
> Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on the sending list")
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  net/sunrpc/xprt.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 2364c485540c..6cc9ffac962d 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -651,9 +651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
>  		jiffies + nsecs_to_jiffies(-delta);
>  }
>  
> -static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
> +static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req,
> +		const struct rpc_timeout *to)
>  {
> -	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
>  	unsigned long majortimeo = req->rq_timeout;
>  
>  	if (to->to_exponential)
> @@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
>  	return majortimeo;
>  }
>  
> -static void xprt_reset_majortimeo(struct rpc_rqst *req)
> +static void xprt_reset_majortimeo(struct rpc_rqst *req,
> +		const struct rpc_timeout *to)
>  {
> -	req->rq_majortimeo += xprt_calc_majortimeo(req);
> +	req->rq_majortimeo += xprt_calc_majortimeo(req, to);
>  }
>  
>  static void xprt_reset_minortimeo(struct rpc_rqst *req)
> @@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req)
>  	req->rq_minortimeo += req->rq_timeout;
>  }
>  
> -static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
> +static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req,
> +		const struct rpc_timeout *to)
>  {
>  	unsigned long time_init;
>  	struct rpc_xprt *xprt = req->rq_xprt;
> @@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
>  		time_init = jiffies;
>  	else
>  		time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
> -	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
> -	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
> +
> +	req->rq_timeout = to->to_initval;
> +	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to);
>  	req->rq_minortimeo = time_init + req->rq_timeout;
>  }
>  
> @@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
>  	} else {
>  		req->rq_timeout = to->to_initval;
>  		req->rq_retries = 0;
> -		xprt_reset_majortimeo(req);
> +		xprt_reset_majortimeo(req, to);
>  		/* Reset the RTT counters == "slow start" */
>  		spin_lock(&xprt->transport_lock);
>  		rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
> @@ -1886,7 +1889,7 @@ xprt_request_init(struct rpc_task *task)
>  	req->rq_snd_buf.bvec = NULL;
>  	req->rq_rcv_buf.bvec = NULL;
>  	req->rq_release_snd_buf = NULL;
> -	xprt_init_majortimeo(task, req);
> +	xprt_init_majortimeo(task, req, task->tk_client->cl_timeout);
>  
>  	trace_xprt_reserve(req);
>  }
> @@ -1996,6 +1999,8 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
>  	 */
>  	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
>  		xbufp->tail[0].iov_len;
> +
> +	xprt_init_majortimeo(task, req, req->rq_xprt->timeout);
>  }
>  #endif
>  

As I mentioned in the email thread here, I've been seeing some hangs
with v6.7-rc8 kernels when testing TLS support:

https://lore.kernel.org/linux-nfs/8C3DFB5D-B967-4D59-BFC5-7B25315DB9AB@redhat.com/T/#t

With this patchset in place, I've been unable to reproduce it.


I'm not sure whether this is the right way to fix it, but this does seem to
be a real problem, and I have a semi-reliable way to reproduce it if you
need me to test a fix.

Cheers,
diff mbox series

Patch

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2364c485540c..6cc9ffac962d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,9 +651,9 @@  static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
 		jiffies + nsecs_to_jiffies(-delta);
 }
 
-static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req,
+		const struct rpc_timeout *to)
 {
-	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
 	unsigned long majortimeo = req->rq_timeout;
 
 	if (to->to_exponential)
@@ -665,9 +665,10 @@  static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
 	return majortimeo;
 }
 
-static void xprt_reset_majortimeo(struct rpc_rqst *req)
+static void xprt_reset_majortimeo(struct rpc_rqst *req,
+		const struct rpc_timeout *to)
 {
-	req->rq_majortimeo += xprt_calc_majortimeo(req);
+	req->rq_majortimeo += xprt_calc_majortimeo(req, to);
 }
 
 static void xprt_reset_minortimeo(struct rpc_rqst *req)
@@ -675,7 +676,8 @@  static void xprt_reset_minortimeo(struct rpc_rqst *req)
 	req->rq_minortimeo += req->rq_timeout;
 }
 
-static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req,
+		const struct rpc_timeout *to)
 {
 	unsigned long time_init;
 	struct rpc_xprt *xprt = req->rq_xprt;
@@ -684,8 +686,9 @@  static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
 		time_init = jiffies;
 	else
 		time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
-	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
-	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+
+	req->rq_timeout = to->to_initval;
+	req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to);
 	req->rq_minortimeo = time_init + req->rq_timeout;
 }
 
@@ -713,7 +716,7 @@  int xprt_adjust_timeout(struct rpc_rqst *req)
 	} else {
 		req->rq_timeout = to->to_initval;
 		req->rq_retries = 0;
-		xprt_reset_majortimeo(req);
+		xprt_reset_majortimeo(req, to);
 		/* Reset the RTT counters == "slow start" */
 		spin_lock(&xprt->transport_lock);
 		rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
@@ -1886,7 +1889,7 @@  xprt_request_init(struct rpc_task *task)
 	req->rq_snd_buf.bvec = NULL;
 	req->rq_rcv_buf.bvec = NULL;
 	req->rq_release_snd_buf = NULL;
-	xprt_init_majortimeo(task, req);
+	xprt_init_majortimeo(task, req, task->tk_client->cl_timeout);
 
 	trace_xprt_reserve(req);
 }
@@ -1996,6 +1999,8 @@  xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
 	 */
 	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
 		xbufp->tail[0].iov_len;
+
+	xprt_init_majortimeo(task, req, req->rq_xprt->timeout);
 }
 #endif