SUNRPC: Fix disconnection races

Message ID 20181217200524.105368-1-trond.myklebust@hammerspace.com
State New, archived
Series: SUNRPC: Fix disconnection races

Commit Message

Trond Myklebust Dec. 17, 2018, 8:05 p.m. UTC
When the socket is closed, we need to call xprt_disconnect_done() in order
to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.

However, we also want to ensure that we don't wake them up before the socket
is closed, since that would cause thundering herd issues with everyone
piling up to retransmit before the TCP shutdown dance has completed.
Only the task that holds XPRT_LOCKED needs to wake up early in order to
allow the close to complete.

Reported-by: Dave Wysochanski <dwysocha@redhat.com>
Reported-by: Scott Mayhew <smayhew@redhat.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprt.c     |  4 +++-
 net/sunrpc/xprtsock.c | 10 ++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

Comments

Chuck Lever Dec. 17, 2018, 10:06 p.m. UTC | #1
> On Dec 17, 2018, at 3:05 PM, Trond Myklebust <trondmy@gmail.com> wrote:
> 
> When the socket is closed, we need to call xprt_disconnect_done() in order
> to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.
> 
> However, we also want to ensure that we don't wake them up before the socket
> is closed, since that would cause thundering herd issues with everyone
> piling up to retransmit before the TCP shutdown dance has completed.
> Only the task that holds XPRT_LOCKED needs to wake up early in order to
> allow the close to complete.
> 
> Reported-by: Dave Wysochanski <dwysocha@redhat.com>
> Reported-by: Scott Mayhew <smayhew@redhat.com>
> Cc: Chuck Lever <chuck.lever@oracle.com>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> net/sunrpc/xprt.c     |  4 +++-
> net/sunrpc/xprtsock.c | 10 ++++------
> 2 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index ce927002862a..089e53a87379 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
> 	/* Try to schedule an autoclose RPC call */
> 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> -	xprt_wake_pending_tasks(xprt, -EAGAIN);
> +	else if (xprt->snd_task)
> +		rpc_wake_up_queued_task_set_status(&xprt->pending,
> +				xprt->snd_task, -ENOTCONN);
> 	spin_unlock_bh(&xprt->transport_lock);
> }
> EXPORT_SYMBOL_GPL(xprt_force_disconnect);

Hrm, well I'm not sure xprt_connect_status is ready to deal with -ENOTCONN.

     kworker/2:3-2342  [002]  3706.246289: rpc_task_wakeup:      task:39992@5 flags=0001 state=001e status=-107 timeout=120000 queue=xprt_pending
     kworker/0:7-2424  [000]  3706.246296: xprtrdma_reconnect:   peer=[192.168.2.55]:20049 r_xprt=0xffff88886c465000
  kworker/u25:28-2270  [004]  3706.246321: rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-107 action=xprt_connect_status
  kworker/u25:28-2270  [004]  3706.246323: rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5 action=call_connect_status
  kworker/u25:28-2270  [004]  3706.246323: rpc_connect_status:   task:39992@5 status=-5
  kworker/u25:28-2270  [004]  3706.246324: rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5 action=rpc_exit_task
  kworker/u25:28-2270  [004]  3706.246327: rpc_stats_latency:    task:39992@5 xid=0x2f1ad350 nfsv3 READ backlog=0 rtt=0 execute=13028
  kworker/u25:28-2270  [004]  3706.246333: xprtrdma_rpc_done:    task:39992@5 req=0xffff88886a438000 rep=(nil)

static void xprt_connect_status(struct rpc_task *task)
{
        switch (task->tk_status) {
        case 0:
                dprintk("RPC: %5u xprt_connect_status: connection established\n",
                                task->tk_pid);
                break;
        case -ECONNREFUSED:
        case -ECONNRESET:
        case -ECONNABORTED:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
        case -EPIPE:
        case -EAGAIN:
                dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
                break;
        case -ETIMEDOUT:
                dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
                                "out\n", task->tk_pid);
                break;
        default:
                dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
                                "server %s\n", task->tk_pid, -task->tk_status,
                                task->tk_rqstp->rq_xprt->servername);
                task->tk_status = -EIO;
        }    
}

Maybe the wake-up status should remain -EAGAIN ?


> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 8a5e823e0b33..f0b3700cec95 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
> 
> 	trace_rpc_socket_close(xprt, sock);
> 	sock_release(sock);
> +
> +	xprt_disconnect_done(xprt);
> }
> 
> /**
> @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
> 
> 	xs_reset_transport(transport);
> 	xprt->reestablish_timeout = 0;
> -
> -	xprt_disconnect_done(xprt);
> }
> 
> static void xs_inject_disconnect(struct rpc_xprt *xprt)
> @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
> 					&transport->sock_state))
> 			xprt_clear_connecting(xprt);
> 		clear_bit(XPRT_CLOSING, &xprt->state);
> -		if (sk->sk_err)
> -			xprt_wake_pending_tasks(xprt, -sk->sk_err);
> 		/* Trigger the socket release */
> 		xs_tcp_force_close(xprt);
> 	}
> @@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
> 	trace_rpc_socket_connect(xprt, sock, 0);
> 	status = 0;
> out:
> -	xprt_unlock_connect(xprt, transport);
> 	xprt_clear_connecting(xprt);
> +	xprt_unlock_connect(xprt, transport);
> 	xprt_wake_pending_tasks(xprt, status);
> }
> 
> @@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
> 	}
> 	status = -EAGAIN;
> out:
> -	xprt_unlock_connect(xprt, transport);
> 	xprt_clear_connecting(xprt);
> +	xprt_unlock_connect(xprt, transport);
> 	xprt_wake_pending_tasks(xprt, status);
> }
> 
> -- 
> 2.19.2
> 

--
Chuck Lever

Trond Myklebust Dec. 17, 2018, 10:24 p.m. UTC | #2
On Mon, 2018-12-17 at 17:06 -0500, Chuck Lever wrote:
> > On Dec 17, 2018, at 3:05 PM, Trond Myklebust <trondmy@gmail.com>
> > wrote:
> > 
> > When the socket is closed, we need to call xprt_disconnect_done()
> > in order
> > to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping
> > tasks.
> > 
> > However, we also want to ensure that we don't wake them up before
> > the socket
> > is closed, since that would cause thundering herd issues with
> > everyone
> > piling up to retransmit before the TCP shutdown dance has
> > completed.
> > Only the task that holds XPRT_LOCKED needs to wake up early in
> > order to
> > allow the close to complete.
> > 
> > Reported-by: Dave Wysochanski <dwysocha@redhat.com>
> > Reported-by: Scott Mayhew <smayhew@redhat.com>
> > Cc: Chuck Lever <chuck.lever@oracle.com>
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> > net/sunrpc/xprt.c     |  4 +++-
> > net/sunrpc/xprtsock.c | 10 ++++------
> > 2 files changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> > index ce927002862a..089e53a87379 100644
> > --- a/net/sunrpc/xprt.c
> > +++ b/net/sunrpc/xprt.c
> > @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt
> > *xprt)
> > 	/* Try to schedule an autoclose RPC call */
> > 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> > 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> > -	xprt_wake_pending_tasks(xprt, -EAGAIN);
> > +	else if (xprt->snd_task)
> > +		rpc_wake_up_queued_task_set_status(&xprt->pending,
> > +				xprt->snd_task, -ENOTCONN);
> > 	spin_unlock_bh(&xprt->transport_lock);
> > }
> > EXPORT_SYMBOL_GPL(xprt_force_disconnect);
> 
> Hrm, well I'm not sure xprt_connect_status is ready to deal with
> -ENOTCONN.
> 
>      kworker/2:3-2342  [002]  3706.246289:
> rpc_task_wakeup:      task:39992@5 flags=0001 state=001e status=-107
> timeout=120000 queue=xprt_pending
>      kworker/0:7-2424  [000]  3706.246296:
> xprtrdma_reconnect:   peer=[192.168.2.55]:20049
> r_xprt=0xffff88886c465000
>   kworker/u25:28-2270  [004]  3706.246321:
> rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-107
> action=xprt_connect_status
>   kworker/u25:28-2270  [004]  3706.246323:
> rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5
> action=call_connect_status
>   kworker/u25:28-2270  [004]  3706.246323:
> rpc_connect_status:   task:39992@5 status=-5
>   kworker/u25:28-2270  [004]  3706.246324:
> rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5
> action=rpc_exit_task
>   kworker/u25:28-2270  [004]  3706.246327:
> rpc_stats_latency:    task:39992@5 xid=0x2f1ad350 nfsv3 READ
> backlog=0 rtt=0 execute=13028
>   kworker/u25:28-2270  [004]  3706.246333:
> xprtrdma_rpc_done:    task:39992@5 req=0xffff88886a438000 rep=(nil)
> 
> static void xprt_connect_status(struct rpc_task *task)
> {
>         switch (task->tk_status) {
>         case 0:
>                 dprintk("RPC: %5u xprt_connect_status: connection
> established\n",
>                                 task->tk_pid);
>                 break;
>         case -ECONNREFUSED:
>         case -ECONNRESET:
>         case -ECONNABORTED:
>         case -ENETUNREACH:
>         case -EHOSTUNREACH:
>         case -EPIPE:
>         case -EAGAIN:
>                 dprintk("RPC: %5u xprt_connect_status: retrying\n",
> task->tk_pid);
>                 break;
>         case -ETIMEDOUT:
>                 dprintk("RPC: %5u xprt_connect_status: connect
> attempt timed "
>                                 "out\n", task->tk_pid);
>                 break;
>         default:
>                 dprintk("RPC: %5u xprt_connect_status: error %d
> connecting to "
>                                 "server %s\n", task->tk_pid, -task-
> >tk_status,
>                                 task->tk_rqstp->rq_xprt->servername);
>                 task->tk_status = -EIO;
>         }    
> }
> 
> Maybe the wake-up status should remain -EAGAIN ?


Actually, that brings up a very good point. Given that a number of
things can wake up tasks on xprt->pending, and set an error code, why
do we want xprt_connect_status() to change the error codes on us? That
looks like it can go wrong in quite a few different ways.

Furthermore, call_connect_status() is supposed to perform more or less
the same service, and already knows how to deal with more errors.

> 
> > diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> > index 8a5e823e0b33..f0b3700cec95 100644
> > --- a/net/sunrpc/xprtsock.c
> > +++ b/net/sunrpc/xprtsock.c
> > @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct
> > sock_xprt *transport)
> > 
> > 	trace_rpc_socket_close(xprt, sock);
> > 	sock_release(sock);
> > +
> > +	xprt_disconnect_done(xprt);
> > }
> > 
> > /**
> > @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
> > 
> > 	xs_reset_transport(transport);
> > 	xprt->reestablish_timeout = 0;
> > -
> > -	xprt_disconnect_done(xprt);
> > }
> > 
> > static void xs_inject_disconnect(struct rpc_xprt *xprt)
> > @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock
> > *sk)
> > 					&transport->sock_state))
> > 			xprt_clear_connecting(xprt);
> > 		clear_bit(XPRT_CLOSING, &xprt->state);
> > -		if (sk->sk_err)
> > -			xprt_wake_pending_tasks(xprt, -sk->sk_err);
> > 		/* Trigger the socket release */
> > 		xs_tcp_force_close(xprt);
> > 	}
> > @@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct
> > work_struct *work)
> > 	trace_rpc_socket_connect(xprt, sock, 0);
> > 	status = 0;
> > out:
> > -	xprt_unlock_connect(xprt, transport);
> > 	xprt_clear_connecting(xprt);
> > +	xprt_unlock_connect(xprt, transport);
> > 	xprt_wake_pending_tasks(xprt, status);
> > }
> > 
> > @@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct
> > work_struct *work)
> > 	}
> > 	status = -EAGAIN;
> > out:
> > -	xprt_unlock_connect(xprt, transport);
> > 	xprt_clear_connecting(xprt);
> > +	xprt_unlock_connect(xprt, transport);
> > 	xprt_wake_pending_tasks(xprt, status);
> > }
> > 
> > -- 
> > 2.19.2
> > 
> 
> --
> Chuck Lever
> 
> 
>

Patch

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce927002862a..089e53a87379 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
 	/* Try to schedule an autoclose RPC call */
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-	xprt_wake_pending_tasks(xprt, -EAGAIN);
+	else if (xprt->snd_task)
+		rpc_wake_up_queued_task_set_status(&xprt->pending,
+				xprt->snd_task, -ENOTCONN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_force_disconnect);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8a5e823e0b33..f0b3700cec95 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
 
 	trace_rpc_socket_close(xprt, sock);
 	sock_release(sock);
+
+	xprt_disconnect_done(xprt);
 }
 
 /**
@@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
 
 	xs_reset_transport(transport);
 	xprt->reestablish_timeout = 0;
-
-	xprt_disconnect_done(xprt);
 }
 
 static void xs_inject_disconnect(struct rpc_xprt *xprt)
@@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
 					&transport->sock_state))
 			xprt_clear_connecting(xprt);
 		clear_bit(XPRT_CLOSING, &xprt->state);
-		if (sk->sk_err)
-			xprt_wake_pending_tasks(xprt, -sk->sk_err);
 		/* Trigger the socket release */
 		xs_tcp_force_close(xprt);
 	}
@@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
 	trace_rpc_socket_connect(xprt, sock, 0);
 	status = 0;
 out:
-	xprt_unlock_connect(xprt, transport);
 	xprt_clear_connecting(xprt);
+	xprt_unlock_connect(xprt, transport);
 	xprt_wake_pending_tasks(xprt, status);
 }
 
@@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	}
 	status = -EAGAIN;
 out:
-	xprt_unlock_connect(xprt, transport);
 	xprt_clear_connecting(xprt);
+	xprt_unlock_connect(xprt, transport);
 	xprt_wake_pending_tasks(xprt, status);
 }