
[3.2.5] NFSv3 CLOSE_WAIT hang

Message ID 4FA345DA4F4AE44899BD2B03EEEC2FA908F9F734@SACEXCMBX04-PRD.hq.netapp.com (mailing list archive)
State New, archived

Commit Message

Trond Myklebust Sept. 12, 2012, 8:54 p.m. UTC
On Tue, 2012-09-11 at 18:17 -0400, Trond Myklebust wrote:
> On Tue, 2012-09-11 at 12:40 -0700, Simon Kirby wrote:
> > Hello!
> >
> > This problem still bites us rarely, and we've been using TCP NFS for some
> > time. However, we seem to have narrowed it down to a very long
> > storage hang on the knfsd side. If storage never has any problems, we
> > don't see the NFS client hang. I was going to try to make a test case by
> > forcing the server to hang, but I never got around to this. Meanwhile,
> > I've been running the clients with the debugging patches I posted
> > earlier, and it always prints the "xprt_force_disconnect(): setting
> > XPRT_CLOSE_WAIT" warning before hanging. If Apache is in sendfile() at
> > the time, it seems to get stuck forever; otherwise, it might recover.
>
> Does the "if (test_and_set_bit(XPRT_LOCK) == 0)" condition immediately
> following that succeed so that queue_work() is called?
>
> > http://www.spinics.net/lists/linux-nfs/msg29495.html
> > http://0x.ca/sim/ref/3.2.10/dmesg
> >
> > I suppose we could try 3.5 at this point.
>
> If you've been keeping up with the 3.2 stable releases, then I wouldn't
> expect any major differences to the sunrpc code, but it might be worth a
> try in case the networking layer has changed.


Hi Simon,

Can you try the following patch, and see if it addresses the TCP "server
hangs" case?

Cheers
  Trond
8<----------------------------------------------------------------------
From 99330d09cc1074fbdc64089fa0a3f8dbdc74daaf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 12 Sep 2012 16:49:15 -0400
Subject: [PATCH] SUNRPC: Ensure that the TCP socket is closed when in
 CLOSE_WAIT

Instead of doing a shutdown() call, we need to do an actual close().
Ditto if/when the server is sending us junk RPC headers.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

-- 
1.7.11.4


-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
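
For background on the shutdown() versus close() point in the commit
message: on 3.2-era kernels the TCP transport's ->close() op picks
between a half-close and a full close depending on whether
XPRT_CONNECTION_CLOSE is set, roughly like this (a simplified sketch
from memory, not part of the patch above):

static void xs_tcp_close(struct rpc_xprt *xprt)
{
	if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
		xs_close(xprt);		/* full close: release the socket */
	else
		xs_tcp_shutdown(xprt);	/* half-close: send FIN, keep socket */
}

So the point of setting XPRT_CONNECTION_CLOSE in the new
xs_tcp_force_close() helper is that the autoclose worker takes the
xs_close() path instead of lingering in shutdown(), which is what lets
a CLOSE_WAIT socket actually go away.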

Comments

Simon Kirby Sept. 19, 2012, 10:01 p.m. UTC | #1
On Tue, Sep 11, 2012 at 10:17:25PM +0000, Myklebust, Trond wrote:
> Does the "if (test_and_set_bit(XPRT_LOCK) == 0)" condition immediately
> following that succeed so that queue_work() is called?

Yes, it seems to:

[146957.793093] RPC: server initated shutdown -- state 8 conn 1 dead 0 zapped 1 sk_shutdown 1
[146957.793418] xprt_force_disconnect(): setting XPRT_CLOSE_WAIT
[146957.799288] xprt_force_disconnect(): setting XPRT_LOCKED worked, calling queue_work()
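
For reference, the gate being asked about is in xprt_force_disconnect()
(net/sunrpc/xprt.c); in 3.2 it looks roughly like this (simplified
sketch, details may differ):

void xprt_force_disconnect(struct rpc_xprt *xprt)
{
	/* Don't race with the test_bit() in xprt_clear_locked() */
	spin_lock_bh(&xprt->transport_lock);
	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
	/* Try to schedule an autoclose RPC call */
	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
		queue_work(rpciod_workqueue, &xprt->task_cleanup);
	xprt_wake_pending_tasks(xprt, -EAGAIN);
	spin_unlock_bh(&xprt->transport_lock);
}

If XPRT_LOCKED is already held by another task, queue_work() is skipped
and the close is expected to be scheduled when the lock holder releases
the lock.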

On Wed, Sep 12, 2012 at 08:54:14PM +0000, Myklebust, Trond wrote:
> On Tue, 2012-09-11 at 18:17 -0400, Trond Myklebust wrote:
> 
> Hi Simon,
> 
> Can you try the following patch, and see if it addresses the TCP "server
> hangs" case?
> 
> Cheers
>   Trond
> 8<----------------------------------------------------------------------
> From 99330d09cc1074fbdc64089fa0a3f8dbdc74daaf Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <Trond.Myklebust@netapp.com>
> Date: Wed, 12 Sep 2012 16:49:15 -0400
> Subject: [PATCH] SUNRPC: Ensure that the TCP socket is closed when in
>  CLOSE_WAIT
> 
> Instead of doing a shutdown() call, we need to do an actual close().
> Ditto if/when the server is sending us junk RPC headers.
> 
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> ---
>  net/sunrpc/xprtsock.c | 21 ++++++++++++++++-----
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index a35b8e5..d1988cf 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1025,6 +1025,16 @@ static void xs_udp_data_ready(struct sock *sk, int len)
>  	read_unlock_bh(&sk->sk_callback_lock);
>  }
>  
> +/*
> + * Helper function to force a TCP close if the server is sending
> + * junk and/or it has put us in CLOSE_WAIT
> + */
> +static void xs_tcp_force_close(struct rpc_xprt *xprt)
> +{
> +	set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
> +	xprt_force_disconnect(xprt);
> +}
> +
>  static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
>  {
>  	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
> @@ -1051,7 +1061,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
>  	/* Sanity check of the record length */
>  	if (unlikely(transport->tcp_reclen < 8)) {
>  		dprintk("RPC:       invalid TCP record fragment length\n");
> -		xprt_force_disconnect(xprt);
> +		xs_tcp_force_close(xprt);
>  		return;
>  	}
>  	dprintk("RPC:       reading TCP record fragment of length %d\n",
> @@ -1132,7 +1142,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
>  		break;
>  	default:
>  		dprintk("RPC:       invalid request message type\n");
> -		xprt_force_disconnect(&transport->xprt);
> +		xs_tcp_force_close(&transport->xprt);
>  	}
>  	xs_tcp_check_fraghdr(transport);
>  }
> @@ -1455,6 +1465,8 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
>  static void xs_sock_mark_closed(struct rpc_xprt *xprt)
>  {
>  	smp_mb__before_clear_bit();
> +	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
> +	clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
>  	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
>  	clear_bit(XPRT_CLOSING, &xprt->state);
>  	smp_mb__after_clear_bit();
> @@ -1512,8 +1524,8 @@ static void xs_tcp_state_change(struct sock *sk)
>  		break;
>  	case TCP_CLOSE_WAIT:
>  		/* The server initiated a shutdown of the socket */
> -		xprt_force_disconnect(xprt);
>  		xprt->connect_cookie++;
> +		xs_tcp_force_close(xprt);
>  	case TCP_CLOSING:
>  		/*
>  		 * If the server closed down the connection, make sure that
> @@ -2199,8 +2211,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
>  		/* We're probably in TIME_WAIT. Get rid of existing socket,
>  		 * and retry
>  		 */
> -		set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
> -		xprt_force_disconnect(xprt);
> +		xs_tcp_force_close(xprt);
>  		break;
>  	case -ECONNREFUSED:
>  	case -ECONNRESET:
> -- 
> 1.7.11.4

Yes, based on data collected today, this seems to fix the problem!
Awesome! :)

Simon-
Trond Myklebust Sept. 19, 2012, 10:11 p.m. UTC | #2
On Wed, 2012-09-19 at 15:01 -0700, Simon Kirby wrote:
> Yes, based on data collected today, this seems to fix the problem!
> Awesome! :)
Great! I'm adding "Tested-by:" and "Cc: stable" lines since you've been
seeing this for some time.
-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
Simon Kirby Oct. 12, 2012, 8:15 a.m. UTC | #3
On Wed, Sep 19, 2012 at 10:11:57PM +0000, Myklebust, Trond wrote:
> On Wed, 2012-09-19 at 15:01 -0700, Simon Kirby wrote:
> 
> > Yes, based on data collected today, this seems to fix the problem!
> > Awesome! :)
> 
> Great! I'm adding "Tested-by:" and "Cc: stable" lines since you've been
> seeing this for some time.

This has been working well, except we just saw one more case where we
still got a CLOSE_WAIT hang. The issue was that queue_work() was not
called because setting XPRT_LOCKED failed, which you asked about before.

This trace is from our debugging patches:

WARNING: at net/sunrpc/xprt.c:648 xprt_force_disconnect+0x12a/0x140() 
Hardware name: PowerEdge 1950 
xprt_force_disconnect(): setting XPRT_LOCKED failed                                                                                                                                                
Modules linked in: ipmi_devintf ipmi_si ipmi_msghandler xt_recent nf_conntrack_ftp xt_state xt_owner nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 bnx2 serio_raw evdev                             
Pid: 0, comm: swapper/1 Tainted: G        W    3.2.28-hw+ #25 
Call Trace: 
 <IRQ>  [<ffffffff816b74ba>] ? xprt_force_disconnect+0x12a/0x140 
 [<ffffffff8105db70>] warn_slowpath_common+0x80/0xc0 
 [<ffffffff8105dca9>] warn_slowpath_fmt+0x69/0x70 
 [<ffffffff81701c8c>] ? printk+0x67/0x69 
 [<ffffffff81704ef1>] ? _raw_spin_lock_bh+0x11/0x40 
 [<ffffffff816b74ba>] xprt_force_disconnect+0x12a/0x140 
 [<ffffffff816ba99c>] xs_tcp_state_change+0xdc/0x340 
 [<ffffffff81669deb>] tcp_fin+0x17b/0x210 
 [<ffffffff8166aaf2>] tcp_data_queue+0x4c2/0xe70 
 [<ffffffff8166e259>] ? tcp_validate_incoming+0x109/0x340 
 [<ffffffff8166e5d9>] tcp_rcv_established+0x149/0xb50 
 [<ffffffff8167711c>] tcp_v4_do_rcv+0x15c/0x2e0 
 [<ffffffff8164b6e4>] ? nf_iterate+0x84/0xb0 
 [<ffffffff816778ef>] tcp_v4_rcv+0x64f/0x8b0 
 [<ffffffff8164b85d>] ? nf_hook_slow+0x6d/0x130 
 [<ffffffff81656160>] ? ip_rcv+0x2f0/0x2f0 
 [<ffffffff8165620a>] ip_local_deliver_finish+0xaa/0x1d0 
 [<ffffffff816563bd>] ip_local_deliver+0x8d/0xa0 
 [<ffffffff81655bdb>] ip_rcv_finish+0x7b/0x310 
 [<ffffffff816560ed>] ip_rcv+0x27d/0x2f0 
 [<ffffffff816272b7>] __netif_receive_skb+0x437/0x490 
 [<ffffffff81629148>] netif_receive_skb+0x78/0x80 
...
-pid- flgs status -client- --rqstp- -timeout ---ops--
60022 0080    -11 ffff880096e5bc00   (null)        0 ffffffff817beb20 nfsv3 FSINFO a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff880142c1d800) state=16, snd_task=0
SUNRPC: xprt(ffff880032175800) state=16, snd_task=0
SUNRPC: xprt(ffff8800c2da9000) state=16, snd_task=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff880221977000) state=16, snd_task=0
SUNRPC: xprt(ffff88022212f800) state=16, snd_task=0
SUNRPC: xprt(ffff880221504000) state=0, snd_task=0
SUNRPC: xprt(ffff88022509c800) state=18, snd_task=0
SUNRPC: xprt(ffff88022509c800) state=18, snd_task=0
SUNRPC: xprt(ffff880222711000) state=18, snd_task=0
SUNRPC: xprt(ffff880222711000) state=18, snd_task=0
SUNRPC: xprt(ffff880221737800) state=18, snd_task=0
SUNRPC: xprt(ffff880221737800) state=18, snd_task=0
SUNRPC: xprt(ffff88021f275800) state=18, snd_task=0
SUNRPC: xprt(ffff88021f275800) state=18, snd_task=0
SUNRPC: xprt(ffff8802252b3800) state=18, snd_task=0
SUNRPC: xprt(ffff880222704000) state=18, snd_task=0
SUNRPC: xprt(ffff880222704000) state=18, snd_task=0
SUNRPC: xprt(ffff8802252b3800) state=18, snd_task=0
SUNRPC: xprt(ffff8802252b3800) state=18, snd_task=0
SUNRPC: xprt(ffff88022234e800) state=18, snd_task=0
SUNRPC: xprt(ffff88022234e800) state=18, snd_task=0
SUNRPC: xprt(ffff880221734000) state=18, snd_task=0
SUNRPC: xprt(ffff880221734000) state=18, snd_task=0
SUNRPC: xprt(ffff880222702000) state=18, snd_task=0
SUNRPC: xprt(ffff880222702000) state=18, snd_task=0
 1758 0080    -11 ffff880225562400   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1770 0080    -11 ffff880225562400   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1799 0080    -11 ffff880225562400   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
59728 0001    -11 ffff8802253d9e00   (null)        0 ffffffff8173ff20 nfsv3 COMMIT a:call_reserveresult q:xprt_sending
 1449 0080    -11 ffff8802253d9e00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1766 0080    -11 ffff8802253d9e00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1811 0080    -11 ffff8802253d9e00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
 1431 0080    -11 ffff8802253d6000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1604 0080    -11 ffff8802253d6000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1851 0080    -11 ffff8802253d6000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1870 0080    -11 ffff8802253d6000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 5693 0080    -11 ffff8802253d6000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 5791 0080    -11 ffff8802253d6000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
 1871 0080    -11 ffff880225522800   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
 1442 0080    -11 ffff8802253d7600   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1457 0080    -11 ffff8802253d7600   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1829 0080    -11 ffff8802253d7600   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 5784 0080    -11 ffff8802253d7600   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
 1625 0080    -11 ffff880225522000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 2152 0080    -11 ffff880225522000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 2196 0080    -11 ffff880225522000   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
 1517 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1733 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1741 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 1872 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 2211 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 4316 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 4350 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 LOOKUP a:call_reserveresult q:xprt_sending
 4353 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 5862 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 6982 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
 9869 0080    -11 ffff8802253d5a00   (null)        0 ffffffff817beb20 nfsv3 ACCESS a:call_reserveresult q:xprt_sending
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
SUNRPC: xprt(ffff88021e87d800) state=283, snd_task->tk_pid=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
SUNRPC: xprt(ffff880225098800) state=16, snd_task=0
SUNRPC: xprt(ffff88022509b000) state=18, snd_task=0
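
(Decoding the "state=" values above: assuming the 3.2-era bit numbers
from include/linux/sunrpc/xprt.h (LOCKED=0, CONNECTED=1, CONNECTING=2,
CLOSE_WAIT=3, BOUND=4, BINDING=5, CLOSING=6, CONNECTION_ABORT=7,
CONNECTION_CLOSE=8), a quick userspace sketch:)

#include <stdio.h>

static const char *xprt_bits[] = {
	"LOCKED", "CONNECTED", "CONNECTING", "CLOSE_WAIT", "BOUND",
	"BINDING", "CLOSING", "CONNECTION_ABORT", "CONNECTION_CLOSE",
};

int main(void)
{
	unsigned long state = 283;	/* the stuck transport above */
	int i;

	for (i = 0; i < 9; i++)
		if (state & (1UL << i))
			printf("XPRT_%s ", xprt_bits[i]);
	printf("\n");
	return 0;
}

With those assumptions, state=283 decodes to LOCKED | CONNECTED |
CLOSE_WAIT | BOUND | CONNECTION_CLOSE, and the healthy state=18
transports to CONNECTED | BOUND, which is consistent with a transport
that is locked and waiting to close but never had its cleanup work
queued.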

We've only seen this once, so we're definitely better than before.

Simon-

Patch

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a35b8e5..d1988cf 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1025,6 +1025,16 @@  static void xs_udp_data_ready(struct sock *sk, int len)
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
+/*
+ * Helper function to force a TCP close if the server is sending
+ * junk and/or it has put us in CLOSE_WAIT
+ */
+static void xs_tcp_force_close(struct rpc_xprt *xprt)
+{
+	set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+	xprt_force_disconnect(xprt);
+}
+
 static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1051,7 +1061,7 @@  static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
 	/* Sanity check of the record length */
 	if (unlikely(transport->tcp_reclen < 8)) {
 		dprintk("RPC:       invalid TCP record fragment length\n");
-		xprt_force_disconnect(xprt);
+		xs_tcp_force_close(xprt);
 		return;
 	}
 	dprintk("RPC:       reading TCP record fragment of length %d\n",
@@ -1132,7 +1142,7 @@  static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
 		break;
 	default:
 		dprintk("RPC:       invalid request message type\n");
-		xprt_force_disconnect(&transport->xprt);
+		xs_tcp_force_close(&transport->xprt);
 	}
 	xs_tcp_check_fraghdr(transport);
 }
@@ -1455,6 +1465,8 @@  static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
 static void xs_sock_mark_closed(struct rpc_xprt *xprt)
 {
 	smp_mb__before_clear_bit();
+	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+	clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	clear_bit(XPRT_CLOSING, &xprt->state);
 	smp_mb__after_clear_bit();
@@ -1512,8 +1524,8 @@  static void xs_tcp_state_change(struct sock *sk)
 		break;
 	case TCP_CLOSE_WAIT:
 		/* The server initiated a shutdown of the socket */
-		xprt_force_disconnect(xprt);
 		xprt->connect_cookie++;
+		xs_tcp_force_close(xprt);
 	case TCP_CLOSING:
 		/*
 		 * If the server closed down the connection, make sure that
@@ -2199,8 +2211,7 @@  static void xs_tcp_setup_socket(struct work_struct *work)
 		/* We're probably in TIME_WAIT. Get rid of existing socket,
 		 * and retry
 		 */
-		set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
-		xprt_force_disconnect(xprt);
+		xs_tcp_force_close(xprt);
 		break;
 	case -ECONNREFUSED:
 	case -ECONNRESET: