[v3,40/44] SUNRPC: Simplify TCP receive code by switching to using iterators

Message ID 20180917130335.112832-41-trond.myklebust@hammerspace.com (mailing list archive)
State New, archived
Series Convert RPC client transmission to a queued model

Commit Message

Trond Myklebust Sept. 17, 2018, 1:03 p.m. UTC
Most of this code should also be reusable with other socket types.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprtsock.h |  19 +-
 include/trace/events/sunrpc.h   |  15 +-
 net/sunrpc/xprtsock.c           | 694 +++++++++++++++-----------------
 3 files changed, 335 insertions(+), 393 deletions(-)

Comments

Trond Myklebust Sept. 17, 2018, 8:44 p.m. UTC | #1
On Mon, 2018-09-17 at 09:03 -0400, Trond Myklebust wrote:
> Most of this code should also be reusable with other socket types.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  include/linux/sunrpc/xprtsock.h |  19 +-
>  include/trace/events/sunrpc.h   |  15 +-
>  net/sunrpc/xprtsock.c           | 694 +++++++++++++++-----------------
>  3 files changed, 335 insertions(+), 393 deletions(-)
> 
> diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
> index 005cfb6e7238..458bfe0137f5 100644
> --- a/include/linux/sunrpc/xprtsock.h
> +++ b/include/linux/sunrpc/xprtsock.h
> @@ -31,15 +31,16 @@ struct sock_xprt {
>  	 * State of TCP reply receive
>  	 */
>  	struct {
> -		__be32		fraghdr,
> +		struct {
> +			__be32	fraghdr,
>  				xid,
>  				calldir;
> +		} __attribute__((packed));
>  
>  		u32		offset,
>  				len;
>  
> -		unsigned long	copied,
> -				flags;
> +		unsigned long	copied;
>  	} recv;
>  
>  	/*
> @@ -76,21 +77,9 @@ struct sock_xprt {
>  	void			(*old_error_report)(struct sock *);
>  };
>  
> -/*
> - * TCP receive state flags
> - */
> -#define TCP_RCV_LAST_FRAG	(1UL << 0)
> -#define TCP_RCV_COPY_FRAGHDR	(1UL << 1)
> -#define TCP_RCV_COPY_XID	(1UL << 2)
> -#define TCP_RCV_COPY_DATA	(1UL << 3)
> -#define TCP_RCV_READ_CALLDIR	(1UL << 4)
> -#define TCP_RCV_COPY_CALLDIR	(1UL << 5)
> -
>  /*
>   * TCP RPC flags
>   */
> -#define TCP_RPC_REPLY		(1UL << 6)
> -
>  #define XPRT_SOCK_CONNECTING	1U
>  #define XPRT_SOCK_DATA_READY	(2)
>  #define XPRT_SOCK_UPD_TIMEOUT	(3)
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index 0aa347194e0f..19e08d12696c 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -497,16 +497,6 @@ TRACE_EVENT(xs_tcp_data_ready,
>  			__get_str(port), __entry->err, __entry->total)
>  );
>  
> -#define rpc_show_sock_xprt_flags(flags) \
> -	__print_flags(flags, "|", \
> -		{ TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \
> -		{ TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \
> -		{ TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \
> -		{ TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \
> -		{ TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \
> -		{ TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \
> -		{ TCP_RPC_REPLY, "TCP_RPC_REPLY" })
> -
>  TRACE_EVENT(xs_tcp_data_recv,
>  	TP_PROTO(struct sock_xprt *xs),
>  
> @@ -516,7 +506,6 @@ TRACE_EVENT(xs_tcp_data_recv,
>  		__string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
>  		__string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
>  		__field(u32, xid)
> -		__field(unsigned long, flags)
>  		__field(unsigned long, copied)
>  		__field(unsigned int, reclen)
>  		__field(unsigned long, offset)
> @@ -526,15 +515,13 @@
>  		__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
>  		__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
>  		__entry->xid = be32_to_cpu(xs->recv.xid);
> -		__entry->flags = xs->recv.flags;
>  		__entry->copied = xs->recv.copied;
>  		__entry->reclen = xs->recv.len;
>  		__entry->offset = xs->recv.offset;
>  	),
>  
> -	TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
> +	TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%lu",
>  			__get_str(addr), __get_str(port), __entry->xid,
> -			rpc_show_sock_xprt_flags(__entry->flags),
>  			__entry->copied, __entry->reclen, __entry->offset)
>  );
>  
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index f16406228ead..5269ad98bb08 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -47,13 +47,13 @@
>  #include <net/checksum.h>
>  #include <net/udp.h>
>  #include <net/tcp.h>
> +#include <linux/bvec.h>
> +#include <linux/uio.h>
>  
>  #include <trace/events/sunrpc.h>
>  
>  #include "sunrpc.h"
>  
> -#define RPC_TCP_READ_CHUNK_SZ	(3*512*1024)
> -
>  static void xs_close(struct rpc_xprt *xprt);
>  static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
>  		struct socket *sock);
> @@ -325,6 +325,320 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
>  		}
>  }
>  
> +static size_t
> +xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
> +{
> +	size_t i,n;
> +
> +	if (!(buf->flags & XDRBUF_SPARSE_PAGES))
> +		return want;
> +	if (want > buf->page_len)
> +		want = buf->page_len;
> +	n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +	for (i = 0; i < n; i++) {
> +		if (buf->pages[i])
> +			continue;
> +		buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
> +		if (!buf->pages[i]) {
> +			buf->page_len = (i * PAGE_SIZE) - buf->page_base;
> +			return buf->page_len;
> +		}
> +	}
> +	return want;
> +}
> +
> +static ssize_t
> +xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
> +{
> +	ssize_t ret;
> +	if (seek != 0)
> +		iov_iter_advance(&msg->msg_iter, seek);
> +	ret = sock_recvmsg(sock, msg, flags);
> +	return ret > 0 ? ret + seek : ret;
> +}
> +
> +static ssize_t
> +xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
> +		struct kvec *kvec, size_t count, size_t seek)
> +{
> +	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
> +	return xs_sock_recvmsg(sock, msg, flags, seek);
> +}
> +
> +static ssize_t
> +xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
> +		struct bio_vec *bvec, unsigned long nr, size_t count,
> +		size_t seek)
> +{
> +	iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
> +	return xs_sock_recvmsg(sock, msg, flags, seek);
> +}
> +
> +static ssize_t
> +xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
> +		size_t count)
> +{
> +	struct kvec kvec = { 0 };
> +	return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
> +}
> +
> +static ssize_t
> +xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
> +		struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
> +{
> +	size_t want, seek_init = seek, offset = 0;
> +	ssize_t ret;
> +
> +	if (seek < buf->head[0].iov_len) {
> +		want = min_t(size_t, count, buf->head[0].iov_len);
> +		ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
> +		if (ret <= 0)
> +			goto sock_err;
> +		offset += ret;
> +		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
> +			goto out;
> +		if (ret != want)
> +			goto eagain;
> +		seek = 0;
> +	} else {
> +		seek -= buf->head[0].iov_len;
> +		offset += buf->head[0].iov_len;
> +	}
> +	if (buf->page_len && seek < buf->page_len) {
> +		want = min_t(size_t, count - offset, buf->page_len);
> +		want = xs_alloc_sparse_pages(buf, want, GFP_NOWAIT);
> +		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
> +				xdr_buf_pagecount(buf),
> +				want + buf->page_base,
> +				seek + buf->page_base);
> +		if (ret <= 0)
> +			goto sock_err;
> +		offset += ret;

There is a bug here that has been fixed up in the linux-nfs.org testing
branch.

> +		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
> +			goto out;
> +		if (ret != want)
> +			goto eagain;
> +		seek = 0;
> +	} else {
> +		seek -= buf->page_len;
> +		offset += buf->page_len;
> +	}
> +	if (buf->tail[0].iov_len && seek < buf->tail[0].iov_len) {
> +		want = min_t(size_t, count - offset, buf->tail[0].iov_len);
> +		ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
> +		if (ret <= 0)
> +			goto sock_err;
> +		offset += ret;
> +		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
> +			goto out;
> +		if (ret != want)
> +			goto eagain;
> +	} else
> +		offset += buf->tail[0].iov_len;
> +	ret = -EMSGSIZE;
> +	msg->msg_flags |= MSG_TRUNC;
> +out:
> +	*read = offset - seek_init;
> +	return ret;
> +eagain:
> +	ret = -EAGAIN;
> +	goto out;
> +sock_err:
> +	offset += seek;
> +	goto out;
> +}
> +
> +static void
> +xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
> +{
> +	if (!transport->recv.copied) {
> +		if (buf->head[0].iov_len >= transport->recv.offset)
> +			memcpy(buf->head[0].iov_base,
> +					&transport->recv.xid,
> +					transport->recv.offset);
> +		transport->recv.copied = transport->recv.offset;
> +	}
> +}
> +
> +static bool
> +xs_read_stream_request_done(struct sock_xprt *transport)
> +{
> +	return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
> +}
> +
> +static ssize_t
> +xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
> +		int flags, struct rpc_rqst *req)
> +{
> +	struct xdr_buf *buf = &req->rq_private_buf;
> +	size_t want, read;
> +	ssize_t ret;
> +
> +	xs_read_header(transport, buf);
> +
> +	want = transport->recv.len - transport->recv.offset;
> +	ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
> +			transport->recv.copied + want, transport->recv.copied,
> +			&read);
> +	transport->recv.offset += read;
> +	transport->recv.copied += read;
> +	if (transport->recv.offset == transport->recv.len) {
> +		if (xs_read_stream_request_done(transport))
> +			msg->msg_flags |= MSG_EOR;
> +		return transport->recv.copied;
> +	}
> +
> +	switch (ret) {
> +	case -EMSGSIZE:
> +		return transport->recv.copied;
> +	case 0:
> +		return -ESHUTDOWN;
> +	default:
> +		if (ret < 0)
> +			return ret;
> +	}
> +	return -EAGAIN;
> +}
> +
> +static size_t
> +xs_read_stream_headersize(bool isfrag)
> +{
> +	if (isfrag)
> +		return sizeof(__be32);
> +	return 3 * sizeof(__be32);
> +}
> +
> +static ssize_t
> +xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
> +		int flags, size_t want, size_t seek)
> +{
> +	struct kvec kvec = {
> +		.iov_base = &transport->recv.fraghdr,
> +		.iov_len = want,
> +	};
> +	return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
> +}
> +
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> +static ssize_t
> +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
> +{
> +	struct rpc_xprt *xprt = &transport->xprt;
> +	struct rpc_rqst *req;
> +	ssize_t ret;
> +
> +	/* Look up and lock the request corresponding to the given XID */
> +	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
> +	if (!req) {
> +		printk(KERN_WARNING "Callback slot table overflowed\n");
> +		return -ESHUTDOWN;
> +	}
> +
> +	ret = xs_read_stream_request(transport, msg, flags, req);
> +	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
> +		xprt_complete_bc_request(req, ret);
> +
> +	return ret;
> +}
> +#else /* CONFIG_SUNRPC_BACKCHANNEL */
> +static ssize_t
> +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
> +{
> +	return -ESHUTDOWN;
> +}
> +#endif /* CONFIG_SUNRPC_BACKCHANNEL */
> +
> +static ssize_t
> +xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
> +{
> +	struct rpc_xprt *xprt = &transport->xprt;
> +	struct rpc_rqst *req;
> +	ssize_t ret = 0;
> +
> +	/* Look up and lock the request corresponding to the given XID */
> +	spin_lock(&xprt->queue_lock);
> +	req = xprt_lookup_rqst(xprt, transport->recv.xid);
> +	if (!req) {
> +		msg->msg_flags |= MSG_TRUNC;
> +		goto out;
> +	}
> +	xprt_pin_rqst(req);
> +	spin_unlock(&xprt->queue_lock);
> +
> +	ret = xs_read_stream_request(transport, msg, flags, req);
> +
> +	spin_lock(&xprt->queue_lock);
> +	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
> +		xprt_complete_rqst(req->rq_task, ret);
> +	xprt_unpin_rqst(req);
> +out:
> +	spin_unlock(&xprt->queue_lock);
> +	return ret;
> +}
> +
> +static ssize_t
> +xs_read_stream(struct sock_xprt *transport, int flags)
> +{
> +	struct msghdr msg = { 0 };
> +	size_t want, read = 0;
> +	ssize_t ret = 0;
> +
> +	if (transport->recv.len == 0) {
> +		want = xs_read_stream_headersize(transport->recv.copied != 0);
> +		ret = xs_read_stream_header(transport, &msg, flags, want,
> +				transport->recv.offset);
> +		if (ret <= 0)
> +			goto out_err;
> +		transport->recv.offset = ret;
> +		if (ret != want) {
> +			ret = -EAGAIN;
> +			goto out_err;
> +		}
> +		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
> +			RPC_FRAGMENT_SIZE_MASK;
> +		transport->recv.offset -= sizeof(transport->recv.fraghdr);
> +		read = ret;
> +	}
> +
> +	switch (be32_to_cpu(transport->recv.calldir)) {
> +	case RPC_CALL:
> +		ret = xs_read_stream_call(transport, &msg, flags);
> +		break;
> +	case RPC_REPLY:
> +		ret = xs_read_stream_reply(transport, &msg, flags);
> +	}
> +	if (msg.msg_flags & MSG_TRUNC) {
> +		transport->recv.calldir = cpu_to_be32(-1);
> +		transport->recv.copied = -1;
> +	}
> +	if (ret < 0)
> +		goto out_err;
> +	read += ret;
> +	if (transport->recv.offset < transport->recv.len) {
> +		ret = xs_read_discard(transport->sock, &msg, flags,
> +				transport->recv.len - transport->recv.offset);
> +		if (ret <= 0)
> +			goto out_err;
> +		transport->recv.offset += ret;
> +		read += ret;

...and another bug here.

> +	}
> +	if (xs_read_stream_request_done(transport)) {
> +		trace_xs_tcp_data_recv(transport);
> +		transport->recv.copied = 0;
> +	}
> +	transport->recv.offset = 0;
> +	transport->recv.len = 0;
> +	return read;
> +out_err:
> +	switch (ret) {
> +	case 0:
> +	case -ESHUTDOWN:
> +		xprt_force_disconnect(&transport->xprt);
> +		return -ESHUTDOWN;
> +	}
> +	return ret;
> +}
> +
>  #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
>  
>  static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
> @@ -484,6 +798,12 @@ static int xs_nospace(struct rpc_rqst *req)
>  	return ret;
>  }
>  
> +static void
> +xs_stream_prepare_request(struct rpc_rqst *req)
> +{
> +	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
> +}
> +
>  /*
>   * Determine if the previous message in the stream was aborted before it
>   * could complete transmission.
> @@ -1157,263 +1477,7 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt)
>  	xprt_force_disconnect(xprt);
>  }
>  
> -static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
> -{
> -	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
> -	size_t len, used;
> -	char *p;
> -
> -	p = ((char *) &transport->recv.fraghdr) + transport->recv.offset;
> -	len = sizeof(transport->recv.fraghdr) - transport->recv.offset;
> -	used = xdr_skb_read_bits(desc, p, len);
> -	transport->recv.offset += used;
> -	if (used != len)
> -		return;
> -
> -	transport->recv.len = ntohl(transport->recv.fraghdr);
> -	if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT)
> -		transport->recv.flags |= TCP_RCV_LAST_FRAG;
> -	else
> -		transport->recv.flags &= ~TCP_RCV_LAST_FRAG;
> -	transport->recv.len &= RPC_FRAGMENT_SIZE_MASK;
> -
> -	transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR;
> -	transport->recv.offset = 0;
> -
> -	/* Sanity check of the record length */
> -	if (unlikely(transport->recv.len < 8)) {
> -		dprintk("RPC:       invalid TCP record fragment length\n");
> -		xs_tcp_force_close(xprt);
> -		return;
> -	}
> -	dprintk("RPC:       reading TCP record fragment of length %d\n",
> -			transport->recv.len);
> -}
> -
> -static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
> -{
> -	if (transport->recv.offset == transport->recv.len) {
> -		transport->recv.flags |= TCP_RCV_COPY_FRAGHDR;
> -		transport->recv.offset = 0;
> -		if (transport->recv.flags & TCP_RCV_LAST_FRAG) {
> -			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
> -			transport->recv.flags |= TCP_RCV_COPY_XID;
> -			transport->recv.copied = 0;
> -		}
> -	}
> -}
> -
> -static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
> -{
> -	size_t len, used;
> -	char *p;
> -
> -	len = sizeof(transport->recv.xid) - transport->recv.offset;
> -	dprintk("RPC:       reading XID (%zu bytes)\n", len);
> -	p = ((char *) &transport->recv.xid) + transport->recv.offset;
> -	used = xdr_skb_read_bits(desc, p, len);
> -	transport->recv.offset += used;
> -	if (used != len)
> -		return;
> -	transport->recv.flags &= ~TCP_RCV_COPY_XID;
> -	transport->recv.flags |= TCP_RCV_READ_CALLDIR;
> -	transport->recv.copied = 4;
> -	dprintk("RPC:       reading %s XID %08x\n",
> -			(transport->recv.flags & TCP_RPC_REPLY) ? "reply for"
> -							      : "request with",
> -			ntohl(transport->recv.xid));
> -	xs_tcp_check_fraghdr(transport);
> -}
> -
> -static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
> -				       struct xdr_skb_reader *desc)
> -{
> -	size_t len, used;
> -	u32 offset;
> -	char *p;
> -
> -	/*
> -	 * We want transport->recv.offset to be 8 at the end of this routine
> -	 * (4 bytes for the xid and 4 bytes for the call/reply flag).
> -	 * When this function is called for the first time,
> -	 * transport->recv.offset is 4 (after having already read the xid).
> -	 */
> -	offset = transport->recv.offset - sizeof(transport->recv.xid);
> -	len = sizeof(transport->recv.calldir) - offset;
> -	dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
> -	p = ((char *) &transport->recv.calldir) + offset;
> -	used = xdr_skb_read_bits(desc, p, len);
> -	transport->recv.offset += used;
> -	if (used != len)
> -		return;
> -	transport->recv.flags &= ~TCP_RCV_READ_CALLDIR;
> -	/*
> -	 * We don't yet have the XDR buffer, so we will write the calldir
> -	 * out after we get the buffer from the 'struct rpc_rqst'
> -	 */
> -	switch (ntohl(transport->recv.calldir)) {
> -	case RPC_REPLY:
> -		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
> -		transport->recv.flags |= TCP_RCV_COPY_DATA;
> -		transport->recv.flags |= TCP_RPC_REPLY;
> -		break;
> -	case RPC_CALL:
> -		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
> -		transport->recv.flags |= TCP_RCV_COPY_DATA;
> -		transport->recv.flags &= ~TCP_RPC_REPLY;
> -		break;
> -	default:
> -		dprintk("RPC:       invalid request message type\n");
> -		xs_tcp_force_close(&transport->xprt);
> -	}
> -	xs_tcp_check_fraghdr(transport);
> -}
> -
> -static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
> -				     struct xdr_skb_reader *desc,
> -				     struct rpc_rqst *req)
> -{
> -	struct sock_xprt *transport =
> -				container_of(xprt, struct sock_xprt, xprt);
> -	struct xdr_buf *rcvbuf;
> -	size_t len;
> -	ssize_t r;
> -
> -	rcvbuf = &req->rq_private_buf;
> -
> -	if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) {
> -		/*
> -		 * Save the RPC direction in the XDR buffer
> -		 */
> -		memcpy(rcvbuf->head[0].iov_base + transport->recv.copied,
> -			&transport->recv.calldir,
> -			sizeof(transport->recv.calldir));
> -		transport->recv.copied += sizeof(transport->recv.calldir);
> -		transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR;
> -	}
> -
> -	len = desc->count;
> -	if (len > transport->recv.len - transport->recv.offset)
> -		desc->count = transport->recv.len - transport->recv.offset;
> -	r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied,
> -					  desc, xdr_skb_read_bits);
> -
> -	if (desc->count) {
> -		/* Error when copying to the receive buffer,
> -		 * usually because we weren't able to allocate
> -		 * additional buffer pages. All we can do now
> -		 * is turn off TCP_RCV_COPY_DATA, so the request
> -		 * will not receive any additional updates,
> -		 * and time out.
> -		 * Any remaining data from this record will
> -		 * be discarded.
> -		 */
> -		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
> -		dprintk("RPC:       XID %08x truncated request\n",
> -				ntohl(transport->recv.xid));
> -		dprintk("RPC:       xprt = %p, recv.copied = %lu, "
> -				"recv.offset = %u, recv.len = %u\n",
> -				xprt, transport->recv.copied,
> -				transport->recv.offset, transport->recv.len);
> -		return;
> -	}
> -
> -	transport->recv.copied += r;
> -	transport->recv.offset += r;
> -	desc->count = len - r;
> -
> -	dprintk("RPC:       XID %08x read %zd bytes\n",
> -			ntohl(transport->recv.xid), r);
> -	dprintk("RPC:       xprt = %p, recv.copied = %lu, recv.offset = %u, "
> -			"recv.len = %u\n", xprt, transport->recv.copied,
> -			transport->recv.offset, transport->recv.len);
> -
> -	if (transport->recv.copied == req->rq_private_buf.buflen)
> -		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
> -	else if (transport->recv.offset == transport->recv.len) {
> -		if (transport->recv.flags & TCP_RCV_LAST_FRAG)
> -			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
> -	}
> -}
> -
> -/*
> - * Finds the request corresponding to the RPC xid and invokes the common
> - * tcp read code to read the data.
> - */
> -static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
> -				    struct xdr_skb_reader *desc)
> -{
> -	struct sock_xprt *transport =
> -				container_of(xprt, struct sock_xprt, xprt);
> -	struct rpc_rqst *req;
> -
> -	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->recv.xid));
> -
> -	/* Find and lock the request corresponding to this xid */
> -	spin_lock(&xprt->queue_lock);
> -	req = xprt_lookup_rqst(xprt, transport->recv.xid);
> -	if (!req) {
> -		dprintk("RPC:       XID %08x request not found!\n",
> -				ntohl(transport->recv.xid));
> -		spin_unlock(&xprt->queue_lock);
> -		return -1;
> -	}
> -	xprt_pin_rqst(req);
> -	spin_unlock(&xprt->queue_lock);
> -
> -	xs_tcp_read_common(xprt, desc, req);
> -
> -	spin_lock(&xprt->queue_lock);
> -	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
> -		xprt_complete_rqst(req->rq_task, transport->recv.copied);
> -	xprt_unpin_rqst(req);
> -	spin_unlock(&xprt->queue_lock);
> -	return 0;
> -}
> -
>  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> -/*
> - * Obtains an rpc_rqst previously allocated and invokes the common
> - * tcp read code to read the data.  The result is placed in the callback
> - * queue.
> - * If we're unable to obtain the rpc_rqst we schedule the closing of the
> - * connection and return -1.
> - */
> -static int xs_tcp_read_callback(struct rpc_xprt *xprt,
> -				       struct xdr_skb_reader *desc)
> -{
> -	struct sock_xprt *transport =
> -				container_of(xprt, struct sock_xprt, xprt);
> -	struct rpc_rqst *req;
> -
> -	/* Look up the request corresponding to the given XID */
> -	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
> -	if (req == NULL) {
> -		printk(KERN_WARNING "Callback slot table overflowed\n");
> -		xprt_force_disconnect(xprt);
> -		return -1;
> -	}
> -
> -	dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
> -	xs_tcp_read_common(xprt, desc, req);
> -
> -	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
> -		xprt_complete_bc_request(req, transport->recv.copied);
> -
> -	return 0;
> -}
> -
> -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
> -					struct xdr_skb_reader *desc)
> -{
> -	struct sock_xprt *transport =
> -				container_of(xprt, struct sock_xprt, xprt);
> -
> -	return (transport->recv.flags & TCP_RPC_REPLY) ?
> -		xs_tcp_read_reply(xprt, desc) :
> -		xs_tcp_read_callback(xprt, desc);
> -}
> -
>  static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
>  {
>  	int ret;
> @@ -1429,106 +1493,14 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
>  {
>  	return PAGE_SIZE;
>  }
> -#else
> -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
> -					struct xdr_skb_reader *desc)
> -{
> -	return xs_tcp_read_reply(xprt, desc);
> -}
>  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
>  
> -/*
> - * Read data off the transport.  This can be either an RPC_CALL or an
> - * RPC_REPLY.  Relay the processing to helper functions.
> - */
> -static void xs_tcp_read_data(struct rpc_xprt *xprt,
> -				    struct xdr_skb_reader *desc)
> -{
> -	struct sock_xprt *transport =
> -				container_of(xprt, struct sock_xprt, xprt);
> -
> -	if (_xs_tcp_read_data(xprt, desc) == 0)
> -		xs_tcp_check_fraghdr(transport);
> -	else {
> -		/*
> -		 * The transport_lock protects the request handling.
> -		 * There's no need to hold it to update the recv.flags.
> -		 */
> -		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
> -	}
> -}
> -
> -static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
> -{
> -	size_t len;
> -
> -	len = transport->recv.len - transport->recv.offset;
> -	if (len > desc->count)
> -		len = desc->count;
> -	desc->count -= len;
> -	desc->offset += len;
> -	transport->recv.offset += len;
> -	dprintk("RPC:       discarded %zu bytes\n", len);
> -	xs_tcp_check_fraghdr(transport);
> -}
> -
> -static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
> -{
> -	struct rpc_xprt *xprt = rd_desc->arg.data;
> -	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
> -	struct xdr_skb_reader desc = {
> -		.skb	= skb,
> -		.offset	= offset,
> -		.count	= len,
> -	};
> -	size_t ret;
> -
> -	dprintk("RPC:       xs_tcp_data_recv started\n");
> -	do {
> -		trace_xs_tcp_data_recv(transport);
> -		/* Read in a new fragment marker if necessary */
> -		/* Can we ever really expect to get completely empty fragments? */
> -		if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) {
> -			xs_tcp_read_fraghdr(xprt, &desc);
> -			continue;
> -		}
> -		/* Read in the xid if necessary */
> -		if (transport->recv.flags & TCP_RCV_COPY_XID) {
> -			xs_tcp_read_xid(transport, &desc);
> -			continue;
> -		}
> -		/* Read in the call/reply flag */
> -		if (transport->recv.flags & TCP_RCV_READ_CALLDIR) {
> -			xs_tcp_read_calldir(transport, &desc);
> -			continue;
> -		}
> -		/* Read in the request data */
> -		if (transport->recv.flags & TCP_RCV_COPY_DATA) {
> -			xs_tcp_read_data(xprt, &desc);
> -			continue;
> -		}
> -		/* Skip over any trailing bytes on short reads */
> -		xs_tcp_read_discard(transport, &desc);
> -	} while (desc.count);
> -	ret = len - desc.count;
> -	if (ret < rd_desc->count)
> -		rd_desc->count -= ret;
> -	else
> -		rd_desc->count = 0;
> -	trace_xs_tcp_data_recv(transport);
> -	dprintk("RPC:       xs_tcp_data_recv done\n");
> -	return ret;
> -}
> -
>  static void xs_tcp_data_receive(struct sock_xprt *transport)
>  {
>  	struct rpc_xprt *xprt = &transport->xprt;
>  	struct sock *sk;
> -	read_descriptor_t rd_desc = {
> -		.arg.data = xprt,
> -	};
> -	unsigned long total = 0;
> -	int read = 0;
> +	size_t read = 0;
> +	ssize_t ret = 0;
>  
>  restart:
>  	mutex_lock(&transport->recv_mutex);
> @@ -1536,18 +1508,12 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
>  	if (sk == NULL)
>  		goto out;
>  
> -	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
>  	for (;;) {
> -		rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
> -		lock_sock(sk);
> -		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
> -		if (rd_desc.count != 0 || read < 0) {
> -			clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
> -			release_sock(sk);
> +		clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
> +		ret = xs_read_stream(transport, MSG_DONTWAIT | MSG_NOSIGNAL);
> +		if (ret < 0)
>  			break;
> -		}
> -		release_sock(sk);
> -		total += read;
> +		read += ret;
>  		if (need_resched()) {
>  			mutex_unlock(&transport->recv_mutex);
>  			cond_resched();
> @@ -1558,7 +1524,7 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
>  		queue_work(xprtiod_workqueue, &transport->recv_worker);
>  out:
>  	mutex_unlock(&transport->recv_mutex);
> -	trace_xs_tcp_data_ready(xprt, read, total);
> +	trace_xs_tcp_data_ready(xprt, ret, read);
>  }
>  
>  static void xs_tcp_data_receive_workfn(struct work_struct *work)
> @@ -2380,7 +2346,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
>  	transport->recv.offset = 0;
>  	transport->recv.len = 0;
>  	transport->recv.copied = 0;
> -	transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
>  	transport->xmit.offset = 0;
>  
>  	/* Tell the socket layer to start connecting... */
> @@ -2802,6 +2767,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
>  	.connect		= xs_connect,
>  	.buf_alloc		= rpc_malloc,
>  	.buf_free		= rpc_free,
> +	.prepare_request	= xs_stream_prepare_request,
>  	.send_request		= xs_tcp_send_request,
>  	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
>  	.close			= xs_tcp_shutdown,
Catalin Marinas Nov. 9, 2018, 11:19 a.m. UTC | #2
Hi Trond,

On Mon, Sep 17, 2018 at 09:03:31AM -0400, Trond Myklebust wrote:
> Most of this code should also be reusable with other socket types.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  include/linux/sunrpc/xprtsock.h |  19 +-
>  include/trace/events/sunrpc.h   |  15 +-
>  net/sunrpc/xprtsock.c           | 694 +++++++++++++++-----------------
>  3 files changed, 335 insertions(+), 393 deletions(-)

With latest mainline (24ccea7e102d, it includes Al Viro's iov_iter
fixup) I started hitting some severe slowdown and systemd timeouts with
nfsroot on arm64 machines (physical or guests under KVM). Interestingly,
it only happens when the client kernel is configured with 64K pages, the
4K pages configuration runs fine. It also runs fine if I add rsize=65536
to the nfsroot= argument.
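
For reference, combining the two simply means booting with something like
this (same export as in the info below):

  nfsroot=<some-server>:/srv/nfs/debian-arm64,tcp,v4,rsize=65536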

Bisecting led me to commit 277e4ab7d530 ("SUNRPC: Simplify TCP receive
code by switching to using iterators"). Prior to this commit, it works
fine.

Some more info:

- defconfig with CONFIG_ARM64_64K_PAGES enabled

- kernel cmdline arg: nfsroot=<some-server>:/srv/nfs/debian-arm64,tcp,v4

- if it matters, the server is also an arm64 machine running 4.19 with
  4K pages configuration

I haven't figured out what's wrong or even how to debug this as I'm not
familiar with the sunrpc code. Any suggestion?
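
(One thing I may try in the meantime, assuming nfs-utils' rpcdebug works in
this setup, is turning up the sunrpc transport debug messages:

  rpcdebug -m rpc -s xprt trans

but a better approach is welcome.)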

Thanks.
Cristian Marussi Nov. 29, 2018, 7:28 p.m. UTC | #3
Hi Trond, Catalin

On 09/11/2018 11:19, Catalin Marinas wrote:
> Hi Trond,
> 
> On Mon, Sep 17, 2018 at 09:03:31AM -0400, Trond Myklebust wrote:
>> Most of this code should also be reusable with other socket types.
>>
>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>> ---
>>  include/linux/sunrpc/xprtsock.h |  19 +-
>>  include/trace/events/sunrpc.h   |  15 +-
>>  net/sunrpc/xprtsock.c           | 694 +++++++++++++++-----------------
>>  3 files changed, 335 insertions(+), 393 deletions(-)
> 
> With latest mainline (24ccea7e102d, it includes Al Viro's iov_iter
> fixup) I started hitting some severe slowdown and systemd timeouts with
> nfsroot on arm64 machines (physical or guests under KVM). Interestingly,
> it only happens when the client kernel is configured with 64K pages, the
> 4K pages configuration runs fine. It also runs fine if I add rsize=65536
> to the nfsroot= argument.
> 
> Bisecting led me to commit 277e4ab7d530 ("SUNRPC: Simplify TCP receive
> code by switching to using iterators"). Prior to this commit, it works
> fine.
> 
> Some more info:
> 
> - defconfig with CONFIG_ARM64_64K_PAGES enabled
> 
> - kernel cmdline arg: nfsroot=<some-server>:/srv/nfs/debian-arm64,tcp,v4
> 
> - if it matters, the server is also an arm64 machine running 4.19 with
>   4K pages configuration
> 
> I haven't figured out what's wrong or even how to debug this as I'm not
> familiar with the sunrpc code. Any suggestion?
> 
> Thanks.
> 

I've done a few experiments and observations on this, since it was seriously
impacting all forms of testing on arm64 with a 64K pages configuration.

I can confirm that the rsize=65536 workaround mentioned above by Catalin is
effective for me too, as is resetting back to before the commit mentioned in
the subject.

In the following I tested instead with:

- linus arm64 v4.20-rc1 64K pages
  + "Debug Lockups and Hangs" Enabled
- hw Juno-r2
- fully NFS mounted rootfs (Debian 9)
- NO rsize workaround
- NFS Client config(nfsstat -m)
Flags:
rw,relatime,vers=4.0,rsize=4096,wsize=4096,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.0.1,local_lock=none,addr=192.168.0.254

Observations:

1. Despite some general boot slowdown (not so evident in my setup), I hit the
issue when simply trying to launch LTP or LKP tests (everything is in the NFS
mounted rootfs): the immediately observable behavior is just that the
application gets 'apparently' stuck straight away (NO output whatsoever).
Waiting some seconds yields no progress or result, NOR is any lockup detected
by the kernel. A good deal of effort is needed to kill the process at this
point...BUT it is feasible (many SIGSTOP + KILL)...and the system is back alive.


2. Running LKP again via 'strace', we can observe the process apparently
starting fine but then suddenly hanging, randomly, multiple times: at first on
an execve() and then on some read() calls while trying to load its own file
components; each hang lasts approximately 30-45 seconds.

In LKP as an example:

$ strace lkp run ./dbench-100%.yaml

....
newfstatat(AT_FDCWD, "/opt/lkp-tests/bin/run-local", {st_mode=S_IFREG|0755,
st_size=4367, ...}, 0) = 0
faccessat(AT_FDCWD, "/opt/lkp-tests/bin/run-local", X_OK) = 0
execve("/opt/lkp-tests/bin/run-local", ["/opt/lkp-tests/bin/run-local",
"./dbench-100%.yaml"], [/* 12 vars */]                            <<< HANGGG
...
...
30-40 secs..
....
openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems.rb",
O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7
fstat(7, {st_mode=S_IFREG|0644, st_size=33018, ...}) = 0
close(7)                                = 0
getuid()                                = 0
geteuid()                               = 0
getgid()                                = 0
getegid()                               = 0
openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems.rb",
O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7
fcntl(7, F_SETFL, O_RDONLY)             = 0
fstat(7, {st_mode=S_IFREG|0644, st_size=33018, ...}) = 0
fstat(7, {st_mode=S_IFREG|0644, st_size=33018, ...}) = 0
ioctl(7, TCGETS, 0xffffdd0895d8)        = -1 ENOTTY (Inappropriate ioctl for device)
read(7, <<<HANGGSSS

....
~30-40 secs
....

"# frozen_string_literal: true\n# "..., 8192) = 8192
read(7, ") as the standard configuration "..., 8192) = 8192
brk(0xaaaaeea70000)                     = 0xaaaaeea70000
read(7, "ady been required, then we have "..., 8192) = 8192
read(7, "lf.user_home\n    @user_home ||= "..., 8192) = 8192
read(7, "    require \"rubygems/defaults/#"..., 8192) = 250
read(7, "", 8192)                       = 0
close(7)                                = 0
....
....

openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems/specification.rb",
O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7
fstat(7, {st_mode=S_IFREG|0644, st_size=81998, ...}) = 0
close(7)                                = 0
getuid()                                = 0
geteuid()                               = 0
getgid()                                = 0
getegid()                               = 0
openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems/specification.rb",
O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7
fcntl(7, F_SETFL, O_RDONLY)             = 0
fstat(7, {st_mode=S_IFREG|0644, st_size=81998, ...}) = 0
fstat(7, {st_mode=S_IFREG|0644, st_size=81998, ...}) = 0
ioctl(7, TCGETS, 0xffffef390b38)        = -1 ENOTTY (Inappropriate ioctl for device)
read(7, "# -*- coding: utf-8 -*-\n# frozen"..., 8192) = 8192
read(7, "e platform attribute appropriate"..., 8192) = 8192
read(7, "sealicense.com/.\n  #\n  # You sho"..., 8192) = 8192
read(7, " TODO: find all extraneous adds\n"..., 8192) = 8192
read(7, "rn a list of all outdated local "..., 8192) = 8192
read(7, "          = authors.collect { |a"..., 8192) = 8192
read(7, "ends on.\n  #\n  # Use #add_depend"..., 8192) = 8192
read(7, "ns true you\n  # probably want to"..., 8192) = 8192
read(7,  <<<< HANGGGGG

Note that this last hang happens halfway through a file read!


3. Having a look at the underlying network traffic with Wireshark, I could in
fact see that the NFS packets stop flowing completely for 30-40s when all of
the above happens...but I cannot see any errors, timeouts or NFS retries.
The same happened when I tried to reduce the NFS timeo to 150 (15 secs) from
the original 600 (60 secs). This lack of retries was confirmed by the stats:
root@sqwt-ubuntu:/opt/lkp-tests# nfsstat -r
Client rpc stats:
calls      retrans    authrefrsh
16008      0          16019

The only notable thing is a routine TCP KeepAlive sent by the NFS Client TCP
stack during the 30/40 secs quiet window: the NFS data flow restarts anyway
only another 10/12 secs after the KeepAlive is sent and ACKed by the server,
so the KeepAlive does not seem to be the trigger for the restart itself.
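
For the record, a capture along these lines is enough to see the quiet window
(assuming NFSv4 on the usual port 2049, with the server address taken from the
mount options above):

  tcpdump -s0 -w nfs-hang.pcap host 192.168.0.254 and port 2049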


4. Waiting forever (minutes), I was finally able to see LKP completing
initialization and the dbench test being run. Below you can see the results
with and without the rsize workaround:

DBENCH SANE RESULTS WITH WORKAROUND RSIZE=65536
------------------------------------------------
...
   6    122759     5.62 MB/sec  execute 599 sec  latency 84.304 ms
   6  cleanup 600 sec
   0  cleanup 600 sec

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX     108397    11.100   358.921
 Close          79762    16.243   406.213
 Rename          4582    19.399   259.180
 Unlink         21728     3.610   190.564
 Qpathinfo      98367     1.773   289.554
 Qfileinfo      17163     9.917   232.687
 Qfsinfo        17903     2.130   216.804
 Sfileinfo       8828    17.427   234.069
 Find           37915     3.478   287.326
 WriteX         53503     0.048     2.992
 ReadX         169707     0.592   199.341
 LockX            350    13.536   242.800
 UnlockX          350     2.801   124.317
 Flush           7548    20.248   229.864

Throughput 5.61504 MB/sec  6 clients  6 procs  max_latency=406.225 ms


DBENCH RESULTS WITHOUT WORKAROUND
---------------------------------
...
   6    111273     5.06 MB/sec  execute 599 sec  latency 4066.072 ms
   6  cleanup 600 sec
   3  cleanup 601 sec
   0  cleanup 601 sec

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX      97674    12.244 13773.786
 Close          71583    19.126 13774.836
 Rename          4135    19.177   277.171
 Unlink         19881     4.286 12842.441
 Qpathinfo      88303     1.942 12923.151
 Qfileinfo      15305     9.636   203.481
 Qfsinfo        16305     1.801   227.405
 Sfileinfo       7960    15.871   186.799
 Find           34164     3.409   255.098
 WriteX         48105     0.053     5.428
 ReadX         152460     0.926 13759.913
 LockX            314     7.562    53.131
 UnlockX          314     1.847    47.083
 Flush           6872    19.222   200.180

Throughput 5.06232 MB/sec  6 clients  6 procs  max_latency=13774.850 ms

Then I also tried running:

./nfstest_io -d /mnt/t/data -v all -n 10 -r 3600

with WORKAROUND it took:
INFO: 2018-11-28 16:31:25.031280  TIME:        449 secs

WITHOUT:
INFO: 2018-11-28 17:55:39.438688  TIME:       1348 secs


5. All of the above 'slowness' disappeared when I re-ran the same tests a
second time, probably because NFS had cached everything locally during the
first run.


6. Reboot hangs similarly.


The fact that the traffic stops without triggering any NFS timeo and retries
makes me think that it is the egressing NFS RPC requests themselves that get
stuck somehow (but I'm far from being an NFS expert).

Any ideas or thoughts? Additional sensible test cases to run?
Or hints on where to look inside the NFS code, Trond?
(I'll re-test with a newer 4.20 RC and try to ftrace something...in the next
few days)

Thanks

Cristian
Trond Myklebust Nov. 29, 2018, 7:56 p.m. UTC | #4
On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote:
> Hi Trond, Catalin
> 
> On 09/11/2018 11:19, Catalin Marinas wrote:
> > Hi Trond,
> > 
> > On Mon, Sep 17, 2018 at 09:03:31AM -0400, Trond Myklebust wrote:
> > > Most of this code should also be reusable with other socket
> > > types.
> > > 
> > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > ---
> > >  include/linux/sunrpc/xprtsock.h |  19 +-
> > >  include/trace/events/sunrpc.h   |  15 +-
> > >  net/sunrpc/xprtsock.c           | 694 +++++++++++++++-----------------
> > >  3 files changed, 335 insertions(+), 393 deletions(-)
> > 
> > With latest mainline (24ccea7e102d, it includes Al Viro's iov_iter
> > fixup) I started hitting some severe slowdown and systemd timeouts with
> > nfsroot on arm64 machines (physical or guests under KVM). Interestingly,
> > it only happens when the client kernel is configured with 64K pages, the
> > 4K pages configuration runs fine. It also runs fine if I add rsize=65536
> > to the nfsroot= argument.
> > 
> > Bisecting led me to commit 277e4ab7d530 ("SUNRPC: Simplify TCP receive
> > code by switching to using iterators"). Prior to this commit, it works
> > fine.
> > 
> > Some more info:
> > 
> > - defconfig with CONFIG_ARM64_64K_PAGES enabled
> > 
> > - kernel cmdline arg: nfsroot=<some-server>:/srv/nfs/debian-arm64,tcp,v4
> > 
> > - if it matters, the server is also an arm64 machine running 4.19 with
> >   4K pages configuration
> > 

Question to you both: when this happens, does /proc/*/stack show any of
the processes hanging in the socket or sunrpc code? If so, can you
please send me examples of those stack traces (i.e. the contents of
/proc/<pid>/stack for the processes that are hanging).

I'd be particularly interested if the processes in question are
related to the rpciod workqueue.
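
Something like this (untested sketch) should dump the stack of every task
stuck in uninterruptible sleep, which would catch both your hanging user
processes and any stuck kworkers:

  for p in /proc/[0-9]*; do
      grep -q '^State:.D' "$p/status" 2>/dev/null || continue
      echo "== $p =="; cat "$p/stack"
  done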

Thanks
   Trond
Cristian Marussi Nov. 30, 2018, 4:19 p.m. UTC | #5
Hi

On 29/11/2018 19:56, Trond Myklebust wrote:
> On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote:
>> Hi Trond, Catalin
[snip]
> 
> Question to you both: when this happens, does /proc/*/stack show any of
> the processes hanging in the socket or sunrpc code? If so, can you
> please send me examples of those stack traces (i.e. the contents of
> /proc/<pid>/stack for the processes that are hanging)

(using a reverse shell since starting ssh causes a lot of pain and traffic)

Looking at NFS traffic holes (30-40 secs) to detect Client side various HANGS
----------------------------------------------------------------------------

root@sqwt-ubuntu:/opt/lkp-tests# nc -lk -e /bin/bash -s 192.168.0.1 -p 1235 &
root@sqwt-ubuntu:/opt/lkp-tests# lkp run ./dbench-100%.yaml

$ nc 192.168.0.1 1235

cat /proc/2833/cmdline
ruby/opt/lkp-tests/bin/run-local./dbench-100%.yaml

HANG CLOSE
----------
cat /proc/2833/stack
[<0>] __switch_to+0x6c/0x90
[<0>] rpc_wait_bit_killable+0x2c/0xb0
[<0>] __rpc_wait_for_completion_task+0x3c/0x48
[<0>] nfs4_do_close+0x1ec/0x2b0
[<0>] __nfs4_close+0x130/0x198
[<0>] nfs4_close_sync+0x34/0x40
[<0>] nfs4_close_context+0x40/0x50
[<0>] __put_nfs_open_context+0xac/0x118
[<0>] nfs_file_clear_open_context+0x38/0x58
[<0>] nfs_file_release+0x7c/0x90
[<0>] __fput+0x94/0x1c0
[<0>] ____fput+0x20/0x30
[<0>] task_work_run+0x98/0xb8
[<0>] do_notify_resume+0x2d0/0x318
[<0>] work_pending+0x8/0x10
[<0>] 0xffffffffffffffff

HANG READ
---------
cat /proc/2833/stack
[<0>] __switch_to+0x6c/0x90
[<0>] io_schedule+0x20/0x40
[<0>] wait_on_page_bit_killable+0x164/0x260
[<0>] generic_file_read_iter+0x1c4/0x820
[<0>] nfs_file_read+0xa4/0x108
[<0>] __vfs_read+0x120/0x170
[<0>] vfs_read+0x94/0x150
[<0>] ksys_read+0x6c/0xd8
[<0>] __arm64_sys_read+0x24/0x30
[<0>] el0_svc_handler+0x7c/0x118
[<0>] el0_svc+0x8/0xc
[<0>] 0xffffffffffffffff


HANG STAT
---------
cat /proc/2833/stack
[<0>] __switch_to+0x6c/0x90
[<0>] rpc_wait_bit_killable+0x2c/0xb0
[<0>] __rpc_execute+0x1cc/0x528
[<0>] rpc_execute+0xe4/0x1b0
[<0>] rpc_run_task+0x130/0x168
[<0>] nfs4_call_sync_sequence+0x80/0xc8
[<0>] _nfs4_proc_getattr+0xc8/0xf8
[<0>] nfs4_proc_getattr+0x88/0x1d8
[<0>] __nfs_revalidate_inode+0x1f8/0x468
[<0>] nfs_getattr+0x14c/0x420
[<0>] vfs_getattr_nosec+0x7c/0x98
[<0>] vfs_getattr+0x48/0x58
[<0>] vfs_statx+0xb4/0x118
[<0>] __se_sys_newfstatat+0x58/0x98
[<0>] __arm64_sys_newfstatat+0x24/0x30
[<0>] el0_svc_handler+0x7c/0x118
[<0>] el0_svc+0x8/0xc
[<0>] 0xffffffffffffffff

....


Looking at a straced lkp to detect HANGS
----------------------------------------

cat /proc/2878/cmdline
ruby/opt/lkp-tests/bin/run-local./dbench-100%.yaml

HANG READ
----------
cat /proc/2878/stack
[<0>] __switch_to+0x6c/0x90
[<0>] io_schedule+0x20/0x40
[<0>] wait_on_page_bit_killable+0x164/0x260
[<0>] generic_file_read_iter+0x1c4/0x820
[<0>] nfs_file_read+0xa4/0x108
[<0>] __vfs_read+0x120/0x170
[<0>] vfs_read+0x94/0x150
[<0>] ksys_read+0x6c/0xd8
[<0>] __arm64_sys_read+0x24/0x30
[<0>] el0_svc_handler+0x7c/0x118
[<0>] el0_svc+0x8/0xc
[<0>] 0xffffffffffffffff

...

cat /proc/2878/status
Name:	ruby
Umask:	0022
State:	D (disk sleep)
Tgid:	2878
Ngid:	0
Pid:	2878
PPid:	2876
TracerPid:	2876
Uid:	0	0	0	0
Gid:	0	0	0	0
FDSize:	64
Groups:	
NStgid:	2878
NSpid:	2878
NSpgid:	2876
NSsid:	2822
VmPeak:	   24192 kB
VmSize:	   24192 kB
VmLck:	       0 kB
VmPin:	       0 kB
VmHWM:	   13376 kB
VmRSS:	   13376 kB
RssAnon:	    8768 kB
RssFile:	    4608 kB
RssShmem:	       0 kB
VmData:	    9792 kB
VmStk:	    8192 kB
VmExe:	      64 kB
VmLib:	    5888 kB
VmPTE:	     320 kB
VmSwap:	       0 kB
HugetlbPages:	       0 kB
CoreDumping:	0
Threads:	2
SigQ:	0/7534
SigPnd:	0000000000000000
ShdPnd:	0000000000000000
SigBlk:	0000000000000000
SigIgn:	0000000000000000
SigCgt:	00000001c2007e4f
CapInh:	0000000000000000
CapPrm:	0000003fffffffff
CapEff:	0000003fffffffff
CapBnd:	0000003fffffffff
CapAmb:	0000000000000000
NoNewPrivs:	0
Seccomp:	0
Speculation_Store_Bypass:	unknown
Cpus_allowed:	3f
Cpus_allowed_list:	0-5
Mems_allowed:	1
Mems_allowed_list:	0
voluntary_ctxt_switches:	7547
nonvoluntary_ctxt_switches:	564


Thanks

Cristian
Trond Myklebust Nov. 30, 2018, 7:31 p.m. UTC | #6
On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote:
> Hi
> 
> On 29/11/2018 19:56, Trond Myklebust wrote:
> > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote:
> > > Hi Trond, Catalin
> [snip]
> > Question to you both: when this happens, does /proc/*/stack show any of
> > the processes hanging in the socket or sunrpc code? If so, can you
> > please send me examples of those stack traces (i.e. the contents of
> > /proc/<pid>/stack for the processes that are hanging)
> 
> (using a reverse shell since starting ssh causes a lot of pain and
> traffic)
> 
> Looking at NFS traffic holes (30-40 secs) to detect Client side various HANGS
> ----------------------------------------------------------------------------
> 
> root@sqwt-ubuntu:/opt/lkp-tests# nc -lk -e /bin/bash -s 192.168.0.1 -p 1235 &
> root@sqwt-ubuntu:/opt/lkp-tests# lkp run ./dbench-100%.yaml
> 
> $ nc 192.168.0.1 1235
> 
> cat /proc/2833/cmdline
> ruby/opt/lkp-tests/bin/run-local./dbench-100%.yaml
> 
> HANG CLOSE
> ----------
> cat /proc/2833/stack
> [<0>] __switch_to+0x6c/0x90
> [<0>] rpc_wait_bit_killable+0x2c/0xb0
> [<0>] __rpc_wait_for_completion_task+0x3c/0x48
> [<0>] nfs4_do_close+0x1ec/0x2b0
> [<0>] __nfs4_close+0x130/0x198
> [<0>] nfs4_close_sync+0x34/0x40
> [<0>] nfs4_close_context+0x40/0x50
> [<0>] __put_nfs_open_context+0xac/0x118
> [<0>] nfs_file_clear_open_context+0x38/0x58
> [<0>] nfs_file_release+0x7c/0x90
> [<0>] __fput+0x94/0x1c0
> [<0>] ____fput+0x20/0x30
> [<0>] task_work_run+0x98/0xb8
> [<0>] do_notify_resume+0x2d0/0x318
> [<0>] work_pending+0x8/0x10
> [<0>] 0xffffffffffffffff
> 
> HANG READ
> ---------
> cat /proc/2833/stack
> [<0>] __switch_to+0x6c/0x90
> [<0>] io_schedule+0x20/0x40
> [<0>] wait_on_page_bit_killable+0x164/0x260
> [<0>] generic_file_read_iter+0x1c4/0x820
> [<0>] nfs_file_read+0xa4/0x108
> [<0>] __vfs_read+0x120/0x170
> [<0>] vfs_read+0x94/0x150
> [<0>] ksys_read+0x6c/0xd8
> [<0>] __arm64_sys_read+0x24/0x30
> [<0>] el0_svc_handler+0x7c/0x118
> [<0>] el0_svc+0x8/0xc
> [<0>] 0xffffffffffffffff
> 
> 
> HANG STAT
> ---------
> cat /proc/2833/stack
> [<0>] __switch_to+0x6c/0x90
> [<0>] rpc_wait_bit_killable+0x2c/0xb0
> [<0>] __rpc_execute+0x1cc/0x528
> [<0>] rpc_execute+0xe4/0x1b0
> [<0>] rpc_run_task+0x130/0x168
> [<0>] nfs4_call_sync_sequence+0x80/0xc8
> [<0>] _nfs4_proc_getattr+0xc8/0xf8
> [<0>] nfs4_proc_getattr+0x88/0x1d8
> [<0>] __nfs_revalidate_inode+0x1f8/0x468
> [<0>] nfs_getattr+0x14c/0x420
> [<0>] vfs_getattr_nosec+0x7c/0x98
> [<0>] vfs_getattr+0x48/0x58
> [<0>] vfs_statx+0xb4/0x118
> [<0>] __se_sys_newfstatat+0x58/0x98
> [<0>] __arm64_sys_newfstatat+0x24/0x30
> [<0>] el0_svc_handler+0x7c/0x118
> [<0>] el0_svc+0x8/0xc
> [<0>] 0xffffffffffffffff
> 
> ....

Is there anything else blocked in the RPC layer? The above are all
standard tasks waiting for the rpciod/xprtiod workqueues to complete
the calls to the server.
Trond Myklebust Dec. 2, 2018, 4:44 p.m. UTC | #7
On Fri, 2018-11-30 at 14:31 -0500, Trond Myklebust wrote:
> On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote:
> > Hi
> > 
> > On 29/11/2018 19:56, Trond Myklebust wrote:
> > > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote:
> > > > Hi Trond, Catalin
> > [snip]
> > > Question to you both: when this happens, does /proc/*/stack show any of
> > > the processes hanging in the socket or sunrpc code? If so, can you
> > > please send me examples of those stack traces (i.e. the contents of
> > > /proc/<pid>/stack for the processes that are hanging)
> > 
> > (using a reverse shell since starting ssh causes a lot of pain and
> > traffic)
> > 
> > Looking at NFS traffic holes (30-40 secs) to detect Client side various HANGS
> > ----------------------------------------------------------------------------

Hi Cristian and Catalin

Chuck and I have identified a few issues that might have an effect on
the hangs you report. Could you please give the linux-next branch in my
repository on git.linux-nfs.org (
https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/linux-next
) a try?

git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next

Thanks!
   Trond
Catalin Marinas Dec. 3, 2018, 11:45 a.m. UTC | #8
Hi Trond,

On Sun, Dec 02, 2018 at 04:44:49PM +0000, Trond Myklebust wrote:
> On Fri, 2018-11-30 at 14:31 -0500, Trond Myklebust wrote:
> > On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote:
> > > On 29/11/2018 19:56, Trond Myklebust wrote:
> > > > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote:
> > > > Question to you both: when this happens, does /proc/*/stack show
> > > > any of the processes hanging in the socket or sunrpc code? If
> > > > so, can you please send me examples of those stack traces (i.e.
> > > > the contents of /proc/<pid>/stack for the processes that are
> > > > hanging)
> > > 
> > > (using a reverse shell since starting ssh causes a lot of pain and
> > > traffic)
> > > 
> > > Looking at NFS traffic holes(30-40 secs) to detect Client side
> > > various HANGS
> 
> Chuck and I have identified a few issues that might have an effect on
> the hangs you report. Could you please give the linux-next branch in my
> repository on git.linux-nfs.org (
> https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/linux-next
> ) a try?
> 
> git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next

I tried, unfortunately there's no difference for me (I merged the above
branch on top of 4.20-rc5).
Cristian Marussi Dec. 3, 2018, 11:53 a.m. UTC | #9
Hi

On 03/12/2018 11:45, Catalin Marinas wrote:
> Hi Trond,
> 
> On Sun, Dec 02, 2018 at 04:44:49PM +0000, Trond Myklebust wrote:
>> On Fri, 2018-11-30 at 14:31 -0500, Trond Myklebust wrote:
>>> On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote:
>>>> On 29/11/2018 19:56, Trond Myklebust wrote:
>>>>> On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote:
>>>>> Question to you both: when this happens, does /proc/*/stack show
>>>>> any of the processes hanging in the socket or sunrpc code? If
>>>>> so, can you please send me examples of those stack traces (i.e.
>>>>> the contents of /proc/<pid>/stack for the processes that are
>>>>> hanging)
>>>>
>>>> (using a reverse shell since starting ssh causes a lot of pain and
>>>> traffic)
>>>>
>>>> Looking at NFS traffic holes(30-40 secs) to detect Client side
>>>> various HANGS
>>
>> Chuck and I have identified a few issues that might have an effect on
>> the hangs you report. Could you please give the linux-next branch in my
>> repository on git.linux-nfs.org (
>> https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/linux-next
>> ) a try?
>>
>> git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
> 
> I tried, unfortunately there's no difference for me (I merged the above
> branch on top of 4.20-rc5).
> 

Same for me: the issue is still there.

Besides, I saw some differences in the dbench results which I used for testing.

From the dbench results (comparing with the previous mail) it seems that the
Unlink and Qpathinfo MaxLat have normalized.

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX      90820    13.613 13855.620
 Close          66565    18.075 13853.289
 Rename          3845    23.668   326.642
 Unlink         18450     4.581   186.062
 Qpathinfo      82068     2.677   280.203
 Qfileinfo      14235    10.357   176.373
 Qfsinfo        15156     2.822   242.794
 Sfileinfo       7400    17.018   240.546
 Find           31812     5.988   277.332
 WriteX         44735     0.155    14.685
 ReadX         141872     0.741 13817.870
 LockX            288    10.558    96.179
 UnlockX          288     3.307    57.939
 Flush           6389    20.427   187.429


> Is there anything else blocked in the RPC layer? The above are all
> standard tasks waiting for the rpciod/xprtiod workqueues to complete
> the calls to the server.
cat  /proc/692/stack
[<0>] __switch_to+0x6c/0x90
[<0>] rescuer_thread+0x2e8/0x360
[<0>] kthread+0x134/0x138
[<0>] ret_from_fork+0x10/0x1c
[<0>] 0xffffffffffffffff

I'm now trying to collect more evidence by ftracing during the quiet/stuck
period, until the restart happens.

Thanks

Cristian
Cristian Marussi Dec. 3, 2018, 6:54 p.m. UTC | #10
Hi

On 03/12/2018 11:53, Cristian Marussi wrote:
> Hi
> 
[snip]
> same for me. Issue still there.
> 
> Beside I saw some differences in the dbench result which I used for testing.
> 
> From the dbench (comparing with previous mail) it seems that
> Unlink and Qpathinfo MaxLat has normalized.
> 
>  Operation      Count    AvgLat    MaxLat
>  ----------------------------------------
>  NTCreateX      90820    13.613 13855.620
>  Close          66565    18.075 13853.289
>  Rename          3845    23.668   326.642
>  Unlink         18450     4.581   186.062
>  Qpathinfo      82068     2.677   280.203
>  Qfileinfo      14235    10.357   176.373
>  Qfsinfo        15156     2.822   242.794
>  Sfileinfo       7400    17.018   240.546
>  Find           31812     5.988   277.332
>  WriteX         44735     0.155    14.685
>  ReadX         141872     0.741 13817.870
>  LockX            288    10.558    96.179
>  UnlockX          288     3.307    57.939
>  Flush           6389    20.427   187.429
> 
> 
>> Is there anything else blocked in the RPC layer? The above are all
>> standard tasks waiting for the rpciod/xprtiod workqueues to complete
>> the calls to the server.
> cat  /proc/692/stack
> [<0>] __switch_to+0x6c/0x90
> [<0>] rescuer_thread+0x2e8/0x360
> [<0>] kthread+0x134/0x138
> [<0>] ret_from_fork+0x10/0x1c
> [<0>] 0xffffffffffffffff
> 
> I was now trying to collect more evidence ftracing during the quiet-stuck-period
> till the restart happens.
> 

Attached to this mail is a 3 secs ftrace function-graph capture taken during
the quiet/stalled period of an 'lkp run dbench', issued directly from the
console (no ssh or netcat shell traffic).

The ftrace filter was pre-set as:

set_ftrace_filter: nfs* rpc* xprt* tcp*

and tracing was started once NO traffic was observed flowing in Wireshark.
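
In case anyone wants to reproduce the capture, the sequence was roughly as
follows (paths assume tracefs mounted under /sys/kernel/debug):

  cd /sys/kernel/debug/tracing
  echo 'nfs* rpc* xprt* tcp*' > set_ftrace_filter
  echo function_graph > current_tracer
  echo 1 > tracing_on; sleep 3; echo 0 > tracing_on
  cp trace /tmp/ftrace-nfs-stall.txt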

Using ARM64 64K pages on the linux-nfs next branch, as in the previous mail
this morning.

Thanks

Cristian
# tracer: function_graph
#
# CPU  DURATION                  FUNCTION CALLS
# |     |   |                     |   |   |   |
 3)               |  nfs_permission() {
 3)               |    rpc_lookup_cred_nonblock() {
 3)               |      rpcauth_lookupcred() {
 3)   3.720 us    |        rpcauth_lookup_credcache();
 3)   8.100 us    |      }
 3) + 11.500 us   |    }
 3)               |    nfs_do_access() {
 3)               |      nfs_check_cache_invalid() {
 3)               |        nfs4_have_delegation() {
 3)   1.220 us    |          nfs4_is_valid_delegation();
 3)   4.560 us    |        }
 3)   8.040 us    |      }
 3) + 11.420 us   |    }
 3) + 36.140 us   |  }
 3)               |  nfs4_lookup_revalidate() {
 3)               |    nfs4_do_lookup_revalidate() {
 3)               |      nfs_do_lookup_revalidate() {
 3)               |        nfs4_have_delegation() {
 3)   1.200 us    |          nfs4_is_valid_delegation();
 3)   3.860 us    |        }
 3)               |        nfs_check_verifier() {
 3)               |          nfs_mapping_need_revalidate_inode() {
 3)               |            nfs_check_cache_invalid() {
 3)               |              nfs4_have_delegation() {
 3)   1.180 us    |                nfs4_is_valid_delegation();
 3)   3.740 us    |              }
 3)   6.280 us    |            }
 3)   8.840 us    |          }
 3) + 11.860 us   |        }
 3)   1.360 us    |        nfs_lookup_verify_inode();
 3)   1.440 us    |        nfs_advise_use_readdirplus();
 3) + 27.520 us   |      }
 3) + 30.940 us   |    }
 3) + 34.260 us   |  }
 3)               |  nfs_permission() {
 3)               |    rpc_lookup_cred_nonblock() {
 3)               |      rpcauth_lookupcred() {
 3)   1.720 us    |        rpcauth_lookup_credcache();
 3)   4.300 us    |      }
 3)   6.700 us    |    }
 3)               |    nfs_do_access() {
 3)               |      nfs_check_cache_invalid() {
 3)               |        nfs4_have_delegation() {
 3)   1.220 us    |          nfs4_is_valid_delegation();
 3)   3.700 us    |        }
 3)   6.320 us    |      }
 3)   9.840 us    |    }
 3) + 20.480 us   |  }
 3)               |  nfs4_lookup_revalidate() {
 3)               |    nfs4_do_lookup_revalidate() {
 3)               |      nfs4_have_delegation() {
 3)   1.580 us    |        nfs4_is_valid_delegation();
 3)   1.180 us    |        nfs_mark_delegation_referenced();
 3)   7.060 us    |      }
 3)   9.620 us    |    }
 3) + 12.320 us   |  }
 3)   1.260 us    |  nfs_permission();
 3)               |  nfs4_file_open() {
 3)   1.360 us    |    nfs_check_flags();
 3)               |    rpc_lookup_cred() {
 3)               |      rpcauth_lookupcred() {
 3)   1.580 us    |        rpcauth_lookup_credcache();
 3)   4.080 us    |      }
 3)   6.640 us    |    }
 3)   1.700 us    |    nfs_sb_active();
 3)               |    nfs4_atomic_open() {
 3)               |      nfs4_do_open() {
 3)   2.780 us    |        nfs4_get_state_owner();
 3)               |        nfs4_client_recover_expired_lease() {
 3)               |          nfs4_wait_clnt_recover() {
 3)               |            nfs_put_client() {
 3)   1.940 us    |              nfs_put_client.part.2();
 3)   4.920 us    |            }
 3)   8.140 us    |          }
 3) + 11.180 us   |        }
 3)               |        nfs4_opendata_alloc() {
 3)   1.360 us    |          nfs4_label_alloc();
 3)   1.160 us    |          nfs4_label_alloc();
 3)   2.180 us    |          nfs_alloc_seqid();
 3)   1.300 us    |          nfs_sb_active();
 3)               |          nfs_fattr_init() {
 3)   1.380 us    |            nfs_inc_attr_generation_counter();
 3)   4.760 us    |          }
 3)   1.160 us    |          nfs_fattr_init_names();
 3) + 25.480 us   |        }
 3)   3.520 us    |        nfs4_get_open_state();
 3)               |        nfs4_run_open_task() {
 3)               |          rpc_run_task() {
 3)               |            rpc_new_task() {
 3)   1.280 us    |              xprt_get();
 3)   6.040 us    |            }
 3)               |            xprt_iter_get_next() {
 3)               |              xprt_iter_get_helper() {
 3)   1.220 us    |                xprt_iter_first_entry();
 3)   1.480 us    |                xprt_get();
 3)   7.100 us    |              }
 3) + 10.300 us   |            }
 3)               |            rpc_execute() {
 3) + 12.100 us   |              rpc_make_runnable();
 3) + 15.260 us   |            }
 3) + 39.520 us   |          }
 3) ! 129.680 us  |          rpc_wait_bit_killable();
 2)               |  rpc_async_schedule() {
 2)               |    rpc_prepare_task() {
 2)               |      nfs4_open_prepare() {
 2)   1.260 us    |        nfs_wait_on_sequence();
 2)   0.820 us    |        nfs_mark_delegation_referenced();
 2)   0.740 us    |        nfs4_sequence_done();
 2)   8.780 us    |      }
 2) + 11.360 us   |    }
 2)               |    rpc_release_resources_task() {
 2)   1.480 us    |      xprt_release();
 2)               |      rpc_task_release_client() {
 2)   0.780 us    |        rpc_release_client();
 2)   1.120 us    |        xprt_put();
 2)   5.380 us    |      }
 2) + 10.780 us   |    }
 2) + 32.280 us   |  }
 3)               |          rpc_put_task() {
 3)               |            rpc_do_put_task() {
 3)               |              rpc_release_resources_task() {
 3)   1.460 us    |                xprt_release();
 3)   1.380 us    |                rpc_task_release_client();
 3)   7.340 us    |              }
 3)               |              rpc_free_task() {
 3)               |                nfs4_open_release() {
 3)   1.520 us    |                  nfs4_opendata_put.part.8();
 3)   4.480 us    |                }
 3)   8.080 us    |              }
 3) + 19.980 us   |            }
 3) + 23.560 us   |          }
 3) ! 203.540 us  |        }
 3)   1.420 us    |        nfs_mark_delegation_referenced();
 3)   1.500 us    |        nfs_release_seqid();
 3)               |        nfs_may_open() {
 3)               |          nfs_do_access() {
 3)               |            nfs_check_cache_invalid() {
 3)               |              nfs4_have_delegation() {
 3)   1.200 us    |                nfs4_is_valid_delegation();
 3)   1.200 us    |                nfs_mark_delegation_referenced();
 3)   6.260 us    |              }
 3)   8.860 us    |            }
 3) + 11.860 us   |          }
 3) + 14.480 us   |        }
 3)   1.260 us    |        nfs_mark_delegation_referenced();
 3)   1.220 us    |        nfs4_state_set_mode_locked();
 3)   1.500 us    |        nfs_release_seqid();
 3)   1.540 us    |        nfs_inode_attach_open_context();
 3)   1.420 us    |        nfs4_sequence_free_slot();
 3)               |        nfs4_opendata_put.part.8() {
 3)   1.380 us    |          nfs4_lgopen_release();
 3)               |          nfs_free_seqid() {
 3)   1.380 us    |            nfs_release_seqid();
 3)   4.260 us    |          }
 3)   1.280 us    |          nfs4_sequence_free_slot();
 3)   1.620 us    |          nfs4_put_open_state();
 3)   1.400 us    |          nfs4_put_state_owner();
 3)   1.280 us    |          nfs_sb_deactive();
 3)   1.340 us    |          nfs_fattr_free_names();
 3) + 25.680 us   |        }
 3)   1.260 us    |        nfs4_put_state_owner();
 3) ! 329.940 us  |      }
 3) ! 333.420 us  |    }
 3)   1.360 us    |    nfs_file_set_open_context();
 3) ! 356.720 us  |  }
 1)   2.520 us    |  nfs4_xattr_get_nfs4_label();
 1)               |  nfs_file_read() {
 1)   0.900 us    |    nfs_start_io_read();
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.820 us    |            nfs4_is_valid_delegation();
 1)   0.800 us    |            nfs_mark_delegation_referenced();
 1)   4.700 us    |          }
 1)   6.540 us    |        }
 1)   8.220 us    |      }
 1) + 10.360 us   |    }
 1)   0.860 us    |    nfs_end_io_read();
 1) + 21.200 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   1.280 us    |        rpcauth_lookup_credcache();
 1)   3.400 us    |      }
 1)   5.400 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   0.760 us    |          nfs_mark_delegation_referenced();
 1)   4.040 us    |        }
 1)   5.620 us    |      }
 1)   7.360 us    |    }
 1) + 16.080 us   |  }
 1)               |  nfs_file_read() {
 1)   0.800 us    |    nfs_start_io_read();
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.860 us    |            nfs4_is_valid_delegation();
 1)   0.780 us    |            nfs_mark_delegation_referenced();
 1)   4.020 us    |          }
 1)   6.120 us    |        }
 1)   7.660 us    |      }
 1)   9.320 us    |    }
 1)   0.920 us    |    nfs_end_io_read();
 1) + 15.620 us   |  }
 1)               |  nfs_file_read() {
 1)   0.800 us    |    nfs_start_io_read();
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.900 us    |            nfs_mark_delegation_referenced();
 1)   4.040 us    |          }
 1)   5.580 us    |        }
 1)   7.120 us    |      }
 1)   8.720 us    |    }
 1)   0.740 us    |    nfs_end_io_read();
 1) + 14.000 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.880 us    |        rpcauth_lookup_credcache();
 1)   2.440 us    |      }
 1)   4.020 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.360 us    |        }
 1)   4.040 us    |      }
 1)   5.920 us    |    }
 1) + 12.820 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.460 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.860 us    |                nfs4_is_valid_delegation();
 1)   2.420 us    |              }
 1)   4.180 us    |            }
 1)   5.700 us    |          }
 1)   7.240 us    |        }
 1)   1.020 us    |        nfs_lookup_verify_inode();
 1)   0.780 us    |        nfs_advise_use_readdirplus();
 1) + 16.180 us   |      }
 1) + 17.860 us   |    }
 1) + 20.040 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.820 us    |        rpcauth_lookup_credcache();
 1)   2.520 us    |      }
 1)   4.100 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.720 us    |          nfs4_is_valid_delegation();
 1)   2.400 us    |        }
 1)   3.960 us    |      }
 1)   5.760 us    |    }
 1) + 12.360 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs4_have_delegation() {
 1)   0.740 us    |        nfs4_is_valid_delegation();
 1)   2.380 us    |      }
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.280 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.320 us    |              }
 1)   3.820 us    |            }
 1)   5.340 us    |          }
 1)   6.860 us    |        }
 1)   0.840 us    |        nfs_lookup_verify_inode();
 1)   1.100 us    |        nfs_advise_use_readdirplus();
 1) + 15.400 us   |      }
 1) + 20.240 us   |    }
 1) + 21.800 us   |  }
 1)               |  nfs_get_link() {
 1)               |    nfs_revalidate_mapping_rcu() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   2.320 us    |          }
 1)   3.940 us    |        }
 1)   5.600 us    |      }
 1)   7.360 us    |    }
 1)   9.500 us    |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.820 us    |        rpcauth_lookup_credcache();
 1)   2.380 us    |      }
 1)   3.940 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.280 us    |        }
 1)   3.860 us    |      }
 1)   5.380 us    |    }
 1) + 11.960 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.720 us    |          nfs4_is_valid_delegation();
 1)   2.300 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.300 us    |              }
 1)   3.840 us    |            }
 1)   5.360 us    |          }
 1)   6.860 us    |        }
 1)   0.740 us    |        nfs_lookup_verify_inode();
 1)   0.740 us    |        nfs_advise_use_readdirplus();
 1) + 14.680 us   |      }
 1) + 16.260 us   |    }
 1) + 17.940 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.840 us    |        rpcauth_lookup_credcache();
 1)   2.520 us    |      }
 1)   4.040 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.280 us    |        }
 1)   3.820 us    |      }
 1)   5.620 us    |    }
 1) + 12.020 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs4_have_delegation() {
 1)   0.820 us    |        nfs4_is_valid_delegation();
 1)   0.760 us    |        nfs_mark_delegation_referenced();
 1)   4.120 us    |      }
 1)   5.720 us    |    }
 1)   7.280 us    |  }
 1)   0.800 us    |  nfs_permission();
 1)               |  nfs4_file_open() {
 1)   0.760 us    |    nfs_check_flags();
 1)               |    rpc_lookup_cred() {
 1)               |      rpcauth_lookupcred() {
 1)   0.900 us    |        rpcauth_lookup_credcache();
 1)   2.420 us    |      }
 1)   3.920 us    |    }
 1)   0.900 us    |    nfs_sb_active();
 1)               |    nfs4_atomic_open() {
 1)               |      nfs4_do_open() {
 1)   1.400 us    |        nfs4_get_state_owner();
 1)               |        nfs4_client_recover_expired_lease() {
 1)               |          nfs4_wait_clnt_recover() {
 1)               |            nfs_put_client() {
 1)   0.960 us    |              nfs_put_client.part.2();
 1)   2.620 us    |            }
 1)   4.500 us    |          }
 1)   6.120 us    |        }
 1)               |        nfs4_opendata_alloc() {
 1)   0.980 us    |          nfs4_label_alloc();
 1)   0.720 us    |          nfs4_label_alloc();
 1)   0.840 us    |          nfs_alloc_seqid();
 1)   0.800 us    |          nfs_sb_active();
 1)               |          nfs_fattr_init() {
 1)   0.780 us    |            nfs_inc_attr_generation_counter();
 1)   2.460 us    |          }
 1)   0.740 us    |          nfs_fattr_init_names();
 1) + 13.720 us   |        }
 1)   1.020 us    |        nfs4_get_open_state();
 1)               |        nfs4_run_open_task() {
 1)               |          rpc_run_task() {
 1)               |            rpc_new_task() {
 1)   0.820 us    |              xprt_get();
 1)   3.600 us    |            }
 1)               |            xprt_iter_get_next() {
 1)               |              xprt_iter_get_helper() {
 1)   0.760 us    |                xprt_iter_first_entry();
 1)   0.760 us    |                xprt_get();
 1)   4.160 us    |              }
 1)   5.980 us    |            }
 1)               |            rpc_execute() {
 1)   6.000 us    |              rpc_make_runnable();
 1)   7.740 us    |            }
 1) + 21.180 us   |          }
 1) ! 104.520 us  |          rpc_wait_bit_killable();
 2)               |  rpc_async_schedule() {
 2)               |    rpc_prepare_task() {
 2)               |      nfs4_open_prepare() {
 2)   1.040 us    |        nfs_wait_on_sequence();
 2)   0.920 us    |        nfs_mark_delegation_referenced();
 2)   0.740 us    |        nfs4_sequence_done();
 2)   6.600 us    |      }
 2)   8.480 us    |    }
 2)               |    rpc_release_resources_task() {
 2)   0.780 us    |      xprt_release();
 2)               |      rpc_task_release_client() {
 2)   0.780 us    |        rpc_release_client();
 2)   0.760 us    |        xprt_put();
 2)   4.220 us    |      }
 2)   7.880 us    |    }
 2) + 24.700 us   |  }
 1)               |          rpc_put_task() {
 1)               |            rpc_do_put_task() {
 1)               |              rpc_release_resources_task() {
 1)   0.840 us    |                xprt_release();
 1)   0.760 us    |                rpc_task_release_client();
 1)   4.080 us    |              }
 1)               |              rpc_free_task() {
 1)               |                nfs4_open_release() {
 1)   0.800 us    |                  nfs4_opendata_put.part.8();
 1)   2.400 us    |                }
 1)   4.420 us    |              }
 1) + 11.240 us   |            }
 1) + 13.020 us   |          }
 1) ! 143.480 us  |        }
 1)   0.760 us    |        nfs_mark_delegation_referenced();
 1)   0.820 us    |        nfs_release_seqid();
 1)               |        nfs_may_open() {
 1)               |          nfs_do_access() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   0.860 us    |                nfs_mark_delegation_referenced();
 1)   4.060 us    |              }
 1)   5.640 us    |            }
 1)   7.480 us    |          }
 1)   9.040 us    |        }
 1)   0.860 us    |        nfs_mark_delegation_referenced();
 1)   0.760 us    |        nfs4_state_set_mode_locked();
 1)   0.880 us    |        nfs_release_seqid();
 1)   1.420 us    |        nfs_inode_attach_open_context();
 1)   0.720 us    |        nfs4_sequence_free_slot();
 1)               |        nfs4_opendata_put.part.8() {
 1)   0.720 us    |          nfs4_lgopen_release();
 1)               |          nfs_free_seqid() {
 1)   0.760 us    |            nfs_release_seqid();
 1)   2.540 us    |          }
 1)   0.740 us    |          nfs4_sequence_free_slot();
 1)   0.800 us    |          nfs4_put_open_state();
 1)   0.800 us    |          nfs4_put_state_owner();
 1)   0.780 us    |          nfs_sb_deactive();
 1)   0.780 us    |          nfs_fattr_free_names();
 1) + 15.020 us   |        }
 1)   0.760 us    |        nfs4_put_state_owner();
 1) ! 213.580 us  |      }
 1) ! 215.300 us  |    }
 1)   0.760 us    |    nfs_file_set_open_context();
 1) ! 227.840 us  |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.900 us    |        rpcauth_lookup_credcache();
 1)   2.480 us    |      }
 1)   4.080 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.780 us    |          nfs4_is_valid_delegation();
 1)   0.760 us    |          nfs_mark_delegation_referenced();
 1)   3.960 us    |        }
 1)   5.520 us    |      }
 1)   7.100 us    |    }
 1) + 14.040 us   |  }
 1)               |  nfs_file_read() {
 1)   0.780 us    |    nfs_start_io_read();
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.740 us    |            nfs_mark_delegation_referenced();
 1)   3.900 us    |          }
 1)   5.440 us    |        }
 1)   7.000 us    |      }
 1)   8.720 us    |    }
 1)   0.780 us    |    nfs_end_io_read();
 1) + 15.060 us   |  }
 1)               |  nfs_file_read() {
 1)   0.760 us    |    nfs_start_io_read();
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.740 us    |            nfs_mark_delegation_referenced();
 1)   3.860 us    |          }
 1)   5.360 us    |        }
 1)   6.900 us    |      }
 1)   8.500 us    |    }
 1)   0.760 us    |    nfs_end_io_read();
 1) + 14.420 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.780 us    |            nfs_mark_delegation_referenced();
 1)   4.080 us    |          }
 1)   5.800 us    |        }
 1)   7.400 us    |      }
 1)   9.140 us    |    }
 1) + 11.760 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.740 us    |            nfs_mark_delegation_referenced();
 1)   3.900 us    |          }
 1)   5.420 us    |        }
 1)   6.920 us    |      }
 1)   8.540 us    |    }
 1) + 10.300 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.780 us    |            nfs_mark_delegation_referenced();
 1)   4.020 us    |          }
 1)   5.600 us    |        }
 1)   7.160 us    |      }
 1)   8.820 us    |    }
 1) + 10.740 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.760 us    |            nfs_mark_delegation_referenced();
 1)   3.880 us    |          }
 1)   5.400 us    |        }
 1)   6.920 us    |      }
 1)   8.740 us    |    }
 1) + 10.400 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   1.080 us    |        rpcauth_lookup_credcache();
 1)   2.860 us    |      }
 1)   4.560 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.760 us    |          nfs4_is_valid_delegation();
 1)   2.400 us    |        }
 1)   4.080 us    |      }
 1)   5.980 us    |    }
 1) + 13.600 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.380 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.260 us    |              }
 1)   3.800 us    |            }
 1)   5.620 us    |          }
 1)   7.180 us    |        }
 1)   0.900 us    |        nfs_lookup_verify_inode();
 1)   0.740 us    |        nfs_advise_use_readdirplus();
 1) + 15.580 us   |      }
 1) + 17.120 us   |    }
 1) + 18.720 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.900 us    |        rpcauth_lookup_credcache();
 1)   2.520 us    |      }
 1)   4.240 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.760 us    |          nfs4_is_valid_delegation();
 1)   2.600 us    |        }
 1)   4.260 us    |      }
 1)   6.400 us    |    }
 1) + 13.080 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.480 us    |              }
 1)   4.080 us    |            }
 1)   5.600 us    |          }
 1)   7.240 us    |        }
 1)   0.740 us    |        nfs_lookup_revalidate_done();
 1) + 10.560 us   |      }
 1) + 12.080 us   |    }
 1) + 13.840 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   1.080 us    |        rpcauth_lookup_credcache();
 1)   2.740 us    |      }
 1)   4.340 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.720 us    |          nfs4_is_valid_delegation();
 1)   2.340 us    |        }
 1)   4.420 us    |      }
 1)   6.040 us    |    }
 1) + 13.200 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.480 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.280 us    |              }
 1)   3.960 us    |            }
 1)   5.460 us    |          }
 1)   7.000 us    |        }
 1)   0.720 us    |        nfs_lookup_verify_inode();
 1)   0.880 us    |        nfs_advise_use_readdirplus();
 1) + 15.200 us   |      }
 1) + 16.720 us   |    }
 1) + 18.260 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.820 us    |        rpcauth_lookup_credcache();
 1)   2.460 us    |      }
 1)   4.040 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.300 us    |        }
 1)   3.820 us    |      }
 1)   5.360 us    |    }
 1) + 11.800 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.860 us    |                nfs4_is_valid_delegation();
 1)   2.440 us    |              }
 1)   3.980 us    |            }
 1)   5.500 us    |          }
 1)   6.980 us    |        }
 1)   0.740 us    |        nfs_lookup_revalidate_done();
 1) + 10.060 us   |      }
 1) + 11.580 us   |    }
 1) + 13.260 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.860 us    |        rpcauth_lookup_credcache();
 1)   2.580 us    |      }
 1)   4.180 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.420 us    |        }
 1)   3.980 us    |      }
 1)   5.560 us    |    }
 1) + 12.300 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.880 us    |          nfs4_is_valid_delegation();
 1)   2.420 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.720 us    |                nfs4_is_valid_delegation();
 1)   2.400 us    |              }
 1)   3.920 us    |            }
 1)   5.420 us    |          }
 1)   6.960 us    |        }
 1)   0.740 us    |        nfs_lookup_verify_inode();
 1)   0.740 us    |        nfs_advise_use_readdirplus();
 1) + 14.880 us   |      }
 1) + 16.440 us   |    }
 1) + 17.980 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.840 us    |        rpcauth_lookup_credcache();
 1)   2.380 us    |      }
 1)   3.940 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.320 us    |        }
 1)   3.980 us    |      }
 1)   5.500 us    |    }
 1) + 11.840 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs4_have_delegation() {
 1)   0.960 us    |        nfs4_is_valid_delegation();
 1)   0.740 us    |        nfs_mark_delegation_referenced();
 1)   4.520 us    |      }
 1)   6.160 us    |    }
 1)   7.880 us    |  }
 1)   0.800 us    |  nfs_permission();
 1)               |  nfs4_file_open() {
 1)   0.740 us    |    nfs_check_flags();
 1)               |    rpc_lookup_cred() {
 1)               |      rpcauth_lookupcred() {
 1)   0.900 us    |        rpcauth_lookup_credcache();
 1)   2.440 us    |      }
 1)   3.960 us    |    }
 1)   0.800 us    |    nfs_sb_active();
 1)               |    nfs4_atomic_open() {
 1)               |      nfs4_do_open() {
 1)   1.020 us    |        nfs4_get_state_owner();
 1)               |        nfs4_client_recover_expired_lease() {
 1)               |          nfs4_wait_clnt_recover() {
 1)               |            nfs_put_client() {
 1)   0.800 us    |              nfs_put_client.part.2();
 1)   2.320 us    |            }
 1)   3.880 us    |          }
 1)   5.460 us    |        }
 1)               |        nfs4_opendata_alloc() {
 1)   0.760 us    |          nfs4_label_alloc();
 1)   0.740 us    |          nfs4_label_alloc();
 1)   0.840 us    |          nfs_alloc_seqid();
 1)   0.800 us    |          nfs_sb_active();
 1)               |          nfs_fattr_init() {
 1)   0.760 us    |            nfs_inc_attr_generation_counter();
 1)   2.440 us    |          }
 1)   0.740 us    |          nfs_fattr_init_names();
 1) + 12.880 us   |        }
 1)   1.540 us    |        nfs4_get_open_state();
 1)               |        nfs4_run_open_task() {
 1)               |          rpc_run_task() {
 1)               |            rpc_new_task() {
 1)   0.740 us    |              xprt_get();
 1)   2.840 us    |            }
 1)               |            xprt_iter_get_next() {
 1)               |              xprt_iter_get_helper() {
 1)   0.760 us    |                xprt_iter_first_entry();
 1)   0.760 us    |                xprt_get();
 1)   3.920 us    |              }
 1)   5.540 us    |            }
 1)               |            rpc_execute() {
 1)   5.240 us    |              rpc_make_runnable();
 1)   6.900 us    |            }
 1) + 18.840 us   |          }
 1) ! 105.220 us  |          rpc_wait_bit_killable();
 2)               |  rpc_async_schedule() {
 2)               |    rpc_prepare_task() {
 2)               |      nfs4_open_prepare() {
 2)   0.980 us    |        nfs_wait_on_sequence();
 2)   0.780 us    |        nfs_mark_delegation_referenced();
 2)   0.740 us    |        nfs4_sequence_done();
 2)   6.340 us    |      }
 2)   8.180 us    |    }
 2)               |    rpc_release_resources_task() {
 2)   0.760 us    |      xprt_release();
 2)               |      rpc_task_release_client() {
 2)   0.920 us    |        rpc_release_client();
 2)   0.760 us    |        xprt_put();
 2)   4.280 us    |      }
 2)   7.740 us    |    }
 2) + 24.480 us   |  }
 1)               |          rpc_put_task() {
 1)               |            rpc_do_put_task() {
 1)               |              rpc_release_resources_task() {
 1)   0.900 us    |                xprt_release();
 1)   0.760 us    |                rpc_task_release_client();
 1)   4.140 us    |              }
 1)               |              rpc_free_task() {
 1)               |                nfs4_open_release() {
 1)   0.760 us    |                  nfs4_opendata_put.part.8();
 1)   2.320 us    |                }
 1)   4.120 us    |              }
 1) + 10.820 us   |            }
 1) + 12.500 us   |          }
 1) ! 140.660 us  |        }
 1)   0.740 us    |        nfs_mark_delegation_referenced();
 1)   0.840 us    |        nfs_release_seqid();
 1)               |        nfs_may_open() {
 1)               |          nfs_do_access() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   0.760 us    |                nfs_mark_delegation_referenced();
 1)   4.140 us    |              }
 1)   5.740 us    |            }
 1)   7.620 us    |          }
 1)   9.120 us    |        }
 1)   0.820 us    |        nfs_mark_delegation_referenced();
 1)   0.800 us    |        nfs4_state_set_mode_locked();
 1)   0.760 us    |        nfs_release_seqid();
 1)   0.980 us    |        nfs_inode_attach_open_context();
 1)   0.740 us    |        nfs4_sequence_free_slot();
 1)               |        nfs4_opendata_put.part.8() {
 1)   0.740 us    |          nfs4_lgopen_release();
 1)               |          nfs_free_seqid() {
 1)   0.740 us    |            nfs_release_seqid();
 1)   2.340 us    |          }
 1)   0.760 us    |          nfs4_sequence_free_slot();
 1)   0.800 us    |          nfs4_put_open_state();
 1)   0.760 us    |          nfs4_put_state_owner();
 1)   0.760 us    |          nfs_sb_deactive();
 1)   0.720 us    |          nfs_fattr_free_names();
 1) + 13.880 us   |        }
 1)   0.740 us    |        nfs4_put_state_owner();
 1) ! 206.100 us  |      }
 1) ! 207.740 us  |    }
 1)   0.880 us    |    nfs_file_set_open_context();
 1) ! 219.600 us  |  }
 1)               |  nfs_getattr() {
 1)               |    nfs_writepages() {
 1)               |      nfs_pageio_init_write() {
 1)   0.860 us    |        nfs_pageio_init();
 1)   2.600 us    |      }
 1)               |      nfs_pageio_complete() {
 1)               |        nfs_pageio_doio() {
 1)   0.720 us    |          nfs_pgio_current_mirror();
 1)   2.720 us    |        }
 1)   4.960 us    |      }
 1)               |      nfs_io_completion_put.part.0() {
 1)               |        nfs_io_completion_commit() {
 1)   1.080 us    |          nfs_commit_end();
 1)   3.380 us    |        }
 1)   5.440 us    |      }
 1) + 20.500 us   |    }
 1)               |    nfs_attribute_cache_expired() {
 1)               |      nfs4_have_delegation() {
 1)   0.760 us    |        nfs4_is_valid_delegation();
 1)   0.760 us    |        nfs_mark_delegation_referenced();
 1)   3.980 us    |      }
 1)   5.580 us    |    }
 1)               |    nfs_readdirplus_parent_cache_hit.part.5() {
 1)   0.800 us    |      nfs_advise_use_readdirplus();
 1)   2.460 us    |    }
 1) + 36.240 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   0.740 us    |            nfs_mark_delegation_referenced();
 1)   3.820 us    |          }
 1)   5.600 us    |        }
 1)   7.160 us    |      }
 1)   8.780 us    |    }
 1) + 10.440 us   |  }
 1)   0.840 us    |  nfs4_file_flush();
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.940 us    |        rpcauth_lookup_credcache();
 1)   2.640 us    |      }
 1)   4.220 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.320 us    |        }
 1)   3.940 us    |      }
 1)   5.540 us    |    }
 1) + 12.320 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.720 us    |          nfs4_is_valid_delegation();
 1)   2.400 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.300 us    |              }
 1)   3.820 us    |            }
 1)   5.340 us    |          }
 1)   6.860 us    |        }
 1)   0.760 us    |        nfs_lookup_verify_inode();
 1)   0.860 us    |        nfs_advise_use_readdirplus();
 1) + 15.000 us   |      }
 1) + 16.560 us   |    }
 1) + 18.080 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.840 us    |        rpcauth_lookup_credcache();
 1)   2.400 us    |      }
 1)   3.920 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.300 us    |        }
 1)   3.820 us    |      }
 1)   5.380 us    |    }
 1) + 11.720 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.880 us    |                nfs4_is_valid_delegation();
 1)   2.440 us    |              }
 1)   3.980 us    |            }
 1)   5.580 us    |          }
 1)   7.120 us    |        }
 1)   0.740 us    |        nfs_lookup_revalidate_done();
 1) + 10.280 us   |      }
 1) + 11.820 us   |    }
 1) + 13.540 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.860 us    |        rpcauth_lookup_credcache();
 1)   2.560 us    |      }
 1)   4.100 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.320 us    |        }
 1)   3.860 us    |      }
 1)   5.440 us    |    }
 1) + 12.020 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.760 us    |          nfs4_is_valid_delegation();
 1)   2.340 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.280 us    |              }
 1)   3.840 us    |            }
 1)   5.360 us    |          }
 1)   6.980 us    |        }
 1)   0.740 us    |        nfs_lookup_verify_inode();
 1)   0.740 us    |        nfs_advise_use_readdirplus();
 1) + 15.000 us   |      }
 1) + 16.520 us   |    }
 1) + 18.040 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.840 us    |        rpcauth_lookup_credcache();
 1)   2.400 us    |      }
 1)   3.960 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.300 us    |        }
 1)   3.860 us    |      }
 1)   5.580 us    |    }
 1) + 11.920 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.300 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   2.280 us    |              }
 1)   3.820 us    |            }
 1)   5.340 us    |          }
 1)   6.820 us    |        }
 1)   0.760 us    |        nfs_lookup_verify_inode();
 1)   0.740 us    |        nfs_advise_use_readdirplus();
 1) + 14.540 us   |      }
 1) + 16.060 us   |    }
 1) + 17.640 us   |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.840 us    |        rpcauth_lookup_credcache();
 1)   2.360 us    |      }
 1)   3.900 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.300 us    |        }
 1)   3.820 us    |      }
 1)   5.420 us    |    }
 1) + 11.660 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs4_have_delegation() {
 1)   0.740 us    |        nfs4_is_valid_delegation();
 1)   2.400 us    |      }
 1)               |      nfs_do_lookup_revalidate() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.320 us    |        }
 1)               |        nfs_check_verifier() {
 1)               |          nfs_mapping_need_revalidate_inode() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.720 us    |                nfs4_is_valid_delegation();
 1)   2.320 us    |              }
 1)   3.860 us    |            }
 1)   5.400 us    |          }
 1)   6.880 us    |        }
 1)   0.820 us    |        nfs_lookup_verify_inode();
 1)   0.740 us    |        nfs_advise_use_readdirplus();
 1) + 14.840 us   |      }
 1) + 19.700 us   |    }
 1) + 21.240 us   |  }
 1)               |  nfs_get_link() {
 1)               |    nfs_revalidate_mapping_rcu() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.740 us    |            nfs4_is_valid_delegation();
 1)   2.300 us    |          }
 1)   3.940 us    |        }
 1)   5.600 us    |      }
 1)   7.320 us    |    }
 1)   9.340 us    |  }
 1)               |  nfs_permission() {
 1)               |    rpc_lookup_cred_nonblock() {
 1)               |      rpcauth_lookupcred() {
 1)   0.840 us    |        rpcauth_lookup_credcache();
 1)   2.360 us    |      }
 1)   3.920 us    |    }
 1)               |    nfs_do_access() {
 1)               |      nfs_check_cache_invalid() {
 1)               |        nfs4_have_delegation() {
 1)   0.740 us    |          nfs4_is_valid_delegation();
 1)   2.320 us    |        }
 1)   3.860 us    |      }
 1)   5.400 us    |    }
 1) + 11.720 us   |  }
 1)               |  nfs4_lookup_revalidate() {
 1)               |    nfs4_do_lookup_revalidate() {
 1)               |      nfs4_have_delegation() {
 1)   0.980 us    |        nfs4_is_valid_delegation();
 1)   0.760 us    |        nfs_mark_delegation_referenced();
 1)   4.240 us    |      }
 1)   5.780 us    |    }
 1)   7.340 us    |  }
 1)   0.840 us    |  nfs_permission();
 1)               |  nfs4_file_open() {
 1)   0.740 us    |    nfs_check_flags();
 1)               |    rpc_lookup_cred() {
 1)               |      rpcauth_lookupcred() {
 1)   0.900 us    |        rpcauth_lookup_credcache();
 1)   2.480 us    |      }
 1)   4.020 us    |    }
 1)   0.800 us    |    nfs_sb_active();
 1)               |    nfs4_atomic_open() {
 1)               |      nfs4_do_open() {
 1)   0.960 us    |        nfs4_get_state_owner();
 1)               |        nfs4_client_recover_expired_lease() {
 1)               |          nfs4_wait_clnt_recover() {
 1)               |            nfs_put_client() {
 1)   1.040 us    |              nfs_put_client.part.2();
 1)   2.580 us    |            }
 1)   4.260 us    |          }
 1)   5.840 us    |        }
 1)               |        nfs4_opendata_alloc() {
 1)   0.740 us    |          nfs4_label_alloc();
 1)   0.740 us    |          nfs4_label_alloc();
 1)   0.840 us    |          nfs_alloc_seqid();
 1)   0.820 us    |          nfs_sb_active();
 1)               |          nfs_fattr_init() {
 1)   0.740 us    |            nfs_inc_attr_generation_counter();
 1)   2.280 us    |          }
 1)   0.760 us    |          nfs_fattr_init_names();
 1) + 12.420 us   |        }
 1)   0.980 us    |        nfs4_get_open_state();
 1)               |        nfs4_run_open_task() {
 1)               |          rpc_run_task() {
 1)               |            rpc_new_task() {
 1)   0.740 us    |              xprt_get();
 1)   2.600 us    |            }
 1)               |            xprt_iter_get_next() {
 1)               |              xprt_iter_get_helper() {
 1)   0.740 us    |                xprt_iter_first_entry();
 1)   0.740 us    |                xprt_get();
 1)   3.880 us    |              }
 1)   5.480 us    |            }
 1)               |            rpc_execute() {
 1)   4.240 us    |              rpc_make_runnable();
 1)   5.920 us    |            }
 1) + 17.600 us   |          }
 1) ! 104.260 us  |          rpc_wait_bit_killable();
 2)               |  rpc_async_schedule() {
 2)               |    rpc_prepare_task() {
 2)               |      nfs4_open_prepare() {
 2)   1.000 us    |        nfs_wait_on_sequence();
 2)   0.800 us    |        nfs_mark_delegation_referenced();
 2)   0.740 us    |        nfs4_sequence_done();
 2)   6.540 us    |      }
 2)   8.480 us    |    }
 2)               |    rpc_release_resources_task() {
 2)   0.780 us    |      xprt_release();
 2)               |      rpc_task_release_client() {
 2)   0.780 us    |        rpc_release_client();
 2)   0.740 us    |        xprt_put();
 2)   4.540 us    |      }
 2)   8.000 us    |    }
 2) + 24.700 us   |  }
 1)               |          rpc_put_task() {
 1)               |            rpc_do_put_task() {
 1)               |              rpc_release_resources_task() {
 1)   0.880 us    |                xprt_release();
 1)   0.720 us    |                rpc_task_release_client();
 1)   4.060 us    |              }
 1)               |              rpc_free_task() {
 1)               |                nfs4_open_release() {
 1)   0.760 us    |                  nfs4_opendata_put.part.8();
 1)   2.340 us    |                }
 1)   4.120 us    |              }
 1) + 10.700 us   |            }
 1) + 12.320 us   |          }
 1) ! 138.360 us  |        }
 1)   0.760 us    |        nfs_mark_delegation_referenced();
 1)   0.820 us    |        nfs_release_seqid();
 1)               |        nfs_may_open() {
 1)               |          nfs_do_access() {
 1)               |            nfs_check_cache_invalid() {
 1)               |              nfs4_have_delegation() {
 1)   0.740 us    |                nfs4_is_valid_delegation();
 1)   0.740 us    |                nfs_mark_delegation_referenced();
 1)   4.060 us    |              }
 1)   5.620 us    |            }
 1)   7.420 us    |          }
 1)   8.940 us    |        }
 1)   0.820 us    |        nfs_mark_delegation_referenced();
 1)   0.740 us    |        nfs4_state_set_mode_locked();
 1)   0.740 us    |        nfs_release_seqid();
 1)   1.220 us    |        nfs_inode_attach_open_context();
 1)   0.740 us    |        nfs4_sequence_free_slot();
 1)               |        nfs4_opendata_put.part.8() {
 1)   0.720 us    |          nfs4_lgopen_release();
 1)               |          nfs_free_seqid() {
 1)   0.740 us    |            nfs_release_seqid();
 1)   2.380 us    |          }
 1)   0.760 us    |          nfs4_sequence_free_slot();
 1)   0.740 us    |          nfs4_put_open_state();
 1)   0.740 us    |          nfs4_put_state_owner();
 1)   0.740 us    |          nfs_sb_deactive();
 1)   0.740 us    |          nfs_fattr_free_names();
 1) + 13.740 us   |        }
 1)   0.760 us    |        nfs4_put_state_owner();
 1) ! 202.420 us  |      }
 1) ! 204.020 us  |    }
 1)   0.880 us    |    nfs_file_set_open_context();
 1) ! 216.020 us  |  }
 1)               |  nfs_file_read() {
 1)   0.760 us    |    nfs_start_io_read();
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.760 us    |            nfs4_is_valid_delegation();
 1)   0.880 us    |            nfs_mark_delegation_referenced();
 1)   4.020 us    |          }
 1)   5.560 us    |        }
 1)   7.120 us    |      }
 1)   8.740 us    |    }
 1)   0.800 us    |    nfs_end_io_read();
 1) + 15.660 us   |  }
 1)               |  nfs_getattr() {
 1)               |    nfs_writepages() {
 1)               |      nfs_pageio_init_write() {
 1)   0.740 us    |        nfs_pageio_init();
 1)   2.260 us    |      }
 1)               |      nfs_pageio_complete() {
 1)               |        nfs_pageio_doio() {
 1)   0.740 us    |          nfs_pgio_current_mirror();
 1)   2.260 us    |        }
 1)   3.960 us    |      }
 1)               |      nfs_io_completion_put.part.0() {
 1)               |        nfs_io_completion_commit() {
 1)   0.920 us    |          nfs_commit_end();
 1)   2.560 us    |        }
 1)   4.200 us    |      }
 1) + 14.760 us   |    }
 1)               |    nfs_attribute_cache_expired() {
 1)               |      nfs4_have_delegation() {
 1)   0.780 us    |        nfs4_is_valid_delegation();
 1)   0.760 us    |        nfs_mark_delegation_referenced();
 1)   3.940 us    |      }
 1)   5.540 us    |    }
 1)               |    nfs_readdirplus_parent_cache_hit.part.5() {
 1)   0.720 us    |      nfs_advise_use_readdirplus();
 1)   2.540 us    |    }
 1) + 27.060 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.760 us    |            nfs4_is_valid_delegation();
 1)   0.740 us    |            nfs_mark_delegation_referenced();
 1)   3.900 us    |          }
 1)   5.420 us    |        }
 1)   7.000 us    |      }
 1)   8.580 us    |    }
 1) + 10.320 us   |  }
 1)               |  nfs_file_mmap() {
 1)               |    nfs_revalidate_mapping() {
 1)               |      nfs_mapping_need_revalidate_inode() {
 1)               |        nfs_check_cache_invalid() {
 1)               |          nfs4_have_delegation() {
 1)   0.780 us    |            nfs4_is_valid_delegation();
 1)   0.760 us    |            nfs_mark_delegation_referenced();
 1)   3.960 us    |          }
 1)   5.660 us    |        }
 1)   7.200 us    |      }
 1)   8.900 us    |    }
 1) + 10.680 us   |  }
 1)   1.040 us    |  nfs4_file_flush();
 1)               |  nfs_file_release() {
 1)               |    nfs_file_clear_open_context() {
 1)               |      nfs4_close_context() {
 1)               |        nfs4_close_sync() {
 1)   0.800 us    |          nfs4_state_set_mode_locked();
 1)               |          nfs4_put_open_state() {
 1)   0.740 us    |            nfs4_put_state_owner();
 1)   2.920 us    |          }
 1)   0.760 us    |          nfs4_put_state_owner();
 1)   8.600 us    |        }
 1) + 10.460 us   |      }
 1)   0.800 us    |      nfs_sb_deactive();
 1) + 14.740 us   |    }
 1) + 16.820 us   |  }
 1)   0.940 us    |  nfs_dentry_delete();
 1)               |  nfs_file_release() {
 1)               |    nfs_file_clear_open_context() {
 1)               |      nfs4_close_context() {
 1)               |        nfs4_close_sync() {
 1)   0.840 us    |          nfs4_state_set_mode_locked();
 1)               |          nfs4_put_open_state() {
 1)   0.900 us    |            nfs4_put_state_owner();
 1)   3.880 us    |          }
 1)   0.760 us    |          nfs4_put_state_owner();
 1) + 10.520 us   |        }
 1) + 13.100 us   |      }
 1)   0.860 us    |      nfs_sb_deactive();
 1) + 19.060 us   |    }
 1) + 25.300 us   |  }
 1)   0.900 us    |  nfs_dentry_delete();
 1)               |  nfs_file_release() {
 1)               |    nfs_file_clear_open_context() {
 1)               |      nfs4_close_context() {
 1)               |        nfs4_close_sync() {
 1)   0.920 us    |          nfs4_state_set_mode_locked();
 1)   0.860 us    |          nfs4_put_open_state();
 1)   0.780 us    |          nfs4_put_state_owner();
 1)   6.120 us    |        }
 1)   7.740 us    |      }
 1)   0.760 us    |      nfs_sb_deactive();
 1) + 11.680 us   |    }
 1) + 13.580 us   |  }
 1)               |  nfs_file_release() {
 1)               |    nfs_file_clear_open_context() {
 1)               |      nfs4_close_context() {
 1)               |        nfs4_close_sync() {
 1)   0.920 us    |          nfs4_state_set_mode_locked();
 1)   0.780 us    |          nfs4_put_open_state();
 1)   0.740 us    |          nfs4_put_state_owner();
 1)   5.940 us    |        }
 1)   7.520 us    |      }
 1)   0.760 us    |      nfs_sb_deactive();
 1) + 11.380 us   |    }
 1) + 13.040 us   |  }

Patch

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 005cfb6e7238..458bfe0137f5 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -31,15 +31,16 @@  struct sock_xprt {
 	 * State of TCP reply receive
 	 */
 	struct {
-		__be32		fraghdr,
+		struct {
+			__be32	fraghdr,
 				xid,
 				calldir;
+		} __attribute__((packed));
 
 		u32		offset,
 				len;
 
-		unsigned long	copied,
-				flags;
+		unsigned long	copied;
 	} recv;
 
 	/*
@@ -76,21 +77,9 @@  struct sock_xprt {
 	void			(*old_error_report)(struct sock *);
 };
 
-/*
- * TCP receive state flags
- */
-#define TCP_RCV_LAST_FRAG	(1UL << 0)
-#define TCP_RCV_COPY_FRAGHDR	(1UL << 1)
-#define TCP_RCV_COPY_XID	(1UL << 2)
-#define TCP_RCV_COPY_DATA	(1UL << 3)
-#define TCP_RCV_READ_CALLDIR	(1UL << 4)
-#define TCP_RCV_COPY_CALLDIR	(1UL << 5)
-
 /*
  * TCP RPC flags
  */
-#define TCP_RPC_REPLY		(1UL << 6)
-
 #define XPRT_SOCK_CONNECTING	1U
 #define XPRT_SOCK_DATA_READY	(2)
 #define XPRT_SOCK_UPD_TIMEOUT	(3)
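
Grouping fraghdr, xid and calldir in a packed anonymous struct is what lets the new receive path pull the whole 12-byte record header (fragment marker, XID and call direction) through a single kvec starting at &recv.fraghdr; xs_read_stream_header() below reads 12 bytes at the start of a message and only 4 for continuation fragments, which carry no XID or call direction. An illustrative compile-time check of the assumed layout (not part of the patch; it would need to live inside a function) might read:

	BUILD_BUG_ON(offsetof(struct sock_xprt, recv.xid) !=
		     offsetof(struct sock_xprt, recv.fraghdr) + sizeof(__be32));
	BUILD_BUG_ON(offsetof(struct sock_xprt, recv.calldir) !=
		     offsetof(struct sock_xprt, recv.xid) + sizeof(__be32));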
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 0aa347194e0f..19e08d12696c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -497,16 +497,6 @@  TRACE_EVENT(xs_tcp_data_ready,
 			__get_str(port), __entry->err, __entry->total)
 );
 
-#define rpc_show_sock_xprt_flags(flags) \
-	__print_flags(flags, "|", \
-		{ TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \
-		{ TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \
-		{ TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \
-		{ TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \
-		{ TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \
-		{ TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \
-		{ TCP_RPC_REPLY, "TCP_RPC_REPLY" })
-
 TRACE_EVENT(xs_tcp_data_recv,
 	TP_PROTO(struct sock_xprt *xs),
 
@@ -516,7 +506,6 @@  TRACE_EVENT(xs_tcp_data_recv,
 		__string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
 		__string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
 		__field(u32, xid)
-		__field(unsigned long, flags)
 		__field(unsigned long, copied)
 		__field(unsigned int, reclen)
 		__field(unsigned long, offset)
@@ -526,15 +515,13 @@  TRACE_EVENT(xs_tcp_data_recv,
 		__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
 		__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
 		__entry->xid = be32_to_cpu(xs->recv.xid);
-		__entry->flags = xs->recv.flags;
 		__entry->copied = xs->recv.copied;
 		__entry->reclen = xs->recv.len;
 		__entry->offset = xs->recv.offset;
 	),
 
-	TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
+	TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%lu",
 			__get_str(addr), __get_str(port), __entry->xid,
-			rpc_show_sock_xprt_flags(__entry->flags),
 			__entry->copied, __entry->reclen, __entry->offset)
 );
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f16406228ead..5269ad98bb08 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,13 +47,13 @@ 
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <net/tcp.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
 
 #include <trace/events/sunrpc.h>
 
 #include "sunrpc.h"
 
-#define RPC_TCP_READ_CHUNK_SZ	(3*512*1024)
-
 static void xs_close(struct rpc_xprt *xprt);
 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 		struct socket *sock);
@@ -325,6 +325,320 @@  static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 		}
 }
 
+static size_t
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
+{
+	size_t i,n;
+
+	if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+		return want;
+	if (want > buf->page_len)
+		want = buf->page_len;
+	n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < n; i++) {
+		if (buf->pages[i])
+			continue;
+		buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
+		if (!buf->pages[i]) {
+			buf->page_len = (i * PAGE_SIZE) - buf->page_base;
+			return buf->page_len;
+		}
+	}
+	return want;
+}
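
Note the page-count arithmetic: with PAGE_SIZE = 4096, page_base = 100 and want = 8000, the data spans bytes 100..8099 of the page array, and n = (100 + 8000 + 4095) >> PAGE_SHIFT = 2, exactly the two pages touched. Because this runs from the receive path, the caller passes GFP_NOWAIT; on failure, page_len is trimmed to what was actually allocated and the unread remainder of the fragment is later discarded.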
+
+static ssize_t
+xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
+{
+	ssize_t ret;
+	if (seek != 0)
+		iov_iter_advance(&msg->msg_iter, seek);
+	ret = sock_recvmsg(sock, msg, flags);
+	return ret > 0 ? ret + seek : ret;
+}
+
+static ssize_t
+xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
+		struct kvec *kvec, size_t count, size_t seek)
+{
+	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+	return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
+		struct bio_vec *bvec, unsigned long nr, size_t count,
+		size_t seek)
+{
+	iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+	return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
+		size_t count)
+{
+	struct kvec kvec = { 0 };
+	return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+}
+
+static ssize_t
+xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
+		struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
+{
+	size_t want, seek_init = seek, offset = 0;
+	ssize_t ret;
+
+	if (seek < buf->head[0].iov_len) {
+		want = min_t(size_t, count, buf->head[0].iov_len);
+		ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto eagain;
+		seek = 0;
+	} else {
+		seek -= buf->head[0].iov_len;
+		offset += buf->head[0].iov_len;
+	}
+	if (buf->page_len && seek < buf->page_len) {
+		want = min_t(size_t, count - offset, buf->page_len);
+		want = xs_alloc_sparse_pages(buf, want, GFP_NOWAIT);
+		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
+				xdr_buf_pagecount(buf),
+				want + buf->page_base,
+				seek + buf->page_base);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto eagain;
+		seek = 0;
+	} else {
+		seek -= buf->page_len;
+		offset += buf->page_len;
+	}
+	if (buf->tail[0].iov_len && seek < buf->tail[0].iov_len) {
+		want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+		ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto eagain;
+	} else
+		offset += buf->tail[0].iov_len;
+	ret = -EMSGSIZE;
+	msg->msg_flags |= MSG_TRUNC;
+out:
+	*read = offset - seek_init;
+	return ret;
+eagain:
+	ret = -EAGAIN;
+	goto out;
+sock_err:
+	offset += seek;
+	goto out;
+}
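
Here seek is the number of bytes of the current message that earlier passes already received: the function walks head, pages and tail in order, skipping whatever seek covers and resuming mid-region where the previous read stopped. With a 128-byte head, for instance, seek = 100 starts the first kvec read 100 bytes into the head, while seek = 200 skips the head entirely and starts 72 bytes into the page data. A short read in any region returns -EAGAIN so the caller can retry when more data arrives; data beyond the tail sets MSG_TRUNC and returns -EMSGSIZE.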
+
+static void
+xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
+{
+	if (!transport->recv.copied) {
+		if (buf->head[0].iov_len >= transport->recv.offset)
+			memcpy(buf->head[0].iov_base,
+					&transport->recv.xid,
+					transport->recv.offset);
+		transport->recv.copied = transport->recv.offset;
+	}
+}
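
On the first pass for a message (recv.copied == 0), recv.offset is 8: the XID and call direction were consumed into the packed header fields, so this replays those 8 bytes into the head of the receive buffer and the XDR layer sees a complete message, without the old TCP_RCV_COPY_XID/TCP_RCV_COPY_CALLDIR bookkeeping.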
+
+static bool
+xs_read_stream_request_done(struct sock_xprt *transport)
+{
+	return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
+}
+
+static ssize_t
+xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
+		int flags, struct rpc_rqst *req)
+{
+	struct xdr_buf *buf = &req->rq_private_buf;
+	size_t want, read;
+	ssize_t ret;
+
+	xs_read_header(transport, buf);
+
+	want = transport->recv.len - transport->recv.offset;
+	ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+			transport->recv.copied + want, transport->recv.copied,
+			&read);
+	transport->recv.offset += read;
+	transport->recv.copied += read;
+	if (transport->recv.offset == transport->recv.len) {
+		if (xs_read_stream_request_done(transport))
+			msg->msg_flags |= MSG_EOR;
+		return transport->recv.copied;
+	}
+
+	switch (ret) {
+	case -EMSGSIZE:
+		return transport->recv.copied;
+	case 0:
+		return -ESHUTDOWN;
+	default:
+		if (ret < 0)
+			return ret;
+	}
+	return -EAGAIN;
+}
+
+static size_t
+xs_read_stream_headersize(bool isfrag)
+{
+	if (isfrag)
+		return sizeof(__be32);
+	return 3 * sizeof(__be32);
+}
+
+static ssize_t
+xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
+		int flags, size_t want, size_t seek)
+{
+	struct kvec kvec = {
+		.iov_base = &transport->recv.fraghdr,
+		.iov_len = want,
+	};
+	return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
+}
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct rpc_rqst *req;
+	ssize_t ret;
+
+	/* Look up and lock the request corresponding to the given XID */
+	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
+	if (!req) {
+		printk(KERN_WARNING "Callback slot table overflowed\n");
+		return -ESHUTDOWN;
+	}
+
+	ret = xs_read_stream_request(transport, msg, flags, req);
+	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+		xprt_complete_bc_request(req, ret);
+
+	return ret;
+}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	return -ESHUTDOWN;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+static ssize_t
+xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct rpc_rqst *req;
+	ssize_t ret = 0;
+
+	/* Look up and lock the request corresponding to the given XID */
+	spin_lock(&xprt->queue_lock);
+	req = xprt_lookup_rqst(xprt, transport->recv.xid);
+	if (!req) {
+		msg->msg_flags |= MSG_TRUNC;
+		goto out;
+	}
+	xprt_pin_rqst(req);
+	spin_unlock(&xprt->queue_lock);
+
+	ret = xs_read_stream_request(transport, msg, flags, req);
+
+	spin_lock(&xprt->queue_lock);
+	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+		xprt_complete_rqst(req->rq_task, ret);
+	xprt_unpin_rqst(req);
+out:
+	spin_unlock(&xprt->queue_lock);
+	return ret;
+}
+
+static ssize_t
+xs_read_stream(struct sock_xprt *transport, int flags)
+{
+	struct msghdr msg = { 0 };
+	size_t want, read = 0;
+	ssize_t ret = 0;
+
+	if (transport->recv.len == 0) {
+		want = xs_read_stream_headersize(transport->recv.copied != 0);
+		ret = xs_read_stream_header(transport, &msg, flags, want,
+				transport->recv.offset);
+		if (ret <= 0)
+			goto out_err;
+		transport->recv.offset = ret;
+		if (ret != want) {
+			ret = -EAGAIN;
+			goto out_err;
+		}
+		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
+			RPC_FRAGMENT_SIZE_MASK;
+		transport->recv.offset -= sizeof(transport->recv.fraghdr);
+		read = ret;
+	}
+
+	switch (be32_to_cpu(transport->recv.calldir)) {
+	case RPC_CALL:
+		ret = xs_read_stream_call(transport, &msg, flags);
+		break;
+	case RPC_REPLY:
+		ret = xs_read_stream_reply(transport, &msg, flags);
+	}
+	if (msg.msg_flags & MSG_TRUNC) {
+		transport->recv.calldir = cpu_to_be32(-1);
+		transport->recv.copied = -1;
+	}
+	if (ret < 0)
+		goto out_err;
+	read += ret;
+	if (transport->recv.offset < transport->recv.len) {
+		ret = xs_read_discard(transport->sock, &msg, flags,
+				transport->recv.len - transport->recv.offset);
+		if (ret <= 0)
+			goto out_err;
+		transport->recv.offset += ret;
+		read += ret;
+	}
+	if (xs_read_stream_request_done(transport)) {
+		trace_xs_tcp_data_recv(transport);
+		transport->recv.copied = 0;
+	}
+	transport->recv.offset = 0;
+	transport->recv.len = 0;
+	return read;
+out_err:
+	switch (ret) {
+	case 0:
+	case -ESHUTDOWN:
+		xprt_force_disconnect(&transport->xprt);
+		return -ESHUTDOWN;
+	}
+	return ret;
+}
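
xs_read_stream() is the old state machine collapsed into straight-line code: read the record marker (plus XID and call direction at the start of a message), dispatch on the call direction, discard any unread remainder of the fragment, and reset for the next one. The marker decode follows standard RPC record marking (RFC 5531); a standalone sketch of just that step, using the same constant values:

	#include <stdbool.h>
	#include <stdint.h>

	#define RPC_LAST_STREAM_FRAGMENT	0x80000000u
	#define RPC_FRAGMENT_SIZE_MASK		0x7fffffffu

	struct rm_frag {
		uint32_t len;	/* payload bytes in this fragment */
		bool last;	/* final fragment of the record? */
	};

	/* marker must already be in host byte order */
	static struct rm_frag rm_decode(uint32_t marker)
	{
		struct rm_frag f = {
			.len  = marker & RPC_FRAGMENT_SIZE_MASK,
			.last = (marker & RPC_LAST_STREAM_FRAGMENT) != 0,
		};
		return f;	/* e.g. 0x80000070 -> { 112, true } */
	}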
+
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
 static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
@@ -484,6 +798,12 @@  static int xs_nospace(struct rpc_rqst *req)
 	return ret;
 }
 
+static void
+xs_stream_prepare_request(struct rpc_rqst *req)
+{
+	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+}
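
The prepare hook runs before the request is transmitted, so the bio_vec array for the receive buffer is allocated up front with GFP_NOIO and the receive path only ever needs the GFP_NOWAIT page allocations in xs_alloc_sparse_pages(); an allocation failure here surfaces through tk_status before the request reaches the wire.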
+
 /*
  * Determine if the previous message in the stream was aborted before it
  * could complete transmission.
@@ -1157,263 +1477,7 @@  static void xs_tcp_force_close(struct rpc_xprt *xprt)
 	xprt_force_disconnect(xprt);
 }
 
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-	size_t len, used;
-	char *p;
-
-	p = ((char *) &transport->recv.fraghdr) + transport->recv.offset;
-	len = sizeof(transport->recv.fraghdr) - transport->recv.offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->recv.offset += used;
-	if (used != len)
-		return;
-
-	transport->recv.len = ntohl(transport->recv.fraghdr);
-	if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT)
-		transport->recv.flags |= TCP_RCV_LAST_FRAG;
-	else
-		transport->recv.flags &= ~TCP_RCV_LAST_FRAG;
-	transport->recv.len &= RPC_FRAGMENT_SIZE_MASK;
-
-	transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR;
-	transport->recv.offset = 0;
-
-	/* Sanity check of the record length */
-	if (unlikely(transport->recv.len < 8)) {
-		dprintk("RPC:       invalid TCP record fragment length\n");
-		xs_tcp_force_close(xprt);
-		return;
-	}
-	dprintk("RPC:       reading TCP record fragment of length %d\n",
-			transport->recv.len);
-}
-
-static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
-{
-	if (transport->recv.offset == transport->recv.len) {
-		transport->recv.flags |= TCP_RCV_COPY_FRAGHDR;
-		transport->recv.offset = 0;
-		if (transport->recv.flags & TCP_RCV_LAST_FRAG) {
-			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-			transport->recv.flags |= TCP_RCV_COPY_XID;
-			transport->recv.copied = 0;
-		}
-	}
-}
-
-static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-	size_t len, used;
-	char *p;
-
-	len = sizeof(transport->recv.xid) - transport->recv.offset;
-	dprintk("RPC:       reading XID (%zu bytes)\n", len);
-	p = ((char *) &transport->recv.xid) + transport->recv.offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->recv.offset += used;
-	if (used != len)
-		return;
-	transport->recv.flags &= ~TCP_RCV_COPY_XID;
-	transport->recv.flags |= TCP_RCV_READ_CALLDIR;
-	transport->recv.copied = 4;
-	dprintk("RPC:       reading %s XID %08x\n",
-			(transport->recv.flags & TCP_RPC_REPLY) ? "reply for"
-							      : "request with",
-			ntohl(transport->recv.xid));
-	xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
-				       struct xdr_skb_reader *desc)
-{
-	size_t len, used;
-	u32 offset;
-	char *p;
-
-	/*
-	 * We want transport->recv.offset to be 8 at the end of this routine
-	 * (4 bytes for the xid and 4 bytes for the call/reply flag).
-	 * When this function is called for the first time,
-	 * transport->recv.offset is 4 (after having already read the xid).
-	 */
-	offset = transport->recv.offset - sizeof(transport->recv.xid);
-	len = sizeof(transport->recv.calldir) - offset;
-	dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
-	p = ((char *) &transport->recv.calldir) + offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->recv.offset += used;
-	if (used != len)
-		return;
-	transport->recv.flags &= ~TCP_RCV_READ_CALLDIR;
-	/*
-	 * We don't yet have the XDR buffer, so we will write the calldir
-	 * out after we get the buffer from the 'struct rpc_rqst'
-	 */
-	switch (ntohl(transport->recv.calldir)) {
-	case RPC_REPLY:
-		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
-		transport->recv.flags |= TCP_RCV_COPY_DATA;
-		transport->recv.flags |= TCP_RPC_REPLY;
-		break;
-	case RPC_CALL:
-		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
-		transport->recv.flags |= TCP_RCV_COPY_DATA;
-		transport->recv.flags &= ~TCP_RPC_REPLY;
-		break;
-	default:
-		dprintk("RPC:       invalid request message type\n");
-		xs_tcp_force_close(&transport->xprt);
-	}
-	xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
-				     struct xdr_skb_reader *desc,
-				     struct rpc_rqst *req)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct xdr_buf *rcvbuf;
-	size_t len;
-	ssize_t r;
-
-	rcvbuf = &req->rq_private_buf;
-
-	if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) {
-		/*
-		 * Save the RPC direction in the XDR buffer
-		 */
-		memcpy(rcvbuf->head[0].iov_base + transport->recv.copied,
-			&transport->recv.calldir,
-			sizeof(transport->recv.calldir));
-		transport->recv.copied += sizeof(transport->recv.calldir);
-		transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR;
-	}
-
-	len = desc->count;
-	if (len > transport->recv.len - transport->recv.offset)
-		desc->count = transport->recv.len - transport->recv.offset;
-	r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied,
-					  desc, xdr_skb_read_bits);
-
-	if (desc->count) {
-		/* Error when copying to the receive buffer,
-		 * usually because we weren't able to allocate
-		 * additional buffer pages. All we can do now
-		 * is turn off TCP_RCV_COPY_DATA, so the request
-		 * will not receive any additional updates,
-		 * and time out.
-		 * Any remaining data from this record will
-		 * be discarded.
-		 */
-		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-		dprintk("RPC:       XID %08x truncated request\n",
-				ntohl(transport->recv.xid));
-		dprintk("RPC:       xprt = %p, recv.copied = %lu, "
-				"recv.offset = %u, recv.len = %u\n",
-				xprt, transport->recv.copied,
-				transport->recv.offset, transport->recv.len);
-		return;
-	}
-
-	transport->recv.copied += r;
-	transport->recv.offset += r;
-	desc->count = len - r;
-
-	dprintk("RPC:       XID %08x read %zd bytes\n",
-			ntohl(transport->recv.xid), r);
-	dprintk("RPC:       xprt = %p, recv.copied = %lu, recv.offset = %u, "
-			"recv.len = %u\n", xprt, transport->recv.copied,
-			transport->recv.offset, transport->recv.len);
-
-	if (transport->recv.copied == req->rq_private_buf.buflen)
-		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-	else if (transport->recv.offset == transport->recv.len) {
-		if (transport->recv.flags & TCP_RCV_LAST_FRAG)
-			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-	}
-}
-
-/*
- * Finds the request corresponding to the RPC xid and invokes the common
- * tcp read code to read the data.
- */
-static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
-				    struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct rpc_rqst *req;
-
-	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->recv.xid));
-
-	/* Find and lock the request corresponding to this xid */
-	spin_lock(&xprt->queue_lock);
-	req = xprt_lookup_rqst(xprt, transport->recv.xid);
-	if (!req) {
-		dprintk("RPC:       XID %08x request not found!\n",
-				ntohl(transport->recv.xid));
-		spin_unlock(&xprt->queue_lock);
-		return -1;
-	}
-	xprt_pin_rqst(req);
-	spin_unlock(&xprt->queue_lock);
-
-	xs_tcp_read_common(xprt, desc, req);
-
-	spin_lock(&xprt->queue_lock);
-	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
-		xprt_complete_rqst(req->rq_task, transport->recv.copied);
-	xprt_unpin_rqst(req);
-	spin_unlock(&xprt->queue_lock);
-	return 0;
-}
-
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Obtains an rpc_rqst previously allocated and invokes the common
- * tcp read code to read the data.  The result is placed in the callback
- * queue.
- * If we're unable to obtain the rpc_rqst we schedule the closing of the
- * connection and return -1.
- */
-static int xs_tcp_read_callback(struct rpc_xprt *xprt,
-				       struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct rpc_rqst *req;
-
-	/* Look up the request corresponding to the given XID */
-	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
-	if (req == NULL) {
-		printk(KERN_WARNING "Callback slot table overflowed\n");
-		xprt_force_disconnect(xprt);
-		return -1;
-	}
-
-	dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
-	xs_tcp_read_common(xprt, desc, req);
-
-	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
-		xprt_complete_bc_request(req, transport->recv.copied);
-
-	return 0;
-}
-
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-					struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-
-	return (transport->recv.flags & TCP_RPC_REPLY) ?
-		xs_tcp_read_reply(xprt, desc) :
-		xs_tcp_read_callback(xprt, desc);
-}
-
 static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
 {
 	int ret;
@@ -1429,106 +1493,14 @@  static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
 {
 	return PAGE_SIZE;
 }
-#else
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-					struct xdr_skb_reader *desc)
-{
-	return xs_tcp_read_reply(xprt, desc);
-}
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-/*
- * Read data off the transport.  This can be either an RPC_CALL or an
- * RPC_REPLY.  Relay the processing to helper functions.
- */
-static void xs_tcp_read_data(struct rpc_xprt *xprt,
-				    struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-
-	if (_xs_tcp_read_data(xprt, desc) == 0)
-		xs_tcp_check_fraghdr(transport);
-	else {
-		/*
-		 * The transport_lock protects the request handling.
-		 * There's no need to hold it to update the recv.flags.
-		 */
-		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-	}
-}
-
-static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-	size_t len;
-
-	len = transport->recv.len - transport->recv.offset;
-	if (len > desc->count)
-		len = desc->count;
-	desc->count -= len;
-	desc->offset += len;
-	transport->recv.offset += len;
-	dprintk("RPC:       discarded %zu bytes\n", len);
-	xs_tcp_check_fraghdr(transport);
-}
-
-static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
-{
-	struct rpc_xprt *xprt = rd_desc->arg.data;
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-	struct xdr_skb_reader desc = {
-		.skb	= skb,
-		.offset	= offset,
-		.count	= len,
-	};
-	size_t ret;
-
-	dprintk("RPC:       xs_tcp_data_recv started\n");
-	do {
-		trace_xs_tcp_data_recv(transport);
-		/* Read in a new fragment marker if necessary */
-		/* Can we ever really expect to get completely empty fragments? */
-		if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) {
-			xs_tcp_read_fraghdr(xprt, &desc);
-			continue;
-		}
-		/* Read in the xid if necessary */
-		if (transport->recv.flags & TCP_RCV_COPY_XID) {
-			xs_tcp_read_xid(transport, &desc);
-			continue;
-		}
-		/* Read in the call/reply flag */
-		if (transport->recv.flags & TCP_RCV_READ_CALLDIR) {
-			xs_tcp_read_calldir(transport, &desc);
-			continue;
-		}
-		/* Read in the request data */
-		if (transport->recv.flags & TCP_RCV_COPY_DATA) {
-			xs_tcp_read_data(xprt, &desc);
-			continue;
-		}
-		/* Skip over any trailing bytes on short reads */
-		xs_tcp_read_discard(transport, &desc);
-	} while (desc.count);
-	ret = len - desc.count;
-	if (ret < rd_desc->count)
-		rd_desc->count -= ret;
-	else
-		rd_desc->count = 0;
-	trace_xs_tcp_data_recv(transport);
-	dprintk("RPC:       xs_tcp_data_recv done\n");
-	return ret;
-}
-
 static void xs_tcp_data_receive(struct sock_xprt *transport)
 {
 	struct rpc_xprt *xprt = &transport->xprt;
 	struct sock *sk;
-	read_descriptor_t rd_desc = {
-		.arg.data = xprt,
-	};
-	unsigned long total = 0;
-	int read = 0;
+	size_t read = 0;
+	ssize_t ret = 0;
 
 restart:
 	mutex_lock(&transport->recv_mutex);
@@ -1536,18 +1508,12 @@  static void xs_tcp_data_receive(struct sock_xprt *transport)
 	if (sk == NULL)
 		goto out;
 
-	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
 	for (;;) {
-		rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
-		lock_sock(sk);
-		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
-		if (rd_desc.count != 0 || read < 0) {
-			clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
-			release_sock(sk);
+		clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+		ret = xs_read_stream(transport, MSG_DONTWAIT | MSG_NOSIGNAL);
+		if (ret < 0)
 			break;
-		}
-		release_sock(sk);
-		total += read;
+		read += ret;
 		if (need_resched()) {
 			mutex_unlock(&transport->recv_mutex);
 			cond_resched();
@@ -1558,7 +1524,7 @@  static void xs_tcp_data_receive(struct sock_xprt *transport)
 		queue_work(xprtiod_workqueue, &transport->recv_worker);
 out:
 	mutex_unlock(&transport->recv_mutex);
-	trace_xs_tcp_data_ready(xprt, read, total);
+	trace_xs_tcp_data_ready(xprt, ret, read);
 }
 
 static void xs_tcp_data_receive_workfn(struct work_struct *work)
@@ -2380,7 +2346,6 @@  static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	transport->recv.offset = 0;
 	transport->recv.len = 0;
 	transport->recv.copied = 0;
-	transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
 	transport->xmit.offset = 0;
 
 	/* Tell the socket layer to start connecting... */
@@ -2802,6 +2767,7 @@  static const struct rpc_xprt_ops xs_tcp_ops = {
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
+	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_tcp_shutdown,
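
For experimenting with the pattern outside the kernel, the scatter read that xs_read_stream() and xs_read_xdr_buf() implement with iterators has a direct userspace analogue: read the 4-byte record marker, then receive the fragment body straight into an iovec array covering the header and page-sized buffers. A sketch under simplified assumptions (blocking socket, buffers at least as large as the advertised fragment, minimal error handling), not the kernel code:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <sys/socket.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	#define RM_LAST_FRAG	0x80000000u	/* RFC 5531 record marking */
	#define RM_SIZE_MASK	0x7fffffffu

	/* Trim an iovec array in place so it covers exactly 'len' bytes;
	 * returns the number of vectors actually needed.
	 */
	static size_t iov_trim(struct iovec *iov, size_t iovcnt, size_t len)
	{
		size_t i;

		for (i = 0; i < iovcnt && len > 0; i++) {
			if (iov[i].iov_len > len)
				iov[i].iov_len = len;
			len -= iov[i].iov_len;
		}
		return i;
	}

	/* Read one record fragment: the marker via a plain contiguous
	 * receive, then the body scattered directly into the caller's
	 * buffers with a single recvmsg(). Returns the fragment length
	 * or -1; *last is set on the final fragment of a record.
	 */
	static ssize_t recv_rpc_fragment(int fd, struct iovec *iov,
					 size_t iovcnt, int *last)
	{
		uint32_t marker;
		size_t len;

		if (recv(fd, &marker, sizeof(marker), MSG_WAITALL) !=
		    sizeof(marker))
			return -1;
		marker = ntohl(marker);
		*last = (marker & RM_LAST_FRAG) != 0;
		len = marker & RM_SIZE_MASK;

		struct msghdr msg = {
			.msg_iov = iov,
			.msg_iovlen = iov_trim(iov, iovcnt, len),
		};
		if (recvmsg(fd, &msg, MSG_WAITALL) != (ssize_t)len)
			return -1;
		return len;
	}

The point is the same one the patch exploits: the socket layer writes each byte straight into its final destination, whether that is a small header kvec or a page in the middle of the receive buffer, with no intermediate copy.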