Message ID | 20180917130335.112832-41-trond.myklebust@hammerspace.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Convert RPC client transmission to a queued model | expand |
On Mon, 2018-09-17 at 09:03 -0400, Trond Myklebust wrote: > Most of this code should also be reusable with other socket types. > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > --- > include/linux/sunrpc/xprtsock.h | 19 +- > include/trace/events/sunrpc.h | 15 +- > net/sunrpc/xprtsock.c | 694 +++++++++++++++--------------- > -- > 3 files changed, 335 insertions(+), 393 deletions(-) > > diff --git a/include/linux/sunrpc/xprtsock.h > b/include/linux/sunrpc/xprtsock.h > index 005cfb6e7238..458bfe0137f5 100644 > --- a/include/linux/sunrpc/xprtsock.h > +++ b/include/linux/sunrpc/xprtsock.h > @@ -31,15 +31,16 @@ struct sock_xprt { > * State of TCP reply receive > */ > struct { > - __be32 fraghdr, > + struct { > + __be32 fraghdr, > xid, > calldir; > + } __attribute__((packed)); > > u32 offset, > len; > > - unsigned long copied, > - flags; > + unsigned long copied; > } recv; > > /* > @@ -76,21 +77,9 @@ struct sock_xprt { > void (*old_error_report)(struct sock *); > }; > > -/* > - * TCP receive state flags > - */ > -#define TCP_RCV_LAST_FRAG (1UL << 0) > -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) > -#define TCP_RCV_COPY_XID (1UL << 2) > -#define TCP_RCV_COPY_DATA (1UL << 3) > -#define TCP_RCV_READ_CALLDIR (1UL << 4) > -#define TCP_RCV_COPY_CALLDIR (1UL << 5) > - > /* > * TCP RPC flags > */ > -#define TCP_RPC_REPLY (1UL << 6) > - > #define XPRT_SOCK_CONNECTING 1U > #define XPRT_SOCK_DATA_READY (2) > #define XPRT_SOCK_UPD_TIMEOUT (3) > diff --git a/include/trace/events/sunrpc.h > b/include/trace/events/sunrpc.h > index 0aa347194e0f..19e08d12696c 100644 > --- a/include/trace/events/sunrpc.h > +++ b/include/trace/events/sunrpc.h > @@ -497,16 +497,6 @@ TRACE_EVENT(xs_tcp_data_ready, > __get_str(port), __entry->err, __entry->total) > ); > > -#define rpc_show_sock_xprt_flags(flags) \ > - __print_flags(flags, "|", \ > - { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \ > - { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \ > - { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" 
}, \ > - { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \ > - { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \ > - { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \ > - { TCP_RPC_REPLY, "TCP_RPC_REPLY" }) > - > TRACE_EVENT(xs_tcp_data_recv, > TP_PROTO(struct sock_xprt *xs), > > @@ -516,7 +506,6 @@ TRACE_EVENT(xs_tcp_data_recv, > __string(addr, xs- > >xprt.address_strings[RPC_DISPLAY_ADDR]) > __string(port, xs- > >xprt.address_strings[RPC_DISPLAY_PORT]) > __field(u32, xid) > - __field(unsigned long, flags) > __field(unsigned long, copied) > __field(unsigned int, reclen) > __field(unsigned long, offset) > @@ -526,15 +515,13 @@ TRACE_EVENT(xs_tcp_data_recv, > __assign_str(addr, xs- > >xprt.address_strings[RPC_DISPLAY_ADDR]); > __assign_str(port, xs- > >xprt.address_strings[RPC_DISPLAY_PORT]); > __entry->xid = be32_to_cpu(xs->recv.xid); > - __entry->flags = xs->recv.flags; > __entry->copied = xs->recv.copied; > __entry->reclen = xs->recv.len; > __entry->offset = xs->recv.offset; > ), > > - TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu > reclen=%u offset=%lu", > + TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u > offset=%lu", > __get_str(addr), __get_str(port), __entry->xid, > - rpc_show_sock_xprt_flags(__entry->flags), > __entry->copied, __entry->reclen, __entry- > >offset) > ); > > diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c > index f16406228ead..5269ad98bb08 100644 > --- a/net/sunrpc/xprtsock.c > +++ b/net/sunrpc/xprtsock.c > @@ -47,13 +47,13 @@ > #include <net/checksum.h> > #include <net/udp.h> > #include <net/tcp.h> > +#include <linux/bvec.h> > +#include <linux/uio.h> > > #include <trace/events/sunrpc.h> > > #include "sunrpc.h" > > -#define RPC_TCP_READ_CHUNK_SZ (3*512*1024) > - > static void xs_close(struct rpc_xprt *xprt); > static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, > struct socket *sock); > @@ -325,6 +325,320 @@ static void xs_free_peer_addresses(struct > rpc_xprt *xprt) > } > } > > +static size_t > 
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) > +{ > + size_t i,n; > + > + if (!(buf->flags & XDRBUF_SPARSE_PAGES)) > + return want; > + if (want > buf->page_len) > + want = buf->page_len; > + n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; > + for (i = 0; i < n; i++) { > + if (buf->pages[i]) > + continue; > + buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); > + if (!buf->pages[i]) { > + buf->page_len = (i * PAGE_SIZE) - buf- > >page_base; > + return buf->page_len; > + } > + } > + return want; > +} > + > +static ssize_t > +xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, > size_t seek) > +{ > + ssize_t ret; > + if (seek != 0) > + iov_iter_advance(&msg->msg_iter, seek); > + ret = sock_recvmsg(sock, msg, flags); > + return ret > 0 ? ret + seek : ret; > +} > + > +static ssize_t > +xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, > + struct kvec *kvec, size_t count, size_t seek) > +{ > + iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, > count); > + return xs_sock_recvmsg(sock, msg, flags, seek); > +} > + > +static ssize_t > +xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, > + struct bio_vec *bvec, unsigned long nr, size_t count, > + size_t seek) > +{ > + iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, > count); > + return xs_sock_recvmsg(sock, msg, flags, seek); > +} > + > +static ssize_t > +xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, > + size_t count) > +{ > + struct kvec kvec = { 0 }; > + return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, > 0); > +} > + > +static ssize_t > +xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, > + struct xdr_buf *buf, size_t count, size_t seek, size_t > *read) > +{ > + size_t want, seek_init = seek, offset = 0; > + ssize_t ret; > + > + if (seek < buf->head[0].iov_len) { > + want = min_t(size_t, count, buf->head[0].iov_len); > + ret = xs_read_kvec(sock, msg, flags, 
&buf->head[0], > want, seek); > + if (ret <= 0) > + goto sock_err; > + offset += ret; > + if (offset == count || msg->msg_flags & > (MSG_EOR|MSG_TRUNC)) > + goto out; > + if (ret != want) > + goto eagain; > + seek = 0; > + } else { > + seek -= buf->head[0].iov_len; > + offset += buf->head[0].iov_len; > + } > + if (buf->page_len && seek < buf->page_len) { > + want = min_t(size_t, count - offset, buf->page_len); > + want = xs_alloc_sparse_pages(buf, want, GFP_NOWAIT); > + ret = xs_read_bvec(sock, msg, flags, buf->bvec, > + xdr_buf_pagecount(buf), > + want + buf->page_base, > + seek + buf->page_base); > + if (ret <= 0) > + goto sock_err; > + offset += ret; There is a bug here that has been fixed up in the linux-nfs.org testing branch. > + if (offset == count || msg->msg_flags & > (MSG_EOR|MSG_TRUNC)) > + goto out; > + if (ret != want) > + goto eagain; > + seek = 0; > + } else { > + seek -= buf->page_len; > + offset += buf->page_len; > + } > + if (buf->tail[0].iov_len && seek < buf->tail[0].iov_len) { > + want = min_t(size_t, count - offset, buf- > >tail[0].iov_len); > + ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], > want, seek); > + if (ret <= 0) > + goto sock_err; > + offset += ret; > + if (offset == count || msg->msg_flags & > (MSG_EOR|MSG_TRUNC)) > + goto out; > + if (ret != want) > + goto eagain; > + } else > + offset += buf->tail[0].iov_len; > + ret = -EMSGSIZE; > + msg->msg_flags |= MSG_TRUNC; > +out: > + *read = offset - seek_init; > + return ret; > +eagain: > + ret = -EAGAIN; > + goto out; > +sock_err: > + offset += seek; > + goto out; > +} > + > +static void > +xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf) > +{ > + if (!transport->recv.copied) { > + if (buf->head[0].iov_len >= transport->recv.offset) > + memcpy(buf->head[0].iov_base, > + &transport->recv.xid, > + transport->recv.offset); > + transport->recv.copied = transport->recv.offset; > + } > +} > + > +static bool > +xs_read_stream_request_done(struct sock_xprt *transport) > +{ 
> + return transport->recv.fraghdr & > cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); > +} > + > +static ssize_t > +xs_read_stream_request(struct sock_xprt *transport, struct msghdr > *msg, > + int flags, struct rpc_rqst *req) > +{ > + struct xdr_buf *buf = &req->rq_private_buf; > + size_t want, read; > + ssize_t ret; > + > + xs_read_header(transport, buf); > + > + want = transport->recv.len - transport->recv.offset; > + ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, > + transport->recv.copied + want, transport- > >recv.copied, > + &read); > + transport->recv.offset += read; > + transport->recv.copied += read; > + if (transport->recv.offset == transport->recv.len) { > + if (xs_read_stream_request_done(transport)) > + msg->msg_flags |= MSG_EOR; > + return transport->recv.copied; > + } > + > + switch (ret) { > + case -EMSGSIZE: > + return transport->recv.copied; > + case 0: > + return -ESHUTDOWN; > + default: > + if (ret < 0) > + return ret; > + } > + return -EAGAIN; > +} > + > +static size_t > +xs_read_stream_headersize(bool isfrag) > +{ > + if (isfrag) > + return sizeof(__be32); > + return 3 * sizeof(__be32); > +} > + > +static ssize_t > +xs_read_stream_header(struct sock_xprt *transport, struct msghdr > *msg, > + int flags, size_t want, size_t seek) > +{ > + struct kvec kvec = { > + .iov_base = &transport->recv.fraghdr, > + .iov_len = want, > + }; > + return xs_read_kvec(transport->sock, msg, flags, &kvec, want, > seek); > +} > + > +#if defined(CONFIG_SUNRPC_BACKCHANNEL) > +static ssize_t > +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, > int flags) > +{ > + struct rpc_xprt *xprt = &transport->xprt; > + struct rpc_rqst *req; > + ssize_t ret; > + > + /* Look up and lock the request corresponding to the given XID > */ > + req = xprt_lookup_bc_request(xprt, transport->recv.xid); > + if (!req) { > + printk(KERN_WARNING "Callback slot table > overflowed\n"); > + return -ESHUTDOWN; > + } > + > + ret = xs_read_stream_request(transport, msg, flags, 
req); > + if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) > + xprt_complete_bc_request(req, ret); > + > + return ret; > +} > +#else /* CONFIG_SUNRPC_BACKCHANNEL */ > +static ssize_t > +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, > int flags) > +{ > + return -ESHUTDOWN; > +} > +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ > + > +static ssize_t > +xs_read_stream_reply(struct sock_xprt *transport, struct msghdr > *msg, int flags) > +{ > + struct rpc_xprt *xprt = &transport->xprt; > + struct rpc_rqst *req; > + ssize_t ret = 0; > + > + /* Look up and lock the request corresponding to the given XID > */ > + spin_lock(&xprt->queue_lock); > + req = xprt_lookup_rqst(xprt, transport->recv.xid); > + if (!req) { > + msg->msg_flags |= MSG_TRUNC; > + goto out; > + } > + xprt_pin_rqst(req); > + spin_unlock(&xprt->queue_lock); > + > + ret = xs_read_stream_request(transport, msg, flags, req); > + > + spin_lock(&xprt->queue_lock); > + if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) > + xprt_complete_rqst(req->rq_task, ret); > + xprt_unpin_rqst(req); > +out: > + spin_unlock(&xprt->queue_lock); > + return ret; > +} > + > +static ssize_t > +xs_read_stream(struct sock_xprt *transport, int flags) > +{ > + struct msghdr msg = { 0 }; > + size_t want, read = 0; > + ssize_t ret = 0; > + > + if (transport->recv.len == 0) { > + want = xs_read_stream_headersize(transport->recv.copied > != 0); > + ret = xs_read_stream_header(transport, &msg, flags, > want, > + transport->recv.offset); > + if (ret <= 0) > + goto out_err; > + transport->recv.offset = ret; > + if (ret != want) { > + ret = -EAGAIN; > + goto out_err; > + } > + transport->recv.len = be32_to_cpu(transport- > >recv.fraghdr) & > + RPC_FRAGMENT_SIZE_MASK; > + transport->recv.offset -= sizeof(transport- > >recv.fraghdr); > + read = ret; > + } > + > + switch (be32_to_cpu(transport->recv.calldir)) { > + case RPC_CALL: > + ret = xs_read_stream_call(transport, &msg, flags); > + break; > + case RPC_REPLY: > + ret = 
xs_read_stream_reply(transport, &msg, flags); > + } > + if (msg.msg_flags & MSG_TRUNC) { > + transport->recv.calldir = cpu_to_be32(-1); > + transport->recv.copied = -1; > + } > + if (ret < 0) > + goto out_err; > + read += ret; > + if (transport->recv.offset < transport->recv.len) { > + ret = xs_read_discard(transport->sock, &msg, flags, > + transport->recv.len - transport- > >recv.offset); > + if (ret <= 0) > + goto out_err; > + transport->recv.offset += ret; > + read += ret; ...and another bug here. > + } > + if (xs_read_stream_request_done(transport)) { > + trace_xs_tcp_data_recv(transport); > + transport->recv.copied = 0; > + } > + transport->recv.offset = 0; > + transport->recv.len = 0; > + return read; > +out_err: > + switch (ret) { > + case 0: > + case -ESHUTDOWN: > + xprt_force_disconnect(&transport->xprt); > + return -ESHUTDOWN; > + } > + return ret; > +} > + > #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) > > static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, > int addrlen, struct kvec *vec, unsigned int base, int more) > @@ -484,6 +798,12 @@ static int xs_nospace(struct rpc_rqst *req) > return ret; > } > > +static void > +xs_stream_prepare_request(struct rpc_rqst *req) > +{ > + req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, > GFP_NOIO); > +} > + > /* > * Determine if the previous message in the stream was aborted > before it > * could complete transmission. 
> @@ -1157,263 +1477,7 @@ static void xs_tcp_force_close(struct > rpc_xprt *xprt) > xprt_force_disconnect(xprt); > } > > -static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct > xdr_skb_reader *desc) > -{ > - struct sock_xprt *transport = container_of(xprt, struct > sock_xprt, xprt); > - size_t len, used; > - char *p; > - > - p = ((char *) &transport->recv.fraghdr) + transport- > >recv.offset; > - len = sizeof(transport->recv.fraghdr) - transport->recv.offset; > - used = xdr_skb_read_bits(desc, p, len); > - transport->recv.offset += used; > - if (used != len) > - return; > - > - transport->recv.len = ntohl(transport->recv.fraghdr); > - if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT) > - transport->recv.flags |= TCP_RCV_LAST_FRAG; > - else > - transport->recv.flags &= ~TCP_RCV_LAST_FRAG; > - transport->recv.len &= RPC_FRAGMENT_SIZE_MASK; > - > - transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR; > - transport->recv.offset = 0; > - > - /* Sanity check of the record length */ > - if (unlikely(transport->recv.len < 8)) { > - dprintk("RPC: invalid TCP record fragment > length\n"); > - xs_tcp_force_close(xprt); > - return; > - } > - dprintk("RPC: reading TCP record fragment of length > %d\n", > - transport->recv.len); > -} > - > -static void xs_tcp_check_fraghdr(struct sock_xprt *transport) > -{ > - if (transport->recv.offset == transport->recv.len) { > - transport->recv.flags |= TCP_RCV_COPY_FRAGHDR; > - transport->recv.offset = 0; > - if (transport->recv.flags & TCP_RCV_LAST_FRAG) { > - transport->recv.flags &= ~TCP_RCV_COPY_DATA; > - transport->recv.flags |= TCP_RCV_COPY_XID; > - transport->recv.copied = 0; > - } > - } > -} > - > -static inline void xs_tcp_read_xid(struct sock_xprt *transport, > struct xdr_skb_reader *desc) > -{ > - size_t len, used; > - char *p; > - > - len = sizeof(transport->recv.xid) - transport->recv.offset; > - dprintk("RPC: reading XID (%zu bytes)\n", len); > - p = ((char *) &transport->recv.xid) + transport->recv.offset; > - 
used = xdr_skb_read_bits(desc, p, len); > - transport->recv.offset += used; > - if (used != len) > - return; > - transport->recv.flags &= ~TCP_RCV_COPY_XID; > - transport->recv.flags |= TCP_RCV_READ_CALLDIR; > - transport->recv.copied = 4; > - dprintk("RPC: reading %s XID %08x\n", > - (transport->recv.flags & TCP_RPC_REPLY) ? > "reply for" > - : > "request with", > - ntohl(transport->recv.xid)); > - xs_tcp_check_fraghdr(transport); > -} > - > -static inline void xs_tcp_read_calldir(struct sock_xprt *transport, > - struct xdr_skb_reader *desc) > -{ > - size_t len, used; > - u32 offset; > - char *p; > - > - /* > - * We want transport->recv.offset to be 8 at the end of this > routine > - * (4 bytes for the xid and 4 bytes for the call/reply flag). > - * When this function is called for the first time, > - * transport->recv.offset is 4 (after having already read the > xid). > - */ > - offset = transport->recv.offset - sizeof(transport->recv.xid); > - len = sizeof(transport->recv.calldir) - offset; > - dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", > len); > - p = ((char *) &transport->recv.calldir) + offset; > - used = xdr_skb_read_bits(desc, p, len); > - transport->recv.offset += used; > - if (used != len) > - return; > - transport->recv.flags &= ~TCP_RCV_READ_CALLDIR; > - /* > - * We don't yet have the XDR buffer, so we will write the > calldir > - * out after we get the buffer from the 'struct rpc_rqst' > - */ > - switch (ntohl(transport->recv.calldir)) { > - case RPC_REPLY: > - transport->recv.flags |= TCP_RCV_COPY_CALLDIR; > - transport->recv.flags |= TCP_RCV_COPY_DATA; > - transport->recv.flags |= TCP_RPC_REPLY; > - break; > - case RPC_CALL: > - transport->recv.flags |= TCP_RCV_COPY_CALLDIR; > - transport->recv.flags |= TCP_RCV_COPY_DATA; > - transport->recv.flags &= ~TCP_RPC_REPLY; > - break; > - default: > - dprintk("RPC: invalid request message type\n"); > - xs_tcp_force_close(&transport->xprt); > - } > - xs_tcp_check_fraghdr(transport); > -} > - > 
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt, > - struct xdr_skb_reader *desc, > - struct rpc_rqst *req) > -{ > - struct sock_xprt *transport = > - container_of(xprt, struct sock_xprt, > xprt); > - struct xdr_buf *rcvbuf; > - size_t len; > - ssize_t r; > - > - rcvbuf = &req->rq_private_buf; > - > - if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) { > - /* > - * Save the RPC direction in the XDR buffer > - */ > - memcpy(rcvbuf->head[0].iov_base + transport- > >recv.copied, > - &transport->recv.calldir, > - sizeof(transport->recv.calldir)); > - transport->recv.copied += sizeof(transport- > >recv.calldir); > - transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR; > - } > - > - len = desc->count; > - if (len > transport->recv.len - transport->recv.offset) > - desc->count = transport->recv.len - transport- > >recv.offset; > - r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied, > - desc, xdr_skb_read_bits); > - > - if (desc->count) { > - /* Error when copying to the receive buffer, > - * usually because we weren't able to allocate > - * additional buffer pages. All we can do now > - * is turn off TCP_RCV_COPY_DATA, so the request > - * will not receive any additional updates, > - * and time out. > - * Any remaining data from this record will > - * be discarded. 
> - */ > - transport->recv.flags &= ~TCP_RCV_COPY_DATA; > - dprintk("RPC: XID %08x truncated request\n", > - ntohl(transport->recv.xid)); > - dprintk("RPC: xprt = %p, recv.copied = %lu, " > - "recv.offset = %u, recv.len = %u\n", > - xprt, transport->recv.copied, > - transport->recv.offset, transport- > >recv.len); > - return; > - } > - > - transport->recv.copied += r; > - transport->recv.offset += r; > - desc->count = len - r; > - > - dprintk("RPC: XID %08x read %zd bytes\n", > - ntohl(transport->recv.xid), r); > - dprintk("RPC: xprt = %p, recv.copied = %lu, recv.offset = > %u, " > - "recv.len = %u\n", xprt, transport- > >recv.copied, > - transport->recv.offset, transport->recv.len); > - > - if (transport->recv.copied == req->rq_private_buf.buflen) > - transport->recv.flags &= ~TCP_RCV_COPY_DATA; > - else if (transport->recv.offset == transport->recv.len) { > - if (transport->recv.flags & TCP_RCV_LAST_FRAG) > - transport->recv.flags &= ~TCP_RCV_COPY_DATA; > - } > -} > - > -/* > - * Finds the request corresponding to the RPC xid and invokes the > common > - * tcp read code to read the data. 
> - */ > -static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, > - struct xdr_skb_reader *desc) > -{ > - struct sock_xprt *transport = > - container_of(xprt, struct sock_xprt, > xprt); > - struct rpc_rqst *req; > - > - dprintk("RPC: read reply XID %08x\n", ntohl(transport- > >recv.xid)); > - > - /* Find and lock the request corresponding to this xid */ > - spin_lock(&xprt->queue_lock); > - req = xprt_lookup_rqst(xprt, transport->recv.xid); > - if (!req) { > - dprintk("RPC: XID %08x request not found!\n", > - ntohl(transport->recv.xid)); > - spin_unlock(&xprt->queue_lock); > - return -1; > - } > - xprt_pin_rqst(req); > - spin_unlock(&xprt->queue_lock); > - > - xs_tcp_read_common(xprt, desc, req); > - > - spin_lock(&xprt->queue_lock); > - if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) > - xprt_complete_rqst(req->rq_task, transport- > >recv.copied); > - xprt_unpin_rqst(req); > - spin_unlock(&xprt->queue_lock); > - return 0; > -} > - > #if defined(CONFIG_SUNRPC_BACKCHANNEL) > -/* > - * Obtains an rpc_rqst previously allocated and invokes the common > - * tcp read code to read the data. The result is placed in the > callback > - * queue. > - * If we're unable to obtain the rpc_rqst we schedule the closing of > the > - * connection and return -1. 
> - */ > -static int xs_tcp_read_callback(struct rpc_xprt *xprt, > - struct xdr_skb_reader *desc) > -{ > - struct sock_xprt *transport = > - container_of(xprt, struct sock_xprt, > xprt); > - struct rpc_rqst *req; > - > - /* Look up the request corresponding to the given XID */ > - req = xprt_lookup_bc_request(xprt, transport->recv.xid); > - if (req == NULL) { > - printk(KERN_WARNING "Callback slot table > overflowed\n"); > - xprt_force_disconnect(xprt); > - return -1; > - } > - > - dprintk("RPC: read callback XID %08x\n", ntohl(req- > >rq_xid)); > - xs_tcp_read_common(xprt, desc, req); > - > - if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) > - xprt_complete_bc_request(req, transport->recv.copied); > - > - return 0; > -} > - > -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, > - struct xdr_skb_reader *desc) > -{ > - struct sock_xprt *transport = > - container_of(xprt, struct sock_xprt, > xprt); > - > - return (transport->recv.flags & TCP_RPC_REPLY) ? > - xs_tcp_read_reply(xprt, desc) : > - xs_tcp_read_callback(xprt, desc); > -} > - > static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) > { > int ret; > @@ -1429,106 +1493,14 @@ static size_t xs_tcp_bc_maxpayload(struct > rpc_xprt *xprt) > { > return PAGE_SIZE; > } > -#else > -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, > - struct xdr_skb_reader *desc) > -{ > - return xs_tcp_read_reply(xprt, desc); > -} > #endif /* CONFIG_SUNRPC_BACKCHANNEL */ > > -/* > - * Read data off the transport. This can be either an RPC_CALL or > an > - * RPC_REPLY. Relay the processing to helper functions. > - */ > -static void xs_tcp_read_data(struct rpc_xprt *xprt, > - struct xdr_skb_reader *desc) > -{ > - struct sock_xprt *transport = > - container_of(xprt, struct sock_xprt, > xprt); > - > - if (_xs_tcp_read_data(xprt, desc) == 0) > - xs_tcp_check_fraghdr(transport); > - else { > - /* > - * The transport_lock protects the request handling. > - * There's no need to hold it to update the recv.flags. 
> - */ > - transport->recv.flags &= ~TCP_RCV_COPY_DATA; > - } > -} > - > -static inline void xs_tcp_read_discard(struct sock_xprt *transport, > struct xdr_skb_reader *desc) > -{ > - size_t len; > - > - len = transport->recv.len - transport->recv.offset; > - if (len > desc->count) > - len = desc->count; > - desc->count -= len; > - desc->offset += len; > - transport->recv.offset += len; > - dprintk("RPC: discarded %zu bytes\n", len); > - xs_tcp_check_fraghdr(transport); > -} > - > -static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct > sk_buff *skb, unsigned int offset, size_t len) > -{ > - struct rpc_xprt *xprt = rd_desc->arg.data; > - struct sock_xprt *transport = container_of(xprt, struct > sock_xprt, xprt); > - struct xdr_skb_reader desc = { > - .skb = skb, > - .offset = offset, > - .count = len, > - }; > - size_t ret; > - > - dprintk("RPC: xs_tcp_data_recv started\n"); > - do { > - trace_xs_tcp_data_recv(transport); > - /* Read in a new fragment marker if necessary */ > - /* Can we ever really expect to get completely empty > fragments? 
*/ > - if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) { > - xs_tcp_read_fraghdr(xprt, &desc); > - continue; > - } > - /* Read in the xid if necessary */ > - if (transport->recv.flags & TCP_RCV_COPY_XID) { > - xs_tcp_read_xid(transport, &desc); > - continue; > - } > - /* Read in the call/reply flag */ > - if (transport->recv.flags & TCP_RCV_READ_CALLDIR) { > - xs_tcp_read_calldir(transport, &desc); > - continue; > - } > - /* Read in the request data */ > - if (transport->recv.flags & TCP_RCV_COPY_DATA) { > - xs_tcp_read_data(xprt, &desc); > - continue; > - } > - /* Skip over any trailing bytes on short reads */ > - xs_tcp_read_discard(transport, &desc); > - } while (desc.count); > - ret = len - desc.count; > - if (ret < rd_desc->count) > - rd_desc->count -= ret; > - else > - rd_desc->count = 0; > - trace_xs_tcp_data_recv(transport); > - dprintk("RPC: xs_tcp_data_recv done\n"); > - return ret; > -} > - > static void xs_tcp_data_receive(struct sock_xprt *transport) > { > struct rpc_xprt *xprt = &transport->xprt; > struct sock *sk; > - read_descriptor_t rd_desc = { > - .arg.data = xprt, > - }; > - unsigned long total = 0; > - int read = 0; > + size_t read = 0; > + ssize_t ret = 0; > > restart: > mutex_lock(&transport->recv_mutex); > @@ -1536,18 +1508,12 @@ static void xs_tcp_data_receive(struct > sock_xprt *transport) > if (sk == NULL) > goto out; > > - /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ > for (;;) { > - rd_desc.count = RPC_TCP_READ_CHUNK_SZ; > - lock_sock(sk); > - read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); > - if (rd_desc.count != 0 || read < 0) { > - clear_bit(XPRT_SOCK_DATA_READY, &transport- > >sock_state); > - release_sock(sk); > + clear_bit(XPRT_SOCK_DATA_READY, &transport- > >sock_state); > + ret = xs_read_stream(transport, MSG_DONTWAIT | > MSG_NOSIGNAL); > + if (ret < 0) > break; > - } > - release_sock(sk); > - total += read; > + read += ret; > if (need_resched()) { > mutex_unlock(&transport->recv_mutex); > 
cond_resched(); > @@ -1558,7 +1524,7 @@ static void xs_tcp_data_receive(struct > sock_xprt *transport) > queue_work(xprtiod_workqueue, &transport->recv_worker); > out: > mutex_unlock(&transport->recv_mutex); > - trace_xs_tcp_data_ready(xprt, read, total); > + trace_xs_tcp_data_ready(xprt, ret, read); > } > > static void xs_tcp_data_receive_workfn(struct work_struct *work) > @@ -2380,7 +2346,6 @@ static int xs_tcp_finish_connecting(struct > rpc_xprt *xprt, struct socket *sock) > transport->recv.offset = 0; > transport->recv.len = 0; > transport->recv.copied = 0; > - transport->recv.flags = TCP_RCV_COPY_FRAGHDR | > TCP_RCV_COPY_XID; > transport->xmit.offset = 0; > > /* Tell the socket layer to start connecting... */ > @@ -2802,6 +2767,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { > .connect = xs_connect, > .buf_alloc = rpc_malloc, > .buf_free = rpc_free, > + .prepare_request = xs_stream_prepare_request, > .send_request = xs_tcp_send_request, > .set_retrans_timeout = xprt_set_retrans_timeout_def, > .close = xs_tcp_shutdown,
Hi Trond, On Mon, Sep 17, 2018 at 09:03:31AM -0400, Trond Myklebust wrote: > Most of this code should also be reusable with other socket types. > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > --- > include/linux/sunrpc/xprtsock.h | 19 +- > include/trace/events/sunrpc.h | 15 +- > net/sunrpc/xprtsock.c | 694 +++++++++++++++----------------- > 3 files changed, 335 insertions(+), 393 deletions(-) With latest mainline (24ccea7e102d, it includes Al Viro's iov_iter fixup) I started hitting some severe slowdown and systemd timeouts with nfsroot on arm64 machines (physical or guests under KVM). Interestingly, it only happens when the client kernel is configured with 64K pages, the 4K pages configuration runs fine. It also runs fine if I add rsize=65536 to the nfsroot= argument. Bisecting led me to commit 277e4ab7d530 ("SUNRPC: Simplify TCP receive code by switching to using iterators"). Prior to this commit, it works fine. Some more info: - defconfig with CONFIG_ARM64_64K_PAGES enabled - kernel cmdline arg: nfsroot=<some-server>:/srv/nfs/debian-arm64,tcp,v4 - if it matters, the server is also an arm64 machine running 4.19 with 4K pages configuration I haven't figured out what's wrong or even how to debug this as I'm not familiar with the sunrpc code. Any suggestion? Thanks.
Hi Trond, Catalin On 09/11/2018 11:19, Catalin Marinas wrote: > Hi Trond, > > On Mon, Sep 17, 2018 at 09:03:31AM -0400, Trond Myklebust wrote: >> Most of this code should also be reusable with other socket types. >> >> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> >> --- >> include/linux/sunrpc/xprtsock.h | 19 +- >> include/trace/events/sunrpc.h | 15 +- >> net/sunrpc/xprtsock.c | 694 +++++++++++++++----------------- >> 3 files changed, 335 insertions(+), 393 deletions(-) > > With latest mainline (24ccea7e102d, it includes Al Viro's iov_iter > fixup) I started hitting some severe slowdown and systemd timeouts with > nfsroot on arm64 machines (physical or guests under KVM). Interestingly, > it only happens when the client kernel is configured with 64K pages, the > 4K pages configuration runs fine. It also runs fine if I add rsize=65536 > to the nfsroot= argument. > > Bisecting led me to commit 277e4ab7d530 ("SUNRPC: Simplify TCP receive > code by switching to using iterators"). Prior to this commit, it works > fine. > > Some more info: > > - defconfig with CONFIG_ARM64_64K_PAGES enabled > > - kernel cmdline arg: nfsroot=<some-server>:/srv/nfs/debian-arm64,tcp,v4 > > - if it matters, the server is also an arm64 machine running 4.19 with > 4K pages configuration > > I haven't figured out what's wrong or even how to debug this as I'm not > familiar with the sunrpc code. Any suggestion? > > Thanks. > I've done a few experiments and observations on this since it was seriously impacting all forms of testing on arm64 with a 64K pages configuration. I can confirm that the rsize=65536 workaround mentioned above by Catalin is effective for me too, as is resetting back to before the commit mentioned in the subject. 
In the following I tested instead with: - linus arm64 v4.20-rc1 64K pages + "Debug Lockups and Hangs" Enabled - hw Juno-r2 - fully NFS mounted rootfs (Debian 9) - NO rsize workaround - NFS Client config(nfsstat -m) Flags: rw,relatime,vers=4.0,rsize=4096,wsize=4096,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.0.1,local_lock=none,addr=192.168.0.254 Observations: 1. despite some general boot slow down (not so evident in my setup), I hit the issue when simply trying to launch LTP or LKP tests (all is in NFS mounted rootfs): the immediately observable behavior is just that the application gets 'apparently' stuck straight away (NO output whatsoever). Waiting some seconds yields no progress or result NOR any Lockup is detected by Kernel. A good deal of effort is needed to kill the process at this point...BUT it is feasible (many SIGSTOP + KILL)...and the system is back alive. 2. running again LKP via 'strace' we can observe the process apparently starting fine but then suddenly hanging randomly multiple times: at first on an execve() and then on some read() while trying to load its own file components; each hang lasts 30-45 seconds approximately. In LKP as an example: $ strace lkp run ./dbench-100%.yaml .... newfstatat(AT_FDCWD, "/opt/lkp-tests/bin/run-local", {st_mode=S_IFREG|0755, st_size=4367, ...}, 0) = 0 faccessat(AT_FDCWD, "/opt/lkp-tests/bin/run-local", X_OK) = 0 execve("/opt/lkp-tests/bin/run-local", ["/opt/lkp-tests/bin/run-local", "./dbench-100%.yaml"], [/* 12 vars */] <<< HANGGG ... ... 30-40 secs.. .... 
openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems.rb", O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7 fstat(7, {st_mode=S_IFREG|0644, st_size=33018, ...}) = 0 close(7) = 0 getuid() = 0 geteuid() = 0 getgid() = 0 getegid() = 0 oopenat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems.rb", O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7 fcntl(7, F_SETFL, O_RDONLY) = 0 fstat(7, {st_mode=S_IFREG|0644, st_size=33018, ...}) = 0 fstat(7, {st_mode=S_IFREG|0644, st_size=33018, ...}) = 0 ioctl(7, TCGETS, 0xffffdd0895d8) = -1 ENOTTY (Inappropriate ioctl for device) read(7, <<<HANGGSSS .... ~30-40 secs .... "# frozen_string_literal: true\n# "..., 8192) = 8192 read(7, ") as the standard configuration "..., 8192) = 8192 brk(0xaaaaeea70000) = 0xaaaaeea70000 read(7, "ady been required, then we have "..., 8192) = 8192 read(7, "lf.user_home\n @user_home ||= "..., 8192) = 8192 read(7, " require \"rubygems/defaults/#"..., 8192) = 250 read(7, "", 8192) = 0 close(7) = 0 .... .... openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems/specification.rb", O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7 fstat(7, {st_mode=S_IFREG|0644, st_size=81998, ...}) = 0 close(7) = 0 getuid() = 0 geteuid() = 0 getgid() = 0 getegid() = 0 openat(AT_FDCWD, "/usr/lib/ruby/2.3.0/rubygems/specification.rb", O_RDONLY|O_NONBLOCK|O_CLOEXEC) = 7 fcntl(7, F_SETFL, O_RDONLY) = 0 fstat(7, {st_mode=S_IFREG|0644, st_size=81998, ...}) = 0 fstat(7, {st_mode=S_IFREG|0644, st_size=81998, ...}) = 0 ioctl(7, TCGETS, 0xffffef390b38) = -1 ENOTTY (Inappropriate ioctl for device) read(7, "# -*- coding: utf-8 -*-\n# frozen"..., 8192) = 8192 read(7, "e platform attribute appropriate"..., 8192) = 8192 read(7, "sealicense.com/.\n #\n # You sho"..., 8192) = 8192 read(7, " TODO: find all extraneous adds\n"..., 8192) = 8192 read(7, "rn a list of all outdated local "..., 8192) = 8192 read(7, " = authors.collect { |a"..., 8192) = 8192 read(7, "ends on.\n #\n # Use #add_depend"..., 8192) = 8192 read(7, "ns true you\n # probably want to"..., 8192) = 8192 read(7, <<<< HANGGGGG Note that this last 
hang happens halfway through a file read! 3. Having a look at the underlying network traffic with Wireshark I could see in fact that the NFS packets stop flowing completely for 30-40s when all of the above happens...but I cannot see any error or timeout or NFS retries. The same happened when I tried to reduce the NFS timeo to 150 (15 secs) from the original 600 (60 secs). This lack of retries was confirmed by stats: root@sqwt-ubuntu:/opt/lkp-tests# nfsstat -r Client rpc stats: calls retrans authrefrsh 16008 0 16019 The only notable thing is a routine TCP KeepAlive sent by the NFS Client TCP stack during the 30/40 secs quiet window: NFS data flow restarts anyway after another 10/12 secs after the KeepAlive is sent and ACKed by the server, so it does not seem to be the trigger itself for the restart. 4. Waiting forever (minutes) I was finally able to see LKP completing initialization and the dbench test being run. Below you can see the results with and without the rsize workaround: DBENCH SANE RESULTS WITH WORKAROUND RSIZE=65536 ------------------------------------------------ ... 6 122759 5.62 MB/sec execute 599 sec latency 84.304 ms 6 cleanup 600 sec 0 cleanup 600 sec Operation Count AvgLat MaxLat ---------------------------------------- NTCreateX 108397 11.100 358.921 Close 79762 16.243 406.213 Rename 4582 19.399 259.180 Unlink 21728 3.610 190.564 Qpathinfo 98367 1.773 289.554 Qfileinfo 17163 9.917 232.687 Qfsinfo 17903 2.130 216.804 Sfileinfo 8828 17.427 234.069 Find 37915 3.478 287.326 WriteX 53503 0.048 2.992 ReadX 169707 0.592 199.341 LockX 350 13.536 242.800 UnlockX 350 2.801 124.317 Flush 7548 20.248 229.864 Throughput 5.61504 MB/sec 6 clients 6 procs max_latency=406.225 ms DBENCH RESULTS WITHOUT WORKAROUND --------------------------------- ...
6 111273 5.06 MB/sec execute 599 sec latency 4066.072 ms 6 cleanup 600 sec 3 cleanup 601 sec 0 cleanup 601 sec Operation Count AvgLat MaxLat ---------------------------------------- NTCreateX 97674 12.244 13773.786 Close 71583 19.126 13774.836 Rename 4135 19.177 277.171 Unlink 19881 4.286 12842.441 Qpathinfo 88303 1.942 12923.151 Qfileinfo 15305 9.636 203.481 Qfsinfo 16305 1.801 227.405 Sfileinfo 7960 15.871 186.799 Find 34164 3.409 255.098 WriteX 48105 0.053 5.428 ReadX 152460 0.926 13759.913 LockX 314 7.562 53.131 UnlockX 314 1.847 47.083 Flush 6872 19.222 200.180 Throughput 5.06232 MB/sec 6 clients 6 procs max_latency=13774.850 ms Then I also tried running: ./nfstest_io -d /mnt/t/data -v all -n 10 -r 3600 with WORKAROUND it took: INFO: 2018-11-28 16:31:25.031280 TIME: 449 secs WITHOUT: INFO: 2018-11-28 17:55:39.438688 TIME: 1348 secs 5. All of the above 'slowness' disappeared when I re-ran the same tests a second time, probably since NFS had cached everything locally in the first run. 6. Reboot hangs similarly. The fact that the traffic stops without triggering any NFS timeo and retry makes me think that it is the egressing NFS RPC requests themselves that get stuck somehow (but I'm far from being an NFS expert). Any ideas or thoughts? Additional sensible test cases to run? Or a hint on where to look inside the NFS code, Trond? (I'll re-test with a newer 4.20 RC and try to ftrace something...in the next days) Thanks Cristian
On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote: > Hi Trond, Catalin > > On 09/11/2018 11:19, Catalin Marinas wrote: > > Hi Trond, > > > > On Mon, Sep 17, 2018 at 09:03:31AM -0400, Trond Myklebust wrote: > > > Most of this code should also be reusable with other socket > > > types. > > > > > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > > > --- > > > include/linux/sunrpc/xprtsock.h | 19 +- > > > include/trace/events/sunrpc.h | 15 +- > > > net/sunrpc/xprtsock.c | 694 +++++++++++++++----------- > > > ------ > > > 3 files changed, 335 insertions(+), 393 deletions(-) > > > > With latest mainline (24ccea7e102d, it includes Al Viro's iov_iter > > fixup) I started hitting some severe slowdown and systemd timeouts > > with > > nfsroot on arm64 machines (physical or guests under KVM). > > Interestingly, > > it only happens when the client kernel is configured with 64K > > pages, the > > 4K pages configuration runs fine. It also runs fine if I add > > rsize=65536 > > to the nfsroot= argument. > > > > Bisecting led me to commit 277e4ab7d530 ("SUNRPC: Simplify TCP > > receive > > code by switching to using iterators"). Prior to this commit, it > > works > > fine. > > > > Some more info: > > > > - defconfig with CONFIG_ARM64_64K_PAGES enabled > > > > - kernel cmdline arg: nfsroot=<some-server>:/srv/nfs/debian- > > arm64,tcp,v4 > > > > - if it matters, the server is also an arm64 machine running 4.19 > > with > > 4K pages configuration > > Question to you both: when this happens, does /proc/*/stack show any of the processes hanging in the socket or sunrpc code? If so, can you please send me examples of those stack traces (i.e. the contents of /proc/<pid>/stack for the processes that are hanging) I'd be particularly interested if the processes in question are related to the rpciod workqueue. Thanks Trond
Hi On 29/11/2018 19:56, Trond Myklebust wrote: > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote: >> Hi Trond, Catalin [snip] > > Question to you both: when this happens, does /proc/*/stack show any of > the processes hanging in the socket or sunrpc code? If so, can you > please send me examples of those stack traces (i.e. the contents of > /proc/<pid>/stack for the processes that are hanging) (using a reverse shell since starting ssh causes a lot of pain and traffic) Looking at NFS traffic holes(30-40 secs) to detect Client side various HANGS ---------------------------------------------------------------------------- root@sqwt-ubuntu:/opt/lkp-tests# nc -lk -e /bin/bash -s 192.168.0.1 -p 1235 & root@sqwt-ubuntu:/opt/lkp-tests# lkp run ./dbench-100%.yaml $ nc 192.168.0.1 1235 cat /proc/2833/cmdline ruby/opt/lkp-tests/bin/run-local./dbench-100%.yaml HANG CLOSE ---------- cat /proc/2833/stack [<0>] __switch_to+0x6c/0x90 [<0>] rpc_wait_bit_killable+0x2c/0xb0 [<0>] __rpc_wait_for_completion_task+0x3c/0x48 [<0>] nfs4_do_close+0x1ec/0x2b0 [<0>] __nfs4_close+0x130/0x198 [<0>] nfs4_close_sync+0x34/0x40 [<0>] nfs4_close_context+0x40/0x50 [<0>] __put_nfs_open_context+0xac/0x118 [<0>] nfs_file_clear_open_context+0x38/0x58 [<0>] nfs_file_release+0x7c/0x90 [<0>] __fput+0x94/0x1c0 [<0>] ____fput+0x20/0x30 [<0>] task_work_run+0x98/0xb8 [<0>] do_notify_resume+0x2d0/0x318 [<0>] work_pending+0x8/0x10 [<0>] 0xffffffffffffffff HANG READ --------- cat /proc/2833/stack [<0>] __switch_to+0x6c/0x90 [<0>] io_schedule+0x20/0x40 [<0>] wait_on_page_bit_killable+0x164/0x260 [<0>] generic_file_read_iter+0x1c4/0x820 [<0>] nfs_file_read+0xa4/0x108 [<0>] __vfs_read+0x120/0x170 [<0>] vfs_read+0x94/0x150 [<0>] ksys_read+0x6c/0xd8 [<0>] __arm64_sys_read+0x24/0x30 [<0>] el0_svc_handler+0x7c/0x118 [<0>] el0_svc+0x8/0xc [<0>] 0xffffffffffffffff HANG STAT --------- cat /proc/2833/stack [<0>] __switch_to+0x6c/0x90 [<0>] rpc_wait_bit_killable+0x2c/0xb0 [<0>] __rpc_execute+0x1cc/0x528 [<0>] 
rpc_execute+0xe4/0x1b0 [<0>] rpc_run_task+0x130/0x168 [<0>] nfs4_call_sync_sequence+0x80/0xc8 [<0>] _nfs4_proc_getattr+0xc8/0xf8 [<0>] nfs4_proc_getattr+0x88/0x1d8 [<0>] __nfs_revalidate_inode+0x1f8/0x468 [<0>] nfs_getattr+0x14c/0x420 [<0>] vfs_getattr_nosec+0x7c/0x98 [<0>] vfs_getattr+0x48/0x58 [<0>] vfs_statx+0xb4/0x118 [<0>] __se_sys_newfstatat+0x58/0x98 [<0>] __arm64_sys_newfstatat+0x24/0x30 [<0>] el0_svc_handler+0x7c/0x118 [<0>] el0_svc+0x8/0xc [<0>] 0xffffffffffffffff .... Looking at a straced lkp to detect HANGS ---------------------------------------- cat /proc/2878/cmdline ruby/opt/lkp-tests/bin/run-local./dbench-100%.yaml HANG READ ---------- cat /proc/2878/stack [<0>] __switch_to+0x6c/0x90 [<0>] io_schedule+0x20/0x40 [<0>] wait_on_page_bit_killable+0x164/0x260 [<0>] generic_file_read_iter+0x1c4/0x820 [<0>] nfs_file_read+0xa4/0x108 [<0>] __vfs_read+0x120/0x170 [<0>] vfs_read+0x94/0x150 [<0>] ksys_read+0x6c/0xd8 [<0>] __arm64_sys_read+0x24/0x30 [<0>] el0_svc_handler+0x7c/0x118 [<0>] el0_svc+0x8/0xc [<0>] 0xffffffffffffffff ... 
cat /proc/2878/status Name: ruby Umask: 0022 State: D (disk sleep) Tgid: 2878 Ngid: 0 Pid: 2878 PPid: 2876 TracerPid: 2876 Uid: 0 0 0 0 Gid: 0 0 0 0 FDSize: 64 Groups: NStgid: 2878 NSpid: 2878 NSpgid: 2876 NSsid: 2822 VmPeak: 24192 kB VmSize: 24192 kB VmLck: 0 kB VmPin: 0 kB VmHWM: 13376 kB VmRSS: 13376 kB RssAnon: 8768 kB RssFile: 4608 kB RssShmem: 0 kB VmData: 9792 kB VmStk: 8192 kB VmExe: 64 kB VmLib: 5888 kB VmPTE: 320 kB VmSwap: 0 kB HugetlbPages: 0 kB CoreDumping: 0 Threads: 2 SigQ: 0/7534 SigPnd: 0000000000000000 ShdPnd: 0000000000000000 SigBlk: 0000000000000000 SigIgn: 0000000000000000 SigCgt: 00000001c2007e4f CapInh: 0000000000000000 CapPrm: 0000003fffffffff CapEff: 0000003fffffffff CapBnd: 0000003fffffffff CapAmb: 0000000000000000 NoNewPrivs: 0 Seccomp: 0 Speculation_Store_Bypass: unknown Cpus_allowed: 3f Cpus_allowed_list: 0-5 Mems_allowed: 1 Mems_allowed_list: 0 voluntary_ctxt_switches: 7547 nonvoluntary_ctxt_switches: 564 Thanks Cristian
On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote: > Hi > > On 29/11/2018 19:56, Trond Myklebust wrote: > > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote: > > > Hi Trond, Catalin > [snip] > > Question to you both: when this happens, does /proc/*/stack show > > any of > > the processes hanging in the socket or sunrpc code? If so, can you > > please send me examples of those stack traces (i.e. the contents of > > /proc/<pid>/stack for the processes that are hanging) > > (using a reverse shell since starting ssh causes a lot of pain and > traffic) > > Looking at NFS traffic holes(30-40 secs) to detect Client side > various HANGS > ------------------------------------------------------------------- > --------- > > root@sqwt-ubuntu:/opt/lkp-tests# nc -lk -e /bin/bash -s 192.168.0.1 > -p 1235 & > root@sqwt-ubuntu:/opt/lkp-tests# lkp run ./dbench-100%.yaml > > $ nc 192.168.0.1 1235 > > cat /proc/2833/cmdline > ruby/opt/lkp-tests/bin/run-local./dbench-100%.yaml > > HANG CLOSE > ---------- > cat /proc/2833/stack > [<0>] __switch_to+0x6c/0x90 > [<0>] rpc_wait_bit_killable+0x2c/0xb0 > [<0>] __rpc_wait_for_completion_task+0x3c/0x48 > [<0>] nfs4_do_close+0x1ec/0x2b0 > [<0>] __nfs4_close+0x130/0x198 > [<0>] nfs4_close_sync+0x34/0x40 > [<0>] nfs4_close_context+0x40/0x50 > [<0>] __put_nfs_open_context+0xac/0x118 > [<0>] nfs_file_clear_open_context+0x38/0x58 > [<0>] nfs_file_release+0x7c/0x90 > [<0>] __fput+0x94/0x1c0 > [<0>] ____fput+0x20/0x30 > [<0>] task_work_run+0x98/0xb8 > [<0>] do_notify_resume+0x2d0/0x318 > [<0>] work_pending+0x8/0x10 > [<0>] 0xffffffffffffffff > > HANG READ > --------- > cat /proc/2833/stack > [<0>] __switch_to+0x6c/0x90 > [<0>] io_schedule+0x20/0x40 > [<0>] wait_on_page_bit_killable+0x164/0x260 > [<0>] generic_file_read_iter+0x1c4/0x820 > [<0>] nfs_file_read+0xa4/0x108 > [<0>] __vfs_read+0x120/0x170 > [<0>] vfs_read+0x94/0x150 > [<0>] ksys_read+0x6c/0xd8 > [<0>] __arm64_sys_read+0x24/0x30 > [<0>] el0_svc_handler+0x7c/0x118 > [<0>] 
el0_svc+0x8/0xc > [<0>] 0xffffffffffffffff > > > HANG STAT > --------- > cat /proc/2833/stack > [<0>] __switch_to+0x6c/0x90 > [<0>] rpc_wait_bit_killable+0x2c/0xb0 > [<0>] __rpc_execute+0x1cc/0x528 > [<0>] rpc_execute+0xe4/0x1b0 > [<0>] rpc_run_task+0x130/0x168 > [<0>] nfs4_call_sync_sequence+0x80/0xc8 > [<0>] _nfs4_proc_getattr+0xc8/0xf8 > [<0>] nfs4_proc_getattr+0x88/0x1d8 > [<0>] __nfs_revalidate_inode+0x1f8/0x468 > [<0>] nfs_getattr+0x14c/0x420 > [<0>] vfs_getattr_nosec+0x7c/0x98 > [<0>] vfs_getattr+0x48/0x58 > [<0>] vfs_statx+0xb4/0x118 > [<0>] __se_sys_newfstatat+0x58/0x98 > [<0>] __arm64_sys_newfstatat+0x24/0x30 > [<0>] el0_svc_handler+0x7c/0x118 > [<0>] el0_svc+0x8/0xc > [<0>] 0xffffffffffffffff > > .... Is there anything else blocked in the RPC layer? The above are all standard tasks waiting for the rpciod/xprtiod workqueues to complete the calls to the server.
On Fri, 2018-11-30 at 14:31 -0500, Trond Myklebust wrote: > On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote: > > Hi > > > > On 29/11/2018 19:56, Trond Myklebust wrote: > > > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote: > > > > Hi Trond, Catalin > > [snip] > > > Question to you both: when this happens, does /proc/*/stack show > > > any of > > > the processes hanging in the socket or sunrpc code? If so, can > > > you > > > please send me examples of those stack traces (i.e. the contents > > > of > > > /proc/<pid>/stack for the processes that are hanging) > > > > (using a reverse shell since starting ssh causes a lot of pain and > > traffic) > > > > Looking at NFS traffic holes(30-40 secs) to detect Client side > > various HANGS > > ------------------------------------------------------------------- > > Hi Cristian and Catalin Chuck and I have identified a few issues that might have an effect on the hangs you report. Could you please give the linux-next branch in my repository on git.linux-nfs.org ( https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/linux-next ) a try? git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next Thanks! Trond
Hi Trond, On Sun, Dec 02, 2018 at 04:44:49PM +0000, Trond Myklebust wrote: > On Fri, 2018-11-30 at 14:31 -0500, Trond Myklebust wrote: > > On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote: > > > On 29/11/2018 19:56, Trond Myklebust wrote: > > > > On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote: > > > > Question to you both: when this happens, does /proc/*/stack show > > > > any of the processes hanging in the socket or sunrpc code? If > > > > so, can you please send me examples of those stack traces (i.e. > > > > the contents of /proc/<pid>/stack for the processes that are > > > > hanging) > > > > > > (using a reverse shell since starting ssh causes a lot of pain and > > > traffic) > > > > > > Looking at NFS traffic holes(30-40 secs) to detect Client side > > > various HANGS > > Chuck and I have identified a few issues that might have an effect on > the hangs you report. Could you please give the linux-next branch in my > repository on git.linux-nfs.org ( > https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/linux-next > ) a try? > > git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next I tried, unfortunately there's no difference for me (I merged the above branch on top of 4.20-rc5).
Hi On 03/12/2018 11:45, Catalin Marinas wrote: > Hi Trond, > > On Sun, Dec 02, 2018 at 04:44:49PM +0000, Trond Myklebust wrote: >> On Fri, 2018-11-30 at 14:31 -0500, Trond Myklebust wrote: >>> On Fri, 2018-11-30 at 16:19 +0000, Cristian Marussi wrote: >>>> On 29/11/2018 19:56, Trond Myklebust wrote: >>>>> On Thu, 2018-11-29 at 19:28 +0000, Cristian Marussi wrote: >>>>> Question to you both: when this happens, does /proc/*/stack show >>>>> any of the processes hanging in the socket or sunrpc code? If >>>>> so, can you please send me examples of those stack traces (i.e. >>>>> the contents of /proc/<pid>/stack for the processes that are >>>>> hanging) >>>> >>>> (using a reverse shell since starting ssh causes a lot of pain and >>>> traffic) >>>> >>>> Looking at NFS traffic holes(30-40 secs) to detect Client side >>>> various HANGS >> >> Chuck and I have identified a few issues that might have an effect on >> the hangs you report. Could you please give the linux-next branch in my >> repository on git.linux-nfs.org ( >> https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/linux-next >> ) a try? >> >> git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next > > I tried, unfortunately there's no difference for me (I merged the above > branch on top of 4.20-rc5). > same for me. Issue still there. Beside I saw some differences in the dbench result which I used for testing. From the dbench (comparing with previous mail) it seems that Unlink and Qpathinfo MaxLat has normalized. 
Operation Count AvgLat MaxLat ---------------------------------------- NTCreateX 90820 13.613 13855.620 Close 66565 18.075 13853.289 Rename 3845 23.668 326.642 Unlink 18450 4.581 186.062 Qpathinfo 82068 2.677 280.203 Qfileinfo 14235 10.357 176.373 Qfsinfo 15156 2.822 242.794 Sfileinfo 7400 17.018 240.546 Find 31812 5.988 277.332 WriteX 44735 0.155 14.685 ReadX 141872 0.741 13817.870 LockX 288 10.558 96.179 UnlockX 288 3.307 57.939 Flush 6389 20.427 187.429 > Is there anything else blocked in the RPC layer? The above are all > standard tasks waiting for the rpciod/xprtiod workqueues to complete > the calls to the server. cat /proc/692/stack [<0>] __switch_to+0x6c/0x90 [<0>] rescuer_thread+0x2e8/0x360 [<0>] kthread+0x134/0x138 [<0>] ret_from_fork+0x10/0x1c [<0>] 0xffffffffffffffff I was now trying to collect more evidence ftracing during the quiet-stuck-period till the restart happens. Thanks Cristian
Hi On 03/12/2018 11:53, Cristian Marussi wrote: > Hi > [snip] > same for me. Issue still there. > > Beside I saw some differences in the dbench result which I used for testing. > > From the dbench (comparing with previous mail) it seems that > Unlink and Qpathinfo MaxLat has normalized. > > Operation Count AvgLat MaxLat > ---------------------------------------- > NTCreateX 90820 13.613 13855.620 > Close 66565 18.075 13853.289 > Rename 3845 23.668 326.642 > Unlink 18450 4.581 186.062 > Qpathinfo 82068 2.677 280.203 > Qfileinfo 14235 10.357 176.373 > Qfsinfo 15156 2.822 242.794 > Sfileinfo 7400 17.018 240.546 > Find 31812 5.988 277.332 > WriteX 44735 0.155 14.685 > ReadX 141872 0.741 13817.870 > LockX 288 10.558 96.179 > UnlockX 288 3.307 57.939 > Flush 6389 20.427 187.429 > > >> Is there anything else blocked in the RPC layer? The above are all >> standard tasks waiting for the rpciod/xprtiod workqueues to complete >> the calls to the server. > cat /proc/692/stack > [<0>] __switch_to+0x6c/0x90 > [<0>] rescuer_thread+0x2e8/0x360 > [<0>] kthread+0x134/0x138 > [<0>] ret_from_fork+0x10/0x1c > [<0>] 0xffffffffffffffff > > I was now trying to collect more evidence ftracing during the quiet-stuck-period > till the restart happens. > attached to this mail there is a 3secs ftrace function-graph taken during the quiet/stalled period of an 'LKP run dbench'; issued directly from console (no ssh or netcat shell traffic). Ftrace filter was pre-set as: set_ftrace_filter was set to : nfs* rpc* xprt* tcp* and tracing started once NO traffic was observed flowing on Wireshark. Using ARM64 64k pages on Linux NFS next branch like previous mail this morning. 
Thanks Cristian # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 3) | nfs_permission() { 3) | rpc_lookup_cred_nonblock() { 3) | rpcauth_lookupcred() { 3) 3.720 us | rpcauth_lookup_credcache(); 3) 8.100 us | } 3) + 11.500 us | } 3) | nfs_do_access() { 3) | nfs_check_cache_invalid() { 3) | nfs4_have_delegation() { 3) 1.220 us | nfs4_is_valid_delegation(); 3) 4.560 us | } 3) 8.040 us | } 3) + 11.420 us | } 3) + 36.140 us | } 3) | nfs4_lookup_revalidate() { 3) | nfs4_do_lookup_revalidate() { 3) | nfs_do_lookup_revalidate() { 3) | nfs4_have_delegation() { 3) 1.200 us | nfs4_is_valid_delegation(); 3) 3.860 us | } 3) | nfs_check_verifier() { 3) | nfs_mapping_need_revalidate_inode() { 3) | nfs_check_cache_invalid() { 3) | nfs4_have_delegation() { 3) 1.180 us | nfs4_is_valid_delegation(); 3) 3.740 us | } 3) 6.280 us | } 3) 8.840 us | } 3) + 11.860 us | } 3) 1.360 us | nfs_lookup_verify_inode(); 3) 1.440 us | nfs_advise_use_readdirplus(); 3) + 27.520 us | } 3) + 30.940 us | } 3) + 34.260 us | } 3) | nfs_permission() { 3) | rpc_lookup_cred_nonblock() { 3) | rpcauth_lookupcred() { 3) 1.720 us | rpcauth_lookup_credcache(); 3) 4.300 us | } 3) 6.700 us | } 3) | nfs_do_access() { 3) | nfs_check_cache_invalid() { 3) | nfs4_have_delegation() { 3) 1.220 us | nfs4_is_valid_delegation(); 3) 3.700 us | } 3) 6.320 us | } 3) 9.840 us | } 3) + 20.480 us | } 3) | nfs4_lookup_revalidate() { 3) | nfs4_do_lookup_revalidate() { 3) | nfs4_have_delegation() { 3) 1.580 us | nfs4_is_valid_delegation(); 3) 1.180 us | nfs_mark_delegation_referenced(); 3) 7.060 us | } 3) 9.620 us | } 3) + 12.320 us | } 3) 1.260 us | nfs_permission(); 3) | nfs4_file_open() { 3) 1.360 us | nfs_check_flags(); 3) | rpc_lookup_cred() { 3) | rpcauth_lookupcred() { 3) 1.580 us | rpcauth_lookup_credcache(); 3) 4.080 us | } 3) 6.640 us | } 3) 1.700 us | nfs_sb_active(); 3) | nfs4_atomic_open() { 3) | nfs4_do_open() { 3) 2.780 us | nfs4_get_state_owner(); 3) | nfs4_client_recover_expired_lease() { 3) | 
nfs4_wait_clnt_recover() { 3) | nfs_put_client() { 3) 1.940 us | nfs_put_client.part.2(); 3) 4.920 us | } 3) 8.140 us | } 3) + 11.180 us | } 3) | nfs4_opendata_alloc() { 3) 1.360 us | nfs4_label_alloc(); 3) 1.160 us | nfs4_label_alloc(); 3) 2.180 us | nfs_alloc_seqid(); 3) 1.300 us | nfs_sb_active(); 3) | nfs_fattr_init() { 3) 1.380 us | nfs_inc_attr_generation_counter(); 3) 4.760 us | } 3) 1.160 us | nfs_fattr_init_names(); 3) + 25.480 us | } 3) 3.520 us | nfs4_get_open_state(); 3) | nfs4_run_open_task() { 3) | rpc_run_task() { 3) | rpc_new_task() { 3) 1.280 us | xprt_get(); 3) 6.040 us | } 3) | xprt_iter_get_next() { 3) | xprt_iter_get_helper() { 3) 1.220 us | xprt_iter_first_entry(); 3) 1.480 us | xprt_get(); 3) 7.100 us | } 3) + 10.300 us | } 3) | rpc_execute() { 3) + 12.100 us | rpc_make_runnable(); 3) + 15.260 us | } 3) + 39.520 us | } 3) ! 129.680 us | rpc_wait_bit_killable(); 2) | rpc_async_schedule() { 2) | rpc_prepare_task() { 2) | nfs4_open_prepare() { 2) 1.260 us | nfs_wait_on_sequence(); 2) 0.820 us | nfs_mark_delegation_referenced(); 2) 0.740 us | nfs4_sequence_done(); 2) 8.780 us | } 2) + 11.360 us | } 2) | rpc_release_resources_task() { 2) 1.480 us | xprt_release(); 2) | rpc_task_release_client() { 2) 0.780 us | rpc_release_client(); 2) 1.120 us | xprt_put(); 2) 5.380 us | } 2) + 10.780 us | } 2) + 32.280 us | } 3) | rpc_put_task() { 3) | rpc_do_put_task() { 3) | rpc_release_resources_task() { 3) 1.460 us | xprt_release(); 3) 1.380 us | rpc_task_release_client(); 3) 7.340 us | } 3) | rpc_free_task() { 3) | nfs4_open_release() { 3) 1.520 us | nfs4_opendata_put.part.8(); 3) 4.480 us | } 3) 8.080 us | } 3) + 19.980 us | } 3) + 23.560 us | } 3) ! 
203.540 us | } 3) 1.420 us | nfs_mark_delegation_referenced(); 3) 1.500 us | nfs_release_seqid(); 3) | nfs_may_open() { 3) | nfs_do_access() { 3) | nfs_check_cache_invalid() { 3) | nfs4_have_delegation() { 3) 1.200 us | nfs4_is_valid_delegation(); 3) 1.200 us | nfs_mark_delegation_referenced(); 3) 6.260 us | } 3) 8.860 us | } 3) + 11.860 us | } 3) + 14.480 us | } 3) 1.260 us | nfs_mark_delegation_referenced(); 3) 1.220 us | nfs4_state_set_mode_locked(); 3) 1.500 us | nfs_release_seqid(); 3) 1.540 us | nfs_inode_attach_open_context(); 3) 1.420 us | nfs4_sequence_free_slot(); 3) | nfs4_opendata_put.part.8() { 3) 1.380 us | nfs4_lgopen_release(); 3) | nfs_free_seqid() { 3) 1.380 us | nfs_release_seqid(); 3) 4.260 us | } 3) 1.280 us | nfs4_sequence_free_slot(); 3) 1.620 us | nfs4_put_open_state(); 3) 1.400 us | nfs4_put_state_owner(); 3) 1.280 us | nfs_sb_deactive(); 3) 1.340 us | nfs_fattr_free_names(); 3) + 25.680 us | } 3) 1.260 us | nfs4_put_state_owner(); 3) ! 329.940 us | } 3) ! 333.420 us | } 3) 1.360 us | nfs_file_set_open_context(); 3) ! 
356.720 us | } 1) 2.520 us | nfs4_xattr_get_nfs4_label(); 1) | nfs_file_read() { 1) 0.900 us | nfs_start_io_read(); 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.820 us | nfs4_is_valid_delegation(); 1) 0.800 us | nfs_mark_delegation_referenced(); 1) 4.700 us | } 1) 6.540 us | } 1) 8.220 us | } 1) + 10.360 us | } 1) 0.860 us | nfs_end_io_read(); 1) + 21.200 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 1.280 us | rpcauth_lookup_credcache(); 1) 3.400 us | } 1) 5.400 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 4.040 us | } 1) 5.620 us | } 1) 7.360 us | } 1) + 16.080 us | } 1) | nfs_file_read() { 1) 0.800 us | nfs_start_io_read(); 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.860 us | nfs4_is_valid_delegation(); 1) 0.780 us | nfs_mark_delegation_referenced(); 1) 4.020 us | } 1) 6.120 us | } 1) 7.660 us | } 1) 9.320 us | } 1) 0.920 us | nfs_end_io_read(); 1) + 15.620 us | } 1) | nfs_file_read() { 1) 0.800 us | nfs_start_io_read(); 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.900 us | nfs_mark_delegation_referenced(); 1) 4.040 us | } 1) 5.580 us | } 1) 7.120 us | } 1) 8.720 us | } 1) 0.740 us | nfs_end_io_read(); 1) + 14.000 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.880 us | rpcauth_lookup_credcache(); 1) 2.440 us | } 1) 4.020 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.360 us | } 1) 4.040 us | } 1) 5.920 
us | } 1) + 12.820 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.460 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.860 us | nfs4_is_valid_delegation(); 1) 2.420 us | } 1) 4.180 us | } 1) 5.700 us | } 1) 7.240 us | } 1) 1.020 us | nfs_lookup_verify_inode(); 1) 0.780 us | nfs_advise_use_readdirplus(); 1) + 16.180 us | } 1) + 17.860 us | } 1) + 20.040 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.820 us | rpcauth_lookup_credcache(); 1) 2.520 us | } 1) 4.100 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.720 us | nfs4_is_valid_delegation(); 1) 2.400 us | } 1) 3.960 us | } 1) 5.760 us | } 1) + 12.360 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.380 us | } 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.280 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.820 us | } 1) 5.340 us | } 1) 6.860 us | } 1) 0.840 us | nfs_lookup_verify_inode(); 1) 1.100 us | nfs_advise_use_readdirplus(); 1) + 15.400 us | } 1) + 20.240 us | } 1) + 21.800 us | } 1) | nfs_get_link() { 1) | nfs_revalidate_mapping_rcu() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.940 us | } 1) 5.600 us | } 1) 7.360 us | } 1) 9.500 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.820 us | 
rpcauth_lookup_credcache(); 1) 2.380 us | } 1) 3.940 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.280 us | } 1) 3.860 us | } 1) 5.380 us | } 1) + 11.960 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.720 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.840 us | } 1) 5.360 us | } 1) 6.860 us | } 1) 0.740 us | nfs_lookup_verify_inode(); 1) 0.740 us | nfs_advise_use_readdirplus(); 1) + 14.680 us | } 1) + 16.260 us | } 1) + 17.940 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.840 us | rpcauth_lookup_credcache(); 1) 2.520 us | } 1) 4.040 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.280 us | } 1) 3.820 us | } 1) 5.620 us | } 1) + 12.020 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.820 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 4.120 us | } 1) 5.720 us | } 1) 7.280 us | } 1) 0.800 us | nfs_permission(); 1) | nfs4_file_open() { 1) 0.760 us | nfs_check_flags(); 1) | rpc_lookup_cred() { 1) | rpcauth_lookupcred() { 1) 0.900 us | rpcauth_lookup_credcache(); 1) 2.420 us | } 1) 3.920 us | } 1) 0.900 us | nfs_sb_active(); 1) | nfs4_atomic_open() { 1) | nfs4_do_open() { 1) 1.400 us | nfs4_get_state_owner(); 1) | nfs4_client_recover_expired_lease() { 1) | nfs4_wait_clnt_recover() { 1) | nfs_put_client() { 1) 0.960 us | nfs_put_client.part.2(); 1) 2.620 us | } 1) 4.500 us | } 1) 6.120 us | } 1) | nfs4_opendata_alloc() { 1) 0.980 us | nfs4_label_alloc(); 
1) 0.720 us | nfs4_label_alloc(); 1) 0.840 us | nfs_alloc_seqid(); 1) 0.800 us | nfs_sb_active(); 1) | nfs_fattr_init() { 1) 0.780 us | nfs_inc_attr_generation_counter(); 1) 2.460 us | } 1) 0.740 us | nfs_fattr_init_names(); 1) + 13.720 us | } 1) 1.020 us | nfs4_get_open_state(); 1) | nfs4_run_open_task() { 1) | rpc_run_task() { 1) | rpc_new_task() { 1) 0.820 us | xprt_get(); 1) 3.600 us | } 1) | xprt_iter_get_next() { 1) | xprt_iter_get_helper() { 1) 0.760 us | xprt_iter_first_entry(); 1) 0.760 us | xprt_get(); 1) 4.160 us | } 1) 5.980 us | } 1) | rpc_execute() { 1) 6.000 us | rpc_make_runnable(); 1) 7.740 us | } 1) + 21.180 us | } 1) ! 104.520 us | rpc_wait_bit_killable(); 2) | rpc_async_schedule() { 2) | rpc_prepare_task() { 2) | nfs4_open_prepare() { 2) 1.040 us | nfs_wait_on_sequence(); 2) 0.920 us | nfs_mark_delegation_referenced(); 2) 0.740 us | nfs4_sequence_done(); 2) 6.600 us | } 2) 8.480 us | } 2) | rpc_release_resources_task() { 2) 0.780 us | xprt_release(); 2) | rpc_task_release_client() { 2) 0.780 us | rpc_release_client(); 2) 0.760 us | xprt_put(); 2) 4.220 us | } 2) 7.880 us | } 2) + 24.700 us | } 1) | rpc_put_task() { 1) | rpc_do_put_task() { 1) | rpc_release_resources_task() { 1) 0.840 us | xprt_release(); 1) 0.760 us | rpc_task_release_client(); 1) 4.080 us | } 1) | rpc_free_task() { 1) | nfs4_open_release() { 1) 0.800 us | nfs4_opendata_put.part.8(); 1) 2.400 us | } 1) 4.420 us | } 1) + 11.240 us | } 1) + 13.020 us | } 1) ! 
143.480 us | } 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 0.820 us | nfs_release_seqid(); 1) | nfs_may_open() { 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.860 us | nfs_mark_delegation_referenced(); 1) 4.060 us | } 1) 5.640 us | } 1) 7.480 us | } 1) 9.040 us | } 1) 0.860 us | nfs_mark_delegation_referenced(); 1) 0.760 us | nfs4_state_set_mode_locked(); 1) 0.880 us | nfs_release_seqid(); 1) 1.420 us | nfs_inode_attach_open_context(); 1) 0.720 us | nfs4_sequence_free_slot(); 1) | nfs4_opendata_put.part.8() { 1) 0.720 us | nfs4_lgopen_release(); 1) | nfs_free_seqid() { 1) 0.760 us | nfs_release_seqid(); 1) 2.540 us | } 1) 0.740 us | nfs4_sequence_free_slot(); 1) 0.800 us | nfs4_put_open_state(); 1) 0.800 us | nfs4_put_state_owner(); 1) 0.780 us | nfs_sb_deactive(); 1) 0.780 us | nfs_fattr_free_names(); 1) + 15.020 us | } 1) 0.760 us | nfs4_put_state_owner(); 1) ! 213.580 us | } 1) ! 215.300 us | } 1) 0.760 us | nfs_file_set_open_context(); 1) ! 
227.840 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.900 us | rpcauth_lookup_credcache(); 1) 2.480 us | } 1) 4.080 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.780 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 3.960 us | } 1) 5.520 us | } 1) 7.100 us | } 1) + 14.040 us | } 1) | nfs_file_read() { 1) 0.780 us | nfs_start_io_read(); 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 3.900 us | } 1) 5.440 us | } 1) 7.000 us | } 1) 8.720 us | } 1) 0.780 us | nfs_end_io_read(); 1) + 15.060 us | } 1) | nfs_file_read() { 1) 0.760 us | nfs_start_io_read(); 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 3.860 us | } 1) 5.360 us | } 1) 6.900 us | } 1) 8.500 us | } 1) 0.760 us | nfs_end_io_read(); 1) + 14.420 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.780 us | nfs_mark_delegation_referenced(); 1) 4.080 us | } 1) 5.800 us | } 1) 7.400 us | } 1) 9.140 us | } 1) + 11.760 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 3.900 us | } 1) 5.420 us | } 1) 6.920 us | } 1) 8.540 us | } 1) + 10.300 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | 
nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.780 us | nfs_mark_delegation_referenced(); 1) 4.020 us | } 1) 5.600 us | } 1) 7.160 us | } 1) 8.820 us | } 1) + 10.740 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 3.880 us | } 1) 5.400 us | } 1) 6.920 us | } 1) 8.740 us | } 1) + 10.400 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 1.080 us | rpcauth_lookup_credcache(); 1) 2.860 us | } 1) 4.560 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.760 us | nfs4_is_valid_delegation(); 1) 2.400 us | } 1) 4.080 us | } 1) 5.980 us | } 1) + 13.600 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.380 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.260 us | } 1) 3.800 us | } 1) 5.620 us | } 1) 7.180 us | } 1) 0.900 us | nfs_lookup_verify_inode(); 1) 0.740 us | nfs_advise_use_readdirplus(); 1) + 15.580 us | } 1) + 17.120 us | } 1) + 18.720 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.900 us | rpcauth_lookup_credcache(); 1) 2.520 us | } 1) 4.240 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.760 us | nfs4_is_valid_delegation(); 1) 2.600 us | } 1) 4.260 us | } 1) 6.400 us | } 1) + 13.080 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs_check_verifier() { 1) | 
nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.480 us | } 1) 4.080 us | } 1) 5.600 us | } 1) 7.240 us | } 1) 0.740 us | nfs_lookup_revalidate_done(); 1) + 10.560 us | } 1) + 12.080 us | } 1) + 13.840 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 1.080 us | rpcauth_lookup_credcache(); 1) 2.740 us | } 1) 4.340 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.720 us | nfs4_is_valid_delegation(); 1) 2.340 us | } 1) 4.420 us | } 1) 6.040 us | } 1) + 13.200 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.480 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.280 us | } 1) 3.960 us | } 1) 5.460 us | } 1) 7.000 us | } 1) 0.720 us | nfs_lookup_verify_inode(); 1) 0.880 us | nfs_advise_use_readdirplus(); 1) + 15.200 us | } 1) + 16.720 us | } 1) + 18.260 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.820 us | rpcauth_lookup_credcache(); 1) 2.460 us | } 1) 4.040 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.820 us | } 1) 5.360 us | } 1) + 11.800 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.860 us | nfs4_is_valid_delegation(); 1) 2.440 us | } 1) 3.980 us | } 1) 5.500 us | } 1) 6.980 us | } 1) 0.740 us | nfs_lookup_revalidate_done(); 1) + 10.060 us | } 1) + 
11.580 us | } 1) + 13.260 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.860 us | rpcauth_lookup_credcache(); 1) 2.580 us | } 1) 4.180 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.420 us | } 1) 3.980 us | } 1) 5.560 us | } 1) + 12.300 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.880 us | nfs4_is_valid_delegation(); 1) 2.420 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.720 us | nfs4_is_valid_delegation(); 1) 2.400 us | } 1) 3.920 us | } 1) 5.420 us | } 1) 6.960 us | } 1) 0.740 us | nfs_lookup_verify_inode(); 1) 0.740 us | nfs_advise_use_readdirplus(); 1) + 14.880 us | } 1) + 16.440 us | } 1) + 17.980 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.840 us | rpcauth_lookup_credcache(); 1) 2.380 us | } 1) 3.940 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.980 us | } 1) 5.500 us | } 1) + 11.840 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.960 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 4.520 us | } 1) 6.160 us | } 1) 7.880 us | } 1) 0.800 us | nfs_permission(); 1) | nfs4_file_open() { 1) 0.740 us | nfs_check_flags(); 1) | rpc_lookup_cred() { 1) | rpcauth_lookupcred() { 1) 0.900 us | rpcauth_lookup_credcache(); 1) 2.440 us | } 1) 3.960 us | } 1) 0.800 us | nfs_sb_active(); 1) | nfs4_atomic_open() { 1) | nfs4_do_open() { 1) 1.020 us | nfs4_get_state_owner(); 1) | nfs4_client_recover_expired_lease() { 1) | nfs4_wait_clnt_recover() { 1) | nfs_put_client() { 1) 0.800 us | 
nfs_put_client.part.2(); 1) 2.320 us | } 1) 3.880 us | } 1) 5.460 us | } 1) | nfs4_opendata_alloc() { 1) 0.760 us | nfs4_label_alloc(); 1) 0.740 us | nfs4_label_alloc(); 1) 0.840 us | nfs_alloc_seqid(); 1) 0.800 us | nfs_sb_active(); 1) | nfs_fattr_init() { 1) 0.760 us | nfs_inc_attr_generation_counter(); 1) 2.440 us | } 1) 0.740 us | nfs_fattr_init_names(); 1) + 12.880 us | } 1) 1.540 us | nfs4_get_open_state(); 1) | nfs4_run_open_task() { 1) | rpc_run_task() { 1) | rpc_new_task() { 1) 0.740 us | xprt_get(); 1) 2.840 us | } 1) | xprt_iter_get_next() { 1) | xprt_iter_get_helper() { 1) 0.760 us | xprt_iter_first_entry(); 1) 0.760 us | xprt_get(); 1) 3.920 us | } 1) 5.540 us | } 1) | rpc_execute() { 1) 5.240 us | rpc_make_runnable(); 1) 6.900 us | } 1) + 18.840 us | } 1) ! 105.220 us | rpc_wait_bit_killable(); 2) | rpc_async_schedule() { 2) | rpc_prepare_task() { 2) | nfs4_open_prepare() { 2) 0.980 us | nfs_wait_on_sequence(); 2) 0.780 us | nfs_mark_delegation_referenced(); 2) 0.740 us | nfs4_sequence_done(); 2) 6.340 us | } 2) 8.180 us | } 2) | rpc_release_resources_task() { 2) 0.760 us | xprt_release(); 2) | rpc_task_release_client() { 2) 0.920 us | rpc_release_client(); 2) 0.760 us | xprt_put(); 2) 4.280 us | } 2) 7.740 us | } 2) + 24.480 us | } 1) | rpc_put_task() { 1) | rpc_do_put_task() { 1) | rpc_release_resources_task() { 1) 0.900 us | xprt_release(); 1) 0.760 us | rpc_task_release_client(); 1) 4.140 us | } 1) | rpc_free_task() { 1) | nfs4_open_release() { 1) 0.760 us | nfs4_opendata_put.part.8(); 1) 2.320 us | } 1) 4.120 us | } 1) + 10.820 us | } 1) + 12.500 us | } 1) ! 
140.660 us | } 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 0.840 us | nfs_release_seqid(); 1) | nfs_may_open() { 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 4.140 us | } 1) 5.740 us | } 1) 7.620 us | } 1) 9.120 us | } 1) 0.820 us | nfs_mark_delegation_referenced(); 1) 0.800 us | nfs4_state_set_mode_locked(); 1) 0.760 us | nfs_release_seqid(); 1) 0.980 us | nfs_inode_attach_open_context(); 1) 0.740 us | nfs4_sequence_free_slot(); 1) | nfs4_opendata_put.part.8() { 1) 0.740 us | nfs4_lgopen_release(); 1) | nfs_free_seqid() { 1) 0.740 us | nfs_release_seqid(); 1) 2.340 us | } 1) 0.760 us | nfs4_sequence_free_slot(); 1) 0.800 us | nfs4_put_open_state(); 1) 0.760 us | nfs4_put_state_owner(); 1) 0.760 us | nfs_sb_deactive(); 1) 0.720 us | nfs_fattr_free_names(); 1) + 13.880 us | } 1) 0.740 us | nfs4_put_state_owner(); 1) ! 206.100 us | } 1) ! 207.740 us | } 1) 0.880 us | nfs_file_set_open_context(); 1) ! 
219.600 us | } 1) | nfs_getattr() { 1) | nfs_writepages() { 1) | nfs_pageio_init_write() { 1) 0.860 us | nfs_pageio_init(); 1) 2.600 us | } 1) | nfs_pageio_complete() { 1) | nfs_pageio_doio() { 1) 0.720 us | nfs_pgio_current_mirror(); 1) 2.720 us | } 1) 4.960 us | } 1) | nfs_io_completion_put.part.0() { 1) | nfs_io_completion_commit() { 1) 1.080 us | nfs_commit_end(); 1) 3.380 us | } 1) 5.440 us | } 1) + 20.500 us | } 1) | nfs_attribute_cache_expired() { 1) | nfs4_have_delegation() { 1) 0.760 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 3.980 us | } 1) 5.580 us | } 1) | nfs_readdirplus_parent_cache_hit.part.5() { 1) 0.800 us | nfs_advise_use_readdirplus(); 1) 2.460 us | } 1) + 36.240 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 3.820 us | } 1) 5.600 us | } 1) 7.160 us | } 1) 8.780 us | } 1) + 10.440 us | } 1) 0.840 us | nfs4_file_flush(); 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.940 us | rpcauth_lookup_credcache(); 1) 2.640 us | } 1) 4.220 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.940 us | } 1) 5.540 us | } 1) + 12.320 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.720 us | nfs4_is_valid_delegation(); 1) 2.400 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.820 us | } 1) 5.340 us | } 1) 6.860 us | } 1) 0.760 us | nfs_lookup_verify_inode(); 1) 0.860 us | nfs_advise_use_readdirplus(); 1) + 15.000 us | } 1) + 
16.560 us | } 1) + 18.080 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.840 us | rpcauth_lookup_credcache(); 1) 2.400 us | } 1) 3.920 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.820 us | } 1) 5.380 us | } 1) + 11.720 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.880 us | nfs4_is_valid_delegation(); 1) 2.440 us | } 1) 3.980 us | } 1) 5.580 us | } 1) 7.120 us | } 1) 0.740 us | nfs_lookup_revalidate_done(); 1) + 10.280 us | } 1) + 11.820 us | } 1) + 13.540 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.860 us | rpcauth_lookup_credcache(); 1) 2.560 us | } 1) 4.100 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.860 us | } 1) 5.440 us | } 1) + 12.020 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.760 us | nfs4_is_valid_delegation(); 1) 2.340 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.280 us | } 1) 3.840 us | } 1) 5.360 us | } 1) 6.980 us | } 1) 0.740 us | nfs_lookup_verify_inode(); 1) 0.740 us | nfs_advise_use_readdirplus(); 1) + 15.000 us | } 1) + 16.520 us | } 1) + 18.040 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.840 us | rpcauth_lookup_credcache(); 1) 2.400 us | } 1) 3.960 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() 
{ 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.860 us | } 1) 5.580 us | } 1) + 11.920 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.280 us | } 1) 3.820 us | } 1) 5.340 us | } 1) 6.820 us | } 1) 0.760 us | nfs_lookup_verify_inode(); 1) 0.740 us | nfs_advise_use_readdirplus(); 1) + 14.540 us | } 1) + 16.060 us | } 1) + 17.640 us | } 1) | nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.840 us | rpcauth_lookup_credcache(); 1) 2.360 us | } 1) 3.900 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.820 us | } 1) 5.420 us | } 1) + 11.660 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.400 us | } 1) | nfs_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) | nfs_check_verifier() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.720 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.860 us | } 1) 5.400 us | } 1) 6.880 us | } 1) 0.820 us | nfs_lookup_verify_inode(); 1) 0.740 us | nfs_advise_use_readdirplus(); 1) + 14.840 us | } 1) + 19.700 us | } 1) + 21.240 us | } 1) | nfs_get_link() { 1) | nfs_revalidate_mapping_rcu() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.300 us | } 1) 3.940 us | } 1) 5.600 us | } 1) 7.320 us | } 1) 9.340 us | } 1) | 
nfs_permission() { 1) | rpc_lookup_cred_nonblock() { 1) | rpcauth_lookupcred() { 1) 0.840 us | rpcauth_lookup_credcache(); 1) 2.360 us | } 1) 3.920 us | } 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 2.320 us | } 1) 3.860 us | } 1) 5.400 us | } 1) + 11.720 us | } 1) | nfs4_lookup_revalidate() { 1) | nfs4_do_lookup_revalidate() { 1) | nfs4_have_delegation() { 1) 0.980 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 4.240 us | } 1) 5.780 us | } 1) 7.340 us | } 1) 0.840 us | nfs_permission(); 1) | nfs4_file_open() { 1) 0.740 us | nfs_check_flags(); 1) | rpc_lookup_cred() { 1) | rpcauth_lookupcred() { 1) 0.900 us | rpcauth_lookup_credcache(); 1) 2.480 us | } 1) 4.020 us | } 1) 0.800 us | nfs_sb_active(); 1) | nfs4_atomic_open() { 1) | nfs4_do_open() { 1) 0.960 us | nfs4_get_state_owner(); 1) | nfs4_client_recover_expired_lease() { 1) | nfs4_wait_clnt_recover() { 1) | nfs_put_client() { 1) 1.040 us | nfs_put_client.part.2(); 1) 2.580 us | } 1) 4.260 us | } 1) 5.840 us | } 1) | nfs4_opendata_alloc() { 1) 0.740 us | nfs4_label_alloc(); 1) 0.740 us | nfs4_label_alloc(); 1) 0.840 us | nfs_alloc_seqid(); 1) 0.820 us | nfs_sb_active(); 1) | nfs_fattr_init() { 1) 0.740 us | nfs_inc_attr_generation_counter(); 1) 2.280 us | } 1) 0.760 us | nfs_fattr_init_names(); 1) + 12.420 us | } 1) 0.980 us | nfs4_get_open_state(); 1) | nfs4_run_open_task() { 1) | rpc_run_task() { 1) | rpc_new_task() { 1) 0.740 us | xprt_get(); 1) 2.600 us | } 1) | xprt_iter_get_next() { 1) | xprt_iter_get_helper() { 1) 0.740 us | xprt_iter_first_entry(); 1) 0.740 us | xprt_get(); 1) 3.880 us | } 1) 5.480 us | } 1) | rpc_execute() { 1) 4.240 us | rpc_make_runnable(); 1) 5.920 us | } 1) + 17.600 us | } 1) ! 
104.260 us | rpc_wait_bit_killable(); 2) | rpc_async_schedule() { 2) | rpc_prepare_task() { 2) | nfs4_open_prepare() { 2) 1.000 us | nfs_wait_on_sequence(); 2) 0.800 us | nfs_mark_delegation_referenced(); 2) 0.740 us | nfs4_sequence_done(); 2) 6.540 us | } 2) 8.480 us | } 2) | rpc_release_resources_task() { 2) 0.780 us | xprt_release(); 2) | rpc_task_release_client() { 2) 0.780 us | rpc_release_client(); 2) 0.740 us | xprt_put(); 2) 4.540 us | } 2) 8.000 us | } 2) + 24.700 us | } 1) | rpc_put_task() { 1) | rpc_do_put_task() { 1) | rpc_release_resources_task() { 1) 0.880 us | xprt_release(); 1) 0.720 us | rpc_task_release_client(); 1) 4.060 us | } 1) | rpc_free_task() { 1) | nfs4_open_release() { 1) 0.760 us | nfs4_opendata_put.part.8(); 1) 2.340 us | } 1) 4.120 us | } 1) + 10.700 us | } 1) + 12.320 us | } 1) ! 138.360 us | } 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 0.820 us | nfs_release_seqid(); 1) | nfs_may_open() { 1) | nfs_do_access() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.740 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 4.060 us | } 1) 5.620 us | } 1) 7.420 us | } 1) 8.940 us | } 1) 0.820 us | nfs_mark_delegation_referenced(); 1) 0.740 us | nfs4_state_set_mode_locked(); 1) 0.740 us | nfs_release_seqid(); 1) 1.220 us | nfs_inode_attach_open_context(); 1) 0.740 us | nfs4_sequence_free_slot(); 1) | nfs4_opendata_put.part.8() { 1) 0.720 us | nfs4_lgopen_release(); 1) | nfs_free_seqid() { 1) 0.740 us | nfs_release_seqid(); 1) 2.380 us | } 1) 0.760 us | nfs4_sequence_free_slot(); 1) 0.740 us | nfs4_put_open_state(); 1) 0.740 us | nfs4_put_state_owner(); 1) 0.740 us | nfs_sb_deactive(); 1) 0.740 us | nfs_fattr_free_names(); 1) + 13.740 us | } 1) 0.760 us | nfs4_put_state_owner(); 1) ! 202.420 us | } 1) ! 204.020 us | } 1) 0.880 us | nfs_file_set_open_context(); 1) ! 
216.020 us | } 1) | nfs_file_read() { 1) 0.760 us | nfs_start_io_read(); 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.760 us | nfs4_is_valid_delegation(); 1) 0.880 us | nfs_mark_delegation_referenced(); 1) 4.020 us | } 1) 5.560 us | } 1) 7.120 us | } 1) 8.740 us | } 1) 0.800 us | nfs_end_io_read(); 1) + 15.660 us | } 1) | nfs_getattr() { 1) | nfs_writepages() { 1) | nfs_pageio_init_write() { 1) 0.740 us | nfs_pageio_init(); 1) 2.260 us | } 1) | nfs_pageio_complete() { 1) | nfs_pageio_doio() { 1) 0.740 us | nfs_pgio_current_mirror(); 1) 2.260 us | } 1) 3.960 us | } 1) | nfs_io_completion_put.part.0() { 1) | nfs_io_completion_commit() { 1) 0.920 us | nfs_commit_end(); 1) 2.560 us | } 1) 4.200 us | } 1) + 14.760 us | } 1) | nfs_attribute_cache_expired() { 1) | nfs4_have_delegation() { 1) 0.780 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 3.940 us | } 1) 5.540 us | } 1) | nfs_readdirplus_parent_cache_hit.part.5() { 1) 0.720 us | nfs_advise_use_readdirplus(); 1) 2.540 us | } 1) + 27.060 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.760 us | nfs4_is_valid_delegation(); 1) 0.740 us | nfs_mark_delegation_referenced(); 1) 3.900 us | } 1) 5.420 us | } 1) 7.000 us | } 1) 8.580 us | } 1) + 10.320 us | } 1) | nfs_file_mmap() { 1) | nfs_revalidate_mapping() { 1) | nfs_mapping_need_revalidate_inode() { 1) | nfs_check_cache_invalid() { 1) | nfs4_have_delegation() { 1) 0.780 us | nfs4_is_valid_delegation(); 1) 0.760 us | nfs_mark_delegation_referenced(); 1) 3.960 us | } 1) 5.660 us | } 1) 7.200 us | } 1) 8.900 us | } 1) + 10.680 us | } 1) 1.040 us | nfs4_file_flush(); 1) | nfs_file_release() { 1) | nfs_file_clear_open_context() { 1) | nfs4_close_context() { 1) | nfs4_close_sync() { 1) 0.800 us | 
nfs4_state_set_mode_locked(); 1) | nfs4_put_open_state() { 1) 0.740 us | nfs4_put_state_owner(); 1) 2.920 us | } 1) 0.760 us | nfs4_put_state_owner(); 1) 8.600 us | } 1) + 10.460 us | } 1) 0.800 us | nfs_sb_deactive(); 1) + 14.740 us | } 1) + 16.820 us | } 1) 0.940 us | nfs_dentry_delete(); 1) | nfs_file_release() { 1) | nfs_file_clear_open_context() { 1) | nfs4_close_context() { 1) | nfs4_close_sync() { 1) 0.840 us | nfs4_state_set_mode_locked(); 1) | nfs4_put_open_state() { 1) 0.900 us | nfs4_put_state_owner(); 1) 3.880 us | } 1) 0.760 us | nfs4_put_state_owner(); 1) + 10.520 us | } 1) + 13.100 us | } 1) 0.860 us | nfs_sb_deactive(); 1) + 19.060 us | } 1) + 25.300 us | } 1) 0.900 us | nfs_dentry_delete(); 1) | nfs_file_release() { 1) | nfs_file_clear_open_context() { 1) | nfs4_close_context() { 1) | nfs4_close_sync() { 1) 0.920 us | nfs4_state_set_mode_locked(); 1) 0.860 us | nfs4_put_open_state(); 1) 0.780 us | nfs4_put_state_owner(); 1) 6.120 us | } 1) 7.740 us | } 1) 0.760 us | nfs_sb_deactive(); 1) + 11.680 us | } 1) + 13.580 us | } 1) | nfs_file_release() { 1) | nfs_file_clear_open_context() { 1) | nfs4_close_context() { 1) | nfs4_close_sync() { 1) 0.920 us | nfs4_state_set_mode_locked(); 1) 0.780 us | nfs4_put_open_state(); 1) 0.740 us | nfs4_put_state_owner(); 1) 5.940 us | } 1) 7.520 us | } 1) 0.760 us | nfs_sb_deactive(); 1) + 11.380 us | } 1) + 13.040 us | }
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 005cfb6e7238..458bfe0137f5 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -31,15 +31,16 @@ struct sock_xprt { * State of TCP reply receive */ struct { - __be32 fraghdr, + struct { + __be32 fraghdr, xid, calldir; + } __attribute__((packed)); u32 offset, len; - unsigned long copied, - flags; + unsigned long copied; } recv; /* @@ -76,21 +77,9 @@ struct sock_xprt { void (*old_error_report)(struct sock *); }; -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - /* * TCP RPC flags */ -#define TCP_RPC_REPLY (1UL << 6) - #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) #define XPRT_SOCK_UPD_TIMEOUT (3) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 0aa347194e0f..19e08d12696c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -497,16 +497,6 @@ TRACE_EVENT(xs_tcp_data_ready, __get_str(port), __entry->err, __entry->total) ); -#define rpc_show_sock_xprt_flags(flags) \ - __print_flags(flags, "|", \ - { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \ - { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \ - { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \ - { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \ - { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \ - { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \ - { TCP_RPC_REPLY, "TCP_RPC_REPLY" }) - TRACE_EVENT(xs_tcp_data_recv, TP_PROTO(struct sock_xprt *xs), @@ -516,7 +506,6 @@ TRACE_EVENT(xs_tcp_data_recv, __string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]) __string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]) __field(u32, xid) - __field(unsigned long, flags) __field(unsigned long, copied) __field(unsigned 
int, reclen) __field(unsigned long, offset) @@ -526,15 +515,13 @@ TRACE_EVENT(xs_tcp_data_recv, __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]); __entry->xid = be32_to_cpu(xs->recv.xid); - __entry->flags = xs->recv.flags; __entry->copied = xs->recv.copied; __entry->reclen = xs->recv.len; __entry->offset = xs->recv.offset; ), - TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu", + TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%lu", __get_str(addr), __get_str(port), __entry->xid, - rpc_show_sock_xprt_flags(__entry->flags), __entry->copied, __entry->reclen, __entry->offset) ); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f16406228ead..5269ad98bb08 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -47,13 +47,13 @@ #include <net/checksum.h> #include <net/udp.h> #include <net/tcp.h> +#include <linux/bvec.h> +#include <linux/uio.h> #include <trace/events/sunrpc.h> #include "sunrpc.h" -#define RPC_TCP_READ_CHUNK_SZ (3*512*1024) - static void xs_close(struct rpc_xprt *xprt); static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock); @@ -325,6 +325,320 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt) } } +static size_t +xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) +{ + size_t i,n; + + if (!(buf->flags & XDRBUF_SPARSE_PAGES)) + return want; + if (want > buf->page_len) + want = buf->page_len; + n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < n; i++) { + if (buf->pages[i]) + continue; + buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); + if (!buf->pages[i]) { + buf->page_len = (i * PAGE_SIZE) - buf->page_base; + return buf->page_len; + } + } + return want; +} + +static ssize_t +xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) +{ + ssize_t ret; + if (seek != 0) + iov_iter_advance(&msg->msg_iter, 
seek); + ret = sock_recvmsg(sock, msg, flags); + return ret > 0 ? ret + seek : ret; +} + +static ssize_t +xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, + struct kvec *kvec, size_t count, size_t seek) +{ + iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count); + return xs_sock_recvmsg(sock, msg, flags, seek); +} + +static ssize_t +xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, + struct bio_vec *bvec, unsigned long nr, size_t count, + size_t seek) +{ + iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count); + return xs_sock_recvmsg(sock, msg, flags, seek); +} + +static ssize_t +xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, + size_t count) +{ + struct kvec kvec = { 0 }; + return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0); +} + +static ssize_t +xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, + struct xdr_buf *buf, size_t count, size_t seek, size_t *read) +{ + size_t want, seek_init = seek, offset = 0; + ssize_t ret; + + if (seek < buf->head[0].iov_len) { + want = min_t(size_t, count, buf->head[0].iov_len); + ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek); + if (ret <= 0) + goto sock_err; + offset += ret; + if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + goto out; + if (ret != want) + goto eagain; + seek = 0; + } else { + seek -= buf->head[0].iov_len; + offset += buf->head[0].iov_len; + } + if (buf->page_len && seek < buf->page_len) { + want = min_t(size_t, count - offset, buf->page_len); + want = xs_alloc_sparse_pages(buf, want, GFP_NOWAIT); + ret = xs_read_bvec(sock, msg, flags, buf->bvec, + xdr_buf_pagecount(buf), + want + buf->page_base, + seek + buf->page_base); + if (ret <= 0) + goto sock_err; + offset += ret; + if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + goto out; + if (ret != want) + goto eagain; + seek = 0; + } else { + seek -= buf->page_len; + offset += buf->page_len; + } + if 
(buf->tail[0].iov_len && seek < buf->tail[0].iov_len) { + want = min_t(size_t, count - offset, buf->tail[0].iov_len); + ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); + if (ret <= 0) + goto sock_err; + offset += ret; + if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + goto out; + if (ret != want) + goto eagain; + } else + offset += buf->tail[0].iov_len; + ret = -EMSGSIZE; + msg->msg_flags |= MSG_TRUNC; +out: + *read = offset - seek_init; + return ret; +eagain: + ret = -EAGAIN; + goto out; +sock_err: + offset += seek; + goto out; +} + +static void +xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf) +{ + if (!transport->recv.copied) { + if (buf->head[0].iov_len >= transport->recv.offset) + memcpy(buf->head[0].iov_base, + &transport->recv.xid, + transport->recv.offset); + transport->recv.copied = transport->recv.offset; + } +} + +static bool +xs_read_stream_request_done(struct sock_xprt *transport) +{ + return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); +} + +static ssize_t +xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, + int flags, struct rpc_rqst *req) +{ + struct xdr_buf *buf = &req->rq_private_buf; + size_t want, read; + ssize_t ret; + + xs_read_header(transport, buf); + + want = transport->recv.len - transport->recv.offset; + ret = xs_read_xdr_buf(transport->sock, msg, flags, buf, + transport->recv.copied + want, transport->recv.copied, + &read); + transport->recv.offset += read; + transport->recv.copied += read; + if (transport->recv.offset == transport->recv.len) { + if (xs_read_stream_request_done(transport)) + msg->msg_flags |= MSG_EOR; + return transport->recv.copied; + } + + switch (ret) { + case -EMSGSIZE: + return transport->recv.copied; + case 0: + return -ESHUTDOWN; + default: + if (ret < 0) + return ret; + } + return -EAGAIN; +} + +static size_t +xs_read_stream_headersize(bool isfrag) +{ + if (isfrag) + return sizeof(__be32); + return 3 * sizeof(__be32); +} + 
+static ssize_t +xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg, + int flags, size_t want, size_t seek) +{ + struct kvec kvec = { + .iov_base = &transport->recv.fraghdr, + .iov_len = want, + }; + return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek); +} + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static ssize_t +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) +{ + struct rpc_xprt *xprt = &transport->xprt; + struct rpc_rqst *req; + ssize_t ret; + + /* Look up and lock the request corresponding to the given XID */ + req = xprt_lookup_bc_request(xprt, transport->recv.xid); + if (!req) { + printk(KERN_WARNING "Callback slot table overflowed\n"); + return -ESHUTDOWN; + } + + ret = xs_read_stream_request(transport, msg, flags, req); + if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + xprt_complete_bc_request(req, ret); + + return ret; +} +#else /* CONFIG_SUNRPC_BACKCHANNEL */ +static ssize_t +xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) +{ + return -ESHUTDOWN; +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + +static ssize_t +xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) +{ + struct rpc_xprt *xprt = &transport->xprt; + struct rpc_rqst *req; + ssize_t ret = 0; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->queue_lock); + req = xprt_lookup_rqst(xprt, transport->recv.xid); + if (!req) { + msg->msg_flags |= MSG_TRUNC; + goto out; + } + xprt_pin_rqst(req); + spin_unlock(&xprt->queue_lock); + + ret = xs_read_stream_request(transport, msg, flags, req); + + spin_lock(&xprt->queue_lock); + if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) + xprt_complete_rqst(req->rq_task, ret); + xprt_unpin_rqst(req); +out: + spin_unlock(&xprt->queue_lock); + return ret; +} + +static ssize_t +xs_read_stream(struct sock_xprt *transport, int flags) +{ + struct msghdr msg = { 0 }; + size_t want, read = 0; + ssize_t ret = 0; + + if 
(transport->recv.len == 0) { + want = xs_read_stream_headersize(transport->recv.copied != 0); + ret = xs_read_stream_header(transport, &msg, flags, want, + transport->recv.offset); + if (ret <= 0) + goto out_err; + transport->recv.offset = ret; + if (ret != want) { + ret = -EAGAIN; + goto out_err; + } + transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & + RPC_FRAGMENT_SIZE_MASK; + transport->recv.offset -= sizeof(transport->recv.fraghdr); + read = ret; + } + + switch (be32_to_cpu(transport->recv.calldir)) { + case RPC_CALL: + ret = xs_read_stream_call(transport, &msg, flags); + break; + case RPC_REPLY: + ret = xs_read_stream_reply(transport, &msg, flags); + } + if (msg.msg_flags & MSG_TRUNC) { + transport->recv.calldir = cpu_to_be32(-1); + transport->recv.copied = -1; + } + if (ret < 0) + goto out_err; + read += ret; + if (transport->recv.offset < transport->recv.len) { + ret = xs_read_discard(transport->sock, &msg, flags, + transport->recv.len - transport->recv.offset); + if (ret <= 0) + goto out_err; + transport->recv.offset += ret; + read += ret; + } + if (xs_read_stream_request_done(transport)) { + trace_xs_tcp_data_recv(transport); + transport->recv.copied = 0; + } + transport->recv.offset = 0; + transport->recv.len = 0; + return read; +out_err: + switch (ret) { + case 0: + case -ESHUTDOWN: + xprt_force_disconnect(&transport->xprt); + return -ESHUTDOWN; + } + return ret; +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) @@ -484,6 +798,12 @@ static int xs_nospace(struct rpc_rqst *req) return ret; } +static void +xs_stream_prepare_request(struct rpc_rqst *req) +{ + req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO); +} + /* * Determine if the previous message in the stream was aborted before it * could complete transmission. 
@@ -1157,263 +1477,7 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt) xprt_force_disconnect(xprt); } -static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - size_t len, used; - char *p; - - p = ((char *) &transport->recv.fraghdr) + transport->recv.offset; - len = sizeof(transport->recv.fraghdr) - transport->recv.offset; - used = xdr_skb_read_bits(desc, p, len); - transport->recv.offset += used; - if (used != len) - return; - - transport->recv.len = ntohl(transport->recv.fraghdr); - if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT) - transport->recv.flags |= TCP_RCV_LAST_FRAG; - else - transport->recv.flags &= ~TCP_RCV_LAST_FRAG; - transport->recv.len &= RPC_FRAGMENT_SIZE_MASK; - - transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR; - transport->recv.offset = 0; - - /* Sanity check of the record length */ - if (unlikely(transport->recv.len < 8)) { - dprintk("RPC: invalid TCP record fragment length\n"); - xs_tcp_force_close(xprt); - return; - } - dprintk("RPC: reading TCP record fragment of length %d\n", - transport->recv.len); -} - -static void xs_tcp_check_fraghdr(struct sock_xprt *transport) -{ - if (transport->recv.offset == transport->recv.len) { - transport->recv.flags |= TCP_RCV_COPY_FRAGHDR; - transport->recv.offset = 0; - if (transport->recv.flags & TCP_RCV_LAST_FRAG) { - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - transport->recv.flags |= TCP_RCV_COPY_XID; - transport->recv.copied = 0; - } - } -} - -static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc) -{ - size_t len, used; - char *p; - - len = sizeof(transport->recv.xid) - transport->recv.offset; - dprintk("RPC: reading XID (%zu bytes)\n", len); - p = ((char *) &transport->recv.xid) + transport->recv.offset; - used = xdr_skb_read_bits(desc, p, len); - transport->recv.offset += used; - if (used != len) - return; - transport->recv.flags &= 
~TCP_RCV_COPY_XID; - transport->recv.flags |= TCP_RCV_READ_CALLDIR; - transport->recv.copied = 4; - dprintk("RPC: reading %s XID %08x\n", - (transport->recv.flags & TCP_RPC_REPLY) ? "reply for" - : "request with", - ntohl(transport->recv.xid)); - xs_tcp_check_fraghdr(transport); -} - -static inline void xs_tcp_read_calldir(struct sock_xprt *transport, - struct xdr_skb_reader *desc) -{ - size_t len, used; - u32 offset; - char *p; - - /* - * We want transport->recv.offset to be 8 at the end of this routine - * (4 bytes for the xid and 4 bytes for the call/reply flag). - * When this function is called for the first time, - * transport->recv.offset is 4 (after having already read the xid). - */ - offset = transport->recv.offset - sizeof(transport->recv.xid); - len = sizeof(transport->recv.calldir) - offset; - dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", len); - p = ((char *) &transport->recv.calldir) + offset; - used = xdr_skb_read_bits(desc, p, len); - transport->recv.offset += used; - if (used != len) - return; - transport->recv.flags &= ~TCP_RCV_READ_CALLDIR; - /* - * We don't yet have the XDR buffer, so we will write the calldir - * out after we get the buffer from the 'struct rpc_rqst' - */ - switch (ntohl(transport->recv.calldir)) { - case RPC_REPLY: - transport->recv.flags |= TCP_RCV_COPY_CALLDIR; - transport->recv.flags |= TCP_RCV_COPY_DATA; - transport->recv.flags |= TCP_RPC_REPLY; - break; - case RPC_CALL: - transport->recv.flags |= TCP_RCV_COPY_CALLDIR; - transport->recv.flags |= TCP_RCV_COPY_DATA; - transport->recv.flags &= ~TCP_RPC_REPLY; - break; - default: - dprintk("RPC: invalid request message type\n"); - xs_tcp_force_close(&transport->xprt); - } - xs_tcp_check_fraghdr(transport); -} - -static inline void xs_tcp_read_common(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc, - struct rpc_rqst *req) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct xdr_buf *rcvbuf; - size_t len; - ssize_t r; - - rcvbuf 
= &req->rq_private_buf; - - if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) { - /* - * Save the RPC direction in the XDR buffer - */ - memcpy(rcvbuf->head[0].iov_base + transport->recv.copied, - &transport->recv.calldir, - sizeof(transport->recv.calldir)); - transport->recv.copied += sizeof(transport->recv.calldir); - transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR; - } - - len = desc->count; - if (len > transport->recv.len - transport->recv.offset) - desc->count = transport->recv.len - transport->recv.offset; - r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied, - desc, xdr_skb_read_bits); - - if (desc->count) { - /* Error when copying to the receive buffer, - * usually because we weren't able to allocate - * additional buffer pages. All we can do now - * is turn off TCP_RCV_COPY_DATA, so the request - * will not receive any additional updates, - * and time out. - * Any remaining data from this record will - * be discarded. - */ - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - dprintk("RPC: XID %08x truncated request\n", - ntohl(transport->recv.xid)); - dprintk("RPC: xprt = %p, recv.copied = %lu, " - "recv.offset = %u, recv.len = %u\n", - xprt, transport->recv.copied, - transport->recv.offset, transport->recv.len); - return; - } - - transport->recv.copied += r; - transport->recv.offset += r; - desc->count = len - r; - - dprintk("RPC: XID %08x read %zd bytes\n", - ntohl(transport->recv.xid), r); - dprintk("RPC: xprt = %p, recv.copied = %lu, recv.offset = %u, " - "recv.len = %u\n", xprt, transport->recv.copied, - transport->recv.offset, transport->recv.len); - - if (transport->recv.copied == req->rq_private_buf.buflen) - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - else if (transport->recv.offset == transport->recv.len) { - if (transport->recv.flags & TCP_RCV_LAST_FRAG) - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - } -} - -/* - * Finds the request corresponding to the RPC xid and invokes the common - * tcp read code to read the data. 
- */ -static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; - - dprintk("RPC: read reply XID %08x\n", ntohl(transport->recv.xid)); - - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->queue_lock); - req = xprt_lookup_rqst(xprt, transport->recv.xid); - if (!req) { - dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->recv.xid)); - spin_unlock(&xprt->queue_lock); - return -1; - } - xprt_pin_rqst(req); - spin_unlock(&xprt->queue_lock); - - xs_tcp_read_common(xprt, desc, req); - - spin_lock(&xprt->queue_lock); - if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) - xprt_complete_rqst(req->rq_task, transport->recv.copied); - xprt_unpin_rqst(req); - spin_unlock(&xprt->queue_lock); - return 0; -} - #if defined(CONFIG_SUNRPC_BACKCHANNEL) -/* - * Obtains an rpc_rqst previously allocated and invokes the common - * tcp read code to read the data. The result is placed in the callback - * queue. - * If we're unable to obtain the rpc_rqst we schedule the closing of the - * connection and return -1. 
- */ -static int xs_tcp_read_callback(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; - - /* Look up the request corresponding to the given XID */ - req = xprt_lookup_bc_request(xprt, transport->recv.xid); - if (req == NULL) { - printk(KERN_WARNING "Callback slot table overflowed\n"); - xprt_force_disconnect(xprt); - return -1; - } - - dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); - xs_tcp_read_common(xprt, desc, req); - - if (!(transport->recv.flags & TCP_RCV_COPY_DATA)) - xprt_complete_bc_request(req, transport->recv.copied); - - return 0; -} - -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - - return (transport->recv.flags & TCP_RPC_REPLY) ? - xs_tcp_read_reply(xprt, desc) : - xs_tcp_read_callback(xprt, desc); -} - static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) { int ret; @@ -1429,106 +1493,14 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt) { return PAGE_SIZE; } -#else -static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - return xs_tcp_read_reply(xprt, desc); -} #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* - * Read data off the transport. This can be either an RPC_CALL or an - * RPC_REPLY. Relay the processing to helper functions. - */ -static void xs_tcp_read_data(struct rpc_xprt *xprt, - struct xdr_skb_reader *desc) -{ - struct sock_xprt *transport = - container_of(xprt, struct sock_xprt, xprt); - - if (_xs_tcp_read_data(xprt, desc) == 0) - xs_tcp_check_fraghdr(transport); - else { - /* - * The transport_lock protects the request handling. - * There's no need to hold it to update the recv.flags. 
- */ - transport->recv.flags &= ~TCP_RCV_COPY_DATA; - } -} - -static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) -{ - size_t len; - - len = transport->recv.len - transport->recv.offset; - if (len > desc->count) - len = desc->count; - desc->count -= len; - desc->offset += len; - transport->recv.offset += len; - dprintk("RPC: discarded %zu bytes\n", len); - xs_tcp_check_fraghdr(transport); -} - -static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) -{ - struct rpc_xprt *xprt = rd_desc->arg.data; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct xdr_skb_reader desc = { - .skb = skb, - .offset = offset, - .count = len, - }; - size_t ret; - - dprintk("RPC: xs_tcp_data_recv started\n"); - do { - trace_xs_tcp_data_recv(transport); - /* Read in a new fragment marker if necessary */ - /* Can we ever really expect to get completely empty fragments? */ - if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) { - xs_tcp_read_fraghdr(xprt, &desc); - continue; - } - /* Read in the xid if necessary */ - if (transport->recv.flags & TCP_RCV_COPY_XID) { - xs_tcp_read_xid(transport, &desc); - continue; - } - /* Read in the call/reply flag */ - if (transport->recv.flags & TCP_RCV_READ_CALLDIR) { - xs_tcp_read_calldir(transport, &desc); - continue; - } - /* Read in the request data */ - if (transport->recv.flags & TCP_RCV_COPY_DATA) { - xs_tcp_read_data(xprt, &desc); - continue; - } - /* Skip over any trailing bytes on short reads */ - xs_tcp_read_discard(transport, &desc); - } while (desc.count); - ret = len - desc.count; - if (ret < rd_desc->count) - rd_desc->count -= ret; - else - rd_desc->count = 0; - trace_xs_tcp_data_recv(transport); - dprintk("RPC: xs_tcp_data_recv done\n"); - return ret; -} - static void xs_tcp_data_receive(struct sock_xprt *transport) { struct rpc_xprt *xprt = &transport->xprt; struct sock *sk; - read_descriptor_t rd_desc = { - 
.arg.data = xprt, - }; - unsigned long total = 0; - int read = 0; + size_t read = 0; + ssize_t ret = 0; restart: mutex_lock(&transport->recv_mutex); @@ -1536,18 +1508,12 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; - /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ for (;;) { - rd_desc.count = RPC_TCP_READ_CHUNK_SZ; - lock_sock(sk); - read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); - if (rd_desc.count != 0 || read < 0) { - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); - release_sock(sk); + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + ret = xs_read_stream(transport, MSG_DONTWAIT | MSG_NOSIGNAL); + if (ret < 0) break; - } - release_sock(sk); - total += read; + read += ret; if (need_resched()) { mutex_unlock(&transport->recv_mutex); cond_resched(); @@ -1558,7 +1524,7 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) queue_work(xprtiod_workqueue, &transport->recv_worker); out: mutex_unlock(&transport->recv_mutex); - trace_xs_tcp_data_ready(xprt, read, total); + trace_xs_tcp_data_ready(xprt, ret, read); } static void xs_tcp_data_receive_workfn(struct work_struct *work) @@ -2380,7 +2346,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->recv.offset = 0; transport->recv.len = 0; transport->recv.copied = 0; - transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; transport->xmit.offset = 0; /* Tell the socket layer to start connecting... */ @@ -2802,6 +2767,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .connect = xs_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, + .prepare_request = xs_stream_prepare_request, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_tcp_shutdown,
Most of this code should also be reusable with other socket types. Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> --- include/linux/sunrpc/xprtsock.h | 19 +- include/trace/events/sunrpc.h | 15 +- net/sunrpc/xprtsock.c | 694 +++++++++++++++----------------- 3 files changed, 335 insertions(+), 393 deletions(-)