diff mbox series

[v2,4/9] SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent

Message ID 20220322011618.1052288-5-trondmy@kernel.org (mailing list archive)
State New, archived
Headers show
Series Crossing our fingers is not a strategy | expand

Commit Message

Trond Myklebust March 22, 2022, 1:16 a.m. UTC
From: Trond Myklebust <trond.myklebust@hammerspace.com>

Make sure that rpciod and xprtiod are always using the same slab
allocation modes.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/backchannel_rqst.c |  8 ++++----
 net/sunrpc/rpcb_clnt.c        |  4 ++--
 net/sunrpc/socklib.c          |  3 ++-
 net/sunrpc/xprt.c             |  5 +----
 net/sunrpc/xprtsock.c         | 11 ++++++-----
 5 files changed, 15 insertions(+), 16 deletions(-)

Comments

NeilBrown March 25, 2022, 1:49 a.m. UTC | #1
On Tue, 22 Mar 2022, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> Make sure that rpciod and xprtiod are always using the same slab
> allocation modes.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
....
>  xs_stream_prepare_request(struct rpc_rqst *req)
>  {
>  	xdr_free_bvec(&req->rq_rcv_buf);
> -	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
> +	req->rq_task->tk_status = xdr_alloc_bvec(
> +		&req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
>  }

I did some testing of swap-over-NFS, and got a crash quite quickly, due
to this change.
The problem is that GFP_KERNEL allocations almost never fail.
Multi-page allocations might occasionally fail, and others might fail
for a process that has been killed by the OOM killer (or maybe just has
a fatal signal pending), but in general GFP_KERNEL is more likely to
wait (and wait and wait) than to fail.
So the failure paths haven't been tested.

xs_stream_prepare_request() is called from xprt_request_prepare(), which
is called from xprt_request_enqueue_receive() which is called in
call_encode() *after* ->tk_status has been tested.
So when the above code sets ->tk_status to -ENOMEM — which is now more
likely — that fact is ignored and we get a crash:

[  298.911356] Workqueue: xprtiod xs_stream_data_receive_workfn
[  298.911696] RIP: 0010:_copy_to_iter+0x1cc/0x435
..
[  298.918259]  __skb_datagram_iter+0x64/0x225
[  298.918507]  skb_copy_datagram_iter+0xe9/0xf2
[  298.918767]  tcp_recvmsg_locked+0x653/0x77e
[  298.919015]  tcp_recvmsg+0x100/0x188
[  298.919226]  inet_recvmsg+0x5d/0x86
[  298.919431]  xs_read_stream_request.constprop.0+0x247/0x378
[  298.919754]  xs_read_stream.constprop.0+0x1c2/0x39b
[  298.920038]  xs_stream_data_receive_workfn+0x50/0x160
[  298.920331]  process_one_work+0x267/0x422
[  298.920568]  worker_thread+0x193/0x234

So we really need to audit all these places where we add __GFP_NORETRY
and ensure errors are actually handled.

For call_encode(), it might be easiest to move
	/* Add task to reply queue before transmission to avoid races */
	if (rpc_reply_expected(task))
		xprt_request_enqueue_receive(task);

up before the
	/* Did the encode result in an error condition? */
	if (task->tk_status != 0) {

and change it to

	/* Add task to reply queue before transmission to avoid races */
	if (task->tk_status == 0 && rpc_reply_expected(task))
		xprt_request_enqueue_receive(task);

I'll try a bit more testing and auditing.

Thanks,
NeilBrown
diff mbox series

Patch

diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 22a2c235abf1..5a6b61dcdf2d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -75,9 +75,9 @@  static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
 	return 0;
 }
 
-static
-struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
 {
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
 	struct rpc_rqst *req;
 
 	/* Pre-allocate one backchannel rpc_rqst */
@@ -154,7 +154,7 @@  int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
 	INIT_LIST_HEAD(&tmp_list);
 	for (i = 0; i < min_reqs; i++) {
 		/* Pre-allocate one backchannel rpc_rqst */
-		req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+		req = xprt_alloc_bc_req(xprt);
 		if (req == NULL) {
 			printk(KERN_ERR "Failed to create bc rpc_rqst\n");
 			goto out_free;
@@ -343,7 +343,7 @@  struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
 			break;
 		} else if (req)
 			break;
-		new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+		new = xprt_alloc_bc_req(xprt);
 	} while (new);
 	return req;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0fdeb8666bfd..5a8e6d46809a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -714,7 +714,7 @@  void rpcb_getport_async(struct rpc_task *task)
 		goto bailout_nofree;
 	}
 
-	map = kzalloc(sizeof(struct rpcbind_args), GFP_KERNEL);
+	map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask());
 	if (!map) {
 		status = -ENOMEM;
 		goto bailout_release_client;
@@ -730,7 +730,7 @@  void rpcb_getport_async(struct rpc_task *task)
 	case RPCBVERS_4:
 	case RPCBVERS_3:
 		map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
-		map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
+		map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask());
 		if (!map->r_addr) {
 			status = -ENOMEM;
 			goto bailout_free_args;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index d52313af82bc..05b38bf68316 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -15,6 +15,7 @@ 
 #include <linux/pagemap.h>
 #include <linux/udp.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/export.h>
 
@@ -222,7 +223,7 @@  static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
 {
 	int err;
 
-	err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+	err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
 	if (err < 0)
 		return err;
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index bbe913121f43..744c6c1d536f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1679,15 +1679,12 @@  static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
 static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
 {
 	struct rpc_rqst *req = ERR_PTR(-EAGAIN);
-	gfp_t gfp_mask = GFP_KERNEL;
 
 	if (xprt->num_reqs >= xprt->max_reqs)
 		goto out;
 	++xprt->num_reqs;
 	spin_unlock(&xprt->reserve_lock);
-	if (current->flags & PF_WQ_WORKER)
-		gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
-	req = kzalloc(sizeof(*req), gfp_mask);
+	req = kzalloc(sizeof(*req), rpc_task_gfp_mask());
 	spin_lock(&xprt->reserve_lock);
 	if (req != NULL)
 		goto out;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8909c768fe71..b52eaa8a0cda 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -428,9 +428,9 @@  xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		offset += want;
 	}
 
-	want = xs_alloc_sparse_pages(buf,
-			min_t(size_t, count - offset, buf->page_len),
-			GFP_KERNEL);
+	want = xs_alloc_sparse_pages(
+		buf, min_t(size_t, count - offset, buf->page_len),
+		GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 	if (seek < want) {
 		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
 				xdr_buf_pagecount(buf),
@@ -826,7 +826,8 @@  static void
 xs_stream_prepare_request(struct rpc_rqst *req)
 {
 	xdr_free_bvec(&req->rq_rcv_buf);
-	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
+	req->rq_task->tk_status = xdr_alloc_bvec(
+		&req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 }
 
 /*
@@ -2487,7 +2488,7 @@  static int bc_malloc(struct rpc_task *task)
 		return -EINVAL;
 	}
 
-	page = alloc_page(GFP_KERNEL);
+	page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 	if (!page)
 		return -ENOMEM;