From patchwork Tue Mar 15 16:28:03 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Trond Myklebust X-Patchwork-Id: 12781640 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E17ECC433EF for ; Tue, 15 Mar 2022 16:34:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1350098AbiCOQfr (ORCPT ); Tue, 15 Mar 2022 12:35:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56656 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1350096AbiCOQfq (ORCPT ); Tue, 15 Mar 2022 12:35:46 -0400 Received: from ams.source.kernel.org (ams.source.kernel.org [145.40.68.75]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 478CB57152 for ; Tue, 15 Mar 2022 09:34:34 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id F1D6EB817C9 for ; Tue, 15 Mar 2022 16:34:32 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 41BEFC340E8 for ; Tue, 15 Mar 2022 16:34:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1647362071; bh=rcI4//gyqZpaPNsAijYqAv9OYh2/LcrZiCHX90zE9EE=; h=From:To:Subject:Date:From; b=Dg3hJyymoegauB/98KMGwWW4m045Ti/JwpXhYaOTFp4MW3VRxcZxqj4XSPzsJfk9o yEFAGuJlxjNQ3utGIREBCAdq9zxs+zdUpqVuhlzkjnOtr6X65hi1P681rEew4Uibml Bw72BzndQI9JVlwEEPCp9uPT/OdValQTJsWLoJz+Ljnlo7VkMcWZTzKQliYtTaRWF9 FT15YUQnhrM9S3ZcF+wzKSqQ8YeveF01uZ64sET9Xsx+Ln07HLg5m0pDTiE94DCQfK BfLmtjWpudvN3CVPGA5D1pJwCQw4fTLN6OvnTHpyxk84tn9rdLa3reimq3jlG+JJ6s NLSzJ3d9IfWFg== From: trondmy@kernel.org To: linux-nfs@vger.kernel.org Subject: [PATCH 1/3] SUNRPC: Fix socket waits for write buffer space Date: Tue, 15 Mar 2022 12:28:03 -0400 Message-Id: <20220315162805.570850-1-trondmy@kernel.org> X-Mailer: git-send-email 2.35.1 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Trond Myklebust The socket layer requires that we use the socket lock to protect changes to the sock->sk_write_pending field and others. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 54 +++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7e39f87cde2d..786df8c0cda3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -763,12 +763,12 @@ xs_stream_start_connect(struct sock_xprt *transport) /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -779,25 +779,49 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + /* wait for more buffer space */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - sk->sk_write_space(sk); - } +static int xs_stream_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; + + lock_sock(sk); + if (!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } @@ -887,7 +911,7 @@ static int xs_local_send_request(struct rpc_rqst *req) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -963,7 +987,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1083,7 +1107,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: From patchwork Tue Mar 15 16:28:04 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Trond Myklebust X-Patchwork-Id: 12781641 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 07697C433FE for ; Tue, 15 Mar 2022 16:34:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1350097AbiCOQfr (ORCPT ); Tue, 15 Mar 2022 12:35:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56628 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1350103AbiCOQfp (ORCPT ); Tue, 15 Mar 2022 12:35:45 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 26D6A506C7 for ; Tue, 15 Mar 2022 09:34:32 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 6D2DA614A0 for ; Tue, 15 Mar 2022 16:34:32 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id AB2B2C340EE for ; Tue, 15 Mar 2022 16:34:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1647362071; bh=MvfmD2iZlw1bl+V53u2scs6Uj9onrM6h2zMsfrP90xk=; h=From:To:Subject:Date:In-Reply-To:References:From; b=KLVPchhojpl4jshBELGyDjcDx6tVw+hWjT6Yt7ba3jWmwSG7UVQzAzijA2Upt9Ys2 i5S+C/zycx/yClFoI5PMR3CakUEzLzw3yqITCaxaIn3Uv6JN2d/LcfnuOIReVAMAwC 2CWT2ksrsI/w+ZgUR+AIWxsR4E70SMTI+Pa+/ZTGcVDOVppr3Xt0LAE0rwAe+utUH7 RXN/+0Z+wlSBWllE8sn/a7n34R9X6evt0JkTrg7hqTOnM+mzyjzXoLPfmgekem3W7r 7JA1FAfosdOnTa3enPQnyRQx5fwlo5L5qjG5weoSTcGP+jC7UfIlPwbC8bc7RCfoVK w+VN3aXwoGRfw== From: trondmy@kernel.org To: linux-nfs@vger.kernel.org Subject: [PATCH 2/3] SUNRPC: Replace internal use of SOCKWQ_ASYNC_NOSPACE Date: Tue, 15 Mar 2022 12:28:04 -0400 Message-Id: <20220315162805.570850-2-trondmy@kernel.org> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220315162805.570850-1-trondmy@kernel.org> References: <20220315162805.570850-1-trondmy@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Trond Myklebust The socket's SOCKWQ_ASYNC_NOSPACE can be cleared by various actors in the socket layer, so replace it with our own flag in the transport sock_state field. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + net/sunrpc/xprtsock.c | 22 ++++------------------ 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 8c2a712cb242..121162a95863 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -89,5 +89,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_WRITE (5) #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_NOSPACE (8) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 786df8c0cda3..23cdb841f056 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -779,14 +779,8 @@ static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) /* Don't race with disconnect */ if (xprt_connected(xprt)) { - struct socket_wq *wq; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); - /* wait for more buffer space */ + set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); @@ -1148,6 +1142,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); } static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) @@ -1494,7 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk) static void xs_write_space(struct sock *sk) { - struct socket_wq *wq; struct sock_xprt *transport; struct rpc_xprt *xprt; @@ -1505,15 +1499,10 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; transport = container_of(xprt, struct sock_xprt, xprt); - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) - goto out; - + if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state)) + return; xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); sk->sk_write_pending--; -out: - rcu_read_unlock(); } /** @@ -1854,7 +1843,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; - sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; xprt_clear_connected(xprt); @@ -2048,7 +2036,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; - sock_set_flag(sk, SOCK_FASYNC); xprt_set_connected(xprt); @@ -2215,7 +2202,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; /* socket options */ From patchwork Tue Mar 15 16:28:05 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Trond Myklebust X-Patchwork-Id: 12781642 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 58E36C433F5 for ; Tue, 15 Mar 2022 16:34:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1350096AbiCOQfs (ORCPT ); Tue, 15 Mar 2022 12:35:48 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56854 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232897AbiCOQfs (ORCPT ); Tue, 15 Mar 2022 12:35:48 -0400 Received: from sin.source.kernel.org (sin.source.kernel.org [145.40.73.55]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DF71D5005F for ; Tue, 15 Mar 2022 09:34:35 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by sin.source.kernel.org (Postfix) with ESMTPS id 1B82FCE1BB4 for ; Tue, 15 Mar 2022 16:34:34 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0CA37C340F4 for ; Tue, 15 Mar 2022 16:34:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1647362072; bh=uJxKKJu2tEJQPfSUQtoREIBPxxuB+bwNaGc87lg9iaE=; h=From:To:Subject:Date:In-Reply-To:References:From; b=b+AjVT7tQ+3zlfR0J/lHMR41QEkFMhOAc2p1TCJk8wmFR1ZalSnDQYm7HawHC9NEt 1/9CCWJLVxIJJBqSkzcojjr4iYKKjFk2Rcsp1Q2jQ8aNAZuAdb5Fylfez5KiiVbrRh H1RWjqi8/ZIFMUImcPFBLAsgogmLrnbwgXTjoNsMxyqMvX9ThIWIvSgDhTo1unUI7Z a/4UV07YWVnAf6v4W1GOdSVopqupAKqVPxGoGfxaFwl2LcUWOw/mLv+3z1AuVkMPnc m97Wrmc8Sdc6pY88WinwCEaDIvLv5KrVwvl9rmAopPLMHncxnR7eMeFX/nFf0mYQiH psYOHVwz0JxXw== From: trondmy@kernel.org To: linux-nfs@vger.kernel.org Subject: [PATCH 3/3] SUNRPC: Improve accuracy of socket ENOBUFS determination Date: Tue, 15 Mar 2022 12:28:05 -0400 Message-Id: <20220315162805.570850-3-trondmy@kernel.org> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220315162805.570850-2-trondmy@kernel.org> References: <20220315162805.570850-1-trondmy@kernel.org> <20220315162805.570850-2-trondmy@kernel.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Trond Myklebust The current code checks for whether or not the socket is in a writeable state after we get an EAGAIN. That is racy, since we've dropped the socket lock, so the amount of free buffer may have changed. Instead, let's check whether the socket is writeable before we try to write to it. If that was the case, we do expect the message to be at least partially sent unless we're in a low memory situation. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 53 +++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 23cdb841f056..87cc97d6c677 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -805,13 +805,15 @@ static int xs_sock_nospace(struct rpc_rqst *req) return ret; } -static int xs_stream_nospace(struct rpc_rqst *req) +static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) { struct sock_xprt *transport = container_of(req->rq_xprt, struct sock_xprt, xprt); struct sock *sk = transport->inet; int ret = -EAGAIN; + if (vm_wait) + return -ENOBUFS; lock_sock(sk); if (!sk_stream_memory_free(sk)) ret = xs_nospace(req, transport); @@ -869,6 +871,7 @@ static int xs_local_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; + bool vm_wait; unsigned int sent; int status; @@ -881,15 +884,14 @@ static int xs_local_send_request(struct rpc_rqst *req) xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); + vm_wait = sk_stream_is_writeable(transport->inet) ? true : false; + req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, transport->xmit.offset, rm, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); - if (status == -EAGAIN && sock_writeable(transport->inet)) - status = -ENOBUFS; - if (likely(sent > 0) || status == 0) { transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; @@ -899,13 +901,12 @@ static int xs_local_send_request(struct rpc_rqst *req) return 0; } status = -EAGAIN; + vm_wait = false; } switch (status) { - case -ENOBUFS: - break; case -EAGAIN: - status = xs_stream_nospace(req); + status = xs_stream_nospace(req, vm_wait); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -1023,7 +1024,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; - bool vm_wait = false; + bool vm_wait; unsigned int sent; int status; @@ -1048,7 +1049,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req) * called sendmsg(). */ req->rq_xtime = ktime_get(); tcp_sock_set_cork(transport->inet, true); - while (1) { + + vm_wait = sk_stream_is_writeable(transport->inet) ? true : false; + + do { status = xprt_sock_sendmsg(transport->sock, &msg, xdr, transport->xmit.offset, rm, &sent); @@ -1069,31 +1073,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req) WARN_ON_ONCE(sent == 0 && status == 0); - if (status == -EAGAIN ) { - /* - * Return EAGAIN if we're sure we're hitting the - * socket send buffer limits. - */ - if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) - break; - /* - * Did we hit a memory allocation failure? - */ - if (sent == 0) { - status = -ENOBUFS; - if (vm_wait) - break; - /* Retry, knowing now that we're below the - * socket send buffer limit - */ - vm_wait = true; - } - continue; - } - if (status < 0) - break; - vm_wait = false; - } + if (sent > 0) + vm_wait = false; + + } while (status == 0); switch (status) { case -ENOTSOCK: @@ -1101,7 +1084,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_stream_nospace(req); + status = xs_stream_nospace(req, vm_wait); break; case -ECONNRESET: case -ECONNREFUSED: