From patchwork Mon Nov 8 10:57:09 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 12608293 X-Patchwork-Delegate: mat@martineau.name Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [193.142.43.52]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id F3BFB2C85 for ; Mon, 8 Nov 2021 11:23:42 +0000 (UTC) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1mk2L7-0006Xg-J9; Mon, 08 Nov 2021 11:57:25 +0100 From: Florian Westphal To: Cc: Florian Westphal Subject: [PATCH mptcp-next 1/3] mptcp: add TCP_INQ cmsg support Date: Mon, 8 Nov 2021 11:57:09 +0100 Message-Id: <20211108105711.16200-2-fw@strlen.de> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20211108105711.16200-1-fw@strlen.de> References: <20211108105711.16200-1-fw@strlen.de> Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Support the TCP_INQ setsockopt. This is a boolean that tells recvmsg path to include the remaining in-sequence bytes into a cmsg data. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/224 Signed-off-by: Florian Westphal --- net/mptcp/protocol.c | 33 +++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 1 + net/mptcp/sockopt.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b0bfe20d6bb0..b4263bf821ac 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -46,6 +46,7 @@ struct mptcp_skb_cb { enum { MPTCP_CMSG_TS = BIT(0), + MPTCP_CMSG_INQ = BIT(1), }; static struct percpu_counter mptcp_sockets_allocated; @@ -2006,6 +2007,29 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) return !skb_queue_empty(&msk->receive_queue); } +static unsigned int mptcp_inq_hint(const struct sock *sk) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + const struct sk_buff *skb; + u64 acked = msk->ack_seq; + u64 hint_val = 0; + + skb = skb_peek(&msk->receive_queue); + if (skb) { + u64 map_seq = MPTCP_SKB_CB(skb)->map_seq + MPTCP_SKB_CB(skb)->offset; + + hint_val = acked - map_seq; + + if (hint_val >= INT_MAX) + hint_val = INT_MAX - 1; + } + + if (hint_val == 0 && sock_flag(sk, SOCK_DONE)) + hint_val = 1; + + return (unsigned int)hint_val; +} + static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -2030,6 +2054,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, len = min_t(size_t, len, INT_MAX); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (unlikely(msk->recvmsg_inq)) + cmsg_flags = MPTCP_CMSG_INQ; + while (copied < len) { int bytes_read; @@ -2103,6 +2130,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (cmsg_flags && copied >= 0) { if (cmsg_flags & MPTCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); + + if (cmsg_flags & MPTCP_CMSG_INQ) { + unsigned int inq = mptcp_inq_hint(sk); + + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); + } } pr_debug("msk=%p rx queue empty=%d:%d copied=%d", diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 906509c6cde5..e77de7662df0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -250,6 +250,7 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 recvmsg_inq:1; spinlock_t join_list_lock; struct work_struct work; struct sk_buff *ooo_last_skb; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index b818e91f2e09..7405152691e0 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -557,6 +557,7 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_TIMESTAMP: case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: + case TCP_INQ: return true; } @@ -698,7 +699,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { + struct sock *sk = (void *)msk; + int ret, val; + switch (optname) { + case TCP_INQ: + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + if (val < 0 || val > 1) + return -EINVAL; + + lock_sock(sk); + msk->recvmsg_inq = !!val; + release_sock(sk); + return 0; case TCP_ULP: return -EOPNOTSUPP; case TCP_CONGESTION: @@ -1032,6 +1047,26 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o return 0; } +static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, + int __user *optlen, int val) +{ + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + if (len < 0) + return -EINVAL; + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { @@ -1042,6 +1077,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_CC_INFO: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); + case TCP_INQ: + return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); } return -EOPNOTSUPP; } From patchwork Mon Nov 8 10:57:10 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 12608251 X-Patchwork-Delegate: mat@martineau.name Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [193.142.43.52]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 003432C85 for ; Mon, 8 Nov 2021 10:57:31 +0000 (UTC) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1mk2LC-0006Y3-0F; Mon, 08 Nov 2021 11:57:30 +0100 From: Florian Westphal To: Cc: Florian Westphal Subject: [PATCH mptcp-next 2/3] selftests: mptcp: add TCP_INQ support Date: Mon, 8 Nov 2021 11:57:10 +0100 Message-Id: <20211108105711.16200-3-fw@strlen.de> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20211108105711.16200-1-fw@strlen.de> References: <20211108105711.16200-1-fw@strlen.de> Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Do checks on the returned inq counter. Fail on: 1. Huge value (>= 1 megabyte, test case files are 1 MB) 2. last hint larger than returned bytes when read was short 3. erroenous indication of EOF. 3) happens when a hint of X bytes reads X-1 on next call but next recvmsg returns more data (instead of EOF). Signed-off-by: Florian Westphal --- .../selftests/net/mptcp/mptcp_connect.c | 56 ++++++++++++++++++- .../selftests/net/mptcp/mptcp_sockopt.sh | 10 ++-- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index ada9b80774d4..e3e4338d610f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -73,12 +73,20 @@ static uint32_t cfg_mark; struct cfg_cmsg_types { unsigned int cmsg_enabled:1; unsigned int timestampns:1; + unsigned int tcp_inq:1; }; struct cfg_sockopt_types { unsigned int transparent:1; }; +struct tcp_inq_state { + unsigned int last; + bool expect_eof; +}; + +static struct tcp_inq_state tcp_inq; + static struct cfg_cmsg_types cfg_cmsg_types; static struct cfg_sockopt_types cfg_sockopt_types; @@ -389,7 +397,9 @@ static size_t do_write(const int fd, char *buf, const size_t len) static void process_cmsg(struct msghdr *msgh) { struct __kernel_timespec ts; + bool inq_found = false; bool ts_found = false; + unsigned int inq = 0; struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { @@ -398,12 +408,27 @@ static void process_cmsg(struct msghdr *msgh) ts_found = true; continue; } + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); + inq_found = true; + continue; + } + } if (cfg_cmsg_types.timestampns) { if (!ts_found) xerror("TIMESTAMPNS not present\n"); } + + if (cfg_cmsg_types.tcp_inq) { + if (!inq_found) + xerror("TCP_INQ not present\n"); + + if (inq >= 1 * 1024 * 1024) + xerror("tcp_inq is larger than one mbyte\n", inq); + tcp_inq.last = inq; + } } static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) @@ -420,10 +445,23 @@ static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) .msg_controllen = sizeof(msg_buf), }; int flags = 0; + unsigned int last_hint = tcp_inq.last; int ret = recvmsg(fd, &msg, flags); - if (ret <= 0) + if (ret <= 0) { + if (ret == 0 && tcp_inq.expect_eof) + return ret; + + if (ret == 0 && cfg_cmsg_types.tcp_inq) + if (last_hint != 1 && last_hint != 0) + xerror("EOF but last tcp_inq hint was %u\n", last_hint); + return ret; + } + + if (tcp_inq.expect_eof) + xerror("expected EOF, last_hint %u, now %u\n", + last_hint, tcp_inq.last); if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) xerror("got %lu bytes of cmsg data, expected 0\n", @@ -435,6 +473,15 @@ static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) if (msg.msg_controllen) process_cmsg(&msg); + if (cfg_cmsg_types.tcp_inq) { + if ((size_t)ret < len && last_hint > (unsigned int)ret) { + if (ret + 1 != (int)last_hint) + xerror("read %u of %u, last_hint was %u\n", ret, (unsigned int)len, last_hint); + else + tcp_inq.expect_eof = true; + } + } + return ret; } @@ -944,6 +991,8 @@ static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) if (cmsg->timestampns) xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); + if (cmsg->tcp_inq) + xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); } static void parse_cmsg_types(const char *type) @@ -965,6 +1014,11 @@ static void parse_cmsg_types(const char *type) return; } + if (strncmp(type, "TCPINQ", len) == 0) { + cfg_cmsg_types.tcp_inq = 1; + return; + } + fprintf(stderr, "Unrecognized cmsg option %s\n", type); exit(1); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 41de643788b8..75af784cc62a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -167,7 +167,7 @@ do_transfer() :> "$cout" :> "$sout" - mptcp_connect="./mptcp_connect -r 20" + mptcp_connect="./mptcp_connect" local local_addr if is_v6 "${connect_addr}"; then @@ -178,7 +178,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ ${local_addr} < "$sin" > "$sout" & spid=$! @@ -186,7 +186,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ $connect_addr < "$cin" > "$cout" & cpid=$! @@ -284,8 +284,8 @@ sout=$(mktemp) cin=$(mktemp) cout=$(mktemp) init -make_file "$cin" "client" 1 -make_file "$sin" "server" 1 +make_file "$cin" "client" 1024 +make_file "$sin" "server" 1024 trap cleanup EXIT run_tests $ns1 $ns2 10.0.1.1 From patchwork Mon Nov 8 10:57:11 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 12608253 X-Patchwork-Delegate: mat@martineau.name Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [193.142.43.52]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2358A29CA for ; Mon, 8 Nov 2021 10:57:36 +0000 (UTC) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1mk2LG-0006YL-FG; Mon, 08 Nov 2021 11:57:34 +0100 From: Florian Westphal To: Cc: Florian Westphal Subject: [PATCH mptcp-next 3/3] mptcp: add SIOCINQ, OUTQ and OUTQNSD ioctls Date: Mon, 8 Nov 2021 11:57:11 +0100 Message-Id: <20211108105711.16200-4-fw@strlen.de> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20211108105711.16200-1-fw@strlen.de> References: <20211108105711.16200-1-fw@strlen.de> Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Allows to query in-sequence data ready for read(), total bytes in write queue and total bytes in write queue that have not yet been sent. Signed-off-by: Florian Westphal --- net/mptcp/protocol.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b4263bf821ac..29b6f57b917e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -22,6 +22,7 @@ #endif #include #include +#include #include "protocol.h" #include "mib.h" @@ -3260,6 +3261,56 @@ static int mptcp_forward_alloc_get(const struct sock *sk) return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc; } +static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) +{ + const struct sock *sk = (void *)msk; + u64 delta; + + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) + return 0; + + delta = READ_ONCE(msk->write_seq) - v; + if (delta > INT_MAX) + delta = INT_MAX; + + return (int)delta; +} + +static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + int answ; + bool slow; + + switch (cmd) { + case SIOCINQ: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + slow = lock_sock_fast(sk); + answ = mptcp_inq_hint(sk); + unlock_sock_fast(sk, slow); + break; + case SIOCOUTQ: + slow = lock_sock_fast(sk); + answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); + unlock_sock_fast(sk, slow); + break; + case SIOCOUTQNSD: + slow = lock_sock_fast(sk); + answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_nxt)); + unlock_sock_fast(sk, slow); + break; + default: + return -ENOIOCTLCMD; + } + + return put_user(answ, (int __user *)arg); +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3272,6 +3323,7 @@ static struct proto mptcp_prot = { .shutdown = mptcp_shutdown, .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, + .ioctl = mptcp_ioctl, .recvmsg = mptcp_recvmsg, .release_cb = mptcp_release_cb, .hash = mptcp_hash,