diff mbox series

[net-next,10/10] mptcp: support TCP_CORK and TCP_NODELAY

Message ID 20211203223541.69364-11-mathew.j.martineau@linux.intel.com (mailing list archive)
State Accepted
Commit 4f6e14bd19d6de7831f31cfb3210f2ea93eeb038
Delegated to: Netdev Maintainers
Headers show
Series mptcp: New features for MPTCP sockets and netlink PM | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 22 this patch: 22
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Mat Martineau Dec. 3, 2021, 10:35 p.m. UTC
From: Maxim Galaganov <max@internet.ru>

First, add cork and nodelay fields to the mptcp_sock structure
so they can be used in sync_socket_options(), and fill them on setsockopt
while holding the msk socket lock.

Then, on setsockopt set proper tcp_sk(ssk)->nonagle values for subflows
by calling __tcp_sock_set_cork() or __tcp_sock_set_nodelay() on the ssk
while holding the ssk socket lock.

tcp_push_pending_frames() will be invoked on the ssk if a cork was cleared
or nodelay was set. Also set MPTCP_PUSH_PENDING bit by calling
mptcp_check_and_set_pending(). This will lead to __mptcp_push_pending()
being called inside mptcp_release_cb() with new tcp_sk(ssk)->nonagle.

Also add getsockopt support for TCP_CORK and TCP_NODELAY.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Maxim Galaganov <max@internet.ru>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 net/mptcp/protocol.h |  4 ++-
 net/mptcp/sockopt.c  | 70 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)

Comments

Jakub Kicinski Dec. 7, 2021, 1:30 a.m. UTC | #1
On Fri,  3 Dec 2021 14:35:41 -0800 Mat Martineau wrote:
> +static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
> +					    unsigned int optlen)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	struct sock *sk = (struct sock *)msk;
> +	int val;
> +
> +	if (optlen < sizeof(int))
> +		return -EINVAL;
> +
> +	if (copy_from_sockptr(&val, optval, sizeof(val)))
> +		return -EFAULT;

Should we check that optval is not larger than sizeof(int) or if it is
that the rest of the buffer is zero? Or for the old school options we
should stick to the old school behavior?
Florian Westphal Dec. 7, 2021, 10:19 a.m. UTC | #2
Jakub Kicinski <kuba@kernel.org> wrote:
> On Fri,  3 Dec 2021 14:35:41 -0800 Mat Martineau wrote:
> > +static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
> > +					    unsigned int optlen)
> > +{
> > +	struct mptcp_subflow_context *subflow;
> > +	struct sock *sk = (struct sock *)msk;
> > +	int val;
> > +
> > +	if (optlen < sizeof(int))
> > +		return -EINVAL;
> > +
> > +	if (copy_from_sockptr(&val, optval, sizeof(val)))
> > +		return -EFAULT;
> 
> Should we check that optval is not larger than sizeof(int) or if it is
> that the rest of the buffer is zero? Or for the old school options we
> should stick to the old school behavior?

My intent was to stick to tcp behaviour, i.e. no check on > sizeof(int)
or on "extra buffer" content.
Paolo Abeni Dec. 7, 2021, 10:37 a.m. UTC | #3
On Mon, 2021-12-06 at 17:30 -0800, Jakub Kicinski wrote:
> On Fri,  3 Dec 2021 14:35:41 -0800 Mat Martineau wrote:
> > +static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
> > +					    unsigned int optlen)
> > +{
> > +	struct mptcp_subflow_context *subflow;
> > +	struct sock *sk = (struct sock *)msk;
> > +	int val;
> > +
> > +	if (optlen < sizeof(int))
> > +		return -EINVAL;
> > +
> > +	if (copy_from_sockptr(&val, optval, sizeof(val)))
> > +		return -EFAULT;
> 
> Should we check that optval is not larger than sizeof(int) or if it is
> that the rest of the buffer is zero? Or for the old school options we
> should stick to the old school behavior?

I think it's useful if we keep the MPTCP socket options binary API as
close as possible to the plain TCP ones: that allows for seamless
switching existing TCP application to MPTCP with no code changes.

Old school, please ;)

Cheers,

Paolo
diff mbox series

Patch

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 147b22da41ca..e1469155fb15 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -249,7 +249,9 @@  struct mptcp_sock {
 	bool		rcv_fastclose;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	bool		csum_enabled;
-	u8		recvmsg_inq:1;
+	u8		recvmsg_inq:1,
+			cork:1,
+			nodelay:1;
 	spinlock_t	join_list_lock;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 44e0a37c567c..3c3db22fd36a 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -616,6 +616,66 @@  static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
 	return ret;
 }
 
+static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval,
+					 unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
+		return -EFAULT;
+
+	lock_sock(sk);
+	sockopt_seq_inc(msk);
+	msk->cork = !!val;
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(ssk);
+		__tcp_sock_set_cork(ssk, !!val);
+		release_sock(ssk);
+	}
+	if (!val)
+		mptcp_check_and_set_pending(sk);
+	release_sock(sk);
+
+	return 0;
+}
+
+static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval,
+					    unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
+		return -EFAULT;
+
+	lock_sock(sk);
+	sockopt_seq_inc(msk);
+	msk->nodelay = !!val;
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		lock_sock(ssk);
+		__tcp_sock_set_nodelay(ssk, !!val);
+		release_sock(ssk);
+	}
+	if (val)
+		mptcp_check_and_set_pending(sk);
+	release_sock(sk);
+
+	return 0;
+}
+
 static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
 						   sockptr_t optval, unsigned int optlen)
 {
@@ -717,6 +777,10 @@  static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 		return -EOPNOTSUPP;
 	case TCP_CONGESTION:
 		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
+	case TCP_CORK:
+		return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
+	case TCP_NODELAY:
+		return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
 	}
 
 	return -EOPNOTSUPP;
@@ -1087,6 +1151,10 @@  static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 						      optval, optlen);
 	case TCP_INQ:
 		return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq);
+	case TCP_CORK:
+		return mptcp_put_int_option(msk, optval, optlen, msk->cork);
+	case TCP_NODELAY:
+		return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
 	}
 	return -EOPNOTSUPP;
 }
@@ -1189,6 +1257,8 @@  static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
 
 	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
 		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
+	__tcp_sock_set_cork(ssk, !!msk->cork);
+	__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
 
 	inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
 	inet_sk(ssk)->freebind = inet_sk(sk)->freebind;