diff mbox series

[net-next] mptcp: sysctl: add syn_retrans_before_tcp_fallback

Message ID 20250117-net-next-mptcp-syn_retrans_before_tcp_fallback-v1-1-ab4b187099b0@kernel.org (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series [net-next] mptcp: sysctl: add syn_retrans_before_tcp_fallback | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 11 of 11 maintainers
netdev/build_clang success Errors and warnings before: 2 this patch: 2
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 77 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest fail net-next-2025-01-17--21-00 (tests: 887)

Commit Message

Matthieu Baerts (NGI0) Jan. 17, 2025, 5:40 p.m. UTC
The number of SYN + MPC retransmissions before falling back to TCP was
fixed to 2. This is certainly a good default value, but having a fixed
number can be a problem in some environments.

The current behaviour means that if all packets are dropped, there will
be:

- The initial SYN + MPC

- 2 retransmissions with MPC

- The next ones will be without MPTCP.

So typically ~3 seconds before falling back to TCP. In some networks
where some temporally blackholes are unfortunately frequent, or when a
client tries to initiate connections while the network is not ready yet,
this can cause new connections not to have MPTCP connections.

In such environments, it is now possible to increase the number of SYN
retransmissions with MPTCP options to make sure MPTCP is used.

Interesting values are:

- 0: the first retransmission will be done without MPTCP options: quite
     aggressive, but also a higher risk of detecting false-positive
     MPTCP blackholes.

- >= 128: all SYN retransmissions will keep the MPTCP options: back to
          the < 6.12 behaviour.

The default behaviour is not changed here.

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
 Documentation/networking/mptcp-sysctl.rst | 16 ++++++++++++++++
 net/mptcp/ctrl.c                          | 21 +++++++++++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)


---
base-commit: 7d2eba0f83a59d360ed1e77ed2778101a6e3c4a1
change-id: 20250117-net-next-mptcp-syn_retrans_before_tcp_fallback-5f41bbc8482e

Best regards,
diff mbox series

Patch

diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index 95598c21fc8e8782533bbc6a36de63bb465c297c..dc45c02113537b98dff76a6ed431c0449b5217f8 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -108,3 +108,19 @@  stale_loss_cnt - INTEGER
 	This is a per-namespace sysctl.
 
 	Default: 4
+
+syn_retrans_before_tcp_fallback - INTEGER
+	The number of SYN + MP_CAPABLE retransmissions before falling back to
+	TCP, i.e. dropping the MPTCP options. In other words, if all the packets
+	are dropped on the way, there will be:
+
+	* The initial SYN with MPTCP support
+	* This number of SYN retransmitted with MPTCP support
+	* The next SYN retransmissions will be without MPTCP support
+
+	0 means the first retransmission will be done without MPTCP options.
+	>= 128 means that all SYN retransmissions will keep the MPTCP options. A
+	lower number might increase false-positive MPTCP blackholes detections.
+	This is a per-namespace sysctl.
+
+	Default: 2
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index b0dd008e2114bce65ee3906bbdc19a5a4316cefa..3999e0ba2c35b50c36ce32277e0b8bfb24197946 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@  struct mptcp_pernet {
 	unsigned int close_timeout;
 	unsigned int stale_loss_cnt;
 	atomic_t active_disable_times;
+	u8 syn_retrans_before_tcp_fallback;
 	unsigned long active_disable_stamp;
 	u8 mptcp_enabled;
 	u8 checksum_enabled;
@@ -92,6 +93,7 @@  static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 	pernet->mptcp_enabled = 1;
 	pernet->add_addr_timeout = TCP_RTO_MAX;
 	pernet->blackhole_timeout = 3600;
+	pernet->syn_retrans_before_tcp_fallback = 2;
 	atomic_set(&pernet->active_disable_times, 0);
 	pernet->close_timeout = TCP_TIMEWAIT_LEN;
 	pernet->checksum_enabled = 0;
@@ -245,6 +247,12 @@  static struct ctl_table mptcp_sysctl_table[] = {
 		.proc_handler = proc_blackhole_detect_timeout,
 		.extra1 = SYSCTL_ZERO,
 	},
+	{
+		.procname = "syn_retrans_before_tcp_fallback",
+		.maxlen = sizeof(u8),
+		.mode = 0644,
+		.proc_handler = proc_dou8vec_minmax,
+	},
 };
 
 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -269,6 +277,7 @@  static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 	/* table[7] is for available_schedulers which is read-only info */
 	table[8].data = &pernet->close_timeout;
 	table[9].data = &pernet->blackhole_timeout;
+	table[10].data = &pernet->syn_retrans_before_tcp_fallback;
 
 	hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
 				     ARRAY_SIZE(mptcp_sysctl_table));
@@ -392,17 +401,21 @@  void mptcp_active_enable(struct sock *sk)
 void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
 {
 	struct mptcp_subflow_context *subflow;
-	u32 timeouts;
 
 	if (!sk_is_mptcp(ssk))
 		return;
 
-	timeouts = inet_csk(ssk)->icsk_retransmits;
 	subflow = mptcp_subflow_ctx(ssk);
 
 	if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
-		if (timeouts == 2 || (timeouts < 2 && expired)) {
-			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+		struct net *net = sock_net(ssk);
+		u8 timeouts, to_max;
+
+		timeouts = inet_csk(ssk)->icsk_retransmits;
+		to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback;
+
+		if (timeouts == to_max || (timeouts < to_max && expired)) {
+			MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP);
 			subflow->mpc_drop = 1;
 			mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
 		} else {