diff mbox series

[RFC,net-next,1/2] tcp: add TCP_IW for setsockopt

Message ID 20250328151633.30007-2-kerneljasonxing@gmail.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series tcp: support initcwnd adjustment | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 518 this patch: 518
netdev/build_tools success Errors and warnings before: 26 (+0) this patch: 26 (+0)
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 966 this patch: 966
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 15128 this patch: 15128
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 4 this patch: 4
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing March 28, 2025, 3:16 p.m. UTC
From: Jason Xing <kernelxing@tencent.com>

The ip route command can adjust the initcwnd for certain flows, and that
setting takes effect both in slow start and in slow start after idle.

This patch introduces a socket-level option that gives applications the
same ability. After this, I think TCP_BPF_IW can be adjusted
accordingly for the slow-start-after-idle case.

Introduce a new field to store the initial cwnd so that the socket
remembers the value when it restarts slow start after idle.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/linux/tcp.h      | 1 +
 include/uapi/linux/tcp.h | 1 +
 net/ipv4/tcp.c           | 8 ++++++++
 net/ipv4/tcp_input.c     | 2 +-
 4 files changed, 11 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1669d95bb0f9..aba0a1fe0e36 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -403,6 +403,7 @@  struct tcp_sock {
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
 	u32	prior_cwnd;	/* cwnd right before starting loss recovery */
+	u32	init_cwnd;	/* init cwnd controlled by setsockopt */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
 	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index dc8fdc80e16b..acf77114efed 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -142,6 +142,7 @@  enum {
 #define TCP_RTO_MAX_MS		44	/* max rto time in ms */
 #define TCP_RTO_MIN_US		45	/* min rto time in us */
 #define TCP_DELACK_MAX_US	46	/* max delayed ack time in us */
+#define TCP_IW			47	/* initial congestion window */
 
 #define TCP_REPAIR_ON		1
 #define TCP_REPAIR_OFF		0
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ea8de00f669d..9da7ece57b20 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3863,6 +3863,11 @@  int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 		WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max);
 		return 0;
 	}
+	case TCP_IW:
+		if (val <= 0 || tp->data_segs_out > tp->syn_data)
+			return -EINVAL;
+		tp->init_cwnd = val;
+		return 0;
 	}
 
 	sockopt_lock_sock(sk);
@@ -4708,6 +4713,9 @@  int do_tcp_getsockopt(struct sock *sk, int level,
 	case TCP_DELACK_MAX_US:
 		val = jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_delack_max));
 		break;
+	case TCP_IW:
+		val = tp->init_cwnd;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e1f952fbac48..00cbe8970a1b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1019,7 +1019,7 @@  static void tcp_set_rto(struct sock *sk)
 
 __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 {
-	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
+	__u32 cwnd = tp->init_cwnd ? : (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
 	if (!cwnd)
 		cwnd = TCP_INIT_CWND;