diff mbox series

[net-next] tcp: drop tcp_v{4,6}_restore_cb

Message ID 20250410161619.3581785-1-sdf@fomichev.me (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next] tcp: drop tcp_v{4,6}_restore_cb | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/build_tools success Errors and warnings before: 26 (+2) this patch: 26 (+2)
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 10 this patch: 10
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 911 this patch: 911
netdev/checkpatch warning CHECK: spaces preferred around that '<<' (ctx:VxV)
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Stanislav Fomichev April 10, 2025, 4:16 p.m. UTC
Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
to alias with inet[6]_skb_parm. Add static asserts to make
sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
at the proper offset.

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 include/net/tcp.h   | 46 +++++++++++++++++++++++++--------------------
 net/ipv4/tcp_ipv4.c | 16 ----------------
 net/ipv6/tcp_ipv6.c | 25 ------------------------
 3 files changed, 26 insertions(+), 61 deletions(-)

Comments

Eric Dumazet April 10, 2025, 4:24 p.m. UTC | #1
On Thu, Apr 10, 2025 at 6:16 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
> to alias with inet[6]_skb_parm. Add static asserts to make
> sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
> at the proper offset.

May I ask : why ?

I think you are simply reverting 971f10eca18 ("tcp: better TCP_SKB_CB
layout to reduce cache line misses")
without any performance measurements.
Stanislav Fomichev April 10, 2025, 4:45 p.m. UTC | #2
On 04/10, Eric Dumazet wrote:
> On Thu, Apr 10, 2025 at 6:16 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
> > to alias with inet[6]_skb_parm. Add static asserts to make
> > sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
> > at the proper offset.
> 
> May I ask : why ?
> 
> I think you are simply reverting 971f10eca18 ("tcp: better TCP_SKB_CB
> layout to reduce cache line misses")
> without any performance measurements.

Oh, wow, I did not go that far back into the history, thanks for the
pointer! Let me see if there is any perf impact form this...
Eric Dumazet April 10, 2025, 4:58 p.m. UTC | #3
On Thu, Apr 10, 2025 at 6:45 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
>
> On 04/10, Eric Dumazet wrote:
> > On Thu, Apr 10, 2025 at 6:16 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > Instead of moving and restoring IP[6]CB, reorder tcp_skb_cb
> > > to alias with inet[6]_skb_parm. Add static asserts to make
> > > sure tcp_skb_cb fits into skb.cb and that inet[6]_skb_parm is
> > > at the proper offset.
> >
> > May I ask : why ?
> >
> > I think you are simply reverting 971f10eca18 ("tcp: better TCP_SKB_CB
> > layout to reduce cache line misses")
> > without any performance measurements.
>
> Oh, wow, I did not go that far back into the history, thanks for the
> pointer! Let me see if there is any perf impact form this...

To be fair, we now have RB-tree for the out of order queue, we no
longer of O(N) costs
when trying to insert an skb in this queue. Also we try to coalesce
skbs together.

Tests would require thousands of skbs in the out-of-order queue.

Think of long-distance flows (rtt > 100ms), and big tcp_rmem[] and
tcp_wmem[] limits for the sender/receiver.
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4450c384ef17..e80fd505f139 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1010,6 +1010,27 @@  enum tcp_skb_cb_sacked_flags {
  * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
  */
 struct tcp_skb_cb {
+	union {
+		struct {
+#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
+			/* There is space for up to 24 bytes */
+			__u32 is_app_limited:1, /* cwnd not fully used? */
+			      delivered_ce:20,
+			      unused:11;
+			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
+			__u32 delivered;
+			/* start of send pipeline phase */
+			u64 first_tx_mstamp;
+			/* when we reached the "delivered" count */
+			u64 delivered_mstamp;
+		} tx;   /* only used for outgoing skbs */
+		union {
+			struct inet_skb_parm	h4;
+#if IS_ENABLED(CONFIG_IPV6)
+			struct inet6_skb_parm	h6;
+#endif
+		} header;	/* For incoming skbs */
+	};
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	union {
@@ -1033,28 +1054,13 @@  struct tcp_skb_cb {
 			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
 			unused:4;
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
-	union {
-		struct {
-#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
-			/* There is space for up to 24 bytes */
-			__u32 is_app_limited:1, /* cwnd not fully used? */
-			      delivered_ce:20,
-			      unused:11;
-			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
-			__u32 delivered;
-			/* start of send pipeline phase */
-			u64 first_tx_mstamp;
-			/* when we reached the "delivered" count */
-			u64 delivered_mstamp;
-		} tx;   /* only used for outgoing skbs */
-		union {
-			struct inet_skb_parm	h4;
+};
+
+static_assert(sizeof(struct tcp_skb_cb) <= sizeof_field(struct sk_buff, cb));
+static_assert(offsetof(struct tcp_skb_cb, header.h4) == 0);
 #if IS_ENABLED(CONFIG_IPV6)
-			struct inet6_skb_parm	h6;
+static_assert(offsetof(struct tcp_skb_cb, header.h6) == 0);
 #endif
-		} header;	/* For incoming skbs */
-	};
-};
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8cce0d5489da..9654f663fd0d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2153,22 +2153,9 @@  int tcp_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_IPV6_MOD(tcp_filter);
 
-static void tcp_v4_restore_cb(struct sk_buff *skb)
-{
-	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
-		sizeof(struct inet_skb_parm));
-}
-
 static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
 			   const struct tcphdr *th)
 {
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
-		sizeof(struct inet_skb_parm));
-	barrier();
-
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff * 4);
@@ -2293,7 +2280,6 @@  int tcp_v4_rcv(struct sk_buff *skb)
 				 * Try to feed this packet to this socket
 				 * instead of discarding it.
 				 */
-				tcp_v4_restore_cb(skb);
 				sock_put(sk);
 				goto lookup;
 			}
@@ -2302,7 +2288,6 @@  int tcp_v4_rcv(struct sk_buff *skb)
 		nf_reset_ct(skb);
 		if (nsk == sk) {
 			reqsk_put(req);
-			tcp_v4_restore_cb(skb);
 		} else {
 			drop_reason = tcp_child_process(sk, nsk, skb);
 			if (drop_reason) {
@@ -2430,7 +2415,6 @@  int tcp_v4_rcv(struct sk_buff *skb)
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
-			tcp_v4_restore_cb(skb);
 			refcounted = false;
 			__this_cpu_write(tcp_tw_isn, isn);
 			goto process;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b03c223eda4f..f7734ba7f3e6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1342,16 +1342,6 @@  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	return 0; /* don't send reset */
 }
 
-static void tcp_v6_restore_cb(struct sk_buff *skb)
-{
-	/* We need to move header back to the beginning if xfrm6_policy_check()
-	 * and tcp_v6_fill_cb() are going to be called again.
-	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
-	 */
-	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
-		sizeof(struct inet6_skb_parm));
-}
-
 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 					 struct request_sock *req,
 					 struct dst_entry *dst,
@@ -1552,8 +1542,6 @@  static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
 			consume_skb(ireq->pktopts);
 			ireq->pktopts = NULL;
-			if (newnp->pktoptions)
-				tcp_v6_restore_cb(newnp->pktoptions);
 		}
 	} else {
 		if (!req_unhash && found_dup_sk) {
@@ -1710,7 +1698,6 @@  int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (inet6_test_bit(REPFLOW, sk))
 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
-			tcp_v6_restore_cb(opt_skb);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
 		} else {
 			__kfree_skb(opt_skb);
@@ -1725,15 +1712,6 @@  int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 			   const struct tcphdr *th)
 {
-	/* This is tricky: we move IP6CB at its correct location into
-	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
-	 * _decode_session6() uses IP6CB().
-	 * barrier() makes sure compiler won't play aliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
-		sizeof(struct inet6_skb_parm));
-	barrier();
-
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
@@ -1849,7 +1827,6 @@  INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 				 * Try to feed this packet to this socket
 				 * instead of discarding it.
 				 */
-				tcp_v6_restore_cb(skb);
 				sock_put(sk);
 				goto lookup;
 			}
@@ -1858,7 +1835,6 @@  INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 		nf_reset_ct(skb);
 		if (nsk == sk) {
 			reqsk_put(req);
-			tcp_v6_restore_cb(skb);
 		} else {
 			drop_reason = tcp_child_process(sk, nsk, skb);
 			if (drop_reason) {
@@ -1987,7 +1963,6 @@  INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 			struct inet_timewait_sock *tw = inet_twsk(sk);
 			inet_twsk_deschedule_put(tw);
 			sk = sk2;
-			tcp_v6_restore_cb(skb);
 			refcounted = false;
 			__this_cpu_write(tcp_tw_isn, isn);
 			goto process;