diff mbox series

[net-next,v2] net: skb: free up one bit in tx_flags

Message ID 20250225023416.2088705-1-willemdebruijn.kernel@gmail.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v2] net: skb: free up one bit in tx_flags | expand

Commit Message

Willem de Bruijn Feb. 25, 2025, 2:33 a.m. UTC
From: Willem de Bruijn <willemb@google.com>

The linked series wants to add skb tx completion timestamps.
That needs a bit in skb_shared_info.tx_flags, but all are in use.

A per-skb bit is only needed for features that are configured on a
per packet basis. Per socket features can be read from sk->sk_tsflags.

Per packet tsflags can be set in sendmsg using cmsg, but only those in
SOF_TIMESTAMPING_TX_RECORD_MASK.

Per packet tsflags can also be set without cmsg by sandwiching a
send inbetween two setsockopts:

    val |= SOF_TIMESTAMPING_$FEATURE;
    setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
    write(fd, buf, sz);
    val &= ~SOF_TIMESTAMPING_$FEATURE;
    setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));

Changing a datapath test from skb_shinfo(skb)->tx_flags to
skb->sk->sk_tsflags can change behavior in that case, as the tx_flags
is written before the second setsockopt updates sk_tsflags.

Therefore, only bits can be reclaimed that cannot be set by cmsg and
are also highly unlikely to be used to target individual packets
otherwise.

Free up the bit currently used for SKBTX_HW_TSTAMP_USE_CYCLES. This
selects between clock and free running counter source for HW TX
timestamps. It is probable that all packets of the same socket will
always use the same source.

Link: https://lore.kernel.org/netdev/cover.1739988644.git.pav@iki.fi/
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Gerhard Engleder <gerhard@engleder-embedded.com>

---

v1->v2
  - fix up conflict in net/socket.c with merge of BPF tx timestamping:
    SKBTX_HW_TSTAMP vs SKBTX_HW_TSTAMP_NOBPF
  - add Reviewed-by tags from v1
---
 drivers/net/ethernet/engleder/tsnep_main.c |  4 ++--
 drivers/net/ethernet/intel/igc/igc_main.c  |  3 ++-
 include/linux/skbuff.h                     |  5 ++---
 net/socket.c                               | 11 +----------
 4 files changed, 7 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 0d030cb0b21c..3de4cb06e266 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -852,8 +852,8 @@  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
 			struct skb_shared_hwtstamps hwtstamps;
 			u64 timestamp;
 
-			if (skb_shinfo(entry->skb)->tx_flags &
-			    SKBTX_HW_TSTAMP_USE_CYCLES)
+			if (entry->skb->sk &&
+			    READ_ONCE(entry->skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC)
 				timestamp =
 					__le64_to_cpu(entry->desc_wb->counter);
 			else
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 3044392e8ded..472f009630c9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1668,7 +1668,8 @@  static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 		if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) {
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 			tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags;
-			if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_USE_CYCLES)
+			if (skb->sk &&
+			    READ_ONCE(skb->sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC)
 				tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1;
 		} else {
 			adapter->tx_hwtstamp_skipped++;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f2bb8473d99a..171aa15f6541 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -478,8 +478,8 @@  enum {
 	/* device driver is going to provide hardware time stamp */
 	SKBTX_IN_PROGRESS = 1 << 2,
 
-	/* generate hardware time stamp based on cycles if supported */
-	SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3,
+	/* reserved */
+	SKBTX_RESERVED = 1 << 3,
 
 	/* generate wifi status information (where possible) */
 	SKBTX_WIFI_STATUS = 1 << 4,
@@ -500,7 +500,6 @@  enum {
 				 SKBTX_SCHED_TSTAMP | \
 				 SKBTX_BPF)
 #define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | \
-				 SKBTX_HW_TSTAMP_USE_CYCLES | \
 				 SKBTX_ANY_SW_TSTAMP)
 
 /* Definitions for flags in struct skb_shared_info */
diff --git a/net/socket.c b/net/socket.c
index 0545e9ea7058..b64ecf2722e7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -680,18 +680,9 @@  void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
 {
 	u8 flags = *tx_flags;
 
-	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
+	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
 		flags |= SKBTX_HW_TSTAMP_NOBPF;
 
-		/* PTP hardware clocks can provide a free running cycle counter
-		 * as a time base for virtual clocks. Tell driver to use the
-		 * free running cycle counter for timestamp if socket is bound
-		 * to virtual clock.
-		 */
-		if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
-			flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
-	}
-
 	if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
 		flags |= SKBTX_SW_TSTAMP;