diff mbox series

[bpf-next,v11,09/12] bpf: add BPF_SOCK_OPS_TS_ACK_OPT_CB callback

Message ID 20250214010038.54131-10-kerneljasonxing@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series net-timestamp: bpf extension to equip applications transparently | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 193 this patch: 193
netdev/build_tools success Errors and warnings before: 27 (+1) this patch: 27 (+1)
netdev/cc_maintainers warning 3 maintainers not CCed: olteanv@gmail.com ncardwell@google.com andrew@lunn.ch
netdev/build_clang success Errors and warnings before: 9361 this patch: 9361
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7017 this patch: 7017
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 77 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 79 this patch: 79
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Jason Xing Feb. 14, 2025, 1 a.m. UTC
Support the ACK case for bpf timestamping.

Add a new sock_ops callback, BPF_SOCK_OPS_TS_ACK_OPT_CB. This
callback is invoked at the same timestamping point as user space's
SCM_TSTAMP_ACK. A BPF program can use it to obtain the same
SCM_TSTAMP_ACK timestamp without modifying the user-space
application.

This patch extends txstamp_ack to two bits: 0x1 (TSTAMP_ACK_SK) stands
for SO_TIMESTAMPING mode, and 0x2 (TSTAMP_ACK_BPF) for the BPF extension.

Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
---
 include/net/tcp.h              | 6 ++++--
 include/uapi/linux/bpf.h       | 5 +++++
 net/core/skbuff.c              | 5 ++++-
 net/dsa/user.c                 | 2 +-
 net/ipv4/tcp.c                 | 2 +-
 net/socket.c                   | 2 +-
 tools/include/uapi/linux/bpf.h | 5 +++++
 7 files changed, 21 insertions(+), 6 deletions(-)

Comments

Martin KaFai Lau Feb. 14, 2025, 8:33 p.m. UTC | #1
On 2/13/25 5:00 PM, Jason Xing wrote:
> diff --git a/net/dsa/user.c b/net/dsa/user.c
> index 291ab1b4acc4..794fe553dd77 100644
> --- a/net/dsa/user.c
> +++ b/net/dsa/user.c
> @@ -897,7 +897,7 @@ static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
>   {
>   	struct dsa_switch *ds = p->dp->ds;
>   
> -	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
> +	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF))

This change should be in patch 8.

[ ... ]

> diff --git a/net/socket.c b/net/socket.c
> index 262a28b59c7f..517de433d4bb 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -676,7 +676,7 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
>   	u8 flags = *tx_flags;
>   
>   	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
> -		flags |= SKBTX_HW_TSTAMP;
> +		flags |= SKBTX_HW_TSTAMP_NOBPF;

Same here.
Jason Xing Feb. 14, 2025, 11:16 p.m. UTC | #2
On Sat, Feb 15, 2025 at 4:34 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 2/13/25 5:00 PM, Jason Xing wrote:
> > diff --git a/net/dsa/user.c b/net/dsa/user.c
> > index 291ab1b4acc4..794fe553dd77 100644
> > --- a/net/dsa/user.c
> > +++ b/net/dsa/user.c
> > @@ -897,7 +897,7 @@ static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
> >   {
> >       struct dsa_switch *ds = p->dp->ds;
> >
> > -     if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
> > +     if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF))
>
> This change should be in patch 8.
>
> [ ... ]
>
> > diff --git a/net/socket.c b/net/socket.c
> > index 262a28b59c7f..517de433d4bb 100644
> > --- a/net/socket.c
> > +++ b/net/socket.c
> > @@ -676,7 +676,7 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
> >       u8 flags = *tx_flags;
> >
> >       if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
> > -             flags |= SKBTX_HW_TSTAMP;
> > +             flags |= SKBTX_HW_TSTAMP_NOBPF;
>
> Same here.

Sure, you're right. If you feel it's necessary to re-spin, I will
adjust these two points :)

Thanks,
Jason

>
>
Martin KaFai Lau Feb. 14, 2025, 11:41 p.m. UTC | #3
On 2/14/25 3:16 PM, Jason Xing wrote:
> On Sat, Feb 15, 2025 at 4:34 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 2/13/25 5:00 PM, Jason Xing wrote:
>>> diff --git a/net/dsa/user.c b/net/dsa/user.c
>>> index 291ab1b4acc4..794fe553dd77 100644
>>> --- a/net/dsa/user.c
>>> +++ b/net/dsa/user.c
>>> @@ -897,7 +897,7 @@ static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
>>>    {
>>>        struct dsa_switch *ds = p->dp->ds;
>>>
>>> -     if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
>>> +     if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF))
>>
>> This change should be in patch 8.
>>
>> [ ... ]
>>
>>> diff --git a/net/socket.c b/net/socket.c
>>> index 262a28b59c7f..517de433d4bb 100644
>>> --- a/net/socket.c
>>> +++ b/net/socket.c
>>> @@ -676,7 +676,7 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
>>>        u8 flags = *tx_flags;
>>>
>>>        if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
>>> -             flags |= SKBTX_HW_TSTAMP;
>>> +             flags |= SKBTX_HW_TSTAMP_NOBPF;
>>
>> Same here.
> 
> Sure, you're right. If you feel it's necessary to re-spin, I will
> adjust these two points :)

That will be good. I would wait a bit to collect Willem's comment first.
Willem de Bruijn Feb. 15, 2025, 3:16 p.m. UTC | #4
Martin KaFai Lau wrote:
> On 2/14/25 3:16 PM, Jason Xing wrote:
> > On Sat, Feb 15, 2025 at 4:34 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
> >>
> >> On 2/13/25 5:00 PM, Jason Xing wrote:
> >>> diff --git a/net/dsa/user.c b/net/dsa/user.c
> >>> index 291ab1b4acc4..794fe553dd77 100644
> >>> --- a/net/dsa/user.c
> >>> +++ b/net/dsa/user.c
> >>> @@ -897,7 +897,7 @@ static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
> >>>    {
> >>>        struct dsa_switch *ds = p->dp->ds;
> >>>
> >>> -     if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
> >>> +     if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF))
> >>
> >> This change should be in patch 8.
> >>
> >> [ ... ]
> >>
> >>> diff --git a/net/socket.c b/net/socket.c
> >>> index 262a28b59c7f..517de433d4bb 100644
> >>> --- a/net/socket.c
> >>> +++ b/net/socket.c
> >>> @@ -676,7 +676,7 @@ void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
> >>>        u8 flags = *tx_flags;
> >>>
> >>>        if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
> >>> -             flags |= SKBTX_HW_TSTAMP;
> >>> +             flags |= SKBTX_HW_TSTAMP_NOBPF;
> >>
> >> Same here.
> > 
> > Sure, you're right. If you feel it's necessary to re-spin, I will
> > adjust these two points :)
> 
> That will be good. I would wait a bit to collect Willem's comment first.

Depends on answers to my few remaining points.
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4c4dca59352b..2e2fc72e115b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -958,10 +958,12 @@  struct tcp_skb_cb {
 
 	__u8		sacked;		/* State flags for SACK.	*/
 	__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
-	__u8		txstamp_ack:1,	/* Record TX timestamp for ack? */
+#define TSTAMP_ACK_SK	0x1
+#define TSTAMP_ACK_BPF	0x2
+	__u8		txstamp_ack:2,	/* Record TX timestamp for ack? */
 			eor:1,		/* Is skb MSG_EOR marked? */
 			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
-			unused:5;
+			unused:4;
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
 	union {
 		struct {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f70edd067edf..9355d617767f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7047,6 +7047,11 @@  enum {
 					 * SK_BPF_CB_TX_TIMESTAMPING feature
 					 * is on.
 					 */
+	BPF_SOCK_OPS_TS_ACK_OPT_CB,	/* Called when all the skbs in the
+					 * same sendmsg call are acked
+					 * when SK_BPF_CB_TX_TIMESTAMPING
+					 * feature is on.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index acafa05f7f58..f096ca6c2ced 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5550,7 +5550,7 @@  static bool skb_tstamp_tx_report_so_timestamping(struct sk_buff *skb,
 		return skb_shinfo(skb)->tx_flags & (hwtstamps ? SKBTX_HW_TSTAMP_NOBPF :
 						    SKBTX_SW_TSTAMP);
 	case SCM_TSTAMP_ACK:
-		return TCP_SKB_CB(skb)->txstamp_ack;
+		return TCP_SKB_CB(skb)->txstamp_ack & TSTAMP_ACK_SK;
 	}
 
 	return false;
@@ -5575,6 +5575,9 @@  static void skb_tstamp_tx_report_bpf_timestamping(struct sk_buff *skb,
 			op = BPF_SOCK_OPS_TS_SW_OPT_CB;
 		}
 		break;
+	case SCM_TSTAMP_ACK:
+		op = BPF_SOCK_OPS_TS_ACK_OPT_CB;
+		break;
 	default:
 		return;
 	}
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 291ab1b4acc4..794fe553dd77 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -897,7 +897,7 @@  static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
 {
 	struct dsa_switch *ds = p->dp->ds;
 
-	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NOBPF))
 		return;
 
 	if (!ds->ops->port_txtstamp)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d704bda6c41..12b9c4f9c151 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -488,7 +488,7 @@  static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
 
 		sock_tx_timestamp(sk, sockc, &shinfo->tx_flags);
 		if (tsflags & SOF_TIMESTAMPING_TX_ACK)
-			tcb->txstamp_ack = 1;
+			tcb->txstamp_ack |= TSTAMP_ACK_SK;
 		if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
 			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
 	}
diff --git a/net/socket.c b/net/socket.c
index 262a28b59c7f..517de433d4bb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -676,7 +676,7 @@  void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
 	u8 flags = *tx_flags;
 
 	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
-		flags |= SKBTX_HW_TSTAMP;
+		flags |= SKBTX_HW_TSTAMP_NOBPF;
 
 		/* PTP hardware clocks can provide a free running cycle counter
 		 * as a time base for virtual clocks. Tell driver to use the
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7b9652ce7e3c..d3e2988b3b4c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -7037,6 +7037,11 @@  enum {
 					 * SK_BPF_CB_TX_TIMESTAMPING feature
 					 * is on.
 					 */
+	BPF_SOCK_OPS_TS_ACK_OPT_CB,	/* Called when all the skbs in the
+					 * same sendmsg call are acked
+					 * when SK_BPF_CB_TX_TIMESTAMPING
+					 * feature is on.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect