diff mbox series

[RFC,net-next,v6,08/13] net-timestamp: support hw SCM_TSTAMP_SND for bpf extension

Message ID 20250121012901.87763-9-kerneljasonxing@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net-timestamp: bpf extension to equip applications transparently | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 194 this patch: 194
netdev/build_tools success Errors and warnings before: 2 (+1) this patch: 2 (+1)
netdev/cc_maintainers warning 2 maintainers not CCed: olteanv@gmail.com andrew@lunn.ch
netdev/build_clang success Errors and warnings before: 8877 this patch: 8877
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6947 this patch: 6947
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 90 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 128 this patch: 128
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing Jan. 21, 2025, 1:28 a.m. UTC
In this patch, we finish the hardware part. Then bpf program can
fetch the hwstamp from skb directly.

To avoid changing so many callers using SKBTX_HW_TSTAMP from drivers,
use this simple modification like this patch does to support printing
hardware timestamp.

Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
---
 include/linux/skbuff.h         |  4 +++-
 include/uapi/linux/bpf.h       |  5 +++++
 net/core/skbuff.c              | 11 ++++++-----
 net/dsa/user.c                 |  2 +-
 net/socket.c                   |  2 +-
 tools/include/uapi/linux/bpf.h |  5 +++++
 6 files changed, 21 insertions(+), 8 deletions(-)

Comments

Martin KaFai Lau Jan. 25, 2025, 12:46 a.m. UTC | #1
On 1/20/25 5:28 PM, Jason Xing wrote:
> In this patch, we finish the hardware part. Then bpf program can
> fetch the hwstamp from skb directly.
> 
> To avoid changing so many callers using SKBTX_HW_TSTAMP from drivers,
> use this simple modification like this patch does to support printing
> hardware timestamp.
> 
> Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> ---
>   include/linux/skbuff.h         |  4 +++-
>   include/uapi/linux/bpf.h       |  5 +++++
>   net/core/skbuff.c              | 11 ++++++-----
>   net/dsa/user.c                 |  2 +-
>   net/socket.c                   |  2 +-
>   tools/include/uapi/linux/bpf.h |  5 +++++
>   6 files changed, 21 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index de8d3bd311f5..df2d790ae36b 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -471,7 +471,7 @@ struct skb_shared_hwtstamps {
>   /* Definitions for tx_flags in struct skb_shared_info */
>   enum {
>   	/* generate hardware time stamp */
> -	SKBTX_HW_TSTAMP = 1 << 0,
> +	__SKBTX_HW_TSTAMP = 1 << 0,
>   
>   	/* generate software time stamp when queueing packet to NIC */
>   	SKBTX_SW_TSTAMP = 1 << 1,
> @@ -495,6 +495,8 @@ enum {
>   	SKBTX_BPF = 1 << 7,
>   };
>   
> +#define SKBTX_HW_TSTAMP		(__SKBTX_HW_TSTAMP | SKBTX_BPF)
> +
>   #define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
>   				 SKBTX_SCHED_TSTAMP | \
>   				 SKBTX_BPF)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index a6d761f07f67..8936e1061e71 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -7032,6 +7032,11 @@ enum {
>   					 * feature is on. It indicates the
>   					 * recorded timestamp.
>   					 */
> +	BPF_SOCK_OPS_TS_HW_OPT_CB,	/* Called in hardware phase when
> +					 * SO_TIMESTAMPING feature is on.

Same comment on the "SO_TIMESTAMPING".

It will be useful to elaborate more on "hardware phase", like exactly when it 
will be called,

> +					 * It indicates the recorded
> +					 * timestamp.

and the hwtstamps will be in the skb.

> +					 */
>   };
>   
>   /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 288eb9869827..c769feae5162 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -5548,7 +5548,7 @@ static bool skb_enable_app_tstamp(struct sk_buff *skb, int tstype, bool sw)
>   		flag = SKBTX_SCHED_TSTAMP;
>   		break;
>   	case SCM_TSTAMP_SND:
> -		flag = sw ? SKBTX_SW_TSTAMP : SKBTX_HW_TSTAMP;
> +		flag = sw ? SKBTX_SW_TSTAMP : __SKBTX_HW_TSTAMP;
>   		break;
>   	case SCM_TSTAMP_ACK:
>   		if (TCP_SKB_CB(skb)->txstamp_ack)
> @@ -5565,7 +5565,8 @@ static bool skb_enable_app_tstamp(struct sk_buff *skb, int tstype, bool sw)
>   }
>   
>   static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
> -			      int tstype, bool sw)
> +			      int tstype, bool sw,
> +			      struct skb_shared_hwtstamps *hwtstamps)
>   {
>   	int op;
>   
> @@ -5577,9 +5578,9 @@ static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
>   		op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
>   		break;
>   	case SCM_TSTAMP_SND:
> +		op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
>   		if (!sw)
> -			return;
> -		op = BPF_SOCK_OPS_TS_SW_OPT_CB;
> +			*skb_hwtstamps(skb) = *hwtstamps;

hwtstamps may still be NULL, no?
Jason Xing Jan. 25, 2025, 1:18 a.m. UTC | #2
On Sat, Jan 25, 2025 at 8:47 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/20/25 5:28 PM, Jason Xing wrote:
> > In this patch, we finish the hardware part. Then bpf program can
> > fetch the hwstamp from skb directly.
> >
> > To avoid changing so many callers using SKBTX_HW_TSTAMP from drivers,
> > use this simple modification like this patch does to support printing
> > hardware timestamp.
> >
> > Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> > ---
> >   include/linux/skbuff.h         |  4 +++-
> >   include/uapi/linux/bpf.h       |  5 +++++
> >   net/core/skbuff.c              | 11 ++++++-----
> >   net/dsa/user.c                 |  2 +-
> >   net/socket.c                   |  2 +-
> >   tools/include/uapi/linux/bpf.h |  5 +++++
> >   6 files changed, 21 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index de8d3bd311f5..df2d790ae36b 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -471,7 +471,7 @@ struct skb_shared_hwtstamps {
> >   /* Definitions for tx_flags in struct skb_shared_info */
> >   enum {
> >       /* generate hardware time stamp */
> > -     SKBTX_HW_TSTAMP = 1 << 0,
> > +     __SKBTX_HW_TSTAMP = 1 << 0,
> >
> >       /* generate software time stamp when queueing packet to NIC */
> >       SKBTX_SW_TSTAMP = 1 << 1,
> > @@ -495,6 +495,8 @@ enum {
> >       SKBTX_BPF = 1 << 7,
> >   };
> >
> > +#define SKBTX_HW_TSTAMP              (__SKBTX_HW_TSTAMP | SKBTX_BPF)
> > +
> >   #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP    | \
> >                                SKBTX_SCHED_TSTAMP | \
> >                                SKBTX_BPF)
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index a6d761f07f67..8936e1061e71 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -7032,6 +7032,11 @@ enum {
> >                                        * feature is on. It indicates the
> >                                        * recorded timestamp.
> >                                        */
> > +     BPF_SOCK_OPS_TS_HW_OPT_CB,      /* Called in hardware phase when
> > +                                      * SO_TIMESTAMPING feature is on.
>
> Same comment on the "SO_TIMESTAMPING".
>
> It will be useful to elaborate more on "hardware phase", like exactly when it
> will be called,

Got it.

>
> > +                                      * It indicates the recorded
> > +                                      * timestamp.
>
> and the hwtstamps will be in the skb.

Right.

>
> > +                                      */
> >   };
> >
> >   /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index 288eb9869827..c769feae5162 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -5548,7 +5548,7 @@ static bool skb_enable_app_tstamp(struct sk_buff *skb, int tstype, bool sw)
> >               flag = SKBTX_SCHED_TSTAMP;
> >               break;
> >       case SCM_TSTAMP_SND:
> > -             flag = sw ? SKBTX_SW_TSTAMP : SKBTX_HW_TSTAMP;
> > +             flag = sw ? SKBTX_SW_TSTAMP : __SKBTX_HW_TSTAMP;
> >               break;
> >       case SCM_TSTAMP_ACK:
> >               if (TCP_SKB_CB(skb)->txstamp_ack)
> > @@ -5565,7 +5565,8 @@ static bool skb_enable_app_tstamp(struct sk_buff *skb, int tstype, bool sw)
> >   }
> >
> >   static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
> > -                           int tstype, bool sw)
> > +                           int tstype, bool sw,
> > +                           struct skb_shared_hwtstamps *hwtstamps)
> >   {
> >       int op;
> >
> > @@ -5577,9 +5578,9 @@ static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
> >               op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
> >               break;
> >       case SCM_TSTAMP_SND:
> > +             op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
> >               if (!sw)
> > -                     return;
> > -             op = BPF_SOCK_OPS_TS_SW_OPT_CB;
> > +                     *skb_hwtstamps(skb) = *hwtstamps;
>
> hwtstamps may still be NULL, no?

Right, it can be zero if something wrong happens.

Thanks,
Jason
Martin KaFai Lau Jan. 25, 2025, 1:29 a.m. UTC | #3
On 1/24/25 5:18 PM, Jason Xing wrote:
>>> @@ -5577,9 +5578,9 @@ static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
>>>                op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
>>>                break;
>>>        case SCM_TSTAMP_SND:
>>> +             op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
>>>                if (!sw)
>>> -                     return;
>>> -             op = BPF_SOCK_OPS_TS_SW_OPT_CB;
>>> +                     *skb_hwtstamps(skb) = *hwtstamps;
>> hwtstamps may still be NULL, no?
> Right, it can be zero if something wrong happens.

Then it needs a NULL check, no?
Jason Xing Jan. 25, 2025, 1:35 a.m. UTC | #4
On Sat, Jan 25, 2025 at 9:30 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/24/25 5:18 PM, Jason Xing wrote:
> >>> @@ -5577,9 +5578,9 @@ static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
> >>>                op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
> >>>                break;
> >>>        case SCM_TSTAMP_SND:
> >>> +             op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
> >>>                if (!sw)
> >>> -                     return;
> >>> -             op = BPF_SOCK_OPS_TS_SW_OPT_CB;
> >>> +                     *skb_hwtstamps(skb) = *hwtstamps;
> >> hwtstamps may still be NULL, no?
> > Right, it can be zero if something wrong happens.
>
> Then it needs a NULL check, no?

My original intention is passing whatever to the userspace, so the bpf
program will be aware of what is happening in the kernel. Passing NULL
to hwstamps is right which will not cause any problem, I think.

Do you mean the default value of hwstamps itself is NULL so in this
case we don't need to re-init it to NULL again?

Like this:
If (*hwtstamps)
     *skb_hwtstamps(skb) = *hwtstamps;

But it looks no different actually.

Thanks,
Jason
Martin KaFai Lau Jan. 25, 2025, 2:36 a.m. UTC | #5
On 1/24/25 5:35 PM, Jason Xing wrote:
> On Sat, Jan 25, 2025 at 9:30 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 1/24/25 5:18 PM, Jason Xing wrote:
>>>>> @@ -5577,9 +5578,9 @@ static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
>>>>>                 op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
>>>>>                 break;
>>>>>         case SCM_TSTAMP_SND:
>>>>> +             op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
>>>>>                 if (!sw)
>>>>> -                     return;
>>>>> -             op = BPF_SOCK_OPS_TS_SW_OPT_CB;
>>>>> +                     *skb_hwtstamps(skb) = *hwtstamps;
>>>> hwtstamps may still be NULL, no?
>>> Right, it can be zero if something wrong happens.
>>
>> Then it needs a NULL check, no?
> 
> My original intention is passing whatever to the userspace, so the bpf
> program will be aware of what is happening in the kernel.

This is fine.

> Passing NULL to hwstamps is right which will not cause any problem, I think.
> 
> Do you mean the default value of hwstamps itself is NULL so in this
> case we don't need to re-init it to NULL again?
> 
> Like this:
> If (*hwtstamps)
   if (hwtstamps) instead ?

I don't know. If hwtstamps is NULL, doing *hwtstamps will be bad and oops....
May be my brain doesn't work well at the end of Friday. Please check.

>       *skb_hwtstamps(skb) = *hwtstamps;
> 
> But it looks no different actually.
> 
> Thanks,
> Jason
Jason Xing Jan. 25, 2025, 2:59 a.m. UTC | #6
On Sat, Jan 25, 2025 at 10:37 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/24/25 5:35 PM, Jason Xing wrote:
> > On Sat, Jan 25, 2025 at 9:30 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
> >>
> >> On 1/24/25 5:18 PM, Jason Xing wrote:
> >>>>> @@ -5577,9 +5578,9 @@ static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
> >>>>>                 op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
> >>>>>                 break;
> >>>>>         case SCM_TSTAMP_SND:
> >>>>> +             op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
> >>>>>                 if (!sw)
> >>>>> -                     return;
> >>>>> -             op = BPF_SOCK_OPS_TS_SW_OPT_CB;
> >>>>> +                     *skb_hwtstamps(skb) = *hwtstamps;
> >>>> hwtstamps may still be NULL, no?
> >>> Right, it can be zero if something wrong happens.
> >>
> >> Then it needs a NULL check, no?
> >
> > My original intention is passing whatever to the userspace, so the bpf
> > program will be aware of what is happening in the kernel.
>
> This is fine.
>
> > Passing NULL to hwstamps is right which will not cause any problem, I think.
> >
> > Do you mean the default value of hwstamps itself is NULL so in this
> > case we don't need to re-init it to NULL again?
> >
> > Like this:
> > If (*hwtstamps)
>    if (hwtstamps) instead ?
>
> I don't know. If hwtstamps is NULL, doing *hwtstamps will be bad and oops....
> May be my brain doesn't work well at the end of Friday. Please check.

Thanks for your effort, Martin!

I will deliberately inject this error case and then see what will happen.

Thanks,
Jason
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index de8d3bd311f5..df2d790ae36b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -471,7 +471,7 @@  struct skb_shared_hwtstamps {
 /* Definitions for tx_flags in struct skb_shared_info */
 enum {
 	/* generate hardware time stamp */
-	SKBTX_HW_TSTAMP = 1 << 0,
+	__SKBTX_HW_TSTAMP = 1 << 0,
 
 	/* generate software time stamp when queueing packet to NIC */
 	SKBTX_SW_TSTAMP = 1 << 1,
@@ -495,6 +495,8 @@  enum {
 	SKBTX_BPF = 1 << 7,
 };
 
+#define SKBTX_HW_TSTAMP		(__SKBTX_HW_TSTAMP | SKBTX_BPF)
+
 #define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
 				 SKBTX_SCHED_TSTAMP | \
 				 SKBTX_BPF)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a6d761f07f67..8936e1061e71 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7032,6 +7032,11 @@  enum {
 					 * feature is on. It indicates the
 					 * recorded timestamp.
 					 */
+	BPF_SOCK_OPS_TS_HW_OPT_CB,	/* Called in hardware phase when
+					 * SO_TIMESTAMPING feature is on.
+					 * It indicates the recorded
+					 * timestamp.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 288eb9869827..c769feae5162 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5548,7 +5548,7 @@  static bool skb_enable_app_tstamp(struct sk_buff *skb, int tstype, bool sw)
 		flag = SKBTX_SCHED_TSTAMP;
 		break;
 	case SCM_TSTAMP_SND:
-		flag = sw ? SKBTX_SW_TSTAMP : SKBTX_HW_TSTAMP;
+		flag = sw ? SKBTX_SW_TSTAMP : __SKBTX_HW_TSTAMP;
 		break;
 	case SCM_TSTAMP_ACK:
 		if (TCP_SKB_CB(skb)->txstamp_ack)
@@ -5565,7 +5565,8 @@  static bool skb_enable_app_tstamp(struct sk_buff *skb, int tstype, bool sw)
 }
 
 static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
-			      int tstype, bool sw)
+			      int tstype, bool sw,
+			      struct skb_shared_hwtstamps *hwtstamps)
 {
 	int op;
 
@@ -5577,9 +5578,9 @@  static void skb_tstamp_tx_bpf(struct sk_buff *skb, struct sock *sk,
 		op = BPF_SOCK_OPS_TS_SCHED_OPT_CB;
 		break;
 	case SCM_TSTAMP_SND:
+		op = sw ? BPF_SOCK_OPS_TS_SW_OPT_CB : BPF_SOCK_OPS_TS_HW_OPT_CB;
 		if (!sw)
-			return;
-		op = BPF_SOCK_OPS_TS_SW_OPT_CB;
+			*skb_hwtstamps(skb) = *hwtstamps;
 		break;
 	default:
 		return;
@@ -5602,7 +5603,7 @@  void __skb_tstamp_tx(struct sk_buff *orig_skb,
 
 	/* bpf extension feature entry */
 	if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
-		skb_tstamp_tx_bpf(orig_skb, sk, tstype, sw);
+		skb_tstamp_tx_bpf(orig_skb, sk, tstype, sw, hwtstamps);
 
 	/* application feature entry */
 	if (!skb_enable_app_tstamp(orig_skb, tstype, sw))
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 291ab1b4acc4..ae715bf0ae75 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -897,7 +897,7 @@  static void dsa_skb_tx_timestamp(struct dsa_user_priv *p,
 {
 	struct dsa_switch *ds = p->dp->ds;
 
-	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+	if (!(skb_shinfo(skb)->tx_flags & __SKBTX_HW_TSTAMP))
 		return;
 
 	if (!ds->ops->port_txtstamp)
diff --git a/net/socket.c b/net/socket.c
index 262a28b59c7f..70eabb510ce6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -676,7 +676,7 @@  void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags)
 	u8 flags = *tx_flags;
 
 	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
-		flags |= SKBTX_HW_TSTAMP;
+		flags |= __SKBTX_HW_TSTAMP;
 
 		/* PTP hardware clocks can provide a free running cycle counter
 		 * as a time base for virtual clocks. Tell driver to use the
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 73fc0a95c9ca..f1583b5814ea 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -7025,6 +7025,11 @@  enum {
 					 * feature is on. It indicates the
 					 * recorded timestamp.
 					 */
+	BPF_SOCK_OPS_TS_HW_OPT_CB,	/* Called in hardware phase when
+					 * SO_TIMESTAMPING feature is on.
+					 * It indicates the recorded
+					 * timestamp.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect