diff mbox series

[net-next,5/9] net-timestamp: ready to turn on the button to generate tx timestamps

Message ID 20241008095109.99918-6-kerneljasonxing@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net-timestamp: bpf extension to equip applications transparently | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 206 this patch: 206
netdev/build_tools success Errors and warnings before: 2 (+1) this patch: 2 (+1)
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang success Errors and warnings before: 257 this patch: 257
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6961 this patch: 6961
netdev/checkpatch warning WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing Oct. 8, 2024, 9:51 a.m. UTC
From: Jason Xing <kernelxing@tencent.com>

Once the BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag is set here, the
three points added in the previous patches start generating
timestamps. This finally makes the basic bpf mechanism for the
timestamping feature work.

A simple example of how to use it in a bpf program:
__section("sockops")

case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
	dport = bpf_ntohl(skops->remote_port);
	sport = skops->local_port;
	skops->reply = SOF_TIMESTAMPING_TX_SCHED;
	bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
	bpf_printk(...);

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/uapi/linux/bpf.h       |  8 ++++++++
 net/ipv4/tcp.c                 | 27 ++++++++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

Comments

Willem de Bruijn Oct. 8, 2024, 6:53 p.m. UTC | #1
Jason Xing wrote:
> From: Jason Xing <kernelxing@tencent.com>
> 
> Once we set BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag here, there
> are three points in the previous patches where generating timestamps
> works. Let us make the basic bpf mechanism for timestamping feature
>  work finally.
> 
> We can use like this as a simple example in bpf program:
> __section("sockops")
> 
> case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
> 	dport = bpf_ntohl(skops->remote_port);
> 	sport = skops->local_port;
> 	skops->reply = SOF_TIMESTAMPING_TX_SCHED;
> 	bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
> case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> 	bpf_printk(...);
> 
> Signed-off-by: Jason Xing <kernelxing@tencent.com>

>  /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 82cc4a5633ce..ddf4089779b5 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -477,12 +477,37 @@ void tcp_init_sock(struct sock *sk)
>  }
>  EXPORT_SYMBOL(tcp_init_sock);
>  
> +static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> +{
> +	u32 flags;
> +
> +	flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
> +	if (flags <= 0)
> +		return 0;
> +
> +	if (flags & ~SOF_TIMESTAMPING_MASK)
> +		return 0;
> +
> +	if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> +		return 0;
> +
> +	return flags;
> +}
> +
>  static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
>  {
>  	struct sk_buff *skb = tcp_write_queue_tail(sk);
>  	u32 tsflags = sockc->tsflags;
> +	u32 flags;
> +
> +	if (!skb)
> +		return;
> +
> +	flags = bpf_tcp_tx_timestamp(sk);
> +	if (flags)
> +		tsflags = flags;

So this feature overwrites the flags set by the user?

Ideally we would use an entirely separate field for BPF admin
timestamping requests.

>  
> -	if (tsflags && skb) {
> +	if (tsflags) {
>  		struct skb_shared_info *shinfo = skb_shinfo(skb);
>  		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
Vadim Fedorenko Oct. 8, 2024, 7:18 p.m. UTC | #2
On 08/10/2024 10:51, Jason Xing wrote:
> From: Jason Xing <kernelxing@tencent.com>
> 
> Once we set BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag here, there
> are three points in the previous patches where generating timestamps
> works. Let us make the basic bpf mechanism for timestamping feature
>   work finally.
> 
> We can use like this as a simple example in bpf program:
> __section("sockops")
> 
> case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
> 	dport = bpf_ntohl(skops->remote_port);
> 	sport = skops->local_port;
> 	skops->reply = SOF_TIMESTAMPING_TX_SCHED;
> 	bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
> case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> 	bpf_printk(...);
> 
> Signed-off-by: Jason Xing <kernelxing@tencent.com>
> ---
>   include/uapi/linux/bpf.h       |  8 ++++++++
>   net/ipv4/tcp.c                 | 27 ++++++++++++++++++++++++++-
>   tools/include/uapi/linux/bpf.h |  8 ++++++++
>   3 files changed, 42 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 1b478ec18ac2..6bf3f2892776 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -7034,6 +7034,14 @@ enum {
>   					 * feature is on. It indicates the
>   					 * recorded timestamp.
>   					 */
> +	BPF_SOCK_OPS_TX_TS_OPT_CB,	/* Called when the last skb from
> +					 * sendmsg is going to push when
> +					 * SO_TIMESTAMPING feature is on.
> +					 * Let user have a chance to switch
> +					 * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
> +					 * flag for other three tx timestamp
> +					 * use.
> +					 */
>   };
>   
>   /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 82cc4a5633ce..ddf4089779b5 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -477,12 +477,37 @@ void tcp_init_sock(struct sock *sk)
>   }
>   EXPORT_SYMBOL(tcp_init_sock);
>   
> +static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> +{
> +	u32 flags;
> +
> +	flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
> +	if (flags <= 0)
> +		return 0;
> +
> +	if (flags & ~SOF_TIMESTAMPING_MASK)
> +		return 0;
> +
> +	if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> +		return 0;
> +
> +	return flags;
> +}
> +
>   static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
>   {
>   	struct sk_buff *skb = tcp_write_queue_tail(sk);
>   	u32 tsflags = sockc->tsflags;
> +	u32 flags;
> +
> +	if (!skb)
> +		return;
> +
> +	flags = bpf_tcp_tx_timestamp(sk);
> +	if (flags)
> +		tsflags = flags;

In this case it's impossible to clear timestamping flags from a bpf
program, but doing so may be very useful. Consider providing the flags from
the socket cookie to the program, or maybe adding an option to combine them?

>   
> -	if (tsflags && skb) {
> +	if (tsflags) {
>   		struct skb_shared_info *shinfo = skb_shinfo(skb);
>   		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>   
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index fc9b94de19f2..d3bf538846da 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -7033,6 +7033,14 @@ enum {
>   					 * feature is on. It indicates the
>   					 * recorded timestamp.
>   					 */
> +	BPF_SOCK_OPS_TX_TS_OPT_CB,	/* Called when the last skb from
> +					 * sendmsg is going to push when
> +					 * SO_TIMESTAMPING feature is on.
> +					 * Let user have a chance to switch
> +					 * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
> +					 * flag for other three tx timestamp
> +					 * use.
> +					 */
>   };
>   
>   /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
Jason Xing Oct. 8, 2024, 11:37 p.m. UTC | #3
On Wed, Oct 9, 2024 at 2:53 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Jason Xing wrote:
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > Once we set BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag here, there
> > are three points in the previous patches where generating timestamps
> > works. Let us make the basic bpf mechanism for timestamping feature
> >  work finally.
> >
> > We can use like this as a simple example in bpf program:
> > __section("sockops")
> >
> > case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
> >       dport = bpf_ntohl(skops->remote_port);
> >       sport = skops->local_port;
> >       skops->reply = SOF_TIMESTAMPING_TX_SCHED;
> >       bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
> > case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> >       bpf_printk(...);
> >
> > Signed-off-by: Jason Xing <kernelxing@tencent.com>
>
> >  /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 82cc4a5633ce..ddf4089779b5 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -477,12 +477,37 @@ void tcp_init_sock(struct sock *sk)
> >  }
> >  EXPORT_SYMBOL(tcp_init_sock);
> >
> > +static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> > +{
> > +     u32 flags;
> > +
> > +     flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
> > +     if (flags <= 0)
> > +             return 0;
> > +
> > +     if (flags & ~SOF_TIMESTAMPING_MASK)
> > +             return 0;
> > +
> > +     if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> > +             return 0;
> > +
> > +     return flags;
> > +}
> > +
> >  static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
> >  {
> >       struct sk_buff *skb = tcp_write_queue_tail(sk);
> >       u32 tsflags = sockc->tsflags;
> > +     u32 flags;
> > +
> > +     if (!skb)
> > +             return;
> > +
> > +     flags = bpf_tcp_tx_timestamp(sk);
> > +     if (flags)
> > +             tsflags = flags;
>
> So this feature overwrites the flags set by the user?

It only overrides the flags of the last skb of each sendmsg call rather
than those of the whole socket, so if we no longer want to use this bpf
program we can simply detach it without having to find a proper time to
clear the fields in struct sock. That's the advantage of setting
them on each sendmsg call, compared to the bpf_setsockopt method.

> Ideally we would use an entirely separate field for BPF admin
> timestamping requests.

I understand what you mean. I'm not that familiar with how a bpf
extension is actually implemented, so I dug into how the RTO min time can
be affected by bpf programs (see BPF_SOCK_OPS_TIMEOUT_INIT as an
example). It also modifies the existing field.

Thanks,
Jason
Jason Xing Oct. 8, 2024, 11:48 p.m. UTC | #4
On Wed, Oct 9, 2024 at 3:18 AM Vadim Fedorenko
<vadim.fedorenko@linux.dev> wrote:
>
> On 08/10/2024 10:51, Jason Xing wrote:
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > Once we set BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag here, there
> > are three points in the previous patches where generating timestamps
> > works. Let us make the basic bpf mechanism for timestamping feature
> >   work finally.
> >
> > We can use like this as a simple example in bpf program:
> > __section("sockops")
> >
> > case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
> >       dport = bpf_ntohl(skops->remote_port);
> >       sport = skops->local_port;
> >       skops->reply = SOF_TIMESTAMPING_TX_SCHED;
> >       bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
> > case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> >       bpf_printk(...);
> >
> > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > ---
> >   include/uapi/linux/bpf.h       |  8 ++++++++
> >   net/ipv4/tcp.c                 | 27 ++++++++++++++++++++++++++-
> >   tools/include/uapi/linux/bpf.h |  8 ++++++++
> >   3 files changed, 42 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 1b478ec18ac2..6bf3f2892776 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -7034,6 +7034,14 @@ enum {
> >                                        * feature is on. It indicates the
> >                                        * recorded timestamp.
> >                                        */
> > +     BPF_SOCK_OPS_TX_TS_OPT_CB,      /* Called when the last skb from
> > +                                      * sendmsg is going to push when
> > +                                      * SO_TIMESTAMPING feature is on.
> > +                                      * Let user have a chance to switch
> > +                                      * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
> > +                                      * flag for other three tx timestamp
> > +                                      * use.
> > +                                      */
> >   };
> >
> >   /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 82cc4a5633ce..ddf4089779b5 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -477,12 +477,37 @@ void tcp_init_sock(struct sock *sk)
> >   }
> >   EXPORT_SYMBOL(tcp_init_sock);
> >
> > +static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> > +{
> > +     u32 flags;
> > +
> > +     flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
> > +     if (flags <= 0)
> > +             return 0;
> > +
> > +     if (flags & ~SOF_TIMESTAMPING_MASK)
> > +             return 0;
> > +
> > +     if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> > +             return 0;
> > +
> > +     return flags;
> > +}
> > +
> >   static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
> >   {
> >       struct sk_buff *skb = tcp_write_queue_tail(sk);
> >       u32 tsflags = sockc->tsflags;
> > +     u32 flags;
> > +
> > +     if (!skb)
> > +             return;
> > +
> > +     flags = bpf_tcp_tx_timestamp(sk);
> > +     if (flags)
> > +             tsflags = flags;
>
> In this case it's impossible to clear timestamping flags from bpf

It cannot be cleared only from the last skb until the next round of
recvmsg. Since the last skb is generated and bpf program is attached,
I would like to know why we need to clear the related fields in the
skb? Please note that I didn't hack the sk_tstflags in struct sock :)

> program, but it may be very useful. Consider providing flags from
> socket cookie to the program or maybe add an option to combine them?

Thanks for this idea. May I ask what the benefits are through adding
an option because the bpf test statement (BPF_SOCK_OPS_TEST_FLAG) is a
good option to take a whole control? Or could you provide more details
about how you expect to do so?

Thanks,
Jason
Vadim Fedorenko Oct. 9, 2024, 9:16 a.m. UTC | #5
On 09/10/2024 00:48, Jason Xing wrote:
> On Wed, Oct 9, 2024 at 3:18 AM Vadim Fedorenko
> <vadim.fedorenko@linux.dev> wrote:
>>
>> On 08/10/2024 10:51, Jason Xing wrote:
>>> From: Jason Xing <kernelxing@tencent.com>
>>>
>>> Once we set BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag here, there
>>> are three points in the previous patches where generating timestamps
>>> works. Let us make the basic bpf mechanism for timestamping feature
>>>    work finally.
>>>
>>> We can use like this as a simple example in bpf program:
>>> __section("sockops")
>>>
>>> case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
>>>        dport = bpf_ntohl(skops->remote_port);
>>>        sport = skops->local_port;
>>>        skops->reply = SOF_TIMESTAMPING_TX_SCHED;
>>>        bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
>>> case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
>>>        bpf_printk(...);
>>>
>>> Signed-off-by: Jason Xing <kernelxing@tencent.com>
>>> ---
>>>    include/uapi/linux/bpf.h       |  8 ++++++++
>>>    net/ipv4/tcp.c                 | 27 ++++++++++++++++++++++++++-
>>>    tools/include/uapi/linux/bpf.h |  8 ++++++++
>>>    3 files changed, 42 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>> index 1b478ec18ac2..6bf3f2892776 100644
>>> --- a/include/uapi/linux/bpf.h
>>> +++ b/include/uapi/linux/bpf.h
>>> @@ -7034,6 +7034,14 @@ enum {
>>>                                         * feature is on. It indicates the
>>>                                         * recorded timestamp.
>>>                                         */
>>> +     BPF_SOCK_OPS_TX_TS_OPT_CB,      /* Called when the last skb from
>>> +                                      * sendmsg is going to push when
>>> +                                      * SO_TIMESTAMPING feature is on.
>>> +                                      * Let user have a chance to switch
>>> +                                      * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
>>> +                                      * flag for other three tx timestamp
>>> +                                      * use.
>>> +                                      */
>>>    };
>>>
>>>    /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
>>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>>> index 82cc4a5633ce..ddf4089779b5 100644
>>> --- a/net/ipv4/tcp.c
>>> +++ b/net/ipv4/tcp.c
>>> @@ -477,12 +477,37 @@ void tcp_init_sock(struct sock *sk)
>>>    }
>>>    EXPORT_SYMBOL(tcp_init_sock);
>>>
>>> +static u32 bpf_tcp_tx_timestamp(struct sock *sk)
>>> +{
>>> +     u32 flags;
>>> +
>>> +     flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
>>> +     if (flags <= 0)
>>> +             return 0;
>>> +
>>> +     if (flags & ~SOF_TIMESTAMPING_MASK)
>>> +             return 0;
>>> +
>>> +     if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
>>> +             return 0;
>>> +
>>> +     return flags;
>>> +}
>>> +
>>>    static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
>>>    {
>>>        struct sk_buff *skb = tcp_write_queue_tail(sk);
>>>        u32 tsflags = sockc->tsflags;
>>> +     u32 flags;
>>> +
>>> +     if (!skb)
>>> +             return;
>>> +
>>> +     flags = bpf_tcp_tx_timestamp(sk);
>>> +     if (flags)
>>> +             tsflags = flags;
>>
>> In this case it's impossible to clear timestamping flags from bpf
> 
> It cannot be cleared only from the last skb until the next round of
> recvmsg. Since the last skb is generated and bpf program is attached,
> I would like to know why we need to clear the related fields in the
> skb? Please note that I didn't hack the sk_tstflags in struct sock :)

>> program, but it may be very useful. Consider providing flags from
>> socket cookie to the program or maybe add an option to combine them?
> 
> Thanks for this idea. May I ask what the benefits are through adding
> an option because the bpf test statement (BPF_SOCK_OPS_TEST_FLAG) is a
> good option to take a whole control? Or could you provide more details
> about how you expect to do so?

Well, as Willem mentioned, you are overriding flags completely. But what
if an application is waiting for some type of timestamp to arrive, but
bpf program rewrites flags and disables this type of timestamp? It will
confuse application.

Thinking twice, clearing flags might not be useful because of the very
same issue though.


> 
> Thanks,
> Jason
Jason Xing Oct. 9, 2024, 11:15 a.m. UTC | #6
On Wed, Oct 9, 2024 at 5:17 PM Vadim Fedorenko
<vadim.fedorenko@linux.dev> wrote:
>
> On 09/10/2024 00:48, Jason Xing wrote:
> > On Wed, Oct 9, 2024 at 3:18 AM Vadim Fedorenko
> > <vadim.fedorenko@linux.dev> wrote:
> >>
> >> On 08/10/2024 10:51, Jason Xing wrote:
> >>> From: Jason Xing <kernelxing@tencent.com>
> >>>
> >>> Once we set BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG flag here, there
> >>> are three points in the previous patches where generating timestamps
> >>> works. Let us make the basic bpf mechanism for timestamping feature
> >>>    work finally.
> >>>
> >>> We can use like this as a simple example in bpf program:
> >>> __section("sockops")
> >>>
> >>> case BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB:
> >>>        dport = bpf_ntohl(skops->remote_port);
> >>>        sport = skops->local_port;
> >>>        skops->reply = SOF_TIMESTAMPING_TX_SCHED;
> >>>        bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_TX_TIMESTAMP_OPT_CB_FLAG);
> >>> case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> >>>        bpf_printk(...);
> >>>
> >>> Signed-off-by: Jason Xing <kernelxing@tencent.com>
> >>> ---
> >>>    include/uapi/linux/bpf.h       |  8 ++++++++
> >>>    net/ipv4/tcp.c                 | 27 ++++++++++++++++++++++++++-
> >>>    tools/include/uapi/linux/bpf.h |  8 ++++++++
> >>>    3 files changed, 42 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >>> index 1b478ec18ac2..6bf3f2892776 100644
> >>> --- a/include/uapi/linux/bpf.h
> >>> +++ b/include/uapi/linux/bpf.h
> >>> @@ -7034,6 +7034,14 @@ enum {
> >>>                                         * feature is on. It indicates the
> >>>                                         * recorded timestamp.
> >>>                                         */
> >>> +     BPF_SOCK_OPS_TX_TS_OPT_CB,      /* Called when the last skb from
> >>> +                                      * sendmsg is going to push when
> >>> +                                      * SO_TIMESTAMPING feature is on.
> >>> +                                      * Let user have a chance to switch
> >>> +                                      * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
> >>> +                                      * flag for other three tx timestamp
> >>> +                                      * use.
> >>> +                                      */
> >>>    };
> >>>
> >>>    /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> >>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> >>> index 82cc4a5633ce..ddf4089779b5 100644
> >>> --- a/net/ipv4/tcp.c
> >>> +++ b/net/ipv4/tcp.c
> >>> @@ -477,12 +477,37 @@ void tcp_init_sock(struct sock *sk)
> >>>    }
> >>>    EXPORT_SYMBOL(tcp_init_sock);
> >>>
> >>> +static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> >>> +{
> >>> +     u32 flags;
> >>> +
> >>> +     flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
> >>> +     if (flags <= 0)
> >>> +             return 0;
> >>> +
> >>> +     if (flags & ~SOF_TIMESTAMPING_MASK)
> >>> +             return 0;
> >>> +
> >>> +     if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> >>> +             return 0;
> >>> +
> >>> +     return flags;
> >>> +}
> >>> +
> >>>    static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
> >>>    {
> >>>        struct sk_buff *skb = tcp_write_queue_tail(sk);
> >>>        u32 tsflags = sockc->tsflags;
> >>> +     u32 flags;
> >>> +
> >>> +     if (!skb)
> >>> +             return;
> >>> +
> >>> +     flags = bpf_tcp_tx_timestamp(sk);
> >>> +     if (flags)
> >>> +             tsflags = flags;
> >>
> >> In this case it's impossible to clear timestamping flags from bpf
> >
> > It cannot be cleared only from the last skb until the next round of
> > recvmsg. Since the last skb is generated and bpf program is attached,
> > I would like to know why we need to clear the related fields in the
> > skb? Please note that I didn't hack the sk_tstflags in struct sock :)
>
> >> program, but it may be very useful. Consider providing flags from
> >> socket cookie to the program or maybe add an option to combine them?
> >
> > Thanks for this idea. May I ask what the benefits are through adding
> > an option because the bpf test statement (BPF_SOCK_OPS_TEST_FLAG) is a
> > good option to take a whole control? Or could you provide more details
> > about how you expect to do so?
>
> Well, as Willem mentioned, you are overriding flags completely. But what
> if an application is waiting for some type of timestamp to arrive, but
> bpf program rewrites flags and disables this type of timestamp? It will
> confuse application.

Indeed, this series doesn't handle the conflict very well. Initially,
I tried so hard to avoid implementing the feature again. But now, it
seems inevitable. Let me dig into it more.

>
> Thinking twice, clearing flags might not be useful because of the very
> same issue though.

Yes.

Thanks,
Jason
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1b478ec18ac2..6bf3f2892776 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7034,6 +7034,14 @@  enum {
 					 * feature is on. It indicates the
 					 * recorded timestamp.
 					 */
+	BPF_SOCK_OPS_TX_TS_OPT_CB,	/* Called when the last skb from
+					 * sendmsg is going to push when
+					 * SO_TIMESTAMPING feature is on.
+					 * Let user have a chance to switch
+					 * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
+					 * flag for other three tx timestamp
+					 * use.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 82cc4a5633ce..ddf4089779b5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -477,12 +477,37 @@  void tcp_init_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
+static u32 bpf_tcp_tx_timestamp(struct sock *sk)
+{
+	u32 flags;
+
+	flags = tcp_call_bpf(sk, BPF_SOCK_OPS_TX_TS_OPT_CB, 0, NULL);
+	if (flags <= 0)
+		return 0;
+
+	if (flags & ~SOF_TIMESTAMPING_MASK)
+		return 0;
+
+	if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
+		return 0;
+
+	return flags;
+}
+
 static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
 {
 	struct sk_buff *skb = tcp_write_queue_tail(sk);
 	u32 tsflags = sockc->tsflags;
+	u32 flags;
+
+	if (!skb)
+		return;
+
+	flags = bpf_tcp_tx_timestamp(sk);
+	if (flags)
+		tsflags = flags;
 
-	if (tsflags && skb) {
+	if (tsflags) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fc9b94de19f2..d3bf538846da 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -7033,6 +7033,14 @@  enum {
 					 * feature is on. It indicates the
 					 * recorded timestamp.
 					 */
+	BPF_SOCK_OPS_TX_TS_OPT_CB,	/* Called when the last skb from
+					 * sendmsg is going to push when
+					 * SO_TIMESTAMPING feature is on.
+					 * Let user have a chance to switch
+					 * on BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG
+					 * flag for other three tx timestamp
+					 * use.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect