
[RFC,net-next,v6,12/13] net-timestamp: introduce cgroup lock to avoid affecting non-bpf cases

Message ID 20250121012901.87763-13-kerneljasonxing@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series net-timestamp: bpf extension to equip applications transparently

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1 this patch: 1
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 42 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 57 this patch: 57
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing Jan. 21, 2025, 1:29 a.m. UTC
Introduce the lock to avoid affecting applications that are not
using the bpf timestamping feature.

Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
---
 net/core/skbuff.c     | 6 ++++--
 net/ipv4/tcp.c        | 3 ++-
 net/ipv4/tcp_input.c  | 3 ++-
 net/ipv4/tcp_output.c | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)
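
For context, the "lock" here is the cgroup-bpf static branch rather than a mutex: cgroup_bpf_enabled() is, at the time of writing, roughly the following macro (simplified from include/linux/bpf-cgroup.h), which compiles down to a patched-out jump when no prog is attached:

/* Simplified from include/linux/bpf-cgroup.h: one static key per attach
 * type, flipped when a sockops prog is attached or detached. With no
 * prog attached, static_branch_unlikely() is a no-op jump, so the
 * check costs nearly nothing on the fast path.
 */
extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE];

#define cgroup_bpf_enabled(atype) \
	static_branch_unlikely(&cgroup_bpf_enabled_key[atype])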

Comments

Martin KaFai Lau Jan. 25, 2025, 1:09 a.m. UTC | #1
On 1/20/25 5:29 PM, Jason Xing wrote:
> Introduce the lock to avoid affecting applications that are not
> using the bpf timestamping feature.
> 
> Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> ---
>   net/core/skbuff.c     | 6 ++++--
>   net/ipv4/tcp.c        | 3 ++-
>   net/ipv4/tcp_input.c  | 3 ++-
>   net/ipv4/tcp_output.c | 3 ++-
>   4 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 33340e0b094f..db5b4b653351 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -5605,11 +5605,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
>   		return;
>   
>   	/* bpf extension feature entry */
> -	if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
> +	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&

I wonder if it is really needed. The caller has just tested the tx_flags.

> +	    skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
>   		skb_tstamp_tx_bpf(orig_skb, sk, tstype, sw, hwtstamps);
>   
>   	/* application feature entry */
> -	if (!skb_enable_app_tstamp(orig_skb, tstype, sw))
> +	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&

Same here, and this one also looks wrong. Userspace may get something 
unexpected in the err queue: the bpf prog may have already been detached here 
after setting SKBTX_BPF earlier.

> +	    !skb_enable_app_tstamp(orig_skb, tstype, sw))
>   		return;
>   
>   	tsflags = READ_ONCE(sk->sk_tsflags);
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 49e489c346ea..d88160af00c4 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -493,7 +493,8 @@ static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
>   			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
>   	}
>   
> -	if (SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) {
> +	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&

This one looks ok, considering SK_BPF_CB_FLAG_TEST may touch another cacheline.

> +	    SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) {
>   		struct skb_shared_info *shinfo = skb_shinfo(skb);
>   		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
>   
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index c8945f5be31b..e30607ba41e5 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3324,7 +3324,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
>   
>   	/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
>   	if (likely(!TCP_SKB_CB(skb)->txstamp_ack &&
> -		   !TCP_SKB_CB(skb)->txstamp_ack_bpf))
> +		   !(cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&

Same here. txstamp_ack has just been tested... txstamp_ack_bpf is the next bit.

> +		     TCP_SKB_CB(skb)->txstamp_ack_bpf)))
>   		return;
>   
>   	shinfo = skb_shinfo(skb);
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index fc84ca669b76..483f19c2083e 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1556,7 +1556,8 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
>   static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
>   {
>   	return TCP_SKB_CB(skb)->txstamp_ack ||
> -	       TCP_SKB_CB(skb)->txstamp_ack_bpf ||
> +	       (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&

Same here.

> +		TCP_SKB_CB(skb)->txstamp_ack_bpf) ||
>   		(skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
>   }
>
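To illustrate the err-queue problem raised above: once the last sockops prog detaches, cgroup_bpf_enabled(CGROUP_SOCK_OPS) turns false and the early return below is never taken, so the application path runs even for skbs that never requested a timestamp. A simplified sketch of the patched flow, not the actual code:

/* Patched __skb_tstamp_tx(), simplified. With no sockops prog attached
 * the whole condition is false, the early return is skipped, and a
 * timestamp can be queued to the socket error queue although the
 * application never enabled it.
 */
if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
    !skb_enable_app_tstamp(orig_skb, tstype, sw))
	return;
/* falls through to the application feature path unconditionally */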
Jason Xing Jan. 25, 2025, 1:25 a.m. UTC | #2
On Sat, Jan 25, 2025 at 9:09 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/20/25 5:29 PM, Jason Xing wrote:
> > Introduce the lock to avoid affecting applications that are not
> > using the bpf timestamping feature.
> >
> > Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> > ---
> >   net/core/skbuff.c     | 6 ++++--
> >   net/ipv4/tcp.c        | 3 ++-
> >   net/ipv4/tcp_input.c  | 3 ++-
> >   net/ipv4/tcp_output.c | 3 ++-
> >   4 files changed, 10 insertions(+), 5 deletions(-)
> >
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index 33340e0b094f..db5b4b653351 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -5605,11 +5605,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
> >               return;
> >
> >       /* bpf extension feature entry */
> > -     if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
> > +     if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
>
> I wonder if it is really needed. The caller has just tested the tx_flags.
>
> > +         skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
> >               skb_tstamp_tx_bpf(orig_skb, sk, tstype, sw, hwtstamps);
> >
> >       /* application feature entry */
> > -     if (!skb_enable_app_tstamp(orig_skb, tstype, sw))
> > +     if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
>
> Same here, and this one also looks wrong. Userspace may get something
> unexpected in the err queue: the bpf prog may have already been detached here
> after setting SKBTX_BPF earlier.

Oh, thanks for spotting this case.

>
> > +         !skb_enable_app_tstamp(orig_skb, tstype, sw))
> >               return;
> >
> >       tsflags = READ_ONCE(sk->sk_tsflags);
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 49e489c346ea..d88160af00c4 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -493,7 +493,8 @@ static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
> >                       shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
> >       }
> >
> > -     if (SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) {
> > +     if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
>
> This one looks ok, considering SK_BPF_CB_FLAG_TEST may touch another cacheline.
>
> > +         SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) {
> >               struct skb_shared_info *shinfo = skb_shinfo(skb);
> >               struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
> >
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index c8945f5be31b..e30607ba41e5 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -3324,7 +3324,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
> >
> >       /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
> >       if (likely(!TCP_SKB_CB(skb)->txstamp_ack &&
> > -                !TCP_SKB_CB(skb)->txstamp_ack_bpf))
> > +                !(cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
>
> Same here. txstamp_ack has just been tested... txstamp_ack_bpf is the next bit.
>
> > +                  TCP_SKB_CB(skb)->txstamp_ack_bpf)))
> >               return;
> >
> >       shinfo = skb_shinfo(skb);
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index fc84ca669b76..483f19c2083e 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -1556,7 +1556,8 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
> >   static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
> >   {
> >       return TCP_SKB_CB(skb)->txstamp_ack ||
> > -            TCP_SKB_CB(skb)->txstamp_ack_bpf ||
> > +            (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
>
> Same here.

I thought cgroup_bpf_enabled(), which costs almost nothing, could
guard the timestamping feature everywhere. I have no strong
preference, so I will remove them as you suggested.

Thanks,
Jason

>
> > +             TCP_SKB_CB(skb)->txstamp_ack_bpf) ||
> >               (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
> >   }
> >
>
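The repeated "Same here" remarks rest on one layout detail: in this series txstamp_ack_bpf is placed right next to txstamp_ack in tcp_skb_cb, so once the first bit has been loaded, testing the second touches no additional cache line, and the static branch buys nothing there. A rough sketch of the relevant byte (field order assumed from the review, simplified from include/net/tcp.h):

/* Hypothetical sketch of the tcp_skb_cb flag byte as extended by this
 * series: txstamp_ack and txstamp_ack_bpf share a byte, so the second
 * test is effectively free after the first.
 */
struct tcp_skb_cb_flags_sketch {
	__u8 txstamp_ack:1,	/* record TX timestamp for ack? */
	     txstamp_ack_bpf:1,	/* ditto, set by the bpf extension */
	     eor:1,		/* is skb MSG_EOR marked? */
	     unused:5;
};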

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 33340e0b094f..db5b4b653351 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5605,11 +5605,13 @@  void __skb_tstamp_tx(struct sk_buff *orig_skb,
 		return;
 
 	/* bpf extension feature entry */
-	if (skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
+	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
+	    skb_shinfo(orig_skb)->tx_flags & SKBTX_BPF)
 		skb_tstamp_tx_bpf(orig_skb, sk, tstype, sw, hwtstamps);
 
 	/* application feature entry */
-	if (!skb_enable_app_tstamp(orig_skb, tstype, sw))
+	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
+	    !skb_enable_app_tstamp(orig_skb, tstype, sw))
 		return;
 
 	tsflags = READ_ONCE(sk->sk_tsflags);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 49e489c346ea..d88160af00c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -493,7 +493,8 @@  static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
 			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
 	}
 
-	if (SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) {
+	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
+	    SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c8945f5be31b..e30607ba41e5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3324,7 +3324,8 @@  static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
 
 	/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
 	if (likely(!TCP_SKB_CB(skb)->txstamp_ack &&
-		   !TCP_SKB_CB(skb)->txstamp_ack_bpf))
+		   !(cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
+		     TCP_SKB_CB(skb)->txstamp_ack_bpf)))
 		return;
 
 	shinfo = skb_shinfo(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fc84ca669b76..483f19c2083e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1556,7 +1556,8 @@  static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
 static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
 {
 	return TCP_SKB_CB(skb)->txstamp_ack ||
-	       TCP_SKB_CB(skb)->txstamp_ack_bpf ||
+	       (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
+		TCP_SKB_CB(skb)->txstamp_ack_bpf) ||
 		(skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
 }
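
Given the review above, the likely follow-up (a sketch of the agreed direction, not the posted v7) is to drop the static-branch guard from __skb_tstamp_tx(), tcp_ack_tstamp() and tcp_has_tx_tstamp(), keeping it only in tcp_tx_timestamp() where SK_BPF_CB_FLAG_TEST may otherwise pull in another cache line. For tcp_has_tx_tstamp() that simply restores the plain bit test:

/* Sketch of tcp_has_tx_tstamp() with the guard dropped, per the
 * review: txstamp_ack_bpf sits next to txstamp_ack, so testing it
 * directly is cheaper than a static branch.
 */
static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->txstamp_ack ||
	       TCP_SKB_CB(skb)->txstamp_ack_bpf ||
	       (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
}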