Message ID | 20241008095109.99918-2-kerneljasonxing@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net-timestamp: bpf extension to equip applications transparently | expand |
Jason Xing wrote: > From: Jason Xing <kernelxing@tencent.com> > > Implement basic codes so that we later can easily add each tx points. > Introducing BPF_SOCK_OPS_ALL_CB_FLAGS used as a test statement can help use > control whether to output or not. > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > --- > include/uapi/linux/bpf.h | 5 ++++- > net/core/skbuff.c | 18 ++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 5 ++++- > 3 files changed, 26 insertions(+), 2 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c6cd7c7aeeee..157e139ed6fc 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -6900,8 +6900,11 @@ enum { > * options first before the BPF program does. > */ > BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), > + /* Call bpf when the kernel is generating tx timestamps. > + */ > + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), > /* Mask of all currently supported cb flags */ > - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, > + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, > }; > > /* List of known BPF sock_ops operators. > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index 74149dc4ee31..5ff1a91c1204 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -5539,6 +5539,21 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, > } > EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); > > +static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag, > + struct skb_shared_hwtstamps *hwtstamps) > +{ > + struct tcp_sock *tp; > + > + if (!sk_is_tcp(sk)) > + return false; > + > + tp = tcp_sk(sk); > + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)) > + return true; > + > + return false; > +} > + > void __skb_tstamp_tx(struct sk_buff *orig_skb, > const struct sk_buff *ack_skb, > struct skb_shared_hwtstamps *hwtstamps, > @@ -5551,6 +5566,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, > if (!sk) > return; > > + if (bpf_skb_tstamp_tx(sk, tstype, hwtstamps)) > + return; > + Eventually, this whole feature could probably be behind a static_branch.
On Wed, Oct 9, 2024 at 2:45 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > From: Jason Xing <kernelxing@tencent.com> > > > > Implement basic codes so that we later can easily add each tx points. > > Introducing BPF_SOCK_OPS_ALL_CB_FLAGS used as a test statement can help use > > control whether to output or not. > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > --- > > include/uapi/linux/bpf.h | 5 ++++- > > net/core/skbuff.c | 18 ++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 5 ++++- > > 3 files changed, 26 insertions(+), 2 deletions(-) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index c6cd7c7aeeee..157e139ed6fc 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -6900,8 +6900,11 @@ enum { > > * options first before the BPF program does. > > */ > > BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), > > + /* Call bpf when the kernel is generating tx timestamps. > > + */ > > + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), > > /* Mask of all currently supported cb flags */ > > - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, > > + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, > > }; > > > > /* List of known BPF sock_ops operators. > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > index 74149dc4ee31..5ff1a91c1204 100644 > > --- a/net/core/skbuff.c > > +++ b/net/core/skbuff.c > > @@ -5539,6 +5539,21 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, > > } > > EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); > > > > +static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag, > > + struct skb_shared_hwtstamps *hwtstamps) > > +{ > > + struct tcp_sock *tp; > > + > > + if (!sk_is_tcp(sk)) > > + return false; > > + > > + tp = tcp_sk(sk); > > + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)) > > + return true; > > + > > + return false; > > +} > > + > > void __skb_tstamp_tx(struct sk_buff *orig_skb, > > const struct sk_buff *ack_skb, > > struct skb_shared_hwtstamps *hwtstamps, > > @@ -5551,6 +5566,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, > > if (!sk) > > return; > > > > + if (bpf_skb_tstamp_tx(sk, tstype, hwtstamps)) > > + return; > > + > > Eventually, this whole feature could probably be behind a > static_branch. You want to implement another toggle to control it? But for tx path "BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)" works as a per-netns toggle. I would like to know what you exactly want to do in the next move? Thanks, Jason
From: Jason Xing <kerneljasonxing@gmail.com> Date: Tue, 8 Oct 2024 17:51:01 +0800 > From: Jason Xing <kernelxing@tencent.com> > > Implement basic codes so that we later can easily add each tx points. > Introducing BPF_SOCK_OPS_ALL_CB_FLAGS used as a test statement can help use > control whether to output or not. > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > --- > include/uapi/linux/bpf.h | 5 ++++- > net/core/skbuff.c | 18 ++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 5 ++++- > 3 files changed, 26 insertions(+), 2 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c6cd7c7aeeee..157e139ed6fc 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -6900,8 +6900,11 @@ enum { > * options first before the BPF program does. > */ > BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), > + /* Call bpf when the kernel is generating tx timestamps. > + */ > + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), > /* Mask of all currently supported cb flags */ > - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, > + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, I remember this change makes two selftests fail and needs diff in this link. https://lore.kernel.org/bpf/20231016161134.25365-1-kuniyu@amazon.com/ Also, adding a bpf selftest or extending some for this series would be nice.
On Wed, Oct 9, 2024 at 8:59 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > From: Jason Xing <kerneljasonxing@gmail.com> > Date: Tue, 8 Oct 2024 17:51:01 +0800 > > From: Jason Xing <kernelxing@tencent.com> > > > > Implement basic codes so that we later can easily add each tx points. > > Introducing BPF_SOCK_OPS_ALL_CB_FLAGS used as a test statement can help use > > control whether to output or not. > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > --- > > include/uapi/linux/bpf.h | 5 ++++- > > net/core/skbuff.c | 18 ++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 5 ++++- > > 3 files changed, 26 insertions(+), 2 deletions(-) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index c6cd7c7aeeee..157e139ed6fc 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -6900,8 +6900,11 @@ enum { > > * options first before the BPF program does. > > */ > > BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), > > + /* Call bpf when the kernel is generating tx timestamps. > > + */ > > + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), > > /* Mask of all currently supported cb flags */ > > - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, > > + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, > > I remember this change makes two selftests fail and needs diff > in this link. > https://lore.kernel.org/bpf/20231016161134.25365-1-kuniyu@amazon.com/ Thanks for pointing out this. I will dig into this :) > > Also, adding a bpf selftest or extending some for this series > would be nice. Sure, I would like to add a selftest after we all reach a consensus on how to implement it in the right way. Thanks, Jason
Jason Xing wrote: > On Wed, Oct 9, 2024 at 2:45 AM Willem de Bruijn > <willemdebruijn.kernel@gmail.com> wrote: > > > > Jason Xing wrote: > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > Implement basic codes so that we later can easily add each tx points. > > > Introducing BPF_SOCK_OPS_ALL_CB_FLAGS used as a test statement can help use > > > control whether to output or not. > > > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > --- > > > include/uapi/linux/bpf.h | 5 ++++- > > > net/core/skbuff.c | 18 ++++++++++++++++++ > > > tools/include/uapi/linux/bpf.h | 5 ++++- > > > 3 files changed, 26 insertions(+), 2 deletions(-) > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > index c6cd7c7aeeee..157e139ed6fc 100644 > > > --- a/include/uapi/linux/bpf.h > > > +++ b/include/uapi/linux/bpf.h > > > @@ -6900,8 +6900,11 @@ enum { > > > * options first before the BPF program does. > > > */ > > > BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), > > > + /* Call bpf when the kernel is generating tx timestamps. > > > + */ > > > + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), > > > /* Mask of all currently supported cb flags */ > > > - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, > > > + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, > > > }; > > > > > > /* List of known BPF sock_ops operators. > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > index 74149dc4ee31..5ff1a91c1204 100644 > > > --- a/net/core/skbuff.c > > > +++ b/net/core/skbuff.c > > > @@ -5539,6 +5539,21 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, > > > } > > > EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); > > > > > > +static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag, > > > + struct skb_shared_hwtstamps *hwtstamps) > > > +{ > > > + struct tcp_sock *tp; > > > + > > > + if (!sk_is_tcp(sk)) > > > + return false; > > > + > > > + tp = tcp_sk(sk); > > > + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)) > > > + return true; > > > + > > > + return false; > > > +} > > > + > > > void __skb_tstamp_tx(struct sk_buff *orig_skb, > > > const struct sk_buff *ack_skb, > > > struct skb_shared_hwtstamps *hwtstamps, > > > @@ -5551,6 +5566,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, > > > if (!sk) > > > return; > > > > > > + if (bpf_skb_tstamp_tx(sk, tstype, hwtstamps)) > > > + return; > > > + > > > > Eventually, this whole feature could probably be behind a > > static_branch. > > You want to implement another toggle to control it? But for tx path > "BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)" > works as a per-netns toggle. I would like to know what you exactly > want to do in the next move? Not another toggle. A static branch that enables the datapath logic when a BPF program becomes active. See also for instance ipv4_min_ttl.
On Wed, Oct 9, 2024 at 9:22 PM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > On Wed, Oct 9, 2024 at 2:45 AM Willem de Bruijn > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > Jason Xing wrote: > > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > > > Implement basic codes so that we later can easily add each tx points. > > > > Introducing BPF_SOCK_OPS_ALL_CB_FLAGS used as a test statement can help use > > > > control whether to output or not. > > > > > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > > --- > > > > include/uapi/linux/bpf.h | 5 ++++- > > > > net/core/skbuff.c | 18 ++++++++++++++++++ > > > > tools/include/uapi/linux/bpf.h | 5 ++++- > > > > 3 files changed, 26 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > > index c6cd7c7aeeee..157e139ed6fc 100644 > > > > --- a/include/uapi/linux/bpf.h > > > > +++ b/include/uapi/linux/bpf.h > > > > @@ -6900,8 +6900,11 @@ enum { > > > > * options first before the BPF program does. > > > > */ > > > > BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), > > > > + /* Call bpf when the kernel is generating tx timestamps. > > > > + */ > > > > + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), > > > > /* Mask of all currently supported cb flags */ > > > > - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, > > > > + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, > > > > }; > > > > > > > > /* List of known BPF sock_ops operators. > > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > > index 74149dc4ee31..5ff1a91c1204 100644 > > > > --- a/net/core/skbuff.c > > > > +++ b/net/core/skbuff.c > > > > @@ -5539,6 +5539,21 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, > > > > } > > > > EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); > > > > > > > > +static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag, > > > > + struct skb_shared_hwtstamps *hwtstamps) > > > > +{ > > > > + struct tcp_sock *tp; > > > > + > > > > + if (!sk_is_tcp(sk)) > > > > + return false; > > > > + > > > > + tp = tcp_sk(sk); > > > > + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)) > > > > + return true; > > > > + > > > > + return false; > > > > +} > > > > + > > > > void __skb_tstamp_tx(struct sk_buff *orig_skb, > > > > const struct sk_buff *ack_skb, > > > > struct skb_shared_hwtstamps *hwtstamps, > > > > @@ -5551,6 +5566,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, > > > > if (!sk) > > > > return; > > > > > > > > + if (bpf_skb_tstamp_tx(sk, tstype, hwtstamps)) > > > > + return; > > > > + > > > > > > Eventually, this whole feature could probably be behind a > > > static_branch. > > > > You want to implement another toggle to control it? But for tx path > > "BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)" > > works as a per-netns toggle. I would like to know what you exactly > > want to do in the next move? > > Not another toggle. A static branch that enables the datapath logic > when a BPF program becomes active. See also for instance ipv4_min_ttl. Thanks, I see. Then we can totally use the bpf_setsockopt() interface with a new tsflag field, or something like this, to implement just like how ip4_min_ttl works. I will give it a try to see if it can easily work. Thanks, Jason
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..157e139ed6fc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6900,8 +6900,11 @@ enum { * options first before the BPF program does. */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), + /* Call bpf when the kernel is generating tx timestamps. + */ + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, }; /* List of known BPF sock_ops operators. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 74149dc4ee31..5ff1a91c1204 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5539,6 +5539,21 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); +static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct tcp_sock *tp; + + if (!sk_is_tcp(sk)) + return false; + + tp = tcp_sk(sk); + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)) + return true; + + return false; +} + void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, @@ -5551,6 +5566,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if (!sk) return; + if (bpf_skb_tstamp_tx(sk, tstype, hwtstamps)) + return; + tsflags = READ_ONCE(sk->sk_tsflags); if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fb3cb2636e6..93853d9d4922 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6899,8 +6899,11 @@ enum { * options first before the BPF program does. */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), + /* Call bpf when the kernel is generating tx timestamps. + */ + BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG = (1<<7), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xFF, }; /* List of known BPF sock_ops operators.