Message ID | 20241028110535.82999-8-kerneljasonxing@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net-timestamp: bpf extension to equip applications transparently | expand |
Jason Xing wrote: > From: Jason Xing <kernelxing@tencent.com> > > This patch behaves like how cmsg feature works, that is to say, > check and set on each call of udp_sendmsg before passing sk_tsflags_bpf > to cork tsflags. > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > --- > include/net/sock.h | 1 + > include/uapi/linux/bpf.h | 3 +++ > net/core/skbuff.c | 2 +- > net/ipv4/udp.c | 1 + > tools/include/uapi/linux/bpf.h | 3 +++ > 5 files changed, 9 insertions(+), 1 deletion(-) > > diff --git a/include/net/sock.h b/include/net/sock.h > index 062f405c744e..cf7fea456455 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) > } > > void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); > int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, > int type); > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 6fc3bd12b650..055ffa7c965c 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -7028,6 +7028,9 @@ enum { > * feature is on. It indicates the > * recorded timestamp. > */ > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > + * syscall is triggered > + */ > }; > > /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index 8b2a79c0fe1c..0b571306f7ea 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, > __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); > } > > -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > { > struct bpf_sock_ops_kern sock_ops; > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index 9a20af41e272..e768421abc37 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > if (!corkreq) { > struct inet_cork cork; > > + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); > skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, > sizeof(struct udphdr), &ipc, &rt, > &cork, msg->msg_flags); > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 6fc3bd12b650..055ffa7c965c 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -7028,6 +7028,9 @@ enum { > * feature is on. It indicates the > * recorded timestamp. > */ > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > + * syscall is triggered > + */ If adding a timestamp as close to syscall entry as possible, give it a generic name, not specific to UDP. And please explain in the commit message the reason for a new timestamp recording point: with existing timestamping the application can call clock_gettime before (and optionally after) the send call. An admin using BPF does not have this option, so needs this as part of the BPF timestamping API. > }; > > /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect > -- > 2.37.3 >
On Tue, Oct 29, 2024 at 9:07 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > From: Jason Xing <kernelxing@tencent.com> > > > > This patch behaves like how cmsg feature works, that is to say, > > check and set on each call of udp_sendmsg before passing sk_tsflags_bpf > > to cork tsflags. > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > --- > > include/net/sock.h | 1 + > > include/uapi/linux/bpf.h | 3 +++ > > net/core/skbuff.c | 2 +- > > net/ipv4/udp.c | 1 + > > tools/include/uapi/linux/bpf.h | 3 +++ > > 5 files changed, 9 insertions(+), 1 deletion(-) > > > > diff --git a/include/net/sock.h b/include/net/sock.h > > index 062f405c744e..cf7fea456455 100644 > > --- a/include/net/sock.h > > +++ b/include/net/sock.h > > @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) > > } > > > > void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); > > int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, > > int type); > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index 6fc3bd12b650..055ffa7c965c 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -7028,6 +7028,9 @@ enum { > > * feature is on. It indicates the > > * recorded timestamp. > > */ > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > + * syscall is triggered > > + */ > > }; > > > > /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > index 8b2a79c0fe1c..0b571306f7ea 100644 > > --- a/net/core/skbuff.c > > +++ b/net/core/skbuff.c > > @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, > > __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); > > } > > > > -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > { > > struct bpf_sock_ops_kern sock_ops; > > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > > index 9a20af41e272..e768421abc37 100644 > > --- a/net/ipv4/udp.c > > +++ b/net/ipv4/udp.c > > @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > if (!corkreq) { > > struct inet_cork cork; > > > > + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); > > skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, > > sizeof(struct udphdr), &ipc, &rt, > > &cork, msg->msg_flags); > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > > index 6fc3bd12b650..055ffa7c965c 100644 > > --- a/tools/include/uapi/linux/bpf.h > > +++ b/tools/include/uapi/linux/bpf.h > > @@ -7028,6 +7028,9 @@ enum { > > * feature is on. It indicates the > > * recorded timestamp. > > */ > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > + * syscall is triggered > > + */ > > If adding a timestamp as close to syscall entry as possible, give it a > generic name, not specific to UDP. Good suggestion, then it will also solve the remaining issue for TCP type: __when__ we should record the user timestamp which exists in the application SO_TIMESTAMPING feature. > > And please explain in the commit message the reason for a new > timestamp recording point: with existing timestamping the application > can call clock_gettime before (and optionally after) the send call. 
> An admin using BPF does not have this option, so needs this as part of
> the BPF timestamping API.

Will revise this part. Thanks for your description!

Thanks,
Jason
Jason Xing wrote: > On Tue, Oct 29, 2024 at 9:07 AM Willem de Bruijn > <willemdebruijn.kernel@gmail.com> wrote: > > > > Jason Xing wrote: > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > This patch behaves like how cmsg feature works, that is to say, > > > check and set on each call of udp_sendmsg before passing sk_tsflags_bpf > > > to cork tsflags. > > > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > --- > > > include/net/sock.h | 1 + > > > include/uapi/linux/bpf.h | 3 +++ > > > net/core/skbuff.c | 2 +- > > > net/ipv4/udp.c | 1 + > > > tools/include/uapi/linux/bpf.h | 3 +++ > > > 5 files changed, 9 insertions(+), 1 deletion(-) > > > > > > diff --git a/include/net/sock.h b/include/net/sock.h > > > index 062f405c744e..cf7fea456455 100644 > > > --- a/include/net/sock.h > > > +++ b/include/net/sock.h > > > @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) > > > } > > > > > > void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); > > > int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, > > > int type); > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > --- a/include/uapi/linux/bpf.h > > > +++ b/include/uapi/linux/bpf.h > > > @@ -7028,6 +7028,9 @@ enum { > > > * feature is on. It indicates the > > > * recorded timestamp. > > > */ > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > + * syscall is triggered > > > + */ > > > }; > > > > > > /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > index 8b2a79c0fe1c..0b571306f7ea 100644 > > > --- a/net/core/skbuff.c > > > +++ b/net/core/skbuff.c > > > @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, > > > __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); > > > } > > > > > > -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > { > > > struct bpf_sock_ops_kern sock_ops; > > > > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > > > index 9a20af41e272..e768421abc37 100644 > > > --- a/net/ipv4/udp.c > > > +++ b/net/ipv4/udp.c > > > @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > > if (!corkreq) { > > > struct inet_cork cork; > > > > > > + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); > > > skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, > > > sizeof(struct udphdr), &ipc, &rt, > > > &cork, msg->msg_flags); > > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > --- a/tools/include/uapi/linux/bpf.h > > > +++ b/tools/include/uapi/linux/bpf.h > > > @@ -7028,6 +7028,9 @@ enum { > > > * feature is on. It indicates the > > > * recorded timestamp. > > > */ > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > + * syscall is triggered > > > + */ > > > > If adding a timestamp as close to syscall entry as possible, give it a > > generic name, not specific to UDP. > > Good suggestion, then it will also solve the remaining issue for TCP type: > __when__ we should record the user timestamp which exists in the > application SO_TIMESTAMPING feature. 
> >
> > And please explain in the commit message the reason for a new
> > timestamp recording point: with existing timestamping the application
> > can call clock_gettime before (and optionally after) the send call.
> > An admin using BPF does not have this option, so needs this as part of
> > the BPF timestamping API.
>
> Will revise this part. Thanks for your description!

Actually, I may have misunderstood the intention of this new hook.

I thought it was to record an additional timestamp.

But it is (also?) to program skb_shared_info.tx_flags based on
instructions parsed from cmsg in __sock_cmsg_send.
On Tue, Oct 29, 2024 at 9:33 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > On Tue, Oct 29, 2024 at 9:07 AM Willem de Bruijn > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > Jason Xing wrote: > > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > > > This patch behaves like how cmsg feature works, that is to say, > > > > check and set on each call of udp_sendmsg before passing sk_tsflags_bpf > > > > to cork tsflags. > > > > > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > > --- > > > > include/net/sock.h | 1 + > > > > include/uapi/linux/bpf.h | 3 +++ > > > > net/core/skbuff.c | 2 +- > > > > net/ipv4/udp.c | 1 + > > > > tools/include/uapi/linux/bpf.h | 3 +++ > > > > 5 files changed, 9 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/include/net/sock.h b/include/net/sock.h > > > > index 062f405c744e..cf7fea456455 100644 > > > > --- a/include/net/sock.h > > > > +++ b/include/net/sock.h > > > > @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) > > > > } > > > > > > > > void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); > > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); > > > > int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, > > > > int type); > > > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > > --- a/include/uapi/linux/bpf.h > > > > +++ b/include/uapi/linux/bpf.h > > > > @@ -7028,6 +7028,9 @@ enum { > > > > * feature is on. It indicates the > > > > * recorded timestamp. > > > > */ > > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > > + * syscall is triggered > > > > + */ > > > > }; > > > > > > > > /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect > > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > > index 8b2a79c0fe1c..0b571306f7ea 100644 > > > > --- a/net/core/skbuff.c > > > > +++ b/net/core/skbuff.c > > > > @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, > > > > __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); > > > > } > > > > > > > > -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > > { > > > > struct bpf_sock_ops_kern sock_ops; > > > > > > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > > > > index 9a20af41e272..e768421abc37 100644 > > > > --- a/net/ipv4/udp.c > > > > +++ b/net/ipv4/udp.c > > > > @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > > > if (!corkreq) { > > > > struct inet_cork cork; > > > > > > > > + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); > > > > skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, > > > > sizeof(struct udphdr), &ipc, &rt, > > > > &cork, msg->msg_flags); > > > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > > --- a/tools/include/uapi/linux/bpf.h > > > > +++ b/tools/include/uapi/linux/bpf.h > > > > @@ -7028,6 +7028,9 @@ enum { > > > > * feature is on. It indicates the > > > > * recorded timestamp. > > > > */ > > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > > + * syscall is triggered > > > > + */ > > > > > > If adding a timestamp as close to syscall entry as possible, give it a > > > generic name, not specific to UDP. > > > > Good suggestion, then it will also solve the remaining issue for TCP type: > > __when__ we should record the user timestamp which exists in the > > application SO_TIMESTAMPING feature. 
> > > > > > > > And please explain in the commit message the reason for a new > > > timestamp recording point: with existing timestamping the application > > > can call clock_gettime before (and optionally after) the send call. > > > An admin using BPF does not have this option, so needs this as part of > > > the BPF timestamping API. > > > > Will revise this part. Thanks for your description! > > Actually, I may have misunderstood the intention of this new hook. > > I thought it was to record an additional timestamp. I planned to do it after this series. For now, without the new hook, it will not work for UDP type. > > But it is (also?) to program skb_shared_info.tx_flags based on > instructions parsed from cmsg in __sock_cmsg_send. I'm not sure if I grasp the key point you said. For UDP, skb_shared_info.tx_flags will finally be initialized in __ip_append_data() based on cork->tx_flags. cork->tx_flags is computed by sock_tx_timestamp() based on ipc->sockc.tsflags if cmsg feature is turned on. __sock_tx_timestamp() uses "flags |= xxx" to initialize the cork->tx_flags, so that the cork->tx_flags will not be completely overridden by either the cmsg method or bpf program, that is to say, the cork->tx_flags can combine both of them. Then another key point is that we do the check to see which one actually works in sk_tstamp_tx_flags() by testing sk->sk_tsflags or sk->sk_tsflags_bpf in patch [2/14]. It guarantees that. Thanks, Jason
Jason Xing wrote: > On Tue, Oct 29, 2024 at 9:33 AM Willem de Bruijn > <willemdebruijn.kernel@gmail.com> wrote: > > > > Jason Xing wrote: > > > On Tue, Oct 29, 2024 at 9:07 AM Willem de Bruijn > > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > > > Jason Xing wrote: > > > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > > > > > This patch behaves like how cmsg feature works, that is to say, > > > > > check and set on each call of udp_sendmsg before passing sk_tsflags_bpf > > > > > to cork tsflags. > > > > > > > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > > > --- > > > > > include/net/sock.h | 1 + > > > > > include/uapi/linux/bpf.h | 3 +++ > > > > > net/core/skbuff.c | 2 +- > > > > > net/ipv4/udp.c | 1 + > > > > > tools/include/uapi/linux/bpf.h | 3 +++ > > > > > 5 files changed, 9 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/include/net/sock.h b/include/net/sock.h > > > > > index 062f405c744e..cf7fea456455 100644 > > > > > --- a/include/net/sock.h > > > > > +++ b/include/net/sock.h > > > > > @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) > > > > > } > > > > > > > > > > void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); > > > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); > > > > > int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, > > > > > int type); > > > > > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > > > --- a/include/uapi/linux/bpf.h > > > > > +++ b/include/uapi/linux/bpf.h > > > > > @@ -7028,6 +7028,9 @@ enum { > > > > > * feature is on. It indicates the > > > > > * recorded timestamp. > > > > > */ > > > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > > > + * syscall is triggered > > > > > + */ > > > > > }; > > > > > > > > > > /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect > > > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > > > index 8b2a79c0fe1c..0b571306f7ea 100644 > > > > > --- a/net/core/skbuff.c > > > > > +++ b/net/core/skbuff.c > > > > > @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, > > > > > __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); > > > > > } > > > > > > > > > > -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > > > { > > > > > struct bpf_sock_ops_kern sock_ops; > > > > > > > > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > > > > > index 9a20af41e272..e768421abc37 100644 > > > > > --- a/net/ipv4/udp.c > > > > > +++ b/net/ipv4/udp.c > > > > > @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > > > > if (!corkreq) { > > > > > struct inet_cork cork; > > > > > > > > > > + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); > > > > > skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, > > > > > sizeof(struct udphdr), &ipc, &rt, > > > > > &cork, msg->msg_flags); > > > > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > > > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > > > --- a/tools/include/uapi/linux/bpf.h > > > > > +++ b/tools/include/uapi/linux/bpf.h > > > > > @@ -7028,6 +7028,9 @@ enum { > > > > > * feature is on. It indicates the > > > > > * recorded timestamp. > > > > > */ > > > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > > > + * syscall is triggered > > > > > + */ > > > > > > > > If adding a timestamp as close to syscall entry as possible, give it a > > > > generic name, not specific to UDP. 
> > > > > > Good suggestion, then it will also solve the remaining issue for TCP type: > > > __when__ we should record the user timestamp which exists in the > > > application SO_TIMESTAMPING feature. > > > > > > > > > > > And please explain in the commit message the reason for a new > > > > timestamp recording point: with existing timestamping the application > > > > can call clock_gettime before (and optionally after) the send call. > > > > An admin using BPF does not have this option, so needs this as part of > > > > the BPF timestamping API. > > > > > > Will revise this part. Thanks for your description! > > > > Actually, I may have misunderstood the intention of this new hook. > > > > I thought it was to record an additional timestamp. > > I planned to do it after this series. For now, without the new hook, > it will not work for UDP type. Why not? This is something specific to the SK BPF hooks, I suppose? As soon as bpf_setsockopt is called, the timestamp callbacks should start getting called? > > > > But it is (also?) to program skb_shared_info.tx_flags based on > > instructions parsed from cmsg in __sock_cmsg_send. > > I'm not sure if I grasp the key point you said. > > For UDP, skb_shared_info.tx_flags will finally be initialized in > __ip_append_data() based on cork->tx_flags. > > cork->tx_flags is computed by sock_tx_timestamp() based on > ipc->sockc.tsflags if cmsg feature is turned on. > > __sock_tx_timestamp() uses "flags |= xxx" to initialize the > cork->tx_flags, so that the cork->tx_flags will not be completely > overridden by either the cmsg method or bpf program, that is to say, > the cork->tx_flags can combine both of them. > > Then another key point is that we do the check to see which one > actually works in sk_tstamp_tx_flags() by testing sk->sk_tsflags or > sk->sk_tsflags_bpf in patch [2/14]. It guarantees that. Ack, thanks. So I was mistaken the second time around.
On Tue, Oct 29, 2024 at 11:04 PM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > On Tue, Oct 29, 2024 at 9:33 AM Willem de Bruijn > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > Jason Xing wrote: > > > > On Tue, Oct 29, 2024 at 9:07 AM Willem de Bruijn > > > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > > > > > Jason Xing wrote: > > > > > > From: Jason Xing <kernelxing@tencent.com> > > > > > > > > > > > > This patch behaves like how cmsg feature works, that is to say, > > > > > > check and set on each call of udp_sendmsg before passing sk_tsflags_bpf > > > > > > to cork tsflags. > > > > > > > > > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > > > > > --- > > > > > > include/net/sock.h | 1 + > > > > > > include/uapi/linux/bpf.h | 3 +++ > > > > > > net/core/skbuff.c | 2 +- > > > > > > net/ipv4/udp.c | 1 + > > > > > > tools/include/uapi/linux/bpf.h | 3 +++ > > > > > > 5 files changed, 9 insertions(+), 1 deletion(-) > > > > > > > > > > > > diff --git a/include/net/sock.h b/include/net/sock.h > > > > > > index 062f405c744e..cf7fea456455 100644 > > > > > > --- a/include/net/sock.h > > > > > > +++ b/include/net/sock.h > > > > > > @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) > > > > > > } > > > > > > > > > > > > void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); > > > > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); > > > > > > int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, > > > > > > int type); > > > > > > > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > > > > --- a/include/uapi/linux/bpf.h > > > > > > +++ b/include/uapi/linux/bpf.h > > > > > > @@ -7028,6 +7028,9 @@ enum { > > > > > > * feature is on. It indicates the > > > > > > * recorded timestamp. 
> > > > > > */ > > > > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > > > > + * syscall is triggered > > > > > > + */ > > > > > > }; > > > > > > > > > > > > /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect > > > > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > > > > index 8b2a79c0fe1c..0b571306f7ea 100644 > > > > > > --- a/net/core/skbuff.c > > > > > > +++ b/net/core/skbuff.c > > > > > > @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, > > > > > > __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); > > > > > > } > > > > > > > > > > > > -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > > > > +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) > > > > > > { > > > > > > struct bpf_sock_ops_kern sock_ops; > > > > > > > > > > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > > > > > > index 9a20af41e272..e768421abc37 100644 > > > > > > --- a/net/ipv4/udp.c > > > > > > +++ b/net/ipv4/udp.c > > > > > > @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) > > > > > > if (!corkreq) { > > > > > > struct inet_cork cork; > > > > > > > > > > > > + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); > > > > > > skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, > > > > > > sizeof(struct udphdr), &ipc, &rt, > > > > > > &cork, msg->msg_flags); > > > > > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > > > > > > index 6fc3bd12b650..055ffa7c965c 100644 > > > > > > --- a/tools/include/uapi/linux/bpf.h > > > > > > +++ b/tools/include/uapi/linux/bpf.h > > > > > > @@ -7028,6 +7028,9 @@ enum { > > > > > > * feature is on. It indicates the > > > > > > * recorded timestamp. 
> > > > > > */ > > > > > > + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg > > > > > > + * syscall is triggered > > > > > > + */ > > > > > > > > > > If adding a timestamp as close to syscall entry as possible, give it a > > > > > generic name, not specific to UDP. > > > > > > > > Good suggestion, then it will also solve the remaining issue for TCP type: > > > > __when__ we should record the user timestamp which exists in the > > > > application SO_TIMESTAMPING feature. > > > > > > > > > > > > > > And please explain in the commit message the reason for a new > > > > > timestamp recording point: with existing timestamping the application > > > > > can call clock_gettime before (and optionally after) the send call. > > > > > An admin using BPF does not have this option, so needs this as part of > > > > > the BPF timestamping API. > > > > > > > > Will revise this part. Thanks for your description! > > > > > > Actually, I may have misunderstood the intention of this new hook. > > > > > > I thought it was to record an additional timestamp. > > > > I planned to do it after this series. For now, without the new hook, > > it will not work for UDP type. > > Why not? This is something specific to the SK BPF hooks, I suppose? I mean both hooks (one for UDP, one for USR time) are significant. > > As soon as bpf_setsockopt is called, the timestamp callbacks should > start getting called? Right, but the question is when we trigger the call of bpf_setsockopt() for the UDP proto? The current patch is trying to deal with it. > > > > > > > But it is (also?) to program skb_shared_info.tx_flags based on > > > instructions parsed from cmsg in __sock_cmsg_send. > > > > I'm not sure if I grasp the key point you said. > > > > For UDP, skb_shared_info.tx_flags will finally be initialized in > > __ip_append_data() based on cork->tx_flags. > > > > cork->tx_flags is computed by sock_tx_timestamp() based on > > ipc->sockc.tsflags if cmsg feature is turned on. 
> >
> > __sock_tx_timestamp() uses "flags |= xxx" to initialize the
> > cork->tx_flags, so that the cork->tx_flags will not be completely
> > overridden by either the cmsg method or bpf program, that is to say,
> > the cork->tx_flags can combine both of them.
> >
> > Then another key point is that we do the check to see which one
> > actually works in sk_tstamp_tx_flags() by testing sk->sk_tsflags or
> > sk->sk_tsflags_bpf in patch [2/14]. It guarantees that.
>
> Ack, thanks. So I was mistaken the second time around.

Thanks for your review :)

Thanks,
Jason
diff --git a/include/net/sock.h b/include/net/sock.h index 062f405c744e..cf7fea456455 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2828,6 +2828,7 @@ static inline bool sk_listener_or_tw(const struct sock *sk) } void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args); int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc3bd12b650..055ffa7c965c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7028,6 +7028,9 @@ enum { * feature is on. It indicates the * recorded timestamp. */ + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg + * syscall is triggered + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8b2a79c0fe1c..0b571306f7ea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5622,7 +5622,7 @@ static void skb_tstamp_tx_output(struct sk_buff *orig_skb, __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } -static void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) +void timestamp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args) { struct bpf_sock_ops_kern sock_ops; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9a20af41e272..e768421abc37 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1264,6 +1264,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!corkreq) { struct inet_cork cork; + timestamp_call_bpf(sk, BPF_SOCK_OPS_TS_UDP_SND_CB, 0, NULL); skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, &cork, msg->msg_flags); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fc3bd12b650..055ffa7c965c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7028,6 +7028,9 @@ enum { * 
feature is on. It indicates the * recorded timestamp. */ + BPF_SOCK_OPS_TS_UDP_SND_CB, /* Called when every udp_sendmsg + * syscall is triggered + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect