Message ID | 20241012040651.95616-3-kerneljasonxing@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net-timestamp: bpf extension to equip applications transparently | expand |
Jason Xing wrote: > From: Jason Xing <kernelxing@tencent.com> > > For now, we support bpf_setsockopt only TX timestamps flags. Users > can use something like this in bpf program to turn on the feature: > > flags = SOF_TIMESTAMPING_TX_SCHED; > bpf_setsockopt(skops, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)); > > Later, I will support each Tx flags one by one based on this. > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > --- > include/net/sock.h | 2 ++ > net/core/filter.c | 27 +++++++++++++++++++++++++++ > net/core/sock.c | 35 ++++++++++++++++++++++++----------- > 3 files changed, 53 insertions(+), 11 deletions(-) > > diff --git a/include/net/sock.h b/include/net/sock.h > index 8cf278c957b3..66ecd78f1dfe 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -2890,6 +2890,8 @@ void sock_def_readable(struct sock *sk); > > int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); > void sock_set_timestamp(struct sock *sk, int optname, bool valbool); > +int sock_get_timestamping(struct so_timestamping *timestamping, > + sockptr_t optval, unsigned int optlen); > int sock_set_timestamping(struct sock *sk, int optname, > struct so_timestamping timestamping); > > diff --git a/net/core/filter.c b/net/core/filter.c > index bd0d08bf76bb..996426095bd9 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5204,10 +5204,30 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { > .arg1_type = ARG_PTR_TO_CTX, > }; > > +static int bpf_sock_set_timestamping(struct sock *sk, > + struct so_timestamping *timestamping) > +{ > + u32 flags = timestamping->flags; > + > + if (flags & ~SOF_TIMESTAMPING_MASK) > + return -EINVAL; > + > + if (!(flags & (SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | > + SOF_TIMESTAMPING_TX_ACK))) > + return -EINVAL; > + > + WRITE_ONCE(sk->sk_tsflags[BPFPROG_TS_REQUESTOR], flags); > + > + return 0; > +} > + > static int sol_socket_sockopt(struct sock *sk, int optname, > char *optval, int *optlen, > bool getopt) > { > + struct so_timestamping ts; > + int ret = 0; > + > switch (optname) { > case SO_REUSEADDR: > case SO_SNDBUF: > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > break; > case SO_BINDTODEVICE: > break; > + case SO_TIMESTAMPING_NEW: > + case SO_TIMESTAMPING_OLD: > + ret = sock_get_timestamping(&ts, KERNEL_SOCKPTR(optval), > + *optlen); > + if (!ret) > + ret = bpf_sock_set_timestamping(sk, &ts); > + return ret; > default: > return -EINVAL; > } > diff --git a/net/core/sock.c b/net/core/sock.c > index 52c8c5a5ba27..a6e0d51a5f72 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -894,6 +894,27 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) > return 0; > } > > +int sock_get_timestamping(struct so_timestamping *timestamping, > + sockptr_t optval, unsigned int optlen) > +{ > + int val; > + > + if (copy_from_sockptr(&val, optval, sizeof(val))) > + return -EFAULT; Ideally don't read this again. If you do, then move it in the else clause. > + > + if (optlen == sizeof(*timestamping)) { > + if (copy_from_sockptr(timestamping, optval, > + sizeof(*timestamping))) { > + return -EFAULT; > + } > + } else { > + memset(timestamping, 0, sizeof(*timestamping)); > + timestamping->flags = val; > + } > + > + return 0; > +} > + > int sock_set_timestamping(struct sock *sk, int optname, > struct so_timestamping timestamping) > { > @@ -1402,17 +1423,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname, > > case SO_TIMESTAMPING_NEW: > case SO_TIMESTAMPING_OLD: > - if (optlen == sizeof(timestamping)) { > - if (copy_from_sockptr(×tamping, optval, > - sizeof(timestamping))) { > - ret = -EFAULT; > - break; > - } > - } else { > - memset(×tamping, 0, sizeof(timestamping)); > - timestamping.flags = val; > - } > - ret = sock_set_timestamping(sk, optname, timestamping); > + ret = sock_get_timestamping(×tamping, optval, optlen); > + if (!ret) > + ret = sock_set_timestamping(sk, optname, timestamping); > break; > > case SO_RCVLOWAT: > -- > 2.37.3 >
On Tue, Oct 15, 2024 at 9:34 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > From: Jason Xing <kernelxing@tencent.com> > > > > For now, we support bpf_setsockopt only TX timestamps flags. Users > > can use something like this in bpf program to turn on the feature: > > > > flags = SOF_TIMESTAMPING_TX_SCHED; > > bpf_setsockopt(skops, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)); > > > > Later, I will support each Tx flags one by one based on this. > > > > Signed-off-by: Jason Xing <kernelxing@tencent.com> > > --- > > include/net/sock.h | 2 ++ > > net/core/filter.c | 27 +++++++++++++++++++++++++++ > > net/core/sock.c | 35 ++++++++++++++++++++++++----------- > > 3 files changed, 53 insertions(+), 11 deletions(-) > > > > diff --git a/include/net/sock.h b/include/net/sock.h > > index 8cf278c957b3..66ecd78f1dfe 100644 > > --- a/include/net/sock.h > > +++ b/include/net/sock.h > > @@ -2890,6 +2890,8 @@ void sock_def_readable(struct sock *sk); > > > > int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); > > void sock_set_timestamp(struct sock *sk, int optname, bool valbool); > > +int sock_get_timestamping(struct so_timestamping *timestamping, > > + sockptr_t optval, unsigned int optlen); > > int sock_set_timestamping(struct sock *sk, int optname, > > struct so_timestamping timestamping); > > > > diff --git a/net/core/filter.c b/net/core/filter.c > > index bd0d08bf76bb..996426095bd9 100644 > > --- a/net/core/filter.c > > +++ b/net/core/filter.c > > @@ -5204,10 +5204,30 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { > > .arg1_type = ARG_PTR_TO_CTX, > > }; > > > > +static int bpf_sock_set_timestamping(struct sock *sk, > > + struct so_timestamping *timestamping) > > +{ > > + u32 flags = timestamping->flags; > > + > > + if (flags & ~SOF_TIMESTAMPING_MASK) > > + return -EINVAL; > > + > > + if (!(flags & (SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | > > + SOF_TIMESTAMPING_TX_ACK))) > > + return -EINVAL; > > + > > + WRITE_ONCE(sk->sk_tsflags[BPFPROG_TS_REQUESTOR], flags); > > + > > + return 0; > > +} > > + > > static int sol_socket_sockopt(struct sock *sk, int optname, > > char *optval, int *optlen, > > bool getopt) > > { > > + struct so_timestamping ts; > > + int ret = 0; > > + > > switch (optname) { > > case SO_REUSEADDR: > > case SO_SNDBUF: > > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > > break; > > case SO_BINDTODEVICE: > > break; > > + case SO_TIMESTAMPING_NEW: > > + case SO_TIMESTAMPING_OLD: > > + ret = sock_get_timestamping(&ts, KERNEL_SOCKPTR(optval), > > + *optlen); > > + if (!ret) > > + ret = bpf_sock_set_timestamping(sk, &ts); > > + return ret; > > default: > > return -EINVAL; > > } > > diff --git a/net/core/sock.c b/net/core/sock.c > > index 52c8c5a5ba27..a6e0d51a5f72 100644 > > --- a/net/core/sock.c > > +++ b/net/core/sock.c > > @@ -894,6 +894,27 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) > > return 0; > > } > > > > +int sock_get_timestamping(struct so_timestamping *timestamping, > > + sockptr_t optval, unsigned int optlen) > > +{ > > + int val; > > + > > + if (copy_from_sockptr(&val, optval, sizeof(val))) > > + return -EFAULT; > > Ideally don't read this again. > > If you do, then move it in the else clause. Thanks, I will do that.
On 10/11/24 9:06 PM, Jason Xing wrote: > static int sol_socket_sockopt(struct sock *sk, int optname, > char *optval, int *optlen, > bool getopt) > { > + struct so_timestamping ts; > + int ret = 0; > + > switch (optname) { > case SO_REUSEADDR: > case SO_SNDBUF: > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > break; > case SO_BINDTODEVICE: > break; > + case SO_TIMESTAMPING_NEW: > + case SO_TIMESTAMPING_OLD: How about remove the "_OLD" support ?
Martin KaFai Lau wrote: > On 10/11/24 9:06 PM, Jason Xing wrote: > > static int sol_socket_sockopt(struct sock *sk, int optname, > > char *optval, int *optlen, > > bool getopt) > > { > > + struct so_timestamping ts; > > + int ret = 0; > > + > > switch (optname) { > > case SO_REUSEADDR: > > case SO_SNDBUF: > > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > > break; > > case SO_BINDTODEVICE: > > break; > > + case SO_TIMESTAMPING_NEW: > > + case SO_TIMESTAMPING_OLD: > > How about remove the "_OLD" support ? +1 I forgot to mention that yesterday.
On 10/11/24 9:06 PM, Jason Xing wrote: > +static int bpf_sock_set_timestamping(struct sock *sk, > + struct so_timestamping *timestamping) > +{ > + u32 flags = timestamping->flags; > + > + if (flags & ~SOF_TIMESTAMPING_MASK) > + return -EINVAL; > + > + if (!(flags & (SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | > + SOF_TIMESTAMPING_TX_ACK))) hmm... Does it mean at least one of the bit must be set and cannot be completely cleared once it has been set before? > + return -EINVAL; > + > + WRITE_ONCE(sk->sk_tsflags[BPFPROG_TS_REQUESTOR], flags); > + > + return 0; > +}
On Wed, Oct 16, 2024 at 5:33 AM Martin KaFai Lau <martin.lau@linux.dev> wrote: > > On 10/11/24 9:06 PM, Jason Xing wrote: > > static int sol_socket_sockopt(struct sock *sk, int optname, > > char *optval, int *optlen, > > bool getopt) > > { > > + struct so_timestamping ts; > > + int ret = 0; > > + > > switch (optname) { > > case SO_REUSEADDR: > > case SO_SNDBUF: > > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > > break; > > case SO_BINDTODEVICE: > > break; > > + case SO_TIMESTAMPING_NEW: > > + case SO_TIMESTAMPING_OLD: > > How about remove the "_OLD" support ? Will do that. Thanks!
On Wed, Oct 16, 2024 at 7:54 AM Martin KaFai Lau <martin.lau@linux.dev> wrote: > > On 10/11/24 9:06 PM, Jason Xing wrote: > > +static int bpf_sock_set_timestamping(struct sock *sk, > > + struct so_timestamping *timestamping) > > +{ > > + u32 flags = timestamping->flags; > > + > > + if (flags & ~SOF_TIMESTAMPING_MASK) > > + return -EINVAL; > > + > > + if (!(flags & (SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | > > + SOF_TIMESTAMPING_TX_ACK))) > > hmm... Does it mean at least one of the bit must be set and cannot be completely > cleared once it has been set before? Yes. Because in the current BPF extension feature I don't support all the original SO_TIMESTAMPING flags (SOF_TIMESTAMPING_*) . When it comes to clearing flags, I cannot find a proper time/chance to clear them. That's the reason why I don't implement it.
On Wed, Oct 16, 2024 at 5:56 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Martin KaFai Lau wrote: > > On 10/11/24 9:06 PM, Jason Xing wrote: > > > static int sol_socket_sockopt(struct sock *sk, int optname, > > > char *optval, int *optlen, > > > bool getopt) > > > { > > > + struct so_timestamping ts; > > > + int ret = 0; > > > + > > > switch (optname) { > > > case SO_REUSEADDR: > > > case SO_SNDBUF: > > > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > > > break; > > > case SO_BINDTODEVICE: > > > break; > > > + case SO_TIMESTAMPING_NEW: > > > + case SO_TIMESTAMPING_OLD: > > > > How about remove the "_OLD" support ? > > +1 I forgot to mention that yesterday. Hello Willem, Martin, I did a test on this and found that if we only use SO_TIMESTAMPING_NEW, we will never enter the real set sk_tsflags_bpf logic, unless there is "case SO_TIMESTAMPING_OLD". And I checked SO_TIMESTAMPING in include/uapi/asm-generic/socket.h: #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) /* on 64-bit and x32, avoid the ?: operator */ ... #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD ... #else ... #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) ... #endif The SO_TIMESTAMPING is defined as SO_TIMESTAMPING_OLD. I wonder if I missed something? Thanks in advance. Thanks, Jason
Jason Xing wrote: > On Wed, Oct 16, 2024 at 5:56 AM Willem de Bruijn > <willemdebruijn.kernel@gmail.com> wrote: > > > > Martin KaFai Lau wrote: > > > On 10/11/24 9:06 PM, Jason Xing wrote: > > > > static int sol_socket_sockopt(struct sock *sk, int optname, > > > > char *optval, int *optlen, > > > > bool getopt) > > > > { > > > > + struct so_timestamping ts; > > > > + int ret = 0; > > > > + > > > > switch (optname) { > > > > case SO_REUSEADDR: > > > > case SO_SNDBUF: > > > > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > > > > break; > > > > case SO_BINDTODEVICE: > > > > break; > > > > + case SO_TIMESTAMPING_NEW: > > > > + case SO_TIMESTAMPING_OLD: > > > > > > How about remove the "_OLD" support ? > > > > +1 I forgot to mention that yesterday. > > Hello Willem, Martin, > > I did a test on this and found that if we only use > SO_TIMESTAMPING_NEW, we will never enter the real set sk_tsflags_bpf > logic, unless there is "case SO_TIMESTAMPING_OLD". > > And I checked SO_TIMESTAMPING in include/uapi/asm-generic/socket.h: > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > /* on 64-bit and x32, avoid the ?: operator */ > ... > #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD > ... > #else > ... > #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? > SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) > ... > #endif > > The SO_TIMESTAMPING is defined as SO_TIMESTAMPING_OLD. I wonder if I > missed something? Thanks in advance. The _NEW vs _OLD aim to deal with y2038 issues on 32-bit platforms. For new APIs, like BPF timestamping, we should always use the safe structs, such as timespec64. Then we can just use SO_TIMESTAMPING without the NEW or OLD suffix.
On Wed, Oct 23, 2024 at 8:06 AM Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote: > > Jason Xing wrote: > > On Wed, Oct 16, 2024 at 5:56 AM Willem de Bruijn > > <willemdebruijn.kernel@gmail.com> wrote: > > > > > > Martin KaFai Lau wrote: > > > > On 10/11/24 9:06 PM, Jason Xing wrote: > > > > > static int sol_socket_sockopt(struct sock *sk, int optname, > > > > > char *optval, int *optlen, > > > > > bool getopt) > > > > > { > > > > > + struct so_timestamping ts; > > > > > + int ret = 0; > > > > > + > > > > > switch (optname) { > > > > > case SO_REUSEADDR: > > > > > case SO_SNDBUF: > > > > > @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, > > > > > break; > > > > > case SO_BINDTODEVICE: > > > > > break; > > > > > + case SO_TIMESTAMPING_NEW: > > > > > + case SO_TIMESTAMPING_OLD: > > > > > > > > How about remove the "_OLD" support ? > > > > > > +1 I forgot to mention that yesterday. > > > > Hello Willem, Martin, > > > > I did a test on this and found that if we only use > > SO_TIMESTAMPING_NEW, we will never enter the real set sk_tsflags_bpf > > logic, unless there is "case SO_TIMESTAMPING_OLD". > > > > And I checked SO_TIMESTAMPING in include/uapi/asm-generic/socket.h: > > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > > /* on 64-bit and x32, avoid the ?: operator */ > > ... > > #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD > > ... > > #else > > ... > > #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? > > SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) > > ... > > #endif > > > > The SO_TIMESTAMPING is defined as SO_TIMESTAMPING_OLD. I wonder if I > > missed something? Thanks in advance. > > The _NEW vs _OLD aim to deal with y2038 issues on 32-bit platforms. > > For new APIs, like BPF timestamping, we should always use the safe > structs, such as timespec64. Thanks, I learned a lot. > > Then we can just use SO_TIMESTAMPING without the NEW or OLD suffix. Weird thing is that the SO_TIMESTAMPING would be converted to SO_TIMESTAMPING_OLD in kernel if I use this : bpf_setsockopt(skops, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)); As I mentioned before, SO_TIMESTAMPING exists in include/uapi/asm-generic/socket.h: #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) /* on 64-bit and x32, avoid the ?: operator */ ... #define SO_TIMESTAMPING SO_TIMESTAMPING_OLD ... #else ... #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) ... #endif So I wonder if there is something unexpected? BTW, I conducted the test on a VM with x86_64 cpu. Thanks, Jason
diff --git a/include/net/sock.h b/include/net/sock.h index 8cf278c957b3..66ecd78f1dfe 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2890,6 +2890,8 @@ void sock_def_readable(struct sock *sk); int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); void sock_set_timestamp(struct sock *sk, int optname, bool valbool); +int sock_get_timestamping(struct so_timestamping *timestamping, + sockptr_t optval, unsigned int optlen); int sock_set_timestamping(struct sock *sk, int optname, struct so_timestamping timestamping); diff --git a/net/core/filter.c b/net/core/filter.c index bd0d08bf76bb..996426095bd9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5204,10 +5204,30 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static int bpf_sock_set_timestamping(struct sock *sk, + struct so_timestamping *timestamping) +{ + u32 flags = timestamping->flags; + + if (flags & ~SOF_TIMESTAMPING_MASK) + return -EINVAL; + + if (!(flags & (SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK))) + return -EINVAL; + + WRITE_ONCE(sk->sk_tsflags[BPFPROG_TS_REQUESTOR], flags); + + return 0; +} + static int sol_socket_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { + struct so_timestamping ts; + int ret = 0; + switch (optname) { case SO_REUSEADDR: case SO_SNDBUF: @@ -5225,6 +5245,13 @@ static int sol_socket_sockopt(struct sock *sk, int optname, break; case SO_BINDTODEVICE: break; + case SO_TIMESTAMPING_NEW: + case SO_TIMESTAMPING_OLD: + ret = sock_get_timestamping(&ts, KERNEL_SOCKPTR(optval), + *optlen); + if (!ret) + ret = bpf_sock_set_timestamping(sk, &ts); + return ret; default: return -EINVAL; } diff --git a/net/core/sock.c b/net/core/sock.c index 52c8c5a5ba27..a6e0d51a5f72 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -894,6 +894,27 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) return 0; } +int sock_get_timestamping(struct so_timestamping *timestamping, + sockptr_t optval, unsigned int optlen) +{ + int val; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + if (optlen == sizeof(*timestamping)) { + if (copy_from_sockptr(timestamping, optval, + sizeof(*timestamping))) { + return -EFAULT; + } + } else { + memset(timestamping, 0, sizeof(*timestamping)); + timestamping->flags = val; + } + + return 0; +} + int sock_set_timestamping(struct sock *sk, int optname, struct so_timestamping timestamping) { @@ -1402,17 +1423,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname, case SO_TIMESTAMPING_NEW: case SO_TIMESTAMPING_OLD: - if (optlen == sizeof(timestamping)) { - if (copy_from_sockptr(×tamping, optval, - sizeof(timestamping))) { - ret = -EFAULT; - break; - } - } else { - memset(×tamping, 0, sizeof(timestamping)); - timestamping.flags = val; - } - ret = sock_set_timestamping(sk, optname, timestamping); + ret = sock_get_timestamping(×tamping, optval, optlen); + if (!ret) + ret = sock_set_timestamping(sk, optname, timestamping); break; case SO_RCVLOWAT: