Message ID | 20240902130937.457115-1-vadfed@meta.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,v2,1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message | expand |
Vadim Fedorenko wrote: > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > timestamps and packets sent via socket. Unfortunately, there is no way > to reliably predict socket timestamp ID value in case of error returned > by sendmsg. For UDP sockets it's impossible because of lockless > nature of UDP transmit, several threads may send packets in parallel. In > case of RAW sockets MSG_MORE option makes things complicated. More > details are in the conversation [1]. > This patch adds new control message type to give user-space > software an opportunity to control the mapping between packets and > values by providing ID with each sendmsg. This works fine for UDP > sockets only, and explicit check is added to control message parser. > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > --- > Documentation/networking/timestamping.rst | 14 ++++++++++++++ > arch/alpha/include/uapi/asm/socket.h | 4 +++- > arch/mips/include/uapi/asm/socket.h | 2 ++ > arch/parisc/include/uapi/asm/socket.h | 2 ++ > arch/sparc/include/uapi/asm/socket.h | 2 ++ > include/net/inet_sock.h | 4 +++- > include/net/sock.h | 1 + > include/uapi/asm-generic/socket.h | 2 ++ > include/uapi/linux/net_tstamp.h | 3 ++- > net/core/sock.c | 12 ++++++++++++ > net/ethtool/common.c | 1 + > net/ipv4/ip_output.c | 16 ++++++++++++---- > net/ipv6/ip6_output.c | 16 ++++++++++++---- > 13 files changed, 68 insertions(+), 11 deletions(-) > > diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst > index 5e93cd71f99f..93b0901e4e8e 100644 > --- a/Documentation/networking/timestamping.rst > +++ b/Documentation/networking/timestamping.rst > @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: > among all possibly concurrently outstanding timestamp requests for > that socket. > > + With this option enabled user-space application can provide custom > + ID for each message sent via UDP socket with control message with > + type set to SCM_TS_OPT_ID:: > + > + struct msghdr *msg; > + ... > + cmsg = CMSG_FIRSTHDR(msg); > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SO_TIMESTAMPING; > + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); > + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; > + err = sendmsg(fd, msg, 0); > + Please make it clear that this CMSG is optional. The process can optionally override the default generated ID, by passing a specific ID with control message SCM_TS_OPT_ID: > SOF_TIMESTAMPING_OPT_ID_TCP: > Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP > timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > index e94f621903fe..0698e6662cdf 100644 > --- a/arch/alpha/include/uapi/asm/socket.h > +++ b/arch/alpha/include/uapi/asm/socket.h > @@ -10,7 +10,7 @@ > * Note: we only bother about making the SOL_SOCKET options > * same as OSF/1, as that's all that "normal" programs are > * likely to set. We don't necessarily want to be binary > - * compatible with _everything_. > + * compatible with _everything_. Is this due to a checkpatch warning? If so, please add a brief comment to the commit message to show that this change is intentional. If not, please don't touch unrelated code. > */ > #define SOL_SOCKET 0xffff > > @@ -140,6 +140,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > index 60ebaed28a4c..bb3dc8feb205 100644 > --- a/arch/mips/include/uapi/asm/socket.h > +++ b/arch/mips/include/uapi/asm/socket.h > @@ -151,6 +151,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > index be264c2b1a11..c3ab3b3289eb 100644 > --- a/arch/parisc/include/uapi/asm/socket.h > +++ b/arch/parisc/include/uapi/asm/socket.h > @@ -132,6 +132,8 @@ > #define SO_PASSPIDFD 0x404A > #define SO_PEERPIDFD 0x404B > > +#define SCM_TS_OPT_ID 0x404C > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > index 682da3714686..9b40f0a57fbc 100644 > --- a/arch/sparc/include/uapi/asm/socket.h > +++ b/arch/sparc/include/uapi/asm/socket.h > @@ -133,6 +133,8 @@ > #define SO_PASSPIDFD 0x0055 > #define SO_PEERPIDFD 0x0056 > > +#define SCM_TS_OPT_ID 0x0057 > + > #if !defined(__KERNEL__) > > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > index 394c3b66065e..2161d50cf0fd 100644 > --- a/include/net/inet_sock.h > +++ b/include/net/inet_sock.h > @@ -174,6 +174,7 @@ struct inet_cork { > __s16 tos; > char priority; > __u16 gso_size; > + u32 ts_opt_id; > u64 transmit_time; > u32 mark; > }; > @@ -241,7 +242,8 @@ struct inet_sock { > struct inet_cork_full cork; > }; > > -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ typo: timestamp And maybe more relevant: /* ts_opt_id field is valid, overriding sk_tskey */ > enum { > INET_FLAGS_PKTINFO = 0, > diff --git a/include/net/sock.h b/include/net/sock.h > index f51d61fab059..73e21dad5660 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > u64 transmit_time; > u32 mark; > u32 tsflags; > + u32 ts_opt_id; > }; > > static inline void sockcm_init(struct sockcm_cookie *sockc, > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > index 8ce8a39a1e5f..db3df3e74b01 100644 > --- a/include/uapi/asm-generic/socket.h > +++ b/include/uapi/asm-generic/socket.h > @@ -135,6 +135,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 nit: different indentation > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..e2f145e3f3a1 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -32,8 +32,9 @@ enum { > SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), > > - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, > + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_CMSG, > SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | > SOF_TIMESTAMPING_LAST > }; > diff --git a/net/core/sock.c b/net/core/sock.c > index 468b1239606c..560b075765fa 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, > return -EINVAL; > sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); > break; > + case SCM_TS_OPT_ID: > + /* allow this option for UDP sockets only */ > + if (!sk_is_udp(sk)) > + return -EINVAL; Let's relax the restriction that this is only for UDP. At least to also support SOCK_RAW. I don't think that requires any additional code at all? Extending to TCP should be straightforward too, just a branch on sockc in tcp_tx_timestamp. If so, let's support all. It makes for a simpler API if it is supported uniformly wherever OPT_ID is. > + tsflags = READ_ONCE(sk->sk_tsflags); > + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) > + return -EINVAL; > + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) > + return -EINVAL; > + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); > + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; > + break; > /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ > case SCM_RIGHTS: > case SCM_CREDENTIALS:
On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: > > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > timestamps and packets sent via socket. Unfortunately, there is no way > to reliably predict socket timestamp ID value in case of error returned > by sendmsg. For UDP sockets it's impossible because of lockless > nature of UDP transmit, several threads may send packets in parallel. In > case of RAW sockets MSG_MORE option makes things complicated. More > details are in the conversation [1]. > This patch adds new control message type to give user-space > software an opportunity to control the mapping between packets and > values by providing ID with each sendmsg. This works fine for UDP > sockets only, and explicit check is added to control message parser. > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > --- > Documentation/networking/timestamping.rst | 14 ++++++++++++++ > arch/alpha/include/uapi/asm/socket.h | 4 +++- > arch/mips/include/uapi/asm/socket.h | 2 ++ > arch/parisc/include/uapi/asm/socket.h | 2 ++ > arch/sparc/include/uapi/asm/socket.h | 2 ++ > include/net/inet_sock.h | 4 +++- > include/net/sock.h | 1 + > include/uapi/asm-generic/socket.h | 2 ++ > include/uapi/linux/net_tstamp.h | 3 ++- > net/core/sock.c | 12 ++++++++++++ > net/ethtool/common.c | 1 + > net/ipv4/ip_output.c | 16 ++++++++++++---- > net/ipv6/ip6_output.c | 16 ++++++++++++---- > 13 files changed, 68 insertions(+), 11 deletions(-) > > diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst > index 5e93cd71f99f..93b0901e4e8e 100644 > --- a/Documentation/networking/timestamping.rst > +++ b/Documentation/networking/timestamping.rst > @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: > among all possibly concurrently outstanding timestamp requests for > that socket. > > + With this option enabled user-space application can provide custom > + ID for each message sent via UDP socket with control message with > + type set to SCM_TS_OPT_ID:: > + > + struct msghdr *msg; > + ... > + cmsg = CMSG_FIRSTHDR(msg); > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SO_TIMESTAMPING; > + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); > + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; > + err = sendmsg(fd, msg, 0); > + > + > SOF_TIMESTAMPING_OPT_ID_TCP: > Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP > timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > index e94f621903fe..0698e6662cdf 100644 > --- a/arch/alpha/include/uapi/asm/socket.h > +++ b/arch/alpha/include/uapi/asm/socket.h > @@ -10,7 +10,7 @@ > * Note: we only bother about making the SOL_SOCKET options > * same as OSF/1, as that's all that "normal" programs are > * likely to set. We don't necessarily want to be binary > - * compatible with _everything_. > + * compatible with _everything_. > */ > #define SOL_SOCKET 0xffff > > @@ -140,6 +140,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > index 60ebaed28a4c..bb3dc8feb205 100644 > --- a/arch/mips/include/uapi/asm/socket.h > +++ b/arch/mips/include/uapi/asm/socket.h > @@ -151,6 +151,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > index be264c2b1a11..c3ab3b3289eb 100644 > --- a/arch/parisc/include/uapi/asm/socket.h > +++ b/arch/parisc/include/uapi/asm/socket.h > @@ -132,6 +132,8 @@ > #define SO_PASSPIDFD 0x404A > #define SO_PEERPIDFD 0x404B > > +#define SCM_TS_OPT_ID 0x404C > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > index 682da3714686..9b40f0a57fbc 100644 > --- a/arch/sparc/include/uapi/asm/socket.h > +++ b/arch/sparc/include/uapi/asm/socket.h > @@ -133,6 +133,8 @@ > #define SO_PASSPIDFD 0x0055 > #define SO_PEERPIDFD 0x0056 > > +#define SCM_TS_OPT_ID 0x0057 > + > #if !defined(__KERNEL__) > > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > index 394c3b66065e..2161d50cf0fd 100644 > --- a/include/net/inet_sock.h > +++ b/include/net/inet_sock.h > @@ -174,6 +174,7 @@ struct inet_cork { > __s16 tos; > char priority; > __u16 gso_size; > + u32 ts_opt_id; > u64 transmit_time; > u32 mark; > }; > @@ -241,7 +242,8 @@ struct inet_sock { > struct inet_cork_full cork; > }; > > -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > > enum { > INET_FLAGS_PKTINFO = 0, > diff --git a/include/net/sock.h b/include/net/sock.h > index f51d61fab059..73e21dad5660 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > u64 transmit_time; > u32 mark; > u32 tsflags; > + u32 ts_opt_id; > }; > > static inline void sockcm_init(struct sockcm_cookie *sockc, > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > index 8ce8a39a1e5f..db3df3e74b01 100644 > --- a/include/uapi/asm-generic/socket.h > +++ b/include/uapi/asm-generic/socket.h > @@ -135,6 +135,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..e2f145e3f3a1 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -32,8 +32,9 @@ enum { > SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), I'm not sure if the new flag needs to be documented as well? After this patch, people may search the key word in the documentation file and then find nothing. If we have this flag here, normally it means we can pass it through setsockopt, so is it expected? If it's an exception, I reckon that we can forbid passing/setting this option in sock_set_timestamping() and document this rule? Thanks, Jason
Jason Xing wrote: > On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: > > > > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > > timestamps and packets sent via socket. Unfortunately, there is no way > > to reliably predict socket timestamp ID value in case of error returned > > by sendmsg. For UDP sockets it's impossible because of lockless > > nature of UDP transmit, several threads may send packets in parallel. In > > case of RAW sockets MSG_MORE option makes things complicated. More > > details are in the conversation [1]. > > This patch adds new control message type to give user-space > > software an opportunity to control the mapping between packets and > > values by providing ID with each sendmsg. This works fine for UDP > > sockets only, and explicit check is added to control message parser. > > > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > > --- > > Documentation/networking/timestamping.rst | 14 ++++++++++++++ > > arch/alpha/include/uapi/asm/socket.h | 4 +++- > > arch/mips/include/uapi/asm/socket.h | 2 ++ > > arch/parisc/include/uapi/asm/socket.h | 2 ++ > > arch/sparc/include/uapi/asm/socket.h | 2 ++ > > include/net/inet_sock.h | 4 +++- > > include/net/sock.h | 1 + > > include/uapi/asm-generic/socket.h | 2 ++ > > include/uapi/linux/net_tstamp.h | 3 ++- > > net/core/sock.c | 12 ++++++++++++ > > net/ethtool/common.c | 1 + > > net/ipv4/ip_output.c | 16 ++++++++++++---- > > net/ipv6/ip6_output.c | 16 ++++++++++++---- > > 13 files changed, 68 insertions(+), 11 deletions(-) > > > > diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst > > index 5e93cd71f99f..93b0901e4e8e 100644 > > --- a/Documentation/networking/timestamping.rst > > +++ b/Documentation/networking/timestamping.rst > > @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: > > among all possibly concurrently outstanding timestamp requests for > > that socket. > > > > + With this option enabled user-space application can provide custom > > + ID for each message sent via UDP socket with control message with > > + type set to SCM_TS_OPT_ID:: > > + > > + struct msghdr *msg; > > + ... > > + cmsg = CMSG_FIRSTHDR(msg); > > + cmsg->cmsg_level = SOL_SOCKET; > > + cmsg->cmsg_type = SO_TIMESTAMPING; > > + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); > > + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; > > + err = sendmsg(fd, msg, 0); > > + > > + > > SOF_TIMESTAMPING_OPT_ID_TCP: > > Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP > > timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the > > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > > index e94f621903fe..0698e6662cdf 100644 > > --- a/arch/alpha/include/uapi/asm/socket.h > > +++ b/arch/alpha/include/uapi/asm/socket.h > > @@ -10,7 +10,7 @@ > > * Note: we only bother about making the SOL_SOCKET options > > * same as OSF/1, as that's all that "normal" programs are > > * likely to set. We don't necessarily want to be binary > > - * compatible with _everything_. > > + * compatible with _everything_. > > */ > > #define SOL_SOCKET 0xffff > > > > @@ -140,6 +140,8 @@ > > #define SO_PASSPIDFD 76 > > #define SO_PEERPIDFD 77 > > > > +#define SCM_TS_OPT_ID 78 > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 > > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > > index 60ebaed28a4c..bb3dc8feb205 100644 > > --- a/arch/mips/include/uapi/asm/socket.h > > +++ b/arch/mips/include/uapi/asm/socket.h > > @@ -151,6 +151,8 @@ > > #define SO_PASSPIDFD 76 > > #define SO_PEERPIDFD 77 > > > > +#define SCM_TS_OPT_ID 78 > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 > > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > > index be264c2b1a11..c3ab3b3289eb 100644 > > --- a/arch/parisc/include/uapi/asm/socket.h > > +++ b/arch/parisc/include/uapi/asm/socket.h > > @@ -132,6 +132,8 @@ > > #define SO_PASSPIDFD 0x404A > > #define SO_PEERPIDFD 0x404B > > > > +#define SCM_TS_OPT_ID 0x404C > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 > > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > > index 682da3714686..9b40f0a57fbc 100644 > > --- a/arch/sparc/include/uapi/asm/socket.h > > +++ b/arch/sparc/include/uapi/asm/socket.h > > @@ -133,6 +133,8 @@ > > #define SO_PASSPIDFD 0x0055 > > #define SO_PEERPIDFD 0x0056 > > > > +#define SCM_TS_OPT_ID 0x0057 > > + > > #if !defined(__KERNEL__) > > > > > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > > index 394c3b66065e..2161d50cf0fd 100644 > > --- a/include/net/inet_sock.h > > +++ b/include/net/inet_sock.h > > @@ -174,6 +174,7 @@ struct inet_cork { > > __s16 tos; > > char priority; > > __u16 gso_size; > > + u32 ts_opt_id; > > u64 transmit_time; > > u32 mark; > > }; > > @@ -241,7 +242,8 @@ struct inet_sock { > > struct inet_cork_full cork; > > }; > > > > -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > > +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > > +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > > > > enum { > > INET_FLAGS_PKTINFO = 0, > > diff --git a/include/net/sock.h b/include/net/sock.h > > index f51d61fab059..73e21dad5660 100644 > > --- a/include/net/sock.h > > +++ b/include/net/sock.h > > @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > > u64 transmit_time; > > u32 mark; > > u32 tsflags; > > + u32 ts_opt_id; > > }; > > > > static inline void sockcm_init(struct sockcm_cookie *sockc, > > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > > index 8ce8a39a1e5f..db3df3e74b01 100644 > > --- a/include/uapi/asm-generic/socket.h > > +++ b/include/uapi/asm-generic/socket.h > > @@ -135,6 +135,8 @@ > > #define SO_PASSPIDFD 76 > > #define SO_PEERPIDFD 77 > > > > +#define SCM_TS_OPT_ID 78 > > + > > #if !defined(__KERNEL__) > > > > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > > index a2c66b3d7f0f..e2f145e3f3a1 100644 > > --- a/include/uapi/linux/net_tstamp.h > > +++ b/include/uapi/linux/net_tstamp.h > > @@ -32,8 +32,9 @@ enum { > > SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > > SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > > SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > > + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), > > I'm not sure if the new flag needs to be documented as well? After > this patch, people may search the key word in the documentation file > and then find nothing. > > If we have this flag here, normally it means we can pass it through > setsockopt, so is it expected? If it's an exception, I reckon that we > can forbid passing/setting this option in sock_set_timestamping() and > document this rule? Good point, thanks. It must definitely not be part of SOF_TIMESTAMPING_MASK. My bad for suggesting without giving it much thought. The bit is kernel-internal. No need to even mention it in user-facing documentation. But anyone reading net_tstamp.h might wonder what it does. It should not even be in a UAPI header, but in an internal one. Probably include/net/sock.h, near SK_FLAGS_TIMESTAMP. Maybe we can reserve bit 31 in u32 sk_tsflags. And if we ever have to double that flag size, it can move up to 63, as it is not UAPI in any way. This is a workaround to having a separate flags field in sockcm_cookie. And have a BUILD_BUG_ON if SOF_TIMESTAMPING_LAST reaches this reserved region.
On Mon, Sep 02, 2024 at 06:09:35AM -0700, Vadim Fedorenko wrote: > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > timestamps and packets sent via socket. Unfortunately, there is no way > to reliably predict socket timestamp ID value in case of error returned > by sendmsg. For UDP sockets it's impossible because of lockless > nature of UDP transmit, several threads may send packets in parallel. In > case of RAW sockets MSG_MORE option makes things complicated. More > details are in the conversation [1]. > This patch adds new control message type to give user-space > software an opportunity to control the mapping between packets and > values by providing ID with each sendmsg. This works fine for UDP > sockets only, and explicit check is added to control message parser. > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> ... > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c ... > @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, > flags &= ~MSG_SPLICE_PAGES; > } > > - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > - if (hold_tskey) > - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + if (cork->tx_flags & SKBTX_ANY_TSTAMP && > + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { > + if (cork->flags & IPCORK_TS_OPT_ID) { > + tskey = cork->ts_opt_id; > + } else { > + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + hold_tskey = true; Hi Vadim, I think that hold_tskey also needs to be assigned a value in the cases where wither of the if conditions above are false. Flagged by Smatch. > + } > + } > > /* > * Let's try using as much space as possible.
On 02/09/2024 19:38, Simon Horman wrote: > On Mon, Sep 02, 2024 at 06:09:35AM -0700, Vadim Fedorenko wrote: >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >> timestamps and packets sent via socket. Unfortunately, there is no way >> to reliably predict socket timestamp ID value in case of error returned >> by sendmsg. For UDP sockets it's impossible because of lockless >> nature of UDP transmit, several threads may send packets in parallel. In >> case of RAW sockets MSG_MORE option makes things complicated. More >> details are in the conversation [1]. >> This patch adds new control message type to give user-space >> software an opportunity to control the mapping between packets and >> values by providing ID with each sendmsg. This works fine for UDP >> sockets only, and explicit check is added to control message parser. >> >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >> >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > > ... > >> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c > > ... > >> @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, >> flags &= ~MSG_SPLICE_PAGES; >> } >> >> - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && >> - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; >> - if (hold_tskey) >> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; >> + if (cork->tx_flags & SKBTX_ANY_TSTAMP && >> + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { >> + if (cork->flags & IPCORK_TS_OPT_ID) { >> + tskey = cork->ts_opt_id; >> + } else { >> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; >> + hold_tskey = true; > > Hi Vadim, > > I think that hold_tskey also needs to be assigned a value in > the cases where wither of the if conditions above are false. Hi Simon! Yes, you are right. I should probably init it with false to avoid 'else' statement. Thanks, Vadim > Flagged by Smatch. > >> + } >> + } >> >> /* >> * Let's try using as much space as possible. >
On 02/09/2024 16:51, Willem de Bruijn wrote: > Jason Xing wrote: >> On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: >>> >>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >>> timestamps and packets sent via socket. Unfortunately, there is no way >>> to reliably predict socket timestamp ID value in case of error returned >>> by sendmsg. For UDP sockets it's impossible because of lockless >>> nature of UDP transmit, several threads may send packets in parallel. In >>> case of RAW sockets MSG_MORE option makes things complicated. More >>> details are in the conversation [1]. >>> This patch adds new control message type to give user-space >>> software an opportunity to control the mapping between packets and >>> values by providing ID with each sendmsg. This works fine for UDP >>> sockets only, and explicit check is added to control message parser. >>> >>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >>> >>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >>> --- >>> Documentation/networking/timestamping.rst | 14 ++++++++++++++ >>> arch/alpha/include/uapi/asm/socket.h | 4 +++- >>> arch/mips/include/uapi/asm/socket.h | 2 ++ >>> arch/parisc/include/uapi/asm/socket.h | 2 ++ >>> arch/sparc/include/uapi/asm/socket.h | 2 ++ >>> include/net/inet_sock.h | 4 +++- >>> include/net/sock.h | 1 + >>> include/uapi/asm-generic/socket.h | 2 ++ >>> include/uapi/linux/net_tstamp.h | 3 ++- >>> net/core/sock.c | 12 ++++++++++++ >>> net/ethtool/common.c | 1 + >>> net/ipv4/ip_output.c | 16 ++++++++++++---- >>> net/ipv6/ip6_output.c | 16 ++++++++++++---- >>> 13 files changed, 68 insertions(+), 11 deletions(-) >>> >>> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst >>> index 5e93cd71f99f..93b0901e4e8e 100644 >>> --- a/Documentation/networking/timestamping.rst >>> +++ b/Documentation/networking/timestamping.rst >>> @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: >>> among all possibly concurrently outstanding timestamp requests for >>> that socket. >>> >>> + With this option enabled user-space application can provide custom >>> + ID for each message sent via UDP socket with control message with >>> + type set to SCM_TS_OPT_ID:: >>> + >>> + struct msghdr *msg; >>> + ... >>> + cmsg = CMSG_FIRSTHDR(msg); >>> + cmsg->cmsg_level = SOL_SOCKET; >>> + cmsg->cmsg_type = SO_TIMESTAMPING; >>> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); >>> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; >>> + err = sendmsg(fd, msg, 0); >>> + >>> + >>> SOF_TIMESTAMPING_OPT_ID_TCP: >>> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP >>> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the >>> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h >>> index e94f621903fe..0698e6662cdf 100644 >>> --- a/arch/alpha/include/uapi/asm/socket.h >>> +++ b/arch/alpha/include/uapi/asm/socket.h >>> @@ -10,7 +10,7 @@ >>> * Note: we only bother about making the SOL_SOCKET options >>> * same as OSF/1, as that's all that "normal" programs are >>> * likely to set. We don't necessarily want to be binary >>> - * compatible with _everything_. >>> + * compatible with _everything_. >>> */ >>> #define SOL_SOCKET 0xffff >>> >>> @@ -140,6 +140,8 @@ >>> #define SO_PASSPIDFD 76 >>> #define SO_PEERPIDFD 77 >>> >>> +#define SCM_TS_OPT_ID 78 >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 >>> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h >>> index 60ebaed28a4c..bb3dc8feb205 100644 >>> --- a/arch/mips/include/uapi/asm/socket.h >>> +++ b/arch/mips/include/uapi/asm/socket.h >>> @@ -151,6 +151,8 @@ >>> #define SO_PASSPIDFD 76 >>> #define SO_PEERPIDFD 77 >>> >>> +#define SCM_TS_OPT_ID 78 >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 >>> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h >>> index be264c2b1a11..c3ab3b3289eb 100644 >>> --- a/arch/parisc/include/uapi/asm/socket.h >>> +++ b/arch/parisc/include/uapi/asm/socket.h >>> @@ -132,6 +132,8 @@ >>> #define SO_PASSPIDFD 0x404A >>> #define SO_PEERPIDFD 0x404B >>> >>> +#define SCM_TS_OPT_ID 0x404C >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 >>> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h >>> index 682da3714686..9b40f0a57fbc 100644 >>> --- a/arch/sparc/include/uapi/asm/socket.h >>> +++ b/arch/sparc/include/uapi/asm/socket.h >>> @@ -133,6 +133,8 @@ >>> #define SO_PASSPIDFD 0x0055 >>> #define SO_PEERPIDFD 0x0056 >>> >>> +#define SCM_TS_OPT_ID 0x0057 >>> + >>> #if !defined(__KERNEL__) >>> >>> >>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >>> index 394c3b66065e..2161d50cf0fd 100644 >>> --- a/include/net/inet_sock.h >>> +++ b/include/net/inet_sock.h >>> @@ -174,6 +174,7 @@ struct inet_cork { >>> __s16 tos; >>> char priority; >>> __u16 gso_size; >>> + u32 ts_opt_id; >>> u64 transmit_time; >>> u32 mark; >>> }; >>> @@ -241,7 +242,8 @@ struct inet_sock { >>> struct inet_cork_full cork; >>> }; >>> >>> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ >>> >>> enum { >>> INET_FLAGS_PKTINFO = 0, >>> diff --git a/include/net/sock.h b/include/net/sock.h >>> index f51d61fab059..73e21dad5660 100644 >>> --- a/include/net/sock.h >>> +++ b/include/net/sock.h >>> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { >>> u64 transmit_time; >>> u32 mark; >>> u32 tsflags; >>> + u32 ts_opt_id; >>> }; >>> >>> static inline void sockcm_init(struct sockcm_cookie *sockc, >>> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h >>> index 8ce8a39a1e5f..db3df3e74b01 100644 >>> --- a/include/uapi/asm-generic/socket.h >>> +++ b/include/uapi/asm-generic/socket.h >>> @@ -135,6 +135,8 @@ >>> #define SO_PASSPIDFD 76 >>> #define SO_PEERPIDFD 77 >>> >>> +#define SCM_TS_OPT_ID 78 >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) >>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >>> index a2c66b3d7f0f..e2f145e3f3a1 100644 >>> --- a/include/uapi/linux/net_tstamp.h >>> +++ b/include/uapi/linux/net_tstamp.h >>> @@ -32,8 +32,9 @@ enum { >>> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), >>> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), >>> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), >>> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), >> >> I'm not sure if the new flag needs to be documented as well? After >> this patch, people may search the key word in the documentation file >> and then find nothing. >> >> If we have this flag here, normally it means we can pass it through >> setsockopt, so is it expected? If it's an exception, I reckon that we >> can forbid passing/setting this option in sock_set_timestamping() and >> document this rule? > > Good point, thanks. > > It must definitely not be part of SOF_TIMESTAMPING_MASK. My bad for > suggesting without giving it much thought. > > The bit is kernel-internal. No need to even mention it in user-facing > documentation. But anyone reading net_tstamp.h might wonder what it > does. > > It should not even be in a UAPI header, but in an internal one. > Probably include/net/sock.h, near SK_FLAGS_TIMESTAMP. > > Maybe we can reserve bit 31 in u32 sk_tsflags. And if we ever have > to double that flag size, it can move up to 63, as it is not UAPI in > any way. This is a workaround to having a separate flags field in > sockcm_cookie. > > And have a BUILD_BUG_ON if SOF_TIMESTAMPING_LAST reaches this reserved > region. Yeah, I was also thinking of it not being UAPI, that's why I tried to avoid it in my RFC using 0 as a reserved value. Do you think SK_FLAGS_CMSG_TS_OPT_ID is good naming for it? Thanks, Vadim
Vadim Fedorenko wrote: > On 02/09/2024 16:51, Willem de Bruijn wrote: > > Jason Xing wrote: > >> On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: > >>> > >>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > >>> timestamps and packets sent via socket. Unfortunately, there is no way > >>> to reliably predict socket timestamp ID value in case of error returned > >>> by sendmsg. For UDP sockets it's impossible because of lockless > >>> nature of UDP transmit, several threads may send packets in parallel. In > >>> case of RAW sockets MSG_MORE option makes things complicated. More > >>> details are in the conversation [1]. > >>> This patch adds new control message type to give user-space > >>> software an opportunity to control the mapping between packets and > >>> values by providing ID with each sendmsg. This works fine for UDP > >>> sockets only, and explicit check is added to control message parser. > >>> > >>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > >>> > >>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > >>> --- > >>> Documentation/networking/timestamping.rst | 14 ++++++++++++++ > >>> arch/alpha/include/uapi/asm/socket.h | 4 +++- > >>> arch/mips/include/uapi/asm/socket.h | 2 ++ > >>> arch/parisc/include/uapi/asm/socket.h | 2 ++ > >>> arch/sparc/include/uapi/asm/socket.h | 2 ++ > >>> include/net/inet_sock.h | 4 +++- > >>> include/net/sock.h | 1 + > >>> include/uapi/asm-generic/socket.h | 2 ++ > >>> include/uapi/linux/net_tstamp.h | 3 ++- > >>> net/core/sock.c | 12 ++++++++++++ > >>> net/ethtool/common.c | 1 + > >>> net/ipv4/ip_output.c | 16 ++++++++++++---- > >>> net/ipv6/ip6_output.c | 16 ++++++++++++---- > >>> 13 files changed, 68 insertions(+), 11 deletions(-) > >>> > >>> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst > >>> index 5e93cd71f99f..93b0901e4e8e 100644 > >>> --- a/Documentation/networking/timestamping.rst > >>> +++ b/Documentation/networking/timestamping.rst > >>> @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: > >>> among all possibly concurrently outstanding timestamp requests for > >>> that socket. > >>> > >>> + With this option enabled user-space application can provide custom > >>> + ID for each message sent via UDP socket with control message with > >>> + type set to SCM_TS_OPT_ID:: > >>> + > >>> + struct msghdr *msg; > >>> + ... > >>> + cmsg = CMSG_FIRSTHDR(msg); > >>> + cmsg->cmsg_level = SOL_SOCKET; > >>> + cmsg->cmsg_type = SO_TIMESTAMPING; > >>> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); > >>> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; > >>> + err = sendmsg(fd, msg, 0); > >>> + > >>> + > >>> SOF_TIMESTAMPING_OPT_ID_TCP: > >>> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP > >>> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the > >>> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > >>> index e94f621903fe..0698e6662cdf 100644 > >>> --- a/arch/alpha/include/uapi/asm/socket.h > >>> +++ b/arch/alpha/include/uapi/asm/socket.h > >>> @@ -10,7 +10,7 @@ > >>> * Note: we only bother about making the SOL_SOCKET options > >>> * same as OSF/1, as that's all that "normal" programs are > >>> * likely to set. We don't necessarily want to be binary > >>> - * compatible with _everything_. > >>> + * compatible with _everything_. > >>> */ > >>> #define SOL_SOCKET 0xffff > >>> > >>> @@ -140,6 +140,8 @@ > >>> #define SO_PASSPIDFD 76 > >>> #define SO_PEERPIDFD 77 > >>> > >>> +#define SCM_TS_OPT_ID 78 > >>> + > >>> #if !defined(__KERNEL__) > >>> > >>> #if __BITS_PER_LONG == 64 > >>> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > >>> index 60ebaed28a4c..bb3dc8feb205 100644 > >>> --- a/arch/mips/include/uapi/asm/socket.h > >>> +++ b/arch/mips/include/uapi/asm/socket.h > >>> @@ -151,6 +151,8 @@ > >>> #define SO_PASSPIDFD 76 > >>> #define SO_PEERPIDFD 77 > >>> > >>> +#define SCM_TS_OPT_ID 78 > >>> + > >>> #if !defined(__KERNEL__) > >>> > >>> #if __BITS_PER_LONG == 64 > >>> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > >>> index be264c2b1a11..c3ab3b3289eb 100644 > >>> --- a/arch/parisc/include/uapi/asm/socket.h > >>> +++ b/arch/parisc/include/uapi/asm/socket.h > >>> @@ -132,6 +132,8 @@ > >>> #define SO_PASSPIDFD 0x404A > >>> #define SO_PEERPIDFD 0x404B > >>> > >>> +#define SCM_TS_OPT_ID 0x404C > >>> + > >>> #if !defined(__KERNEL__) > >>> > >>> #if __BITS_PER_LONG == 64 > >>> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > >>> index 682da3714686..9b40f0a57fbc 100644 > >>> --- a/arch/sparc/include/uapi/asm/socket.h > >>> +++ b/arch/sparc/include/uapi/asm/socket.h > >>> @@ -133,6 +133,8 @@ > >>> #define SO_PASSPIDFD 0x0055 > >>> #define SO_PEERPIDFD 0x0056 > >>> > >>> +#define SCM_TS_OPT_ID 0x0057 > >>> + > >>> #if !defined(__KERNEL__) > >>> > >>> > >>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > >>> index 394c3b66065e..2161d50cf0fd 100644 > >>> --- a/include/net/inet_sock.h > >>> +++ b/include/net/inet_sock.h > >>> @@ -174,6 +174,7 @@ struct inet_cork { > >>> __s16 tos; > >>> char priority; > >>> __u16 gso_size; > >>> + u32 ts_opt_id; > >>> u64 transmit_time; > >>> u32 mark; > >>> }; > >>> @@ -241,7 +242,8 @@ struct inet_sock { > >>> struct inet_cork_full cork; > >>> }; > >>> > >>> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > >>> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > >>> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > >>> > >>> enum { > >>> INET_FLAGS_PKTINFO = 0, > >>> diff --git a/include/net/sock.h b/include/net/sock.h > >>> index f51d61fab059..73e21dad5660 100644 > >>> --- a/include/net/sock.h > >>> +++ b/include/net/sock.h > >>> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > >>> u64 transmit_time; > >>> u32 mark; > >>> u32 tsflags; > >>> + u32 ts_opt_id; > >>> }; > >>> > >>> static inline void sockcm_init(struct sockcm_cookie *sockc, > >>> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > >>> index 8ce8a39a1e5f..db3df3e74b01 100644 > >>> --- a/include/uapi/asm-generic/socket.h > >>> +++ b/include/uapi/asm-generic/socket.h > >>> @@ -135,6 +135,8 @@ > >>> #define SO_PASSPIDFD 76 > >>> #define SO_PEERPIDFD 77 > >>> > >>> +#define SCM_TS_OPT_ID 78 > >>> + > >>> #if !defined(__KERNEL__) > >>> > >>> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > >>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > >>> index a2c66b3d7f0f..e2f145e3f3a1 100644 > >>> --- a/include/uapi/linux/net_tstamp.h > >>> +++ b/include/uapi/linux/net_tstamp.h > >>> @@ -32,8 +32,9 @@ enum { > >>> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > >>> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > >>> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > >>> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), > >> > >> I'm not sure if the new flag needs to be documented as well? After > >> this patch, people may search the key word in the documentation file > >> and then find nothing. > >> > >> If we have this flag here, normally it means we can pass it through > >> setsockopt, so is it expected? If it's an exception, I reckon that we > >> can forbid passing/setting this option in sock_set_timestamping() and > >> document this rule? > > > > Good point, thanks. > > > > It must definitely not be part of SOF_TIMESTAMPING_MASK. My bad for > > suggesting without giving it much thought. > > > > The bit is kernel-internal. No need to even mention it in user-facing > > documentation. But anyone reading net_tstamp.h might wonder what it > > does. > > > > It should not even be in a UAPI header, but in an internal one. > > Probably include/net/sock.h, near SK_FLAGS_TIMESTAMP. > > > > Maybe we can reserve bit 31 in u32 sk_tsflags. And if we ever have > > to double that flag size, it can move up to 63, as it is not UAPI in > > any way. This is a workaround to having a separate flags field in > > sockcm_cookie. > > > > And have a BUILD_BUG_ON if SOF_TIMESTAMPING_LAST reaches this reserved > > region. > > Yeah, I was also thinking of it not being UAPI, that's why I tried to > avoid it in my RFC using 0 as a reserved value. Do you think > SK_FLAGS_CMSG_TS_OPT_ID is good naming for it? It's relevant only to sockcm_cookie, so maybe SOCKCM_FLAG_TS_OPT_ID? One day we'll need another sockcm_cookie flag, we'll grow it to add a real flags field and can get rid of this hack. The struct is used embedded in ipcm_cookie and ipcm6_cookie, which would grow as a result. It is also simply stack allocated in cases like performance sensitive tcp_sendmsg_locked. Here, the main cost is a slightly more expensive zeroing in sockcm_init. For now, I think we should just stick with using the highest bit in sockcm.tsflags.
On 02/09/2024 15:29, Willem de Bruijn wrote: > Vadim Fedorenko wrote: >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >> timestamps and packets sent via socket. Unfortunately, there is no way >> to reliably predict socket timestamp ID value in case of error returned >> by sendmsg. For UDP sockets it's impossible because of lockless >> nature of UDP transmit, several threads may send packets in parallel. In >> case of RAW sockets MSG_MORE option makes things complicated. More >> details are in the conversation [1]. >> This patch adds new control message type to give user-space >> software an opportunity to control the mapping between packets and >> values by providing ID with each sendmsg. This works fine for UDP >> sockets only, and explicit check is added to control message parser. >> >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >> >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >> --- >> Documentation/networking/timestamping.rst | 14 ++++++++++++++ >> arch/alpha/include/uapi/asm/socket.h | 4 +++- >> arch/mips/include/uapi/asm/socket.h | 2 ++ >> arch/parisc/include/uapi/asm/socket.h | 2 ++ >> arch/sparc/include/uapi/asm/socket.h | 2 ++ >> include/net/inet_sock.h | 4 +++- >> include/net/sock.h | 1 + >> include/uapi/asm-generic/socket.h | 2 ++ >> include/uapi/linux/net_tstamp.h | 3 ++- >> net/core/sock.c | 12 ++++++++++++ >> net/ethtool/common.c | 1 + >> net/ipv4/ip_output.c | 16 ++++++++++++---- >> net/ipv6/ip6_output.c | 16 ++++++++++++---- >> 13 files changed, 68 insertions(+), 11 deletions(-) >> >> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst >> index 5e93cd71f99f..93b0901e4e8e 100644 >> --- a/Documentation/networking/timestamping.rst >> +++ b/Documentation/networking/timestamping.rst >> @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: >> among all possibly concurrently outstanding timestamp requests for >> that socket. >> >> + With this option enabled user-space application can provide custom >> + ID for each message sent via UDP socket with control message with >> + type set to SCM_TS_OPT_ID:: >> + >> + struct msghdr *msg; >> + ... >> + cmsg = CMSG_FIRSTHDR(msg); >> + cmsg->cmsg_level = SOL_SOCKET; >> + cmsg->cmsg_type = SO_TIMESTAMPING; >> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); >> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; >> + err = sendmsg(fd, msg, 0); >> + > > Please make it clear that this CMSG is optional. > > The process can optionally override the default generated ID, by > passing a specific ID with control message SCM_TS_OPT_ID: Ok, I'll re-phrase it this way, thanks! >> SOF_TIMESTAMPING_OPT_ID_TCP: >> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP >> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the >> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h >> index e94f621903fe..0698e6662cdf 100644 >> --- a/arch/alpha/include/uapi/asm/socket.h >> +++ b/arch/alpha/include/uapi/asm/socket.h >> @@ -10,7 +10,7 @@ >> * Note: we only bother about making the SOL_SOCKET options >> * same as OSF/1, as that's all that "normal" programs are >> * likely to set. We don't necessarily want to be binary >> - * compatible with _everything_. >> + * compatible with _everything_. > > Is this due to a checkpatch warning? If so, please add a brief comment > to the commit message to show that this change is intentional. If not, > please don't touch unrelated code. I'll remove it, because it looks like it was some unhappy linter... >> */ >> #define SOL_SOCKET 0xffff >> >> @@ -140,6 +140,8 @@ >> #define SO_PASSPIDFD 76 >> #define SO_PEERPIDFD 77 >> >> +#define SCM_TS_OPT_ID 78 >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 >> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h >> index 60ebaed28a4c..bb3dc8feb205 100644 >> --- a/arch/mips/include/uapi/asm/socket.h >> +++ b/arch/mips/include/uapi/asm/socket.h >> @@ -151,6 +151,8 @@ >> #define SO_PASSPIDFD 76 >> #define SO_PEERPIDFD 77 >> >> +#define SCM_TS_OPT_ID 78 >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 >> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h >> index be264c2b1a11..c3ab3b3289eb 100644 >> --- a/arch/parisc/include/uapi/asm/socket.h >> +++ b/arch/parisc/include/uapi/asm/socket.h >> @@ -132,6 +132,8 @@ >> #define SO_PASSPIDFD 0x404A >> #define SO_PEERPIDFD 0x404B >> >> +#define SCM_TS_OPT_ID 0x404C >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 >> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h >> index 682da3714686..9b40f0a57fbc 100644 >> --- a/arch/sparc/include/uapi/asm/socket.h >> +++ b/arch/sparc/include/uapi/asm/socket.h >> @@ -133,6 +133,8 @@ >> #define SO_PASSPIDFD 0x0055 >> #define SO_PEERPIDFD 0x0056 >> >> +#define SCM_TS_OPT_ID 0x0057 >> + >> #if !defined(__KERNEL__) >> >> >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >> index 394c3b66065e..2161d50cf0fd 100644 >> --- a/include/net/inet_sock.h >> +++ b/include/net/inet_sock.h >> @@ -174,6 +174,7 @@ struct inet_cork { >> __s16 tos; >> char priority; >> __u16 gso_size; >> + u32 ts_opt_id; >> u64 transmit_time; >> u32 mark; >> }; >> @@ -241,7 +242,8 @@ struct inet_sock { >> struct inet_cork_full cork; >> }; >> >> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > > typo: timestamp > > And maybe more relevant: /* ts_opt_id field is valid, overriding sk_tskey */ I'll change it >> enum { >> INET_FLAGS_PKTINFO = 0, >> diff --git a/include/net/sock.h b/include/net/sock.h >> index f51d61fab059..73e21dad5660 100644 >> --- a/include/net/sock.h >> +++ b/include/net/sock.h >> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { >> u64 transmit_time; >> u32 mark; >> u32 tsflags; >> + u32 ts_opt_id; >> }; >> >> static inline void sockcm_init(struct sockcm_cookie *sockc, >> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h >> index 8ce8a39a1e5f..db3df3e74b01 100644 >> --- a/include/uapi/asm-generic/socket.h >> +++ b/include/uapi/asm-generic/socket.h >> @@ -135,6 +135,8 @@ >> #define SO_PASSPIDFD 76 >> #define SO_PEERPIDFD 77 >> >> +#define SCM_TS_OPT_ID 78 > > nit: different indentation Hmm... that's interesting, it's ok in the code, there no spaces before #define. I'll re-check it in the patch in v3. >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >> index a2c66b3d7f0f..e2f145e3f3a1 100644 >> --- a/include/uapi/linux/net_tstamp.h >> +++ b/include/uapi/linux/net_tstamp.h >> @@ -32,8 +32,9 @@ enum { >> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), >> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), >> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), >> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), >> >> - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, >> + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_CMSG, >> SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | >> SOF_TIMESTAMPING_LAST >> }; >> diff --git a/net/core/sock.c b/net/core/sock.c >> index 468b1239606c..560b075765fa 100644 >> --- a/net/core/sock.c >> +++ b/net/core/sock.c >> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, >> return -EINVAL; >> sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); >> break; >> + case SCM_TS_OPT_ID: >> + /* allow this option for UDP sockets only */ >> + if (!sk_is_udp(sk)) >> + return -EINVAL; > > Let's relax the restriction that this is only for UDP. > > At least to also support SOCK_RAW. I don't think that requires any > additional code at all? RAW sockets use skb_setup_tx_timestamps which does atomic operation of incrementing sk_tskey when in _sock_tx_timestamp. So I'll have to convert all spots (can, ipv4/raw, ipv6/raw, 3 x af_packet) to use the same logic as in udp path (sock_tx_timestamp) and add conditions. Or change skb_setup_tx_timestamps to do the logic and take different arguments. It may look as a big refactoring, so I would like to make it as a follow-up series. > Extending to TCP should be straightforward too, just a branch > on sockc in tcp_tx_timestamp. TCP part looks a bit easier, as you said, I have to adjust tcp_tx_timestamp and the logic is straightforward. I still have to provide a pointer to sock coockie instead of flags, but there is only one caller of this function and it's much easier than with RAW sockets. > If so, let's support all. It makes for a simpler API if it is > supported uniformly wherever OPT_ID is. If you think that the way I explained for RAW sockets is good enough, I can send all of them as a single patcheset. Otherwise I would like to add RAW sockets in follow-up series. >> + tsflags = READ_ONCE(sk->sk_tsflags); >> + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) >> + return -EINVAL; >> + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) >> + return -EINVAL; >> + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); >> + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; >> + break; >> /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ >> case SCM_RIGHTS: >> case SCM_CREDENTIALS:
On 02/09/2024 21:59, Willem de Bruijn wrote: > Vadim Fedorenko wrote: >> On 02/09/2024 16:51, Willem de Bruijn wrote: >>> Jason Xing wrote: >>>> On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: >>>>> >>>>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >>>>> timestamps and packets sent via socket. Unfortunately, there is no way >>>>> to reliably predict socket timestamp ID value in case of error returned >>>>> by sendmsg. For UDP sockets it's impossible because of lockless >>>>> nature of UDP transmit, several threads may send packets in parallel. In >>>>> case of RAW sockets MSG_MORE option makes things complicated. More >>>>> details are in the conversation [1]. >>>>> This patch adds new control message type to give user-space >>>>> software an opportunity to control the mapping between packets and >>>>> values by providing ID with each sendmsg. This works fine for UDP >>>>> sockets only, and explicit check is added to control message parser. >>>>> >>>>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >>>>> >>>>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >>>>> --- >>>>> Documentation/networking/timestamping.rst | 14 ++++++++++++++ >>>>> arch/alpha/include/uapi/asm/socket.h | 4 +++- >>>>> arch/mips/include/uapi/asm/socket.h | 2 ++ >>>>> arch/parisc/include/uapi/asm/socket.h | 2 ++ >>>>> arch/sparc/include/uapi/asm/socket.h | 2 ++ >>>>> include/net/inet_sock.h | 4 +++- >>>>> include/net/sock.h | 1 + >>>>> include/uapi/asm-generic/socket.h | 2 ++ >>>>> include/uapi/linux/net_tstamp.h | 3 ++- >>>>> net/core/sock.c | 12 ++++++++++++ >>>>> net/ethtool/common.c | 1 + >>>>> net/ipv4/ip_output.c | 16 ++++++++++++---- >>>>> net/ipv6/ip6_output.c | 16 ++++++++++++---- >>>>> 13 files changed, 68 insertions(+), 11 deletions(-) >>>>> >>>>> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst >>>>> index 5e93cd71f99f..93b0901e4e8e 100644 >>>>> --- a/Documentation/networking/timestamping.rst >>>>> +++ b/Documentation/networking/timestamping.rst >>>>> @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: >>>>> among all possibly concurrently outstanding timestamp requests for >>>>> that socket. >>>>> >>>>> + With this option enabled user-space application can provide custom >>>>> + ID for each message sent via UDP socket with control message with >>>>> + type set to SCM_TS_OPT_ID:: >>>>> + >>>>> + struct msghdr *msg; >>>>> + ... >>>>> + cmsg = CMSG_FIRSTHDR(msg); >>>>> + cmsg->cmsg_level = SOL_SOCKET; >>>>> + cmsg->cmsg_type = SO_TIMESTAMPING; >>>>> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); >>>>> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; >>>>> + err = sendmsg(fd, msg, 0); >>>>> + >>>>> + >>>>> SOF_TIMESTAMPING_OPT_ID_TCP: >>>>> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP >>>>> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the >>>>> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h >>>>> index e94f621903fe..0698e6662cdf 100644 >>>>> --- a/arch/alpha/include/uapi/asm/socket.h >>>>> +++ b/arch/alpha/include/uapi/asm/socket.h >>>>> @@ -10,7 +10,7 @@ >>>>> * Note: we only bother about making the SOL_SOCKET options >>>>> * same as OSF/1, as that's all that "normal" programs are >>>>> * likely to set. We don't necessarily want to be binary >>>>> - * compatible with _everything_. >>>>> + * compatible with _everything_. >>>>> */ >>>>> #define SOL_SOCKET 0xffff >>>>> >>>>> @@ -140,6 +140,8 @@ >>>>> #define SO_PASSPIDFD 76 >>>>> #define SO_PEERPIDFD 77 >>>>> >>>>> +#define SCM_TS_OPT_ID 78 >>>>> + >>>>> #if !defined(__KERNEL__) >>>>> >>>>> #if __BITS_PER_LONG == 64 >>>>> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h >>>>> index 60ebaed28a4c..bb3dc8feb205 100644 >>>>> --- a/arch/mips/include/uapi/asm/socket.h >>>>> +++ b/arch/mips/include/uapi/asm/socket.h >>>>> @@ -151,6 +151,8 @@ >>>>> #define SO_PASSPIDFD 76 >>>>> #define SO_PEERPIDFD 77 >>>>> >>>>> +#define SCM_TS_OPT_ID 78 >>>>> + >>>>> #if !defined(__KERNEL__) >>>>> >>>>> #if __BITS_PER_LONG == 64 >>>>> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h >>>>> index be264c2b1a11..c3ab3b3289eb 100644 >>>>> --- a/arch/parisc/include/uapi/asm/socket.h >>>>> +++ b/arch/parisc/include/uapi/asm/socket.h >>>>> @@ -132,6 +132,8 @@ >>>>> #define SO_PASSPIDFD 0x404A >>>>> #define SO_PEERPIDFD 0x404B >>>>> >>>>> +#define SCM_TS_OPT_ID 0x404C >>>>> + >>>>> #if !defined(__KERNEL__) >>>>> >>>>> #if __BITS_PER_LONG == 64 >>>>> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h >>>>> index 682da3714686..9b40f0a57fbc 100644 >>>>> --- a/arch/sparc/include/uapi/asm/socket.h >>>>> +++ b/arch/sparc/include/uapi/asm/socket.h >>>>> @@ -133,6 +133,8 @@ >>>>> #define SO_PASSPIDFD 0x0055 >>>>> #define SO_PEERPIDFD 0x0056 >>>>> >>>>> +#define SCM_TS_OPT_ID 0x0057 >>>>> + >>>>> #if !defined(__KERNEL__) >>>>> >>>>> >>>>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >>>>> index 394c3b66065e..2161d50cf0fd 100644 >>>>> --- a/include/net/inet_sock.h >>>>> +++ b/include/net/inet_sock.h >>>>> @@ -174,6 +174,7 @@ struct inet_cork { >>>>> __s16 tos; >>>>> char priority; >>>>> __u16 gso_size; >>>>> + u32 ts_opt_id; >>>>> u64 transmit_time; >>>>> u32 mark; >>>>> }; >>>>> @@ -241,7 +242,8 @@ struct inet_sock { >>>>> struct inet_cork_full cork; >>>>> }; >>>>> >>>>> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>>>> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>>>> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ >>>>> >>>>> enum { >>>>> INET_FLAGS_PKTINFO = 0, >>>>> diff --git a/include/net/sock.h b/include/net/sock.h >>>>> index f51d61fab059..73e21dad5660 100644 >>>>> --- a/include/net/sock.h >>>>> +++ b/include/net/sock.h >>>>> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { >>>>> u64 transmit_time; >>>>> u32 mark; >>>>> u32 tsflags; >>>>> + u32 ts_opt_id; >>>>> }; >>>>> >>>>> static inline void sockcm_init(struct sockcm_cookie *sockc, >>>>> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h >>>>> index 8ce8a39a1e5f..db3df3e74b01 100644 >>>>> --- a/include/uapi/asm-generic/socket.h >>>>> +++ b/include/uapi/asm-generic/socket.h >>>>> @@ -135,6 +135,8 @@ >>>>> #define SO_PASSPIDFD 76 >>>>> #define SO_PEERPIDFD 77 >>>>> >>>>> +#define SCM_TS_OPT_ID 78 >>>>> + >>>>> #if !defined(__KERNEL__) >>>>> >>>>> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) >>>>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >>>>> index a2c66b3d7f0f..e2f145e3f3a1 100644 >>>>> --- a/include/uapi/linux/net_tstamp.h >>>>> +++ b/include/uapi/linux/net_tstamp.h >>>>> @@ -32,8 +32,9 @@ enum { >>>>> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), >>>>> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), >>>>> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), >>>>> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), >>>> >>>> I'm not sure if the new flag needs to be documented as well? After >>>> this patch, people may search the key word in the documentation file >>>> and then find nothing. >>>> >>>> If we have this flag here, normally it means we can pass it through >>>> setsockopt, so is it expected? If it's an exception, I reckon that we >>>> can forbid passing/setting this option in sock_set_timestamping() and >>>> document this rule? >>> >>> Good point, thanks. >>> >>> It must definitely not be part of SOF_TIMESTAMPING_MASK. My bad for >>> suggesting without giving it much thought. >>> >>> The bit is kernel-internal. No need to even mention it in user-facing >>> documentation. But anyone reading net_tstamp.h might wonder what it >>> does. >>> >>> It should not even be in a UAPI header, but in an internal one. >>> Probably include/net/sock.h, near SK_FLAGS_TIMESTAMP. >>> >>> Maybe we can reserve bit 31 in u32 sk_tsflags. And if we ever have >>> to double that flag size, it can move up to 63, as it is not UAPI in >>> any way. This is a workaround to having a separate flags field in >>> sockcm_cookie. >>> >>> And have a BUILD_BUG_ON if SOF_TIMESTAMPING_LAST reaches this reserved >>> region. >> >> Yeah, I was also thinking of it not being UAPI, that's why I tried to >> avoid it in my RFC using 0 as a reserved value. Do you think >> SK_FLAGS_CMSG_TS_OPT_ID is good naming for it? > > It's relevant only to sockcm_cookie, so maybe SOCKCM_FLAG_TS_OPT_ID? > > One day we'll need another sockcm_cookie flag, we'll grow it to add a > real flags field and can get rid of this hack. > > The struct is used embedded in ipcm_cookie and ipcm6_cookie, which > would grow as a result. > > It is also simply stack allocated in cases like performance sensitive > tcp_sendmsg_locked. Here, the main cost is a slightly more expensive > zeroing in sockcm_init. > > For now, I think we should just stick with using the highest bit in > sockcm.tsflags. I totally agree with using the highest bit in sockcm.tsflags for now. I was just wondering about the name as naming is always the hardest part. SOCKCM_FLAG_TS_OPT_ID looks good and reasonable, I'll use it. Thanks, Vadim
Hi Vadim, kernel test robot noticed the following build warnings: url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-txtimestamp-add-SCM_TS_OPT_ID-test/20240902-212008 base: net-next/main patch link: https://lore.kernel.org/r/20240902130937.457115-1-vadfed%40meta.com patch subject: [PATCH net-next v2 1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message config: csky-randconfig-r072-20240903 (https://download.01.org/0day-ci/archive/20240903/202409031142.3dSuW9Oo-lkp@intel.com/config) compiler: csky-linux-gcc (GCC) 14.1.0 If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Reported-by: Dan Carpenter <dan.carpenter@linaro.org> | Closes: https://lore.kernel.org/r/202409031142.3dSuW9Oo-lkp@intel.com/ smatch warnings: net/ipv4/ip_output.c:1284 __ip_append_data() error: uninitialized symbol 'hold_tskey'. vim +/hold_tskey +1284 net/ipv4/ip_output.c f5fca608651129 David S. Miller 2011-05-08 952 static int __ip_append_data(struct sock *sk, f5fca608651129 David S. Miller 2011-05-08 953 struct flowi4 *fl4, f5fca608651129 David S. Miller 2011-05-08 954 struct sk_buff_head *queue, 1470ddf7f8cecf Herbert Xu 2011-03-01 955 struct inet_cork *cork, 5640f7685831e0 Eric Dumazet 2012-09-23 956 struct page_frag *pfrag, 1470ddf7f8cecf Herbert Xu 2011-03-01 957 int getfrag(void *from, char *to, int offset, 1470ddf7f8cecf Herbert Xu 2011-03-01 958 int len, int odd, struct sk_buff *skb), ^1da177e4c3f41 Linus Torvalds 2005-04-16 959 void *from, int length, int transhdrlen, ^1da177e4c3f41 Linus Torvalds 2005-04-16 960 unsigned int flags) ^1da177e4c3f41 Linus Torvalds 2005-04-16 961 { ^1da177e4c3f41 Linus Torvalds 2005-04-16 962 struct inet_sock *inet = inet_sk(sk); b5947e5d1e710c Willem de Bruijn 2018-11-30 963 struct ubuf_info *uarg = NULL; ^1da177e4c3f41 Linus Torvalds 2005-04-16 964 struct sk_buff *skb; 07df5294a753df Herbert Xu 2011-03-01 965 struct ip_options *opt = cork->opt; ^1da177e4c3f41 Linus Torvalds 2005-04-16 966 int hh_len; ^1da177e4c3f41 Linus Torvalds 2005-04-16 967 int exthdrlen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 968 int mtu; ^1da177e4c3f41 Linus Torvalds 2005-04-16 969 int copy; ^1da177e4c3f41 Linus Torvalds 2005-04-16 970 int err; ^1da177e4c3f41 Linus Torvalds 2005-04-16 971 int offset = 0; 8eb77cc73977d8 Pavel Begunkov 2022-07-12 972 bool zc = false; daba287b299ec7 Hannes Frederic Sowa 2013-10-27 973 unsigned int maxfraglen, fragheaderlen, maxnonfragsize; ^1da177e4c3f41 Linus Torvalds 2005-04-16 974 int csummode = CHECKSUM_NONE; 05d6d492097c55 Eric Dumazet 2024-04-29 975 struct rtable *rt = dst_rtable(cork->dst); 488b6d91b07112 Vadim Fedorenko 2024-02-13 976 bool paged, hold_tskey, extra_uref = false; 694aba690de062 Eric Dumazet 2018-03-31 977 unsigned int wmem_alloc_delta = 0; 09c2d251b70723 Willem de Bruijn 2014-08-04 978 u32 tskey = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 979 96d7303e9cfb6a Steffen Klassert 2011-06-05 980 skb = skb_peek_tail(queue); 96d7303e9cfb6a Steffen Klassert 2011-06-05 981 96d7303e9cfb6a Steffen Klassert 2011-06-05 982 exthdrlen = !skb ? rt->dst.header_len : 0; bec1f6f697362c Willem de Bruijn 2018-04-26 983 mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; 15e36f5b8e982d Willem de Bruijn 2018-04-26 984 paged = !!cork->gso_size; bec1f6f697362c Willem de Bruijn 2018-04-26 985 d8d1f30b95a635 Changli Gao 2010-06-10 986 hh_len = LL_RESERVED_SPACE(rt->dst.dev); ^1da177e4c3f41 Linus Torvalds 2005-04-16 987 ^1da177e4c3f41 Linus Torvalds 2005-04-16 988 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); ^1da177e4c3f41 Linus Torvalds 2005-04-16 989 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; cbc08a33126f8f Miaohe Lin 2020-08-29 990 maxnonfragsize = ip_sk_ignore_df(sk) ? IP_MAX_MTU : mtu; ^1da177e4c3f41 Linus Torvalds 2005-04-16 991 daba287b299ec7 Hannes Frederic Sowa 2013-10-27 992 if (cork->length + length > maxnonfragsize - fragheaderlen) { f5fca608651129 David S. Miller 2011-05-08 993 ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, 61e7f09d0f437c Hannes Frederic Sowa 2013-12-19 994 mtu - (opt ? opt->optlen : 0)); ^1da177e4c3f41 Linus Torvalds 2005-04-16 995 return -EMSGSIZE; ^1da177e4c3f41 Linus Torvalds 2005-04-16 996 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 997 ^1da177e4c3f41 Linus Torvalds 2005-04-16 998 /* ^1da177e4c3f41 Linus Torvalds 2005-04-16 999 * transhdrlen > 0 means that this is the first fragment and we wish ^1da177e4c3f41 Linus Torvalds 2005-04-16 1000 * it won't be fragmented in the future. ^1da177e4c3f41 Linus Torvalds 2005-04-16 1001 */ ^1da177e4c3f41 Linus Torvalds 2005-04-16 1002 if (transhdrlen && ^1da177e4c3f41 Linus Torvalds 2005-04-16 1003 length + fragheaderlen <= mtu && c8cd0989bd151f Tom Herbert 2015-12-14 1004 rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && bec1f6f697362c Willem de Bruijn 2018-04-26 1005 (!(flags & MSG_MORE) || cork->gso_size) && cd027a5433d667 Jacek Kalwas 2018-04-12 1006 (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) 84fa7933a33f80 Patrick McHardy 2006-08-29 1007 csummode = CHECKSUM_PARTIAL; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1008 c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1009 if ((flags & MSG_ZEROCOPY) && length) { c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1010 struct msghdr *msg = from; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1011 c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1012 if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1013 if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1014 return -EINVAL; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1015 c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1016 /* Leave uarg NULL if can't zerocopy, callers should c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1017 * be able to handle it. c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1018 */ c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1019 if ((rt->dst.dev->features & NETIF_F_SG) && c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1020 csummode == CHECKSUM_PARTIAL) { c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1021 paged = true; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1022 zc = true; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1023 uarg = msg->msg_ubuf; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1024 } c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1025 } else if (sock_flag(sk, SOCK_ZEROCOPY)) { 8c793822c5803e Jonathan Lemon 2021-01-06 1026 uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); b5947e5d1e710c Willem de Bruijn 2018-11-30 1027 if (!uarg) b5947e5d1e710c Willem de Bruijn 2018-11-30 1028 return -ENOBUFS; 522924b583082f Willem de Bruijn 2019-06-07 1029 extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ b5947e5d1e710c Willem de Bruijn 2018-11-30 1030 if (rt->dst.dev->features & NETIF_F_SG && b5947e5d1e710c Willem de Bruijn 2018-11-30 1031 csummode == CHECKSUM_PARTIAL) { b5947e5d1e710c Willem de Bruijn 2018-11-30 1032 paged = true; 8eb77cc73977d8 Pavel Begunkov 2022-07-12 1033 zc = true; b5947e5d1e710c Willem de Bruijn 2018-11-30 1034 } else { e7d2b510165fff Pavel Begunkov 2022-09-23 1035 uarg_to_msgzc(uarg)->zerocopy = 0; 52900d22288e7d Willem de Bruijn 2018-11-30 1036 skb_zcopy_set(skb, uarg, &extra_uref); b5947e5d1e710c Willem de Bruijn 2018-11-30 1037 } b5947e5d1e710c Willem de Bruijn 2018-11-30 1038 } 7da0dde68486b2 David Howells 2023-05-22 1039 } else if ((flags & MSG_SPLICE_PAGES) && length) { cafbe182a467bf Eric Dumazet 2023-08-16 1040 if (inet_test_bit(HDRINCL, sk)) 7da0dde68486b2 David Howells 2023-05-22 1041 return -EPERM; 5a6f6873606e03 David Howells 2023-06-14 1042 if (rt->dst.dev->features & NETIF_F_SG && 5a6f6873606e03 David Howells 2023-06-14 1043 getfrag == ip_generic_getfrag) 7da0dde68486b2 David Howells 2023-05-22 1044 /* We need an empty buffer to attach stuff to */ 7da0dde68486b2 David Howells 2023-05-22 1045 paged = true; 7da0dde68486b2 David Howells 2023-05-22 1046 else 7da0dde68486b2 David Howells 2023-05-22 1047 flags &= ~MSG_SPLICE_PAGES; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1048 } b5947e5d1e710c Willem de Bruijn 2018-11-30 1049 1470ddf7f8cecf Herbert Xu 2011-03-01 1050 cork->length += length; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1051 b7399073687728 Vadim Fedorenko 2024-09-02 1052 if (cork->tx_flags & SKBTX_ANY_TSTAMP && b7399073687728 Vadim Fedorenko 2024-09-02 1053 READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { b7399073687728 Vadim Fedorenko 2024-09-02 1054 if (cork->flags & IPCORK_TS_OPT_ID) { b7399073687728 Vadim Fedorenko 2024-09-02 1055 tskey = cork->ts_opt_id; b7399073687728 Vadim Fedorenko 2024-09-02 1056 } else { 488b6d91b07112 Vadim Fedorenko 2024-02-13 1057 tskey = atomic_inc_return(&sk->sk_tskey) - 1; b7399073687728 Vadim Fedorenko 2024-09-02 1058 hold_tskey = true; hold_tskey is never set to false. b7399073687728 Vadim Fedorenko 2024-09-02 1059 } b7399073687728 Vadim Fedorenko 2024-09-02 1060 } 488b6d91b07112 Vadim Fedorenko 2024-02-13 1061 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1062 /* So, what's going on in the loop below? ^1da177e4c3f41 Linus Torvalds 2005-04-16 1063 * ^1da177e4c3f41 Linus Torvalds 2005-04-16 1064 * We use calculated fragment length to generate chained skb, ^1da177e4c3f41 Linus Torvalds 2005-04-16 1065 * each of segments is IP fragment ready for sending to network after ^1da177e4c3f41 Linus Torvalds 2005-04-16 1066 * adding appropriate IP header. ^1da177e4c3f41 Linus Torvalds 2005-04-16 1067 */ ^1da177e4c3f41 Linus Torvalds 2005-04-16 1068 26cde9f7e2747b Herbert Xu 2010-06-15 1069 if (!skb) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1070 goto alloc_new_skb; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1071 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1072 while (length > 0) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1073 /* Check if the remaining data fits into current packet. */ ^1da177e4c3f41 Linus Torvalds 2005-04-16 1074 copy = mtu - skb->len; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1075 if (copy < length) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1076 copy = maxfraglen - skb->len; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1077 if (copy <= 0) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1078 char *data; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1079 unsigned int datalen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1080 unsigned int fraglen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1081 unsigned int fraggap; 6d123b81ac6150 Jakub Kicinski 2021-06-23 1082 unsigned int alloclen, alloc_extra; aba36930a35e7f Willem de Bruijn 2018-11-24 1083 unsigned int pagedlen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1084 struct sk_buff *skb_prev; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1085 alloc_new_skb: ^1da177e4c3f41 Linus Torvalds 2005-04-16 1086 skb_prev = skb; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1087 if (skb_prev) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1088 fraggap = skb_prev->len - maxfraglen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1089 else ^1da177e4c3f41 Linus Torvalds 2005-04-16 1090 fraggap = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1091 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1092 /* ^1da177e4c3f41 Linus Torvalds 2005-04-16 1093 * If remaining data exceeds the mtu, ^1da177e4c3f41 Linus Torvalds 2005-04-16 1094 * we know we need more fragment(s). ^1da177e4c3f41 Linus Torvalds 2005-04-16 1095 */ ^1da177e4c3f41 Linus Torvalds 2005-04-16 1096 datalen = length + fraggap; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1097 if (datalen > mtu - fragheaderlen) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1098 datalen = maxfraglen - fragheaderlen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1099 fraglen = datalen + fragheaderlen; aba36930a35e7f Willem de Bruijn 2018-11-24 1100 pagedlen = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1101 6d123b81ac6150 Jakub Kicinski 2021-06-23 1102 alloc_extra = hh_len + 15; 6d123b81ac6150 Jakub Kicinski 2021-06-23 1103 alloc_extra += exthdrlen; 6d123b81ac6150 Jakub Kicinski 2021-06-23 1104 6d123b81ac6150 Jakub Kicinski 2021-06-23 1105 /* The last fragment gets additional space at tail. 6d123b81ac6150 Jakub Kicinski 2021-06-23 1106 * Note, with MSG_MORE we overallocate on fragments, 6d123b81ac6150 Jakub Kicinski 2021-06-23 1107 * because we have no idea what fragment will be 6d123b81ac6150 Jakub Kicinski 2021-06-23 1108 * the last. 6d123b81ac6150 Jakub Kicinski 2021-06-23 1109 */ 6d123b81ac6150 Jakub Kicinski 2021-06-23 1110 if (datalen == length + fraggap) 6d123b81ac6150 Jakub Kicinski 2021-06-23 1111 alloc_extra += rt->dst.trailer_len; 6d123b81ac6150 Jakub Kicinski 2021-06-23 1112 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1113 if ((flags & MSG_MORE) && d8d1f30b95a635 Changli Gao 2010-06-10 1114 !(rt->dst.dev->features&NETIF_F_SG)) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1115 alloclen = mtu; 6d123b81ac6150 Jakub Kicinski 2021-06-23 1116 else if (!paged && 6d123b81ac6150 Jakub Kicinski 2021-06-23 1117 (fraglen + alloc_extra < SKB_MAX_ALLOC || 6d123b81ac6150 Jakub Kicinski 2021-06-23 1118 !(rt->dst.dev->features & NETIF_F_SG))) 59104f062435c7 Eric Dumazet 2010-09-20 1119 alloclen = fraglen; 47cf88993c9108 Pavel Begunkov 2022-08-25 1120 else { 8eb77cc73977d8 Pavel Begunkov 2022-07-12 1121 alloclen = fragheaderlen + transhdrlen; 8eb77cc73977d8 Pavel Begunkov 2022-07-12 1122 pagedlen = datalen - transhdrlen; 15e36f5b8e982d Willem de Bruijn 2018-04-26 1123 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 1124 6d123b81ac6150 Jakub Kicinski 2021-06-23 1125 alloclen += alloc_extra; 33f99dc7fd948b Steffen Klassert 2011-06-22 1126 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1127 if (transhdrlen) { 6d123b81ac6150 Jakub Kicinski 2021-06-23 1128 skb = sock_alloc_send_skb(sk, alloclen, ^1da177e4c3f41 Linus Torvalds 2005-04-16 1129 (flags & MSG_DONTWAIT), &err); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1130 } else { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1131 skb = NULL; 694aba690de062 Eric Dumazet 2018-03-31 1132 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= ^1da177e4c3f41 Linus Torvalds 2005-04-16 1133 2 * sk->sk_sndbuf) 6d123b81ac6150 Jakub Kicinski 2021-06-23 1134 skb = alloc_skb(alloclen, ^1da177e4c3f41 Linus Torvalds 2005-04-16 1135 sk->sk_allocation); 51456b2914a34d Ian Morris 2015-04-03 1136 if (unlikely(!skb)) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1137 err = -ENOBUFS; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1138 } 51456b2914a34d Ian Morris 2015-04-03 1139 if (!skb) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1140 goto error; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1141 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1142 /* ^1da177e4c3f41 Linus Torvalds 2005-04-16 1143 * Fill in the control structures ^1da177e4c3f41 Linus Torvalds 2005-04-16 1144 */ ^1da177e4c3f41 Linus Torvalds 2005-04-16 1145 skb->ip_summed = csummode; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1146 skb->csum = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1147 skb_reserve(skb, hh_len); 11878b40ed5c5b Willem de Bruijn 2014-07-14 1148 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1149 /* ^1da177e4c3f41 Linus Torvalds 2005-04-16 1150 * Find where to start putting bytes. ^1da177e4c3f41 Linus Torvalds 2005-04-16 1151 */ 15e36f5b8e982d Willem de Bruijn 2018-04-26 1152 data = skb_put(skb, fraglen + exthdrlen - pagedlen); c14d2450cb7fe1 Arnaldo Carvalho de Melo 2007-03-11 1153 skb_set_network_header(skb, exthdrlen); b0e380b1d8a8e0 Arnaldo Carvalho de Melo 2007-04-10 1154 skb->transport_header = (skb->network_header + b0e380b1d8a8e0 Arnaldo Carvalho de Melo 2007-04-10 1155 fragheaderlen); 353e5c9abd900d Steffen Klassert 2011-06-22 1156 data += fragheaderlen + exthdrlen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1157 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1158 if (fraggap) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1159 skb->csum = skb_copy_and_csum_bits( ^1da177e4c3f41 Linus Torvalds 2005-04-16 1160 skb_prev, maxfraglen, 8d5930dfb7edbf Al Viro 2020-07-10 1161 data + transhdrlen, fraggap); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1162 skb_prev->csum = csum_sub(skb_prev->csum, ^1da177e4c3f41 Linus Torvalds 2005-04-16 1163 skb->csum); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1164 data += fraggap; e9fa4f7bd291c2 Herbert Xu 2006-08-13 1165 pskb_trim_unique(skb_prev, maxfraglen); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1166 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 1167 15e36f5b8e982d Willem de Bruijn 2018-04-26 1168 copy = datalen - transhdrlen - fraggap - pagedlen; 0f71c9caf26726 David Howells 2023-08-01 1169 /* [!] NOTE: copy will be negative if pagedlen>0 0f71c9caf26726 David Howells 2023-08-01 1170 * because then the equation reduces to -fraggap. 0f71c9caf26726 David Howells 2023-08-01 1171 */ ^1da177e4c3f41 Linus Torvalds 2005-04-16 1172 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1173 err = -EFAULT; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1174 kfree_skb(skb); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1175 goto error; 0f71c9caf26726 David Howells 2023-08-01 1176 } else if (flags & MSG_SPLICE_PAGES) { 0f71c9caf26726 David Howells 2023-08-01 1177 copy = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1178 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 1179 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1180 offset += copy; 15e36f5b8e982d Willem de Bruijn 2018-04-26 1181 length -= copy + transhdrlen; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1182 transhdrlen = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1183 exthdrlen = 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1184 csummode = CHECKSUM_NONE; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1185 52900d22288e7d Willem de Bruijn 2018-11-30 1186 /* only the initial fragment is time stamped */ 52900d22288e7d Willem de Bruijn 2018-11-30 1187 skb_shinfo(skb)->tx_flags = cork->tx_flags; 52900d22288e7d Willem de Bruijn 2018-11-30 1188 cork->tx_flags = 0; 52900d22288e7d Willem de Bruijn 2018-11-30 1189 skb_shinfo(skb)->tskey = tskey; 52900d22288e7d Willem de Bruijn 2018-11-30 1190 tskey = 0; 52900d22288e7d Willem de Bruijn 2018-11-30 1191 skb_zcopy_set(skb, uarg, &extra_uref); 52900d22288e7d Willem de Bruijn 2018-11-30 1192 0dec879f636f11 Julian Anastasov 2017-02-06 1193 if ((flags & MSG_CONFIRM) && !skb_prev) 0dec879f636f11 Julian Anastasov 2017-02-06 1194 skb_set_dst_pending_confirm(skb, 1); 0dec879f636f11 Julian Anastasov 2017-02-06 1195 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1196 /* ^1da177e4c3f41 Linus Torvalds 2005-04-16 1197 * Put the packet on the pending queue. ^1da177e4c3f41 Linus Torvalds 2005-04-16 1198 */ 694aba690de062 Eric Dumazet 2018-03-31 1199 if (!skb->destructor) { 694aba690de062 Eric Dumazet 2018-03-31 1200 skb->destructor = sock_wfree; 694aba690de062 Eric Dumazet 2018-03-31 1201 skb->sk = sk; 694aba690de062 Eric Dumazet 2018-03-31 1202 wmem_alloc_delta += skb->truesize; 694aba690de062 Eric Dumazet 2018-03-31 1203 } 1470ddf7f8cecf Herbert Xu 2011-03-01 1204 __skb_queue_tail(queue, skb); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1205 continue; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1206 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 1207 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1208 if (copy > length) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1209 copy = length; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1210 113f99c3358564 Willem de Bruijn 2018-05-17 1211 if (!(rt->dst.dev->features&NETIF_F_SG) && 113f99c3358564 Willem de Bruijn 2018-05-17 1212 skb_tailroom(skb) >= copy) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1213 unsigned int off; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1214 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1215 off = skb->len; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1216 if (getfrag(from, skb_put(skb, copy), ^1da177e4c3f41 Linus Torvalds 2005-04-16 1217 offset, copy, off, skb) < 0) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1218 __skb_trim(skb, off); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1219 err = -EFAULT; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1220 goto error; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1221 } 7da0dde68486b2 David Howells 2023-05-22 1222 } else if (flags & MSG_SPLICE_PAGES) { 7da0dde68486b2 David Howells 2023-05-22 1223 struct msghdr *msg = from; 7da0dde68486b2 David Howells 2023-05-22 1224 0f71c9caf26726 David Howells 2023-08-01 1225 err = -EIO; 0f71c9caf26726 David Howells 2023-08-01 1226 if (WARN_ON_ONCE(copy > msg->msg_iter.count)) 0f71c9caf26726 David Howells 2023-08-01 1227 goto error; 0f71c9caf26726 David Howells 2023-08-01 1228 7da0dde68486b2 David Howells 2023-05-22 1229 err = skb_splice_from_iter(skb, &msg->msg_iter, copy, 7da0dde68486b2 David Howells 2023-05-22 1230 sk->sk_allocation); 7da0dde68486b2 David Howells 2023-05-22 1231 if (err < 0) 7da0dde68486b2 David Howells 2023-05-22 1232 goto error; 7da0dde68486b2 David Howells 2023-05-22 1233 copy = err; 7da0dde68486b2 David Howells 2023-05-22 1234 wmem_alloc_delta += copy; c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1235 } else if (!zc) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1236 int i = skb_shinfo(skb)->nr_frags; 5640f7685831e0 Eric Dumazet 2012-09-23 1237 ^1da177e4c3f41 Linus Torvalds 2005-04-16 1238 err = -ENOMEM; 5640f7685831e0 Eric Dumazet 2012-09-23 1239 if (!sk_page_frag_refill(sk, pfrag)) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1240 goto error; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1241 c445f31b3cfaa0 Pavel Begunkov 2022-07-12 1242 skb_zcopy_downgrade_managed(skb); 5640f7685831e0 Eric Dumazet 2012-09-23 1243 if (!skb_can_coalesce(skb, i, pfrag->page, 5640f7685831e0 Eric Dumazet 2012-09-23 1244 pfrag->offset)) { ^1da177e4c3f41 Linus Torvalds 2005-04-16 1245 err = -EMSGSIZE; 5640f7685831e0 Eric Dumazet 2012-09-23 1246 if (i == MAX_SKB_FRAGS) ^1da177e4c3f41 Linus Torvalds 2005-04-16 1247 goto error; 5640f7685831e0 Eric Dumazet 2012-09-23 1248 5640f7685831e0 Eric Dumazet 2012-09-23 1249 __skb_fill_page_desc(skb, i, pfrag->page, 5640f7685831e0 Eric Dumazet 2012-09-23 1250 pfrag->offset, 0); 5640f7685831e0 Eric Dumazet 2012-09-23 1251 skb_shinfo(skb)->nr_frags = ++i; 5640f7685831e0 Eric Dumazet 2012-09-23 1252 get_page(pfrag->page); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1253 } 5640f7685831e0 Eric Dumazet 2012-09-23 1254 copy = min_t(int, copy, pfrag->size - pfrag->offset); 5640f7685831e0 Eric Dumazet 2012-09-23 1255 if (getfrag(from, 5640f7685831e0 Eric Dumazet 2012-09-23 1256 page_address(pfrag->page) + pfrag->offset, 5640f7685831e0 Eric Dumazet 2012-09-23 1257 offset, copy, skb->len, skb) < 0) 5640f7685831e0 Eric Dumazet 2012-09-23 1258 goto error_efault; 5640f7685831e0 Eric Dumazet 2012-09-23 1259 5640f7685831e0 Eric Dumazet 2012-09-23 1260 pfrag->offset += copy; 5640f7685831e0 Eric Dumazet 2012-09-23 1261 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); ede57d58e6f38d Richard Gobert 2022-06-22 1262 skb_len_add(skb, copy); 694aba690de062 Eric Dumazet 2018-03-31 1263 wmem_alloc_delta += copy; b5947e5d1e710c Willem de Bruijn 2018-11-30 1264 } else { b5947e5d1e710c Willem de Bruijn 2018-11-30 1265 err = skb_zerocopy_iter_dgram(skb, from, copy); b5947e5d1e710c Willem de Bruijn 2018-11-30 1266 if (err < 0) b5947e5d1e710c Willem de Bruijn 2018-11-30 1267 goto error; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1268 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 1269 offset += copy; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1270 length -= copy; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1271 } ^1da177e4c3f41 Linus Torvalds 2005-04-16 1272 9e8445a56c253f Paolo Abeni 2018-04-04 1273 if (wmem_alloc_delta) 694aba690de062 Eric Dumazet 2018-03-31 1274 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1275 return 0; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1276 5640f7685831e0 Eric Dumazet 2012-09-23 1277 error_efault: 5640f7685831e0 Eric Dumazet 2012-09-23 1278 err = -EFAULT; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1279 error: 8e0449172497a9 Jonathan Lemon 2021-01-06 1280 net_zcopy_put_abort(uarg, extra_uref); 1470ddf7f8cecf Herbert Xu 2011-03-01 1281 cork->length -= length; 5e38e270444f26 Pavel Emelyanov 2008-07-16 1282 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); 694aba690de062 Eric Dumazet 2018-03-31 1283 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); 488b6d91b07112 Vadim Fedorenko 2024-02-13 @1284 if (hold_tskey) 488b6d91b07112 Vadim Fedorenko 2024-02-13 1285 atomic_dec(&sk->sk_tskey); ^1da177e4c3f41 Linus Torvalds 2005-04-16 1286 return err; ^1da177e4c3f41 Linus Torvalds 2005-04-16 1287 }
On 03/09/2024 09:06, Dan Carpenter wrote: > Hi Vadim, > > kernel test robot noticed the following build warnings: > > url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-txtimestamp-add-SCM_TS_OPT_ID-test/20240902-212008 > base: net-next/main > patch link: https://lore.kernel.org/r/20240902130937.457115-1-vadfed%40meta.com > patch subject: [PATCH net-next v2 1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message > config: csky-randconfig-r072-20240903 (https://download.01.org/0day-ci/archive/20240903/202409031142.3dSuW9Oo-lkp@intel.com/config) > compiler: csky-linux-gcc (GCC) 14.1.0 > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Reported-by: Dan Carpenter <dan.carpenter@linaro.org> > | Closes: https://lore.kernel.org/r/202409031142.3dSuW9Oo-lkp@intel.com/ > > smatch warnings: > net/ipv4/ip_output.c:1284 __ip_append_data() error: uninitialized symbol 'hold_tskey'. > > vim +/hold_tskey +1284 net/ipv4/ip_output.c > [.. snip ..] > ^1da177e4c3f41 Linus Torvalds 2005-04-16 1051 > b7399073687728 Vadim Fedorenko 2024-09-02 1052 if (cork->tx_flags & SKBTX_ANY_TSTAMP && > b7399073687728 Vadim Fedorenko 2024-09-02 1053 READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { > b7399073687728 Vadim Fedorenko 2024-09-02 1054 if (cork->flags & IPCORK_TS_OPT_ID) { > b7399073687728 Vadim Fedorenko 2024-09-02 1055 tskey = cork->ts_opt_id; > b7399073687728 Vadim Fedorenko 2024-09-02 1056 } else { > 488b6d91b07112 Vadim Fedorenko 2024-02-13 1057 tskey = atomic_inc_return(&sk->sk_tskey) - 1; > b7399073687728 Vadim Fedorenko 2024-09-02 1058 hold_tskey = true; > > hold_tskey is never set to false. Hi Dan, This was already flagged by Simon, I'll fix it the next version. Thanks, Vadim
On Mon, Sep 02, 2024 at 08:35:26PM +0100, Vadim Fedorenko wrote: > On 02/09/2024 19:38, Simon Horman wrote: > > On Mon, Sep 02, 2024 at 06:09:35AM -0700, Vadim Fedorenko wrote: > > > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > > > timestamps and packets sent via socket. Unfortunately, there is no way > > > to reliably predict socket timestamp ID value in case of error returned > > > by sendmsg. For UDP sockets it's impossible because of lockless > > > nature of UDP transmit, several threads may send packets in parallel. In > > > case of RAW sockets MSG_MORE option makes things complicated. More > > > details are in the conversation [1]. > > > This patch adds new control message type to give user-space > > > software an opportunity to control the mapping between packets and > > > values by providing ID with each sendmsg. This works fine for UDP > > > sockets only, and explicit check is added to control message parser. > > > > > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > > > > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > > > > ... > > > > > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c > > > > ... > > > > > @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, > > > flags &= ~MSG_SPLICE_PAGES; > > > } > > > - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > > > - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > > > - if (hold_tskey) > > > - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > > > + if (cork->tx_flags & SKBTX_ANY_TSTAMP && > > > + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { > > > + if (cork->flags & IPCORK_TS_OPT_ID) { > > > + tskey = cork->ts_opt_id; > > > + } else { > > > + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > > > + hold_tskey = true; > > > > Hi Vadim, > > > > I think that hold_tskey also needs to be assigned a value in > > the cases where wither of the if conditions above are false. > > Hi Simon! > > Yes, you are right. I should probably init it with false to avoid > 'else' statement. Thanks, I agree that seems like a good approach.
On 02/09/2024 16:51, Willem de Bruijn wrote: > Jason Xing wrote: >> On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: >>> >>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >>> timestamps and packets sent via socket. Unfortunately, there is no way >>> to reliably predict socket timestamp ID value in case of error returned >>> by sendmsg. For UDP sockets it's impossible because of lockless >>> nature of UDP transmit, several threads may send packets in parallel. In >>> case of RAW sockets MSG_MORE option makes things complicated. More >>> details are in the conversation [1]. >>> This patch adds new control message type to give user-space >>> software an opportunity to control the mapping between packets and >>> values by providing ID with each sendmsg. This works fine for UDP >>> sockets only, and explicit check is added to control message parser. >>> >>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >>> >>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >>> --- >>> Documentation/networking/timestamping.rst | 14 ++++++++++++++ >>> arch/alpha/include/uapi/asm/socket.h | 4 +++- >>> arch/mips/include/uapi/asm/socket.h | 2 ++ >>> arch/parisc/include/uapi/asm/socket.h | 2 ++ >>> arch/sparc/include/uapi/asm/socket.h | 2 ++ >>> include/net/inet_sock.h | 4 +++- >>> include/net/sock.h | 1 + >>> include/uapi/asm-generic/socket.h | 2 ++ >>> include/uapi/linux/net_tstamp.h | 3 ++- >>> net/core/sock.c | 12 ++++++++++++ >>> net/ethtool/common.c | 1 + >>> net/ipv4/ip_output.c | 16 ++++++++++++---- >>> net/ipv6/ip6_output.c | 16 ++++++++++++---- >>> 13 files changed, 68 insertions(+), 11 deletions(-) >>> >>> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst >>> index 5e93cd71f99f..93b0901e4e8e 100644 >>> --- a/Documentation/networking/timestamping.rst >>> +++ b/Documentation/networking/timestamping.rst >>> @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: >>> among all possibly concurrently outstanding timestamp requests for >>> that socket. >>> >>> + With this option enabled user-space application can provide custom >>> + ID for each message sent via UDP socket with control message with >>> + type set to SCM_TS_OPT_ID:: >>> + >>> + struct msghdr *msg; >>> + ... >>> + cmsg = CMSG_FIRSTHDR(msg); >>> + cmsg->cmsg_level = SOL_SOCKET; >>> + cmsg->cmsg_type = SO_TIMESTAMPING; >>> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); >>> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; >>> + err = sendmsg(fd, msg, 0); >>> + >>> + >>> SOF_TIMESTAMPING_OPT_ID_TCP: >>> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP >>> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the >>> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h >>> index e94f621903fe..0698e6662cdf 100644 >>> --- a/arch/alpha/include/uapi/asm/socket.h >>> +++ b/arch/alpha/include/uapi/asm/socket.h >>> @@ -10,7 +10,7 @@ >>> * Note: we only bother about making the SOL_SOCKET options >>> * same as OSF/1, as that's all that "normal" programs are >>> * likely to set. We don't necessarily want to be binary >>> - * compatible with _everything_. >>> + * compatible with _everything_. >>> */ >>> #define SOL_SOCKET 0xffff >>> >>> @@ -140,6 +140,8 @@ >>> #define SO_PASSPIDFD 76 >>> #define SO_PEERPIDFD 77 >>> >>> +#define SCM_TS_OPT_ID 78 >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 >>> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h >>> index 60ebaed28a4c..bb3dc8feb205 100644 >>> --- a/arch/mips/include/uapi/asm/socket.h >>> +++ b/arch/mips/include/uapi/asm/socket.h >>> @@ -151,6 +151,8 @@ >>> #define SO_PASSPIDFD 76 >>> #define SO_PEERPIDFD 77 >>> >>> +#define SCM_TS_OPT_ID 78 >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 >>> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h >>> index be264c2b1a11..c3ab3b3289eb 100644 >>> --- a/arch/parisc/include/uapi/asm/socket.h >>> +++ b/arch/parisc/include/uapi/asm/socket.h >>> @@ -132,6 +132,8 @@ >>> #define SO_PASSPIDFD 0x404A >>> #define SO_PEERPIDFD 0x404B >>> >>> +#define SCM_TS_OPT_ID 0x404C >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 >>> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h >>> index 682da3714686..9b40f0a57fbc 100644 >>> --- a/arch/sparc/include/uapi/asm/socket.h >>> +++ b/arch/sparc/include/uapi/asm/socket.h >>> @@ -133,6 +133,8 @@ >>> #define SO_PASSPIDFD 0x0055 >>> #define SO_PEERPIDFD 0x0056 >>> >>> +#define SCM_TS_OPT_ID 0x0057 >>> + >>> #if !defined(__KERNEL__) >>> >>> >>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >>> index 394c3b66065e..2161d50cf0fd 100644 >>> --- a/include/net/inet_sock.h >>> +++ b/include/net/inet_sock.h >>> @@ -174,6 +174,7 @@ struct inet_cork { >>> __s16 tos; >>> char priority; >>> __u16 gso_size; >>> + u32 ts_opt_id; >>> u64 transmit_time; >>> u32 mark; >>> }; >>> @@ -241,7 +242,8 @@ struct inet_sock { >>> struct inet_cork_full cork; >>> }; >>> >>> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ >>> >>> enum { >>> INET_FLAGS_PKTINFO = 0, >>> diff --git a/include/net/sock.h b/include/net/sock.h >>> index f51d61fab059..73e21dad5660 100644 >>> --- a/include/net/sock.h >>> +++ b/include/net/sock.h >>> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { >>> u64 transmit_time; >>> u32 mark; >>> u32 tsflags; >>> + u32 ts_opt_id; >>> }; >>> >>> static inline void sockcm_init(struct sockcm_cookie *sockc, >>> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h >>> index 8ce8a39a1e5f..db3df3e74b01 100644 >>> --- a/include/uapi/asm-generic/socket.h >>> +++ b/include/uapi/asm-generic/socket.h >>> @@ -135,6 +135,8 @@ >>> #define SO_PASSPIDFD 76 >>> #define SO_PEERPIDFD 77 >>> >>> +#define SCM_TS_OPT_ID 78 >>> + >>> #if !defined(__KERNEL__) >>> >>> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) >>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >>> index a2c66b3d7f0f..e2f145e3f3a1 100644 >>> --- a/include/uapi/linux/net_tstamp.h >>> +++ b/include/uapi/linux/net_tstamp.h >>> @@ -32,8 +32,9 @@ enum { >>> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), >>> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), >>> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), >>> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), >> >> I'm not sure if the new flag needs to be documented as well? After >> this patch, people may search the key word in the documentation file >> and then find nothing. >> >> If we have this flag here, normally it means we can pass it through >> setsockopt, so is it expected? If it's an exception, I reckon that we >> can forbid passing/setting this option in sock_set_timestamping() and >> document this rule? > > Good point, thanks. > > It must definitely not be part of SOF_TIMESTAMPING_MASK. My bad for > suggesting without giving it much thought. > > The bit is kernel-internal. No need to even mention it in user-facing > documentation. But anyone reading net_tstamp.h might wonder what it > does. > > It should not even be in a UAPI header, but in an internal one. > Probably include/net/sock.h, near SK_FLAGS_TIMESTAMP. > > Maybe we can reserve bit 31 in u32 sk_tsflags. And if we ever have > to double that flag size, it can move up to 63, as it is not UAPI in > any way. This is a workaround to having a separate flags field in > sockcm_cookie. > > And have a BUILD_BUG_ON if SOF_TIMESTAMPING_LAST reaches this reserved > region. Hi Willem, There is one more issue here. sk_tsflags is u32 as well as sockcm_cookie::tsflags. But sock_tx_timestamp receives __u16 tsflags, usually filled with sockc.tsflags. It works now because SOF_TIMESTAMPING_OPT_ID_TCP is not checked in these functions, but it's wrong in general. Should I fix it in this series or it's better to do in a seperate one?
Vadim Fedorenko wrote: > On 02/09/2024 15:29, Willem de Bruijn wrote: > > Vadim Fedorenko wrote: > >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > >> timestamps and packets sent via socket. Unfortunately, there is no way > >> to reliably predict socket timestamp ID value in case of error returned > >> by sendmsg. For UDP sockets it's impossible because of lockless > >> nature of UDP transmit, several threads may send packets in parallel. In > >> case of RAW sockets MSG_MORE option makes things complicated. More > >> details are in the conversation [1]. > >> This patch adds new control message type to give user-space > >> software an opportunity to control the mapping between packets and > >> values by providing ID with each sendmsg. This works fine for UDP > >> sockets only, and explicit check is added to control message parser. > >> > >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > >> > >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > >> --- > >> Documentation/networking/timestamping.rst | 14 ++++++++++++++ > >> arch/alpha/include/uapi/asm/socket.h | 4 +++- > >> arch/mips/include/uapi/asm/socket.h | 2 ++ > >> arch/parisc/include/uapi/asm/socket.h | 2 ++ > >> arch/sparc/include/uapi/asm/socket.h | 2 ++ > >> include/net/inet_sock.h | 4 +++- > >> include/net/sock.h | 1 + > >> include/uapi/asm-generic/socket.h | 2 ++ > >> include/uapi/linux/net_tstamp.h | 3 ++- > >> net/core/sock.c | 12 ++++++++++++ > >> net/ethtool/common.c | 1 + > >> net/ipv4/ip_output.c | 16 ++++++++++++---- > >> net/ipv6/ip6_output.c | 16 ++++++++++++---- > >> 13 files changed, 68 insertions(+), 11 deletions(-) > >> > >> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst > >> index 5e93cd71f99f..93b0901e4e8e 100644 > >> --- a/Documentation/networking/timestamping.rst > >> +++ b/Documentation/networking/timestamping.rst > >> @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: > >> among all possibly concurrently outstanding timestamp requests for > >> that socket. > >> > >> + With this option enabled user-space application can provide custom > >> + ID for each message sent via UDP socket with control message with > >> + type set to SCM_TS_OPT_ID:: > >> + > >> + struct msghdr *msg; > >> + ... > >> + cmsg = CMSG_FIRSTHDR(msg); > >> + cmsg->cmsg_level = SOL_SOCKET; > >> + cmsg->cmsg_type = SO_TIMESTAMPING; > >> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); > >> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; > >> + err = sendmsg(fd, msg, 0); > >> + > > > > Please make it clear that this CMSG is optional. > > > > The process can optionally override the default generated ID, by > > passing a specific ID with control message SCM_TS_OPT_ID: > > Ok, I'll re-phrase it this way, thanks! > > > >> SOF_TIMESTAMPING_OPT_ID_TCP: > >> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP > >> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the > >> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > >> index e94f621903fe..0698e6662cdf 100644 > >> --- a/arch/alpha/include/uapi/asm/socket.h > >> +++ b/arch/alpha/include/uapi/asm/socket.h > >> @@ -10,7 +10,7 @@ > >> * Note: we only bother about making the SOL_SOCKET options > >> * same as OSF/1, as that's all that "normal" programs are > >> * likely to set. We don't necessarily want to be binary > >> - * compatible with _everything_. > >> + * compatible with _everything_. > > > > Is this due to a checkpatch warning? If so, please add a brief comment > > to the commit message to show that this change is intentional. If not, > > please don't touch unrelated code. > > I'll remove it, because it looks like it was some unhappy linter... > > >> */ > >> #define SOL_SOCKET 0xffff > >> > >> @@ -140,6 +140,8 @@ > >> #define SO_PASSPIDFD 76 > >> #define SO_PEERPIDFD 77 > >> > >> +#define SCM_TS_OPT_ID 78 > >> + > >> #if !defined(__KERNEL__) > >> > >> #if __BITS_PER_LONG == 64 > >> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > >> index 60ebaed28a4c..bb3dc8feb205 100644 > >> --- a/arch/mips/include/uapi/asm/socket.h > >> +++ b/arch/mips/include/uapi/asm/socket.h > >> @@ -151,6 +151,8 @@ > >> #define SO_PASSPIDFD 76 > >> #define SO_PEERPIDFD 77 > >> > >> +#define SCM_TS_OPT_ID 78 > >> + > >> #if !defined(__KERNEL__) > >> > >> #if __BITS_PER_LONG == 64 > >> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > >> index be264c2b1a11..c3ab3b3289eb 100644 > >> --- a/arch/parisc/include/uapi/asm/socket.h > >> +++ b/arch/parisc/include/uapi/asm/socket.h > >> @@ -132,6 +132,8 @@ > >> #define SO_PASSPIDFD 0x404A > >> #define SO_PEERPIDFD 0x404B > >> > >> +#define SCM_TS_OPT_ID 0x404C > >> + > >> #if !defined(__KERNEL__) > >> > >> #if __BITS_PER_LONG == 64 > >> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > >> index 682da3714686..9b40f0a57fbc 100644 > >> --- a/arch/sparc/include/uapi/asm/socket.h > >> +++ b/arch/sparc/include/uapi/asm/socket.h > >> @@ -133,6 +133,8 @@ > >> #define SO_PASSPIDFD 0x0055 > >> #define SO_PEERPIDFD 0x0056 > >> > >> +#define SCM_TS_OPT_ID 0x0057 > >> + > >> #if !defined(__KERNEL__) > >> > >> > >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > >> index 394c3b66065e..2161d50cf0fd 100644 > >> --- a/include/net/inet_sock.h > >> +++ b/include/net/inet_sock.h > >> @@ -174,6 +174,7 @@ struct inet_cork { > >> __s16 tos; > >> char priority; > >> __u16 gso_size; > >> + u32 ts_opt_id; > >> u64 transmit_time; > >> u32 mark; > >> }; > >> @@ -241,7 +242,8 @@ struct inet_sock { > >> struct inet_cork_full cork; > >> }; > >> > >> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > >> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > >> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > > > > typo: timestamp > > > > And maybe more relevant: /* ts_opt_id field is valid, overriding sk_tskey */ > > I'll change it > > >> enum { > >> INET_FLAGS_PKTINFO = 0, > >> diff --git a/include/net/sock.h b/include/net/sock.h > >> index f51d61fab059..73e21dad5660 100644 > >> --- a/include/net/sock.h > >> +++ b/include/net/sock.h > >> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > >> u64 transmit_time; > >> u32 mark; > >> u32 tsflags; > >> + u32 ts_opt_id; > >> }; > >> > >> static inline void sockcm_init(struct sockcm_cookie *sockc, > >> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > >> index 8ce8a39a1e5f..db3df3e74b01 100644 > >> --- a/include/uapi/asm-generic/socket.h > >> +++ b/include/uapi/asm-generic/socket.h > >> @@ -135,6 +135,8 @@ > >> #define SO_PASSPIDFD 76 > >> #define SO_PEERPIDFD 77 > >> > >> +#define SCM_TS_OPT_ID 78 > > > > nit: different indentation > > Hmm... that's interesting, it's ok in the code, there no spaces before > #define. I'll re-check it in the patch in v3. > > >> + > >> #if !defined(__KERNEL__) > >> > >> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > >> index a2c66b3d7f0f..e2f145e3f3a1 100644 > >> --- a/include/uapi/linux/net_tstamp.h > >> +++ b/include/uapi/linux/net_tstamp.h > >> @@ -32,8 +32,9 @@ enum { > >> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > >> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > >> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > >> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), > >> > >> - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, > >> + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_CMSG, > >> SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | > >> SOF_TIMESTAMPING_LAST > >> }; > >> diff --git a/net/core/sock.c b/net/core/sock.c > >> index 468b1239606c..560b075765fa 100644 > >> --- a/net/core/sock.c > >> +++ b/net/core/sock.c > >> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, > >> return -EINVAL; > >> sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); > >> break; > >> + case SCM_TS_OPT_ID: > >> + /* allow this option for UDP sockets only */ > >> + if (!sk_is_udp(sk)) > >> + return -EINVAL; > > > > Let's relax the restriction that this is only for UDP. > > > > At least to also support SOCK_RAW. I don't think that requires any > > additional code at all? > > RAW sockets use skb_setup_tx_timestamps which does atomic operation of > incrementing sk_tskey when in _sock_tx_timestamp. So I'll have to > convert all spots (can, ipv4/raw, ipv6/raw, 3 x af_packet) to use the > same logic as in udp path (sock_tx_timestamp) and add conditions. > Or change skb_setup_tx_timestamps to do the logic and take different > arguments. It may look as a big refactoring, so I would like to make > it as a follow-up series. You're referring to passing the whole sockcm_cookie through these functions, right? Agreed that that is a lot of churn and best left for a separate patch. Would still be good to have it merged together in a single series, but not a hard requirement. Other than that it seems a small change in _sock_tx_timestamp. @@ -2668,8 +2668,12 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, if (unlikely(tsflags)) { - __sock_tx_timestamp(tsflags, tx_flags); + __sock_tx_timestamp(sockc->tsflags, tx_flags); if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && - tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) - *tskey = atomic_inc_return(&sk->sk_tskey) - 1; + tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { + if (tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) + *tskey = sockc->ts_opt_id; + else + *tskey = atomic_inc_return(&sk->sk_tskey) - 1; + } > > Extending to TCP should be straightforward too, just a branch > > on sockc in tcp_tx_timestamp. > > TCP part looks a bit easier, as you said, I have to adjust > tcp_tx_timestamp and the logic is straightforward. I still have to > provide a pointer to sock coockie instead of flags, but there is only > one caller of this function and it's much easier than with RAW sockets. Yeah, the two are intermingled. If you want to pass sockc to _sock_tx_timestamp then all callers have to pass it. And TCP currently does not. > > If so, let's support all. It makes for a simpler API if it is > > supported uniformly wherever OPT_ID is. > > If you think that the way I explained for RAW sockets is good enough, > I can send all of them as a single patcheset. Otherwise I would like to > add RAW sockets in follow-up series. > > >> + tsflags = READ_ONCE(sk->sk_tsflags); > >> + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) > >> + return -EINVAL; > >> + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) > >> + return -EINVAL; > >> + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); > >> + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; > >> + break; > >> /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ > >> case SCM_RIGHTS: > >> case SCM_CREDENTIALS: >
Vadim Fedorenko wrote: > On 02/09/2024 16:51, Willem de Bruijn wrote: > > Jason Xing wrote: > >> On Mon, Sep 2, 2024 at 9:09 PM Vadim Fedorenko <vadfed@meta.com> wrote: > >>> > >>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > >>> timestamps and packets sent via socket. Unfortunately, there is no way > >>> to reliably predict socket timestamp ID value in case of error returned > >>> by sendmsg. For UDP sockets it's impossible because of lockless > >>> nature of UDP transmit, several threads may send packets in parallel. In > >>> case of RAW sockets MSG_MORE option makes things complicated. More > >>> details are in the conversation [1]. > >>> This patch adds new control message type to give user-space > >>> software an opportunity to control the mapping between packets and > >>> values by providing ID with each sendmsg. This works fine for UDP > >>> sockets only, and explicit check is added to control message parser. > >>> > >>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > >>> > >>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > There is one more issue here. sk_tsflags is u32 as well as > sockcm_cookie::tsflags. But sock_tx_timestamp receives __u16 tsflags, > usually filled with sockc.tsflags. It works now because > SOF_TIMESTAMPING_OPT_ID_TCP is not checked in these functions, but it's > wrong in general. Should I fix it in this series or it's better to do in > a seperate one? Good find! I overlooked that when expanding sk_tsflags when adding the 17th flag, SOF_TIMESTAMPING_OPT_ID_TCP, and increasing sk_tsflags from 16 to 32b, in commit b534dc46c8ae. Passing sockc instead of sockc.ts_flags in all cases will address this. This does mean that up until now SOF_TIMESTAMPING_OPT_ID_TCP could not be passed as a sock cookie. We did not notice that, because it is benign: the option selects which tcp field to use as base for tskey, and is only active on setsockopt.
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 5e93cd71f99f..93b0901e4e8e 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -193,6 +193,20 @@ SOF_TIMESTAMPING_OPT_ID: among all possibly concurrently outstanding timestamp requests for that socket. + With this option enabled user-space application can provide custom + ID for each message sent via UDP socket with control message with + type set to SCM_TS_OPT_ID:: + + struct msghdr *msg; + ... + cmsg = CMSG_FIRSTHDR(msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; + err = sendmsg(fd, msg, 0); + + SOF_TIMESTAMPING_OPT_ID_TCP: Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index e94f621903fe..0698e6662cdf 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -10,7 +10,7 @@ * Note: we only bother about making the SOL_SOCKET options * same as OSF/1, as that's all that "normal" programs are * likely to set. We don't necessarily want to be binary - * compatible with _everything_. + * compatible with _everything_. */ #define SOL_SOCKET 0xffff @@ -140,6 +140,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 60ebaed28a4c..bb3dc8feb205 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -151,6 +151,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index be264c2b1a11..c3ab3b3289eb 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -132,6 +132,8 @@ #define SO_PASSPIDFD 0x404A #define SO_PEERPIDFD 0x404B +#define SCM_TS_OPT_ID 0x404C + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 682da3714686..9b40f0a57fbc 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -133,6 +133,8 @@ #define SO_PASSPIDFD 0x0055 #define SO_PEERPIDFD 0x0056 +#define SCM_TS_OPT_ID 0x0057 + #if !defined(__KERNEL__) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 394c3b66065e..2161d50cf0fd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -174,6 +174,7 @@ struct inet_cork { __s16 tos; char priority; __u16 gso_size; + u32 ts_opt_id; u64 transmit_time; u32 mark; }; @@ -241,7 +242,8 @@ struct inet_sock { struct inet_cork_full cork; }; -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ enum { INET_FLAGS_PKTINFO = 0, diff --git a/include/net/sock.h b/include/net/sock.h index f51d61fab059..73e21dad5660 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1794,6 +1794,7 @@ struct sockcm_cookie { u64 transmit_time; u32 mark; u32 tsflags; + u32 ts_opt_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8ce8a39a1e5f..db3df3e74b01 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..e2f145e3f3a1 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -32,8 +32,9 @@ enum { SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_CMSG, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/core/sock.c b/net/core/sock.c index 468b1239606c..560b075765fa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, return -EINVAL; sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); break; + case SCM_TS_OPT_ID: + /* allow this option for UDP sockets only */ + if (!sk_is_udp(sk)) + return -EINVAL; + tsflags = READ_ONCE(sk->sk_tsflags); + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) + return -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; + break; /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ case SCM_RIGHTS: case SCM_CREDENTIALS: diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7257ae272296..147b87c883a9 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -430,6 +430,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp", + [const_ilog2(SOF_TIMESTAMPING_OPT_ID_CMSG)] = "option-id-cmsg", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b90d0f78ac80..1a0fe7e99843 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1048,10 +1048,15 @@ static int __ip_append_data(struct sock *sk, cork->length += length; - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* So, what's going on in the loop below? * @@ -1324,8 +1329,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->mark = ipc->sockc.mark; cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; + cork->ts_opt_id = ipc->sockc.ts_opt_id; cork->tx_flags = 0; sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); + if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) + cork->flags |= IPCORK_TS_OPT_ID; return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f26841f1490f..d7113f8622bf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); + if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) + cork->base.flags |= IPCORK_TS_OPT_ID; cork->base.length = 0; cork->base.transmit_time = ipc6->sockc.transmit_time; @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, flags &= ~MSG_SPLICE_PAGES; } - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* * Let's try using as much space as possible.
SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX timestamps and packets sent via socket. Unfortunately, there is no way to reliably predict socket timestamp ID value in case of error returned by sendmsg. For UDP sockets it's impossible because of lockless nature of UDP transmit, several threads may send packets in parallel. In case of RAW sockets MSG_MORE option makes things complicated. More details are in the conversation [1]. This patch adds new control message type to give user-space software an opportunity to control the mapping between packets and values by providing ID with each sendmsg. This works fine for UDP sockets only, and explicit check is added to control message parser. [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ Signed-off-by: Vadim Fedorenko <vadfed@meta.com> --- Documentation/networking/timestamping.rst | 14 ++++++++++++++ arch/alpha/include/uapi/asm/socket.h | 4 +++- arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/inet_sock.h | 4 +++- include/net/sock.h | 1 + include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/net_tstamp.h | 3 ++- net/core/sock.c | 12 ++++++++++++ net/ethtool/common.c | 1 + net/ipv4/ip_output.c | 16 ++++++++++++---- net/ipv6/ip6_output.c | 16 ++++++++++++---- 13 files changed, 68 insertions(+), 11 deletions(-)