Message ID | 20240904113153.2196238-2-vadfed@meta.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add option to provide OPT_ID value via cmsg | expand |
Hello Vadim, On Wed, Sep 4, 2024 at 7:32 PM Vadim Fedorenko <vadfed@meta.com> wrote: > > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > timestamps and packets sent via socket. Unfortunately, there is no way > to reliably predict socket timestamp ID value in case of error returned > by sendmsg. For UDP sockets it's impossible because of lockless > nature of UDP transmit, several threads may send packets in parallel. In > case of RAW sockets MSG_MORE option makes things complicated. More > details are in the conversation [1]. > This patch adds new control message type to give user-space > software an opportunity to control the mapping between packets and > values by providing ID with each sendmsg for UDP sockets. > The documentation is also added in this patch. > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > --- > Documentation/networking/timestamping.rst | 13 +++++++++++++ > arch/alpha/include/uapi/asm/socket.h | 2 ++ > arch/mips/include/uapi/asm/socket.h | 2 ++ > arch/parisc/include/uapi/asm/socket.h | 2 ++ > arch/sparc/include/uapi/asm/socket.h | 2 ++ > include/net/inet_sock.h | 4 +++- > include/net/sock.h | 2 ++ > include/uapi/asm-generic/socket.h | 2 ++ > include/uapi/linux/net_tstamp.h | 7 +++++++ > net/core/sock.c | 9 +++++++++ > net/ipv4/ip_output.c | 18 +++++++++++++----- > net/ipv6/ip6_output.c | 18 +++++++++++++----- > 12 files changed, 70 insertions(+), 11 deletions(-) > > diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst > index 5e93cd71f99f..e365526d6bf9 100644 > --- a/Documentation/networking/timestamping.rst > +++ b/Documentation/networking/timestamping.rst > @@ -193,6 +193,19 @@ SOF_TIMESTAMPING_OPT_ID: > among all possibly concurrently outstanding timestamp requests for > that socket. > > + The process can optionally override the default generated ID, by > + passing a specific ID with control message SCM_TS_OPT_ID:: > + > + struct msghdr *msg; > + ... > + cmsg = CMSG_FIRSTHDR(msg); > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SCM_TS_OPT_ID; > + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); > + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; > + err = sendmsg(fd, msg, 0); > + > + > SOF_TIMESTAMPING_OPT_ID_TCP: > Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP > timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the > diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h > index e94f621903fe..99dec81e7c84 100644 > --- a/arch/alpha/include/uapi/asm/socket.h > +++ b/arch/alpha/include/uapi/asm/socket.h > @@ -140,6 +140,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h > index 60ebaed28a4c..bb3dc8feb205 100644 > --- a/arch/mips/include/uapi/asm/socket.h > +++ b/arch/mips/include/uapi/asm/socket.h > @@ -151,6 +151,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h > index be264c2b1a11..c3ab3b3289eb 100644 > --- a/arch/parisc/include/uapi/asm/socket.h > +++ b/arch/parisc/include/uapi/asm/socket.h > @@ -132,6 +132,8 @@ > #define SO_PASSPIDFD 0x404A > #define SO_PEERPIDFD 0x404B > > +#define SCM_TS_OPT_ID 0x404C > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 > diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h > index 682da3714686..9b40f0a57fbc 100644 > --- a/arch/sparc/include/uapi/asm/socket.h > +++ b/arch/sparc/include/uapi/asm/socket.h > @@ -133,6 +133,8 @@ > #define SO_PASSPIDFD 0x0055 > #define SO_PEERPIDFD 0x0056 > > +#define SCM_TS_OPT_ID 0x0057 > + > #if !defined(__KERNEL__) > > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > index 394c3b66065e..f01dd273bea6 100644 > --- a/include/net/inet_sock.h > +++ b/include/net/inet_sock.h > @@ -174,6 +174,7 @@ struct inet_cork { > __s16 tos; > char priority; > __u16 gso_size; > + u32 ts_opt_id; > u64 transmit_time; > u32 mark; > }; > @@ -241,7 +242,8 @@ struct inet_sock { > struct inet_cork_full cork; > }; > > -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_TS_OPT_ID 2 /* ts_opt_id field is valid, overriding sk_tskey */ > > enum { > INET_FLAGS_PKTINFO = 0, > diff --git a/include/net/sock.h b/include/net/sock.h > index f51d61fab059..c6554ad82961 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -952,6 +952,7 @@ enum sock_flags { > }; > > #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) > +#define SOCKCM_FLAG_TS_OPT_ID BIT(31) > > static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) > { > @@ -1794,6 +1795,7 @@ struct sockcm_cookie { > u64 transmit_time; > u32 mark; > u32 tsflags; > + u32 ts_opt_id; > }; > > static inline void sockcm_init(struct sockcm_cookie *sockc, > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > index 8ce8a39a1e5f..db3df3e74b01 100644 > --- a/include/uapi/asm-generic/socket.h > +++ b/include/uapi/asm-generic/socket.h > @@ -135,6 +135,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..1c38536350e7 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -38,6 +38,13 @@ enum { > SOF_TIMESTAMPING_LAST > }; > > +/* > + * The highest bit of sk_tsflags is reserved for kernel-internal > + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* > + * values do not reach this reserved area > + */ > +static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); I saw some error occur in the patchwork: ./usr/include/linux/net_tstamp.h:46:36: error: expected ‘)’ before ‘!=’ token 46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); | ^~~ | ) make[5]: *** [../usr/include/Makefile:85: usr/include/linux/net_tstamp.hdrtest] Error 1 make[4]: *** [../scripts/Makefile.build:485: usr/include] Error 2 make[3]: *** [../scripts/Makefile.build:485: usr] Error 2 make[3]: *** Waiting for unfinished jobs.... make[2]: *** [/home/nipa/net-next/wt-1/Makefile:1925: .] Error 2 make[1]: *** [/home/nipa/net-next/wt-1/Makefile:224: __sub-make] Error 2 make: *** [Makefile:224: __sub-make] Error 2 Please see the link: https://netdev.bots.linux.dev/static/nipa/886766/13790642/build_32bit/stderr https://netdev.bots.linux.dev/static/nipa/886766/13790640/build_32bit/stderr Thanks, Jason
On 04/09/2024 14:56, Jason Xing wrote: > Hello Vadim, > > On Wed, Sep 4, 2024 at 7:32 PM Vadim Fedorenko <vadfed@meta.com> wrote: >> >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >> timestamps and packets sent via socket. Unfortunately, there is no way >> to reliably predict socket timestamp ID value in case of error returned >> by sendmsg. For UDP sockets it's impossible because of lockless >> nature of UDP transmit, several threads may send packets in parallel. In >> case of RAW sockets MSG_MORE option makes things complicated. More >> details are in the conversation [1]. >> This patch adds new control message type to give user-space >> software an opportunity to control the mapping between packets and >> values by providing ID with each sendmsg for UDP sockets. >> The documentation is also added in this patch. >> >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >> >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >> --- >> Documentation/networking/timestamping.rst | 13 +++++++++++++ >> arch/alpha/include/uapi/asm/socket.h | 2 ++ >> arch/mips/include/uapi/asm/socket.h | 2 ++ >> arch/parisc/include/uapi/asm/socket.h | 2 ++ >> arch/sparc/include/uapi/asm/socket.h | 2 ++ >> include/net/inet_sock.h | 4 +++- >> include/net/sock.h | 2 ++ >> include/uapi/asm-generic/socket.h | 2 ++ >> include/uapi/linux/net_tstamp.h | 7 +++++++ >> net/core/sock.c | 9 +++++++++ >> net/ipv4/ip_output.c | 18 +++++++++++++----- >> net/ipv6/ip6_output.c | 18 +++++++++++++----- >> 12 files changed, 70 insertions(+), 11 deletions(-) >> >> diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst >> index 5e93cd71f99f..e365526d6bf9 100644 >> --- a/Documentation/networking/timestamping.rst >> +++ b/Documentation/networking/timestamping.rst >> @@ -193,6 +193,19 @@ SOF_TIMESTAMPING_OPT_ID: >> among all possibly concurrently outstanding timestamp requests for >> that socket. >> >> + The process can optionally override the default generated ID, by >> + passing a specific ID with control message SCM_TS_OPT_ID:: >> + >> + struct msghdr *msg; >> + ... >> + cmsg = CMSG_FIRSTHDR(msg); >> + cmsg->cmsg_level = SOL_SOCKET; >> + cmsg->cmsg_type = SCM_TS_OPT_ID; >> + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); >> + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; >> + err = sendmsg(fd, msg, 0); >> + >> + >> SOF_TIMESTAMPING_OPT_ID_TCP: >> Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP >> timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the >> diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h >> index e94f621903fe..99dec81e7c84 100644 >> --- a/arch/alpha/include/uapi/asm/socket.h >> +++ b/arch/alpha/include/uapi/asm/socket.h >> @@ -140,6 +140,8 @@ >> #define SO_PASSPIDFD 76 >> #define SO_PEERPIDFD 77 >> >> +#define SCM_TS_OPT_ID 78 >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 >> diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h >> index 60ebaed28a4c..bb3dc8feb205 100644 >> --- a/arch/mips/include/uapi/asm/socket.h >> +++ b/arch/mips/include/uapi/asm/socket.h >> @@ -151,6 +151,8 @@ >> #define SO_PASSPIDFD 76 >> #define SO_PEERPIDFD 77 >> >> +#define SCM_TS_OPT_ID 78 >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 >> diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h >> index be264c2b1a11..c3ab3b3289eb 100644 >> --- a/arch/parisc/include/uapi/asm/socket.h >> +++ b/arch/parisc/include/uapi/asm/socket.h >> @@ -132,6 +132,8 @@ >> #define SO_PASSPIDFD 0x404A >> #define SO_PEERPIDFD 0x404B >> >> +#define SCM_TS_OPT_ID 0x404C >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 >> diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h >> index 682da3714686..9b40f0a57fbc 100644 >> --- a/arch/sparc/include/uapi/asm/socket.h >> +++ b/arch/sparc/include/uapi/asm/socket.h >> @@ -133,6 +133,8 @@ >> #define SO_PASSPIDFD 0x0055 >> #define SO_PEERPIDFD 0x0056 >> >> +#define SCM_TS_OPT_ID 0x0057 >> + >> #if !defined(__KERNEL__) >> >> >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >> index 394c3b66065e..f01dd273bea6 100644 >> --- a/include/net/inet_sock.h >> +++ b/include/net/inet_sock.h >> @@ -174,6 +174,7 @@ struct inet_cork { >> __s16 tos; >> char priority; >> __u16 gso_size; >> + u32 ts_opt_id; >> u64 transmit_time; >> u32 mark; >> }; >> @@ -241,7 +242,8 @@ struct inet_sock { >> struct inet_cork_full cork; >> }; >> >> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >> +#define IPCORK_TS_OPT_ID 2 /* ts_opt_id field is valid, overriding sk_tskey */ >> >> enum { >> INET_FLAGS_PKTINFO = 0, >> diff --git a/include/net/sock.h b/include/net/sock.h >> index f51d61fab059..c6554ad82961 100644 >> --- a/include/net/sock.h >> +++ b/include/net/sock.h >> @@ -952,6 +952,7 @@ enum sock_flags { >> }; >> >> #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) >> +#define SOCKCM_FLAG_TS_OPT_ID BIT(31) >> >> static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) >> { >> @@ -1794,6 +1795,7 @@ struct sockcm_cookie { >> u64 transmit_time; >> u32 mark; >> u32 tsflags; >> + u32 ts_opt_id; >> }; >> >> static inline void sockcm_init(struct sockcm_cookie *sockc, >> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h >> index 8ce8a39a1e5f..db3df3e74b01 100644 >> --- a/include/uapi/asm-generic/socket.h >> +++ b/include/uapi/asm-generic/socket.h >> @@ -135,6 +135,8 @@ >> #define SO_PASSPIDFD 76 >> #define SO_PEERPIDFD 77 >> >> +#define SCM_TS_OPT_ID 78 >> + >> #if !defined(__KERNEL__) >> >> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >> index a2c66b3d7f0f..1c38536350e7 100644 >> --- a/include/uapi/linux/net_tstamp.h >> +++ b/include/uapi/linux/net_tstamp.h >> @@ -38,6 +38,13 @@ enum { >> SOF_TIMESTAMPING_LAST >> }; >> >> +/* >> + * The highest bit of sk_tsflags is reserved for kernel-internal >> + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* >> + * values do not reach this reserved area >> + */ >> +static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); > > I saw some error occur in the patchwork: > > ./usr/include/linux/net_tstamp.h:46:36: error: expected ‘)’ before ‘!=’ token > 46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); > | ^~~ > | ) > make[5]: *** [../usr/include/Makefile:85: > usr/include/linux/net_tstamp.hdrtest] Error 1 > make[4]: *** [../scripts/Makefile.build:485: usr/include] Error 2 > make[3]: *** [../scripts/Makefile.build:485: usr] Error 2 > make[3]: *** Waiting for unfinished jobs.... > make[2]: *** [/home/nipa/net-next/wt-1/Makefile:1925: .] Error 2 > make[1]: *** [/home/nipa/net-next/wt-1/Makefile:224: __sub-make] Error 2 > make: *** [Makefile:224: __sub-make] Error 2 > > Please see the link: > https://netdev.bots.linux.dev/static/nipa/886766/13790642/build_32bit/stderr > https://netdev.bots.linux.dev/static/nipa/886766/13790640/build_32bit/stderr > > Thanks, > Jason Hmm, that's interesting.. Looks like some inconsistency in compilers. I'll re-check it, thanks Jason.
Vadim Fedorenko wrote: > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > timestamps and packets sent via socket. Unfortunately, there is no way > to reliably predict socket timestamp ID value in case of error returned > by sendmsg. For UDP sockets it's impossible because of lockless > nature of UDP transmit, several threads may send packets in parallel. In > case of RAW sockets MSG_MORE option makes things complicated. More > details are in the conversation [1]. > This patch adds new control message type to give user-space > software an opportunity to control the mapping between packets and > values by providing ID with each sendmsg for UDP sockets. > The documentation is also added in this patch. > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > --- > Documentation/networking/timestamping.rst | 13 +++++++++++++ > arch/alpha/include/uapi/asm/socket.h | 2 ++ > arch/mips/include/uapi/asm/socket.h | 2 ++ > arch/parisc/include/uapi/asm/socket.h | 2 ++ > arch/sparc/include/uapi/asm/socket.h | 2 ++ > include/net/inet_sock.h | 4 +++- > include/net/sock.h | 2 ++ > include/uapi/asm-generic/socket.h | 2 ++ > include/uapi/linux/net_tstamp.h | 7 +++++++ > net/core/sock.c | 9 +++++++++ > net/ipv4/ip_output.c | 18 +++++++++++++----- > net/ipv6/ip6_output.c | 18 +++++++++++++----- > 12 files changed, 70 insertions(+), 11 deletions(-) > > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..1c38536350e7 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -38,6 +38,13 @@ enum { > SOF_TIMESTAMPING_LAST > }; > > +/* > + * The highest bit of sk_tsflags is reserved for kernel-internal > + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* > + * values do not reach this reserved area > + */ > +static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); Let's not leak any if this implementation detail to include/uapi. A BUILD_BUG_ON wherever SOCKCM_FLAG_TS_OPT_ID is used, such as in case SCM_TS_OPT_ID, should work. > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c > index eea443b7f65e..bd2f6a699470 100644 > --- a/net/ipv4/ip_output.c > +++ b/net/ipv4/ip_output.c > @@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk, > unsigned int maxfraglen, fragheaderlen, maxnonfragsize; > int csummode = CHECKSUM_NONE; > struct rtable *rt = dst_rtable(cork->dst); > - bool paged, hold_tskey, extra_uref = false; > + bool paged, hold_tskey = false, extra_uref = false; > unsigned int wmem_alloc_delta = 0; > u32 tskey = 0; > > @@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk, > > cork->length += length; > > - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > - if (hold_tskey) > - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + if (cork->tx_flags & SKBTX_ANY_TSTAMP && > + READ_ONCE(sk->sk_tsflags) & SOCKCM_FLAG_TS_OPT_ID) { s/SOCKCM_FLAG_TS_OPT_ID/SOF_TIMESTAMPING_OPT_ID/ > + if (cork->flags & IPCORK_TS_OPT_ID) { > + tskey = cork->ts_opt_id; > + } else { > + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + hold_tskey = true; > + } > + } > > /* So, what's going on in the loop below? > * > @@ -1325,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, > cork->mark = ipc->sockc.mark; > cork->priority = ipc->priority; > cork->transmit_time = ipc->sockc.transmit_time; > + cork->ts_opt_id = ipc->sockc.ts_opt_id; > cork->tx_flags = 0; > sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); > + if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) > + cork->flags |= IPCORK_TS_OPT_ID; We can move initialization of ts_opt_id into the branch. Or conversely avoid the branch with some convoluted shift operations to have the rval be either 1 << 1 or 0 << 1. But let's do the simpler thing.
Hello Vadim, On Wed, Sep 4, 2024 at 7:32 PM Vadim Fedorenko <vadfed@meta.com> wrote: [...] > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..1c38536350e7 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -38,6 +38,13 @@ enum { > SOF_TIMESTAMPING_LAST > }; > > +/* > + * The highest bit of sk_tsflags is reserved for kernel-internal > + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* > + * values do not reach this reserved area I wonder if we can add the above description which is quite useful in enum{} like this: diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..2314fccaf51d 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,7 +13,12 @@ #include <linux/types.h> #include <linux/socket.h> /* for SO_TIMESTAMPING */ -/* SO_TIMESTAMPING flags */ +/* SO_TIMESTAMPING flags + * + * The highest bit of sk_tsflags is reserved for kernel-internal + * SOCKCM_FLAG_TS_OPT_ID. + * SOCKCM_FLAG_TS_OPT_ID = (1 << 31), + */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), to explicitly remind the developers not to touch 1<<31 field. Or else, it can be very hard to trace who occupied the highest field in the future at the first glance, I think. [...] > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c > index f26841f1490f..9b87d23314e8 100644 > --- a/net/ipv6/ip6_output.c > +++ b/net/ipv6/ip6_output.c > @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, > cork->base.gso_size = ipc6->gso_size; > cork->base.tx_flags = 0; > cork->base.mark = ipc6->sockc.mark; > + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; > sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); > + if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) > + cork->base.flags |= IPCORK_TS_OPT_ID; > > cork->base.length = 0; > cork->base.transmit_time = ipc6->sockc.transmit_time; > @@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk, > bool zc = false; > u32 tskey = 0; > struct rt6_info *rt = dst_rt6_info(cork->dst); > - bool paged, hold_tskey, extra_uref = false; > + bool paged, hold_tskey = false, extra_uref = false; > struct ipv6_txoptions *opt = v6_cork->opt; > int csummode = CHECKSUM_NONE; > unsigned int maxnonfragsize, headersize; > @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, > flags &= ~MSG_SPLICE_PAGES; > } > > - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > - if (hold_tskey) > - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + if (cork->tx_flags & SKBTX_ANY_TSTAMP && > + READ_ONCE(sk->sk_tsflags) & SOCKCM_FLAG_TS_OPT_ID) { s/SOCKCM_FLAG_TS_OPT_ID/SOF_TIMESTAMPING_OPT_ID/ In case you forget to change here :) > + if (cork->flags & IPCORK_TS_OPT_ID) { > + tskey = cork->ts_opt_id; > + } else { > + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + hold_tskey = true; > + } > + } > > /* > * Let's try using as much space as possible. > -- > 2.43.5 >
On 05/09/2024 09:24, Jason Xing wrote: > Hello Vadim, > > On Wed, Sep 4, 2024 at 7:32 PM Vadim Fedorenko <vadfed@meta.com> wrote: > [...] >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >> index a2c66b3d7f0f..1c38536350e7 100644 >> --- a/include/uapi/linux/net_tstamp.h >> +++ b/include/uapi/linux/net_tstamp.h >> @@ -38,6 +38,13 @@ enum { >> SOF_TIMESTAMPING_LAST >> }; >> >> +/* >> + * The highest bit of sk_tsflags is reserved for kernel-internal >> + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* >> + * values do not reach this reserved area > > I wonder if we can add the above description which is quite useful in > enum{} like this: > > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..2314fccaf51d 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -13,7 +13,12 @@ > #include <linux/types.h> > #include <linux/socket.h> /* for SO_TIMESTAMPING */ > > -/* SO_TIMESTAMPING flags */ > +/* SO_TIMESTAMPING flags > + * > + * The highest bit of sk_tsflags is reserved for kernel-internal > + * SOCKCM_FLAG_TS_OPT_ID. > + * SOCKCM_FLAG_TS_OPT_ID = (1 << 31), > + */ > enum { > SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), > SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), > > to explicitly remind the developers not to touch 1<<31 field. Or else, > it can be very hard to trace who occupied the highest field in the > future at the first glance, I think. > > [...] That's a bit contradictory to Willem's comment about not exposing implementation details to UAPI headers, which I think makes sense. I will move the comment to the definition area of SOCKCM_FLAG_TS_OPT_ID and will try to add meaningful message to BUILD_BUG_ON() to make it easier for developers to understand the problem. >> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c >> index f26841f1490f..9b87d23314e8 100644 >> --- a/net/ipv6/ip6_output.c >> +++ b/net/ipv6/ip6_output.c >> @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, >> cork->base.gso_size = ipc6->gso_size; >> cork->base.tx_flags = 0; >> cork->base.mark = ipc6->sockc.mark; >> + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; >> sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); >> + if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) >> + cork->base.flags |= IPCORK_TS_OPT_ID; >> >> cork->base.length = 0; >> cork->base.transmit_time = ipc6->sockc.transmit_time; >> @@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk, >> bool zc = false; >> u32 tskey = 0; >> struct rt6_info *rt = dst_rt6_info(cork->dst); >> - bool paged, hold_tskey, extra_uref = false; >> + bool paged, hold_tskey = false, extra_uref = false; >> struct ipv6_txoptions *opt = v6_cork->opt; >> int csummode = CHECKSUM_NONE; >> unsigned int maxnonfragsize, headersize; >> @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, >> flags &= ~MSG_SPLICE_PAGES; >> } >> >> - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && >> - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; >> - if (hold_tskey) >> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; >> + if (cork->tx_flags & SKBTX_ANY_TSTAMP && >> + READ_ONCE(sk->sk_tsflags) & SOCKCM_FLAG_TS_OPT_ID) { > > s/SOCKCM_FLAG_TS_OPT_ID/SOF_TIMESTAMPING_OPT_ID/ > In case you forget to change here :) Yeah, I've fixed this one already, but thanks! > >> + if (cork->flags & IPCORK_TS_OPT_ID) { >> + tskey = cork->ts_opt_id; >> + } else { >> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; >> + hold_tskey = true; >> + } >> + } >> >> /* >> * Let's try using as much space as possible. >> -- >> 2.43.5 >>
On Thu, Sep 5, 2024 at 4:34 PM Vadim Fedorenko <vadim.fedorenko@linux.dev> wrote: > > On 05/09/2024 09:24, Jason Xing wrote: > > Hello Vadim, > > > > On Wed, Sep 4, 2024 at 7:32 PM Vadim Fedorenko <vadfed@meta.com> wrote: > > [...] > >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > >> index a2c66b3d7f0f..1c38536350e7 100644 > >> --- a/include/uapi/linux/net_tstamp.h > >> +++ b/include/uapi/linux/net_tstamp.h > >> @@ -38,6 +38,13 @@ enum { > >> SOF_TIMESTAMPING_LAST > >> }; > >> > >> +/* > >> + * The highest bit of sk_tsflags is reserved for kernel-internal > >> + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* > >> + * values do not reach this reserved area > > > > I wonder if we can add the above description which is quite useful in > > enum{} like this: > > > > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > > index a2c66b3d7f0f..2314fccaf51d 100644 > > --- a/include/uapi/linux/net_tstamp.h > > +++ b/include/uapi/linux/net_tstamp.h > > @@ -13,7 +13,12 @@ > > #include <linux/types.h> > > #include <linux/socket.h> /* for SO_TIMESTAMPING */ > > > > -/* SO_TIMESTAMPING flags */ > > +/* SO_TIMESTAMPING flags > > + * > > + * The highest bit of sk_tsflags is reserved for kernel-internal > > + * SOCKCM_FLAG_TS_OPT_ID. > > + * SOCKCM_FLAG_TS_OPT_ID = (1 << 31), > > + */ > > enum { > > SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), > > SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), > > > > to explicitly remind the developers not to touch 1<<31 field. Or else, > > it can be very hard to trace who occupied the highest field in the > > future at the first glance, I think. > > > > [...] > > That's a bit contradictory to Willem's comment about not exposing > implementation details to UAPI headers, which I think makes sense. Oh, well, I missed checking the filename... it's an uapi file, sorry for the noise :( > > I will move the comment to the definition area of SOCKCM_FLAG_TS_OPT_ID > and will try to add meaningful message to BUILD_BUG_ON() to make it > easier for developers to understand the problem. Great! Thanks!
On 04/09/2024 21:54, Willem de Bruijn wrote: > Vadim Fedorenko wrote: >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >> timestamps and packets sent via socket. Unfortunately, there is no way >> to reliably predict socket timestamp ID value in case of error returned >> by sendmsg. For UDP sockets it's impossible because of lockless >> nature of UDP transmit, several threads may send packets in parallel. In >> case of RAW sockets MSG_MORE option makes things complicated. More >> details are in the conversation [1]. >> This patch adds new control message type to give user-space >> software an opportunity to control the mapping between packets and >> values by providing ID with each sendmsg for UDP sockets. >> The documentation is also added in this patch. >> >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >> >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >> --- >> Documentation/networking/timestamping.rst | 13 +++++++++++++ >> arch/alpha/include/uapi/asm/socket.h | 2 ++ >> arch/mips/include/uapi/asm/socket.h | 2 ++ >> arch/parisc/include/uapi/asm/socket.h | 2 ++ >> arch/sparc/include/uapi/asm/socket.h | 2 ++ >> include/net/inet_sock.h | 4 +++- >> include/net/sock.h | 2 ++ >> include/uapi/asm-generic/socket.h | 2 ++ >> include/uapi/linux/net_tstamp.h | 7 +++++++ >> net/core/sock.c | 9 +++++++++ >> net/ipv4/ip_output.c | 18 +++++++++++++----- >> net/ipv6/ip6_output.c | 18 +++++++++++++----- >> 12 files changed, 70 insertions(+), 11 deletions(-) >> >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >> index a2c66b3d7f0f..1c38536350e7 100644 >> --- a/include/uapi/linux/net_tstamp.h >> +++ b/include/uapi/linux/net_tstamp.h >> @@ -38,6 +38,13 @@ enum { >> SOF_TIMESTAMPING_LAST >> }; >> >> +/* >> + * The highest bit of sk_tsflags is reserved for kernel-internal >> + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* >> + * values do not reach this reserved area >> + */ >> +static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); > > Let's not leak any if this implementation detail to include/uapi. > > A BUILD_BUG_ON wherever SOCKCM_FLAG_TS_OPT_ID is used, such as in case > SCM_TS_OPT_ID, should work. Makes sense. I'll change the check and will try to add meaningful message. >> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c >> index eea443b7f65e..bd2f6a699470 100644 >> --- a/net/ipv4/ip_output.c >> +++ b/net/ipv4/ip_output.c >> @@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk, >> unsigned int maxfraglen, fragheaderlen, maxnonfragsize; >> int csummode = CHECKSUM_NONE; >> struct rtable *rt = dst_rtable(cork->dst); >> - bool paged, hold_tskey, extra_uref = false; >> + bool paged, hold_tskey = false, extra_uref = false; >> unsigned int wmem_alloc_delta = 0; >> u32 tskey = 0; >> >> @@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk, >> >> cork->length += length; >> >> - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && >> - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; >> - if (hold_tskey) >> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; >> + if (cork->tx_flags & SKBTX_ANY_TSTAMP && >> + READ_ONCE(sk->sk_tsflags) & SOCKCM_FLAG_TS_OPT_ID) { > > s/SOCKCM_FLAG_TS_OPT_ID/SOF_TIMESTAMPING_OPT_ID/ Ack >> + if (cork->flags & IPCORK_TS_OPT_ID) { >> + tskey = cork->ts_opt_id; >> + } else { >> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; >> + hold_tskey = true; >> + } >> + } >> >> /* So, what's going on in the loop below? >> * >> @@ -1325,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, >> cork->mark = ipc->sockc.mark; >> cork->priority = ipc->priority; >> cork->transmit_time = ipc->sockc.transmit_time; >> + cork->ts_opt_id = ipc->sockc.ts_opt_id; >> cork->tx_flags = 0; >> sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); >> + if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) >> + cork->flags |= IPCORK_TS_OPT_ID; > > We can move initialization of ts_opt_id into the branch. > > Or conversely avoid the branch with some convoluted shift operations > to have the rval be either 1 << 1 or 0 << 1. But let's do the simpler > thing. What is the reason to move initialization behind the flag? We are not doing this for transmit_time even though it's also used with flag only. It's not a big deal to change, I just wonder what are the benefits?
Vadim Fedorenko wrote: > On 04/09/2024 21:54, Willem de Bruijn wrote: > > Vadim Fedorenko wrote: > >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > >> timestamps and packets sent via socket. Unfortunately, there is no way > >> to reliably predict socket timestamp ID value in case of error returned > >> by sendmsg. For UDP sockets it's impossible because of lockless > >> nature of UDP transmit, several threads may send packets in parallel. In > >> case of RAW sockets MSG_MORE option makes things complicated. More > >> details are in the conversation [1]. > >> This patch adds new control message type to give user-space > >> software an opportunity to control the mapping between packets and > >> values by providing ID with each sendmsg for UDP sockets. > >> The documentation is also added in this patch. > >> > >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > >> > >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > >> @@ -1325,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, > >> cork->mark = ipc->sockc.mark; > >> cork->priority = ipc->priority; > >> cork->transmit_time = ipc->sockc.transmit_time; > >> + cork->ts_opt_id = ipc->sockc.ts_opt_id; > >> cork->tx_flags = 0; > >> sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); > >> + if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) > >> + cork->flags |= IPCORK_TS_OPT_ID; > > > > We can move initialization of ts_opt_id into the branch. > > > > Or conversely avoid the branch with some convoluted shift operations > > to have the rval be either 1 << 1 or 0 << 1. But let's do the simpler > > thing. > > What is the reason to move initialization behind the flag? We are not > doing this for transmit_time even though it's also used with flag only. > > It's not a big deal to change, I just wonder what are the benefits? Just avoid one assignment in the hot path that does not use this feature. cork->ts_opt_id is only valuid if cork->flags & IPCORK_TS_OPT_ID.
Hi Vadim, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/net_tstamp-add-SCM_TS_OPT_ID-to-provide-OPT_ID-in-control-message/20240904-193351 base: net-next/main patch link: https://lore.kernel.org/r/20240904113153.2196238-2-vadfed%40meta.com patch subject: [PATCH net-next v3 1/4] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message config: i386-buildonly-randconfig-005-20240906 (https://download.01.org/0day-ci/archive/20240906/202409061658.ydeSewh7-lkp@intel.com/config) compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240906/202409061658.ydeSewh7-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202409061658.ydeSewh7-lkp@intel.com/ All error/warnings (new ones prefixed by >>): In file included from <built-in>:1: >> ./usr/include/linux/net_tstamp.h:46:15: error: unknown type name 'SOF_TIMESTAMPING_LAST' 46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); | ^ >> ./usr/include/linux/net_tstamp.h:46:37: error: expected ')' 46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); | ^ ./usr/include/linux/net_tstamp.h:46:14: note: to match this '(' 46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); | ^ >> ./usr/include/linux/net_tstamp.h:46:1: warning: type specifier missing, defaults to 'int' [-Wimplicit-int] 46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); | ^ | int 1 warning and 2 errors generated.
Hi Vadim,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/net_tstamp-add-SCM_TS_OPT_ID-to-provide-OPT_ID-in-control-message/20240904-193351
base: net-next/main
patch link: https://lore.kernel.org/r/20240904113153.2196238-2-vadfed%40meta.com
patch subject: [PATCH net-next v3 1/4] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message
config: i386-allmodconfig (https://download.01.org/0day-ci/archive/20240906/202409062041.8g7uYSEJ-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240906/202409062041.8g7uYSEJ-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409062041.8g7uYSEJ-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from <command-line>:
>> ./usr/include/linux/net_tstamp.h:46:36: error: expected ')' before '!=' token
46 | static_assert(SOF_TIMESTAMPING_LAST != (1 << 31));
| ^~~
| )
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 5e93cd71f99f..e365526d6bf9 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -193,6 +193,19 @@ SOF_TIMESTAMPING_OPT_ID: among all possibly concurrently outstanding timestamp requests for that socket. + The process can optionally override the default generated ID, by + passing a specific ID with control message SCM_TS_OPT_ID:: + + struct msghdr *msg; + ... + cmsg = CMSG_FIRSTHDR(msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TS_OPT_ID; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *((__u32 *) CMSG_DATA(cmsg)) = opt_id; + err = sendmsg(fd, msg, 0); + + SOF_TIMESTAMPING_OPT_ID_TCP: Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index e94f621903fe..99dec81e7c84 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 60ebaed28a4c..bb3dc8feb205 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -151,6 +151,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index be264c2b1a11..c3ab3b3289eb 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -132,6 +132,8 @@ #define SO_PASSPIDFD 0x404A #define SO_PEERPIDFD 0x404B +#define SCM_TS_OPT_ID 0x404C + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 682da3714686..9b40f0a57fbc 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -133,6 +133,8 @@ #define SO_PASSPIDFD 0x0055 #define SO_PEERPIDFD 0x0056 +#define SCM_TS_OPT_ID 0x0057 + #if !defined(__KERNEL__) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 394c3b66065e..f01dd273bea6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -174,6 +174,7 @@ struct inet_cork { __s16 tos; char priority; __u16 gso_size; + u32 ts_opt_id; u64 transmit_time; u32 mark; }; @@ -241,7 +242,8 @@ struct inet_sock { struct inet_cork_full cork; }; -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_TS_OPT_ID 2 /* ts_opt_id field is valid, overriding sk_tskey */ enum { INET_FLAGS_PKTINFO = 0, diff --git a/include/net/sock.h b/include/net/sock.h index f51d61fab059..c6554ad82961 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -952,6 +952,7 @@ enum sock_flags { }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) +#define SOCKCM_FLAG_TS_OPT_ID BIT(31) static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { @@ -1794,6 +1795,7 @@ struct sockcm_cookie { u64 transmit_time; u32 mark; u32 tsflags; + u32 ts_opt_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8ce8a39a1e5f..db3df3e74b01 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..1c38536350e7 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -38,6 +38,13 @@ enum { SOF_TIMESTAMPING_LAST }; +/* + * The highest bit of sk_tsflags is reserved for kernel-internal + * SOCKCM_FLAG_TS_OPT_ID. This check is to control that SOF_TIMESTAMPING* + * values do not reach this reserved area + */ +static_assert(SOF_TIMESTAMPING_LAST != (1 << 31)); + /* * SO_TIMESTAMPING flags are either for recording a packet timestamp or for * reporting the timestamp to user space. diff --git a/net/core/sock.c b/net/core/sock.c index 468b1239606c..ac70c759a371 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2859,6 +2859,15 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, return -EINVAL; sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); break; + case SCM_TS_OPT_ID: + tsflags = READ_ONCE(sk->sk_tsflags); + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) + return -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); + sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID; + break; /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ case SCM_RIGHTS: case SCM_CREDENTIALS: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index eea443b7f65e..bd2f6a699470 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -973,7 +973,7 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = dst_rtable(cork->dst); - bool paged, hold_tskey, extra_uref = false; + bool paged, hold_tskey = false, extra_uref = false; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; @@ -1049,10 +1049,15 @@ static int __ip_append_data(struct sock *sk, cork->length += length; - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOCKCM_FLAG_TS_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* So, what's going on in the loop below? * @@ -1325,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->mark = ipc->sockc.mark; cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; + cork->ts_opt_id = ipc->sockc.ts_opt_id; cork->tx_flags = 0; sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); + if (ipc->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) + cork->flags |= IPCORK_TS_OPT_ID; return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f26841f1490f..9b87d23314e8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); + if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) + cork->base.flags |= IPCORK_TS_OPT_ID; cork->base.length = 0; cork->base.transmit_time = ipc6->sockc.transmit_time; @@ -1433,7 +1436,7 @@ static int __ip6_append_data(struct sock *sk, bool zc = false; u32 tskey = 0; struct rt6_info *rt = dst_rt6_info(cork->dst); - bool paged, hold_tskey, extra_uref = false; + bool paged, hold_tskey = false, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; @@ -1543,10 +1546,15 @@ static int __ip6_append_data(struct sock *sk, flags &= ~MSG_SPLICE_PAGES; } - hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOCKCM_FLAG_TS_OPT_ID) { + if (cork->flags & IPCORK_TS_OPT_ID) { + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + hold_tskey = true; + } + } /* * Let's try using as much space as possible.
SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX timestamps and packets sent via socket. Unfortunately, there is no way to reliably predict socket timestamp ID value in case of error returned by sendmsg. For UDP sockets it's impossible because of lockless nature of UDP transmit, several threads may send packets in parallel. In case of RAW sockets MSG_MORE option makes things complicated. More details are in the conversation [1]. This patch adds new control message type to give user-space software an opportunity to control the mapping between packets and values by providing ID with each sendmsg for UDP sockets. The documentation is also added in this patch. [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ Signed-off-by: Vadim Fedorenko <vadfed@meta.com> --- Documentation/networking/timestamping.rst | 13 +++++++++++++ arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/inet_sock.h | 4 +++- include/net/sock.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/net_tstamp.h | 7 +++++++ net/core/sock.c | 9 +++++++++ net/ipv4/ip_output.c | 18 +++++++++++++----- net/ipv6/ip6_output.c | 18 +++++++++++++----- 12 files changed, 70 insertions(+), 11 deletions(-)