Message ID | 20240829204922.1674865-1-vadfed@meta.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message | expand |
Vadim Fedorenko wrote: > SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > timestamps and packets sent via socket. Unfortunately, there is no way > to reliably predict socket timestamp ID value in case of error returned > by sendmsg. For UDP sockets it's impossible because of lockless > nature of UDP transmit, several threads may send packets in parallel. In > case of RAW sockets MSG_MORE option makes things complicated. More > details are in the conversation [1]. > This patch adds new control message type to give user-space > software an opportunity to control the mapping between packets and > values by providing ID with each sendmsg. This works fine for UDP > sockets only, and explicit check is added to control message parser. > > [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > --- > include/net/inet_sock.h | 4 +++- > include/net/sock.h | 1 + > include/uapi/asm-generic/socket.h | 2 ++ > include/uapi/linux/net_tstamp.h | 1 + > net/core/sock.c | 12 ++++++++++++ > net/ipv4/ip_output.c | 13 +++++++++++-- > net/ipv6/ip6_output.c | 13 +++++++++++-- > 7 files changed, 41 insertions(+), 5 deletions(-) > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > index 394c3b66065e..2161d50cf0fd 100644 > --- a/include/net/inet_sock.h > +++ b/include/net/inet_sock.h > @@ -174,6 +174,7 @@ struct inet_cork { > __s16 tos; > char priority; > __u16 gso_size; > + u32 ts_opt_id; > u64 transmit_time; > u32 mark; > }; > @@ -241,7 +242,8 @@ struct inet_sock { > struct inet_cork_full cork; > }; > > -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > > enum { > INET_FLAGS_PKTINFO = 0, > diff --git a/include/net/sock.h b/include/net/sock.h > index f51d61fab059..73e21dad5660 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > u64 transmit_time; > u32 mark; > u32 tsflags; > + u32 ts_opt_id; > }; > > static inline void sockcm_init(struct sockcm_cookie *sockc, > diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > index 8ce8a39a1e5f..db3df3e74b01 100644 > --- a/include/uapi/asm-generic/socket.h > +++ b/include/uapi/asm-generic/socket.h > @@ -135,6 +135,8 @@ > #define SO_PASSPIDFD 76 > #define SO_PEERPIDFD 77 > > +#define SCM_TS_OPT_ID 78 > + > #if !defined(__KERNEL__) > > #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > index a2c66b3d7f0f..081b40a55a2e 100644 > --- a/include/uapi/linux/net_tstamp.h > +++ b/include/uapi/linux/net_tstamp.h > @@ -32,6 +32,7 @@ enum { > SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), > > SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, > SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | Update SOF_TIMESTAMPING_LAST > diff --git a/net/core/sock.c b/net/core/sock.c > index 468b1239606c..560b075765fa 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, > return -EINVAL; > sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); > break; > + case SCM_TS_OPT_ID: > + /* allow this option for UDP sockets only */ > + if (!sk_is_udp(sk)) > + return -EINVAL; > + tsflags = READ_ONCE(sk->sk_tsflags); > + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) > + return -EINVAL; > + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) > + return -EINVAL; > + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); > + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; > + break; > /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ > case SCM_RIGHTS: > case SCM_CREDENTIALS: > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c > index b90d0f78ac80..65b5d9f53102 100644 > --- a/net/ipv4/ip_output.c > +++ b/net/ipv4/ip_output.c > @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk, > > hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > - if (hold_tskey) > - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + if (hold_tskey) { > + if (cork->flags & IPCORK_TS_OPT_ID) { > + hold_tskey = false; > + tskey = cork->ts_opt_id; > + } else { > + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + } > + } > > /* So, what's going on in the loop below? > * > @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, > cork->mark = ipc->sockc.mark; > cork->priority = ipc->priority; > cork->transmit_time = ipc->sockc.transmit_time; > + cork->ts_opt_id = ipc->sockc.ts_opt_id; > cork->tx_flags = 0; > sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); > + if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) > + cork->flags |= IPCORK_TS_OPT_ID; > > return 0; > } > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c > index f26841f1490f..91eafef85c85 100644 > --- a/net/ipv6/ip6_output.c > +++ b/net/ipv6/ip6_output.c > @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, > cork->base.gso_size = ipc6->gso_size; > cork->base.tx_flags = 0; > cork->base.mark = ipc6->sockc.mark; > + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; > sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); > + if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) > + cork->base.flags |= IPCORK_TS_OPT_ID; > > cork->base.length = 0; > cork->base.transmit_time = ipc6->sockc.transmit_time; > @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk, > > hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > - if (hold_tskey) > - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + if (hold_tskey) { > + if (cork->flags & IPCORK_TS_OPT_ID) { > + hold_tskey = false; > + tskey = cork->ts_opt_id; > + } else { > + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > + } > + } Setting, then clearing hold_tskey is a bit weird. How about if (cork->tx_flags & SKBTX_ANY_TSTAMP && READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { if (cork->flags & IPCORK_TS_OPT_ID) { tskey = cork->ts_opt_id; } else { tskey = atomic_inc_return(&sk->sk_tskey) - 1; hold_tskey = true; } }
Hi Vadim, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-txtimestamp-add-SCM_TS_OPT_ID-test/20240830-045630 base: net-next/main patch link: https://lore.kernel.org/r/20240829204922.1674865-1-vadfed%40meta.com patch subject: [PATCH net-next 1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message config: alpha-allyesconfig (https://download.01.org/0day-ci/archive/20240831/202408310023.xSozGTYj-lkp@intel.com/config) compiler: alpha-linux-gcc (GCC) 13.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240831/202408310023.xSozGTYj-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202408310023.xSozGTYj-lkp@intel.com/ All errors (new ones prefixed by >>): net/core/sock.c: In function '__sock_cmsg_send': >> net/core/sock.c:2862:14: error: 'SCM_TS_OPT_ID' undeclared (first use in this function); did you mean 'IPCORK_TS_OPT_ID'? 2862 | case SCM_TS_OPT_ID: | ^~~~~~~~~~~~~ | IPCORK_TS_OPT_ID net/core/sock.c:2862:14: note: each undeclared identifier is reported only once for each function it appears in vim +2862 net/core/sock.c 2828 2829 int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, 2830 struct sockcm_cookie *sockc) 2831 { 2832 u32 tsflags; 2833 2834 switch (cmsg->cmsg_type) { 2835 case SO_MARK: 2836 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && 2837 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2838 return -EPERM; 2839 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2840 return -EINVAL; 2841 sockc->mark = *(u32 *)CMSG_DATA(cmsg); 2842 break; 2843 case SO_TIMESTAMPING_OLD: 2844 case SO_TIMESTAMPING_NEW: 2845 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2846 return -EINVAL; 2847 2848 tsflags = *(u32 *)CMSG_DATA(cmsg); 2849 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) 2850 return -EINVAL; 2851 2852 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; 2853 sockc->tsflags |= tsflags; 2854 break; 2855 case SCM_TXTIME: 2856 if (!sock_flag(sk, SOCK_TXTIME)) 2857 return -EINVAL; 2858 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64))) 2859 return -EINVAL; 2860 sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); 2861 break; > 2862 case SCM_TS_OPT_ID: 2863 /* allow this option for UDP sockets only */ 2864 if (!sk_is_udp(sk)) 2865 return -EINVAL; 2866 tsflags = READ_ONCE(sk->sk_tsflags); 2867 if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) 2868 return -EINVAL; 2869 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2870 return -EINVAL; 2871 sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); 2872 sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; 2873 break; 2874 /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ 2875 case SCM_RIGHTS: 2876 case SCM_CREDENTIALS: 2877 break; 2878 default: 2879 return -EINVAL; 2880 } 2881 return 0; 2882 } 2883 EXPORT_SYMBOL(__sock_cmsg_send); 2884
Hi Vadim, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-txtimestamp-add-SCM_TS_OPT_ID-test/20240830-045630 base: net-next/main patch link: https://lore.kernel.org/r/20240829204922.1674865-1-vadfed%40meta.com patch subject: [PATCH net-next 1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message config: mips-gcw0_defconfig (https://download.01.org/0day-ci/archive/20240831/202408310034.uyJpRXb7-lkp@intel.com/config) compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 46fe36a4295f05d5d3731762e31fc4e6e99863e9) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240831/202408310034.uyJpRXb7-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202408310034.uyJpRXb7-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from net/core/sock.c:91: In file included from include/linux/errqueue.h:6: In file included from include/net/ip.h:22: In file included from include/linux/ip.h:16: In file included from include/linux/skbuff.h:17: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:8: In file included from include/linux/cacheflush.h:5: In file included from arch/mips/include/asm/cacheflush.h:13: In file included from include/linux/mm.h:2228: include/linux/vmstat.h:517:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 517 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ >> net/core/sock.c:2862:7: error: use of undeclared identifier 'SCM_TS_OPT_ID' 2862 | case SCM_TS_OPT_ID: | ^ 1 warning and 1 error generated. vim +/SCM_TS_OPT_ID +2862 net/core/sock.c 2828 2829 int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, 2830 struct sockcm_cookie *sockc) 2831 { 2832 u32 tsflags; 2833 2834 switch (cmsg->cmsg_type) { 2835 case SO_MARK: 2836 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && 2837 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2838 return -EPERM; 2839 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2840 return -EINVAL; 2841 sockc->mark = *(u32 *)CMSG_DATA(cmsg); 2842 break; 2843 case SO_TIMESTAMPING_OLD: 2844 case SO_TIMESTAMPING_NEW: 2845 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2846 return -EINVAL; 2847 2848 tsflags = *(u32 *)CMSG_DATA(cmsg); 2849 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) 2850 return -EINVAL; 2851 2852 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; 2853 sockc->tsflags |= tsflags; 2854 break; 2855 case SCM_TXTIME: 2856 if (!sock_flag(sk, SOCK_TXTIME)) 2857 return -EINVAL; 2858 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64))) 2859 return -EINVAL; 2860 sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); 2861 break; > 2862 case SCM_TS_OPT_ID: 2863 /* allow this option for UDP sockets only */ 2864 if (!sk_is_udp(sk)) 2865 return -EINVAL; 2866 tsflags = READ_ONCE(sk->sk_tsflags); 2867 if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) 2868 return -EINVAL; 2869 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2870 return -EINVAL; 2871 sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); 2872 sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; 2873 break; 2874 /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ 2875 case SCM_RIGHTS: 2876 case SCM_CREDENTIALS: 2877 break; 2878 default: 2879 return -EINVAL; 2880 } 2881 return 0; 2882 } 2883 EXPORT_SYMBOL(__sock_cmsg_send); 2884
On Fri, Aug 30, 2024 at 1:11 PM Vadim Fedorenko <vadim.fedorenko@linux.dev> wrote: > > On 30/08/2024 16:02, Willem de Bruijn wrote: > > Vadim Fedorenko wrote: > >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX > >> timestamps and packets sent via socket. Unfortunately, there is no way > >> to reliably predict socket timestamp ID value in case of error returned > >> by sendmsg. For UDP sockets it's impossible because of lockless > >> nature of UDP transmit, several threads may send packets in parallel. In > >> case of RAW sockets MSG_MORE option makes things complicated. More > >> details are in the conversation [1]. > >> This patch adds new control message type to give user-space > >> software an opportunity to control the mapping between packets and > >> values by providing ID with each sendmsg. This works fine for UDP > >> sockets only, and explicit check is added to control message parser. > >> > >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ > >> > >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > >> --- > >> include/net/inet_sock.h | 4 +++- > >> include/net/sock.h | 1 + > >> include/uapi/asm-generic/socket.h | 2 ++ > >> include/uapi/linux/net_tstamp.h | 1 + > >> net/core/sock.c | 12 ++++++++++++ > >> net/ipv4/ip_output.c | 13 +++++++++++-- > >> net/ipv6/ip6_output.c | 13 +++++++++++-- > >> 7 files changed, 41 insertions(+), 5 deletions(-) > >> > >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > >> index 394c3b66065e..2161d50cf0fd 100644 > >> --- a/include/net/inet_sock.h > >> +++ b/include/net/inet_sock.h > >> @@ -174,6 +174,7 @@ struct inet_cork { > >> __s16 tos; > >> char priority; > >> __u16 gso_size; > >> + u32 ts_opt_id; > >> u64 transmit_time; > >> u32 mark; > >> }; > >> @@ -241,7 +242,8 @@ struct inet_sock { > >> struct inet_cork_full cork; > >> }; > >> > >> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > >> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ > >> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ > >> > >> enum { > >> INET_FLAGS_PKTINFO = 0, > >> diff --git a/include/net/sock.h b/include/net/sock.h > >> index f51d61fab059..73e21dad5660 100644 > >> --- a/include/net/sock.h > >> +++ b/include/net/sock.h > >> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { > >> u64 transmit_time; > >> u32 mark; > >> u32 tsflags; > >> + u32 ts_opt_id; > >> }; > >> > >> static inline void sockcm_init(struct sockcm_cookie *sockc, > >> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h > >> index 8ce8a39a1e5f..db3df3e74b01 100644 > >> --- a/include/uapi/asm-generic/socket.h > >> +++ b/include/uapi/asm-generic/socket.h > >> @@ -135,6 +135,8 @@ > >> #define SO_PASSPIDFD 76 > >> #define SO_PEERPIDFD 77 > >> > >> +#define SCM_TS_OPT_ID 78 > >> + > >> #if !defined(__KERNEL__) > >> > >> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) > >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h > >> index a2c66b3d7f0f..081b40a55a2e 100644 > >> --- a/include/uapi/linux/net_tstamp.h > >> +++ b/include/uapi/linux/net_tstamp.h > >> @@ -32,6 +32,7 @@ enum { > >> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), > >> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), > >> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), > >> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), > >> > >> SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, > >> SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | > > > > Update SOF_TIMESTAMPING_LAST > > Got it > > >> diff --git a/net/core/sock.c b/net/core/sock.c > >> index 468b1239606c..560b075765fa 100644 > >> --- a/net/core/sock.c > >> +++ b/net/core/sock.c > >> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, > >> return -EINVAL; > >> sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); > >> break; > >> + case SCM_TS_OPT_ID: > >> + /* allow this option for UDP sockets only */ > >> + if (!sk_is_udp(sk)) > >> + return -EINVAL; > >> + tsflags = READ_ONCE(sk->sk_tsflags); > >> + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) > >> + return -EINVAL; > >> + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) > >> + return -EINVAL; > >> + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); > >> + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; > >> + break; > >> /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ > >> case SCM_RIGHTS: > >> case SCM_CREDENTIALS: > >> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c > >> index b90d0f78ac80..65b5d9f53102 100644 > >> --- a/net/ipv4/ip_output.c > >> +++ b/net/ipv4/ip_output.c > >> @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk, > >> > >> hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > >> READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > >> - if (hold_tskey) > >> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > >> + if (hold_tskey) { > >> + if (cork->flags & IPCORK_TS_OPT_ID) { > >> + hold_tskey = false; > >> + tskey = cork->ts_opt_id; > >> + } else { > >> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > >> + } > >> + } > >> > >> /* So, what's going on in the loop below? > >> * > >> @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, > >> cork->mark = ipc->sockc.mark; > >> cork->priority = ipc->priority; > >> cork->transmit_time = ipc->sockc.transmit_time; > >> + cork->ts_opt_id = ipc->sockc.ts_opt_id; > >> cork->tx_flags = 0; > >> sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); > >> + if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) > >> + cork->flags |= IPCORK_TS_OPT_ID; > >> > >> return 0; > >> } > >> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c > >> index f26841f1490f..91eafef85c85 100644 > >> --- a/net/ipv6/ip6_output.c > >> +++ b/net/ipv6/ip6_output.c > >> @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, > >> cork->base.gso_size = ipc6->gso_size; > >> cork->base.tx_flags = 0; > >> cork->base.mark = ipc6->sockc.mark; > >> + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; > >> sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); > >> + if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) > >> + cork->base.flags |= IPCORK_TS_OPT_ID; > >> > >> cork->base.length = 0; > >> cork->base.transmit_time = ipc6->sockc.transmit_time; > >> @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk, > >> > >> hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && > >> READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; > >> - if (hold_tskey) > >> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; > >> + if (hold_tskey) { > >> + if (cork->flags & IPCORK_TS_OPT_ID) { > >> + hold_tskey = false; > >> + tskey = cork->ts_opt_id; > >> + } else { > >> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; > >> + } > >> + } > > > > Setting, then clearing hold_tskey is a bit weird. How about > > > > if (cork->tx_flags & SKBTX_ANY_TSTAMP && > > READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { > > if (cork->flags & IPCORK_TS_OPT_ID) { > > tskey = cork->ts_opt_id; > > } else { > > tskey = atomic_inc_return(&sk->sk_tskey) - 1; > > hold_tskey = true; > > } > > } > > Yeah, looks ok, I'll change it this way, thanks! > > Can you please help me with kernel test robot report? I don't really get > how can SCM_TS_OPT_ID be undefined if I added it the exact same place > where other option are defined, like SCM_TXTIME or SO_MARK? Both bot reports mention arch-alpha. Take a look at the patch that introduced SCM_TXTIME. That is defined and used in the same locations. UAPI socket.h definitions need to be defined separate for various archs. I also missed this. Btw, for a next version please also document the new feature in Documentation/networking/timestamping.rst And let's keep it on the list.
On 30/08/2024 22:07, Willem de Bruijn wrote: > On Fri, Aug 30, 2024 at 1:11 PM Vadim Fedorenko > <vadim.fedorenko@linux.dev> wrote: >> >> On 30/08/2024 16:02, Willem de Bruijn wrote: >>> Vadim Fedorenko wrote: >>>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX >>>> timestamps and packets sent via socket. Unfortunately, there is no way >>>> to reliably predict socket timestamp ID value in case of error returned >>>> by sendmsg. For UDP sockets it's impossible because of lockless >>>> nature of UDP transmit, several threads may send packets in parallel. In >>>> case of RAW sockets MSG_MORE option makes things complicated. More >>>> details are in the conversation [1]. >>>> This patch adds new control message type to give user-space >>>> software an opportunity to control the mapping between packets and >>>> values by providing ID with each sendmsg. This works fine for UDP >>>> sockets only, and explicit check is added to control message parser. >>>> >>>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ >>>> >>>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >>>> --- >>>> include/net/inet_sock.h | 4 +++- >>>> include/net/sock.h | 1 + >>>> include/uapi/asm-generic/socket.h | 2 ++ >>>> include/uapi/linux/net_tstamp.h | 1 + >>>> net/core/sock.c | 12 ++++++++++++ >>>> net/ipv4/ip_output.c | 13 +++++++++++-- >>>> net/ipv6/ip6_output.c | 13 +++++++++++-- >>>> 7 files changed, 41 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h >>>> index 394c3b66065e..2161d50cf0fd 100644 >>>> --- a/include/net/inet_sock.h >>>> +++ b/include/net/inet_sock.h >>>> @@ -174,6 +174,7 @@ struct inet_cork { >>>> __s16 tos; >>>> char priority; >>>> __u16 gso_size; >>>> + u32 ts_opt_id; >>>> u64 transmit_time; >>>> u32 mark; >>>> }; >>>> @@ -241,7 +242,8 @@ struct inet_sock { >>>> struct inet_cork_full cork; >>>> }; >>>> >>>> -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>>> +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ >>>> +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ >>>> >>>> enum { >>>> INET_FLAGS_PKTINFO = 0, >>>> diff --git a/include/net/sock.h b/include/net/sock.h >>>> index f51d61fab059..73e21dad5660 100644 >>>> --- a/include/net/sock.h >>>> +++ b/include/net/sock.h >>>> @@ -1794,6 +1794,7 @@ struct sockcm_cookie { >>>> u64 transmit_time; >>>> u32 mark; >>>> u32 tsflags; >>>> + u32 ts_opt_id; >>>> }; >>>> >>>> static inline void sockcm_init(struct sockcm_cookie *sockc, >>>> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h >>>> index 8ce8a39a1e5f..db3df3e74b01 100644 >>>> --- a/include/uapi/asm-generic/socket.h >>>> +++ b/include/uapi/asm-generic/socket.h >>>> @@ -135,6 +135,8 @@ >>>> #define SO_PASSPIDFD 76 >>>> #define SO_PEERPIDFD 77 >>>> >>>> +#define SCM_TS_OPT_ID 78 >>>> + >>>> #if !defined(__KERNEL__) >>>> >>>> #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) >>>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h >>>> index a2c66b3d7f0f..081b40a55a2e 100644 >>>> --- a/include/uapi/linux/net_tstamp.h >>>> +++ b/include/uapi/linux/net_tstamp.h >>>> @@ -32,6 +32,7 @@ enum { >>>> SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), >>>> SOF_TIMESTAMPING_BIND_PHC = (1 << 15), >>>> SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), >>>> + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), >>>> >>>> SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, >>>> SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | >>> >>> Update SOF_TIMESTAMPING_LAST >> >> Got it >> >>>> diff --git a/net/core/sock.c b/net/core/sock.c >>>> index 468b1239606c..560b075765fa 100644 >>>> --- a/net/core/sock.c >>>> +++ b/net/core/sock.c >>>> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, >>>> return -EINVAL; >>>> sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); >>>> break; >>>> + case SCM_TS_OPT_ID: >>>> + /* allow this option for UDP sockets only */ >>>> + if (!sk_is_udp(sk)) >>>> + return -EINVAL; >>>> + tsflags = READ_ONCE(sk->sk_tsflags); >>>> + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) >>>> + return -EINVAL; >>>> + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) >>>> + return -EINVAL; >>>> + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); >>>> + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; >>>> + break; >>>> /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ >>>> case SCM_RIGHTS: >>>> case SCM_CREDENTIALS: >>>> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c >>>> index b90d0f78ac80..65b5d9f53102 100644 >>>> --- a/net/ipv4/ip_output.c >>>> +++ b/net/ipv4/ip_output.c >>>> @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk, >>>> >>>> hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && >>>> READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; >>>> - if (hold_tskey) >>>> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; >>>> + if (hold_tskey) { >>>> + if (cork->flags & IPCORK_TS_OPT_ID) { >>>> + hold_tskey = false; >>>> + tskey = cork->ts_opt_id; >>>> + } else { >>>> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; >>>> + } >>>> + } >>>> >>>> /* So, what's going on in the loop below? >>>> * >>>> @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, >>>> cork->mark = ipc->sockc.mark; >>>> cork->priority = ipc->priority; >>>> cork->transmit_time = ipc->sockc.transmit_time; >>>> + cork->ts_opt_id = ipc->sockc.ts_opt_id; >>>> cork->tx_flags = 0; >>>> sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); >>>> + if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) >>>> + cork->flags |= IPCORK_TS_OPT_ID; >>>> >>>> return 0; >>>> } >>>> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c >>>> index f26841f1490f..91eafef85c85 100644 >>>> --- a/net/ipv6/ip6_output.c >>>> +++ b/net/ipv6/ip6_output.c >>>> @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, >>>> cork->base.gso_size = ipc6->gso_size; >>>> cork->base.tx_flags = 0; >>>> cork->base.mark = ipc6->sockc.mark; >>>> + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; >>>> sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); >>>> + if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) >>>> + cork->base.flags |= IPCORK_TS_OPT_ID; >>>> >>>> cork->base.length = 0; >>>> cork->base.transmit_time = ipc6->sockc.transmit_time; >>>> @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk, >>>> >>>> hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && >>>> READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; >>>> - if (hold_tskey) >>>> - tskey = atomic_inc_return(&sk->sk_tskey) - 1; >>>> + if (hold_tskey) { >>>> + if (cork->flags & IPCORK_TS_OPT_ID) { >>>> + hold_tskey = false; >>>> + tskey = cork->ts_opt_id; >>>> + } else { >>>> + tskey = atomic_inc_return(&sk->sk_tskey) - 1; >>>> + } >>>> + } >>> >>> Setting, then clearing hold_tskey is a bit weird. How about >>> >>> if (cork->tx_flags & SKBTX_ANY_TSTAMP && >>> READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { >>> if (cork->flags & IPCORK_TS_OPT_ID) { >>> tskey = cork->ts_opt_id; >>> } else { >>> tskey = atomic_inc_return(&sk->sk_tskey) - 1; >>> hold_tskey = true; >>> } >>> } >> >> Yeah, looks ok, I'll change it this way, thanks! >> >> Can you please help me with kernel test robot report? I don't really get >> how can SCM_TS_OPT_ID be undefined if I added it the exact same place >> where other option are defined, like SCM_TXTIME or SO_MARK? > > Both bot reports mention arch-alpha. > > Take a look at the patch that introduced SCM_TXTIME. That is defined > and used in the same locations. > > UAPI socket.h definitions need to be defined separate for various > archs. I also missed this. Thanks, Willem, now I found all the definitions. > Btw, for a next version please also document the new feature in > Documentation/networking/timestamping.rst Yep, I'll make series of 3 patches then. Will try to stick into section 1.3.3 Timestamp Options. > And let's keep it on the list. Sure, I accidentally removed the list..
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 394c3b66065e..2161d50cf0fd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -174,6 +174,7 @@ struct inet_cork { __s16 tos; char priority; __u16 gso_size; + u32 ts_opt_id; u64 transmit_time; u32 mark; }; @@ -241,7 +242,8 @@ struct inet_sock { struct inet_cork_full cork; }; -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_TS_OPT_ID 2 /* timestmap opt id has been provided in cmsg */ enum { INET_FLAGS_PKTINFO = 0, diff --git a/include/net/sock.h b/include/net/sock.h index f51d61fab059..73e21dad5660 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1794,6 +1794,7 @@ struct sockcm_cookie { u64 transmit_time; u32 mark; u32 tsflags; + u32 ts_opt_id; }; static inline void sockcm_init(struct sockcm_cookie *sockc, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8ce8a39a1e5f..db3df3e74b01 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..081b40a55a2e 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -32,6 +32,7 @@ enum { SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), + SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17), SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | diff --git a/net/core/sock.c b/net/core/sock.c index 468b1239606c..560b075765fa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, return -EINVAL; sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); break; + case SCM_TS_OPT_ID: + /* allow this option for UDP sockets only */ + if (!sk_is_udp(sk)) + return -EINVAL; + tsflags = READ_ONCE(sk->sk_tsflags); + if (!(tsflags & SOF_TIMESTAMPING_OPT_ID)) + return -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg); + sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG; + break; /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */ case SCM_RIGHTS: case SCM_CREDENTIALS: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b90d0f78ac80..65b5d9f53102 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk, hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (hold_tskey) { + if (cork->flags & IPCORK_TS_OPT_ID) { + hold_tskey = false; + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + } + } /* So, what's going on in the loop below? * @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->mark = ipc->sockc.mark; cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; + cork->ts_opt_id = ipc->sockc.ts_opt_id; cork->tx_flags = 0; sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags); + if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) + cork->flags |= IPCORK_TS_OPT_ID; return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f26841f1490f..91eafef85c85 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; + cork->base.ts_opt_id = ipc6->sockc.ts_opt_id; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); + if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG) + cork->base.flags |= IPCORK_TS_OPT_ID; cork->base.length = 0; cork->base.transmit_time = ipc6->sockc.transmit_time; @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk, hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; - if (hold_tskey) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; + if (hold_tskey) { + if (cork->flags & IPCORK_TS_OPT_ID) { + hold_tskey = false; + tskey = cork->ts_opt_id; + } else { + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + } + } /* * Let's try using as much space as possible.
SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX timestamps and packets sent via socket. Unfortunately, there is no way to reliably predict socket timestamp ID value in case of error returned by sendmsg. For UDP sockets it's impossible because of lockless nature of UDP transmit, several threads may send packets in parallel. In case of RAW sockets MSG_MORE option makes things complicated. More details are in the conversation [1]. This patch adds new control message type to give user-space software an opportunity to control the mapping between packets and values by providing ID with each sendmsg. This works fine for UDP sockets only, and explicit check is added to control message parser. [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ Signed-off-by: Vadim Fedorenko <vadfed@meta.com> --- include/net/inet_sock.h | 4 +++- include/net/sock.h | 1 + include/uapi/asm-generic/socket.h | 2 ++ include/uapi/linux/net_tstamp.h | 1 + net/core/sock.c | 12 ++++++++++++ net/ipv4/ip_output.c | 13 +++++++++++-- net/ipv6/ip6_output.c | 13 +++++++++++-- 7 files changed, 41 insertions(+), 5 deletions(-)