diff mbox series

[net-next,1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message

Message ID 20240829204922.1674865-1-vadfed@meta.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 91 this patch: 91
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 3 maintainers not CCed: edumazet@google.com arnd@arndb.de linux-arch@vger.kernel.org
netdev/build_clang success Errors and warnings before: 1086 this patch: 1086
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 15051 this patch: 15051
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 18 this patch: 18
netdev/source_inline success Was 0 now: 0

Commit Message

Vadim Fedorenko Aug. 29, 2024, 8:49 p.m. UTC
SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX
timestamps and packets sent via socket. Unfortunately, there is no way
to reliably predict socket timestamp ID value in case of error returned
by sendmsg. For UDP sockets it's impossible because of lockless
nature of UDP transmit, several threads may send packets in parallel. In
case of RAW sockets MSG_MORE option makes things complicated. More
details are in the conversation [1].
This patch adds new control message type to give user-space
software an opportunity to control the mapping between packets and
values by providing ID with each sendmsg. This works fine for UDP
sockets only, and explicit check is added to control message parser.

[1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 include/net/inet_sock.h           |  4 +++-
 include/net/sock.h                |  1 +
 include/uapi/asm-generic/socket.h |  2 ++
 include/uapi/linux/net_tstamp.h   |  1 +
 net/core/sock.c                   | 12 ++++++++++++
 net/ipv4/ip_output.c              | 13 +++++++++++--
 net/ipv6/ip6_output.c             | 13 +++++++++++--
 7 files changed, 41 insertions(+), 5 deletions(-)

Comments

Willem de Bruijn Aug. 30, 2024, 3:02 p.m. UTC | #1
Vadim Fedorenko wrote:
> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX
> timestamps and packets sent via socket. Unfortunately, there is no way
> to reliably predict socket timestamp ID value in case of error returned
> by sendmsg. For UDP sockets it's impossible because of lockless
> nature of UDP transmit, several threads may send packets in parallel. In
> case of RAW sockets MSG_MORE option makes things complicated. More
> details are in the conversation [1].
> This patch adds new control message type to give user-space
> software an opportunity to control the mapping between packets and
> values by providing ID with each sendmsg. This works fine for UDP
> sockets only, and explicit check is added to control message parser.
> 
> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/
> 
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>  include/net/inet_sock.h           |  4 +++-
>  include/net/sock.h                |  1 +
>  include/uapi/asm-generic/socket.h |  2 ++
>  include/uapi/linux/net_tstamp.h   |  1 +
>  net/core/sock.c                   | 12 ++++++++++++
>  net/ipv4/ip_output.c              | 13 +++++++++++--
>  net/ipv6/ip6_output.c             | 13 +++++++++++--
>  7 files changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 394c3b66065e..2161d50cf0fd 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -174,6 +174,7 @@ struct inet_cork {
>  	__s16			tos;
>  	char			priority;
>  	__u16			gso_size;
> +	u32			ts_opt_id;
>  	u64			transmit_time;
>  	u32			mark;
>  };
> @@ -241,7 +242,8 @@ struct inet_sock {
>  	struct inet_cork_full	cork;
>  };
>  
> -#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
> +#define IPCORK_OPT		1	/* ip-options has been held in ipcork.opt */
> +#define IPCORK_TS_OPT_ID	2	/* timestmap opt id has been provided in cmsg */
>  
>  enum {
>  	INET_FLAGS_PKTINFO	= 0,
> diff --git a/include/net/sock.h b/include/net/sock.h
> index f51d61fab059..73e21dad5660 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1794,6 +1794,7 @@ struct sockcm_cookie {
>  	u64 transmit_time;
>  	u32 mark;
>  	u32 tsflags;
> +	u32 ts_opt_id;
>  };
>  
>  static inline void sockcm_init(struct sockcm_cookie *sockc,
> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> index 8ce8a39a1e5f..db3df3e74b01 100644
> --- a/include/uapi/asm-generic/socket.h
> +++ b/include/uapi/asm-generic/socket.h
> @@ -135,6 +135,8 @@
>  #define SO_PASSPIDFD		76
>  #define SO_PEERPIDFD		77
>  
> +#define SCM_TS_OPT_ID		78
> +
>  #if !defined(__KERNEL__)
>  
>  #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
> index a2c66b3d7f0f..081b40a55a2e 100644
> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -32,6 +32,7 @@ enum {
>  	SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
>  	SOF_TIMESTAMPING_BIND_PHC = (1 << 15),
>  	SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16),
> +	SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17),
>  
>  	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP,
>  	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |

Update SOF_TIMESTAMPING_LAST

> diff --git a/net/core/sock.c b/net/core/sock.c
> index 468b1239606c..560b075765fa 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
>  			return -EINVAL;
>  		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
>  		break;
> +	case SCM_TS_OPT_ID:
> +		/* allow this option for UDP sockets only */
> +		if (!sk_is_udp(sk))
> +			return -EINVAL;
> +		tsflags = READ_ONCE(sk->sk_tsflags);
> +		if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
> +			return -EINVAL;
> +		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
> +			return -EINVAL;
> +		sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
> +		sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG;
> +		break;
>  	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
>  	case SCM_RIGHTS:
>  	case SCM_CREDENTIALS:
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index b90d0f78ac80..65b5d9f53102 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk,
>  
>  	hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
>  		     READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
> -	if (hold_tskey)
> -		tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> +	if (hold_tskey) {
> +		if (cork->flags & IPCORK_TS_OPT_ID) {
> +			hold_tskey = false;
> +			tskey = cork->ts_opt_id;
> +		} else {
> +			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> +		}
> +	}
>  
>  	/* So, what's going on in the loop below?
>  	 *
> @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
>  	cork->mark = ipc->sockc.mark;
>  	cork->priority = ipc->priority;
>  	cork->transmit_time = ipc->sockc.transmit_time;
> +	cork->ts_opt_id = ipc->sockc.ts_opt_id;
>  	cork->tx_flags = 0;
>  	sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
> +	if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
> +		cork->flags |= IPCORK_TS_OPT_ID;
>  
>  	return 0;
>  }
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index f26841f1490f..91eafef85c85 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
>  	cork->base.gso_size = ipc6->gso_size;
>  	cork->base.tx_flags = 0;
>  	cork->base.mark = ipc6->sockc.mark;
> +	cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
>  	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
> +	if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
> +		cork->base.flags |= IPCORK_TS_OPT_ID;
>  
>  	cork->base.length = 0;
>  	cork->base.transmit_time = ipc6->sockc.transmit_time;
> @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk,
>  
>  	hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
>  		     READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
> -	if (hold_tskey)
> -		tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> +	if (hold_tskey) {
> +		if (cork->flags & IPCORK_TS_OPT_ID) {
> +			hold_tskey = false;
> +			tskey = cork->ts_opt_id;
> +		} else {
> +			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> +		}
> +	}

Setting, then clearing hold_tskey is a bit weird. How about

if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
        if (cork->flags & IPCORK_TS_OPT_ID) {
                 tskey = cork->ts_opt_id;
        } else {
                 tskey = atomic_inc_return(&sk->sk_tskey) - 1;
                 hold_tskey = true;
        }
}
kernel test robot Aug. 30, 2024, 4:18 p.m. UTC | #2
Hi Vadim,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-txtimestamp-add-SCM_TS_OPT_ID-test/20240830-045630
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240829204922.1674865-1-vadfed%40meta.com
patch subject: [PATCH net-next 1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message
config: alpha-allyesconfig (https://download.01.org/0day-ci/archive/20240831/202408310023.xSozGTYj-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 13.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240831/202408310023.xSozGTYj-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408310023.xSozGTYj-lkp@intel.com/

All errors (new ones prefixed by >>):

   net/core/sock.c: In function '__sock_cmsg_send':
>> net/core/sock.c:2862:14: error: 'SCM_TS_OPT_ID' undeclared (first use in this function); did you mean 'IPCORK_TS_OPT_ID'?
    2862 |         case SCM_TS_OPT_ID:
         |              ^~~~~~~~~~~~~
         |              IPCORK_TS_OPT_ID
   net/core/sock.c:2862:14: note: each undeclared identifier is reported only once for each function it appears in


vim +2862 net/core/sock.c

  2828	
  2829	int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
  2830			     struct sockcm_cookie *sockc)
  2831	{
  2832		u32 tsflags;
  2833	
  2834		switch (cmsg->cmsg_type) {
  2835		case SO_MARK:
  2836			if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
  2837			    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
  2838				return -EPERM;
  2839			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
  2840				return -EINVAL;
  2841			sockc->mark = *(u32 *)CMSG_DATA(cmsg);
  2842			break;
  2843		case SO_TIMESTAMPING_OLD:
  2844		case SO_TIMESTAMPING_NEW:
  2845			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
  2846				return -EINVAL;
  2847	
  2848			tsflags = *(u32 *)CMSG_DATA(cmsg);
  2849			if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
  2850				return -EINVAL;
  2851	
  2852			sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
  2853			sockc->tsflags |= tsflags;
  2854			break;
  2855		case SCM_TXTIME:
  2856			if (!sock_flag(sk, SOCK_TXTIME))
  2857				return -EINVAL;
  2858			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
  2859				return -EINVAL;
  2860			sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
  2861			break;
> 2862		case SCM_TS_OPT_ID:
  2863			/* allow this option for UDP sockets only */
  2864			if (!sk_is_udp(sk))
  2865				return -EINVAL;
  2866			tsflags = READ_ONCE(sk->sk_tsflags);
  2867			if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
  2868				return -EINVAL;
  2869			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
  2870				return -EINVAL;
  2871			sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
  2872			sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG;
  2873			break;
  2874		/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
  2875		case SCM_RIGHTS:
  2876		case SCM_CREDENTIALS:
  2877			break;
  2878		default:
  2879			return -EINVAL;
  2880		}
  2881		return 0;
  2882	}
  2883	EXPORT_SYMBOL(__sock_cmsg_send);
  2884
kernel test robot Aug. 30, 2024, 4:50 p.m. UTC | #3
Hi Vadim,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-txtimestamp-add-SCM_TS_OPT_ID-test/20240830-045630
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240829204922.1674865-1-vadfed%40meta.com
patch subject: [PATCH net-next 1/2] net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message
config: mips-gcw0_defconfig (https://download.01.org/0day-ci/archive/20240831/202408310034.uyJpRXb7-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 46fe36a4295f05d5d3731762e31fc4e6e99863e9)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240831/202408310034.uyJpRXb7-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408310034.uyJpRXb7-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from net/core/sock.c:91:
   In file included from include/linux/errqueue.h:6:
   In file included from include/net/ip.h:22:
   In file included from include/linux/ip.h:16:
   In file included from include/linux/skbuff.h:17:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:8:
   In file included from include/linux/cacheflush.h:5:
   In file included from arch/mips/include/asm/cacheflush.h:13:
   In file included from include/linux/mm.h:2228:
   include/linux/vmstat.h:517:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     517 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> net/core/sock.c:2862:7: error: use of undeclared identifier 'SCM_TS_OPT_ID'
    2862 |         case SCM_TS_OPT_ID:
         |              ^
   1 warning and 1 error generated.


vim +/SCM_TS_OPT_ID +2862 net/core/sock.c

  2828	
  2829	int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
  2830			     struct sockcm_cookie *sockc)
  2831	{
  2832		u32 tsflags;
  2833	
  2834		switch (cmsg->cmsg_type) {
  2835		case SO_MARK:
  2836			if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
  2837			    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
  2838				return -EPERM;
  2839			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
  2840				return -EINVAL;
  2841			sockc->mark = *(u32 *)CMSG_DATA(cmsg);
  2842			break;
  2843		case SO_TIMESTAMPING_OLD:
  2844		case SO_TIMESTAMPING_NEW:
  2845			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
  2846				return -EINVAL;
  2847	
  2848			tsflags = *(u32 *)CMSG_DATA(cmsg);
  2849			if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
  2850				return -EINVAL;
  2851	
  2852			sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
  2853			sockc->tsflags |= tsflags;
  2854			break;
  2855		case SCM_TXTIME:
  2856			if (!sock_flag(sk, SOCK_TXTIME))
  2857				return -EINVAL;
  2858			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
  2859				return -EINVAL;
  2860			sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
  2861			break;
> 2862		case SCM_TS_OPT_ID:
  2863			/* allow this option for UDP sockets only */
  2864			if (!sk_is_udp(sk))
  2865				return -EINVAL;
  2866			tsflags = READ_ONCE(sk->sk_tsflags);
  2867			if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
  2868				return -EINVAL;
  2869			if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
  2870				return -EINVAL;
  2871			sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
  2872			sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG;
  2873			break;
  2874		/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
  2875		case SCM_RIGHTS:
  2876		case SCM_CREDENTIALS:
  2877			break;
  2878		default:
  2879			return -EINVAL;
  2880		}
  2881		return 0;
  2882	}
  2883	EXPORT_SYMBOL(__sock_cmsg_send);
  2884
Willem de Bruijn Aug. 30, 2024, 9:07 p.m. UTC | #4
On Fri, Aug 30, 2024 at 1:11 PM Vadim Fedorenko
<vadim.fedorenko@linux.dev> wrote:
>
> On 30/08/2024 16:02, Willem de Bruijn wrote:
> > Vadim Fedorenko wrote:
> >> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX
> >> timestamps and packets sent via socket. Unfortunately, there is no way
> >> to reliably predict socket timestamp ID value in case of error returned
> >> by sendmsg. For UDP sockets it's impossible because of lockless
> >> nature of UDP transmit, several threads may send packets in parallel. In
> >> case of RAW sockets MSG_MORE option makes things complicated. More
> >> details are in the conversation [1].
> >> This patch adds new control message type to give user-space
> >> software an opportunity to control the mapping between packets and
> >> values by providing ID with each sendmsg. This works fine for UDP
> >> sockets only, and explicit check is added to control message parser.
> >>
> >> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/
> >>
> >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> >> ---
> >>   include/net/inet_sock.h           |  4 +++-
> >>   include/net/sock.h                |  1 +
> >>   include/uapi/asm-generic/socket.h |  2 ++
> >>   include/uapi/linux/net_tstamp.h   |  1 +
> >>   net/core/sock.c                   | 12 ++++++++++++
> >>   net/ipv4/ip_output.c              | 13 +++++++++++--
> >>   net/ipv6/ip6_output.c             | 13 +++++++++++--
> >>   7 files changed, 41 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> >> index 394c3b66065e..2161d50cf0fd 100644
> >> --- a/include/net/inet_sock.h
> >> +++ b/include/net/inet_sock.h
> >> @@ -174,6 +174,7 @@ struct inet_cork {
> >>      __s16                   tos;
> >>      char                    priority;
> >>      __u16                   gso_size;
> >> +    u32                     ts_opt_id;
> >>      u64                     transmit_time;
> >>      u32                     mark;
> >>   };
> >> @@ -241,7 +242,8 @@ struct inet_sock {
> >>      struct inet_cork_full   cork;
> >>   };
> >>
> >> -#define IPCORK_OPT  1       /* ip-options has been held in ipcork.opt */
> >> +#define IPCORK_OPT          1       /* ip-options has been held in ipcork.opt */
> >> +#define IPCORK_TS_OPT_ID    2       /* timestmap opt id has been provided in cmsg */
> >>
> >>   enum {
> >>      INET_FLAGS_PKTINFO      = 0,
> >> diff --git a/include/net/sock.h b/include/net/sock.h
> >> index f51d61fab059..73e21dad5660 100644
> >> --- a/include/net/sock.h
> >> +++ b/include/net/sock.h
> >> @@ -1794,6 +1794,7 @@ struct sockcm_cookie {
> >>      u64 transmit_time;
> >>      u32 mark;
> >>      u32 tsflags;
> >> +    u32 ts_opt_id;
> >>   };
> >>
> >>   static inline void sockcm_init(struct sockcm_cookie *sockc,
> >> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
> >> index 8ce8a39a1e5f..db3df3e74b01 100644
> >> --- a/include/uapi/asm-generic/socket.h
> >> +++ b/include/uapi/asm-generic/socket.h
> >> @@ -135,6 +135,8 @@
> >>   #define SO_PASSPIDFD               76
> >>   #define SO_PEERPIDFD               77
> >>
> >> +#define SCM_TS_OPT_ID               78
> >> +
> >>   #if !defined(__KERNEL__)
> >>
> >>   #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
> >> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
> >> index a2c66b3d7f0f..081b40a55a2e 100644
> >> --- a/include/uapi/linux/net_tstamp.h
> >> +++ b/include/uapi/linux/net_tstamp.h
> >> @@ -32,6 +32,7 @@ enum {
> >>      SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
> >>      SOF_TIMESTAMPING_BIND_PHC = (1 << 15),
> >>      SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16),
> >> +    SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17),
> >>
> >>      SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP,
> >>      SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
> >
> > Update SOF_TIMESTAMPING_LAST
>
> Got it
>
> >> diff --git a/net/core/sock.c b/net/core/sock.c
> >> index 468b1239606c..560b075765fa 100644
> >> --- a/net/core/sock.c
> >> +++ b/net/core/sock.c
> >> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
> >>                      return -EINVAL;
> >>              sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
> >>              break;
> >> +    case SCM_TS_OPT_ID:
> >> +            /* allow this option for UDP sockets only */
> >> +            if (!sk_is_udp(sk))
> >> +                    return -EINVAL;
> >> +            tsflags = READ_ONCE(sk->sk_tsflags);
> >> +            if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
> >> +                    return -EINVAL;
> >> +            if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
> >> +                    return -EINVAL;
> >> +            sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
> >> +            sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG;
> >> +            break;
> >>      /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
> >>      case SCM_RIGHTS:
> >>      case SCM_CREDENTIALS:
> >> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> >> index b90d0f78ac80..65b5d9f53102 100644
> >> --- a/net/ipv4/ip_output.c
> >> +++ b/net/ipv4/ip_output.c
> >> @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk,
> >>
> >>      hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
> >>                   READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
> >> -    if (hold_tskey)
> >> -            tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> >> +    if (hold_tskey) {
> >> +            if (cork->flags & IPCORK_TS_OPT_ID) {
> >> +                    hold_tskey = false;
> >> +                    tskey = cork->ts_opt_id;
> >> +            } else {
> >> +                    tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> >> +            }
> >> +    }
> >>
> >>      /* So, what's going on in the loop below?
> >>       *
> >> @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
> >>      cork->mark = ipc->sockc.mark;
> >>      cork->priority = ipc->priority;
> >>      cork->transmit_time = ipc->sockc.transmit_time;
> >> +    cork->ts_opt_id = ipc->sockc.ts_opt_id;
> >>      cork->tx_flags = 0;
> >>      sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
> >> +    if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
> >> +            cork->flags |= IPCORK_TS_OPT_ID;
> >>
> >>      return 0;
> >>   }
> >> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> >> index f26841f1490f..91eafef85c85 100644
> >> --- a/net/ipv6/ip6_output.c
> >> +++ b/net/ipv6/ip6_output.c
> >> @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
> >>      cork->base.gso_size = ipc6->gso_size;
> >>      cork->base.tx_flags = 0;
> >>      cork->base.mark = ipc6->sockc.mark;
> >> +    cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
> >>      sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
> >> +    if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
> >> +            cork->base.flags |= IPCORK_TS_OPT_ID;
> >>
> >>      cork->base.length = 0;
> >>      cork->base.transmit_time = ipc6->sockc.transmit_time;
> >> @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk,
> >>
> >>      hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
> >>                   READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
> >> -    if (hold_tskey)
> >> -            tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> >> +    if (hold_tskey) {
> >> +            if (cork->flags & IPCORK_TS_OPT_ID) {
> >> +                    hold_tskey = false;
> >> +                    tskey = cork->ts_opt_id;
> >> +            } else {
> >> +                    tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> >> +            }
> >> +    }
> >
> > Setting, then clearing hold_tskey is a bit weird. How about
> >
> > if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
> >      READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
> >          if (cork->flags & IPCORK_TS_OPT_ID) {
> >                   tskey = cork->ts_opt_id;
> >          } else {
> >                   tskey = atomic_inc_return(&sk->sk_tskey) - 1;
> >                   hold_tskey = true;
> >          }
> > }
>
> Yeah, looks ok, I'll change it this way, thanks!
>
> Can you please help me with kernel test robot report? I don't really get
> how can SCM_TS_OPT_ID be undefined if I added it the exact same place
> where other option are defined, like SCM_TXTIME or SO_MARK?

Both bot reports mention arch-alpha.

Take a look at the patch that introduced SCM_TXTIME. That is defined
and used in the same locations.

UAPI socket.h definitions need to be defined separate for various
archs. I also missed this.

Btw, for a next version please also document the new feature in
Documentation/networking/timestamping.rst

And let's keep it on the list.
Vadim Fedorenko Sept. 1, 2024, 8:55 p.m. UTC | #5
On 30/08/2024 22:07, Willem de Bruijn wrote:
> On Fri, Aug 30, 2024 at 1:11 PM Vadim Fedorenko
> <vadim.fedorenko@linux.dev> wrote:
>>
>> On 30/08/2024 16:02, Willem de Bruijn wrote:
>>> Vadim Fedorenko wrote:
>>>> SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX
>>>> timestamps and packets sent via socket. Unfortunately, there is no way
>>>> to reliably predict socket timestamp ID value in case of error returned
>>>> by sendmsg. For UDP sockets it's impossible because of lockless
>>>> nature of UDP transmit, several threads may send packets in parallel. In
>>>> case of RAW sockets MSG_MORE option makes things complicated. More
>>>> details are in the conversation [1].
>>>> This patch adds new control message type to give user-space
>>>> software an opportunity to control the mapping between packets and
>>>> values by providing ID with each sendmsg. This works fine for UDP
>>>> sockets only, and explicit check is added to control message parser.
>>>>
>>>> [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/
>>>>
>>>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>>>> ---
>>>>    include/net/inet_sock.h           |  4 +++-
>>>>    include/net/sock.h                |  1 +
>>>>    include/uapi/asm-generic/socket.h |  2 ++
>>>>    include/uapi/linux/net_tstamp.h   |  1 +
>>>>    net/core/sock.c                   | 12 ++++++++++++
>>>>    net/ipv4/ip_output.c              | 13 +++++++++++--
>>>>    net/ipv6/ip6_output.c             | 13 +++++++++++--
>>>>    7 files changed, 41 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
>>>> index 394c3b66065e..2161d50cf0fd 100644
>>>> --- a/include/net/inet_sock.h
>>>> +++ b/include/net/inet_sock.h
>>>> @@ -174,6 +174,7 @@ struct inet_cork {
>>>>       __s16                   tos;
>>>>       char                    priority;
>>>>       __u16                   gso_size;
>>>> +    u32                     ts_opt_id;
>>>>       u64                     transmit_time;
>>>>       u32                     mark;
>>>>    };
>>>> @@ -241,7 +242,8 @@ struct inet_sock {
>>>>       struct inet_cork_full   cork;
>>>>    };
>>>>
>>>> -#define IPCORK_OPT  1       /* ip-options has been held in ipcork.opt */
>>>> +#define IPCORK_OPT          1       /* ip-options has been held in ipcork.opt */
>>>> +#define IPCORK_TS_OPT_ID    2       /* timestmap opt id has been provided in cmsg */
>>>>
>>>>    enum {
>>>>       INET_FLAGS_PKTINFO      = 0,
>>>> diff --git a/include/net/sock.h b/include/net/sock.h
>>>> index f51d61fab059..73e21dad5660 100644
>>>> --- a/include/net/sock.h
>>>> +++ b/include/net/sock.h
>>>> @@ -1794,6 +1794,7 @@ struct sockcm_cookie {
>>>>       u64 transmit_time;
>>>>       u32 mark;
>>>>       u32 tsflags;
>>>> +    u32 ts_opt_id;
>>>>    };
>>>>
>>>>    static inline void sockcm_init(struct sockcm_cookie *sockc,
>>>> diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
>>>> index 8ce8a39a1e5f..db3df3e74b01 100644
>>>> --- a/include/uapi/asm-generic/socket.h
>>>> +++ b/include/uapi/asm-generic/socket.h
>>>> @@ -135,6 +135,8 @@
>>>>    #define SO_PASSPIDFD               76
>>>>    #define SO_PEERPIDFD               77
>>>>
>>>> +#define SCM_TS_OPT_ID               78
>>>> +
>>>>    #if !defined(__KERNEL__)
>>>>
>>>>    #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
>>>> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
>>>> index a2c66b3d7f0f..081b40a55a2e 100644
>>>> --- a/include/uapi/linux/net_tstamp.h
>>>> +++ b/include/uapi/linux/net_tstamp.h
>>>> @@ -32,6 +32,7 @@ enum {
>>>>       SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
>>>>       SOF_TIMESTAMPING_BIND_PHC = (1 << 15),
>>>>       SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16),
>>>> +    SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17),
>>>>
>>>>       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP,
>>>>       SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
>>>
>>> Update SOF_TIMESTAMPING_LAST
>>
>> Got it
>>
>>>> diff --git a/net/core/sock.c b/net/core/sock.c
>>>> index 468b1239606c..560b075765fa 100644
>>>> --- a/net/core/sock.c
>>>> +++ b/net/core/sock.c
>>>> @@ -2859,6 +2859,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
>>>>                       return -EINVAL;
>>>>               sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
>>>>               break;
>>>> +    case SCM_TS_OPT_ID:
>>>> +            /* allow this option for UDP sockets only */
>>>> +            if (!sk_is_udp(sk))
>>>> +                    return -EINVAL;
>>>> +            tsflags = READ_ONCE(sk->sk_tsflags);
>>>> +            if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
>>>> +                    return -EINVAL;
>>>> +            if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
>>>> +                    return -EINVAL;
>>>> +            sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
>>>> +            sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG;
>>>> +            break;
>>>>       /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
>>>>       case SCM_RIGHTS:
>>>>       case SCM_CREDENTIALS:
>>>> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
>>>> index b90d0f78ac80..65b5d9f53102 100644
>>>> --- a/net/ipv4/ip_output.c
>>>> +++ b/net/ipv4/ip_output.c
>>>> @@ -1050,8 +1050,14 @@ static int __ip_append_data(struct sock *sk,
>>>>
>>>>       hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
>>>>                    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
>>>> -    if (hold_tskey)
>>>> -            tskey = atomic_inc_return(&sk->sk_tskey) - 1;
>>>> +    if (hold_tskey) {
>>>> +            if (cork->flags & IPCORK_TS_OPT_ID) {
>>>> +                    hold_tskey = false;
>>>> +                    tskey = cork->ts_opt_id;
>>>> +            } else {
>>>> +                    tskey = atomic_inc_return(&sk->sk_tskey) - 1;
>>>> +            }
>>>> +    }
>>>>
>>>>       /* So, what's going on in the loop below?
>>>>        *
>>>> @@ -1324,8 +1330,11 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
>>>>       cork->mark = ipc->sockc.mark;
>>>>       cork->priority = ipc->priority;
>>>>       cork->transmit_time = ipc->sockc.transmit_time;
>>>> +    cork->ts_opt_id = ipc->sockc.ts_opt_id;
>>>>       cork->tx_flags = 0;
>>>>       sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
>>>> +    if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
>>>> +            cork->flags |= IPCORK_TS_OPT_ID;
>>>>
>>>>       return 0;
>>>>    }
>>>> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
>>>> index f26841f1490f..91eafef85c85 100644
>>>> --- a/net/ipv6/ip6_output.c
>>>> +++ b/net/ipv6/ip6_output.c
>>>> @@ -1401,7 +1401,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
>>>>       cork->base.gso_size = ipc6->gso_size;
>>>>       cork->base.tx_flags = 0;
>>>>       cork->base.mark = ipc6->sockc.mark;
>>>> +    cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
>>>>       sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
>>>> +    if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
>>>> +            cork->base.flags |= IPCORK_TS_OPT_ID;
>>>>
>>>>       cork->base.length = 0;
>>>>       cork->base.transmit_time = ipc6->sockc.transmit_time;
>>>> @@ -1545,8 +1548,14 @@ static int __ip6_append_data(struct sock *sk,
>>>>
>>>>       hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
>>>>                    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
>>>> -    if (hold_tskey)
>>>> -            tskey = atomic_inc_return(&sk->sk_tskey) - 1;
>>>> +    if (hold_tskey) {
>>>> +            if (cork->flags & IPCORK_TS_OPT_ID) {
>>>> +                    hold_tskey = false;
>>>> +                    tskey = cork->ts_opt_id;
>>>> +            } else {
>>>> +                    tskey = atomic_inc_return(&sk->sk_tskey) - 1;
>>>> +            }
>>>> +    }
>>>
>>> Setting, then clearing hold_tskey is a bit weird. How about
>>>
>>> if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
>>>       READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
>>>           if (cork->flags & IPCORK_TS_OPT_ID) {
>>>                    tskey = cork->ts_opt_id;
>>>           } else {
>>>                    tskey = atomic_inc_return(&sk->sk_tskey) - 1;
>>>                    hold_tskey = true;
>>>           }
>>> }
>>
>> Yeah, looks ok, I'll change it this way, thanks!
>>
>> Can you please help me with kernel test robot report? I don't really get
>> how can SCM_TS_OPT_ID be undefined if I added it the exact same place
>> where other option are defined, like SCM_TXTIME or SO_MARK?
> 
> Both bot reports mention arch-alpha.
> 
> Take a look at the patch that introduced SCM_TXTIME. That is defined
> and used in the same locations.
> 
> UAPI socket.h definitions need to be defined separate for various
> archs. I also missed this.

Thanks, Willem, now I found all the definitions.

> Btw, for a next version please also document the new feature in
> Documentation/networking/timestamping.rst

Yep, I'll make series of 3 patches then. Will try to stick into section
1.3.3 Timestamp Options.

> And let's keep it on the list.

Sure, I accidentally removed the list..
diff mbox series

Patch

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 394c3b66065e..2161d50cf0fd 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -174,6 +174,7 @@  struct inet_cork {
 	__s16			tos;
 	char			priority;
 	__u16			gso_size;
+	u32			ts_opt_id;
 	u64			transmit_time;
 	u32			mark;
 };
@@ -241,7 +242,8 @@  struct inet_sock {
 	struct inet_cork_full	cork;
 };
 
-#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
+#define IPCORK_OPT		1	/* ip-options has been held in ipcork.opt */
+#define IPCORK_TS_OPT_ID	2	/* timestmap opt id has been provided in cmsg */
 
 enum {
 	INET_FLAGS_PKTINFO	= 0,
diff --git a/include/net/sock.h b/include/net/sock.h
index f51d61fab059..73e21dad5660 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1794,6 +1794,7 @@  struct sockcm_cookie {
 	u64 transmit_time;
 	u32 mark;
 	u32 tsflags;
+	u32 ts_opt_id;
 };
 
 static inline void sockcm_init(struct sockcm_cookie *sockc,
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 8ce8a39a1e5f..db3df3e74b01 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -135,6 +135,8 @@ 
 #define SO_PASSPIDFD		76
 #define SO_PEERPIDFD		77
 
+#define SCM_TS_OPT_ID		78
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index a2c66b3d7f0f..081b40a55a2e 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -32,6 +32,7 @@  enum {
 	SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14),
 	SOF_TIMESTAMPING_BIND_PHC = (1 << 15),
 	SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16),
+	SOF_TIMESTAMPING_OPT_ID_CMSG = (1 << 17),
 
 	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
diff --git a/net/core/sock.c b/net/core/sock.c
index 468b1239606c..560b075765fa 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2859,6 +2859,18 @@  int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
 			return -EINVAL;
 		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
 		break;
+	case SCM_TS_OPT_ID:
+		/* allow this option for UDP sockets only */
+		if (!sk_is_udp(sk))
+			return -EINVAL;
+		tsflags = READ_ONCE(sk->sk_tsflags);
+		if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
+			return -EINVAL;
+		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+			return -EINVAL;
+		sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
+		sockc->tsflags |= SOF_TIMESTAMPING_OPT_ID_CMSG;
+		break;
 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
 	case SCM_RIGHTS:
 	case SCM_CREDENTIALS:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b90d0f78ac80..65b5d9f53102 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1050,8 +1050,14 @@  static int __ip_append_data(struct sock *sk,
 
 	hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
 		     READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
-	if (hold_tskey)
-		tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+	if (hold_tskey) {
+		if (cork->flags & IPCORK_TS_OPT_ID) {
+			hold_tskey = false;
+			tskey = cork->ts_opt_id;
+		} else {
+			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+		}
+	}
 
 	/* So, what's going on in the loop below?
 	 *
@@ -1324,8 +1330,11 @@  static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->mark = ipc->sockc.mark;
 	cork->priority = ipc->priority;
 	cork->transmit_time = ipc->sockc.transmit_time;
+	cork->ts_opt_id = ipc->sockc.ts_opt_id;
 	cork->tx_flags = 0;
 	sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
+	if (ipc->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
+		cork->flags |= IPCORK_TS_OPT_ID;
 
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..91eafef85c85 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1401,7 +1401,10 @@  static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	cork->base.gso_size = ipc6->gso_size;
 	cork->base.tx_flags = 0;
 	cork->base.mark = ipc6->sockc.mark;
+	cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
+	if (ipc6->sockc.tsflags & SOF_TIMESTAMPING_OPT_ID_CMSG)
+		cork->base.flags |= IPCORK_TS_OPT_ID;
 
 	cork->base.length = 0;
 	cork->base.transmit_time = ipc6->sockc.transmit_time;
@@ -1545,8 +1548,14 @@  static int __ip6_append_data(struct sock *sk,
 
 	hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
 		     READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
-	if (hold_tskey)
-		tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+	if (hold_tskey) {
+		if (cork->flags & IPCORK_TS_OPT_ID) {
+			hold_tskey = false;
+			tskey = cork->ts_opt_id;
+		} else {
+			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+		}
+	}
 
 	/*
 	 * Let's try using as much space as possible.