diff mbox series

[net-next] tcp: add TTL to SCM_TIMESTAMPING_OPT_STATS

Message ID 20210120204155.552275-1-ysseung@google.com (mailing list archive)
State Accepted
Commit e7ed11ee945438b737e2ae2370e35591e16ec371
Delegated to: Netdev Maintainers
Headers show
Series [net-next] tcp: add TTL to SCM_TIMESTAMPING_OPT_STATS | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 24 maintainers not CCed: andriin@fb.com pabeni@redhat.com dseok.yi@samsung.com willemb@google.com elver@google.com bjorn@kernel.org daniel@iogearbox.net decui@microsoft.com jiri@mellanox.com linmiaohe@huawei.com nogikh@google.com kuba@kernel.org ast@kernel.org viro@zeniv.linux.org.uk kyk.segfault@gmail.com jakub@cloudflare.com jonathan.lemon@gmail.com yoshfuji@linux-ipv6.org ap420073@gmail.com rdunlap@infradead.org steffen.klassert@secunet.com pablo@netfilter.org xiyou.wangcong@gmail.com gnault@redhat.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 8831 this patch: 8831
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 142 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 9233 this patch: 9233
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Yousuk Seung Jan. 20, 2021, 8:41 p.m. UTC
This patch adds TCP_NLA_TTL to SCM_TIMESTAMPING_OPT_STATS that exports
the time-to-live or hop limit of the latest incoming packet with
SCM_TSTAMP_ACK. The value exported may not be from the packet that acks
the sequence when incoming packets are aggregated. Exporting the
time-to-live or hop limit value of incoming packets helps to estimate
the hop count of the path of the flow that may change over time.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
---
 include/linux/skbuff.h   |  2 +-
 include/linux/tcp.h      |  3 ++-
 include/uapi/linux/tcp.h |  1 +
 net/core/dev.c           |  2 +-
 net/core/skbuff.c        |  6 ++++--
 net/ipv4/tcp.c           | 18 +++++++++++++++++-
 net/ipv4/tcp_input.c     | 16 ++++++++--------
 7 files changed, 34 insertions(+), 14 deletions(-)

Comments

patchwork-bot+netdevbpf@kernel.org Jan. 23, 2021, 3:20 a.m. UTC | #1
Hello:

This patch was applied to netdev/net-next.git (refs/heads/master):

On Wed, 20 Jan 2021 12:41:55 -0800 you wrote:
> This patch adds TCP_NLA_TTL to SCM_TIMESTAMPING_OPT_STATS that exports
> the time-to-live or hop limit of the latest incoming packet with
> SCM_TSTAMP_ACK. The value exported may not be from the packet that acks
> the sequence when incoming packets are aggregated. Exporting the
> time-to-live or hop limit value of incoming packets helps to estimate
> the hop count of the path of the flow that may change over time.
> 
> [...]

Here is the summary with links:
  - [net-next] tcp: add TTL to SCM_TIMESTAMPING_OPT_STATS
    https://git.kernel.org/netdev/net-next/c/e7ed11ee9454

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 46f901adf1a8..77a74a3b511e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3859,7 +3859,7 @@  static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
 void skb_complete_tx_timestamp(struct sk_buff *skb,
 			       struct skb_shared_hwtstamps *hwtstamps);
 
-void __skb_tstamp_tx(struct sk_buff *orig_skb,
+void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb,
 		     struct skb_shared_hwtstamps *hwtstamps,
 		     struct sock *sk, int tstype);
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2f87377e9af7..48d8a363319e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -496,7 +496,8 @@  static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
 }
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
-					       const struct sk_buff *orig_skb);
+					       const struct sk_buff *orig_skb,
+					       const struct sk_buff *ack_skb);
 
 static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
 {
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 768e93bd5b51..16dfa40bdac3 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -314,6 +314,7 @@  enum {
 	TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */
 	TCP_NLA_BYTES_NOTSENT,	/* Bytes in write queue not yet sent */
 	TCP_NLA_EDT,		/* Earliest departure time (CLOCK_MONOTONIC) */
+	TCP_NLA_TTL,		/* TTL or hop limit of a packet received */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/core/dev.c b/net/core/dev.c
index 0332f2e8f7da..8492b1560787 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4083,7 +4083,7 @@  static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	skb_reset_mac_header(skb);
 
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
-		__skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+		__skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
 
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cf2c4dcf4257..7b9351ba4a20 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4717,6 +4717,7 @@  void skb_complete_tx_timestamp(struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
 void __skb_tstamp_tx(struct sk_buff *orig_skb,
+		     const struct sk_buff *ack_skb,
 		     struct skb_shared_hwtstamps *hwtstamps,
 		     struct sock *sk, int tstype)
 {
@@ -4739,7 +4740,8 @@  void __skb_tstamp_tx(struct sk_buff *orig_skb,
 		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
 		    sk->sk_protocol == IPPROTO_TCP &&
 		    sk->sk_type == SOCK_STREAM) {
-			skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
+			skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
+							     ack_skb);
 			opt_stats = true;
 		} else
 #endif
@@ -4768,7 +4770,7 @@  EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		   struct skb_shared_hwtstamps *hwtstamps)
 {
-	return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
+	return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
 			       SCM_TSTAMP_SND);
 }
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2267d21c73a6..6ba6c5b6579c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3766,11 +3766,24 @@  static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
+		nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */
 		0;
 }
 
+/* Returns TTL or hop limit of an incoming packet from skb. */
+static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP))
+		return ip_hdr(skb)->ttl;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		return ipv6_hdr(skb)->hop_limit;
+	else
+		return 0;
+}
+
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
-					       const struct sk_buff *orig_skb)
+					       const struct sk_buff *orig_skb,
+					       const struct sk_buff *ack_skb)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *stats;
@@ -3826,6 +3839,9 @@  struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
 		    max_t(int, 0, tp->write_seq - tp->snd_nxt));
 	nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
 			  TCP_NLA_PAD);
+	if (ack_skb)
+		nla_put_u8(stats, TCP_NLA_TTL,
+			   tcp_skb_ttl_or_hop_limit(ack_skb));
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c7e16b0ed791..5905e604344e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3145,7 +3145,7 @@  static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 }
 
 static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
-			   u32 prior_snd_una)
+			   const struct sk_buff *ack_skb, u32 prior_snd_una)
 {
 	const struct skb_shared_info *shinfo;
 
@@ -3157,7 +3157,7 @@  static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
 	if (!before(shinfo->tskey, prior_snd_una) &&
 	    before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
 		tcp_skb_tsorted_save(skb) {
-			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+			__skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
 		} tcp_skb_tsorted_restore(skb);
 	}
 }
@@ -3166,8 +3166,8 @@  static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
-			       u32 prior_snd_una,
+static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
+			       u32 prior_fack, u32 prior_snd_una,
 			       struct tcp_sacktag_state *sack, bool ece_ack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3256,7 +3256,7 @@  static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 		if (!fully_acked)
 			break;
 
-		tcp_ack_tstamp(sk, skb, prior_snd_una);
+		tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
 
 		next = skb_rb_next(skb);
 		if (unlikely(skb == tp->retransmit_skb_hint))
@@ -3274,7 +3274,7 @@  static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 		tp->snd_up = tp->snd_una;
 
 	if (skb) {
-		tcp_ack_tstamp(sk, skb, prior_snd_una);
+		tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
 		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 			flag |= FLAG_SACK_RENEGING;
 	}
@@ -3808,8 +3808,8 @@  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
-				    flag & FLAG_ECE);
+	flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
+				    &sack_state, flag & FLAG_ECE);
 
 	tcp_rack_update_reo_wnd(sk, &rs);