
[net-next,v2,1/2] net: tcp: add skb drop reasons to tcp connect request

Message ID 20220428073340.224391-2-imagedong@tencent.com
State Changes Requested
Delegated to: Netdev Maintainers
Series net: tcp: add skb drop reasons to connect request

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success
netdev/cover_letter success Series has a cover letter
netdev/patch_count success
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5737 this patch: 5737
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 1134 this patch: 1134
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5881 this patch: 5881
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Menglong Dong April 28, 2022, 7:33 a.m. UTC
From: Menglong Dong <imagedong@tencent.com>

For now, the return value of tcp_v4_conn_request() has the following
meanings:

  >=0: the skb is acceptable, free it with consume_skb()
  <0 : the skb is unacceptable, free it with kfree_skb() and send a
       RESET

In order to get the drop reasons from tcp_v4_conn_request(), we make
some changes to its return value:

  ==0: the skb is acceptable, free it with consume_skb()
  >0:  the return value is exactly the skb drop reason, free it with
       kfree_skb_reason() without sending a RESET
  <0:  the same as before

(As a negative value can be returned by
struct inet_connection_sock_af_ops.conn_request(), we can't make the
return value of tcp_v4_conn_request() an enum skb_drop_reason
directly.)

Therefore, the previous logic is not changed, as tcp_v4_conn_request()
never returned a positive value before.

With drop reasons returned, the caller of tcp_v4_conn_request(), which
is tcp_rcv_state_process(), will call kfree_skb_reason() instead of
consume_skb(), as sketched below.
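
A minimal sketch of the resulting call-site handling (it simply mirrors
the tcp_rcv_state_process() hunk in the patch below):

	err = icsk->icsk_af_ops->conn_request(sk, skb);
	if (err < 0)
		return 1;                   /* unacceptable, send a RESET */
	if (err)
		kfree_skb_reason(skb, err); /* err is the skb drop reason */
	else
		consume_skb(skb);           /* skb accepted and consumed */
	return 0;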

The following new drop reasons are added (a short note on observing
them via tracing follows the list):

  SKB_DROP_REASON_LISTENOVERFLOWS
  SKB_DROP_REASON_TCP_REQQFULLDROP
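
Both reasons are passed to kfree_skb_reason() by the caller, so they
show up through the existing skb:kfree_skb tracepoint with the strings
added to include/trace/events/skb.h below. As a usage sketch (not part
of the patch; the tracefs mount point may be /sys/kernel/tracing or
/sys/kernel/debug/tracing depending on the system):

	# enable the tracepoint and watch for the new reason strings
	echo 1 > /sys/kernel/tracing/events/skb/kfree_skb/enable
	grep -E 'LISTENOVERFLOWS|TCP_REQQFULLDROP' /sys/kernel/tracing/trace_pipe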

Reviewed-by: Jiang Biao <benbjiang@tencent.com>
Reviewed-by: Hao Peng <flyingpeng@tencent.com>
Signed-off-by: Menglong Dong <imagedong@tencent.com>
---
v2:
- don't free the skb in conn_request(), as Eric suggested, and use its
  return value to pass drop reasons.
---
 include/linux/skbuff.h     |  4 ++++
 include/trace/events/skb.h |  2 ++
 net/ipv4/tcp_input.c       | 20 ++++++++++++++------
 net/ipv4/tcp_ipv4.c        |  2 +-
 net/ipv6/tcp_ipv6.c        |  4 ++--
 5 files changed, 23 insertions(+), 9 deletions(-)

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 84d78df60453..f33b3636bbce 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -469,6 +469,10 @@  enum skb_drop_reason {
 	SKB_DROP_REASON_PKT_TOO_BIG,	/* packet size is too big (maybe exceed
 					 * the MTU)
 					 */
+	SKB_DROP_REASON_LISTENOVERFLOWS, /* accept queue of the listen socket is full */
+	SKB_DROP_REASON_TCP_REQQFULLDROP, /* request queue of the listen
+					   * socket is full
+					   */
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index a477bf907498..de6c93670437 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -80,6 +80,8 @@ 
 	EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS)	\
 	EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES)	\
 	EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG)		\
+	EM(SKB_DROP_REASON_LISTENOVERFLOWS, LISTENOVERFLOWS)	\
+	EM(SKB_DROP_REASON_TCP_REQQFULLDROP, TCP_REQQFULLDROP)	\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index daff631b9486..412367b7dfd6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6411,7 +6411,7 @@  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct request_sock *req;
-	int queued = 0;
+	int err, queued = 0;
 	bool acceptable;
 	SKB_DR(reason);
 
@@ -6438,13 +6438,16 @@  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			 */
 			rcu_read_lock();
 			local_bh_disable();
-			acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+			err = icsk->icsk_af_ops->conn_request(sk, skb);
 			local_bh_enable();
 			rcu_read_unlock();
 
-			if (!acceptable)
+			if (err < 0)
 				return 1;
-			consume_skb(skb);
+			if (err)
+				kfree_skb_reason(skb, err);
+			else
+				consume_skb(skb);
 			return 0;
 		}
 		SKB_DR_SET(reason, TCP_FLAGS);
@@ -6878,6 +6881,7 @@  int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	bool want_cookie = false;
 	struct dst_entry *dst;
 	struct flowi fl;
+	SKB_DR(reason);
 
 	/* TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -6886,12 +6890,15 @@  int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
 	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
 		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
-		if (!want_cookie)
+		if (!want_cookie) {
+			SKB_DR_SET(reason, TCP_REQQFULLDROP);
 			goto drop;
+		}
 	}
 
 	if (sk_acceptq_is_full(sk)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+		SKB_DR_SET(reason, LISTENOVERFLOWS);
 		goto drop;
 	}
 
@@ -6947,6 +6954,7 @@  int tcp_conn_request(struct request_sock_ops *rsk_ops,
 			 */
 			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
 				    rsk_ops->family);
+			SKB_DR_SET(reason, TCP_REQQFULLDROP);
 			goto drop_and_release;
 		}
 
@@ -7007,6 +7015,6 @@  int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	__reqsk_free(req);
 drop:
 	tcp_listendrop(sk);
-	return 0;
+	return reason;
 }
 EXPORT_SYMBOL(tcp_conn_request);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 157265aecbed..6a49470d30db 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1470,7 +1470,7 @@  int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 drop:
 	tcp_listendrop(sk);
-	return 0;
+	return SKB_DROP_REASON_IP_INADDRERRORS;
 }
 EXPORT_SYMBOL(tcp_v4_conn_request);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 782df529ff69..92f4a58fdc2c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1158,7 +1158,7 @@  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
-		return 0;
+		return SKB_DROP_REASON_IP_INADDRERRORS;
 	}
 
 	return tcp_conn_request(&tcp6_request_sock_ops,
@@ -1166,7 +1166,7 @@  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 drop:
 	tcp_listendrop(sk);
-	return 0; /* don't send reset */
+	return SKB_DROP_REASON_IP_INADDRERRORS; /* don't send reset */
 }
 
 static void tcp_v6_restore_cb(struct sk_buff *skb)