diff mbox series

[net-next,1/3] net: ip: add skb drop reasons for ip egress path

Message ID 20220220155705.194266-2-imagedong@tencent.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: use kfree_skb_reason() for ip/neighbour | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5979 this patch: 5979
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 878 this patch: 878
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6130 this patch: 6130
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Menglong Dong Feb. 20, 2022, 3:57 p.m. UTC
From: Menglong Dong <imagedong@tencent.com>

Replace kfree_skb() with kfree_skb_reason() in the packet egress path of
IP layer (both IPv4 and IPv6 are considered).

Following functions are involved:

__ip_queue_xmit()
ip_finish_output()
ip_mc_finish_output()
ip6_output()
ip6_finish_output()
ip6_finish_output2()

Following new drop reasons are introduced:

SKB_DROP_REASON_IP_OUTNOROUTES
SKB_DROP_REASON_BPF_CGROUP_EGRESS
SKB_DROP_REASON_IPV6DSIABLED

Reviewed-by: Mengen Sun <mengensun@tencent.com>
Reviewed-by: Hao Peng <flyingpeng@tencent.com>
Signed-off-by: Menglong Dong <imagedong@tencent.com>
---
 include/linux/skbuff.h     | 13 +++++++++++++
 include/trace/events/skb.h |  4 ++++
 net/ipv4/ip_output.c       |  6 +++---
 net/ipv6/ip6_output.c      |  6 +++---
 4 files changed, 23 insertions(+), 6 deletions(-)

Comments

David Ahern Feb. 22, 2022, 3:13 a.m. UTC | #1
On 2/20/22 8:57 AM, menglong8.dong@gmail.com wrote:
> From: Menglong Dong <imagedong@tencent.com>
> 
> Replace kfree_skb() with kfree_skb_reason() in the packet egress path of
> IP layer (both IPv4 and IPv6 are considered).
> 
> Following functions are involved:
> 
> __ip_queue_xmit()
> ip_finish_output()
> ip_mc_finish_output()
> ip6_output()
> ip6_finish_output()
> ip6_finish_output2()
> 
> Following new drop reasons are introduced:
> 
> SKB_DROP_REASON_IP_OUTNOROUTES
> SKB_DROP_REASON_BPF_CGROUP_EGRESS
> SKB_DROP_REASON_IPV6DSIABLED
> 
> Reviewed-by: Mengen Sun <mengensun@tencent.com>
> Reviewed-by: Hao Peng <flyingpeng@tencent.com>
> Signed-off-by: Menglong Dong <imagedong@tencent.com>
> ---
>  include/linux/skbuff.h     | 13 +++++++++++++
>  include/trace/events/skb.h |  4 ++++
>  net/ipv4/ip_output.c       |  6 +++---
>  net/ipv6/ip6_output.c      |  6 +++---
>  4 files changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index a3e90efe6586..c310a4a8fc86 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -380,6 +380,19 @@ enum skb_drop_reason {
>  					 * the ofo queue, corresponding to
>  					 * LINUX_MIB_TCPOFOMERGE
>  					 */
> +	SKB_DROP_REASON_IP_OUTNOROUTES,	/* route lookup failed during
> +					 * packet outputting
> +					 */

This should be good enough since the name contains OUT.

/* route lookup failed */

> +	SKB_DROP_REASON_BPF_CGROUP_EGRESS,	/* dropped by eBPF program
> +						 * with type of BPF_PROG_TYPE_CGROUP_SKB
> +						 * and attach type of
> +						 * BPF_CGROUP_INET_EGRESS
> +						 * during packet sending
> +						 */

/* dropped by BPF_CGROUP_INET_EGRESS eBPF program */

> +	SKB_DROP_REASON_IPV6DSIABLED,	/* IPv6 is disabled on the device,
> +					 * see the doc for disable_ipv6
> +					 * in ip-sysctl.rst for detail
> +					 */

Just /* IPv6 is disabled on the device */


>  	SKB_DROP_REASON_MAX,
>  };
>  

> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 0c0574eb5f5b..df549b7415fb 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c

This file has other relevant drops. e.g., ip_finish_output2 when a neigh
entry can not be created and after skb_gso_segment. The other set for
tun/tap devices has SKB_DROP_REASON_SKB_GSO_SEG which can be used for
the latter. That set also adds kfree_skb_list_reason for the frag drops.


> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 0c6c971ce0a5..4cd9e5fd25e4 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c

Similarly here. The other set should land in the next few days, so you
cna put this set on top of it.
Menglong Dong Feb. 22, 2022, 3:44 a.m. UTC | #2
On Tue, Feb 22, 2022 at 11:13 AM David Ahern <dsahern@kernel.org> wrote:
>
> On 2/20/22 8:57 AM, menglong8.dong@gmail.com wrote:
> > From: Menglong Dong <imagedong@tencent.com>
> >
> > Replace kfree_skb() with kfree_skb_reason() in the packet egress path of
> > IP layer (both IPv4 and IPv6 are considered).
> >
> > Following functions are involved:
> >
> > __ip_queue_xmit()
> > ip_finish_output()
> > ip_mc_finish_output()
> > ip6_output()
> > ip6_finish_output()
> > ip6_finish_output2()
> >
> > Following new drop reasons are introduced:
> >
> > SKB_DROP_REASON_IP_OUTNOROUTES
> > SKB_DROP_REASON_BPF_CGROUP_EGRESS
> > SKB_DROP_REASON_IPV6DSIABLED
> >
> > Reviewed-by: Mengen Sun <mengensun@tencent.com>
> > Reviewed-by: Hao Peng <flyingpeng@tencent.com>
> > Signed-off-by: Menglong Dong <imagedong@tencent.com>
> > ---
> >  include/linux/skbuff.h     | 13 +++++++++++++
> >  include/trace/events/skb.h |  4 ++++
> >  net/ipv4/ip_output.c       |  6 +++---
> >  net/ipv6/ip6_output.c      |  6 +++---
> >  4 files changed, 23 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index a3e90efe6586..c310a4a8fc86 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -380,6 +380,19 @@ enum skb_drop_reason {
> >                                        * the ofo queue, corresponding to
> >                                        * LINUX_MIB_TCPOFOMERGE
> >                                        */
> > +     SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed during
> > +                                      * packet outputting
> > +                                      */
>
> This should be good enough since the name contains OUT.
>
> /* route lookup failed */
>
> > +     SKB_DROP_REASON_BPF_CGROUP_EGRESS,      /* dropped by eBPF program
> > +                                              * with type of BPF_PROG_TYPE_CGROUP_SKB
> > +                                              * and attach type of
> > +                                              * BPF_CGROUP_INET_EGRESS
> > +                                              * during packet sending
> > +                                              */
>
> /* dropped by BPF_CGROUP_INET_EGRESS eBPF program */
>
> > +     SKB_DROP_REASON_IPV6DSIABLED,   /* IPv6 is disabled on the device,
> > +                                      * see the doc for disable_ipv6
> > +                                      * in ip-sysctl.rst for detail
> > +                                      */
>
> Just /* IPv6 is disabled on the device */
>
>
> >       SKB_DROP_REASON_MAX,
> >  };
> >
>
> > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> > index 0c0574eb5f5b..df549b7415fb 100644
> > --- a/net/ipv4/ip_output.c
> > +++ b/net/ipv4/ip_output.c
>
> This file has other relevant drops. e.g., ip_finish_output2 when a neigh
> entry can not be created and after skb_gso_segment. The other set for
> tun/tap devices has SKB_DROP_REASON_SKB_GSO_SEG which can be used for
> the latter. That set also adds kfree_skb_list_reason for the frag drops.
>

I tried to add a drop reason for neigh creating fail, but I found it's hard
to find the root reason, as __neigh_create() can fail in many cases.
And I'm not sure if there is any help when we get a
'SKB_DROP_REASON_NEIGH_CREATEFAIL' message.

Seems it's hard to make every drop reason accurate, is it ok if we
use the name 'SKB_DROP_REASON_NEIGH_CREATEFAIL' for
this path?

>
> > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> > index 0c6c971ce0a5..4cd9e5fd25e4 100644
> > --- a/net/ipv6/ip6_output.c
> > +++ b/net/ipv6/ip6_output.c
>
> Similarly here. The other set should land in the next few days, so you
> cna put this set on top of it.

Yeah, I can make use of it.

>
Menglong Dong Feb. 25, 2022, 6:05 a.m. UTC | #3
On Tue, Feb 22, 2022 at 11:13 AM David Ahern <dsahern@kernel.org> wrote:
>
> On 2/20/22 8:57 AM, menglong8.dong@gmail.com wrote:
> > From: Menglong Dong <imagedong@tencent.com>
> >
> > Replace kfree_skb() with kfree_skb_reason() in the packet egress path of
> > IP layer (both IPv4 and IPv6 are considered).
> >
> > Following functions are involved:
> >
> > __ip_queue_xmit()
> > ip_finish_output()
> > ip_mc_finish_output()
> > ip6_output()
> > ip6_finish_output()
> > ip6_finish_output2()
> >
> > Following new drop reasons are introduced:
> >
> > SKB_DROP_REASON_IP_OUTNOROUTES
> > SKB_DROP_REASON_BPF_CGROUP_EGRESS
> > SKB_DROP_REASON_IPV6DSIABLED
> >
> > Reviewed-by: Mengen Sun <mengensun@tencent.com>
> > Reviewed-by: Hao Peng <flyingpeng@tencent.com>
> > Signed-off-by: Menglong Dong <imagedong@tencent.com>
> > ---
> >  include/linux/skbuff.h     | 13 +++++++++++++
> >  include/trace/events/skb.h |  4 ++++
> >  net/ipv4/ip_output.c       |  6 +++---
> >  net/ipv6/ip6_output.c      |  6 +++---
> >  4 files changed, 23 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index a3e90efe6586..c310a4a8fc86 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -380,6 +380,19 @@ enum skb_drop_reason {
> >                                        * the ofo queue, corresponding to
> >                                        * LINUX_MIB_TCPOFOMERGE
> >                                        */
> > +     SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed during
> > +                                      * packet outputting
> > +                                      */
>
> This should be good enough since the name contains OUT.
>
> /* route lookup failed */
>
> > +     SKB_DROP_REASON_BPF_CGROUP_EGRESS,      /* dropped by eBPF program
> > +                                              * with type of BPF_PROG_TYPE_CGROUP_SKB
> > +                                              * and attach type of
> > +                                              * BPF_CGROUP_INET_EGRESS
> > +                                              * during packet sending
> > +                                              */
>
> /* dropped by BPF_CGROUP_INET_EGRESS eBPF program */
>
> > +     SKB_DROP_REASON_IPV6DSIABLED,   /* IPv6 is disabled on the device,
> > +                                      * see the doc for disable_ipv6
> > +                                      * in ip-sysctl.rst for detail
> > +                                      */
>
> Just /* IPv6 is disabled on the device */
>
>
> >       SKB_DROP_REASON_MAX,
> >  };
> >
>
> > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> > index 0c0574eb5f5b..df549b7415fb 100644
> > --- a/net/ipv4/ip_output.c
> > +++ b/net/ipv4/ip_output.c
>
> This file has other relevant drops. e.g., ip_finish_output2 when a neigh
> entry can not be created and after skb_gso_segment. The other set for
> tun/tap devices has SKB_DROP_REASON_SKB_GSO_SEG which can be used for
> the latter. That set also adds kfree_skb_list_reason for the frag drops.
>
>
> > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> > index 0c6c971ce0a5..4cd9e5fd25e4 100644
> > --- a/net/ipv6/ip6_output.c
> > +++ b/net/ipv6/ip6_output.c
>
> Similarly here. The other set should land in the next few days, so you
> cna put this set on top of it.

Do I need to wait for that set to be ready and use something in it?
Seems they are not ready yet, and I think maybe I can send a v2
now without it?

>
Roman Mashak Feb. 25, 2022, 1:41 p.m. UTC | #4
Menglong Dong <menglong8.dong@gmail.com> writes:

> On Tue, Feb 22, 2022 at 11:13 AM David Ahern <dsahern@kernel.org> wrote:
>>
>> On 2/20/22 8:57 AM, menglong8.dong@gmail.com wrote:
>> > From: Menglong Dong <imagedong@tencent.com>
>> >
>> > Replace kfree_skb() with kfree_skb_reason() in the packet egress path of
>> > IP layer (both IPv4 and IPv6 are considered).
>> >
>> > Following functions are involved:
>> >
>> > __ip_queue_xmit()
>> > ip_finish_output()
>> > ip_mc_finish_output()
>> > ip6_output()
>> > ip6_finish_output()
>> > ip6_finish_output2()
>> >
>> > Following new drop reasons are introduced:
>> >
>> > SKB_DROP_REASON_IP_OUTNOROUTES
>> > SKB_DROP_REASON_BPF_CGROUP_EGRESS
>> > SKB_DROP_REASON_IPV6DSIABLED

Is this a typo and should be SKB_DROP_REASON_IPV6DISABLED ?

[...]
Menglong Dong Feb. 25, 2022, 1:53 p.m. UTC | #5
On Fri, Feb 25, 2022 at 9:41 PM Roman Mashak <mrv@mojatatu.com> wrote:
>
> Menglong Dong <menglong8.dong@gmail.com> writes:
>
> > On Tue, Feb 22, 2022 at 11:13 AM David Ahern <dsahern@kernel.org> wrote:
> >>
> >> On 2/20/22 8:57 AM, menglong8.dong@gmail.com wrote:
> >> > From: Menglong Dong <imagedong@tencent.com>
> >> >
> >> > Replace kfree_skb() with kfree_skb_reason() in the packet egress path of
> >> > IP layer (both IPv4 and IPv6 are considered).
> >> >
> >> > Following functions are involved:
> >> >
> >> > __ip_queue_xmit()
> >> > ip_finish_output()
> >> > ip_mc_finish_output()
> >> > ip6_output()
> >> > ip6_finish_output()
> >> > ip6_finish_output2()
> >> >
> >> > Following new drop reasons are introduced:
> >> >
> >> > SKB_DROP_REASON_IP_OUTNOROUTES
> >> > SKB_DROP_REASON_BPF_CGROUP_EGRESS
> >> > SKB_DROP_REASON_IPV6DSIABLED
>
> Is this a typo and should be SKB_DROP_REASON_IPV6DISABLED ?

In fact......yeah, this is a typo :)
I'll fix it. Thanks!

>
> [...]
>
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a3e90efe6586..c310a4a8fc86 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -380,6 +380,19 @@  enum skb_drop_reason {
 					 * the ofo queue, corresponding to
 					 * LINUX_MIB_TCPOFOMERGE
 					 */
+	SKB_DROP_REASON_IP_OUTNOROUTES,	/* route lookup failed during
+					 * packet outputting
+					 */
+	SKB_DROP_REASON_BPF_CGROUP_EGRESS,	/* dropped by eBPF program
+						 * with type of BPF_PROG_TYPE_CGROUP_SKB
+						 * and attach type of
+						 * BPF_CGROUP_INET_EGRESS
+						 * during packet sending
+						 */
+	SKB_DROP_REASON_IPV6DSIABLED,	/* IPv6 is disabled on the device,
+					 * see the doc for disable_ipv6
+					 * in ip-sysctl.rst for detail
+					 */
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 2ab7193313aa..47dedef7b6b8 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -37,6 +37,10 @@ 
 	EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA)		\
 	EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW)	\
 	EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE)		\
+	EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES)	\
+	EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS,			\
+	   BPF_CGROUP_EGRESS)					\
+	EM(SKB_DROP_REASON_IPV6DSIABLED, IPV6DSIABLED)		\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0c0574eb5f5b..df549b7415fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -317,7 +317,7 @@  static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
 	case NET_XMIT_CN:
 		return __ip_finish_output(net, sk, skb) ? : ret;
 	default:
-		kfree_skb(skb);
+		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
 		return ret;
 	}
 }
@@ -337,7 +337,7 @@  static int ip_mc_finish_output(struct net *net, struct sock *sk,
 	case NET_XMIT_SUCCESS:
 		break;
 	default:
-		kfree_skb(skb);
+		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
 		return ret;
 	}
 
@@ -536,7 +536,7 @@  int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 no_route:
 	rcu_read_unlock();
 	IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
-	kfree_skb(skb);
+	kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES);
 	return -EHOSTUNREACH;
 }
 EXPORT_SYMBOL(__ip_queue_xmit);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0c6c971ce0a5..4cd9e5fd25e4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -130,7 +130,7 @@  static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 	rcu_read_unlock_bh();
 
 	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
-	kfree_skb(skb);
+	kfree_skb_reason(skb, SKB_DROP_REASON_IP_OUTNOROUTES);
 	return -EINVAL;
 }
 
@@ -202,7 +202,7 @@  static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 	case NET_XMIT_CN:
 		return __ip6_finish_output(net, sk, skb) ? : ret;
 	default:
-		kfree_skb(skb);
+		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
 		return ret;
 	}
 }
@@ -217,7 +217,7 @@  int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	if (unlikely(idev->cnf.disable_ipv6)) {
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
-		kfree_skb(skb);
+		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DSIABLED);
 		return 0;
 	}