diff mbox series

[net-next,3/6] net: ipv4: use kfree_skb_reason() in ip_rcv_finish_core()

Message ID 20220124131538.1453657-4-imagedong@tencent.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: use kfree_skb_reason() for ip/udp packet receive | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5978 this patch: 5978
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 871 this patch: 871
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6132 this patch: 6132
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 80 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Menglong Dong Jan. 24, 2022, 1:15 p.m. UTC
From: Menglong Dong <imagedong@tencent.com>

Replace kfree_skb() with kfree_skb_reason() in ip_rcv_finish_core().
The following drop reasons are introduced:

SKB_DROP_REASON_IP_ROUTE_INPUT
SKB_DROP_REASON_IP_RPFILTER
SKB_DROP_REASON_EARLY_DEMUX
SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST

Signed-off-by: Menglong Dong <imagedong@tencent.com>
---
 include/linux/skbuff.h     |  4 ++++
 include/trace/events/skb.h |  5 +++++
 net/ipv4/ip_input.c        | 22 ++++++++++++++++------
 3 files changed, 25 insertions(+), 6 deletions(-)

Comments

David Ahern Jan. 26, 2022, 2:18 a.m. UTC | #1
On 1/24/22 6:15 AM, menglong8.dong@gmail.com wrote:
> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> index ab9bee4bbf0a..77bb9ddc441b 100644
> --- a/net/ipv4/ip_input.c
> +++ b/net/ipv4/ip_input.c
> @@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
>  {
>  	const struct iphdr *iph = ip_hdr(skb);
>  	int (*edemux)(struct sk_buff *skb);
> +	int err, drop_reason;
>  	struct rtable *rt;
> -	int err;
> +
> +	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
>  
>  	if (ip_can_use_hint(skb, iph, hint)) {
>  		err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
> @@ -339,8 +341,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
>  		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
>  			err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
>  					      udp_v4_early_demux, skb);
> -			if (unlikely(err))
> +			if (unlikely(err)) {
> +				drop_reason = SKB_DROP_REASON_EARLY_DEMUX;

is there really value in this one? You ignore the error case from
ip_route_use_hint which is a similar, highly unlikely error path so why
care about this one? The only failure case is ip_mc_validate_source from
udp_v4_early_demux and 'early demux' drops really mean nothing to the user.


>  				goto drop_error;
> +			}
>  			/* must reload iph, skb->head might have changed */
>  			iph = ip_hdr(skb);
>  		}
> @@ -353,8 +357,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
>  	if (!skb_valid_dst(skb)) {
>  		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
>  					   iph->tos, dev);
> -		if (unlikely(err))
> +		if (unlikely(err)) {
> +			drop_reason = SKB_DROP_REASON_IP_ROUTE_INPUT;

The reason codes should be meaningful to users and not derived from a
code path. What does SKB_DROP_REASON_IP_ROUTE_INPUT mean as a failure?


>  			goto drop_error;
> +		}
>  	}
>  
>  #ifdef CONFIG_IP_ROUTE_CLASSID
Menglong Dong Jan. 26, 2022, 2:36 a.m. UTC | #2
On Wed, Jan 26, 2022 at 10:18 AM David Ahern <dsahern@gmail.com> wrote:
>
> On 1/24/22 6:15 AM, menglong8.dong@gmail.com wrote:
> > diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> > index ab9bee4bbf0a..77bb9ddc441b 100644
> > --- a/net/ipv4/ip_input.c
> > +++ b/net/ipv4/ip_input.c
> > @@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> >  {
> >       const struct iphdr *iph = ip_hdr(skb);
> >       int (*edemux)(struct sk_buff *skb);
> > +     int err, drop_reason;
> >       struct rtable *rt;
> > -     int err;
> > +
> > +     drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
> >
> >       if (ip_can_use_hint(skb, iph, hint)) {
> >               err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
> > @@ -339,8 +341,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> >               if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
> >                       err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
> >                                             udp_v4_early_demux, skb);
> > -                     if (unlikely(err))
> > +                     if (unlikely(err)) {
> > +                             drop_reason = SKB_DROP_REASON_EARLY_DEMUX;
>
> is there really value in this one? You ignore the error case from
> ip_route_use_hint which is a similar, highly unlikely error path so why
> care about this one? The only failure case is ip_mc_validate_source from
> udp_v4_early_demux and 'early demux' drops really mean nothing to the user.
>

Ok, let's just ignore it. (In fact, it's because I don't know
what 'early demux'
does :/ )

>
> >                               goto drop_error;
> > +                     }
> >                       /* must reload iph, skb->head might have changed */
> >                       iph = ip_hdr(skb);
> >               }
> > @@ -353,8 +357,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> >       if (!skb_valid_dst(skb)) {
> >               err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> >                                          iph->tos, dev);
> > -             if (unlikely(err))
> > +             if (unlikely(err)) {
> > +                     drop_reason = SKB_DROP_REASON_IP_ROUTE_INPUT;
>
> The reason codes should be meaningful to users and not derived from a
> code path. What does SKB_DROP_REASON_IP_ROUTE_INPUT mean as a failure?
>

Isn't it meaningful? I named it to mean 'ip route lookup or validation
failed in the input path'; doesn't it express this information?

>
> >                       goto drop_error;
> > +             }
> >       }
> >
> >  #ifdef CONFIG_IP_ROUTE_CLASSID
David Ahern Jan. 26, 2022, 2:57 a.m. UTC | #3
On 1/25/22 7:36 PM, Menglong Dong wrote:
> Isn't it meaningful? I name it from the meaning of 'ip route lookup or validate
> failed in input path', can't it express this information?


ip_route_input_noref has many failures and not all of them are FIB
lookups. ip_route_input_slow has a bunch of EINVAL cases for example.

Returning a 'reason' as the code function name has no meaning to a user
and could actually be misleading in some cases. I would skip this one
for now.
Menglong Dong Jan. 26, 2022, 3:13 a.m. UTC | #4
On Wed, Jan 26, 2022 at 10:57 AM David Ahern <dsahern@gmail.com> wrote:
>
> On 1/25/22 7:36 PM, Menglong Dong wrote:
> > Isn't it meaningful? I name it from the meaning of 'ip route lookup or validate
> > failed in input path', can't it express this information?
>
>
> ip_route_input_noref has many failures and not all of them are FIB
> lookups. ip_route_input_slow has a bunch of EINVAL cases for example.
>
> Returning a 'reason' as the code function name has no meaning to a user
> and could actually be misleading in some cases. I would skip this one
> for now.

Yeah, the real reason can be complex. I'll skip this case for now.
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f3028028b83e..8942d32c0657 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -324,6 +324,10 @@  enum skb_drop_reason {
 	SKB_DROP_REASON_OTHERHOST,
 	SKB_DROP_REASON_IP_CSUM,
 	SKB_DROP_REASON_IP_INHDR,
+	SKB_DROP_REASON_IP_ROUTE_INPUT,
+	SKB_DROP_REASON_IP_RPFILTER,
+	SKB_DROP_REASON_EARLY_DEMUX,
+	SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index d1b0d9690e62..1dcdcc92cf08 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -20,6 +20,11 @@ 
 	EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST)		\
 	EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM)			\
 	EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR)			\
+	EM(SKB_DROP_REASON_IP_ROUTE_INPUT, IP_ROUTE_INPUT)	\
+	EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER)		\
+	EM(SKB_DROP_REASON_EARLY_DEMUX, EARLY_DEMUX)		\
+	EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST,		\
+	   UNICAST_IN_L2_MULTICAST)				\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ab9bee4bbf0a..77bb9ddc441b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -318,8 +318,10 @@  static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int (*edemux)(struct sk_buff *skb);
+	int err, drop_reason;
 	struct rtable *rt;
-	int err;
+
+	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
 
 	if (ip_can_use_hint(skb, iph, hint)) {
 		err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
@@ -339,8 +341,10 @@  static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
 			err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
 					      udp_v4_early_demux, skb);
-			if (unlikely(err))
+			if (unlikely(err)) {
+				drop_reason = SKB_DROP_REASON_EARLY_DEMUX;
 				goto drop_error;
+			}
 			/* must reload iph, skb->head might have changed */
 			iph = ip_hdr(skb);
 		}
@@ -353,8 +357,10 @@  static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 	if (!skb_valid_dst(skb)) {
 		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
 					   iph->tos, dev);
-		if (unlikely(err))
+		if (unlikely(err)) {
+			drop_reason = SKB_DROP_REASON_IP_ROUTE_INPUT;
 			goto drop_error;
+		}
 	}
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -396,19 +402,23 @@  static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 		 * so-called "hole-196" attack) so do it for both.
 		 */
 		if (in_dev &&
-		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
+		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
+			drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
 			goto drop;
+		}
 	}
 
 	return NET_RX_SUCCESS;
 
 drop:
-	kfree_skb(skb);
+	kfree_skb_reason(skb, drop_reason);
 	return NET_RX_DROP;
 
 drop_error:
-	if (err == -EXDEV)
+	if (err == -EXDEV) {
+		drop_reason = SKB_DROP_REASON_IP_RPFILTER;
 		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+	}
 	goto drop;
 }