diff mbox series

[net-next,1/2] net, tc: Make tc-related drop reason more flexible

Message ID 20231006190956.18810-1-daniel@iogearbox.net (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,1/2] net, tc: Make tc-related drop reason more flexible | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2762 this patch: 2762
netdev/cc_maintainers warning 3 maintainers not CCed: davem@davemloft.net pabeni@redhat.com edumazet@google.com
netdev/build_clang fail Errors and warnings before: 142 this patch: 142
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1498 this patch: 1498
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 97 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Daniel Borkmann Oct. 6, 2023, 7:09 p.m. UTC
Currently, the kfree_skb_reason() in sch_handle_{ingress,egress}() can only
express a basic SKB_DROP_REASON_TC_INGRESS or SKB_DROP_REASON_TC_EGRESS reason.

Victor kicked-off an initial proposal to make this more flexible by disambiguating
verdict from return code by moving the verdict into struct tcf_result and
letting tcf_classify() return a negative error. If hit, then two new drop
reasons were added in the proposal, that is SKB_DROP_REASON_TC_INGRESS_ERROR
as well as SKB_DROP_REASON_TC_EGRESS_ERROR. Further analysis of the actual
error codes would have required to attach to tcf_classify via kprobe/kretprobe
to more deeply debug skb and the returned error.

In order to make the kfree_skb_reason() in sch_handle_{ingress,egress}() more
extensible, it can be addressed in a more straight forward way, that is: Instead
of placing the verdict into struct tcf_result, we can just put the drop reason
in there, which does not require changes throughout various classful schedulers
given the existing verdict logic can stay as is.

Then, SKB_DROP_REASON_TC_ERROR{,_*} can be added to the enum skb_drop_reason
to disambiguate between an error or an intentional drop. New drop reason error
codes can be added successively to the tc code base.

For internal error locations which have not yet been annotated with a
SKB_DROP_REASON_TC_ERROR{,_*}, the fallback is SKB_DROP_REASON_TC_INGRESS and
SKB_DROP_REASON_TC_EGRESS, respectively. Generic errors could be marked with a
SKB_DROP_REASON_TC_ERROR code until they are converted to more specific ones
if it is found that they would be useful for troubleshooting.

While drop reasons have infrastructure for subsystem specific error codes which
are currently used by mac80211 and ovs, Jakub mentioned that it is preferred
for tc to use the enum skb_drop_reason core codes given it is a better fit and
currently the tooling support is better.

With regards to the latter:

  [...] I think Alastair (bpftrace) is working on auto-prettifying enums when
  bpftrace outputs maps. So we can do something like:

  $ bpftrace -e 'tracepoint:skb:kfree_skb { @[args->reason] = count(); }'
  Attaching 1 probe...
  ^C

  @[SKB_DROP_REASON_TC_INGRESS]: 2
  @[SKB_CONSUMED]: 34

  ^^^^^^^^^^^^ names!!

  Auto-magically. [...]

Add a small helper tc_set_drop_reason() which can be used to set the drop reason
into tcf_result.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Victor Nogueira <victor@mojatatu.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/netdev/20231006063233.74345d36@kernel.org
---
 include/net/sch_generic.h |  9 +++++++--
 net/core/dev.c            | 15 ++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

Comments

Cong Wang Oct. 8, 2023, 5:27 p.m. UTC | #1
On Fri, Oct 06, 2023 at 09:09:55PM +0200, Daniel Borkmann wrote:
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index c7318c73cfd6..90774cb2ac03 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -324,7 +324,6 @@ struct Qdisc_ops {
>  	struct module		*owner;
>  };
>  
> -
>  struct tcf_result {
>  	union {
>  		struct {
> @@ -332,8 +331,8 @@ struct tcf_result {
>  			u32		classid;
>  		};
>  		const struct tcf_proto *goto_tp;
> -
>  	};
> +	enum skb_drop_reason		drop_reason;
>  };
>  
>  struct tcf_chain;
> @@ -667,6 +666,12 @@ static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
>  	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
>  }
>  
> +static inline void tc_set_drop_reason(struct tcf_result *res,
> +				      enum skb_drop_reason reason)
> +{
> +	res->drop_reason = reason;
> +}
> +

Since this helper is for TC filters and actions, include/net/pkt_cls.h
is a better place for it?
Daniel Borkmann Oct. 9, 2023, 8:11 a.m. UTC | #2
On 10/8/23 7:27 PM, Cong Wang wrote:
> On Fri, Oct 06, 2023 at 09:09:55PM +0200, Daniel Borkmann wrote:
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index c7318c73cfd6..90774cb2ac03 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -324,7 +324,6 @@ struct Qdisc_ops {
>>   	struct module		*owner;
>>   };
>>   
>> -
>>   struct tcf_result {
>>   	union {
>>   		struct {
>> @@ -332,8 +331,8 @@ struct tcf_result {
>>   			u32		classid;
>>   		};
>>   		const struct tcf_proto *goto_tp;
>> -
>>   	};
>> +	enum skb_drop_reason		drop_reason;
>>   };
>>   
>>   struct tcf_chain;
>> @@ -667,6 +666,12 @@ static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
>>   	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
>>   }
>>   
>> +static inline void tc_set_drop_reason(struct tcf_result *res,
>> +				      enum skb_drop_reason reason)
>> +{
>> +	res->drop_reason = reason;
>> +}
>> +
> 
> Since this helper is for TC filters and actions, include/net/pkt_cls.h
> is a better place for it?

Makes sense, will move it in a v2 (and also rename into tcf_set_drop_reason).

Thanks,
Daniel
diff mbox series

Patch

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c7318c73cfd6..90774cb2ac03 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -324,7 +324,6 @@  struct Qdisc_ops {
 	struct module		*owner;
 };
 
-
 struct tcf_result {
 	union {
 		struct {
@@ -332,8 +331,8 @@  struct tcf_result {
 			u32		classid;
 		};
 		const struct tcf_proto *goto_tp;
-
 	};
+	enum skb_drop_reason		drop_reason;
 };
 
 struct tcf_chain;
@@ -667,6 +666,12 @@  static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
 	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
 }
 
+static inline void tc_set_drop_reason(struct tcf_result *res,
+				      enum skb_drop_reason reason)
+{
+	res->drop_reason = reason;
+}
+
 int qdisc_class_hash_init(struct Qdisc_class_hash *);
 void qdisc_class_hash_insert(struct Qdisc_class_hash *,
 			     struct Qdisc_class_common *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 606a366cc209..664426285fa3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3910,7 +3910,8 @@  EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
 #endif /* CONFIG_NET_EGRESS */
 
 #ifdef CONFIG_NET_XGRESS
-static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
+		  enum skb_drop_reason *drop_reason)
 {
 	int ret = TC_ACT_UNSPEC;
 #ifdef CONFIG_NET_CLS_ACT
@@ -3922,12 +3923,14 @@  static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
 
 	tc_skb_cb(skb)->mru = 0;
 	tc_skb_cb(skb)->post_ct = false;
+	res.drop_reason = *drop_reason;
 
 	mini_qdisc_bstats_cpu_update(miniq, skb);
 	ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
 	/* Only tcf related quirks below. */
 	switch (ret) {
 	case TC_ACT_SHOT:
+		*drop_reason = res.drop_reason;
 		mini_qdisc_qstats_cpu_drop(miniq);
 		break;
 	case TC_ACT_OK:
@@ -3977,6 +3980,7 @@  sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		   struct net_device *orig_dev, bool *another)
 {
 	struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+	enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS;
 	int sch_ret;
 
 	if (!entry)
@@ -3994,7 +3998,7 @@  sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		if (sch_ret != TC_ACT_UNSPEC)
 			goto ingress_verdict;
 	}
-	sch_ret = tc_run(tcx_entry(entry), skb);
+	sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
 ingress_verdict:
 	switch (sch_ret) {
 	case TC_ACT_REDIRECT:
@@ -4011,7 +4015,7 @@  sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		*ret = NET_RX_SUCCESS;
 		return NULL;
 	case TC_ACT_SHOT:
-		kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+		kfree_skb_reason(skb, drop_reason);
 		*ret = NET_RX_DROP;
 		return NULL;
 	/* used by tc_run */
@@ -4032,6 +4036,7 @@  static __always_inline struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
 	struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+	enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS;
 	int sch_ret;
 
 	if (!entry)
@@ -4045,7 +4050,7 @@  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 		if (sch_ret != TC_ACT_UNSPEC)
 			goto egress_verdict;
 	}
-	sch_ret = tc_run(tcx_entry(entry), skb);
+	sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason);
 egress_verdict:
 	switch (sch_ret) {
 	case TC_ACT_REDIRECT:
@@ -4054,7 +4059,7 @@  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 		*ret = NET_XMIT_SUCCESS;
 		return NULL;
 	case TC_ACT_SHOT:
-		kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+		kfree_skb_reason(skb, drop_reason);
 		*ret = NET_XMIT_DROP;
 		return NULL;
 	/* used by tc_run */