Message ID | 17d459487b61c5d0276a01a3bc1254c6432b5d12.1736793775.git.lucien.xin@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Series | [PATCHv2,net] net: sched: refine software bypass handling in tc_run |
Hi Xin,

With the concept turned on its head, we probably shouldn't call it a bypass
anymore? Now software processing is only enabled if there are any rules that
need it.

s/PATCHv2 net/PATCH v2 net/g, but I think my patch below pushes it firmly into
net-next territory, unless you can convince the maintainers that usesw is
always set correctly.

I will run it through some tests tomorrow with my patch applied.

On 1/13/25 6:42 PM, Xin Long wrote:
> [...]
> @@ -410,48 +411,17 @@ static void tcf_proto_get(struct tcf_proto *tp)
>  	refcount_inc(&tp->refcnt);
>  }
>  
> -static void tcf_maintain_bypass(struct tcf_block *block)
> -{
> -	int filtercnt = atomic_read(&block->filtercnt);
> -	int skipswcnt = atomic_read(&block->skipswcnt);
> -	bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
> -
> -	if (bypass_wanted != block->bypass_wanted) {
> -#ifdef CONFIG_NET_CLS_ACT
> -		if (bypass_wanted)
> -			static_branch_inc(&tcf_bypass_check_needed_key);

This enabled the global sw bypass checking static key when sw was NOT used.

> [...]
> @@ -2409,7 +2379,13 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
>  	tfilter_notify(net, skb, n, tp, block, q, parent, fh,
>  		       RTM_NEWTFILTER, false, rtnl_held, extack);
>  	tfilter_put(tp, fh);
> -	tcf_block_filter_cnt_update(block, &tp->counted, true);
> +	spin_lock(&tp->lock);
> +	if (tp->usesw && !tp->counted) {
> +		if (atomic_inc_return(&block->useswcnt) == 1)
> +			static_branch_inc(&tcf_bypass_check_needed_key);

This enables the global sw bypass checking static key when sw IS used.

I think you are missing the patch below (not tested in any way yet).

This patch:
- Renames the static key, as its use has changed.
- Fixes tc_run() to use the static key in the new way.

---
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e4fea1decca1..4eb0ebb9e76c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -75,7 +75,7 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
+DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
 
 static inline bool tcf_block_bypass_sw(struct tcf_block *block)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index a9f62f5aeb84..3ec89165296f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2134,8 +2134,8 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 #endif
 
 #ifdef CONFIG_NET_CLS_ACT
-DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
-EXPORT_SYMBOL(tcf_bypass_check_needed_key);
+DEFINE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
+EXPORT_SYMBOL(tcf_sw_enabled_key);
 #endif
 
 DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
@@ -4030,10 +4030,13 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
 	if (!miniq)
 		return ret;
 
-	if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
-		if (tcf_block_bypass_sw(miniq->block))
-			return ret;
-	}
+	/* Global bypass */
+	if (!static_branch_likely(&tcf_sw_enabled_key))
+		return ret;
+
+	/* Block-wise bypass */
+	if (tcf_block_bypass_sw(miniq->block))
+		return ret;
 
 	tc_skb_cb(skb)->mru = 0;
 	tc_skb_cb(skb)->post_ct = false;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 358b66dfdc83..617fcb682209 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -419,7 +419,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
 	tp->ops->destroy(tp, rtnl_held, extack);
 	if (tp->usesw && tp->counted) {
 		if (!atomic_dec_return(&tp->chain->block->useswcnt))
-			static_branch_dec(&tcf_bypass_check_needed_key);
+			static_branch_dec(&tcf_sw_enabled_key);
 		tp->counted = false;
 	}
 	if (sig_destroy)
@@ -2382,7 +2382,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	spin_lock(&tp->lock);
 	if (tp->usesw && !tp->counted) {
 		if (atomic_inc_return(&block->useswcnt) == 1)
-			static_branch_inc(&tcf_bypass_check_needed_key);
+			static_branch_inc(&tcf_sw_enabled_key);
 		tp->counted = true;
 	}
 	spin_unlock(&tp->lock);
On Mon, 13 Jan 2025 13:42:55 -0500 Xin Long wrote:
> This patch addresses issues with filter counting in block (tcf_block),
> particularly for software bypass scenarios, by introducing a more
> accurate mechanism using useswcnt.

I think this is causing:

[ 35.565404][ T350] BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:49
[ 35.565956][ T350] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 350, name: tc
[ 35.566288][ T350] preempt_count: 1, expected: 0
[ 35.566529][ T350] RCU nest depth: 0, expected: 0
[ 35.566753][ T350] 2 locks held by tc/350:
[ 35.566922][ T350] #0: ffffffff9d1e7e88 (rtnl_mutex){+.+.}-{4:4}, at: tc_new_tfilter+0x902/0x1c90
[ 35.567325][ T350] #1: ffff88800a377b90 (&tp->lock){+.+.}-{3:3}, at: tc_new_tfilter+0x9d1/0x1c90
[ 35.567707][ T350] CPU: 2 UID: 0 PID: 350 Comm: tc Not tainted 6.13.0-rc7-virtme #1
[ 35.568006][ T350] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 35.568259][ T350] Call Trace:
[ 35.568414][ T350] <TASK>
[ 35.568520][ T350] dump_stack_lvl+0xb0/0xd0
[ 35.568745][ T350] __might_resched+0x2f8/0x530
[ 35.568945][ T350] ? tc_new_tfilter+0x9d1/0x1c90
[ 35.569151][ T350] cpus_read_lock+0x1b/0xe0
[ 35.569349][ T350] static_key_slow_inc+0x13/0x30
[ 35.569547][ T350] tc_new_tfilter+0x1523/0x1c90
[ 35.569757][ T350] ? mark_lock+0x38/0x3e0
[ 35.569918][ T350] ? __pfx_tc_new_tfilter+0x10/0x10
[ 35.570142][ T350] ? __pfx_lock_acquire.part.0+0x10/0x10
[ 35.570555][ T350] ? rtnetlink_rcv_msg+0x6ef/0xc10
[ 35.570767][ T350] ? __pfx_tc_new_tfilter+0x10/0x10
[ 35.570968][ T350] rtnetlink_rcv_msg+0x712/0xc10
[ 35.571174][ T350] ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[ 35.571393][ T350] ? hlock_class+0x4e/0x130
[ 35.571591][ T350] ? mark_lock+0x38/0x3e0
[ 35.571749][ T350] ? __lock_acquire+0xb9a/0x1680
[ 35.571951][ T350] netlink_rcv_skb+0x130/0x360
[ 35.572153][ T350] ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[ 35.572353][ T350] ? __pfx_netlink_rcv_skb+0x10/0x10
[ 35.572571][ T350] ? netlink_deliver_tap+0x13e/0x340
[ 35.572772][ T350] ? netlink_deliver_tap+0xc3/0x340
[ 35.572982][ T350] netlink_unicast+0x44b/0x710
[ 35.573188][ T350] ? __pfx_netlink_unicast+0x10/0x10
[ 35.573387][ T350] ? find_held_lock+0x2c/0x110
[ 35.573591][ T350] netlink_sendmsg+0x723/0xbe0
[ 35.573796][ T350] ? __pfx_netlink_sendmsg+0x10/0x10
[ 35.574006][ T350] ____sys_sendmsg+0x7ac/0xa10
[ 35.574207][ T350] ? __pfx_____sys_sendmsg+0x10/0x10
[ 35.574407][ T350] ? __pfx_copy_msghdr_from_user+0x10/0x10
[ 35.574668][ T350] ___sys_sendmsg+0xee/0x170
[ 35.574866][ T350] ? __debug_check_no_obj_freed+0x253/0x520
[ 35.575114][ T350] ? __pfx____sys_sendmsg+0x10/0x10
[ 35.575318][ T350] ? __pfx___debug_check_no_obj_freed+0x10/0x10
[ 35.575565][ T350] ? __pfx_free_object_rcu+0x10/0x10
[ 35.575766][ T350] ? trace_rcu_segcb_stats+0x36/0x1e0
[ 35.575973][ T350] ? lockdep_hardirqs_on_prepare+0x275/0x410
[ 35.576225][ T350] ? kmem_cache_free+0xf8/0x330
[ 35.576423][ T350] ? do_sys_openat2+0x141/0x160
[ 35.576620][ T350] ? do_sys_openat2+0x10a/0x160
[ 35.576821][ T350] ? do_sys_openat2+0x10a/0x160
[ 35.577027][ T350] __sys_sendmsg+0x109/0x1a0
[ 35.577226][ T350] ? __pfx___sys_sendmsg+0x10/0x10
[ 35.577448][ T350] do_syscall_64+0xc1/0x1d0
[ 35.577648][ T350] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 35.577892][ T350] RIP: 0033:0x7f539e60b9a7
[ 35.578106][ T350] Code: Unable to access opcode bytes at 0x7f539e60b97d.
[ 35.578359][ T350] RSP: 002b:00007ffd917a3fc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 35.578670][ T350] RAX: ffffffffffffffda RBX: 000000000047dbc0 RCX: 00007f539e60b9a7
[ 35.578979][ T350] RDX: 0000000000000000 RSI: 00007ffd917a4030 RDI: 0000000000000005
[ 35.579271][ T350] RBP: 000000000000dd86 R08: 0000000000000000 R09: 0000000000000000
[ 35.579567][ T350] R10: 00007f539e4c4708 R11: 0000000000000246 R12: 00007ffd917a9973
[ 35.579862][ T350] R13: 00000000678583dc R14: 0000000000483b60 R15: 00007ffd917a9977
[ 35.580179][ T350] </TASK>
On Mon, Jan 13, 2025 at 4:41 PM Asbjørn Sloth Tønnesen <ast@fiberby.net> wrote:
>
> Hi Xin,
>
> With the concept turned on its head, we probably shouldn't call it a bypass
> anymore? Now software processing is only enabled if there are any rules
> that need it.

cool, I missed that.

>
> s/PATCHv2 net/PATCH v2 net/g, but I think my patch below pushes it
> firmly into net-next territory, unless you can convince the maintainers that
> usesw is always set correctly.

yeah, it's now changing the tc_run() code in net/core/dev.c.

>
> I will run it through some tests tomorrow with my patch applied.

That will be great. :-)

Thanks.

> [...]
On Mon, Jan 13, 2025 at 5:26 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Mon, 13 Jan 2025 13:42:55 -0500 Xin Long wrote:
> > This patch addresses issues with filter counting in block (tcf_block),
> > particularly for software bypass scenarios, by introducing a more
> > accurate mechanism using useswcnt.
>
> I think this is causing:
>
> [ 35.565404][ T350] BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:49
> [ 35.565956][ T350] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 350, name: tc
> [...]
> [ 35.569151][ T350] cpus_read_lock+0x1b/0xe0
> [ 35.569349][ T350] static_key_slow_inc+0x13/0x30
> [ 35.569547][ T350] tc_new_tfilter+0x1523/0x1c90
> [...]

Thanks for catching this.

I will try moving the call to static_branch_inc() out of spin_lock(&tp->lock), like:

        spin_lock(&tp->lock);
        if (tp->usesw && !tp->counted) {
                tp->counted = true;
                tp_counted = 1;
        }
        spin_unlock(&tp->lock);

        if (tp_counted && atomic_inc_return(&block->useswcnt) == 1)
                static_branch_inc(&tcf_sw_enabled_key);
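For readers following the thread, a slightly fuller sketch of that reordering is below. It is only a sketch: the local variable name counted_now is an assumption, not code from any posted patch, and it also assumes the tcf_sw_enabled_key rename from earlier in the thread. The point is purely the ordering: the sticky tp->counted transition happens under tp->lock, while static_branch_inc(), which takes cpus_read_lock() internally as the splat shows, runs only after the spinlock has been released (rtnl is still held).

        bool counted_now = false;

        /* Flip the sticky per-tcf_proto state under the spinlock... */
        spin_lock(&tp->lock);
        if (tp->usesw && !tp->counted) {
                tp->counted = true;
                counted_now = true;
        }
        spin_unlock(&tp->lock);

        /* ...and only touch the static key in sleepable context, once the
         * spinlock is no longer held.
         */
        if (counted_now && atomic_inc_return(&block->useswcnt) == 1)
                static_branch_inc(&tcf_sw_enabled_key);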
Hi Xin,

kernel test robot noticed the following build errors:

[auto build test ERROR on net/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Xin-Long/net-sched-refine-software-bypass-handling-in-tc_run/20250114-025301
base:   net/main
patch link:    https://lore.kernel.org/r/17d459487b61c5d0276a01a3bc1254c6432b5d12.1736793775.git.lucien.xin%40gmail.com
patch subject: [PATCHv2 net] net: sched: refine software bypass handling in tc_run
config: i386-buildonly-randconfig-004-20250114 (https://download.01.org/0day-ci/archive/20250115/202501150051.z1HfVwib-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250115/202501150051.z1HfVwib-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501150051.z1HfVwib-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from include/linux/static_key.h:1,
                    from arch/x86/include/asm/nospec-branch.h:6,
                    from arch/x86/include/asm/irqflags.h:9,
                    from include/linux/irqflags.h:18,
                    from arch/x86/include/asm/special_insns.h:10,
                    from arch/x86/include/asm/processor.h:25,
                    from arch/x86/include/asm/timex.h:5,
                    from include/linux/timex.h:67,
                    from include/linux/time32.h:13,
                    from include/linux/time.h:60,
                    from include/linux/stat.h:19,
                    from include/linux/module.h:13,
                    from net/sched/cls_api.c:12:
   net/sched/cls_api.c: In function 'tcf_proto_destroy':
>> net/sched/cls_api.c:422:44: error: 'tcf_bypass_check_needed_key' undeclared (first use in this function)
     422 |                 static_branch_dec(&tcf_bypass_check_needed_key);
         |                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/jump_label.h:525:63: note: in definition of macro 'static_branch_dec'
     525 | #define static_branch_dec(x)            static_key_slow_dec(&(x)->key)
         |                                                               ^
   net/sched/cls_api.c:422:44: note: each undeclared identifier is reported only once for each function it appears in
     422 |                 static_branch_dec(&tcf_bypass_check_needed_key);
         |                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/jump_label.h:525:63: note: in definition of macro 'static_branch_dec'
     525 | #define static_branch_dec(x)            static_key_slow_dec(&(x)->key)
         |                                                               ^
   net/sched/cls_api.c: In function 'tc_new_tfilter':
   net/sched/cls_api.c:2385:52: error: 'tcf_bypass_check_needed_key' undeclared (first use in this function)
    2385 |                         static_branch_inc(&tcf_bypass_check_needed_key);
         |                                            ^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/jump_label.h:301:74: note: in definition of macro 'static_key_slow_inc'
     301 | #define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key)
         |                                                                    ^~~
   net/sched/cls_api.c:2385:33: note: in expansion of macro 'static_branch_inc'
    2385 |                         static_branch_inc(&tcf_bypass_check_needed_key);
         |                                 ^~~~~~~~~~~~~~~~~


vim +/tcf_bypass_check_needed_key +422 net/sched/cls_api.c

   415  
   416  static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
   417                                bool sig_destroy, struct netlink_ext_ack *extack)
   418  {
   419          tp->ops->destroy(tp, rtnl_held, extack);
   420          if (tp->usesw && tp->counted) {
   421                  if (!atomic_dec_return(&tp->chain->block->useswcnt))
 > 422                          static_branch_dec(&tcf_bypass_check_needed_key);
   423                  tp->counted = false;
   424          }
   425          if (sig_destroy)
   426                  tcf_proto_signal_destroyed(tp->chain, tp);
   427          tcf_chain_put(tp->chain);
   428          module_put(tp->ops->owner);
   429          kfree_rcu(tp, rcu);
   430  }
   431  
Hi Xin,

kernel test robot noticed the following build errors:

[auto build test ERROR on net/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Xin-Long/net-sched-refine-software-bypass-handling-in-tc_run/20250114-025301
base:   net/main
patch link:    https://lore.kernel.org/r/17d459487b61c5d0276a01a3bc1254c6432b5d12.1736793775.git.lucien.xin%40gmail.com
patch subject: [PATCHv2 net] net: sched: refine software bypass handling in tc_run
config: i386-buildonly-randconfig-005-20250114 (https://download.01.org/0day-ci/archive/20250115/202501150119.PO2Xlm4u-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250115/202501150119.PO2Xlm4u-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501150119.PO2Xlm4u-lkp@intel.com/

All errors (new ones prefixed by >>):

>> net/sched/cls_api.c:422:23: error: use of undeclared identifier 'tcf_bypass_check_needed_key'
     422 |                         static_branch_dec(&tcf_bypass_check_needed_key);
         |                                            ^
   net/sched/cls_api.c:2385:24: error: use of undeclared identifier 'tcf_bypass_check_needed_key'
    2385 |                         static_branch_inc(&tcf_bypass_check_needed_key);
         |                                            ^
   2 errors generated.


vim +/tcf_bypass_check_needed_key +422 net/sched/cls_api.c

   415  
   416  static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
   417                                bool sig_destroy, struct netlink_ext_ack *extack)
   418  {
   419          tp->ops->destroy(tp, rtnl_held, extack);
   420          if (tp->usesw && tp->counted) {
   421                  if (!atomic_dec_return(&tp->chain->block->useswcnt))
 > 422                          static_branch_dec(&tcf_bypass_check_needed_key);
   423                  tp->counted = false;
   424          }
   425          if (sig_destroy)
   426                  tcf_proto_signal_destroyed(tp->chain, tp);
   427          tcf_chain_put(tp->chain);
   428          module_put(tp->ops->owner);
   429          kfree_rcu(tp, rcu);
   430  }
   431  
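A note on these two reports: both configs appear to be CONFIG_NET_CLS_ACT=n randconfig builds. The v2 patch calls static_branch_dec()/static_branch_inc() in cls_api.c unconditionally, whereas the removed tcf_maintain_bypass() wrapped them in #ifdef CONFIG_NET_CLS_ACT, and the key itself is only declared under that option in pkt_cls.h, so the identifier is unknown in such builds. One possible shape of a fix, sketched here as an assumption and not as the actual follow-up, is to restore the guard at the call sites (or move the key's declaration out of the #ifdef):

        if (tp->usesw && tp->counted) {
                if (!atomic_dec_return(&tp->chain->block->useswcnt)) {
#ifdef CONFIG_NET_CLS_ACT
                        /* key is only declared/defined with NET_CLS_ACT */
                        static_branch_dec(&tcf_bypass_check_needed_key);
#endif
                }
                tp->counted = false;
        }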
On 1/14/25 2:30 AM, Xin Long wrote:
> On Mon, Jan 13, 2025 at 4:41 PM Asbjørn Sloth Tønnesen <ast@fiberby.net> wrote:
>> I will run it through some tests tomorrow with my patch applied.
> That will be great. :-)

Hi Xin,

Given the already posted changes, when I rerun the benchmark tests from my
original patch last year, I don't see any significant differences in the
forwarding performance (single 8-core CPU, no parallel rule updates).

The test code is linked in my original patch:
https://lore.kernel.org/netdev/20240325204740.1393349-4-ast@fiberby.net/
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cf199af85c52..e4fea1decca1 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -79,7 +79,7 @@ DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
 
 static inline bool tcf_block_bypass_sw(struct tcf_block *block)
 {
-	return block && block->bypass_wanted;
+	return block && !atomic_read(&block->useswcnt);
 }
 #endif
 
@@ -760,6 +760,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
 	cls_common->extack = extack;
 }
 
+static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags)
+{
+	if (tp->usesw)
+		return;
+	if (tc_skip_sw(flags) && tc_in_hw(flags))
+		return;
+	tp->usesw = true;
+}
+
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
 {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 5d74fa7e694c..1e6324f0d4ef 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -425,6 +425,7 @@ struct tcf_proto {
 	spinlock_t lock;
 	bool deleting;
 	bool counted;
+	bool usesw;
 	refcount_t refcnt;
 	struct rcu_head rcu;
 	struct hlist_node destroy_ht_node;
@@ -474,9 +475,7 @@ struct tcf_block {
 	struct flow_block flow_block;
 	struct list_head owner_list;
 	bool keep_dst;
-	bool bypass_wanted;
-	atomic_t filtercnt; /* Number of filters */
-	atomic_t skipswcnt; /* Number of skip_sw filters */
+	atomic_t useswcnt;
 	atomic_t offloadcnt; /* Number of oddloaded filters */
 	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
 	unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7578e27260c9..358b66dfdc83 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	tp->usesw = !tp->ops->reoffload;
 	spin_lock_init(&tp->lock);
 	refcount_set(&tp->refcnt, 1);
 
@@ -410,48 +411,17 @@ static void tcf_proto_get(struct tcf_proto *tp)
 	refcount_inc(&tp->refcnt);
 }
 
-static void tcf_maintain_bypass(struct tcf_block *block)
-{
-	int filtercnt = atomic_read(&block->filtercnt);
-	int skipswcnt = atomic_read(&block->skipswcnt);
-	bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
-
-	if (bypass_wanted != block->bypass_wanted) {
-#ifdef CONFIG_NET_CLS_ACT
-		if (bypass_wanted)
-			static_branch_inc(&tcf_bypass_check_needed_key);
-		else
-			static_branch_dec(&tcf_bypass_check_needed_key);
-#endif
-		block->bypass_wanted = bypass_wanted;
-	}
-}
-
-static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
-{
-	lockdep_assert_not_held(&block->cb_lock);
-
-	down_write(&block->cb_lock);
-	if (*counted != add) {
-		if (add) {
-			atomic_inc(&block->filtercnt);
-			*counted = true;
-		} else {
-			atomic_dec(&block->filtercnt);
-			*counted = false;
-		}
-	}
-	tcf_maintain_bypass(block);
-	up_write(&block->cb_lock);
-}
-
 static void tcf_chain_put(struct tcf_chain *chain);
 
 static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
 			      bool sig_destroy, struct netlink_ext_ack *extack)
 {
 	tp->ops->destroy(tp, rtnl_held, extack);
-	tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
+	if (tp->usesw && tp->counted) {
+		if (!atomic_dec_return(&tp->chain->block->useswcnt))
+			static_branch_dec(&tcf_bypass_check_needed_key);
+		tp->counted = false;
+	}
 	if (sig_destroy)
 		tcf_proto_signal_destroyed(tp->chain, tp);
 	tcf_chain_put(tp->chain);
@@ -2409,7 +2379,13 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 		       RTM_NEWTFILTER, false, rtnl_held, extack);
 	tfilter_put(tp, fh);
-	tcf_block_filter_cnt_update(block, &tp->counted, true);
+	spin_lock(&tp->lock);
+	if (tp->usesw && !tp->counted) {
+		if (atomic_inc_return(&block->useswcnt) == 1)
+			static_branch_inc(&tcf_bypass_check_needed_key);
+		tp->counted = true;
+	}
+	spin_unlock(&tp->lock);
 	/* q pointer is NULL for shared blocks */
 	if (q)
 		q->flags &= ~TCQ_F_CAN_BYPASS;
@@ -3532,8 +3508,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 	if (*flags & TCA_CLS_FLAGS_IN_HW)
 		return;
 	*flags |= TCA_CLS_FLAGS_IN_HW;
-	if (tc_skip_sw(*flags))
-		atomic_inc(&block->skipswcnt);
 	atomic_inc(&block->offloadcnt);
 }
 
@@ -3542,8 +3516,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
 	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
 		return;
 	*flags &= ~TCA_CLS_FLAGS_IN_HW;
-	if (tc_skip_sw(*flags))
-		atomic_dec(&block->skipswcnt);
 	atomic_dec(&block->offloadcnt);
 }
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 1941ebec23ff..7fbe42f0e5c2 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(prog->gen_flags))
 		prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	tcf_proto_update_usesw(tp, prog->gen_flags);
+
 	if (oldprog) {
 		idr_replace(&head->handle_idr, prog, handle);
 		list_replace_rcu(&oldprog->link, &prog->link);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1008ec8a464c..03505673d523 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2503,6 +2503,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(fnew->flags))
 		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	tcf_proto_update_usesw(tp, fnew->flags);
+
 	spin_lock(&tp->lock);
 
 	/* tp was deleted concurrently. -EAGAIN will cause caller to lookup
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 9f1e62ca508d..f03bf5da39ee 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(new->flags))
 		new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	tcf_proto_update_usesw(tp, new->flags);
+
 	*arg = head;
 	rcu_assign_pointer(tp->root, new);
 	return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d3a03c57545b..2a1c00048fd6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -951,6 +951,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(new->flags))
 		new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	tcf_proto_update_usesw(tp, new->flags);
+
 	u32_replace_knode(tp, tp_c, new);
 	tcf_unbind_filter(tp, &n->res);
 	tcf_exts_get_net(&n->exts);
@@ -1164,6 +1166,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 	if (!tc_in_hw(n->flags))
 		n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+	tcf_proto_update_usesw(tp, n->flags);
+
 	ins = &ht->ht[TC_U32_HASH(handle)];
 	for (pins = rtnl_dereference(*ins); pins;
 	     ins = &pins->next, pins = rtnl_dereference(*ins))
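As a side note for readers: the semantics of the new tcf_proto_update_usesw() helper are that usesw is sticky, and a classifier instance only stays out of the software path while every change committed on it was skip_sw and confirmed in hardware (tc_in_hw); anything else, including a classifier without a reoffload op (see tcf_proto_create() above), keeps software classification enabled for the block. The small userspace model below illustrates that rule; the TCA_CLS_FLAGS_* values are the real UAPI bits, but update_usesw() and the surrounding program are only an illustration, not part of the patch.

/*
 * Illustration only: a standalone userspace model of the usesw rule.
 * Flag values mirror include/uapi/linux/pkt_cls.h; tc_skip_sw()/tc_in_hw()
 * are re-implemented here for the model.
 */
#include <stdbool.h>
#include <stdio.h>

#define TCA_CLS_FLAGS_SKIP_HW   (1 << 0)
#define TCA_CLS_FLAGS_SKIP_SW   (1 << 1)
#define TCA_CLS_FLAGS_IN_HW     (1 << 2)

static bool tc_skip_sw(unsigned int flags) { return flags & TCA_CLS_FLAGS_SKIP_SW; }
static bool tc_in_hw(unsigned int flags)   { return flags & TCA_CLS_FLAGS_IN_HW; }

/* Mirrors tcf_proto_update_usesw(): usesw is sticky and is only left unset
 * when the change is skip_sw AND confirmed offloaded to hardware. */
static void update_usesw(bool *usesw, unsigned int flags)
{
        if (*usesw)
                return;
        if (tc_skip_sw(flags) && tc_in_hw(flags))
                return;
        *usesw = true;
}

int main(void)
{
        bool usesw = false;

        update_usesw(&usesw, TCA_CLS_FLAGS_SKIP_SW | TCA_CLS_FLAGS_IN_HW);
        printf("skip_sw + in_hw         -> usesw=%d (block may stay bypassed)\n", usesw);

        update_usesw(&usesw, TCA_CLS_FLAGS_SKIP_HW);
        printf("skip_hw (software only) -> usesw=%d (tc_run() must classify)\n", usesw);

        return 0;
}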