Message ID | 20250305163732.2766420-3-sdf@fomichev.me (mailing list archive) |
---|---|
State | Accepted |
Commit | c4f0f30b424e7258a777bcbcbf9006207da4854c |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: Hold netdev instance lock during ndo operations | expand |
On Wed, Mar 5, 2025 at 5:37 PM Stanislav Fomichev <sdf@fomichev.me> wrote: > > Introduce new dev_setup_tc for nft ndo_setup_tc paths. > > Reviewed-by: Eric Dumazet <edumazet@google.com> > Cc: Saeed Mahameed <saeed@kernel.org> > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me> > --- > drivers/net/ethernet/intel/iavf/iavf_main.c | 2 -- > include/linux/netdevice.h | 2 ++ > net/core/dev.c | 18 ++++++++++++++++++ > net/netfilter/nf_flow_table_offload.c | 2 +- > net/netfilter/nf_tables_offload.c | 2 +- > 5 files changed, 22 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c > index 9f4d223dffcf..032e1a58af6f 100644 > --- a/drivers/net/ethernet/intel/iavf/iavf_main.c > +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c > @@ -3894,10 +3894,8 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) > if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > return 0; > > - netdev_lock(netdev); > netif_set_real_num_rx_queues(netdev, total_qps); > netif_set_real_num_tx_queues(netdev, total_qps); > - netdev_unlock(netdev); > > return ret; > } > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index 33066b155c84..69951eeb96d2 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -3353,6 +3353,8 @@ int dev_alloc_name(struct net_device *dev, const char *name); > int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); > void dev_close(struct net_device *dev); > void dev_close_many(struct list_head *head, bool unlink); > +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, > + void *type_data); > void dev_disable_lro(struct net_device *dev); > int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); > u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, > diff --git a/net/core/dev.c b/net/core/dev.c > index 7a327c782ea4..57af25683ea1 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -1786,6 +1786,24 @@ void dev_close(struct net_device *dev) > } > EXPORT_SYMBOL(dev_close); > > +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, > + void *type_data) > +{ > + const struct net_device_ops *ops = dev->netdev_ops; > + int ret; > + > + ASSERT_RTNL(); > + > + if (!ops->ndo_setup_tc) > + return -EOPNOTSUPP; > + > + netdev_lock_ops(dev); > + ret = ops->ndo_setup_tc(dev, type, type_data); > + netdev_unlock_ops(dev); > + > + return ret; > +} > +EXPORT_SYMBOL(dev_setup_tc); > > /** > * dev_disable_lro - disable Large Receive Offload on a device > diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c > index e06bc36f49fe..0ec4abded10d 100644 > --- a/net/netfilter/nf_flow_table_offload.c > +++ b/net/netfilter/nf_flow_table_offload.c > @@ -1175,7 +1175,7 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, > nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, > extack); > down_write(&flowtable->flow_block_lock); > - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); > + err = dev_setup_tc(dev, TC_SETUP_FT, bo); > up_write(&flowtable->flow_block_lock); > if (err < 0) > return err; > diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c > index 64675f1c7f29..b761899c143c 100644 > --- a/net/netfilter/nf_tables_offload.c > +++ b/net/netfilter/nf_tables_offload.c > @@ -390,7 +390,7 @@ static int nft_block_offload_cmd(struct nft_base_chain *chain, > > nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack); > > - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); > + err = dev_setup_tc(dev, TC_SETUP_BLOCK, &bo); > if (err < 0) > return err; > It seems RTNL was not taken in this path, can you take a look ? syzbot reported : RTNL: assertion failed at net/core/dev.c (1769) WARNING: CPU: 1 PID: 9148 at net/core/dev.c:1769 dev_setup_tc+0x315/0x360 net/core/dev.c:1769 Modules linked in: CPU: 1 UID: 0 PID: 9148 Comm: syz.3.1494 Not tainted 6.14.0-rc5-syzkaller-01064-g2525e16a2bae #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 RIP: 0010:dev_setup_tc+0x315/0x360 net/core/dev.c:1769 Code: cc 49 89 ee e8 dc da f7 f7 c6 05 c0 39 5d 06 01 90 48 c7 c7 a0 5e 2e 8d 48 c7 c6 80 5e 2e 8d ba e9 06 00 00 e8 3c 97 b7 f7 90 <0f> 0b 90 90 e9 66 fd ff ff 89 d1 80 e1 07 38 c1 0f 8c aa fd ff ff RSP: 0018:ffffc9000be3eed0 EFLAGS: 00010246 RAX: eea924c6092c5700 RBX: 0000000000000000 RCX: 0000000000080000 RDX: ffffc9000c979000 RSI: 000000000000491b RDI: 000000000000491c RBP: ffff88802a810008 R08: ffffffff81818e32 R09: fffffbfff1d3a67c R10: dffffc0000000000 R11: fffffbfff1d3a67c R12: ffffc9000be3f070 R13: ffffffff8d4ab1e0 R14: ffff88802a810008 R15: ffff88802a810000 FS: 00007fbe7aece6c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000110c2b5042 CR3: 0000000024cd0000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> nf_flow_table_offload_cmd net/netfilter/nf_flow_table_offload.c:1178 [inline] nf_flow_table_offload_setup+0x2ff/0x710 net/netfilter/nf_flow_table_offload.c:1198 nft_register_flowtable_net_hooks+0x24c/0x570 net/netfilter/nf_tables_api.c:8918 nf_tables_newflowtable+0x19f4/0x23d0 net/netfilter/nf_tables_api.c:9139 nfnetlink_rcv_batch net/netfilter/nfnetlink.c:524 [inline] nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:647 [inline] nfnetlink_rcv+0x14e3/0x2ab0 net/netfilter/nfnetlink.c:665 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x8de/0xcb0 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:709 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:724 ____sys_sendmsg+0x53a/0x860 net/socket.c:2564 ___sys_sendmsg net/socket.c:2618 [inline] __sys_sendmsg+0x269/0x350 net/socket.c:2650 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbe79f8d169
On 03/07, Eric Dumazet wrote: > On Wed, Mar 5, 2025 at 5:37 PM Stanislav Fomichev <sdf@fomichev.me> wrote: > > > > Introduce new dev_setup_tc for nft ndo_setup_tc paths. > > > > Reviewed-by: Eric Dumazet <edumazet@google.com> > > Cc: Saeed Mahameed <saeed@kernel.org> > > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me> > > --- > > drivers/net/ethernet/intel/iavf/iavf_main.c | 2 -- > > include/linux/netdevice.h | 2 ++ > > net/core/dev.c | 18 ++++++++++++++++++ > > net/netfilter/nf_flow_table_offload.c | 2 +- > > net/netfilter/nf_tables_offload.c | 2 +- > > 5 files changed, 22 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c > > index 9f4d223dffcf..032e1a58af6f 100644 > > --- a/drivers/net/ethernet/intel/iavf/iavf_main.c > > +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c > > @@ -3894,10 +3894,8 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) > > if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) > > return 0; > > > > - netdev_lock(netdev); > > netif_set_real_num_rx_queues(netdev, total_qps); > > netif_set_real_num_tx_queues(netdev, total_qps); > > - netdev_unlock(netdev); > > > > return ret; > > } > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > > index 33066b155c84..69951eeb96d2 100644 > > --- a/include/linux/netdevice.h > > +++ b/include/linux/netdevice.h > > @@ -3353,6 +3353,8 @@ int dev_alloc_name(struct net_device *dev, const char *name); > > int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); > > void dev_close(struct net_device *dev); > > void dev_close_many(struct list_head *head, bool unlink); > > +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, > > + void *type_data); > > void dev_disable_lro(struct net_device *dev); > > int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); > > u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, > > diff --git a/net/core/dev.c b/net/core/dev.c > > index 7a327c782ea4..57af25683ea1 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -1786,6 +1786,24 @@ void dev_close(struct net_device *dev) > > } > > EXPORT_SYMBOL(dev_close); > > > > +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, > > + void *type_data) > > +{ > > + const struct net_device_ops *ops = dev->netdev_ops; > > + int ret; > > + > > + ASSERT_RTNL(); > > + > > + if (!ops->ndo_setup_tc) > > + return -EOPNOTSUPP; > > + > > + netdev_lock_ops(dev); > > + ret = ops->ndo_setup_tc(dev, type, type_data); > > + netdev_unlock_ops(dev); > > + > > + return ret; > > +} > > +EXPORT_SYMBOL(dev_setup_tc); > > > > /** > > * dev_disable_lro - disable Large Receive Offload on a device > > diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c > > index e06bc36f49fe..0ec4abded10d 100644 > > --- a/net/netfilter/nf_flow_table_offload.c > > +++ b/net/netfilter/nf_flow_table_offload.c > > @@ -1175,7 +1175,7 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, > > nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, > > extack); > > down_write(&flowtable->flow_block_lock); > > - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); > > + err = dev_setup_tc(dev, TC_SETUP_FT, bo); > > up_write(&flowtable->flow_block_lock); > > if (err < 0) > > return err; > > diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c > > index 64675f1c7f29..b761899c143c 100644 > > --- a/net/netfilter/nf_tables_offload.c > > +++ b/net/netfilter/nf_tables_offload.c > > @@ -390,7 +390,7 @@ static int nft_block_offload_cmd(struct nft_base_chain *chain, > > > > nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack); > > > > - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); > > + err = dev_setup_tc(dev, TC_SETUP_BLOCK, &bo); > > if (err < 0) > > return err; > > > > It seems RTNL was not taken in this path, can you take a look ? > > syzbot reported : > > RTNL: assertion failed at net/core/dev.c (1769) > WARNING: CPU: 1 PID: 9148 at net/core/dev.c:1769 > dev_setup_tc+0x315/0x360 net/core/dev.c:1769 > Modules linked in: > CPU: 1 UID: 0 PID: 9148 Comm: syz.3.1494 Not tainted > 6.14.0-rc5-syzkaller-01064-g2525e16a2bae #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, > BIOS Google 02/12/2025 > RIP: 0010:dev_setup_tc+0x315/0x360 net/core/dev.c:1769 > Code: cc 49 89 ee e8 dc da f7 f7 c6 05 c0 39 5d 06 01 90 48 c7 c7 a0 > 5e 2e 8d 48 c7 c6 80 5e 2e 8d ba e9 06 00 00 e8 3c 97 b7 f7 90 <0f> 0b > 90 90 e9 66 fd ff ff 89 d1 80 e1 07 38 c1 0f 8c aa fd ff ff > RSP: 0018:ffffc9000be3eed0 EFLAGS: 00010246 > RAX: eea924c6092c5700 RBX: 0000000000000000 RCX: 0000000000080000 > RDX: ffffc9000c979000 RSI: 000000000000491b RDI: 000000000000491c > RBP: ffff88802a810008 R08: ffffffff81818e32 R09: fffffbfff1d3a67c > R10: dffffc0000000000 R11: fffffbfff1d3a67c R12: ffffc9000be3f070 > R13: ffffffff8d4ab1e0 R14: ffff88802a810008 R15: ffff88802a810000 > FS: 00007fbe7aece6c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 000000110c2b5042 CR3: 0000000024cd0000 CR4: 00000000003526f0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > Call Trace: > <TASK> > nf_flow_table_offload_cmd net/netfilter/nf_flow_table_offload.c:1178 [inline] > nf_flow_table_offload_setup+0x2ff/0x710 > net/netfilter/nf_flow_table_offload.c:1198 > nft_register_flowtable_net_hooks+0x24c/0x570 net/netfilter/nf_tables_api.c:8918 > nf_tables_newflowtable+0x19f4/0x23d0 net/netfilter/nf_tables_api.c:9139 > nfnetlink_rcv_batch net/netfilter/nfnetlink.c:524 [inline] > nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:647 [inline] > nfnetlink_rcv+0x14e3/0x2ab0 net/netfilter/nfnetlink.c:665 > netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] > netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1339 > netlink_sendmsg+0x8de/0xcb0 net/netlink/af_netlink.c:1883 > sock_sendmsg_nosec net/socket.c:709 [inline] > __sock_sendmsg+0x221/0x270 net/socket.c:724 > ____sys_sendmsg+0x53a/0x860 net/socket.c:2564 > ___sys_sendmsg net/socket.c:2618 [inline] > __sys_sendmsg+0x269/0x350 net/socket.c:2650 > do_syscall_x64 arch/x86/entry/common.c:52 [inline] > do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 > entry_SYSCALL_64_after_hwframe+0x77/0x7f > RIP: 0033:0x7fbe79f8d169 Interesting. Looks like this path (and same for nft_block_offload_cmd?) was never grabbing grabbing rtnl_lock but calling device's ndo_setup_tc. Will take a look! CC'd Pablo in case it was by design.
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 9f4d223dffcf..032e1a58af6f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3894,10 +3894,8 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return 0; - netdev_lock(netdev); netif_set_real_num_rx_queues(netdev, total_qps); netif_set_real_num_tx_queues(netdev, total_qps); - netdev_unlock(netdev); return ret; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 33066b155c84..69951eeb96d2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3353,6 +3353,8 @@ int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 7a327c782ea4..57af25683ea1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1786,6 +1786,24 @@ void dev_close(struct net_device *dev) } EXPORT_SYMBOL(dev_close); +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int ret; + + ASSERT_RTNL(); + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + netdev_lock_ops(dev); + ret = ops->ndo_setup_tc(dev, type, type_data); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_setup_tc); /** * dev_disable_lro - disable Large Receive Offload on a device diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index e06bc36f49fe..0ec4abded10d 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1175,7 +1175,7 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); down_write(&flowtable->flow_block_lock); - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + err = dev_setup_tc(dev, TC_SETUP_FT, bo); up_write(&flowtable->flow_block_lock); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 64675f1c7f29..b761899c143c 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -390,7 +390,7 @@ static int nft_block_offload_cmd(struct nft_base_chain *chain, nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack); - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + err = dev_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) return err;