diff mbox series

[net] net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table

Message ID 20231205172554.3570602-1-vladbu@nvidia.com (mailing list archive)
State Accepted
Commit 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc
Delegated to: Netdev Maintainers
Headers show
Series [net] net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1175 this patch: 1175
netdev/cc_maintainers warning 4 maintainers not CCed: coreteam@netfilter.org fw@strlen.de kadlec@netfilter.org netfilter-devel@vger.kernel.org
netdev/build_clang success Errors and warnings before: 1142 this patch: 1142
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1202 this patch: 1202
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 87 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Vlad Buslov Dec. 5, 2023, 5:25 p.m. UTC
The referenced change added custom cleanup code to act_ct to delete any
callbacks registered on the parent block when deleting the
tcf_ct_flow_table instance. However, the underlying issue is that the
drivers don't obtain the reference to the tcf_ct_flow_table instance when
registering callbacks which means that not only driver callbacks may still
be on the table when deleting it but also that the driver can still have
pointers to its internal nf_flowtable and can use it concurrently which
results either warning in netfilter[0] or use-after-free.

Fix the issue by taking a reference to the underlying struct
tcf_ct_flow_table instance when registering the callback and release the
reference when unregistering. Expose new API required for such reference
counting by adding two new callbacks to nf_flowtable_type and implementing
them for act_ct flowtable_ct type. This fixes the issue by extending the
lifetime of nf_flowtable until all users have unregistered.

[0]:
[106170.938634] ------------[ cut here ]------------
[106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm
ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis
try overlay mlx5_core
[106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1
[106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
[106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00
[106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202
[106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48
[106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000
[106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700
[106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58
[106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000
[106170.951616] FS:  0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000
[106170.952329] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0
[106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[106170.954766] Call Trace:
[106170.955057]  <TASK>
[106170.955315]  ? __warn+0x79/0x120
[106170.955648]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.956172]  ? report_bug+0x17c/0x190
[106170.956537]  ? handle_bug+0x3c/0x60
[106170.956891]  ? exc_invalid_op+0x14/0x70
[106170.957264]  ? asm_exc_invalid_op+0x16/0x20
[106170.957666]  ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core]
[106170.958172]  ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core]
[106170.958788]  ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core]
[106170.959339]  ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core]
[106170.959854]  ? mapping_remove+0x154/0x1d0 [mlx5_core]
[106170.960342]  ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core]
[106170.960927]  mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core]
[106170.961441]  mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core]
[106170.962001]  mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core]
[106170.962524]  mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core]
[106170.963034]  mlx5e_flow_put+0x73/0xe0 [mlx5_core]
[106170.963506]  mlx5e_put_flow_list+0x38/0x70 [mlx5_core]
[106170.964002]  mlx5e_rep_update_flows+0xec/0x290 [mlx5_core]
[106170.964525]  mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core]
[106170.965056]  process_one_work+0x13a/0x2c0
[106170.965443]  worker_thread+0x2e5/0x3f0
[106170.965808]  ? rescuer_thread+0x410/0x410
[106170.966192]  kthread+0xc6/0xf0
[106170.966515]  ? kthread_complete_and_exit+0x20/0x20
[106170.966970]  ret_from_fork+0x2d/0x50
[106170.967332]  ? kthread_complete_and_exit+0x20/0x20
[106170.967774]  ret_from_fork_asm+0x11/0x20
[106170.970466]  </TASK>
[106170.970726] ---[ end trace 0000000000000000 ]---

Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
---
 include/net/netfilter/nf_flow_table.h | 10 ++++++++
 net/sched/act_ct.c                    | 34 ++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 6 deletions(-)

Comments

Louis Peens Dec. 6, 2023, 9:31 a.m. UTC | #1
On Tue, Dec 05, 2023 at 06:25:54PM +0100, Vlad Buslov wrote:
> The referenced change added custom cleanup code to act_ct to delete any
> callbacks registered on the parent block when deleting the
> tcf_ct_flow_table instance. However, the underlying issue is that the
> drivers don't obtain the reference to the tcf_ct_flow_table instance when
> registering callbacks which means that not only driver callbacks may still
> be on the table when deleting it but also that the driver can still have
> pointers to its internal nf_flowtable and can use it concurrently which
> results either warning in netfilter[0] or use-after-free.
> 
> Fix the issue by taking a reference to the underlying struct
> tcf_ct_flow_table instance when registering the callback and release the
> reference when unregistering. Expose new API required for such reference
> counting by adding two new callbacks to nf_flowtable_type and implementing
> them for act_ct flowtable_ct type. This fixes the issue by extending the
> lifetime of nf_flowtable until all users have unregistered.

This definitely looks like a much better fix to me, thanks Vlad. We
will do some checking our side just to confirm that it doesn't break
anything with the nfp driver, but I don't expect that it should.
Jakub Kicinski Dec. 8, 2023, 11:40 p.m. UTC | #2
On Tue, 5 Dec 2023 18:25:54 +0100 Vlad Buslov wrote:
> The referenced change added custom cleanup code to act_ct to delete any
> callbacks registered on the parent block when deleting the
> tcf_ct_flow_table instance. However, the underlying issue is that the
> drivers don't obtain the reference to the tcf_ct_flow_table instance when
> registering callbacks which means that not only driver callbacks may still
> be on the table when deleting it but also that the driver can still have
> pointers to its internal nf_flowtable and can use it concurrently which
> results either warning in netfilter[0] or use-after-free.
> 
> Fix the issue by taking a reference to the underlying struct
> tcf_ct_flow_table instance when registering the callback and release the
> reference when unregistering. Expose new API required for such reference
> counting by adding two new callbacks to nf_flowtable_type and implementing
> them for act_ct flowtable_ct type. This fixes the issue by extending the
> lifetime of nf_flowtable until all users have unregistered.

Some acks would be good here - Pablo, Jamal?
Pablo Neira Ayuso Dec. 11, 2023, 9:30 a.m. UTC | #3
On Fri, Dec 08, 2023 at 03:40:35PM -0800, Jakub Kicinski wrote:
> On Tue, 5 Dec 2023 18:25:54 +0100 Vlad Buslov wrote:
> > The referenced change added custom cleanup code to act_ct to delete any
> > callbacks registered on the parent block when deleting the
> > tcf_ct_flow_table instance. However, the underlying issue is that the
> > drivers don't obtain the reference to the tcf_ct_flow_table instance when
> > registering callbacks which means that not only driver callbacks may still
> > be on the table when deleting it but also that the driver can still have
> > pointers to its internal nf_flowtable and can use it concurrently which
> > results either warning in netfilter[0] or use-after-free.
> > 
> > Fix the issue by taking a reference to the underlying struct
> > tcf_ct_flow_table instance when registering the callback and release the
> > reference when unregistering. Expose new API required for such reference
> > counting by adding two new callbacks to nf_flowtable_type and implementing
> > them for act_ct flowtable_ct type. This fixes the issue by extending the
> > lifetime of nf_flowtable until all users have unregistered.
> 
> Some acks would be good here - Pablo, Jamal?

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>

I'd prefer driver does not access nf_flowtable directly, I already
mentioned this in the past.
patchwork-bot+netdevbpf@kernel.org Dec. 11, 2023, 10 a.m. UTC | #4
Hello:

This patch was applied to netdev/net.git (main)
by David S. Miller <davem@davemloft.net>:

On Tue, 5 Dec 2023 18:25:54 +0100 you wrote:
> The referenced change added custom cleanup code to act_ct to delete any
> callbacks registered on the parent block when deleting the
> tcf_ct_flow_table instance. However, the underlying issue is that the
> drivers don't obtain the reference to the tcf_ct_flow_table instance when
> registering callbacks which means that not only driver callbacks may still
> be on the table when deleting it but also that the driver can still have
> pointers to its internal nf_flowtable and can use it concurrently which
> results either warning in netfilter[0] or use-after-free.
> 
> [...]

Here is the summary with links:
  - [net] net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table
    https://git.kernel.org/netdev/net/c/125f1c7f26ff

You are awesome, thank you!
diff mbox series

Patch

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index fe1507c1db82..692d5955911c 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -62,6 +62,8 @@  struct nf_flowtable_type {
 						  enum flow_offload_tuple_dir dir,
 						  struct nf_flow_rule *flow_rule);
 	void				(*free)(struct nf_flowtable *ft);
+	void				(*get)(struct nf_flowtable *ft);
+	void				(*put)(struct nf_flowtable *ft);
 	nf_hookfn			*hook;
 	struct module			*owner;
 };
@@ -240,6 +242,11 @@  nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
 	}
 
 	list_add_tail(&block_cb->list, &block->cb_list);
+	up_write(&flow_table->flow_block_lock);
+
+	if (flow_table->type->get)
+		flow_table->type->get(flow_table);
+	return 0;
 
 unlock:
 	up_write(&flow_table->flow_block_lock);
@@ -262,6 +269,9 @@  nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
 		WARN_ON(true);
 	}
 	up_write(&flow_table->flow_block_lock);
+
+	if (flow_table->type->put)
+		flow_table->type->put(flow_table);
 }
 
 void flow_offload_route_init(struct flow_offload *flow,
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b3f4a503ee2b..f69c47945175 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -286,9 +286,31 @@  static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow)
 	       !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
 }
 
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_get(struct nf_flowtable *ft)
+{
+	struct tcf_ct_flow_table *ct_ft =
+		container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+	tcf_ct_flow_table_get_ref(ct_ft);
+}
+
+static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft);
+
+static void tcf_ct_nf_put(struct nf_flowtable *ft)
+{
+	struct tcf_ct_flow_table *ct_ft =
+		container_of(ft, struct tcf_ct_flow_table, nf_ft);
+
+	tcf_ct_flow_table_put(ct_ft);
+}
+
 static struct nf_flowtable_type flowtable_ct = {
 	.gc		= tcf_ct_flow_is_outdated,
 	.action		= tcf_ct_flow_table_fill_actions,
+	.get		= tcf_ct_nf_get,
+	.put		= tcf_ct_nf_put,
 	.owner		= THIS_MODULE,
 };
 
@@ -337,9 +359,13 @@  static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params)
 	return err;
 }
 
+static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft)
+{
+	refcount_inc(&ct_ft->ref);
+}
+
 static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
 {
-	struct flow_block_cb *block_cb, *tmp_cb;
 	struct tcf_ct_flow_table *ct_ft;
 	struct flow_block *block;
 
@@ -347,13 +373,9 @@  static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
 			     rwork);
 	nf_flow_table_free(&ct_ft->nf_ft);
 
-	/* Remove any remaining callbacks before cleanup */
 	block = &ct_ft->nf_ft.flow_block;
 	down_write(&ct_ft->nf_ft.flow_block_lock);
-	list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
-		list_del(&block_cb->list);
-		flow_block_cb_free(block_cb);
-	}
+	WARN_ON(!list_empty(&block->cb_list));
 	up_write(&ct_ft->nf_ft.flow_block_lock);
 	kfree(ct_ft);