diff mbox series

RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage

Message ID 0fadd34b-21cf-3a0d-a494-bde7b8dbc3ed@I-love.SAKURA.ne.jp (mailing list archive)
State Superseded
Headers show
Series RDMA/core: Avoid flush_workqueue(system_unbound_wq) usage | expand

Commit Message

Tetsuo Handa April 18, 2022, 1:30 p.m. UTC
Flushing system-wide workqueues is dangerous and will be forbidden.
Replace system_unbound_wq with local ib_unbound_wq.

Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Note: This patch is only compile tested.

 drivers/infiniband/core/device.c | 25 +++++++++++++++----------
 drivers/infiniband/core/ucma.c   |  2 +-
 drivers/infiniband/hw/mlx5/odp.c |  4 ++--
 include/rdma/ib_verbs.h          |  1 +
 4 files changed, 19 insertions(+), 13 deletions(-)

Comments

Jason Gunthorpe April 19, 2022, 9:01 p.m. UTC | #1
On Mon, Apr 18, 2022 at 10:30:47PM +0900, Tetsuo Handa wrote:
> Flushing system-wide workqueues is dangerous and will be forbidden.
> Replace system_unbound_wq with local ib_unbound_wq.

I don't really like this patch, the whole reason to get rid of
flushing the system_wq is due to unknowable interactions between work
on the queue. Creating a subsystem wide WQ is creating the same
problem, just in a microcosm.

The flush_work exists because of this:

> @@ -1602,7 +1604,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
>  	WARN_ON(!refcount_read(&ib_dev->refcount));
>  	WARN_ON(!ib_dev->ops.dealloc_driver);
>  	get_device(&ib_dev->dev);
> -	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
> +	if (!queue_work(ib_unbound_wq, &ib_dev->unregistration_work))
>  		put_device(&ib_dev->dev);
>  }

Which fires off a work that must be completed before the module is
unloaded.

It seems like a big waste to create an entire WQ just for a single
infrequent user like this.

Is there some other solution?

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4deb60a3b43f..75d9f592c6a1 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,8 @@  struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
+struct workqueue_struct *ib_unbound_wq;
+EXPORT_SYMBOL_GPL(ib_unbound_wq);
 
 /*
  * Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -1602,7 +1604,7 @@  void ib_unregister_device_queued(struct ib_device *ib_dev)
 	WARN_ON(!refcount_read(&ib_dev->refcount));
 	WARN_ON(!ib_dev->ops.dealloc_driver);
 	get_device(&ib_dev->dev);
-	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+	if (!queue_work(ib_unbound_wq, &ib_dev->unregistration_work))
 		put_device(&ib_dev->dev);
 }
 EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -2751,27 +2753,28 @@  static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 
 static int __init ib_core_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
 	ib_wq = alloc_workqueue("infiniband", 0, 0);
 	if (!ib_wq)
 		return -ENOMEM;
 
+	ib_unbound_wq = alloc_workqueue("ib-unb-wq", WQ_UNBOUND,
+					WQ_UNBOUND_MAX_ACTIVE);
+	if (!ib_unbound_wq)
+		goto err;
+
 	ib_comp_wq = alloc_workqueue("ib-comp-wq",
 			WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
-	if (!ib_comp_wq) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!ib_comp_wq)
+		goto err_unbound;
 
 	ib_comp_unbound_wq =
 		alloc_workqueue("ib-comp-unb-wq",
 				WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
 				WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
-	if (!ib_comp_unbound_wq) {
-		ret = -ENOMEM;
+	if (!ib_comp_unbound_wq)
 		goto err_comp;
-	}
 
 	ret = class_register(&ib_class);
 	if (ret) {
@@ -2831,6 +2834,8 @@  static int __init ib_core_init(void)
 	destroy_workqueue(ib_comp_unbound_wq);
 err_comp:
 	destroy_workqueue(ib_comp_wq);
+err_unbound:
+	destroy_workqueue(ib_unbound_wq);
 err:
 	destroy_workqueue(ib_wq);
 	return ret;
@@ -2852,7 +2857,7 @@  static void __exit ib_core_cleanup(void)
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
-	flush_workqueue(system_unbound_wq);
+	destroy_workqueue(ib_unbound_wq);
 	WARN_ON(!xa_empty(&clients));
 	WARN_ON(!xa_empty(&devices));
 }
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9d6ac9dff39a..65c31c7a3af1 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -360,7 +360,7 @@  static int ucma_event_handler(struct rdma_cm_id *cm_id,
 	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
 		xa_lock(&ctx_table);
 		if (xa_load(&ctx_table, ctx->id) == ctx)
-			queue_work(system_unbound_wq, &ctx->close_work);
+			queue_work(ib_unbound_wq, &ctx->close_work);
 		xa_unlock(&ctx_table);
 	}
 	return 0;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 41c964a45f89..7611a28a4573 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -220,7 +220,7 @@  static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
 
 	/* Freeing a MR is a sleeping operation, so bounce to a work queue */
 	INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
-	queue_work(system_unbound_wq, &mr->odp_destroy.work);
+	queue_work(ib_unbound_wq, &mr->odp_destroy.work);
 }
 
 static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
@@ -1818,6 +1818,6 @@  int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
 		destroy_prefetch_work(work);
 		return rc;
 	}
-	queue_work(system_unbound_wq, &work->work);
+	queue_work(ib_unbound_wq, &work->work);
 	return 0;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b0bc9de5e9a8..8be23c03f412 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -54,6 +54,7 @@  struct ib_port;
 struct hw_stats_device_data;
 
 extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_unbound_wq;
 extern struct workqueue_struct *ib_comp_wq;
 extern struct workqueue_struct *ib_comp_unbound_wq;