diff mbox series

RDMA/siw: Fix tx thread initialization.

Message ID 20230728114418.124328-1-bmt@zurich.ibm.com (mailing list archive)
State Accepted
Headers show
Series RDMA/siw: Fix tx thread initialization. | expand

Commit Message

Bernard Metzler July 28, 2023, 11:44 a.m. UTC
Immediately removing the siw module after insertion may
crash in siw_stop_tx_thread(), if the according thread did
not yet had a chance to initialize its wait queue and
siw_stop_tx_thread() tries to wakeup that thread. Initializing
the threads state before spwaning it fixes it.

Reported-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw.h       |  3 +-
 drivers/infiniband/sw/siw/siw_main.c  | 40 ++----------------------
 drivers/infiniband/sw/siw/siw_qp_tx.c | 44 +++++++++++++++++++++++----
 3 files changed, 43 insertions(+), 44 deletions(-)

Comments

Guoqing Jiang July 28, 2023, 11:57 a.m. UTC | #1
On 7/28/23 19:44, Bernard Metzler wrote:
> Immediately removing the siw module after insertion may
> crash in siw_stop_tx_thread(), if the according thread did
> not yet had a chance to initialize its wait queue and
> siw_stop_tx_thread() tries to wakeup that thread. Initializing
> the threads state before spwaning it fixes it.
>
> Reported-by: Guoqing Jiang <guoqing.jiang@linux.dev>
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
>   drivers/infiniband/sw/siw/siw.h       |  3 +-
>   drivers/infiniband/sw/siw/siw_main.c  | 40 ++----------------------
>   drivers/infiniband/sw/siw/siw_qp_tx.c | 44 +++++++++++++++++++++++----
>   3 files changed, 43 insertions(+), 44 deletions(-)

It works for me.

Tested-by: Guoqing Jiang <guoqing.jiang@linux.dev>

Thanks,
Guoqing
Leon Romanovsky July 30, 2023, 1:18 p.m. UTC | #2
On Fri, 28 Jul 2023 13:44:18 +0200, Bernard Metzler wrote:
> Immediately removing the siw module after insertion may
> crash in siw_stop_tx_thread(), if the according thread did
> not yet had a chance to initialize its wait queue and
> siw_stop_tx_thread() tries to wakeup that thread. Initializing
> the threads state before spwaning it fixes it.
> 
> 
> [...]

Applied, thanks!

[1/1] RDMA/siw: Fix tx thread initialization.
      https://git.kernel.org/rdma/rdma/c/5a752f23f43a67

Best regards,
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 2f3a9cda3850..7af311934c7a 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -530,11 +530,12 @@  void siw_qp_llp_data_ready(struct sock *sk);
 void siw_qp_llp_write_space(struct sock *sk);
 
 /* QP TX path functions */
+int siw_create_tx_threads(void);
+void siw_stop_tx_threads(void);
 int siw_run_sq(void *arg);
 int siw_qp_sq_process(struct siw_qp *qp);
 int siw_sq_start(struct siw_qp *qp);
 int siw_activate_tx(struct siw_qp *qp);
-void siw_stop_tx_thread(int nr_cpu);
 int siw_get_tx_cpu(struct siw_device *sdev);
 void siw_put_tx_cpu(int cpu);
 
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 65b5cda5457b..ba44a5776006 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -88,29 +88,6 @@  static void siw_device_cleanup(struct ib_device *base_dev)
 	xa_destroy(&sdev->mem_xa);
 }
 
-static int siw_create_tx_threads(void)
-{
-	int cpu, assigned = 0;
-
-	for_each_online_cpu(cpu) {
-		/* Skip HT cores */
-		if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
-			continue;
-
-		siw_tx_thread[cpu] =
-			kthread_run_on_cpu(siw_run_sq,
-					   (unsigned long *)(long)cpu,
-					   cpu, "siw_tx/%u");
-		if (IS_ERR(siw_tx_thread[cpu])) {
-			siw_tx_thread[cpu] = NULL;
-			continue;
-		}
-
-		assigned++;
-	}
-	return assigned;
-}
-
 static int siw_dev_qualified(struct net_device *netdev)
 {
 	/*
@@ -535,7 +512,6 @@  static struct rdma_link_ops siw_link_ops = {
 static __init int siw_init_module(void)
 {
 	int rv;
-	int nr_cpu;
 
 	if (SENDPAGE_THRESH < SIW_MAX_INLINE) {
 		pr_info("siw: sendpage threshold too small: %u\n",
@@ -580,12 +556,8 @@  static __init int siw_init_module(void)
 	return 0;
 
 out_error:
-	for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) {
-		if (siw_tx_thread[nr_cpu]) {
-			siw_stop_tx_thread(nr_cpu);
-			siw_tx_thread[nr_cpu] = NULL;
-		}
-	}
+	siw_stop_tx_threads();
+
 	if (siw_crypto_shash)
 		crypto_free_shash(siw_crypto_shash);
 
@@ -599,14 +571,8 @@  static __init int siw_init_module(void)
 
 static void __exit siw_exit_module(void)
 {
-	int cpu;
+	siw_stop_tx_threads();
 
-	for_each_possible_cpu(cpu) {
-		if (siw_tx_thread[cpu]) {
-			siw_stop_tx_thread(cpu);
-			siw_tx_thread[cpu] = NULL;
-		}
-	}
 	unregister_netdevice_notifier(&siw_netdev_nb);
 	rdma_link_unregister(&siw_link_ops);
 	ib_unregister_driver(RDMA_DRIVER_SIW);
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index ffb16beb6c30..a1f8def4504e 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1209,10 +1209,45 @@  struct tx_task_t {
 
 static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g);
 
-void siw_stop_tx_thread(int nr_cpu)
+int siw_create_tx_threads(void)
 {
-	kthread_stop(siw_tx_thread[nr_cpu]);
-	wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting);
+	int cpu, assigned = 0;
+
+	for_each_online_cpu(cpu) {
+		struct tx_task_t *tx_task;
+
+		/* Skip HT cores */
+		if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
+			continue;
+
+		tx_task = &per_cpu(siw_tx_task_g, cpu);
+		init_llist_head(&tx_task->active);
+		init_waitqueue_head(&tx_task->waiting);
+
+		siw_tx_thread[cpu] =
+			kthread_run_on_cpu(siw_run_sq,
+					   (unsigned long *)(long)cpu,
+					   cpu, "siw_tx/%u");
+		if (IS_ERR(siw_tx_thread[cpu])) {
+			siw_tx_thread[cpu] = NULL;
+			continue;
+		}
+		assigned++;
+	}
+	return assigned;
+}
+
+void siw_stop_tx_threads(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (siw_tx_thread[cpu]) {
+			kthread_stop(siw_tx_thread[cpu]);
+			wake_up(&per_cpu(siw_tx_task_g, cpu).waiting);
+			siw_tx_thread[cpu] = NULL;
+		}
+	}
 }
 
 int siw_run_sq(void *data)
@@ -1222,9 +1257,6 @@  int siw_run_sq(void *data)
 	struct siw_qp *qp;
 	struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu);
 
-	init_llist_head(&tx_task->active);
-	init_waitqueue_head(&tx_task->waiting);
-
 	while (1) {
 		struct llist_node *fifo_list = NULL;