diff mbox series

[for-next,15/16] RDMA/rxe: Add workqueue support for tasks

Message ID 20221018043345.4033-16-rpearsonhpe@gmail.com (mailing list archive)
State Superseded
Headers show
Series Implement work queues for rdma_rxe | expand

Commit Message

Bob Pearson Oct. 18, 2022, 4:33 a.m. UTC
Add a third task type RXE_TASK_TYPE_WORKQUEUE to rxe_task.c.

Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe.c      |  9 ++-
 drivers/infiniband/sw/rxe/rxe_task.c | 84 ++++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_task.h | 10 +++-
 3 files changed, 101 insertions(+), 2 deletions(-)

Comments

Leon Romanovsky Oct. 18, 2022, 8:59 a.m. UTC | #1
On Mon, Oct 17, 2022 at 11:33:46PM -0500, Bob Pearson wrote:
> Add a third task type RXE_TASK_TYPE_WORKQUEUE to rxe_task.c.

Why do you need an extra type and not instead of RXE_TASK_TYPE_TASKLET?

> 
> Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>  drivers/infiniband/sw/rxe/rxe.c      |  9 ++-
>  drivers/infiniband/sw/rxe/rxe_task.c | 84 ++++++++++++++++++++++++++++
>  drivers/infiniband/sw/rxe/rxe_task.h | 10 +++-
>  3 files changed, 101 insertions(+), 2 deletions(-)

<...>

> +static struct workqueue_struct *rxe_wq;
> +
> +int rxe_alloc_wq(void)
> +{
> +	rxe_wq = alloc_workqueue("rxe_wq", WQ_MEM_RECLAIM |
> +				WQ_HIGHPRI | WQ_CPU_INTENSIVE |
> +				WQ_SYSFS, WQ_MAX_ACTIVE);

Are you sure that all these flags can be justified? WQ_MEM_RECLAIM?

> +
> +	if (!rxe_wq)
> +		return -ENOMEM;
> +
> +	return 0;
> +}

<...>

> +static void work_sched(struct rxe_task *task)
> +{
> +	if (!task->valid)
> +		return;
> +
> +	queue_work(rxe_wq, &task->work);
> +}
> +
> +static void work_do_task(struct work_struct *work)
> +{
> +	struct rxe_task *task = container_of(work, typeof(*task), work);
> +
> +	if (!task->valid)
> +		return;

How can it be that submitted task is not valid? Especially without any
locking.

> +
> +	do_task(task);
> +}

Thanks

> +
Bob Pearson Oct. 18, 2022, 3:18 p.m. UTC | #2
On 10/18/22 03:59, Leon Romanovsky wrote:
> On Mon, Oct 17, 2022 at 11:33:46PM -0500, Bob Pearson wrote:
>> Add a third task type RXE_TASK_TYPE_WORKQUEUE to rxe_task.c.
> 
> Why do you need an extra type and not instead of RXE_TASK_TYPE_TASKLET?

It performs much better in some settings.
> 
>>
>> Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
>> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>> ---
>>  drivers/infiniband/sw/rxe/rxe.c      |  9 ++-
>>  drivers/infiniband/sw/rxe/rxe_task.c | 84 ++++++++++++++++++++++++++++
>>  drivers/infiniband/sw/rxe/rxe_task.h | 10 +++-
>>  3 files changed, 101 insertions(+), 2 deletions(-)
> 
> <...>
> 
>> +static struct workqueue_struct *rxe_wq;
>> +
>> +int rxe_alloc_wq(void)
>> +{
>> +	rxe_wq = alloc_workqueue("rxe_wq", WQ_MEM_RECLAIM |
>> +				WQ_HIGHPRI | WQ_CPU_INTENSIVE |
>> +				WQ_SYSFS, WQ_MAX_ACTIVE);
> 
> Are you sure that all these flags can be justified? WQ_MEM_RECLAIM?

Not really. CPU intensive is most likely correct. The rest not so much.
> 
>> +
>> +	if (!rxe_wq)
>> +		return -ENOMEM;
>> +
>> +	return 0;
>> +}
> 
> <...>
> 
>> +static void work_sched(struct rxe_task *task)
>> +{
>> +	if (!task->valid)
>> +		return;
>> +
>> +	queue_work(rxe_wq, &task->work);
>> +}
>> +
>> +static void work_do_task(struct work_struct *work)
>> +{
>> +	struct rxe_task *task = container_of(work, typeof(*task), work);
>> +
>> +	if (!task->valid)
>> +		return;
> 
> How can it be that submitted task is not valid? Especially without any
> locking.

This and a similar subroutine for tasklets are called deferred and can have a significant
delay before being called. In the mean time someone could have tried to destroy the QP. The valid
flag is only cleared by QP destroy code and is not turned back on. Perhaps a rmb().
> 
>> +
>> +	do_task(task);
>> +}
> 
> Thanks
> 
>> +
Leon Romanovsky Oct. 18, 2022, 5:52 p.m. UTC | #3
On Tue, Oct 18, 2022 at 10:18:13AM -0500, Bob Pearson wrote:
> On 10/18/22 03:59, Leon Romanovsky wrote:
> > On Mon, Oct 17, 2022 at 11:33:46PM -0500, Bob Pearson wrote:
> >> Add a third task type RXE_TASK_TYPE_WORKQUEUE to rxe_task.c.
> > 
> > Why do you need an extra type and not instead of RXE_TASK_TYPE_TASKLET?
> 
> It performs much better in some settings.
> > 
> >>
> >> Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
> >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >> ---
> >>  drivers/infiniband/sw/rxe/rxe.c      |  9 ++-
> >>  drivers/infiniband/sw/rxe/rxe_task.c | 84 ++++++++++++++++++++++++++++
> >>  drivers/infiniband/sw/rxe/rxe_task.h | 10 +++-
> >>  3 files changed, 101 insertions(+), 2 deletions(-)

<...>

> >> +static void work_do_task(struct work_struct *work)
> >> +{
> >> +	struct rxe_task *task = container_of(work, typeof(*task), work);
> >> +
> >> +	if (!task->valid)
> >> +		return;
> > 
> > How can it be that submitted task is not valid? Especially without any
> > locking.
> 
> This and a similar subroutine for tasklets are called deferred and can have a significant
> delay before being called. In the mean time someone could have tried to destroy the QP. The valid
> flag is only cleared by QP destroy code and is not turned back on. Perhaps a rmb().

rmb() is not a locking.

> > 
> >> +
> >> +	do_task(task);
> >> +}
> > 
> > Thanks
> > 
> >> +
>
Daisuke Matsuda (Fujitsu) Oct. 20, 2022, 9:28 a.m. UTC | #4
On Wed, Oct 19, 2022 12:18 AM Bob Pearson wrote:
> On 10/18/22 03:59, Leon Romanovsky wrote:
> > On Mon, Oct 17, 2022 at 11:33:46PM -0500, Bob Pearson wrote:
> >> Add a third task type RXE_TASK_TYPE_WORKQUEUE to rxe_task.c.
> >
> > Why do you need an extra type and not instead of RXE_TASK_TYPE_TASKLET?
> 
> It performs much better in some settings.
> >
> >>
> >> Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
> >> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> >> ---
> >>  drivers/infiniband/sw/rxe/rxe.c      |  9 ++-
> >>  drivers/infiniband/sw/rxe/rxe_task.c | 84 ++++++++++++++++++++++++++++
> >>  drivers/infiniband/sw/rxe/rxe_task.h | 10 +++-
> >>  3 files changed, 101 insertions(+), 2 deletions(-)
> >
> > <...>
> >
> >> +static struct workqueue_struct *rxe_wq;
> >> +
> >> +int rxe_alloc_wq(void)
> >> +{
> >> +	rxe_wq = alloc_workqueue("rxe_wq", WQ_MEM_RECLAIM |
> >> +				WQ_HIGHPRI | WQ_CPU_INTENSIVE |
> >> +				WQ_SYSFS, WQ_MAX_ACTIVE);
> >
> > Are you sure that all these flags can be justified? WQ_MEM_RECLAIM?
> 
> Not really. CPU intensive is most likely correct. The rest not so much.

I doubt this workqueue executes works in the queued order. If the order is changed
unexpectedly on responder, that may cause a failure or unexpected results.
Perhaps, we should make it equivalent to alloc_ordered_workqueue() as shown below:
=== workqueue.h===
#define alloc_ordered_workqueue(fmt, flags, args...)                    \
        alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED |                \
                        __WQ_ORDERED_EXPLICIT | (flags), 1, ##args)
===

Daisuke

> >
> >> +
> >> +	if (!rxe_wq)
> >> +		return -ENOMEM;
> >> +
> >> +	return 0;
> >> +}
> >
> > <...>
> >
> >> +static void work_sched(struct rxe_task *task)
> >> +{
> >> +	if (!task->valid)
> >> +		return;
> >> +
> >> +	queue_work(rxe_wq, &task->work);
> >> +}
> >> +
> >> +static void work_do_task(struct work_struct *work)
> >> +{
> >> +	struct rxe_task *task = container_of(work, typeof(*task), work);
> >> +
> >> +	if (!task->valid)
> >> +		return;
> >
> > How can it be that submitted task is not valid? Especially without any
> > locking.
> 
> This and a similar subroutine for tasklets are called deferred and can have a significant
> delay before being called. In the mean time someone could have tried to destroy the QP. The valid
> flag is only cleared by QP destroy code and is not turned back on. Perhaps a rmb().
> >
> >> +
> >> +	do_task(task);
> >> +}
> >
> > Thanks
> >
> >> +
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 51daac5c4feb..6d80218334ca 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -210,10 +210,16 @@  static int __init rxe_module_init(void)
 {
 	int err;
 
-	err = rxe_net_init();
+	err = rxe_alloc_wq();
 	if (err)
 		return err;
 
+	err = rxe_net_init();
+	if (err) {
+		rxe_destroy_wq();
+		return err;
+	}
+
 	rdma_link_register(&rxe_link_ops);
 	pr_info("loaded\n");
 	return 0;
@@ -224,6 +230,7 @@  static void __exit rxe_module_exit(void)
 	rdma_link_unregister(&rxe_link_ops);
 	ib_unregister_driver(RDMA_DRIVER_RXE);
 	rxe_net_exit();
+	rxe_destroy_wq();
 
 	pr_info("unloaded\n");
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index a2c58ce66c8f..ea33ea3bc0b1 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -10,6 +10,25 @@ 
 
 #include "rxe.h"
 
+static struct workqueue_struct *rxe_wq;
+
+int rxe_alloc_wq(void)
+{
+	rxe_wq = alloc_workqueue("rxe_wq", WQ_MEM_RECLAIM |
+				WQ_HIGHPRI | WQ_CPU_INTENSIVE |
+				WQ_SYSFS, WQ_MAX_ACTIVE);
+
+	if (!rxe_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void rxe_destroy_wq(void)
+{
+	destroy_workqueue(rxe_wq);
+}
+
 static bool task_is_idle(struct rxe_task *task)
 {
 	if (!task->valid)
@@ -216,6 +235,68 @@  static void tsklet_init(struct rxe_task *task)
 	task->ops = &tsklet_ops;
 }
 
+static void work_sched(struct rxe_task *task)
+{
+	if (!task->valid)
+		return;
+
+	queue_work(rxe_wq, &task->work);
+}
+
+static void work_do_task(struct work_struct *work)
+{
+	struct rxe_task *task = container_of(work, typeof(*task), work);
+
+	if (!task->valid)
+		return;
+
+	do_task(task);
+}
+
+static void work_run(struct rxe_task *task)
+{
+	if (!task->valid)
+		return;
+
+	do_task(task);
+}
+
+static void work_enable(struct rxe_task *task)
+{
+	if (!task->valid)
+		return;
+
+	enable_task(task);
+}
+
+static void work_disable(struct rxe_task *task)
+{
+	if (!task->valid)
+		return;
+
+	disable_task(task);
+	flush_workqueue(rxe_wq);
+}
+
+static void work_cleanup(struct rxe_task *task)
+{
+	cleanup_task(task);
+}
+
+static const struct rxe_task_ops work_ops = {
+	.sched = work_sched,
+	.run = work_run,
+	.enable = work_enable,
+	.disable = work_disable,
+	.cleanup = work_cleanup,
+};
+
+static void work_init(struct rxe_task *task)
+{
+	INIT_WORK(&task->work, work_do_task);
+	task->ops = &work_ops;
+}
+
 int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *),
 		  enum rxe_task_type type)
 {
@@ -233,6 +314,9 @@  int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *),
 	case RXE_TASK_TYPE_TASKLET:
 		tsklet_init(task);
 		break;
+	case RXE_TASK_TYPE_WORKQUEUE:
+		work_init(task);
+		break;
 	default:
 		pr_debug("%s: invalid task type = %d\n", __func__, type);
 		return -EINVAL;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index c81947e88629..4887ca566769 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -20,6 +20,7 @@  struct rxe_task_ops {
 enum rxe_task_type {
 	RXE_TASK_TYPE_INLINE	= 0,
 	RXE_TASK_TYPE_TASKLET	= 1,
+	RXE_TASK_TYPE_WORKQUEUE	= 2,
 };
 
 enum {
@@ -35,7 +36,10 @@  enum {
  * called again.
  */
 struct rxe_task {
-	struct tasklet_struct		tasklet;
+	union {
+		struct tasklet_struct		tasklet;
+		struct work_struct		work;
+	};
 	int				state;
 	spinlock_t			lock;
 	void				*arg;
@@ -46,6 +50,10 @@  struct rxe_task {
 	enum rxe_task_type		type;
 };
 
+int rxe_alloc_wq(void);
+
+void rxe_destroy_wq(void);
+
 int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *),
 		  enum rxe_task_type type);