diff mbox series

[01/15] RDMA/ucma: Reduce the number of rdma_destroy_id() calls

Message ID 20190930231707.48259-2-bvanassche@acm.org (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series RDMA patches for kernel v5.5 | expand

Commit Message

Bart Van Assche Sept. 30, 2019, 11:16 p.m. UTC
Instead of calling rdma_destroy_id() after waiting for the context completion
finished, call rdma_destroy_id() from inside the ucma_put_ctx() function.
This patch reduces the number of rdma_destroy_id() calls but does not change
the behavior of this code.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/infiniband/core/ucma.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

Comments

Jason Gunthorpe Oct. 1, 2019, 3:07 p.m. UTC | #1
On Mon, Sep 30, 2019 at 04:16:53PM -0700, Bart Van Assche wrote:
> Instead of calling rdma_destroy_id() after waiting for the context completion
> finished, call rdma_destroy_id() from inside the ucma_put_ctx() function.
> This patch reduces the number of rdma_destroy_id() calls but does not change
> the behavior of this code.
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
>  drivers/infiniband/core/ucma.c | 17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
> index 0274e9b704be..30c09864fd9e 100644
> +++ b/drivers/infiniband/core/ucma.c
> @@ -160,8 +160,14 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
>  
>  static void ucma_put_ctx(struct ucma_context *ctx)
>  {
> -	if (atomic_dec_and_test(&ctx->ref))
> -		complete(&ctx->comp);
> +	if (!atomic_dec_and_test(&ctx->ref))
> +		return;
> +	/*
> +	 * rdma_destroy_id() ensures that no event handlers are inflight
> +	 * for that id before releasing it.
> +	 */
> +	rdma_destroy_id(ctx->cm_id);
> +	complete(&ctx->comp);
>  }

Since all the refcounting here is basically insane, you can't do this
without creating new kinds of bugs related to lifetime of ctx->cm_id

The call to rdma_destroy_id must be after the xa_erase as other
threads can continue to access the context despite its zero ref via
ucma_get_ctx(() as ucma_get_ctx() is not using refcounts properly.

The xa_erase provides the needed barrier.

Maybe this patch could be fixed if the ucma_get_ctx used an
atomic_inc_not_zero ?

Jason
Bart Van Assche Oct. 1, 2019, 5:13 p.m. UTC | #2
On 10/1/19 8:07 AM, Jason Gunthorpe wrote:
> On Mon, Sep 30, 2019 at 04:16:53PM -0700, Bart Van Assche wrote:
>> Instead of calling rdma_destroy_id() after waiting for the context completion
>> finished, call rdma_destroy_id() from inside the ucma_put_ctx() function.
>> This patch reduces the number of rdma_destroy_id() calls but does not change
>> the behavior of this code.
>>
>> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
>>   drivers/infiniband/core/ucma.c | 17 ++++++++---------
>>   1 file changed, 8 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
>> index 0274e9b704be..30c09864fd9e 100644
>> +++ b/drivers/infiniband/core/ucma.c
>> @@ -160,8 +160,14 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
>>   
>>   static void ucma_put_ctx(struct ucma_context *ctx)
>>   {
>> -	if (atomic_dec_and_test(&ctx->ref))
>> -		complete(&ctx->comp);
>> +	if (!atomic_dec_and_test(&ctx->ref))
>> +		return;
>> +	/*
>> +	 * rdma_destroy_id() ensures that no event handlers are inflight
>> +	 * for that id before releasing it.
>> +	 */
>> +	rdma_destroy_id(ctx->cm_id);
>> +	complete(&ctx->comp);
>>   }
> 
> Since all the refcounting here is basically insane, you can't do this
> without creating new kinds of bugs related to lifetime of ctx->cm_id
> 
> The call to rdma_destroy_id must be after the xa_erase as other
> threads can continue to access the context despite its zero ref via
> ucma_get_ctx(() as ucma_get_ctx() is not using refcounts properly.
> 
> The xa_erase provides the needed barrier.
> 
> Maybe this patch could be fixed if the ucma_get_ctx used an
> atomic_inc_not_zero ?

Hi Jason,

Since I'm not an ucma expert, maybe it's better that I drop this patch.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 0274e9b704be..30c09864fd9e 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -160,8 +160,14 @@  static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
 
 static void ucma_put_ctx(struct ucma_context *ctx)
 {
-	if (atomic_dec_and_test(&ctx->ref))
-		complete(&ctx->comp);
+	if (!atomic_dec_and_test(&ctx->ref))
+		return;
+	/*
+	 * rdma_destroy_id() ensures that no event handlers are inflight
+	 * for that id before releasing it.
+	 */
+	rdma_destroy_id(ctx->cm_id);
+	complete(&ctx->comp);
 }
 
 /*
@@ -199,8 +205,6 @@  static void ucma_close_id(struct work_struct *work)
 	 */
 	ucma_put_ctx(ctx);
 	wait_for_completion(&ctx->comp);
-	/* No new events will be generated after destroying the id. */
-	rdma_destroy_id(ctx->cm_id);
 }
 
 static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -628,7 +632,6 @@  static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
 		xa_unlock(&ctx_table);
 		ucma_put_ctx(ctx);
 		wait_for_completion(&ctx->comp);
-		rdma_destroy_id(ctx->cm_id);
 	} else {
 		xa_unlock(&ctx_table);
 	}
@@ -1756,10 +1759,6 @@  static int ucma_close(struct inode *inode, struct file *filp)
 			xa_unlock(&ctx_table);
 			ucma_put_ctx(ctx);
 			wait_for_completion(&ctx->comp);
-			/* rdma_destroy_id ensures that no event handlers are
-			 * inflight for that id before releasing it.
-			 */
-			rdma_destroy_id(ctx->cm_id);
 		} else {
 			xa_unlock(&ctx_table);
 		}