
[v2,1/5] RDMA/rxe: Remove unnecessary check for qp->is_user/cq->is_user

Message ID: 20210902084640.679744-2-yangx.jy@fujitsu.com (mailing list archive)
State: Superseded
Delegated to: Jason Gunthorpe
Series: RDMA/rxe: Do some cleanup

Commit Message

Xiao Yang Sept. 2, 2021, 8:46 a.m. UTC
1) post_one_send() always processes kernel's send queue.
2) rxe_poll_cq() always processes kernel's completion queue.

Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space")
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++---------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

Comments

Jason Gunthorpe Sept. 14, 2021, 6:32 p.m. UTC | #1
On Thu, Sep 02, 2021 at 04:46:36PM +0800, Xiao Yang wrote:
> 1) post_one_send() always processes kernel's send queue.
> 2) rxe_poll_cq() always processes kernel's completion queue.
> 
> Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space")
> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
>  drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++---------------------
>  1 file changed, 6 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
> index c223959ac174..cdded9f64910 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.c
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
> @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
>  	struct rxe_sq *sq = &qp->sq;
>  	struct rxe_send_wqe *send_wqe;
>  	unsigned long flags;
> -	int full;
>  
>  	err = validate_send_wr(qp, ibwr, mask, length);
>  	if (err)
> @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
>  
>  	spin_lock_irqsave(&qp->sq.sq_lock, flags);
>  
> -	if (qp->is_user)
> -		full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
> -	else
> -		full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
> -
> -	if (unlikely(full)) {
> +	if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) {
>  		spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
>  		return -ENOMEM;
>  	}
>  
> -	if (qp->is_user)
> -		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
> -	else
> -		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
> +	send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
>  
>  	init_send_wqe(qp, ibwr, mask, length, send_wqe);
>  
> -	if (qp->is_user)
> -		advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
> -	else
> -		advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
> +	advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
>  
>  	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

This bit looks OK
  
> @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
>  
>  	spin_lock_irqsave(&cq->cq_lock, flags);
>  	for (i = 0; i < num_entries; i++) {
> -		if (cq->is_user)
> -			cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
> -		else
> -			cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
> +		cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
>  		if (!cqe)
>  			break;
>  
>  		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
> -		if (cq->is_user)
> -			advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
> -		else
> -			advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
> +
> +		advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
>  	}

But why is this OK?

It is used here:

	.poll_cq = rxe_poll_cq,

Which is part of:

static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
[..]

		ret = ib_poll_cq(cq, 1, &wc);

Is that still called?

Jason
Xiao Yang Sept. 16, 2021, 9:16 a.m. UTC | #2
On 2021/9/15 2:32, Jason Gunthorpe wrote:
> On Thu, Sep 02, 2021 at 04:46:36PM +0800, Xiao Yang wrote:
>> 1) post_one_send() always processes kernel's send queue.
>> 2) rxe_poll_cq() always processes kernel's completion queue.
>>
>> Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space")
>> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
>>   drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++---------------------
>>   1 file changed, 6 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
>> index c223959ac174..cdded9f64910 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
>> @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
>>   	struct rxe_sq *sq = &qp->sq;
>>   	struct rxe_send_wqe *send_wqe;
>>   	unsigned long flags;
>> -	int full;
>>
>>   	err = validate_send_wr(qp, ibwr, mask, length);
>>   	if (err)
>> @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
>>
>>   	spin_lock_irqsave(&qp->sq.sq_lock, flags);
>>
>> -	if (qp->is_user)
>> -		full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
>> -	else
>> -		full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
>> -
>> -	if (unlikely(full)) {
>> +	if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) {
>>   		spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
>>   		return -ENOMEM;
>>   	}
>>
>> -	if (qp->is_user)
>> -		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
>> -	else
>> -		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
>> +	send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
>>
>>   	init_send_wqe(qp, ibwr, mask, length, send_wqe);
>>
>> -	if (qp->is_user)
>> -		advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
>> -	else
>> -		advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
>> +	advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
>>
>>   	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
> This bit looks OK
>
>> @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
>>
>>   	spin_lock_irqsave(&cq->cq_lock, flags);
>>   	for (i = 0; i < num_entries; i++) {
>> -		if (cq->is_user)
>> -			cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
>> -		else
>> -			cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
>> +		cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
>>   		if (!cqe)
>>   			break;
>>
>>   		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
>> -		if (cq->is_user)
>> -			advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
>> -		else
>> -			advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
>> +
>> +		advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
>>   	}
> But why is this OK?
>
> It is used here:
>
> 	.poll_cq = rxe_poll_cq,
>
> Which is part of:
>
> static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
> [..]
>
> 		ret = ib_poll_cq(cq, 1, &wc);
>
> Is that still called?
Hi Jason,

ib_uverbs_poll_cq() is reached through ibv_cmd_poll_cq() in userspace, but
the rxe provider in rdma-core installs its own rxe_poll_cq() instead.
See the following code in rdma-core:
-------------------------------------------------------------------
libibverbs/cmd.c:
int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
     ...
     ret = execute_cmd_write_no_uhw(ibcq->context, IB_USER_VERBS_CMD_POLL_CQ,
                                    &cmd, sizeof(cmd), resp, rsize);
     ...
}

providers/rxe/rxe.c:
...
.poll_cq = rxe_poll_cq,
...
-------------------------------------------------------------------
In this case, userspace never reaches ib_uverbs_poll_cq() for rxe.

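For illustration only, here is a simplified sketch of how the userspace entry
point dispatches straight to the provider's op (names follow
<infiniband/verbs.h>; example_poll_cq is a made-up name, not rdma-core code):
-------------------------------------------------------------------
/* Illustrative sketch of the libibverbs dispatch. */
static inline int example_poll_cq(struct ibv_cq *cq, int num_entries,
				  struct ibv_wc *wc)
{
	/* For rxe this resolves to rxe_poll_cq() in providers/rxe/rxe.c,
	 * which walks the mmap'ed completion queue in userspace; no
	 * IB_USER_VERBS_CMD_POLL_CQ write() is issued, so the kernel's
	 * ib_uverbs_poll_cq() never runs. */
	return cq->context->ops.poll_cq(cq, num_entries, wc);
}
-------------------------------------------------------------------
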
rxe_poll_cq() can also be called from a kernel worker thread, as in this trace:
--------------------------------------------------------------
[ 1247.060587] Call Trace:
[ 1247.060592]  __ib_process_cq+0x57/0x150 [ib_core]
[ 1247.060633]  ib_cq_poll_work+0x26/0x80 [ib_core]
[ 1247.060671]  process_one_work+0x1ec/0x390
[ 1247.060680]  worker_thread+0x50/0x3a0
[ 1247.060687]  ? process_one_work+0x390/0x390
[ 1247.060695]  kthread+0x127/0x150
[ 1247.060701]  ? set_kthread_struct+0x40/0x40
[ 1247.060706]  ret_from_fork+0x22/0x30
[ 1247.060713] ---[ end trace 24a7d2217da4f2b5 ]---
--------------------------------------------------------------
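
That path is reached when an in-kernel consumer allocates its CQ through the
core polling API. A minimal, purely illustrative sketch (the example_* names
are made up, not taken from any real ULP):
--------------------------------------------------------------
#include <rdma/ib_verbs.h>

/* Runs from ib_cq_poll_work()/__ib_process_cq(), i.e. after
 * rxe_poll_cq() has copied the completion into *wc in the kernel. */
static void example_done(struct ib_cq *cq, struct ib_wc *wc)
{
	pr_info("completion status %d\n", wc->status);
}

static struct ib_cqe example_cqe = { .done = example_done };

static struct ib_cq *example_alloc_cq(struct ib_device *dev)
{
	/* IB_POLL_WORKQUEUE makes the core poll this CQ from a kernel
	 * worker, which is exactly the call chain in the trace above.
	 * Posted WRs would set wr_cqe = &example_cqe. */
	return ib_alloc_cq(dev, NULL, 16, 0, IB_POLL_WORKQUEUE);
}
--------------------------------------------------------------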

By the way, I think the following line also indicates that rxe_poll_cq() is
always called from the kernel: the wc array it fills is a plain kernel
pointer supplied by an in-kernel caller of ib_poll_cq(), whereas a userspace
CQ consumer reads completions directly from the shared queue.
----------------------------------------------------------------------------------
memcpy(wc++, &cqe->ibwc, sizeof(*wc));
----------------------------------------------------------------------------------

Best Regards,
Xiao Yang
> Jason
Jason Gunthorpe Sept. 16, 2021, 1:22 p.m. UTC | #3
On Thu, Sep 16, 2021 at 09:16:35AM +0000, yangx.jy@fujitsu.com wrote:
> On 2021/9/15 2:32, Jason Gunthorpe wrote:
> > On Thu, Sep 02, 2021 at 04:46:36PM +0800, Xiao Yang wrote:
> >> 1) post_one_send() always processes kernel's send queue.
> >> 2) rxe_poll_cq() always processes kernel's completion queue.
> >>
> >> Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space")
> >> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
> >>   drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++---------------------
> >>   1 file changed, 6 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
> >> index c223959ac174..cdded9f64910 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.c
> >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
> >> @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
> >>   	struct rxe_sq *sq = &qp->sq;
> >>   	struct rxe_send_wqe *send_wqe;
> >>   	unsigned long flags;
> >> -	int full;
> >>
> >>   	err = validate_send_wr(qp, ibwr, mask, length);
> >>   	if (err)
> >> @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
> >>
> >>   	spin_lock_irqsave(&qp->sq.sq_lock, flags);
> >>
> >> -	if (qp->is_user)
> >> -		full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
> >> -	else
> >> -		full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
> >> -
> >> -	if (unlikely(full)) {
> >> +	if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) {
> >>   		spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
> >>   		return -ENOMEM;
> >>   	}
> >>
> >> -	if (qp->is_user)
> >> -		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
> >> -	else
> >> -		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
> >> +	send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
> >>
> >>   	init_send_wqe(qp, ibwr, mask, length, send_wqe);
> >>
> >> -	if (qp->is_user)
> >> -		advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
> >> -	else
> >> -		advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
> >> +	advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
> >>
> >>   	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
> > This bit looks OK
> >
> >> @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
> >>
> >>   	spin_lock_irqsave(&cq->cq_lock, flags);
> >>   	for (i = 0; i < num_entries; i++) {
> >> -		if (cq->is_user)
> >> -			cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
> >> -		else
> >> -			cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
> >> +		cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
> >>   		if (!cqe)
> >>   			break;
> >>
> >>   		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
> >> -		if (cq->is_user)
> >> -			advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
> >> -		else
> >> -			advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
> >> +
> >> +		advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
> >>   	}
> > But why is this OK?
> >
> > It is used here:
> >
> > 	.poll_cq = rxe_poll_cq,
> >
> > Which is part of:
> >
> > static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
> > [..]
> >
> > 		ret = ib_poll_cq(cq, 1, &wc);
> >
> > Is that still called?
> Hi Jason,
> 
> ib_uverbs_poll_cq() is reached through ibv_cmd_poll_cq() in userspace, but
> the rxe provider in rdma-core installs its own rxe_poll_cq() instead.
> See the following code in rdma-core:

Yes, but rdma-core doesn't matter.

The question is why this is safe, and the reason is that rxe doesn't set
IB_USER_VERBS_CMD_POLL_CQ in uverbs_cmd_mask.

I'd be a bit happier seeing this fixed so we have separate poll_kernel_cq and
poll_user_cq ops and this isn't so tricky.

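For illustration (hypothetical snippet, not rxe code), a driver that wanted
userspace to reach ib_uverbs_poll_cq() would have to advertise the command in
its ib_device setup, e.g.:

	ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);

rxe never sets that bit, so the uverbs write() path cannot drive its poll_cq.
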
Jason
Xiao Yang Sept. 17, 2021, 6 a.m. UTC | #4
On 2021/9/16 21:22, Jason Gunthorpe wrote:
> Yes, but rdma-core doesn't matter.
>
> The question is why this is safe, and the reason is that rxe doesn't set
> IB_USER_VERBS_CMD_POLL_CQ in uverbs_cmd_mask.
>
> I'd be a bit happier seeing this fixed so we have separate poll_kernel_cq and
> poll_user_cq ops and this isn't so tricky.
Hi Jason,

I saw you removed IB_USER_VERBS_CMD_POLL_CQ from uverbs_cmd_mask in
commit 628c02bf38aa ("RDMA: Remove uverbs cmds from drivers that don't
use them").

I think I don't need to update the patch, right? :-)

Best Regards,
Xiao Yang
> Jason

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index c223959ac174..cdded9f64910 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -632,7 +632,6 @@  static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 	struct rxe_sq *sq = &qp->sq;
 	struct rxe_send_wqe *send_wqe;
 	unsigned long flags;
-	int full;
 
 	err = validate_send_wr(qp, ibwr, mask, length);
 	if (err)
@@ -640,27 +639,16 @@  static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 
 	spin_lock_irqsave(&qp->sq.sq_lock, flags);
 
-	if (qp->is_user)
-		full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
-	else
-		full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
-
-	if (unlikely(full)) {
+	if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) {
 		spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 		return -ENOMEM;
 	}
 
-	if (qp->is_user)
-		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
-	else
-		send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
+	send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
 
 	init_send_wqe(qp, ibwr, mask, length, send_wqe);
 
-	if (qp->is_user)
-		advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
-	else
-		advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
+	advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
 
 	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 
@@ -852,18 +840,13 @@  static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 
 	spin_lock_irqsave(&cq->cq_lock, flags);
 	for (i = 0; i < num_entries; i++) {
-		if (cq->is_user)
-			cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
-		else
-			cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
+		cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
 		if (!cqe)
 			break;
 
 		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
-		if (cq->is_user)
-			advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
-		else
-			advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
+
+		advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
 	}
 	spin_unlock_irqrestore(&cq->cq_lock, flags);