Message ID | 20210902084640.679744-2-yangx.jy@fujitsu.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | RDMA/rxe: Do some cleanup | expand |
On Thu, Sep 02, 2021 at 04:46:36PM +0800, Xiao Yang wrote: > 1) post_one_send() always processes kernel's send queue. > 2) rxe_poll_cq() always processes kernel's completion queue. > > Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space") > Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com> > drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++--------------------- > 1 file changed, 6 insertions(+), 23 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c > index c223959ac174..cdded9f64910 100644 > +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c > @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, > struct rxe_sq *sq = &qp->sq; > struct rxe_send_wqe *send_wqe; > unsigned long flags; > - int full; > > err = validate_send_wr(qp, ibwr, mask, length); > if (err) > @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, > > spin_lock_irqsave(&qp->sq.sq_lock, flags); > > - if (qp->is_user) > - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); > - else > - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); > - > - if (unlikely(full)) { > + if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) { > spin_unlock_irqrestore(&qp->sq.sq_lock, flags); > return -ENOMEM; > } > > - if (qp->is_user) > - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); > - else > - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); > + send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); > > init_send_wqe(qp, ibwr, mask, length, send_wqe); > > - if (qp->is_user) > - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); > - else > - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); > + advance_producer(sq->queue, QUEUE_TYPE_KERNEL); > > spin_unlock_irqrestore(&qp->sq.sq_lock, flags); This bit looks OK > @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) > > spin_lock_irqsave(&cq->cq_lock, flags); > for (i = 0; i < num_entries; i++) { > - if (cq->is_user) > - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); > - else > - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); > + cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); > if (!cqe) > break; > > memcpy(wc++, &cqe->ibwc, sizeof(*wc)); > - if (cq->is_user) > - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); > - else > - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); > + > + advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); > } But why is this OK? It is used here: .poll_cq = rxe_poll_cq, Which is part of: static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) [..] ret = ib_poll_cq(cq, 1, &wc); That is used called? Jason
On 2021/9/15 2:32, Jason Gunthorpe wrote: > On Thu, Sep 02, 2021 at 04:46:36PM +0800, Xiao Yang wrote: >> 1) post_one_send() always processes kernel's send queue. >> 2) rxe_poll_cq() always processes kernel's completion queue. >> >> Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space") >> Signed-off-by: Xiao Yang<yangx.jy@fujitsu.com> >> drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++--------------------- >> 1 file changed, 6 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c >> index c223959ac174..cdded9f64910 100644 >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c >> @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, >> struct rxe_sq *sq =&qp->sq; >> struct rxe_send_wqe *send_wqe; >> unsigned long flags; >> - int full; >> >> err = validate_send_wr(qp, ibwr, mask, length); >> if (err) >> @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, >> >> spin_lock_irqsave(&qp->sq.sq_lock, flags); >> >> - if (qp->is_user) >> - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); >> - else >> - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); >> - >> - if (unlikely(full)) { >> + if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) { >> spin_unlock_irqrestore(&qp->sq.sq_lock, flags); >> return -ENOMEM; >> } >> >> - if (qp->is_user) >> - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); >> - else >> - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); >> + send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); >> >> init_send_wqe(qp, ibwr, mask, length, send_wqe); >> >> - if (qp->is_user) >> - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); >> - else >> - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); >> + advance_producer(sq->queue, QUEUE_TYPE_KERNEL); >> >> spin_unlock_irqrestore(&qp->sq.sq_lock, flags); > This bit looks OK > >> @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) >> >> spin_lock_irqsave(&cq->cq_lock, flags); >> for (i = 0; i< num_entries; i++) { >> - if (cq->is_user) >> - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); >> - else >> - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); >> + cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); >> if (!cqe) >> break; >> >> memcpy(wc++,&cqe->ibwc, sizeof(*wc)); >> - if (cq->is_user) >> - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); >> - else >> - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); >> + >> + advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); >> } > But why is this OK? > > It is used here: > > .poll_cq = rxe_poll_cq, > > Which is part of: > > static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) > [..] > > ret = ib_poll_cq(cq, 1,&wc); > > That is used called? Hi Jason, ib_uverbs_poll_cq() is called by ibv_cmd_poll_cq() in userspace but rxe uses its own rxe_poll_cq() instead. See the following code in rdma-core: ------------------------------------------------------------------- libibverbs/cmd.c: int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc) { ... ret = execute_cmd_write_no_uhw(ibcq->context, IB_USER_VERBS_CMD_POLL_CQ, &cmd, sizeof(cmd), resp, rsize); ... } providers/rxe/rxe.c: ... .poll_cq = rxe_poll_cq, ... ------------------------------------------------------------------- In this case, rxe has no chance to call ib_uverbs_poll_cq from userspace. rxe_poll_cq() can also be called by kthread, like this: -------------------------------------------------------------- [ 1247.060587] Call Trace: [ 1247.060592] __ib_process_cq+0x57/0x150 [ib_core] [ 1247.060633] ib_cq_poll_work+0x26/0x80 [ib_core] [ 1247.060671] process_one_work+0x1ec/0x390 [ 1247.060680] worker_thread+0x50/0x3a0 [ 1247.060687] ? process_one_work+0x390/0x390 [ 1247.060695] kthread+0x127/0x150 [ 1247.060701] ? set_kthread_struct+0x40/0x40 [ 1247.060706] ret_from_fork+0x22/0x30 [ 1247.060713] ---[ end trace 24a7d2217da4f2b5 ]--- -------------------------------------------------------------- By the way, I think the following code also indicates that rxe_poll_cq() is always processed by kernel. ---------------------------------------------------------------------------------- memcpy(wc++, &cqe->ibwc, sizeof(*wc)); ---------------------------------------------------------------------------------- Best Regards, Xiao Yang > Jason
On Thu, Sep 16, 2021 at 09:16:35AM +0000, yangx.jy@fujitsu.com wrote: > On 2021/9/15 2:32, Jason Gunthorpe wrote: > > On Thu, Sep 02, 2021 at 04:46:36PM +0800, Xiao Yang wrote: > >> 1) post_one_send() always processes kernel's send queue. > >> 2) rxe_poll_cq() always processes kernel's completion queue. > >> > >> Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space") > >> Signed-off-by: Xiao Yang<yangx.jy@fujitsu.com> > >> drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++--------------------- > >> 1 file changed, 6 insertions(+), 23 deletions(-) > >> > >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c > >> index c223959ac174..cdded9f64910 100644 > >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c > >> @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, > >> struct rxe_sq *sq =&qp->sq; > >> struct rxe_send_wqe *send_wqe; > >> unsigned long flags; > >> - int full; > >> > >> err = validate_send_wr(qp, ibwr, mask, length); > >> if (err) > >> @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, > >> > >> spin_lock_irqsave(&qp->sq.sq_lock, flags); > >> > >> - if (qp->is_user) > >> - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); > >> - else > >> - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); > >> - > >> - if (unlikely(full)) { > >> + if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) { > >> spin_unlock_irqrestore(&qp->sq.sq_lock, flags); > >> return -ENOMEM; > >> } > >> > >> - if (qp->is_user) > >> - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); > >> - else > >> - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); > >> + send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); > >> > >> init_send_wqe(qp, ibwr, mask, length, send_wqe); > >> > >> - if (qp->is_user) > >> - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); > >> - else > >> - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); > >> + advance_producer(sq->queue, QUEUE_TYPE_KERNEL); > >> > >> spin_unlock_irqrestore(&qp->sq.sq_lock, flags); > > This bit looks OK > > > >> @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) > >> > >> spin_lock_irqsave(&cq->cq_lock, flags); > >> for (i = 0; i< num_entries; i++) { > >> - if (cq->is_user) > >> - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); > >> - else > >> - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); > >> + cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); > >> if (!cqe) > >> break; > >> > >> memcpy(wc++,&cqe->ibwc, sizeof(*wc)); > >> - if (cq->is_user) > >> - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); > >> - else > >> - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); > >> + > >> + advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); > >> } > > But why is this OK? > > > > It is used here: > > > > .poll_cq = rxe_poll_cq, > > > > Which is part of: > > > > static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) > > [..] > > > > ret = ib_poll_cq(cq, 1,&wc); > > > > That is used called? > Hi Jason, > > ib_uverbs_poll_cq() is called by ibv_cmd_poll_cq() in userspace but rxe > uses its own rxe_poll_cq() instead. > See the following code in rdma-core: Yes, but rdma-core doesn't matter. The question is why is this safe and the reason is rxe doesn't set IB_USER_VERBS_CMD_POLL_CQ in uverbs_cmd_mask. I'd be a bit happier seeing this fixed so we have a poll_kernel_cq poll_user_cq op and this isn't so tricky. Jason
On 2021/9/16 21:22, Jason Gunthorpe wrote: > Yes, but rdma-core doesn't matter. > > The question is why is this safe and the reason is rxe doesn't set > IB_USER_VERBS_CMD_POLL_CQ in uverbs_cmd_mask. > > I'd be a bit happier seeing this fixed so we have a poll_kernel_cq > poll_user_cq op and this isn't so tricky. Hi Jason, I saw you removed IB_USER_VERBS_CMD_POLL_CQ from uverbs_cmd_mask by commit 628c02bf38aa ("RDMA: Remove uverbs cmds from drivers that don't use them") I think I don't need to update the patch, right? :-) Best Regards, Xiao Yang > Jason
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index c223959ac174..cdded9f64910 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -632,7 +632,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, struct rxe_sq *sq = &qp->sq; struct rxe_send_wqe *send_wqe; unsigned long flags; - int full; err = validate_send_wr(qp, ibwr, mask, length); if (err) @@ -640,27 +639,16 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, spin_lock_irqsave(&qp->sq.sq_lock, flags); - if (qp->is_user) - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); - else - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); - - if (unlikely(full)) { + if (unlikely(queue_full(sq->queue, QUEUE_TYPE_KERNEL))) { spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return -ENOMEM; } - if (qp->is_user) - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); - else - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); + send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (qp->is_user) - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); - else - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); + advance_producer(sq->queue, QUEUE_TYPE_KERNEL); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ -852,18 +840,13 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->cq_lock, flags); for (i = 0; i < num_entries; i++) { - if (cq->is_user) - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); - else - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); + cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); if (!cqe) break; memcpy(wc++, &cqe->ibwc, sizeof(*wc)); - if (cq->is_user) - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); - else - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); + + advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); } spin_unlock_irqrestore(&cq->cq_lock, flags);
1) post_one_send() always processes kernel's send queue. 2) rxe_poll_cq() always processes kernel's completion queue. Fixes: 5bcf5a59c41e ("RDMA/rxe: Protext kernel index from user space") Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com> --- drivers/infiniband/sw/rxe/rxe_verbs.c | 29 ++++++--------------------- 1 file changed, 6 insertions(+), 23 deletions(-)