Message ID | 1558031213-14219-1-git-send-email-aditr@vmware.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | [rdma-core] vmw_pvrdma: Use resource ids from physical device if available | expand |
On Thu, May 16, 2019 at 06:27:06PM +0000, Adit Ranadive wrote: > From: Bryan Tan <bryantan@vmware.com> > > This change allows user-space to use physical resource numbers if > they are passed up from the driver. Doing so allows communication with > physical non-ESX endpoints (such as a bare-metal Linux machine or a > SR-IOV-enabled VM). > > This is accomplished by separating the concept of the QP number from > the QP handle. Previously, the two were the same, as the QP number > was exposed to the guest and also used to reference a virtual QP in > the device backend. With physical resource numbers exposed, the QP > number given to the guest is the QP number assigned to the physical > HCA's QP, while the QP handle is still the internal handle used to > reference a virtual QP. Regardless of whether the device is exposing > physical ids, the driver will still try to pick up the QP handle from > the backend if possible. > > The MR keys exposed to the guest when the physical resource ids feature > is turned on are likewise now the MR keys created by the physical HCA, > instead of virtual MR keys. > > The ABI has been updated to allow the return of the QP handle to the > guest library. The ABI version has been bumped up because of this > non-compatible change. 
> > Reviewed-by: Jorgen Hansen <jhansen@vmware.com> > Signed-off-by: Adit Ranadive <aditr@vmware.com> > Signed-off-by: Bryan Tan <bryantan@vmware.com> > kernel-headers/rdma/vmw_pvrdma-abi.h | 11 ++++++++++- > providers/vmw_pvrdma/pvrdma-abi.h | 4 +++- > providers/vmw_pvrdma/pvrdma.h | 1 + > providers/vmw_pvrdma/pvrdma_main.c | 4 ++-- > providers/vmw_pvrdma/qp.c | 31 ++++++++++++++++++++----------- > 5 files changed, 36 insertions(+), 15 deletions(-) > > Github PR: > https://github.com/linux-rdma/rdma-core/pull/531 > > diff --git a/kernel-headers/rdma/vmw_pvrdma-abi.h b/kernel-headers/rdma/vmw_pvrdma-abi.h > index 6e73f0274e41..8c388d623e5c 100644 > +++ b/kernel-headers/rdma/vmw_pvrdma-abi.h > @@ -49,7 +49,11 @@ > > #include <linux/types.h> > > -#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */ > +#define PVRDMA_UVERBS_MIN_ABI_VERSION 3 > +#define PVRDMA_UVERBS_MAX_ABI_VERSION 4 > + > +#define PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION 3 > + > #define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */ > #define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */ > #define PVRDMA_UAR_QP_SEND (1 << 30) /* Send bit. 
*/ > @@ -179,6 +183,11 @@ struct pvrdma_create_qp { > __aligned_u64 qp_addr; > }; > > +struct pvrdma_create_qp_resp { > + __u32 qpn; > + __u32 qp_handle; > +}; > + > /* PVRDMA masked atomic compare and swap */ > struct pvrdma_ex_cmp_swap { > __aligned_u64 swap_val; > diff --git a/providers/vmw_pvrdma/pvrdma-abi.h b/providers/vmw_pvrdma/pvrdma-abi.h > index 77db9ddd1bb7..9775925f8555 100644 > +++ b/providers/vmw_pvrdma/pvrdma-abi.h > @@ -54,8 +54,10 @@ DECLARE_DRV_CMD(user_pvrdma_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, > empty, pvrdma_alloc_pd_resp); > DECLARE_DRV_CMD(user_pvrdma_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, > pvrdma_create_cq, pvrdma_create_cq_resp); > -DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP, > +DECLARE_DRV_CMD(user_pvrdma_create_qp_v3, IB_USER_VERBS_CMD_CREATE_QP, > pvrdma_create_qp, empty); > +DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP, > + pvrdma_create_qp, pvrdma_create_qp_resp); > DECLARE_DRV_CMD(user_pvrdma_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, > pvrdma_create_srq, pvrdma_create_srq_resp); > DECLARE_DRV_CMD(user_pvrdma_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, > diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h > index ebd50ce1c3cd..b67c07e94f90 100644 > +++ b/providers/vmw_pvrdma/pvrdma.h > @@ -170,6 +170,7 @@ struct pvrdma_qp { > struct pvrdma_wq sq; > struct pvrdma_wq rq; > int is_srq; > + uint32_t qp_handle; > }; > > struct pvrdma_ah { > diff --git a/providers/vmw_pvrdma/pvrdma_main.c b/providers/vmw_pvrdma/pvrdma_main.c > index 52a2de22d44c..616310ae45c5 100644 > +++ b/providers/vmw_pvrdma/pvrdma_main.c > @@ -201,8 +201,8 @@ static const struct verbs_match_ent hca_table[] = { > > static const struct verbs_device_ops pvrdma_dev_ops = { > .name = "pvrdma", > - .match_min_abi_version = PVRDMA_UVERBS_ABI_VERSION, > - .match_max_abi_version = PVRDMA_UVERBS_ABI_VERSION, > + .match_min_abi_version = PVRDMA_UVERBS_MIN_ABI_VERSION, > + .match_max_abi_version = 
PVRDMA_UVERBS_MAX_ABI_VERSION, > .match_table = hca_table, > .alloc_device = pvrdma_device_alloc, > .uninit_device = pvrdma_uninit_device, > diff --git a/providers/vmw_pvrdma/qp.c b/providers/vmw_pvrdma/qp.c > index ef429db93a43..a173d441df0d 100644 > +++ b/providers/vmw_pvrdma/qp.c > @@ -211,9 +211,9 @@ struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd, > { > struct pvrdma_device *dev = to_vdev(pd->context->device); > struct user_pvrdma_create_qp cmd; > - struct ib_uverbs_create_qp_resp resp; > + struct user_pvrdma_create_qp_resp resp; > + struct user_pvrdma_create_qp_v3_resp resp_v3; > struct pvrdma_qp *qp; > - int ret; > int is_srq = !!(attr->srq); > > attr->cap.max_send_sge = max_t(uint32_t, 1U, attr->cap.max_send_sge); > @@ -282,14 +282,23 @@ struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd, > cmd.rbuf_size = qp->rbuf.length; > cmd.qp_addr = (uintptr_t) qp; > > - ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, > - &cmd.ibv_cmd, sizeof(cmd), > - &resp, sizeof(resp)); > + if (dev->abi_version <= PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION) { > + if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, > + sizeof(cmd), &resp_v3.ibv_resp, > + sizeof(resp_v3.ibv_resp)))

And here just pass in the version with the longer size and provide some in-band way to tell if the kernel wrote to it.

Jason
diff --git a/kernel-headers/rdma/vmw_pvrdma-abi.h b/kernel-headers/rdma/vmw_pvrdma-abi.h index 6e73f0274e41..8c388d623e5c 100644 --- a/kernel-headers/rdma/vmw_pvrdma-abi.h +++ b/kernel-headers/rdma/vmw_pvrdma-abi.h @@ -49,7 +49,11 @@ #include <linux/types.h> -#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */ +#define PVRDMA_UVERBS_MIN_ABI_VERSION 3 +#define PVRDMA_UVERBS_MAX_ABI_VERSION 4 + +#define PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION 3 + #define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */ #define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */ #define PVRDMA_UAR_QP_SEND (1 << 30) /* Send bit. */ @@ -179,6 +183,11 @@ struct pvrdma_create_qp { __aligned_u64 qp_addr; }; +struct pvrdma_create_qp_resp { + __u32 qpn; + __u32 qp_handle; +}; + /* PVRDMA masked atomic compare and swap */ struct pvrdma_ex_cmp_swap { __aligned_u64 swap_val; diff --git a/providers/vmw_pvrdma/pvrdma-abi.h b/providers/vmw_pvrdma/pvrdma-abi.h index 77db9ddd1bb7..9775925f8555 100644 --- a/providers/vmw_pvrdma/pvrdma-abi.h +++ b/providers/vmw_pvrdma/pvrdma-abi.h @@ -54,8 +54,10 @@ DECLARE_DRV_CMD(user_pvrdma_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty, pvrdma_alloc_pd_resp); DECLARE_DRV_CMD(user_pvrdma_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, pvrdma_create_cq, pvrdma_create_cq_resp); -DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP, +DECLARE_DRV_CMD(user_pvrdma_create_qp_v3, IB_USER_VERBS_CMD_CREATE_QP, pvrdma_create_qp, empty); +DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP, + pvrdma_create_qp, pvrdma_create_qp_resp); DECLARE_DRV_CMD(user_pvrdma_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, pvrdma_create_srq, pvrdma_create_srq_resp); DECLARE_DRV_CMD(user_pvrdma_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT, diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h index ebd50ce1c3cd..b67c07e94f90 100644 --- a/providers/vmw_pvrdma/pvrdma.h +++ b/providers/vmw_pvrdma/pvrdma.h @@ -170,6 +170,7 @@ struct pvrdma_qp { struct pvrdma_wq 
sq; struct pvrdma_wq rq; int is_srq; + uint32_t qp_handle; }; struct pvrdma_ah { diff --git a/providers/vmw_pvrdma/pvrdma_main.c b/providers/vmw_pvrdma/pvrdma_main.c index 52a2de22d44c..616310ae45c5 100644 --- a/providers/vmw_pvrdma/pvrdma_main.c +++ b/providers/vmw_pvrdma/pvrdma_main.c @@ -201,8 +201,8 @@ static const struct verbs_match_ent hca_table[] = { static const struct verbs_device_ops pvrdma_dev_ops = { .name = "pvrdma", - .match_min_abi_version = PVRDMA_UVERBS_ABI_VERSION, - .match_max_abi_version = PVRDMA_UVERBS_ABI_VERSION, + .match_min_abi_version = PVRDMA_UVERBS_MIN_ABI_VERSION, + .match_max_abi_version = PVRDMA_UVERBS_MAX_ABI_VERSION, .match_table = hca_table, .alloc_device = pvrdma_device_alloc, .uninit_device = pvrdma_uninit_device, diff --git a/providers/vmw_pvrdma/qp.c b/providers/vmw_pvrdma/qp.c index ef429db93a43..a173d441df0d 100644 --- a/providers/vmw_pvrdma/qp.c +++ b/providers/vmw_pvrdma/qp.c @@ -211,9 +211,9 @@ struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd, { struct pvrdma_device *dev = to_vdev(pd->context->device); struct user_pvrdma_create_qp cmd; - struct ib_uverbs_create_qp_resp resp; + struct user_pvrdma_create_qp_resp resp; + struct user_pvrdma_create_qp_v3_resp resp_v3; struct pvrdma_qp *qp; - int ret; int is_srq = !!(attr->srq); attr->cap.max_send_sge = max_t(uint32_t, 1U, attr->cap.max_send_sge); @@ -282,14 +282,23 @@ struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd, cmd.rbuf_size = qp->rbuf.length; cmd.qp_addr = (uintptr_t) qp; - ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, - &cmd.ibv_cmd, sizeof(cmd), - &resp, sizeof(resp)); + if (dev->abi_version <= PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION) { + if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, + sizeof(cmd), &resp_v3.ibv_resp, + sizeof(resp_v3.ibv_resp))) + goto err_free; - if (ret) - goto err_free; + qp->qp_handle = qp->ibv_qp.qp_num; + } else { + if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, + sizeof(cmd), &resp.ibv_resp, + sizeof(resp))) + goto 
err_free; + + qp->qp_handle = resp.drv_payload.qp_handle; + } - to_vctx(pd->context)->qp_tbl[qp->ibv_qp.qp_num & 0xFFFF] = qp; + to_vctx(pd->context)->qp_tbl[qp->qp_handle & 0xFFFF] = qp; /* If set, each WR submitted to the SQ generate a completion entry */ if (attr->sq_sig_all) @@ -414,7 +423,7 @@ int pvrdma_destroy_qp(struct ibv_qp *ibqp) free(qp->rq.wrid); pvrdma_free_buf(&qp->rbuf); pvrdma_free_buf(&qp->sbuf); - ctx->qp_tbl[ibqp->qp_num & 0xFFFF] = NULL; + ctx->qp_tbl[qp->qp_handle & 0xFFFF] = NULL; free(qp); return 0; @@ -547,7 +556,7 @@ out: if (nreq) { udma_to_device_barrier(); pvrdma_write_uar_qp(ctx->uar, - PVRDMA_UAR_QP_SEND | ibqp->qp_num); + PVRDMA_UAR_QP_SEND | qp->qp_handle); } pthread_spin_unlock(&qp->sq.lock); @@ -630,7 +639,7 @@ int pvrdma_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, out: if (nreq) pvrdma_write_uar_qp(ctx->uar, - PVRDMA_UAR_QP_RECV | ibqp->qp_num); + PVRDMA_UAR_QP_RECV | qp->qp_handle); pthread_spin_unlock(&qp->rq.lock); return ret;