diff mbox series

[rdma-core] vmw_pvrdma: Use resource ids from physical device if available

Message ID 1558031213-14219-1-git-send-email-aditr@vmware.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series [rdma-core] vmw_pvrdma: Use resource ids from physical device if available | expand

Commit Message

Adit Ranadive May 16, 2019, 6:27 p.m. UTC
From: Bryan Tan <bryantan@vmware.com>

This change allows user-space to use physical resource numbers if
they are passed up from the driver. Doing so allows communication with
physical non-ESX endpoints (such as a bare-metal Linux machine or a
SR-IOV-enabled VM).

This is accomplished by separating the concept of the QP number from
the QP handle. Previously, the two were the same, as the QP number
was exposed to the guest and also used to reference a virtual QP in
the device backend. With physical resource numbers exposed, the QP
number given to the guest is the QP number assigned to the physical
HCA's QP, while the QP handle is still the internal handle used to
reference a virtual QP. Regardless of whether the device is exposing
physical ids, the driver will still try to pick up the QP handle from
the backend if possible.

The MR keys exposed to the guest when the physical resource ids feature
is turned on are likewise now the MR keys created by the physical HCA,
instead of virtual MR keys.

The ABI has been updated to allow the return of the QP handle to the
guest library. The ABI version has been bumped because of this
incompatible change.

Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Bryan Tan <bryantan@vmware.com>
---
 kernel-headers/rdma/vmw_pvrdma-abi.h | 11 ++++++++++-
 providers/vmw_pvrdma/pvrdma-abi.h    |  4 +++-
 providers/vmw_pvrdma/pvrdma.h        |  1 +
 providers/vmw_pvrdma/pvrdma_main.c   |  4 ++--
 providers/vmw_pvrdma/qp.c            | 31 ++++++++++++++++++++-----------
 5 files changed, 36 insertions(+), 15 deletions(-)
---

Github PR:
https://github.com/linux-rdma/rdma-core/pull/531

---

Comments

Jason Gunthorpe May 16, 2019, 6:29 p.m. UTC | #1
On Thu, May 16, 2019 at 06:27:06PM +0000, Adit Ranadive wrote:
> From: Bryan Tan <bryantan@vmware.com>
> 
> This change allows user-space to use physical resource numbers if
> they are passed up from the driver. Doing so allows communication with
> physical non-ESX endpoints (such as a bare-metal Linux machine or a
> SR-IOV-enabled VM).
> 
> This is accomplished by separating the concept of the QP number from
> the QP handle. Previously, the two were the same, as the QP number
> was exposed to the guest and also used to reference a virtual QP in
> the device backend. With physical resource numbers exposed, the QP
> number given to the guest is the QP number assigned to the physical
> HCA's QP, while the QP handle is still the internal handle used to
> reference a virtual QP. Regardless of whether the device is exposing
> physical ids, the driver will still try to pick up the QP handle from
> the backend if possible.
> 
> The MR keys exposed to the guest when the physical resource ids feature
> is turned on are likewise now the MR keys created by the physical HCA,
> instead of virtual MR keys.
> 
> The ABI has been updated to allow the return of the QP handle to the
> guest library. The ABI version has been bumped up because of this
> non-compatible change.
> 
> Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
> Signed-off-by: Adit Ranadive <aditr@vmware.com>
> Signed-off-by: Bryan Tan <bryantan@vmware.com>
>  kernel-headers/rdma/vmw_pvrdma-abi.h | 11 ++++++++++-
>  providers/vmw_pvrdma/pvrdma-abi.h    |  4 +++-
>  providers/vmw_pvrdma/pvrdma.h        |  1 +
>  providers/vmw_pvrdma/pvrdma_main.c   |  4 ++--
>  providers/vmw_pvrdma/qp.c            | 31 ++++++++++++++++++++-----------
>  5 files changed, 36 insertions(+), 15 deletions(-)
> 
> Github PR:
> https://github.com/linux-rdma/rdma-core/pull/531
> 
> diff --git a/kernel-headers/rdma/vmw_pvrdma-abi.h b/kernel-headers/rdma/vmw_pvrdma-abi.h
> index 6e73f0274e41..8c388d623e5c 100644
> +++ b/kernel-headers/rdma/vmw_pvrdma-abi.h
> @@ -49,7 +49,11 @@
>  
>  #include <linux/types.h>
>  
> -#define PVRDMA_UVERBS_ABI_VERSION	3		/* ABI Version. */
> +#define PVRDMA_UVERBS_MIN_ABI_VERSION		3
> +#define PVRDMA_UVERBS_MAX_ABI_VERSION		4
> +
> +#define PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION	3
> +
>  #define PVRDMA_UAR_HANDLE_MASK		0x00FFFFFF	/* Bottom 24 bits. */
>  #define PVRDMA_UAR_QP_OFFSET		0		/* QP doorbell. */
>  #define PVRDMA_UAR_QP_SEND		(1 << 30)	/* Send bit. */
> @@ -179,6 +183,11 @@ struct pvrdma_create_qp {
>  	__aligned_u64 qp_addr;
>  };
>  
> +struct pvrdma_create_qp_resp {
> +	__u32 qpn;
> +	__u32 qp_handle;
> +};
> +
>  /* PVRDMA masked atomic compare and swap */
>  struct pvrdma_ex_cmp_swap {
>  	__aligned_u64 swap_val;
> diff --git a/providers/vmw_pvrdma/pvrdma-abi.h b/providers/vmw_pvrdma/pvrdma-abi.h
> index 77db9ddd1bb7..9775925f8555 100644
> +++ b/providers/vmw_pvrdma/pvrdma-abi.h
> @@ -54,8 +54,10 @@ DECLARE_DRV_CMD(user_pvrdma_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
>  		empty, pvrdma_alloc_pd_resp);
>  DECLARE_DRV_CMD(user_pvrdma_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
>  		pvrdma_create_cq, pvrdma_create_cq_resp);
> -DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
> +DECLARE_DRV_CMD(user_pvrdma_create_qp_v3, IB_USER_VERBS_CMD_CREATE_QP,
>  		pvrdma_create_qp, empty);
> +DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
> +		pvrdma_create_qp, pvrdma_create_qp_resp);
>  DECLARE_DRV_CMD(user_pvrdma_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
>  		pvrdma_create_srq, pvrdma_create_srq_resp);
>  DECLARE_DRV_CMD(user_pvrdma_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
> diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h
> index ebd50ce1c3cd..b67c07e94f90 100644
> +++ b/providers/vmw_pvrdma/pvrdma.h
> @@ -170,6 +170,7 @@ struct pvrdma_qp {
>  	struct pvrdma_wq		sq;
>  	struct pvrdma_wq		rq;
>  	int				is_srq;
> +	uint32_t			qp_handle;
>  };
>  
>  struct pvrdma_ah {
> diff --git a/providers/vmw_pvrdma/pvrdma_main.c b/providers/vmw_pvrdma/pvrdma_main.c
> index 52a2de22d44c..616310ae45c5 100644
> +++ b/providers/vmw_pvrdma/pvrdma_main.c
> @@ -201,8 +201,8 @@ static const struct verbs_match_ent hca_table[] = {
>  
>  static const struct verbs_device_ops pvrdma_dev_ops = {
>  	.name = "pvrdma",
> -	.match_min_abi_version = PVRDMA_UVERBS_ABI_VERSION,
> -	.match_max_abi_version = PVRDMA_UVERBS_ABI_VERSION,
> +	.match_min_abi_version = PVRDMA_UVERBS_MIN_ABI_VERSION,
> +	.match_max_abi_version = PVRDMA_UVERBS_MAX_ABI_VERSION,
>  	.match_table = hca_table,
>  	.alloc_device = pvrdma_device_alloc,
>  	.uninit_device = pvrdma_uninit_device,
> diff --git a/providers/vmw_pvrdma/qp.c b/providers/vmw_pvrdma/qp.c
> index ef429db93a43..a173d441df0d 100644
> +++ b/providers/vmw_pvrdma/qp.c
> @@ -211,9 +211,9 @@ struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd,
>  {
>  	struct pvrdma_device *dev = to_vdev(pd->context->device);
>  	struct user_pvrdma_create_qp cmd;
> -	struct ib_uverbs_create_qp_resp resp;
> +	struct user_pvrdma_create_qp_resp resp;
> +	struct user_pvrdma_create_qp_v3_resp resp_v3;
>  	struct pvrdma_qp *qp;
> -	int ret;
>  	int is_srq = !!(attr->srq);
>  
>  	attr->cap.max_send_sge = max_t(uint32_t, 1U, attr->cap.max_send_sge);
> @@ -282,14 +282,23 @@ struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd,
>  	cmd.rbuf_size = qp->rbuf.length;
>  	cmd.qp_addr = (uintptr_t) qp;
>  
> -	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr,
> -				&cmd.ibv_cmd, sizeof(cmd),
> -				&resp, sizeof(resp));
> +	if (dev->abi_version <= PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION) {
> +		if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd,
> +				      sizeof(cmd), &resp_v3.ibv_resp,
> +				      sizeof(resp_v3.ibv_resp)))

And here just pass in the version with the longer size and provide
some in-band way to tell if the kernel wrote to it.

Jason
diff mbox series

Patch

diff --git a/kernel-headers/rdma/vmw_pvrdma-abi.h b/kernel-headers/rdma/vmw_pvrdma-abi.h
index 6e73f0274e41..8c388d623e5c 100644
--- a/kernel-headers/rdma/vmw_pvrdma-abi.h
+++ b/kernel-headers/rdma/vmw_pvrdma-abi.h
@@ -49,7 +49,11 @@ 
 
 #include <linux/types.h>
 
-#define PVRDMA_UVERBS_ABI_VERSION	3		/* ABI Version. */
+#define PVRDMA_UVERBS_MIN_ABI_VERSION		3
+#define PVRDMA_UVERBS_MAX_ABI_VERSION		4
+
+#define PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION	3
+
 #define PVRDMA_UAR_HANDLE_MASK		0x00FFFFFF	/* Bottom 24 bits. */
 #define PVRDMA_UAR_QP_OFFSET		0		/* QP doorbell. */
 #define PVRDMA_UAR_QP_SEND		(1 << 30)	/* Send bit. */
@@ -179,6 +183,11 @@  struct pvrdma_create_qp {
 	__aligned_u64 qp_addr;
 };
 
+struct pvrdma_create_qp_resp {
+	__u32 qpn;
+	__u32 qp_handle;
+};
+
 /* PVRDMA masked atomic compare and swap */
 struct pvrdma_ex_cmp_swap {
 	__aligned_u64 swap_val;
diff --git a/providers/vmw_pvrdma/pvrdma-abi.h b/providers/vmw_pvrdma/pvrdma-abi.h
index 77db9ddd1bb7..9775925f8555 100644
--- a/providers/vmw_pvrdma/pvrdma-abi.h
+++ b/providers/vmw_pvrdma/pvrdma-abi.h
@@ -54,8 +54,10 @@  DECLARE_DRV_CMD(user_pvrdma_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD,
 		empty, pvrdma_alloc_pd_resp);
 DECLARE_DRV_CMD(user_pvrdma_create_cq, IB_USER_VERBS_CMD_CREATE_CQ,
 		pvrdma_create_cq, pvrdma_create_cq_resp);
-DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
+DECLARE_DRV_CMD(user_pvrdma_create_qp_v3, IB_USER_VERBS_CMD_CREATE_QP,
 		pvrdma_create_qp, empty);
+DECLARE_DRV_CMD(user_pvrdma_create_qp, IB_USER_VERBS_CMD_CREATE_QP,
+		pvrdma_create_qp, pvrdma_create_qp_resp);
 DECLARE_DRV_CMD(user_pvrdma_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ,
 		pvrdma_create_srq, pvrdma_create_srq_resp);
 DECLARE_DRV_CMD(user_pvrdma_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h
index ebd50ce1c3cd..b67c07e94f90 100644
--- a/providers/vmw_pvrdma/pvrdma.h
+++ b/providers/vmw_pvrdma/pvrdma.h
@@ -170,6 +170,7 @@  struct pvrdma_qp {
 	struct pvrdma_wq		sq;
 	struct pvrdma_wq		rq;
 	int				is_srq;
+	uint32_t			qp_handle;
 };
 
 struct pvrdma_ah {
diff --git a/providers/vmw_pvrdma/pvrdma_main.c b/providers/vmw_pvrdma/pvrdma_main.c
index 52a2de22d44c..616310ae45c5 100644
--- a/providers/vmw_pvrdma/pvrdma_main.c
+++ b/providers/vmw_pvrdma/pvrdma_main.c
@@ -201,8 +201,8 @@  static const struct verbs_match_ent hca_table[] = {
 
 static const struct verbs_device_ops pvrdma_dev_ops = {
 	.name = "pvrdma",
-	.match_min_abi_version = PVRDMA_UVERBS_ABI_VERSION,
-	.match_max_abi_version = PVRDMA_UVERBS_ABI_VERSION,
+	.match_min_abi_version = PVRDMA_UVERBS_MIN_ABI_VERSION,
+	.match_max_abi_version = PVRDMA_UVERBS_MAX_ABI_VERSION,
 	.match_table = hca_table,
 	.alloc_device = pvrdma_device_alloc,
 	.uninit_device = pvrdma_uninit_device,
diff --git a/providers/vmw_pvrdma/qp.c b/providers/vmw_pvrdma/qp.c
index ef429db93a43..a173d441df0d 100644
--- a/providers/vmw_pvrdma/qp.c
+++ b/providers/vmw_pvrdma/qp.c
@@ -211,9 +211,9 @@  struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd,
 {
 	struct pvrdma_device *dev = to_vdev(pd->context->device);
 	struct user_pvrdma_create_qp cmd;
-	struct ib_uverbs_create_qp_resp resp;
+	struct user_pvrdma_create_qp_resp resp;
+	struct user_pvrdma_create_qp_v3_resp resp_v3;
 	struct pvrdma_qp *qp;
-	int ret;
 	int is_srq = !!(attr->srq);
 
 	attr->cap.max_send_sge = max_t(uint32_t, 1U, attr->cap.max_send_sge);
@@ -282,14 +282,23 @@  struct ibv_qp *pvrdma_create_qp(struct ibv_pd *pd,
 	cmd.rbuf_size = qp->rbuf.length;
 	cmd.qp_addr = (uintptr_t) qp;
 
-	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr,
-				&cmd.ibv_cmd, sizeof(cmd),
-				&resp, sizeof(resp));
+	if (dev->abi_version <= PVRDMA_UVERBS_NO_QP_HANDLE_ABI_VERSION) {
+		if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd,
+				      sizeof(cmd), &resp_v3.ibv_resp,
+				      sizeof(resp_v3.ibv_resp)))
+			goto err_free;
 
-	if (ret)
-		goto err_free;
+		qp->qp_handle = qp->ibv_qp.qp_num;
+	} else {
+		if (ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd,
+				      sizeof(cmd), &resp.ibv_resp,
+				      sizeof(resp)))
+			goto err_free;
+
+		qp->qp_handle = resp.drv_payload.qp_handle;
+	}
 
-	to_vctx(pd->context)->qp_tbl[qp->ibv_qp.qp_num & 0xFFFF] = qp;
+	to_vctx(pd->context)->qp_tbl[qp->qp_handle & 0xFFFF] = qp;
 
 	/* If set, each WR submitted to the SQ generate a completion entry */
 	if (attr->sq_sig_all)
@@ -414,7 +423,7 @@  int pvrdma_destroy_qp(struct ibv_qp *ibqp)
 	free(qp->rq.wrid);
 	pvrdma_free_buf(&qp->rbuf);
 	pvrdma_free_buf(&qp->sbuf);
-	ctx->qp_tbl[ibqp->qp_num & 0xFFFF] = NULL;
+	ctx->qp_tbl[qp->qp_handle & 0xFFFF] = NULL;
 	free(qp);
 
 	return 0;
@@ -547,7 +556,7 @@  out:
 	if (nreq) {
 		udma_to_device_barrier();
 		pvrdma_write_uar_qp(ctx->uar,
-				    PVRDMA_UAR_QP_SEND | ibqp->qp_num);
+				    PVRDMA_UAR_QP_SEND | qp->qp_handle);
 	}
 
 	pthread_spin_unlock(&qp->sq.lock);
@@ -630,7 +639,7 @@  int pvrdma_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
 out:
 	if (nreq)
 		pvrdma_write_uar_qp(ctx->uar,
-				    PVRDMA_UAR_QP_RECV | ibqp->qp_num);
+				    PVRDMA_UAR_QP_RECV | qp->qp_handle);
 
 	pthread_spin_unlock(&qp->rq.lock);
 	return ret;