diff mbox

[rdma-core,4/5] verbs: Add alloc_null_mr verb

Message ID 1529512102-24740-5-git-send-email-yishaih@mellanox.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Yishai Hadas June 20, 2018, 4:28 p.m. UTC
From: Yonatan Cohen <yonatanc@mellanox.com>

ibv_alloc_null_mr() allocates a null memory region (MR) that is associated
with the protection domain PD.
A null MR does not map any specific address.
It is used to force local HCA operations to skip the PCI bus access, while
keeping track of the processed length in the ibv_sge handling.
Meaning, instead of a PCI write access the HCA leaves the target memory
untouched, and skips filling that packet section.
Similar behavior occurs upon send: the HCA skips data which is pointed to
by that null MR, and so saves PCI bus access.
This functionality saves PCI read/write operations and improves performance.
The MR's member lkey is used as the lkey field of struct ibv_sge when
posting buffers with ibv_post_* verbs.
The ibv_mr member addr will be NULL, length will be SIZE_MAX, and the
rkey will be zero, as they are irrelevant.
ibv_dereg_mr() deregisters the MR.
The use of ibv_rereg_mr() or ibv_bind_mw() with this MR is invalid.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 libibverbs/driver.h                   |  2 ++
 libibverbs/dummy_ops.c                |  8 +++++
 libibverbs/man/CMakeLists.txt         |  1 +
 libibverbs/man/ibv_alloc_null_mr.3.md | 55 +++++++++++++++++++++++++++++++++++
 libibverbs/verbs.c                    |  7 ++++-
 libibverbs/verbs.h                    | 18 ++++++++++++
 6 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 libibverbs/man/ibv_alloc_null_mr.3.md

Comments

Jason Gunthorpe June 20, 2018, 7:42 p.m. UTC | #1
On Wed, Jun 20, 2018 at 07:28:21PM +0300, Yishai Hadas wrote:
> From: Yonatan Cohen <yonatanc@mellanox.com>
> 
> ibv_alloc_null_mr() allocates a null memory region (MR) that is associated
> with the protection domain PD.
> A null MR does not map any specific address.
> It is used to force local HCA operations to skip the PCI bus access, while
> keeping track of the processed length in the ibv_sge handling.
> Meaning, instead of a PCI write access the HCA leaves the target memory
> untouched, and skips filling that packet section.
> Similar behavior is done upon send, the HCA skips data which is pointed
> by that null MR and saves PCI bus access.
> This functionality saves PCI read/write operations and improve performance.
> The MR's member lkey is used as the lkey field of struct ibv_sge when
> posting buffers with ibv_post_* verbs.
> The ibv_mr member addr will be NULL, length will be SIZE_MAX, and the
> rkey will be zero, as they are irrelevant.
> ibv_dereg_mr() deregisters the MR.
> The use of ibv_rereg_mr() or ibv_bind_mw() with this MR is invalid.
> 
> Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
> Reviewed-by: Guy Levi <guyle@mellanox.com>
> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
>  libibverbs/driver.h                   |  2 ++
>  libibverbs/dummy_ops.c                |  8 +++++
>  libibverbs/man/CMakeLists.txt         |  1 +
>  libibverbs/man/ibv_alloc_null_mr.3.md | 55 +++++++++++++++++++++++++++++++++++
>  libibverbs/verbs.c                    |  7 ++++-
>  libibverbs/verbs.h                    | 18 ++++++++++++
>  6 files changed, 90 insertions(+), 1 deletion(-)
>  create mode 100644 libibverbs/man/ibv_alloc_null_mr.3.md
> 
> diff --git a/libibverbs/driver.h b/libibverbs/driver.h
> index 43077f7..64c8757 100644
> +++ b/libibverbs/driver.h
> @@ -87,6 +87,7 @@ enum ibv_gid_type {
>  
>  enum ibv_mr_type {
>  	IBV_MR_TYPE_MR,
> +	IBV_MR_TYPE_NULL_MR,
>  };
>  
>  struct verbs_mr {
> @@ -218,6 +219,7 @@ struct verbs_context_ops {
>  	struct ibv_dm *(*alloc_dm)(struct ibv_context *context,
>  				   struct ibv_alloc_dm_attr *attr);
>  	struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
> +	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
>  	struct ibv_pd *(*alloc_parent_domain)(
>  		struct ibv_context *context,
>  		struct ibv_parent_domain_init_attr *attr);
> diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
> index 1fd8f84..ddc5efe 100644
> +++ b/libibverbs/dummy_ops.c
> @@ -394,6 +394,12 @@ static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
>  	return NULL;
>  }
>  
> +static struct ibv_mr *alloc_null_mr(struct ibv_pd *pd)
> +{
> +	errno = ENOSYS;
> +	return NULL;
> +}

These function definitions are in sorted order.

>  static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>  			     int access)
>  {
> @@ -432,6 +438,7 @@ static int resize_cq(struct ibv_cq *cq, int cqe)
>  const struct verbs_context_ops verbs_dummy_ops = {
>  	alloc_dm,
>  	alloc_mw,
> +	alloc_null_mr,
>  	alloc_parent_domain,
>  	alloc_pd,
>  	alloc_td,
> @@ -607,6 +614,7 @@ void verbs_set_ops(struct verbs_context *vctx,
>  	SET_OP(ctx, req_notify_cq);
>  	SET_PRIV_OP(ctx, rereg_mr);
>  	SET_PRIV_OP(ctx, resize_cq);
> +	SET_OP(vctx, alloc_null_mr);

This list is sorted too.

> +
> +**ibv_alloc_null_mr()** allocates a null memory region (MR) that is associated with the protection
> +domain *pd*.
> +A null mr does not map any specific address.
> +It is used to force local HCA operations to skip the PCI bus access, while keeping track of the
> +processed length in the ibv_sge handling.
> +Meaning, instead of a PCI write access, the HCA leaves the target memory untouched,
> +and skips filling that packet section.
> +Similar behavior is done upon send, the HCA skips data which is pointed by that null MR
> +and saves PCI bus access.
> +This functionality saves PCI read/write operations and improve performance.
> +The local key field lkey is used in struct ibv_sge when posting buffers with
> +ibv_post_* verbs.
> +The ibv_mr member addr will be NULL, length will be SIZE_MAX, and the rkey will be zero, as they are irrelevant.
> +**ibv_dereg_mr()** deregisters the MR.
> +The use of ibv_rereg_mr() or ibv_bind_mw()
> +with this MR is invalid.

The above is a bit hard to read. Suggest:

**ibv_alloc_null_mr()** allocates a null memory region (MR) that is
associated with the protection domain *pd*.

A null MR discards all data written to it, and always returns 0 on
read. It has the maximum length and only the lkey is valid; the MR is not
exposed as an rkey.

A device should implement the null MR in a way that bypasses PCI
transfers, internally discarding or sourcing 0 data. This provides a
way to avoid PCI bus transfers by using a scatter/gather list in
commands if applications do not intend to access the data, or need
data to be 0 filled.

**ibv_dereg_mr()** deregisters the MR.  The use of ibv_rereg_mr() or
ibv_bind_mw() with this MR is invalid.

> diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
> index 83ff88c..2d04715 100644
> +++ b/libibverbs/verbs.h
> @@ -1795,6 +1795,7 @@ struct verbs_context {
>  	struct ibv_counters *(*create_counters)(struct ibv_context *context,
>  						struct ibv_counters_init_attr *init_attr);
>  	int (*destroy_counters)(struct ibv_counters *counters);
> +	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
>  	struct ibv_mr *(*reg_dm_mr)(struct ibv_pd *pd, struct ibv_dm *dm,
>  				    uint64_t dm_offset, size_t length,
>  				    unsigned int access);

WOAH! What is this?  You know better.. New stuff is always at the top.

> +/*
> + * ibv_alloc_null_mr - allocate mr with special lkey
> + */

'special lkey' is mlx5 specific language, don't use it in the generic header.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yishai Hadas June 21, 2018, 12:35 p.m. UTC | #2
On 6/20/2018 10:42 PM, Jason Gunthorpe wrote:
> On Wed, Jun 20, 2018 at 07:28:21PM +0300, Yishai Hadas wrote:
>> From: Yonatan Cohen <yonatanc@mellanox.com>
>>
>> ibv_alloc_null_mr() allocates a null memory region (MR) that is associated
>> with the protection domain PD.
>> A null MR does not map any specific address.
>> It is used to force local HCA operations to skip the PCI bus access, while
>> keeping track of the processed length in the ibv_sge handling.
>> Meaning, instead of a PCI write access the HCA leaves the target memory
>> untouched, and skips filling that packet section.
>> Similar behavior is done upon send, the HCA skips data which is pointed
>> by that null MR and saves PCI bus access.
>> This functionality saves PCI read/write operations and improve performance.
>> The MR's member lkey is used as the lkey field of struct ibv_sge when
>> posting buffers with ibv_post_* verbs.
>> The ibv_mr member addr will be NULL, length will be SIZE_MAX, and the
>> rkey will be zero, as they are irrelevant.
>> ibv_dereg_mr() deregisters the MR.
>> The use of ibv_rereg_mr() or ibv_bind_mw() with this MR is invalid.
>>
>> Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
>> Reviewed-by: Guy Levi <guyle@mellanox.com>
>> Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
>>   libibverbs/driver.h                   |  2 ++
>>   libibverbs/dummy_ops.c                |  8 +++++
>>   libibverbs/man/CMakeLists.txt         |  1 +
>>   libibverbs/man/ibv_alloc_null_mr.3.md | 55 +++++++++++++++++++++++++++++++++++
>>   libibverbs/verbs.c                    |  7 ++++-
>>   libibverbs/verbs.h                    | 18 ++++++++++++
>>   6 files changed, 90 insertions(+), 1 deletion(-)
>>   create mode 100644 libibverbs/man/ibv_alloc_null_mr.3.md
>>
>> diff --git a/libibverbs/driver.h b/libibverbs/driver.h
>> index 43077f7..64c8757 100644
>> +++ b/libibverbs/driver.h
>> @@ -87,6 +87,7 @@ enum ibv_gid_type {
>>   
>>   enum ibv_mr_type {
>>   	IBV_MR_TYPE_MR,
>> +	IBV_MR_TYPE_NULL_MR,
>>   };
>>   
>>   struct verbs_mr {
>> @@ -218,6 +219,7 @@ struct verbs_context_ops {
>>   	struct ibv_dm *(*alloc_dm)(struct ibv_context *context,
>>   				   struct ibv_alloc_dm_attr *attr);
>>   	struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
>> +	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
>>   	struct ibv_pd *(*alloc_parent_domain)(
>>   		struct ibv_context *context,
>>   		struct ibv_parent_domain_init_attr *attr);
>> diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
>> index 1fd8f84..ddc5efe 100644
>> +++ b/libibverbs/dummy_ops.c
>> @@ -394,6 +394,12 @@ static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
>>   	return NULL;
>>   }
>>   
>> +static struct ibv_mr *alloc_null_mr(struct ibv_pd *pd)
>> +{
>> +	errno = ENOSYS;
>> +	return NULL;
>> +}
> 
> These function definitions are in sorted order.
> 
>>   static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>>   			     int access)
>>   {
>> @@ -432,6 +438,7 @@ static int resize_cq(struct ibv_cq *cq, int cqe)
>>   const struct verbs_context_ops verbs_dummy_ops = {
>>   	alloc_dm,
>>   	alloc_mw,
>> +	alloc_null_mr,
>>   	alloc_parent_domain,
>>   	alloc_pd,
>>   	alloc_td,
>> @@ -607,6 +614,7 @@ void verbs_set_ops(struct verbs_context *vctx,
>>   	SET_OP(ctx, req_notify_cq);
>>   	SET_PRIV_OP(ctx, rereg_mr);
>>   	SET_PRIV_OP(ctx, resize_cq);
>> +	SET_OP(vctx, alloc_null_mr);
> 
> This list is sorted too.

OK

> 
>> +
>> +**ibv_alloc_null_mr()** allocates a null memory region (MR) that is associated with the protection
>> +domain *pd*.
>> +A null mr does not map any specific address.
>> +It is used to force local HCA operations to skip the PCI bus access, while keeping track of the
>> +processed length in the ibv_sge handling.
>> +Meaning, instead of a PCI write access, the HCA leaves the target memory untouched,
>> +and skips filling that packet section.
>> +Similar behavior is done upon send, the HCA skips data which is pointed by that null MR
>> +and saves PCI bus access.
>> +This functionality saves PCI read/write operations and improve performance.
>> +The local key field lkey is used in struct ibv_sge when posting buffers with
>> +ibv_post_* verbs.
>> +The ibv_mr member addr will be NULL, length will be SIZE_MAX, and the rkey will be zero, as they are irrelevant.
>> +**ibv_dereg_mr()** deregisters the MR.
>> +The use of ibv_rereg_mr() or ibv_bind_mw()
>> +with this MR is invalid.
> 
> The above is a bit hard to read.. Suggeest
> 
> **ibv_alloc_null_mr()** allocates a null memory region (MR) that is
> associated with the protection domain *pd*.
> 
> A null MR discards all data written to it, and always returns 0 on
> read. It has the maximum length and oly the lkey is valid, the MR is not
> exposed as an rkey.
> 
> A device should implement the null MR in a way that bypasses PCI
> transfers, internally discarding or sourcing 0 data. This provides a
> way to avoid PCI bus transfers by using a scatter/gather list in
> commands if applications do not intend to access the data, or need
> data to be 0 filled.
> 

We are fine with the above suggestion, and will add a few more sentences
here, as below, to clarify further:

Specifically, upon **ibv_post_send()** the device skips PCI read cycles,
and upon **ibv_post_recv()** the device skips PCI write cycles, which
ultimately improves performance.

> **ibv_dereg_mr()** deregisters the MR.  The use of ibv_rereg_mr() or
> ibv_bind_mw() with this MR is invalid.
> 
>> diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
>> index 83ff88c..2d04715 100644
>> +++ b/libibverbs/verbs.h
>> @@ -1795,6 +1795,7 @@ struct verbs_context {
>>   	struct ibv_counters *(*create_counters)(struct ibv_context *context,
>>   						struct ibv_counters_init_attr *init_attr);
>>   	int (*destroy_counters)(struct ibv_counters *counters);
>> +	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
>>   	struct ibv_mr *(*reg_dm_mr)(struct ibv_pd *pd, struct ibv_dm *dm,
>>   				    uint64_t dm_offset, size_t length,
>>   				    unsigned int access);
> 
> WOAH! What is this?  You know better.. New stuff is always at the top.

Sure... this came from a rebase at the last step to include the counters 
stuff that was accepted, will fix.

> 
>> +/*
>> + * ibv_alloc_null_mr - allocate mr with special lkey
>> + */
> 
> 'special lkey' is mlx5 specific language, don't use it in the generic header.
> 

OK
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/libibverbs/driver.h b/libibverbs/driver.h
index 43077f7..64c8757 100644
--- a/libibverbs/driver.h
+++ b/libibverbs/driver.h
@@ -87,6 +87,7 @@  enum ibv_gid_type {
 
 enum ibv_mr_type {
 	IBV_MR_TYPE_MR,
+	IBV_MR_TYPE_NULL_MR,
 };
 
 struct verbs_mr {
@@ -218,6 +219,7 @@  struct verbs_context_ops {
 	struct ibv_dm *(*alloc_dm)(struct ibv_context *context,
 				   struct ibv_alloc_dm_attr *attr);
 	struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
+	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
 	struct ibv_pd *(*alloc_parent_domain)(
 		struct ibv_context *context,
 		struct ibv_parent_domain_init_attr *attr);
diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
index 1fd8f84..ddc5efe 100644
--- a/libibverbs/dummy_ops.c
+++ b/libibverbs/dummy_ops.c
@@ -394,6 +394,12 @@  static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
 	return NULL;
 }
 
+static struct ibv_mr *alloc_null_mr(struct ibv_pd *pd)
+{
+	errno = ENOSYS;
+	return NULL;
+}
+
 static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 			     int access)
 {
@@ -432,6 +438,7 @@  static int resize_cq(struct ibv_cq *cq, int cqe)
 const struct verbs_context_ops verbs_dummy_ops = {
 	alloc_dm,
 	alloc_mw,
+	alloc_null_mr,
 	alloc_parent_domain,
 	alloc_pd,
 	alloc_td,
@@ -607,6 +614,7 @@  void verbs_set_ops(struct verbs_context *vctx,
 	SET_OP(ctx, req_notify_cq);
 	SET_PRIV_OP(ctx, rereg_mr);
 	SET_PRIV_OP(ctx, resize_cq);
+	SET_OP(vctx, alloc_null_mr);
 
 #undef SET_OP
 #undef SET_OP2
diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt
index 5cd30d2..99c57d7 100644
--- a/libibverbs/man/CMakeLists.txt
+++ b/libibverbs/man/CMakeLists.txt
@@ -1,6 +1,7 @@ 
 rdma_man_pages(
   ibv_alloc_dm.3
   ibv_alloc_mw.3
+  ibv_alloc_null_mr.3.md
   ibv_alloc_parent_domain.3
   ibv_alloc_pd.3
   ibv_alloc_td.3
diff --git a/libibverbs/man/ibv_alloc_null_mr.3.md b/libibverbs/man/ibv_alloc_null_mr.3.md
new file mode 100644
index 0000000..6f234c4
--- /dev/null
+++ b/libibverbs/man/ibv_alloc_null_mr.3.md
@@ -0,0 +1,55 @@ 
+---
+date: 2018-6-1
+footer: libibverbs
+header: "Libibverbs Programmer's Manual"
+layout: page
+license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md'
+section: 3
+title: ibv_alloc_null_mr
+---
+
+# NAME
+
+ibv_alloc_null_mr - allocate a null memory region (MR)
+
+# SYNOPSIS
+
+```c
+#include <infiniband/verbs.h>
+
+struct ibv_mr *ibv_alloc_null_mr(struct ibv_pd *pd);
+```
+
+
+# DESCRIPTION
+
+**ibv_alloc_null_mr()** allocates a null memory region (MR) that is associated with the protection
+domain *pd*.
+A null MR does not map any specific address.
+It is used to force local HCA operations to skip the PCI bus access, while keeping track of the
+processed length in the ibv_sge handling.
+Meaning, instead of a PCI write access, the HCA leaves the target memory untouched,
+and skips filling that packet section.
+Similar behavior is done upon send, the HCA skips data which is pointed by that null MR
+and saves PCI bus access.
+This functionality saves PCI read/write operations and improves performance.
+The local key field lkey is used in struct ibv_sge when posting buffers with
+ibv_post_* verbs.
+The ibv_mr member addr will be NULL, length will be SIZE_MAX, and the rkey will be zero, as they are irrelevant.
+**ibv_dereg_mr()** deregisters the MR.
+The use of ibv_rereg_mr() or ibv_bind_mw()
+with this MR is invalid.
+
+# RETURN VALUE
+
+**ibv_alloc_null_mr()** returns a pointer to the allocated MR, or NULL if the request fails.
+
+# SEE ALSO
+
+**ibv_reg_mr**(3),
+**ibv_dereg_mr**(3),
+
+# AUTHOR
+
+Yonatan Cohen <yonatanc@mellanox.com>
+
diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c
index dd74e64..a0aac36 100644
--- a/libibverbs/verbs.c
+++ b/libibverbs/verbs.c
@@ -253,6 +253,11 @@  LATEST_SYMVER_FUNC(ibv_rereg_mr, 1_1, "IBVERBS_1.1",
 	void *old_addr;
 	size_t old_len;
 
+	if (verbs_get_mr(mr)->mr_type != IBV_MR_TYPE_MR) {
+		errno = EINVAL;
+		return IBV_REREG_MR_ERR_INPUT;
+	}
+
 	if (flags & ~IBV_REREG_MR_FLAGS_SUPPORTED) {
 		errno = EINVAL;
 		return IBV_REREG_MR_ERR_INPUT;
@@ -311,7 +316,7 @@  LATEST_SYMVER_FUNC(ibv_dereg_mr, 1_1, "IBVERBS_1.1",
 	size_t length	= mr->length;
 
 	ret = get_ops(mr->context)->dereg_mr(verbs_get_mr(mr));
-	if (!ret)
+	if (!ret && (verbs_get_mr(mr)->mr_type == IBV_MR_TYPE_MR))
 		ibv_dofork_range(addr, length);
 
 	return ret;
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 83ff88c..2d04715 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -1795,6 +1795,7 @@  struct verbs_context {
 	struct ibv_counters *(*create_counters)(struct ibv_context *context,
 						struct ibv_counters_init_attr *init_attr);
 	int (*destroy_counters)(struct ibv_counters *counters);
+	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
 	struct ibv_mr *(*reg_dm_mr)(struct ibv_pd *pd, struct ibv_dm *dm,
 				    uint64_t dm_offset, size_t length,
 				    unsigned int access);
@@ -2206,6 +2207,23 @@  int ibv_memcpy_from_dm(void *host_addr, struct ibv_dm *dm,
 	return dm->memcpy_from_dm(host_addr, dm, dm_offset, length);
 }
 
+/*
+ * ibv_alloc_null_mr - allocate mr with special lkey
+ */
+static inline
+struct ibv_mr *ibv_alloc_null_mr(struct ibv_pd *pd)
+{
+	struct verbs_context *vctx;
+
+	vctx = verbs_get_ctx_op(pd->context, alloc_null_mr);
+	if (!vctx) {
+		errno = ENOSYS;
+		return NULL;
+	}
+
+	return vctx->alloc_null_mr(pd);
+}
+
 /**
  * ibv_reg_dm_mr - Register device memory as a memory region
  * @pd - The PD to associated this MR with