diff mbox series

[RFC,rdma-core,3/3] verbs: Introduce ibv_advise_mr verb

Message ID 1541428068-17056-4-git-send-email-yishaih@mellanox.com (mailing list archive)
State RFC
Headers show
Series Introduce ibv_advise_mr verb | expand

Commit Message

Yishai Hadas Nov. 5, 2018, 2:27 p.m. UTC
From: Moni Shoua <monis@mellanox.com>

Introduce a new verb named ibv_advise_mr(), it includes:
- The application interface.
- The command interface with the kernel.

A detailed man page describes the verb's purpose and its usage.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
---
 libibverbs/CMakeLists.txt         |   1 +
 libibverbs/cmd_mr.c               |  57 +++++++++++++++++
 libibverbs/driver.h               |  10 +++
 libibverbs/dummy_ops.c            |  11 ++++
 libibverbs/libibverbs.map.in      |   1 +
 libibverbs/man/CMakeLists.txt     |   1 +
 libibverbs/man/ibv_advise_mr.3.md | 125 ++++++++++++++++++++++++++++++++++++++
 libibverbs/verbs.h                |  28 +++++++++
 libibverbs/verbs_api.h            |   6 ++
 9 files changed, 240 insertions(+)
 create mode 100644 libibverbs/cmd_mr.c
 create mode 100644 libibverbs/man/ibv_advise_mr.3.md
diff mbox series

Patch

diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt
index ddf5995..2ddafd6 100644
--- a/libibverbs/CMakeLists.txt
+++ b/libibverbs/CMakeLists.txt
@@ -36,6 +36,7 @@  rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map"
   cmd_fallback.c
   cmd_flow_action.c
   cmd_ioctl.c
+  cmd_mr.c
   compat-1_0.c
   device.c
   dummy_ops.c
diff --git a/libibverbs/cmd_mr.c b/libibverbs/cmd_mr.c
new file mode 100644
index 0000000..c6a9eb0
--- /dev/null
+++ b/libibverbs/cmd_mr.c
@@ -0,0 +1,57 @@ 
+
+/*
+ * Copyright (c) 2018 Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ *   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ *   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ *   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ *   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *   SOFTWARE.
+ */
+
+#include <infiniband/cmd_ioctl.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+#include <infiniband/driver.h>
+#include <infiniband/cmd_write.h>
+
+int ibv_cmd_advise_mr(struct ibv_pd *pd,
+		      enum ibv_advise_mr_advice advice,
+		      uint32_t flags,
+		      struct ibv_sge *sg_list,
+		      uint32_t num_sge)
+{
+	DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_MR,
+				    UVERBS_METHOD_ADVISE_MR,
+				    4); /* 4 matches the four fill_attr_* calls below */
+
+	fill_attr_in_obj(cmd, UVERBS_ATTR_ADVISE_MR_PD_HANDLE, pd->handle);
+	fill_attr_const_in(cmd, UVERBS_ATTR_ADVISE_MR_ADVICE, advice);
+	fill_attr_in_uint32(cmd, UVERBS_ATTR_ADVISE_MR_FLAGS, flags);
+	fill_attr_in_ptr_array(cmd, UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+			       sg_list, num_sge); /* passes the sg_list array with its num_sge count */
+
+	return execute_ioctl(pd->context, cmd); /* result of execute_ioctl() is returned unchanged */
+
+}
diff --git a/libibverbs/driver.h b/libibverbs/driver.h
index adf46c3..fb562d4 100644
--- a/libibverbs/driver.h
+++ b/libibverbs/driver.h
@@ -218,6 +218,11 @@  struct verbs_counters {
  * Keep sorted.
  */
 struct verbs_context_ops {
+	int (*advise_mr)(struct ibv_pd *pd,
+			 enum ibv_advise_mr_advice advice,
+			 uint32_t flags,
+			 struct ibv_sge *sg_list,
+			 uint32_t num_sges);
 	struct ibv_dm *(*alloc_dm)(struct ibv_context *context,
 				   struct ibv_alloc_dm_attr *attr);
 	struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
@@ -442,6 +447,11 @@  int ibv_cmd_rereg_mr(struct verbs_mr *vmr, uint32_t flags, void *addr,
 		     size_t cmd_sz, struct ib_uverbs_rereg_mr_resp *resp,
 		     size_t resp_sz);
 int ibv_cmd_dereg_mr(struct verbs_mr *vmr);
+int ibv_cmd_advise_mr(struct ibv_pd *pd,
+		      enum ibv_advise_mr_advice advice,
+		      uint32_t flags,
+		      struct ibv_sge *sg_list,
+		      uint32_t num_sge);
 int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type,
 		     struct ibv_mw *mw, struct ibv_alloc_mw *cmd,
 		     size_t cmd_size,
diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
index 43f8430..a5e9380 100644
--- a/libibverbs/dummy_ops.c
+++ b/libibverbs/dummy_ops.c
@@ -33,6 +33,15 @@ 
 #include "ibverbs.h"
 #include <errno.h>
 
+static int advise_mr(struct ibv_pd *pd,
+		     enum ibv_advise_mr_advice advice,
+		     uint32_t flags,
+		     struct ibv_sge *sg_list,
+		     uint32_t num_sges)
+{
+	return ENOSYS; /* dummy op: ignores every argument and returns the positive ENOSYS value */
+}
+
 static struct ibv_dm *alloc_dm(struct ibv_context *context,
 			       struct ibv_alloc_dm_attr *attr)
 {
@@ -436,6 +445,7 @@  static int resize_cq(struct ibv_cq *cq, int cqe)
  * Keep sorted.
  */
 const struct verbs_context_ops verbs_dummy_ops = {
+	advise_mr,
 	alloc_dm,
 	alloc_mw,
 	alloc_null_mr,
@@ -550,6 +560,7 @@  void verbs_set_ops(struct verbs_context *vctx,
 		}                                                              \
 	} while (0)
 
+	SET_OP(vctx, advise_mr);
 	SET_OP(vctx, alloc_dm);
 	SET_OP(ctx, alloc_mw);
 	SET_OP(vctx, alloc_null_mr);
diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
index c489c76..4bffb1b 100644
--- a/libibverbs/libibverbs.map.in
+++ b/libibverbs/libibverbs.map.in
@@ -120,6 +120,7 @@  IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ {
 		__ioctl_final_num_attrs;
 		_verbs_init_and_alloc_context;
 		execute_ioctl;
+		ibv_cmd_advise_mr;
 		ibv_cmd_alloc_dm;
 		ibv_cmd_alloc_mw;
 		ibv_cmd_alloc_pd;
diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt
index 6efd671..4d5abef 100644
--- a/libibverbs/man/CMakeLists.txt
+++ b/libibverbs/man/CMakeLists.txt
@@ -1,4 +1,5 @@ 
 rdma_man_pages(
+  ibv_advise_mr.3.md
   ibv_alloc_dm.3
   ibv_alloc_mw.3
   ibv_alloc_null_mr.3.md
diff --git a/libibverbs/man/ibv_advise_mr.3.md b/libibverbs/man/ibv_advise_mr.3.md
new file mode 100644
index 0000000..ce7747f
--- /dev/null
+++ b/libibverbs/man/ibv_advise_mr.3.md
@@ -0,0 +1,125 @@ 
+---
+date: 2018-10-19
+footer: libibverbs
+header: "Libibverbs Programmer's Manual"
+layout: page
+license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md'
+section: 3
+title: IBV_ADVISE_MR
+---
+
+# NAME
+
+ibv_advise_mr - Gives advice or directions to the kernel about an
+		address range belonging to a memory region (MR).
+
+# SYNOPSIS
+
+```c
+#include <infiniband/verbs.h>
+
+int ibv_advise_mr(struct ibv_pd *pd,
+		  enum ibv_advise_mr_advice advice,
+		  uint32_t flags,
+		  struct ibv_sge *sg_list,
+		  uint32_t num_sge)
+```
+
+# DESCRIPTION
+
+**ibv_advise_mr()** gives advice or directions to the kernel about an
+address range belonging to a memory region (MR).
+Applications that are aware of future access patterns can use this verb
+in order to leverage this knowledge to improve system or
+application performance.
+
+**Conventional advice values**
+*IBV_ADVISE_MR_ADVICE_PREFETCH*
+:	Pre-fetch a range of an on-demand paging MR.
+	Make pages present before the actual IO is conducted.
+	This would provide a way to reduce latency by overlapping paging-in
+	and either compute time or IO to other ranges.
+
+# ARGUMENTS
+*pd*
+:	The protection domain (PD) associated with the MR.
+
+*advice*
+:	The requested advice value (as listed above).
+
+*flags*
+:	Describes the properties of the advise operation
+	**Conventional flag values**
+	*IBV_ADVISE_MR_FLAG_SYNC*
+	:	Request to be a synchronized operation
+	*IBV_ADVISE_MR_FLAG_WRITE_ACCESS*
+	:	When using the IBV_ADVISE_MR_ADVICE_PREFETCH advice value, one should
+		specify this flag to allow pre-fetching with a future write
+		access (The MR must allow write access).
+		The default pre-fetching behavior is read only access.
+
+*sg_list*
+:	Pointer to the s/g array
+	When using the IBV_ADVISE_MR_ADVICE_PREFETCH advice value, all the lkeys of all
+	the scatter gather elements (SGEs) must be associated with ODP MRs
+	(MRs that were registered with IBV_ACCESS_ON_DEMAND).
+
+*num_sge*
+:	Number of elements in the s/g array
+
+# RETURN VALUE
+
+**ibv_advise_mr()** returns 0 when the call was successful, or the value
+		    of errno on failure (which indicates the failure reason).
+
+*ENOSYS*
+:	libibverbs or provider driver doesn't support the ibv_advise_mr() verb.
+
+*ENOTSUP*
+:	The advise operation isn't supported.
+
+*EFAULT*
+:	In one of the following:
+	o When the range requested is out of the MR bounds, or when parts of
+	  it are not part of the process address space.
+	o One of the lkeys provided in the scatter gather list is invalid or
+	  with wrong write access.
+
+*EINVAL*
+:	In one of the following:
+	o The PD is invalid.
+	o The flags are invalid.
+
+# NOTES
+
+An application may pre-fetch any address range within an ODP MR when using the
+IBV_ADVISE_MR_ADVICE_PREFETCH advice.
+Semantically, this operation is best-effort. That means the kernel does not
+guarantee that underlying pages are updated in the HCA or the pre-fetched pages
+would remain resident.
+
+When using IBV_ADVISE_MR_ADVICE_PREFETCH advice, the operation will be done in
+the following stages:
+	o Page in the user pages to memory (pages aren't pinned).
+	o Get the dma mapping of these user pages.
+	o Post the underlying page translations to the HCA.
+
+If **IBV_ADVISE_MR_FLAG_SYNC** is specified then the underlying pages are
+guaranteed to be updated in the HCA before returning SUCCESS.
+Otherwise the driver can choose to postpone the posting of the new translations
+to the HCA.
+When performing a local RDMA access operation it is recommended to use
+IBV_ADVISE_MR_FLAG_SYNC flag with IBV_ADVISE_MR_ADVICE_PREFETCH advice to
+increase the probability that the page translations are valid in the HCA
+and avoid future page faults.
+
+# SEE ALSO
+
+**ibv_reg_mr**(3),
+**ibv_rereg_mr**(3),
+**ibv_dereg_mr**(3)
+
+# AUTHOR
+
+Aviad Yehezkel <aviadye@mellanox.com>
+
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 93e0430..4107703 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -1786,6 +1786,11 @@  struct ibv_values_ex {
 
 struct verbs_context {
 	/*  "grows up" - new fields go here */
+	int (*advise_mr)(struct ibv_pd *pd,
+			 enum ibv_advise_mr_advice advice,
+			 uint32_t flags,
+			 struct ibv_sge *sg_list,
+			 uint32_t num_sges);
 	struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
 	int (*read_counters)(struct ibv_counters *counters,
 			     uint64_t *counters_value,
@@ -2210,6 +2215,29 @@  struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context);
 int ibv_destroy_comp_channel(struct ibv_comp_channel *channel);
 
 /**
+ * ibv_advise_mr - Gives advice about an address range in MRs
+ * @pd - protection domain of all MRs to which the advice applies
+ * @advice - type of advice
+ * @flags - advice modifiers
+ * @sg_list - an array of memory ranges
+ * @num_sge - number of elements in the array
+ */
+static inline int ibv_advise_mr(struct ibv_pd *pd,
+				enum ibv_advise_mr_advice advice,
+				uint32_t flags,
+				struct ibv_sge *sg_list,
+				uint32_t num_sge)
+{
+	struct verbs_context *vctx;
+
+	vctx = verbs_get_ctx_op(pd->context, advise_mr); /* presumably NULL when the context lacks an advise_mr op - confirm against verbs_get_ctx_op */
+	if (!vctx)
+		return ENOSYS; /* error is the positive ENOSYS value itself, not -1 */
+
+	return vctx->advise_mr(pd, advice, flags, sg_list, num_sge); /* forwards all arguments unchanged to the advise_mr op */
+}
+
+/**
  * ibv_alloc_dm - Allocate device memory
  * @context - Context DM will be attached to
  * @attr - Attributes to allocate the DM with
diff --git a/libibverbs/verbs_api.h b/libibverbs/verbs_api.h
index 4ac1335..743b305 100644
--- a/libibverbs/verbs_api.h
+++ b/libibverbs/verbs_api.h
@@ -85,6 +85,12 @@ 
 #define ibv_flow_action_esp_encap                       ib_uverbs_flow_action_esp_encap
 #define ibv_flow_action_esp                             ib_uverbs_flow_action_esp
 
+#define ibv_advise_mr_advice                            ib_uverbs_advise_mr_advice
+#define IBV_ADVISE_MR_ADVICE_PREFETCH                   IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH
+
+#define IBV_ADVISE_MR_FLAG_SYNC                         IB_UVERBS_ADVISE_MR_FLAG_SYNC
+#define IBV_ADVISE_MR_FLAG_WRITE_ACCESS                 IB_UVERBS_ADVISE_MR_FLAG_WRITE_ACCESS
+
 #define IBV_QPF_GRH_REQUIRED				IB_UVERBS_QPF_GRH_REQUIRED
 
 #endif