@@ -36,6 +36,7 @@ rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map"
cmd_fallback.c
cmd_flow_action.c
cmd_ioctl.c
+ cmd_mr.c
compat-1_0.c
device.c
dummy_ops.c
new file mode 100644
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <infiniband/cmd_ioctl.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+#include <infiniband/driver.h>
+
+int ibv_cmd_advise_mr(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sge)
+{
+ DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_MR,
+ UVERBS_METHOD_ADVISE_MR,
+ 4); /* NOTE(review): 4 matches the four fill_attr_* calls below - confirm it is the attribute count */
+ /* Inputs of the ADVISE_MR method: PD handle, advice, flags, then the sg_list array (num_sge entries). */
+ fill_attr_in_obj(cmd, UVERBS_ATTR_ADVISE_MR_PD_HANDLE, pd->handle);
+ fill_attr_const_in(cmd, UVERBS_ATTR_ADVISE_MR_ADVICE, advice);
+ fill_attr_in_uint32(cmd, UVERBS_ATTR_ADVISE_MR_FLAGS, flags);
+ fill_attr_in_ptr_array(cmd, UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+ sg_list, num_sge);
+ /* Run the command on the PD's context; the caller receives execute_ioctl()'s return value as-is. */
+ return execute_ioctl(pd->context, cmd);
+}
@@ -218,6 +218,11 @@ struct verbs_counters {
* Keep sorted.
*/
struct verbs_context_ops {
+ int (*advise_mr)(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sges);
struct ibv_dm *(*alloc_dm)(struct ibv_context *context,
struct ibv_alloc_dm_attr *attr);
struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type);
@@ -440,6 +445,11 @@ int ibv_cmd_rereg_mr(struct verbs_mr *vmr, uint32_t flags, void *addr,
size_t cmd_sz, struct ib_uverbs_rereg_mr_resp *resp,
size_t resp_sz);
int ibv_cmd_dereg_mr(struct verbs_mr *vmr);
+int ibv_cmd_advise_mr(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sge);
int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type,
struct ibv_mw *mw, struct ibv_alloc_mw *cmd,
size_t cmd_size,
@@ -33,6 +33,15 @@
#include "ibverbs.h"
#include <errno.h>
+static int advise_mr(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sges)
+{
+ return ENOSYS; /* placeholder op: ignores every argument and always reports ENOSYS */
+}
+
static struct ibv_dm *alloc_dm(struct ibv_context *context,
struct ibv_alloc_dm_attr *attr)
{
@@ -436,6 +445,7 @@ static int resize_cq(struct ibv_cq *cq, int cqe)
* Keep sorted.
*/
const struct verbs_context_ops verbs_dummy_ops = {
+ advise_mr,
alloc_dm,
alloc_mw,
alloc_null_mr,
@@ -550,6 +560,7 @@ void verbs_set_ops(struct verbs_context *vctx,
} \
} while (0)
+ SET_OP(vctx, advise_mr);
SET_OP(vctx, alloc_dm);
SET_OP(ctx, alloc_mw);
SET_OP(vctx, alloc_null_mr);
@@ -120,6 +120,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ {
__ioctl_final_num_attrs;
_verbs_init_and_alloc_context;
execute_ioctl;
+ ibv_cmd_advise_mr;
ibv_cmd_alloc_dm;
ibv_cmd_alloc_mw;
ibv_cmd_alloc_pd;
@@ -1,4 +1,5 @@
rdma_man_pages(
+ ibv_advise_mr.3.md
ibv_alloc_dm.3
ibv_alloc_mw.3
ibv_alloc_null_mr.3.md
new file mode 100644
@@ -0,0 +1,126 @@
+---
+date: 2018-10-19
+footer: libibverbs
+header: "Libibverbs Programmer's Manual"
+layout: page
+license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md'
+section: 3
+title: IBV_ADVISE_MR
+---
+
+# NAME
+
+ibv_advise_mr - Gives advice or directions to the kernel about an
+ address range belonging to a memory region (MR).
+
+# SYNOPSIS
+
+```c
+#include <infiniband/verbs.h>
+
+int ibv_advise_mr(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sge)
+```
+
+# DESCRIPTION
+
+**ibv_advise_mr()** gives advice or directions to the kernel about an
+address range belonging to a memory region (MR).
+Applications that are aware of future access patterns can use this verb
+in order to leverage this knowledge to improve system or
+application performance.
+
+**Conventional advice values**
+
+*IBV_ADVISE_MR_ADVICE_PREFETCH*
+: Pre-fetch a range of an on-demand paging MR.
+ Make pages present with read-only permission before the actual IO is conducted.
+ This would provide a way to reduce latency by overlapping paging-in
+ and either compute time or IO to other ranges.
+
+*IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE*
+: Like IBV_ADVISE_MR_ADVICE_PREFETCH but with read-access
+ and write-access permission to the fetched memory.
+
+# ARGUMENTS
+*pd*
+: The protection domain (PD) associated with the MR.
+
+*advice*
+: The requested advice value (as listed above).
+
+*flags*
+: Describes the properties of the advise operation.
+ **Conventional flag values**
+ *IBV_ADVISE_MR_FLAG_FLUSH*
+ : Request to be a synchronized operation. Return to the caller
+ after the operation is completed.
+
+*sg_list*
+: Pointer to the s/g array.
+ When using the IBV_ADVISE_MR_ADVICE_PREFETCH advice value, all the lkeys of all
+ the scatter gather elements (SGEs) must be associated with ODP MRs
+ (MRs that were registered with IBV_ACCESS_ON_DEMAND).
+
+*num_sge*
+: Number of elements in the s/g array
+
+# RETURN VALUE
+
+**ibv_advise_mr()** returns 0 when the call was successful, or the value
+ of errno on failure (which indicates the failure reason).
+
+*ENOSYS*
+: libibverbs or provider driver doesn't support the ibv_advise_mr() verb.
+
+*ENOTSUP*
+: The advise operation isn't supported.
+
+*EFAULT*
+: In one of the following:
+ o When the range requested is out of the MR bounds, or when parts of
+ it are not part of the process address space.
+ o One of the lkeys provided in the scatter gather list is invalid or
+ lacks the required write access.
+
+*EINVAL*
+: In one of the following:
+ o The PD is invalid.
+ o The flags are invalid.
+
+# NOTES
+
+An application may pre-fetch any address range within an ODP MR when using the
+**IBV_ADVISE_MR_ADVICE_PREFETCH** or **IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE** advice.
+Semantically, this operation is best-effort. That means the kernel does not
+guarantee that the underlying pages are updated in the HCA or that the
+pre-fetched pages will remain resident.
+
+When using **IBV_ADVISE_MR_ADVICE_PREFETCH** or **IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE**
+advice, the operation will be done in the following stages:
+ o Page in the user pages to memory (pages aren't pinned).
+ o Get the DMA mapping of these user pages.
+ o Post the underlying page translations to the HCA.
+
+If **IBV_ADVISE_MR_FLAG_FLUSH** is specified then the underlying pages are
+guaranteed to be updated in the HCA before returning SUCCESS.
+Otherwise the driver can choose to postpone the posting of the new translations
+to the HCA.
+When performing a local RDMA access operation it is recommended to use the
+IBV_ADVISE_MR_FLAG_FLUSH flag with one of the pre-fetch advice values to
+increase the probability that the page translations are valid in the HCA
+and to avoid future page faults.
+
+# SEE ALSO
+
+**ibv_reg_mr**(3),
+**ibv_rereg_mr**(3),
+**ibv_dereg_mr**(3)
+
+# AUTHOR
+
+Aviad Yehezkel <aviadye@mellanox.com>
+
@@ -1786,6 +1786,11 @@ struct ibv_values_ex {
struct verbs_context {
/* "grows up" - new fields go here */
+ int (*advise_mr)(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sges);
struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd);
int (*read_counters)(struct ibv_counters *counters,
uint64_t *counters_value,
@@ -2210,6 +2215,29 @@ struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context);
int ibv_destroy_comp_channel(struct ibv_comp_channel *channel);
/**
+ * ibv_advise_mr - Gives advice about an address range in MRs
+ * @pd - protection domain of all MRs that the advice applies to
+ * @advice - type of advice
+ * @flags - advice modifiers
+ * @sg_list - an array of memory ranges
+ * @num_sge - number of elements in the array
+ */
+static inline int ibv_advise_mr(struct ibv_pd *pd,
+ enum ibv_advise_mr_advice advice,
+ uint32_t flags,
+ struct ibv_sge *sg_list,
+ uint32_t num_sge)
+{
+ struct verbs_context *vctx;
+ /* Fetch the verbs_context for pd->context through the advise_mr op lookup; the result may be NULL. */
+ vctx = verbs_get_ctx_op(pd->context, advise_mr);
+ if (!vctx)
+ return ENOSYS; /* lookup yielded nothing: return ENOSYS without calling any op */
+ /* Otherwise forward every argument unchanged and return the op's result. */
+ return vctx->advise_mr(pd, advice, flags, sg_list, num_sge);
+}
+
+/**
* ibv_alloc_dm - Allocate device memory
* @context - Context DM will be attached to
* @attr - Attributes to allocate the DM with
@@ -85,6 +85,12 @@
#define ibv_flow_action_esp_encap ib_uverbs_flow_action_esp_encap
#define ibv_flow_action_esp ib_uverbs_flow_action_esp
+#define ibv_advise_mr_advice ib_uverbs_advise_mr_advice /* advice names alias their ib_uverbs_* counterparts */
+#define IBV_ADVISE_MR_ADVICE_PREFETCH IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH
+#define IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE
+/* The advise-MR flag name is aliased the same way. */
+#define IBV_ADVISE_MR_FLAG_FLUSH IB_UVERBS_ADVISE_MR_FLAG_FLUSH
+
#define IBV_QPF_GRH_REQUIRED IB_UVERBS_QPF_GRH_REQUIRED
#endif