@@ -121,6 +121,11 @@ IBVERBS_1.7 {
ibv_reg_mr_iova;
} IBVERBS_1.6;
+IBVERBS_1.8 {
+ global:
+ ibv_reg_mr_iova2;
+} IBVERBS_1.7;
+
/* If any symbols in this stanza change ABI then the entire staza gets a new symbol
version. See the top level CMakeLists.txt for this setting. */
@@ -43,6 +43,21 @@ describes the desired memory protection attributes; it is either 0 or the bitwis
.B IBV_ACCESS_ZERO_BASED\fR Use byte offset from beginning of MR to access this MR, instead of a pointer address
.TP
.B IBV_ACCESS_ON_DEMAND\fR Create an on-demand paging MR
+.TP
+.B IBV_ACCESS_RELAXED_ORDERING\fR Create an MR that uses relaxed ordering.
+
+Such MRs may reorder the scattering of Send/Write/Atomic operations to memory,
+so the target CPU and other RDMA operations may observe them in a
+different order than requested at the initiator. In addition, memory
+reads from such MR do not flush PCI-Express writes from peer devices.
+
+It is only safe to read a data buffer that was written to a relaxed MR after
+observing the approperiate completion, or after observing a subsequent atomic
+operation on a strongly ordered MR.
+
+Relaxed MRs can improve performance on some systems. On systems/drivers without
+adequate support this flag is ignored.
+
.PP
If
.B IBV_ACCESS_REMOTE_WRITE
@@ -296,6 +296,7 @@ LATEST_SYMVER_FUNC(ibv_dealloc_pd, 1_1, "IBVERBS_1.1",
return get_ops(pd->context)->dealloc_pd(pd);
}
+#undef ibv_reg_mr
LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
struct ibv_mr *,
struct ibv_pd *pd, void *addr,
@@ -319,6 +320,8 @@ LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
return mr;
}
+
+#undef ibv_reg_mr_iova
struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
uint64_t iova, int access)
{
@@ -339,6 +342,18 @@ struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
return mr;
}
+LATEST_SYMVER_FUNC(ibv_reg_mr_iova2, 1_8, "IBVERBS_1.8",
+ struct ibv_mr *,
+ struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t iova, int access)
+{
+ /* TODO check whether kernel supports optional
+ * IBV_ACCESS_RELAXED_ORDERING and clear it if it is not supported
+ */
+
+ return ibv_reg_mr_iova(pd, addr, length, iova, access);
+}
+
LATEST_SYMVER_FUNC(ibv_rereg_mr, 1_1, "IBVERBS_1.1",
int,
struct ibv_mr *mr, int flags,
@@ -580,6 +580,7 @@ enum ibv_access_flags {
IBV_ACCESS_MW_BIND = (1<<4),
IBV_ACCESS_ZERO_BASED = (1<<5),
IBV_ACCESS_ON_DEMAND = (1<<6),
+ IBV_ACCESS_RELAXED_ORDERING = (1<<8),
};
struct ibv_mw_bind_info {
@@ -2387,6 +2388,18 @@ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
*/
struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
size_t length, int access);
+/* Use the new version of ibv_reg_mr only if flags that require it are used. */
+#define ibv_reg_mr(pd, addr, length, access) ({\
+ const int optional_access_flags = IBV_ACCESS_RELAXED_ORDERING; \
+ struct ibv_mr *__mr; \
+ \
+ if (__builtin_constant_p(access) && \
+ !((access) & optional_access_flags)) \
+ __mr = ibv_reg_mr(pd, addr, length, access); \
+ else \
+ __mr = ibv_reg_mr_iova2(pd, addr, length, (uintptr_t)addr, \
+ access); \
+ __mr; })
/**
* ibv_reg_mr_iova - Register a memory region with a virtual offset
@@ -2394,7 +2407,26 @@ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
*/
struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
uint64_t iova, int access);
-
+/* Use the new version of ibv_reg_mr only if flags that require it are used. */
+#define ibv_reg_mr_iova(pd, addr, length, iova, access) ({\
+ const int optional_access_flags = IBV_ACCESS_RELAXED_ORDERING; \
+ struct ibv_mr *__mr; \
+ \
+ if (__builtin_constant_p(access) && \
+ !((access) & optional_access_flags)) \
+ __mr = ibv_reg_mr_iova(pd, addr, length, iova, access); \
+ else \
+ __mr = ibv_reg_mr_iova2(pd, addr, length, iova, access); \
+ __mr; })
+
+/**
+ * ibv_reg_mr_iova2 - Register a memory region with a virtual offset
+ *
+ * This version of ibv_reg_mr_iova can ignore optional access flags such as
+ * IBV_ACCESS_RELAXED_ORDERING if they are not supported.
+ */
+struct ibv_mr *ibv_reg_mr_iova2(struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t iova, int access);
enum ibv_rereg_mr_err_code {
/* Old MR is valid, invalid input */
@@ -454,7 +454,8 @@ enum {
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_REMOTE_ATOMIC |
- IBV_ACCESS_ZERO_BASED
+ IBV_ACCESS_ZERO_BASED |
+ IBV_ACCESS_RELAXED_ORDERING
};
struct ibv_mr *mlx5_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *ibdm,
Add flag to allow the creation of relaxed ordering memory regions. Access through such MRs can improve performance by allowing the system to reorder certain memory accesses. As relaxed ordering is an optimization, drivers that do not support it can simply ignore it, and it is recommended that users enable it whenever completions are used to synchronize with the HCA. We replace the symbols ibv_reg_mr/ibv_reg_mr_iova with a new symbol (ibv_reg_mr_iova2). The new function would check against the kernel whether relaxed ordering is supported or not, and disable it if necessary. Running against an older library will fail with a linker error instead of an obscure EINVAL from the kernel. Signed-off-by: Haggai Eran <haggaie@mellanox.com> --- libibverbs/libibverbs.map.in | 5 +++++ libibverbs/man/ibv_reg_mr.3 | 15 +++++++++++++++ libibverbs/verbs.c | 15 +++++++++++++++ libibverbs/verbs.h | 34 +++++++++++++++++++++++++++++++++- providers/mlx5/verbs.c | 3 ++- 5 files changed, 70 insertions(+), 2 deletions(-)