From patchwork Sun Jul 5 08:19:39 2020
X-Patchwork-Submitter: Yishai Hadas
X-Patchwork-Id: 11644203
From: Yishai Hadas <yishaih@mellanox.com>
To: linux-rdma@vger.kernel.org
Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com
Subject: [PATCH V1 rdma-core 03/13] verbs: Enhance async FD usage
Date: Sun, 5 Jul 2020 11:19:39 +0300
Message-Id: <1593937189-8744-4-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>
References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>

Enhance async FD usage by adding an option to enforce it as a mandatory
attribute on a given device once the applicable objects (e.g. QP, CQ, etc.)
are created.

This is a preparation step for the next patch, which introduces the
ibv_import_device() verb; an imported device must use its own async FD to be
able to get events on its privately created objects.

In addition, ibv_cmd_alloc_async_fd() is now called as part of
ibv_open_device() when the FD was not allocated yet (i.e. the ioctl flow);
this aligns with the import device flow that will be introduced.
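For reference, the application-visible async event flow is unchanged by this
patch; the sketch below is illustrative only (the helper name
watch_async_events() is made up and not part of this series) and shows a
process opening a device and consuming async events, which after this patch
also covers the ioctl flow where the async FD is allocated inside
ibv_open_device():

```c
/*
 * Minimal sketch, not part of the patch: standard async event handling.
 * After this change the async FD is set up inside ibv_open_device() when
 * the ioctl flow is used, so nothing changes for the application.
 */
#include <stdio.h>
#include <infiniband/verbs.h>

int watch_async_events(struct ibv_device *dev)
{
	struct ibv_context *ctx;
	struct ibv_async_event event;

	ctx = ibv_open_device(dev);	/* async FD is allocated here */
	if (!ctx)
		return -1;

	/* Blocks until the device reports an asynchronous event */
	if (!ibv_get_async_event(ctx, &event)) {
		fprintf(stderr, "async event type %d\n", event.event_type);
		ibv_ack_async_event(&event);
	}

	return ibv_close_device(ctx);
}
```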
Signed-off-by: Yishai Hadas --- libibverbs/cmd_cq.c | 9 +++++++-- libibverbs/cmd_device.c | 5 +---- libibverbs/cmd_qp.c | 4 ++++ libibverbs/cmd_srq.c | 4 ++++ libibverbs/cmd_wq.c | 4 ++++ libibverbs/device.c | 8 ++++++++ libibverbs/driver.h | 1 + libibverbs/ibverbs.h | 1 + 8 files changed, 30 insertions(+), 6 deletions(-) diff --git a/libibverbs/cmd_cq.c b/libibverbs/cmd_cq.c index ec6f0e6..e8113d4 100644 --- a/libibverbs/cmd_cq.c +++ b/libibverbs/cmd_cq.c @@ -31,6 +31,7 @@ */ #include +#include "ibverbs.h" static int ibv_icmd_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector, @@ -38,6 +39,7 @@ static int ibv_icmd_create_cq(struct ibv_context *context, int cqe, struct ibv_command_buffer *link) { DECLARE_FBCMD_BUFFER(cmdb, UVERBS_OBJECT_CQ, UVERBS_METHOD_CQ_CREATE, 8, link); + struct verbs_ex_private *priv = get_priv(context); struct ib_uverbs_attr *handle; struct ib_uverbs_attr *async_fd_attr; uint32_t resp_cqe; @@ -54,8 +56,11 @@ static int ibv_icmd_create_cq(struct ibv_context *context, int cqe, fill_attr_in_fd(cmdb, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, channel->fd); fill_attr_in_uint32(cmdb, UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, comp_vector); async_fd_attr = fill_attr_in_fd(cmdb, UVERBS_ATTR_CREATE_CQ_EVENT_FD, context->async_fd); - /* Prevent fallback to the 'write' mode if kernel doesn't support it */ - attr_optional(async_fd_attr); + if (priv->imported) + fallback_require_ioctl(cmdb); + else + /* Prevent fallback to the 'write' mode if kernel doesn't support it */ + attr_optional(async_fd_attr); if (flags) { fallback_require_ex(cmdb); diff --git a/libibverbs/cmd_device.c b/libibverbs/cmd_device.c index 4de59c0..648cc0b 100644 --- a/libibverbs/cmd_device.c +++ b/libibverbs/cmd_device.c @@ -99,7 +99,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num, return 0; } -static int cmd_alloc_async_fd(struct ibv_context *context) +int ibv_cmd_alloc_async_fd(struct ibv_context *context) { DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_ASYNC_EVENT, UVERBS_METHOD_ASYNC_EVENT_ALLOC, 1); @@ -153,9 +153,6 @@ static int cmd_get_context(struct verbs_context *context_ex, return 0; } case SUCCESS: - ret = cmd_alloc_async_fd(context); - if (ret) - return ret; break; default: return ret; diff --git a/libibverbs/cmd_qp.c b/libibverbs/cmd_qp.c index a11bb98..567984d 100644 --- a/libibverbs/cmd_qp.c +++ b/libibverbs/cmd_qp.c @@ -76,6 +76,7 @@ static int ibv_icmd_create_qp(struct ibv_context *context, struct ibv_command_buffer *link) { DECLARE_FBCMD_BUFFER(cmdb, UVERBS_OBJECT_QP, UVERBS_METHOD_QP_CREATE, 15, link); + struct verbs_ex_private *priv = get_priv(context); struct ib_uverbs_attr *handle; uint32_t qp_num; uint32_t pd_handle; @@ -172,6 +173,9 @@ static int ibv_icmd_create_qp(struct ibv_context *context, fill_attr_in_ptr(cmdb, UVERBS_ATTR_CREATE_QP_CAP, &attr_ex->cap); fill_attr_in_fd(cmdb, UVERBS_ATTR_CREATE_QP_EVENT_FD, context->async_fd); + if (priv->imported) + fallback_require_ioctl(cmdb); + if (attr_ex->sq_sig_all) create_flags |= IB_UVERBS_QP_CREATE_SQ_SIG_ALL; diff --git a/libibverbs/cmd_srq.c b/libibverbs/cmd_srq.c index 4e63046..dfaaa6a 100644 --- a/libibverbs/cmd_srq.c +++ b/libibverbs/cmd_srq.c @@ -53,6 +53,7 @@ static int ibv_icmd_create_srq(struct ibv_pd *pd, struct verbs_srq *vsrq, struct ibv_command_buffer *link) { DECLARE_FBCMD_BUFFER(cmdb, UVERBS_OBJECT_SRQ, UVERBS_METHOD_SRQ_CREATE, 13, link); + struct verbs_ex_private *priv = get_priv(pd->context); struct ib_uverbs_attr *handle; uint32_t max_wr; uint32_t max_sge; @@ 
-107,6 +108,9 @@ static int ibv_icmd_create_srq(struct ibv_pd *pd, struct verbs_srq *vsrq, fill_attr_out_ptr(cmdb, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR, &max_wr); fill_attr_out_ptr(cmdb, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE, &max_sge); + if (priv->imported) + fallback_require_ioctl(cmdb); + switch (execute_ioctl_fallback(srq->context, create_srq, cmdb, &ret)) { case TRY_WRITE: { if (attr_ex->srq_type == IBV_SRQT_BASIC && abi_ver > 5) { diff --git a/libibverbs/cmd_wq.c b/libibverbs/cmd_wq.c index d233c3a..855fc04 100644 --- a/libibverbs/cmd_wq.c +++ b/libibverbs/cmd_wq.c @@ -31,6 +31,7 @@ */ #include +#include "ibverbs.h" static int ibv_icmd_create_wq(struct ibv_context *context, struct ibv_wq_init_attr *wq_init_attr, @@ -38,6 +39,7 @@ static int ibv_icmd_create_wq(struct ibv_context *context, struct ibv_command_buffer *link) { DECLARE_FBCMD_BUFFER(cmdb, UVERBS_OBJECT_WQ, UVERBS_METHOD_WQ_CREATE, 13, link); + struct verbs_ex_private *priv = get_priv(context); struct ib_uverbs_attr *handle; uint32_t max_wr; uint32_t max_sge; @@ -62,6 +64,8 @@ static int ibv_icmd_create_wq(struct ibv_context *context, fill_attr_out_ptr(cmdb, UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE, &max_sge); fill_attr_out_ptr(cmdb, UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM, &wq_num); + if (priv->imported) + fallback_require_ioctl(cmdb); fallback_require_ex(cmdb); switch (execute_ioctl_fallback(context, create_wq, cmdb, &ret)) { diff --git a/libibverbs/device.c b/libibverbs/device.c index 629832e..4ef2a6a 100644 --- a/libibverbs/device.c +++ b/libibverbs/device.c @@ -344,6 +344,7 @@ struct ibv_context *verbs_open_device(struct ibv_device *device, void *private_d struct verbs_device *verbs_device = verbs_get_device(device); int cmd_fd; struct verbs_context *context_ex; + int ret; /* * We'll only be doing writes, but we need O_RDWR in case the @@ -363,6 +364,13 @@ struct ibv_context *verbs_open_device(struct ibv_device *device, void *private_d return NULL; set_lib_ops(context_ex); + if (context_ex->context.async_fd == -1) { + ret = ibv_cmd_alloc_async_fd(&context_ex->context); + if (ret) { + ibv_close_device(&context_ex->context); + return NULL; + } + } return &context_ex->context; } diff --git a/libibverbs/driver.h b/libibverbs/driver.h index 9f9933d..a1b241b 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -453,6 +453,7 @@ int ibv_cmd_query_device_ex(struct ibv_context *context, int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr, struct ibv_query_port *cmd, size_t cmd_size); +int ibv_cmd_alloc_async_fd(struct ibv_context *context); int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd, struct ibv_alloc_pd *cmd, size_t cmd_size, struct ib_uverbs_alloc_pd_resp *resp, size_t resp_size); diff --git a/libibverbs/ibverbs.h b/libibverbs/ibverbs.h index 4b9b88f..6703c10 100644 --- a/libibverbs/ibverbs.h +++ b/libibverbs/ibverbs.h @@ -74,6 +74,7 @@ struct verbs_ex_private { uint32_t driver_id; bool use_ioctl_write; struct verbs_context_ops ops; + bool imported; }; static inline struct verbs_ex_private *get_priv(struct ibv_context *ctx) From patchwork Sun Jul 5 08:19:40 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 11644197 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3C05F17E1 for ; Sun, 5 Jul 2020 08:20:16 +0000 (UTC) Received: from vger.kernel.org 
From: Yishai Hadas <yishaih@mellanox.com>
To: linux-rdma@vger.kernel.org
Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com
Subject: [PATCH V1 rdma-core 04/13] verbs: Introduce ibv_import_device() verb
Date: Sun, 5 Jul 2020 11:19:40 +0300
Message-Id: <1593937189-8744-5-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>
References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>

Introduce ibv_import_device(), which enables a process to get an ibv_context
that is associated with a given command FD that it owns.

One process opens a device and then uses the usual Linux system calls (e.g.
passing the FD over a Unix-domain socket) to hand its 'cmd_fd' member to
another process, which takes ownership of it. Once the other process owns the
'cmd_fd' it can call ibv_import_device(), which returns an ibv_context on the
original RDMA device.

A detailed man page included in this patch describes the expected usage and
flow.
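Below is a minimal sketch of that flow, assuming the two processes already
share a connected Unix-domain socket; the helpers send_fd() and
import_peer_device() and the error handling are illustrative and not part of
this patch, only ibv_import_device() itself comes from this series:

```c
#include <string.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <infiniband/verbs.h>

/*
 * Exporter side: after ibv_open_device(), pass ctx->cmd_fd to the peer,
 * e.g. send_fd(sock, ctx->cmd_fd).
 */
static int send_fd(int sock, int fd)
{
	char data = 'F';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} u = {};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}

/* Importer side: receive the FD and build a context on the same device. */
static struct ibv_context *import_peer_device(int sock)
{
	char data;
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} u = {};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;
	int cmd_fd;

	if (recvmsg(sock, &msg, 0) != 1)
		return NULL;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
	    cmsg->cmsg_type != SCM_RIGHTS)
		return NULL;
	memcpy(&cmd_fd, CMSG_DATA(cmsg), sizeof(int));

	/* The imported context (and the received FD) is released later with
	 * ibv_close_device(), as described in the man page. */
	return ibv_import_device(cmd_fd);
}
```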
Signed-off-by: Yishai Hadas --- debian/libibverbs1.symbols | 1 + libibverbs/device.c | 62 +++++++++++++++++++++++++++++++++++ libibverbs/driver.h | 2 ++ libibverbs/libibverbs.map.in | 1 + libibverbs/man/CMakeLists.txt | 1 + libibverbs/man/ibv_import_device.3.md | 48 +++++++++++++++++++++++++++ libibverbs/verbs.h | 5 +++ 7 files changed, 120 insertions(+) create mode 100644 libibverbs/man/ibv_import_device.3.md diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols index bb7eac9..1ba5453 100644 --- a/debian/libibverbs1.symbols +++ b/debian/libibverbs1.symbols @@ -67,6 +67,7 @@ libibverbs.so.1 libibverbs1 #MINVER# ibv_get_device_name@IBVERBS_1.1 1.1.6 ibv_get_pkey_index@IBVERBS_1.5 20 ibv_get_sysfs_path@IBVERBS_1.0 1.1.6 + ibv_import_device@IBVERBS_1.10 31 ibv_init_ah_from_wc@IBVERBS_1.1 1.1.6 ibv_modify_qp@IBVERBS_1.0 1.1.6 ibv_modify_qp@IBVERBS_1.1 1.1.6 diff --git a/libibverbs/device.c b/libibverbs/device.c index 4ef2a6a..e9c429a 100644 --- a/libibverbs/device.c +++ b/libibverbs/device.c @@ -382,6 +382,68 @@ LATEST_SYMVER_FUNC(ibv_open_device, 1_1, "IBVERBS_1.1", return verbs_open_device(device, NULL); } +struct ibv_context *ibv_import_device(int cmd_fd) +{ + struct verbs_device *verbs_device = NULL; + struct verbs_context *context_ex; + struct ibv_device **dev_list; + struct ibv_context *ctx = NULL; + struct stat st; + int ret; + int i; + + if (fstat(cmd_fd, &st) || !S_ISCHR(st.st_mode)) { + errno = EINVAL; + return NULL; + } + + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + errno = ENODEV; + return NULL; + } + + for (i = 0; dev_list[i]; ++i) { + if (verbs_get_device(dev_list[i])->sysfs->sysfs_cdev == + st.st_rdev) { + verbs_device = verbs_get_device(dev_list[i]); + break; + } + } + + if (!verbs_device) { + errno = ENODEV; + goto out; + } + + if (!verbs_device->ops->import_context) { + errno = EOPNOTSUPP; + goto out; + } + + /* In case the underlay cdev number was assigned in the meantime to + * other device as of some disassociate flow, the next call on the + * FD will end up with EIO (i.e. query_context command) and we should + * be safe from using the wrong device. 
+ */ + context_ex = verbs_device->ops->import_context(&verbs_device->device, cmd_fd); + if (!context_ex) + goto out; + + set_lib_ops(context_ex); + + context_ex->priv->imported = true; + ctx = &context_ex->context; + ret = ibv_cmd_alloc_async_fd(ctx); + if (ret) { + ibv_close_device(ctx); + ctx = NULL; + } +out: + ibv_free_device_list(dev_list); + return ctx; +} + void verbs_uninit_context(struct verbs_context *context_ex) { free(context_ex->priv); diff --git a/libibverbs/driver.h b/libibverbs/driver.h index a1b241b..48eace4 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -216,6 +216,8 @@ struct verbs_device_ops { struct verbs_context *(*alloc_context)(struct ibv_device *device, int cmd_fd, void *private_data); + struct verbs_context *(*import_context)(struct ibv_device *device, + int cmd_fd); struct verbs_device *(*alloc_device)(struct verbs_sysfs_dev *sysfs_dev); void (*uninit_device)(struct verbs_device *device); diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index 61b6a80..be75717 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -133,6 +133,7 @@ IBVERBS_1.9 { IBVERBS_1.10 { global: + ibv_import_device; ibv_query_ece; ibv_set_ece; } IBVERBS_1.9; diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt index a9cf61e..06b5b49 100644 --- a/libibverbs/man/CMakeLists.txt +++ b/libibverbs/man/CMakeLists.txt @@ -37,6 +37,7 @@ rdma_man_pages( ibv_get_device_name.3.md ibv_get_pkey_index.3.md ibv_get_srq_num.3.md + ibv_import_device.3.md ibv_inc_rkey.3.md ibv_modify_qp.3 ibv_modify_qp_rate_limit.3 diff --git a/libibverbs/man/ibv_import_device.3.md b/libibverbs/man/ibv_import_device.3.md new file mode 100644 index 0000000..601b50a --- /dev/null +++ b/libibverbs/man/ibv_import_device.3.md @@ -0,0 +1,48 @@ +--- +date: 2020-5-3 +footer: libibverbs +header: "Libibverbs Programmer's Manual" +layout: page +license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md' +section: 3 +title: ibv_import_device +--- + +# NAME + +ibv_import_device - import a device from a given comamnd FD + +# SYNOPSIS + +```c +#include + +struct ibv_context *ibv_import_device(int cmd_fd); + +``` + + +# DESCRIPTION + +**ibv_import_device()** returns an *ibv_context* pointer that is associated with the given +*cmd_fd*. + +The *cmd_fd* is obtained from the ibv_context cmd_fd member, which must be dup'd (eg by dup(), SCM_RIGHTS, etc) +before being passed to ibv_import_device(). + +Once the *ibv_context* usage has been ended *ibv_close_device()* should be called. +This call may cleanup whatever is needed/opposite of the import including closing the command FD. + +# RETURN VALUE + +**ibv_import_device()** returns a pointer to the allocated RDMA context, or NULL if the request fails. 
+# SEE ALSO
+
+**ibv_open_device**(3),
+**ibv_close_device**(3),
+
+# AUTHOR
+
+Yishai Hadas
+
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index f72c1ab..e1247b8 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2242,6 +2242,11 @@ struct ibv_context *ibv_open_device(struct ibv_device *device);
 int ibv_close_device(struct ibv_context *context);
 
 /**
+ * ibv_import_device - Import device
+ */
+struct ibv_context *ibv_import_device(int cmd_fd);
+
+/**
  * ibv_get_async_event - Get next async event
  * @event: Pointer to use to return async event
  *

From patchwork Sun Jul 5 08:19:41 2020
X-Patchwork-Submitter: Yishai Hadas
X-Patchwork-Id: 11644213
From: Yishai Hadas <yishaih@mellanox.com>
To: linux-rdma@vger.kernel.org
Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com
Subject: [PATCH V1 rdma-core 05/13] mlx5: Refactor mlx5_alloc_context()
Date: Sun, 5 Jul 2020 11:19:41 +0300
Message-Id: <1593937189-8744-6-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>
References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>

This patch refactors mlx5_alloc_context() by splitting its functionality into
a few parts: init context, get context command, set context and uninit
context. As part of the split, some redundant outlen checks were cleaned up.

This is a preparation step to enable implementing mlx5_import_context() by
sharing the applicable parts.
Signed-off-by: Yishai Hadas --- providers/mlx5/mlx5.c | 274 ++++++++++++++++++++++++++++---------------------- 1 file changed, 154 insertions(+), 120 deletions(-) diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c index 805be2c..cb9ed60 100644 --- a/providers/mlx5/mlx5.c +++ b/providers/mlx5/mlx5.c @@ -1268,17 +1268,17 @@ repeat: static void adjust_uar_info(struct mlx5_device *mdev, struct mlx5_context *context, - struct mlx5_alloc_ucontext_resp resp) + struct mlx5_ib_alloc_ucontext_resp *resp) { - if (!resp.log_uar_size && !resp.num_uars_per_page) { + if (!resp->log_uar_size && !resp->num_uars_per_page) { /* old kernel */ context->uar_size = mdev->page_size; context->num_uars_per_page = 1; return; } - context->uar_size = 1 << resp.log_uar_size; - context->num_uars_per_page = resp.num_uars_per_page; + context->uar_size = 1 << resp->log_uar_size; + context->num_uars_per_page = resp->num_uars_per_page; } bool mlx5dv_is_supported(struct ibv_device *device) @@ -1297,120 +1297,108 @@ mlx5dv_open_device(struct ibv_device *device, struct mlx5dv_context_attr *attr) return verbs_open_device(device, attr); } -static struct verbs_context *mlx5_alloc_context(struct ibv_device *ibdev, +static int get_uar_info(struct mlx5_device *mdev, + int *tot_uuars, int *low_lat_uuars) +{ + *tot_uuars = get_total_uuars(mdev->page_size); + if (*tot_uuars < 0) { + errno = -*tot_uuars; + return -1; + } + + *low_lat_uuars = get_num_low_lat_uuars(*tot_uuars); + if (*low_lat_uuars < 0) { + errno = -*low_lat_uuars; + return -1; + } + + if (*low_lat_uuars > *tot_uuars - 1) { + errno = ENOMEM; + return -1; + } + + return 0; +} + +static void mlx5_uninit_context(struct mlx5_context *context) +{ + close_debug_file(context); + + verbs_uninit_context(&context->ibv_ctx); + free(context); +} + +static struct mlx5_context *mlx5_init_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) { - struct mlx5_context *context; - struct mlx5_alloc_ucontext req; - struct mlx5_alloc_ucontext_resp resp; - int i; - int page_size; - int tot_uuars; - int low_lat_uuars; - int gross_uuars; - int j; - struct mlx5_device *mdev = to_mdev(ibdev); - struct verbs_context *v_ctx; - struct ibv_port_attr port_attr; - struct ibv_device_attr_ex device_attr; - int k; - int bfi; - int num_sys_page_map; - struct mlx5dv_context_attr *ctx_attr = private_data; - bool always_devx = false; + struct mlx5dv_context_attr *ctx_attr = private_data; + struct mlx5_device *mdev = to_mdev(ibdev); + struct mlx5_context *context; + int low_lat_uuars; + int tot_uuars; + int ret; if (ctx_attr && ctx_attr->comp_mask) { errno = EINVAL; return NULL; } + ret = get_uar_info(mdev, &tot_uuars, &low_lat_uuars); + if (ret) + return NULL; + context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, RDMA_DRIVER_MLX5); if (!context) return NULL; - v_ctx = &context->ibv_ctx; - page_size = mdev->page_size; - mlx5_single_threaded = single_threaded_app(); - open_debug_file(context); set_debug_mask(); set_freeze_on_error(); if (gethostname(context->hostname, sizeof(context->hostname))) strcpy(context->hostname, "host_unknown"); - tot_uuars = get_total_uuars(page_size); - if (tot_uuars < 0) { - errno = -tot_uuars; - goto err_free; - } - - low_lat_uuars = get_num_low_lat_uuars(tot_uuars); - if (low_lat_uuars < 0) { - errno = -low_lat_uuars; - goto err_free; - } - - if (low_lat_uuars > tot_uuars - 1) { - errno = ENOMEM; - goto err_free; - } - - memset(&req, 0, sizeof(req)); - memset(&resp, 0, sizeof(resp)); - - req.total_num_bfregs = tot_uuars; - 
req.num_low_latency_bfregs = low_lat_uuars; - req.max_cqe_version = MLX5_CQE_VERSION_V1; - req.lib_caps |= (MLX5_LIB_CAP_4K_UAR | MLX5_LIB_CAP_DYN_UAR); - if (ctx_attr && ctx_attr->flags) { - - if (!check_comp_mask(ctx_attr->flags, - MLX5DV_CONTEXT_FLAGS_DEVX)) { - errno = EINVAL; - goto err_free; - } - - req.flags = MLX5_IB_ALLOC_UCTX_DEVX; - } else { - req.flags = MLX5_IB_ALLOC_UCTX_DEVX; - always_devx = true; - } + mlx5_single_threaded = single_threaded_app(); + context->tot_uuars = tot_uuars; + context->low_lat_uuars = low_lat_uuars; -retry_open: - if (mlx5_cmd_get_context(context, &req, sizeof(req), &resp, - sizeof(resp))) { - if (always_devx) { - req.flags &= ~MLX5_IB_ALLOC_UCTX_DEVX; - always_devx = false; - memset(&resp, 0, sizeof(resp)); - goto retry_open; - } else { - goto err_free; - } - } + return context; +} - context->max_num_qps = resp.qp_tab_size; - context->bf_reg_size = resp.bf_reg_size; - context->tot_uuars = resp.tot_bfregs; - context->low_lat_uuars = low_lat_uuars; - context->cache_line_size = resp.cache_line_size; - context->max_sq_desc_sz = resp.max_sq_desc_sz; - context->max_rq_desc_sz = resp.max_rq_desc_sz; - context->max_send_wqebb = resp.max_send_wqebb; - context->num_ports = resp.num_ports; - context->max_recv_wr = resp.max_recv_wr; - context->max_srq_recv_wr = resp.max_srq_recv_wr; - context->num_dyn_bfregs = resp.num_dyn_bfregs; - - if (resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE) +static int mlx5_set_context(struct mlx5_context *context, + struct mlx5_ib_alloc_ucontext_resp *resp) +{ + struct verbs_context *v_ctx = &context->ibv_ctx; + struct ibv_port_attr port_attr = {}; + struct ibv_device_attr_ex device_attr = {}; + int cmd_fd = v_ctx->context.cmd_fd; + struct mlx5_device *mdev = to_mdev(v_ctx->context.device); + struct ibv_device *ibdev = v_ctx->context.device; + int page_size = mdev->page_size; + int num_sys_page_map; + int gross_uuars; + int bfi; + int i, k, j; + + context->max_num_qps = resp->qp_tab_size; + context->bf_reg_size = resp->bf_reg_size; + context->cache_line_size = resp->cache_line_size; + context->max_sq_desc_sz = resp->max_sq_desc_sz; + context->max_rq_desc_sz = resp->max_rq_desc_sz; + context->max_send_wqebb = resp->max_send_wqebb; + context->num_ports = resp->num_ports; + context->max_recv_wr = resp->max_recv_wr; + context->max_srq_recv_wr = resp->max_srq_recv_wr; + context->num_dyn_bfregs = resp->num_dyn_bfregs; + + if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE) context->flags |= MLX5_CTX_FLAGS_ECE_SUPPORTED; - if (resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY) { - context->dump_fill_mkey = resp.dump_fill_mkey; + if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY) { + context->dump_fill_mkey = resp->dump_fill_mkey; /* Have the BE value ready to be used in data path */ - context->dump_fill_mkey_be = htobe32(resp.dump_fill_mkey); + context->dump_fill_mkey_be = htobe32(resp->dump_fill_mkey); } else { /* kernel driver will never return MLX5_INVALID_LKEY for * dump_fill_mkey @@ -1419,19 +1407,18 @@ retry_open: context->dump_fill_mkey_be = htobe32(MLX5_INVALID_LKEY); } - context->cqe_version = resp.cqe_version; - + context->cqe_version = resp->cqe_version; adjust_uar_info(mdev, context, resp); - context->cmds_supp_uhw = resp.cmds_supp_uhw; + context->cmds_supp_uhw = resp->cmds_supp_uhw; context->vendor_cap_flags = 0; list_head_init(&context->dyn_uar_bf_list); list_head_init(&context->dyn_uar_nc_list); list_head_init(&context->dyn_uar_qp_shared_list); 
list_head_init(&context->dyn_uar_qp_dedicated_list); - if (resp.eth_min_inline) - context->eth_min_inline_size = (resp.eth_min_inline == MLX5_USER_INLINE_MODE_NONE) ? + if (resp->eth_min_inline) + context->eth_min_inline_size = (resp->eth_min_inline == MLX5_USER_INLINE_MODE_NONE) ? 0 : MLX5_ETH_L2_INLINE_HEADER_SIZE; else context->eth_min_inline_size = MLX5_ETH_L2_INLINE_HEADER_SIZE; @@ -1453,7 +1440,8 @@ retry_open: context->prefer_bf = get_always_bf(); context->shut_up_bf = get_shut_up_bf(); - if (context->tot_uuars) { + if (resp->tot_bfregs) { + context->tot_uuars = resp->tot_bfregs; gross_uuars = context->tot_uuars / MLX5_NUM_NON_FP_BFREGS_PER_UAR * NUM_BFREGS_PER_UAR; context->bfs = calloc(gross_uuars, sizeof(*context->bfs)); if (!context->bfs) { @@ -1462,8 +1450,8 @@ retry_open: } context->flags |= MLX5_CTX_FLAGS_NO_KERN_DYN_UAR; } else { - context->qp_max_dedicated_uuars = low_lat_uuars; - context->qp_max_shared_uuars = tot_uuars - low_lat_uuars; + context->qp_max_dedicated_uuars = context->low_lat_uuars; + context->qp_max_shared_uuars = context->tot_uuars - context->low_lat_uuars; goto bf_done; } @@ -1491,9 +1479,9 @@ retry_open: if (bfi) context->bfs[bfi].buf_size = context->bf_reg_size / 2; context->bfs[bfi].uuarn = bfi; - context->bfs[bfi].uar_mmap_offset = get_uar_mmap_offset(i, - page_size, - uar_type_to_cmd(context->uar[i].type)); + context->bfs[bfi].uar_mmap_offset = + get_uar_mmap_offset(i, page_size, + uar_type_to_cmd(context->uar[i].type)); } } } @@ -1501,23 +1489,16 @@ retry_open: bf_done: context->hca_core_clock = NULL; - if (resp.response_length + sizeof(resp.ibv_resp) >= - offsetof(struct mlx5_alloc_ucontext_resp, hca_core_clock_offset) + - sizeof(resp.hca_core_clock_offset) && - resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET) { - context->core_clock.offset = resp.hca_core_clock_offset; + if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET) { + context->core_clock.offset = resp->hca_core_clock_offset; mlx5_map_internal_clock(mdev, &v_ctx->context); } context->clock_info_page = NULL; - if (resp.response_length + sizeof(resp.ibv_resp) >= - offsetof(struct mlx5_alloc_ucontext_resp, clock_info_versions) + - sizeof(resp.clock_info_versions) && - (resp.clock_info_versions & (1 << MLX5_IB_CLOCK_INFO_V1))) { + if ((resp->clock_info_versions & (1 << MLX5_IB_CLOCK_INFO_V1))) mlx5_map_clock_info(mdev, &v_ctx->context); - } - context->flow_action_flags = resp.flow_action_flags; + context->flow_action_flags = resp->flow_action_flags; mlx5_read_env(ibdev, context); @@ -1532,7 +1513,6 @@ bf_done: goto err_free; } - memset(&device_attr, 0, sizeof(device_attr)); if (!mlx5_query_device_ex(&v_ctx->context, NULL, &device_attr, sizeof(struct ibv_device_attr_ex))) { context->cached_device_cap_flags = @@ -1554,7 +1534,7 @@ bf_done: MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC); context->cq_uar_reg = context->cq_uar ? 
context->cq_uar->uar : context->uar[0].reg; - return v_ctx; + return 0; err_free_bf: free(context->bfs); @@ -1564,10 +1544,64 @@ err_free: if (context->uar[i].reg) munmap(context->uar[i].reg, page_size); } - close_debug_file(context); - verbs_uninit_context(&context->ibv_ctx); - free(context); + return -1; +} + +static struct verbs_context *mlx5_alloc_context(struct ibv_device *ibdev, + int cmd_fd, + void *private_data) +{ + struct mlx5_context *context; + struct mlx5_alloc_ucontext req = {}; + struct mlx5_alloc_ucontext_resp resp = {}; + struct mlx5dv_context_attr *ctx_attr = private_data; + bool always_devx = false; + int ret; + + context = mlx5_init_context(ibdev, cmd_fd, NULL); + if (!context) + return NULL; + + req.total_num_bfregs = context->tot_uuars; + req.num_low_latency_bfregs = context->low_lat_uuars; + req.max_cqe_version = MLX5_CQE_VERSION_V1; + req.lib_caps |= (MLX5_LIB_CAP_4K_UAR | MLX5_LIB_CAP_DYN_UAR); + if (ctx_attr && ctx_attr->flags) { + + if (!check_comp_mask(ctx_attr->flags, + MLX5DV_CONTEXT_FLAGS_DEVX)) { + errno = EINVAL; + goto err; + } + + req.flags = MLX5_IB_ALLOC_UCTX_DEVX; + } else { + req.flags = MLX5_IB_ALLOC_UCTX_DEVX; + always_devx = true; + } + +retry_open: + if (mlx5_cmd_get_context(context, &req, sizeof(req), &resp, + sizeof(resp))) { + if (always_devx) { + req.flags &= ~MLX5_IB_ALLOC_UCTX_DEVX; + always_devx = false; + memset(&resp, 0, sizeof(resp)); + goto retry_open; + } else { + goto err; + } + } + + ret = mlx5_set_context(context, &resp.drv_payload); + if (ret) + goto err; + + return &context->ibv_ctx; + +err: + mlx5_uninit_context(context); return NULL; } From patchwork Sun Jul 5 08:19:44 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 11644211 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A718813B6 for ; Sun, 5 Jul 2020 08:20:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 99FAC206BE for ; Sun, 5 Jul 2020 08:20:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726491AbgGEIUQ (ORCPT ); Sun, 5 Jul 2020 04:20:16 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33107 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726565AbgGEIUM (ORCPT ); Sun, 5 Jul 2020 04:20:12 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with SMTP; 5 Jul 2020 11:20:05 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5hs001700; Sun, 5 Jul 2020 11:20:05 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5QL009306; Sun, 5 Jul 2020 11:20:05 +0300 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id 0658K5Q0009305; Sun, 5 Jul 2020 11:20:05 +0300 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com Subject: [PATCH V1 rdma-core 08/13] mlx5: Implement the import/unimport PD verbs Date: Sun, 5 Jul 2020 11:19:44 +0300 Message-Id: <1593937189-8744-9-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> References: 
<1593937189-8744-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Implement the import/unimport PD verbs based on their definition as described in the man page. It uses the query PD ioctl command to retrieve the original PD properties based on its given handle. Signed-off-by: Yishai Hadas --- providers/mlx5/mlx5.c | 2 ++ providers/mlx5/mlx5.h | 3 +++ providers/mlx5/verbs.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c index 648e220..ea55e0f 100644 --- a/providers/mlx5/mlx5.c +++ b/providers/mlx5/mlx5.c @@ -144,6 +144,7 @@ static const struct verbs_context_ops mlx5_ctx_common_ops = { .destroy_wq = mlx5_destroy_wq, .free_dm = mlx5_free_dm, .get_srq_num = mlx5_get_srq_num, + .import_pd = mlx5_import_pd, .modify_cq = mlx5_modify_cq, .modify_flow_action_esp = mlx5_modify_flow_action_esp, .modify_qp_rate_limit = mlx5_modify_qp_rate_limit, @@ -159,6 +160,7 @@ static const struct verbs_context_ops mlx5_ctx_common_ops = { .alloc_null_mr = mlx5_alloc_null_mr, .free_context = mlx5_free_context, .set_ece = mlx5_set_ece, + .unimport_pd = mlx5_unimport_pd, }; static const struct verbs_context_ops mlx5_ctx_cqev1_ops = { diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h index b57ba25..3c129d7 100644 --- a/providers/mlx5/mlx5.h +++ b/providers/mlx5/mlx5.h @@ -1040,6 +1040,9 @@ int mlx5_advise_mr(struct ibv_pd *pd, uint32_t flags, struct ibv_sge *sg_list, uint32_t num_sges); +struct ibv_pd *mlx5_import_pd(struct ibv_context *context, + uint32_t pd_handle); +void mlx5_unimport_pd(struct ibv_pd *pd); int mlx5_qp_fill_wr_pfns(struct mlx5_qp *mqp, const struct ibv_qp_init_attr_ex *attr, const struct mlx5dv_qp_init_attr *mlx5_attr); diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c index 5643ecc..81e1ed9 100644 --- a/providers/mlx5/verbs.c +++ b/providers/mlx5/verbs.c @@ -556,26 +556,39 @@ static int mlx5_dealloc_parent_domain(struct mlx5_parent_domain *mparent_domain) return 0; } -int mlx5_free_pd(struct ibv_pd *pd) +static int _mlx5_free_pd(struct ibv_pd *pd, bool unimport) { int ret; struct mlx5_parent_domain *mparent_domain = to_mparent_domain(pd); struct mlx5_pd *mpd = to_mpd(pd); - if (mparent_domain) + if (mparent_domain) { + if (unimport) + return EINVAL; + return mlx5_dealloc_parent_domain(mparent_domain); + } if (atomic_load(&mpd->refcount) > 1) return EBUSY; + if (unimport) + goto end; + ret = ibv_cmd_dealloc_pd(pd); if (ret) return ret; +end: free(mpd); return 0; } +int mlx5_free_pd(struct ibv_pd *pd) +{ + return _mlx5_free_pd(pd, false); +} + struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length, uint64_t hca_va, int acc) { @@ -706,6 +719,43 @@ int mlx5_advise_mr(struct ibv_pd *pd, return ibv_cmd_advise_mr(pd, advice, flags, sg_list, num_sge); } +struct ibv_pd *mlx5_import_pd(struct ibv_context *context, + uint32_t pd_handle) +{ + DECLARE_COMMAND_BUFFER(cmd, + UVERBS_OBJECT_PD, + MLX5_IB_METHOD_PD_QUERY, + 2); + + struct mlx5_pd *pd; + int ret; + + pd = calloc(1, sizeof *pd); + if (!pd) + return NULL; + + fill_attr_in_obj(cmd, MLX5_IB_ATTR_QUERY_PD_HANDLE, pd_handle); + fill_attr_out_ptr(cmd, MLX5_IB_ATTR_QUERY_PD_RESP_PDN, &pd->pdn); + + ret = execute_ioctl(context, cmd); + if (ret) { + free(pd); + return NULL; + } + + pd->ibv_pd.context = context; + pd->ibv_pd.handle = pd_handle; + atomic_init(&pd->refcount, 1); + + return &pd->ibv_pd; +} + 
+void mlx5_unimport_pd(struct ibv_pd *pd)
+{
+	if (_mlx5_free_pd(pd, true))
+		assert(false);
+}
+
 struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
 {
 	struct ibv_mw *mw;

From patchwork Sun Jul 5 08:19:45 2020
X-Patchwork-Submitter: Yishai Hadas
X-Patchwork-Id: 11644205
From: Yishai Hadas <yishaih@mellanox.com>
To: linux-rdma@vger.kernel.org
Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com
Subject: [PATCH V1 rdma-core 09/13] verbs: Introduce ibv_import/unimport_mr() verbs
Date: Sun, 5 Jul 2020 11:19:45 +0300
Message-Id: <1593937189-8744-10-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>
References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com>

Introduce the ibv_import/unimport_mr() verbs. They enable an application that
previously imported the device and an associated PD to also import an MR that
is associated with them.

A detailed man page included in this patch describes the expected usage and
flow.
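A minimal sketch of how an importing process might combine these verbs,
assuming it already holds a context from ibv_import_device() and received the
exporter's PD and MR handles out of band; the helper name use_shared_mr() is
illustrative and not part of this series:

```c
#include <stdint.h>
#include <infiniband/verbs.h>

/*
 * Sketch only: ctx came from ibv_import_device(); pd_handle/mr_handle are
 * the exporter's pd->handle and mr->handle values, exchanged by whatever
 * IPC mechanism the application chooses (not shown here).
 */
static int use_shared_mr(struct ibv_context *ctx, uint32_t pd_handle,
			 uint32_t mr_handle)
{
	struct ibv_pd *pd;
	struct ibv_mr *mr;

	pd = ibv_import_pd(ctx, pd_handle);
	if (!pd)
		return -1;

	mr = ibv_import_mr(pd, mr_handle);
	if (!mr) {
		ibv_unimport_pd(pd);
		return -1;
	}

	/*
	 * mr->lkey / mr->rkey can now be used for data transfer; note that
	 * mr->addr is NULL on an imported MR (see the man page NOTES).
	 */

	ibv_unimport_mr(mr);
	ibv_unimport_pd(pd);
	return 0;
}
```

Unimport only tears down the local copies; destroying the kernel objects
remains the exporter's responsibility (ibv_dereg_mr() / ibv_dealloc_pd()), as
the man page below spells out.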
Signed-off-by: Yishai Hadas --- debian/libibverbs1.symbols | 2 ++ libibverbs/driver.h | 3 ++ libibverbs/dummy_ops.c | 15 +++++++++ libibverbs/libibverbs.map.in | 2 ++ libibverbs/man/CMakeLists.txt | 2 ++ libibverbs/man/ibv_import_mr.3.md | 64 +++++++++++++++++++++++++++++++++++++++ libibverbs/verbs.c | 16 ++++++++++ libibverbs/verbs.h | 10 ++++++ 8 files changed, 114 insertions(+) create mode 100644 libibverbs/man/ibv_import_mr.3.md diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols index f92e77e..2efbe89 100644 --- a/debian/libibverbs1.symbols +++ b/debian/libibverbs1.symbols @@ -68,6 +68,7 @@ libibverbs.so.1 libibverbs1 #MINVER# ibv_get_pkey_index@IBVERBS_1.5 20 ibv_get_sysfs_path@IBVERBS_1.0 1.1.6 ibv_import_device@IBVERBS_1.10 31 + ibv_import_mr@IBVERBS_1.10 31 ibv_import_pd@IBVERBS_1.10 31 ibv_init_ah_from_wc@IBVERBS_1.1 1.1.6 ibv_modify_qp@IBVERBS_1.0 1.1.6 @@ -105,6 +106,7 @@ libibverbs.so.1 libibverbs1 #MINVER# ibv_resize_cq@IBVERBS_1.1 1.1.6 ibv_resolve_eth_l2_from_gid@IBVERBS_1.1 1.2.0 ibv_set_ece@IBVERBS_1.10 31 + ibv_unimport_mr@IBVERBS_1.10 31 ibv_unimport_pd@IBVERBS_1.10 31 ibv_wc_status_str@IBVERBS_1.1 1.1.6 mbps_to_ibv_rate@IBVERBS_1.1 1.1.8 diff --git a/libibverbs/driver.h b/libibverbs/driver.h index edb60d3..9065605 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -318,6 +318,8 @@ struct verbs_context_ops { void (*free_context)(struct ibv_context *context); int (*free_dm)(struct ibv_dm *dm); int (*get_srq_num)(struct ibv_srq *srq, uint32_t *srq_num); + struct ibv_mr *(*import_mr)(struct ibv_pd *pd, + uint32_t mr_handle); struct ibv_pd *(*import_pd)(struct ibv_context *context, uint32_t pd_handle); int (*modify_cq)(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr); @@ -372,6 +374,7 @@ struct verbs_context_ops { void *addr, size_t length, int access); int (*resize_cq)(struct ibv_cq *cq, int cqe); int (*set_ece)(struct ibv_qp *qp, struct ibv_ece *ece); + void (*unimport_mr)(struct ibv_mr *mr); void (*unimport_pd)(struct ibv_pd *pd); }; diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c index 225e93d..45037a5 100644 --- a/libibverbs/dummy_ops.c +++ b/libibverbs/dummy_ops.c @@ -287,6 +287,13 @@ static int get_srq_num(struct ibv_srq *srq, uint32_t *srq_num) return EOPNOTSUPP; } +static struct ibv_mr *import_mr(struct ibv_pd *pd, + uint32_t mr_handle) +{ + errno = EOPNOTSUPP; + return NULL; +} + static struct ibv_pd *import_pd(struct ibv_context *context, uint32_t pd_handle) { @@ -467,6 +474,10 @@ static int set_ece(struct ibv_qp *qp, struct ibv_ece *ece) return EOPNOTSUPP; } +static void unimport_mr(struct ibv_mr *mr) +{ +} + static void unimport_pd(struct ibv_pd *pd) { } @@ -525,6 +536,7 @@ const struct verbs_context_ops verbs_dummy_ops = { free_context, free_dm, get_srq_num, + import_mr, import_pd, modify_cq, modify_flow_action_esp, @@ -553,6 +565,7 @@ const struct verbs_context_ops verbs_dummy_ops = { rereg_mr, resize_cq, set_ece, + unimport_mr, unimport_pd, }; @@ -645,6 +658,7 @@ void verbs_set_ops(struct verbs_context *vctx, SET_PRIV_OP_IC(ctx, free_context); SET_OP(vctx, free_dm); SET_OP(vctx, get_srq_num); + SET_PRIV_OP_IC(vctx, import_mr); SET_PRIV_OP_IC(vctx, import_pd); SET_OP(vctx, modify_cq); SET_OP(vctx, modify_flow_action_esp); @@ -673,6 +687,7 @@ void verbs_set_ops(struct verbs_context *vctx, SET_PRIV_OP(ctx, rereg_mr); SET_PRIV_OP(ctx, resize_cq); SET_PRIV_OP_IC(vctx, set_ece); + SET_PRIV_OP_IC(vctx, unimport_mr); SET_PRIV_OP_IC(vctx, unimport_pd); #undef SET_OP diff --git a/libibverbs/libibverbs.map.in 
b/libibverbs/libibverbs.map.in
index bd6fd34..ada856a 100644
--- a/libibverbs/libibverbs.map.in
+++ b/libibverbs/libibverbs.map.in
@@ -134,9 +134,11 @@ IBVERBS_1.9 {
 IBVERBS_1.10 {
 	global:
 		ibv_import_device;
+		ibv_import_mr;
 		ibv_import_pd;
 		ibv_query_ece;
 		ibv_set_ece;
+		ibv_unimport_mr;
 		ibv_unimport_pd;
 } IBVERBS_1.9;
diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt
index ffca343..b925004 100644
--- a/libibverbs/man/CMakeLists.txt
+++ b/libibverbs/man/CMakeLists.txt
@@ -38,6 +38,7 @@ rdma_man_pages(
   ibv_get_pkey_index.3.md
   ibv_get_srq_num.3.md
   ibv_import_device.3.md
+  ibv_import_mr.3.md
   ibv_import_pd.3.md
   ibv_inc_rkey.3.md
   ibv_modify_qp.3
@@ -103,6 +104,7 @@ rdma_alias_man_pages(
   ibv_get_cq_event.3 ibv_ack_cq_events.3
   ibv_get_device_list.3 ibv_free_device_list.3
   ibv_import_pd.3 ibv_unimport_pd.3
+  ibv_import_mr.3 ibv_unimport_mr.3
   ibv_open_device.3 ibv_close_device.3
   ibv_open_xrcd.3 ibv_close_xrcd.3
   ibv_rate_to_mbps.3 mbps_to_ibv_rate.3
diff --git a/libibverbs/man/ibv_import_mr.3.md b/libibverbs/man/ibv_import_mr.3.md
new file mode 100644
index 0000000..f6346e4
--- /dev/null
+++ b/libibverbs/man/ibv_import_mr.3.md
@@ -0,0 +1,64 @@
+---
+date: 2020-5-3
+footer: libibverbs
+header: "Libibverbs Programmer's Manual"
+layout: page
+license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md'
+section: 3
+title: ibv_import_mr ibv_unimport_mr
+---
+
+# NAME
+
+ibv_import_mr - import an MR from a given ibv_pd
+
+ibv_unimport_mr - unimport an MR
+
+# SYNOPSIS
+
+```c
+#include <infiniband/verbs.h>
+
+struct ibv_mr *ibv_import_mr(struct ibv_pd *pd, uint32_t mr_handle);
+void ibv_unimport_mr(struct ibv_mr *mr);
+
+```
+
+# DESCRIPTION
+
+**ibv_import_mr()** returns a Memory Region (MR) that is associated with the given
+*mr_handle* in the RDMA context associated with the given *pd*.
+
+The input *mr_handle* value must be a valid kernel handle for an MR object in the
+associated RDMA context. It can be obtained from the original MR by reading its
+ibv_mr->handle member value.
+
+**ibv_unimport_mr()** unimports the MR.
+Once use of the MR has ended, either ibv_dereg_mr() or ibv_unimport_mr() should be
+called. The former goes to the kernel to destroy the object, while the latter only
+cleans up whatever is needed to reverse the import, without calling the kernel.
+
+It is the responsibility of the application to coordinate between all ibv_context(s)
+that use this MR. Once the MR is destroyed, no other process can touch the object
+except to unimport it. All users of the context must collaborate to ensure this.
+
+# RETURN VALUE
+
+**ibv_import_mr()** returns a pointer to the allocated MR, or NULL if the request fails.
+
+# NOTES
+
+The *addr* field of the imported MR is not applicable; a NULL value is expected.
+ +# SEE ALSO + +**ibv_reg_mr**(3), +**ibv_reg_dm_mr**(3), +**ibv_reg_mr_iova**(3), +**ibv_reg_mr_iova2**(3), +**ibv_dereg_mr**(3), + +# AUTHOR + +Yishai Hadas + diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index ea32c02..b54e2b8 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -366,6 +366,22 @@ void ibv_unimport_pd(struct ibv_pd *pd) } +/** + * ibv_import_mr - Import a memory region + */ +struct ibv_mr *ibv_import_mr(struct ibv_pd *pd, uint32_t mr_handle) +{ + return get_ops(pd->context)->import_mr(pd, mr_handle); +} + +/** + * ibv_unimport_mr - Unimport a memory region + */ +void ibv_unimport_mr(struct ibv_mr *mr) +{ + get_ops(mr->context)->unimport_mr(mr); +} + LATEST_SYMVER_FUNC(ibv_rereg_mr, 1_1, "IBVERBS_1.1", int, struct ibv_mr *mr, int flags, diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 1a2c68e..6d66a31 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -2258,6 +2258,16 @@ struct ibv_pd *ibv_import_pd(struct ibv_context *context, void ibv_unimport_pd(struct ibv_pd *pd); /** + * ibv_import_mr - Import a memory region + */ +struct ibv_mr *ibv_import_mr(struct ibv_pd *pd, uint32_t mr_handle); + +/** + * ibv_unimport_mr - Unimport a memory region + */ +void ibv_unimport_mr(struct ibv_mr *mr); + +/** * ibv_get_async_event - Get next async event * @event: Pointer to use to return async event * From patchwork Sun Jul 5 08:19:46 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 11644199 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5B3F218B3 for ; Sun, 5 Jul 2020 08:20:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 42AAA206B6 for ; Sun, 5 Jul 2020 08:20:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726591AbgGEIUN (ORCPT ); Sun, 5 Jul 2020 04:20:13 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33104 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726600AbgGEIUM (ORCPT ); Sun, 5 Jul 2020 04:20:12 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with SMTP; 5 Jul 2020 11:20:05 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 0658K50e001726; Sun, 5 Jul 2020 11:20:05 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5OJ009317; Sun, 5 Jul 2020 11:20:05 +0300 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id 0658K5Q6009316; Sun, 5 Jul 2020 11:20:05 +0300 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com Subject: [PATCH V1 rdma-core 10/13] mlx5: Implement the import/unimport MR verbs Date: Sun, 5 Jul 2020 11:19:46 +0300 Message-Id: <1593937189-8744-11-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Implement the import/unimport MR verbs based on their definition as described in the man page. 
It uses the query MR ioctl command to retrieve the original MR properties based on its given handle. Signed-off-by: Yishai Hadas --- libibverbs/cmd_mr.c | 31 +++++++++++++++++++++++++++++++ libibverbs/driver.h | 3 +++ libibverbs/libibverbs.map.in | 1 + providers/mlx5/mlx5.c | 2 ++ providers/mlx5/mlx5.h | 3 +++ providers/mlx5/verbs.c | 24 ++++++++++++++++++++++++ 6 files changed, 64 insertions(+) diff --git a/libibverbs/cmd_mr.c b/libibverbs/cmd_mr.c index cb729b6..42dbe42 100644 --- a/libibverbs/cmd_mr.c +++ b/libibverbs/cmd_mr.c @@ -85,3 +85,34 @@ int ibv_cmd_dereg_mr(struct verbs_mr *vmr) return ret; return 0; } + +int ibv_cmd_query_mr(struct ibv_pd *pd, struct verbs_mr *vmr, + uint32_t mr_handle) +{ + DECLARE_FBCMD_BUFFER(cmd, UVERBS_OBJECT_MR, + UVERBS_METHOD_QUERY_MR, + 4, NULL); + struct ibv_mr *mr = &vmr->ibv_mr; + int ret; + + fill_attr_in_obj(cmd, UVERBS_ATTR_QUERY_MR_HANDLE, mr_handle); + fill_attr_out_ptr(cmd, UVERBS_ATTR_QUERY_MR_RESP_LKEY, + &mr->lkey); + fill_attr_out_ptr(cmd, UVERBS_ATTR_QUERY_MR_RESP_RKEY, + &mr->rkey); + fill_attr_out_ptr(cmd, UVERBS_ATTR_QUERY_MR_RESP_LENGTH, + &mr->length); + + ret = execute_ioctl(pd->context, cmd); + if (ret) + return ret; + + mr->handle = mr_handle; + mr->context = pd->context; + mr->pd = pd; + mr->addr = NULL; + + vmr->mr_type = IBV_MR_TYPE_IMPORTED_MR; + return 0; +} + diff --git a/libibverbs/driver.h b/libibverbs/driver.h index 9065605..4436363 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -80,6 +80,7 @@ enum ibv_gid_type { enum ibv_mr_type { IBV_MR_TYPE_MR, IBV_MR_TYPE_NULL_MR, + IBV_MR_TYPE_IMPORTED_MR, }; struct verbs_mr { @@ -485,6 +486,8 @@ int ibv_cmd_rereg_mr(struct verbs_mr *vmr, uint32_t flags, void *addr, size_t cmd_sz, struct ib_uverbs_rereg_mr_resp *resp, size_t resp_sz); int ibv_cmd_dereg_mr(struct verbs_mr *vmr); +int ibv_cmd_query_mr(struct ibv_pd *pd, struct verbs_mr *vmr, + uint32_t mr_handle); int ibv_cmd_advise_mr(struct ibv_pd *pd, enum ibv_advise_mr_advice advice, uint32_t flags, diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index ada856a..3240e00 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -199,6 +199,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ { ibv_cmd_query_context; ibv_cmd_query_device; ibv_cmd_query_device_ex; + ibv_cmd_query_mr; ibv_cmd_query_port; ibv_cmd_query_qp; ibv_cmd_query_srq; diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c index ea55e0f..0a091f5 100644 --- a/providers/mlx5/mlx5.c +++ b/providers/mlx5/mlx5.c @@ -144,6 +144,7 @@ static const struct verbs_context_ops mlx5_ctx_common_ops = { .destroy_wq = mlx5_destroy_wq, .free_dm = mlx5_free_dm, .get_srq_num = mlx5_get_srq_num, + .import_mr = mlx5_import_mr, .import_pd = mlx5_import_pd, .modify_cq = mlx5_modify_cq, .modify_flow_action_esp = mlx5_modify_flow_action_esp, @@ -160,6 +161,7 @@ static const struct verbs_context_ops mlx5_ctx_common_ops = { .alloc_null_mr = mlx5_alloc_null_mr, .free_context = mlx5_free_context, .set_ece = mlx5_set_ece, + .unimport_mr = mlx5_unimport_mr, .unimport_pd = mlx5_unimport_pd, }; diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h index 3c129d7..939de77 100644 --- a/providers/mlx5/mlx5.h +++ b/providers/mlx5/mlx5.h @@ -1040,6 +1040,9 @@ int mlx5_advise_mr(struct ibv_pd *pd, uint32_t flags, struct ibv_sge *sg_list, uint32_t num_sges); +struct ibv_mr *mlx5_import_mr(struct ibv_pd *pd, + uint32_t mr_handle); +void mlx5_unimport_mr(struct ibv_mr *mr); struct ibv_pd *mlx5_import_pd(struct ibv_context *context, uint32_t 
pd_handle); void mlx5_unimport_pd(struct ibv_pd *pd); diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c index 81e1ed9..915566a 100644 --- a/providers/mlx5/verbs.c +++ b/providers/mlx5/verbs.c @@ -756,6 +756,30 @@ void mlx5_unimport_pd(struct ibv_pd *pd) assert(false); } +struct ibv_mr *mlx5_import_mr(struct ibv_pd *pd, + uint32_t mr_handle) +{ + struct mlx5_mr *mr; + int ret; + + mr = calloc(1, sizeof *mr); + if (!mr) + return NULL; + + ret = ibv_cmd_query_mr(pd, &mr->vmr, mr_handle); + if (ret) { + free(mr); + return NULL; + } + + return &mr->vmr.ibv_mr; +} + +void mlx5_unimport_mr(struct ibv_mr *ibmr) +{ + free(to_mmr(ibmr)); +} + struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) { struct ibv_mw *mw; From patchwork Sun Jul 5 08:19:47 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 11644191 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D9700739 for ; Sun, 5 Jul 2020 08:20:15 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C79A4206B6 for ; Sun, 5 Jul 2020 08:20:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726538AbgGEIUN (ORCPT ); Sun, 5 Jul 2020 04:20:13 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33110 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726605AbgGEIUL (ORCPT ); Sun, 5 Jul 2020 04:20:11 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with SMTP; 5 Jul 2020 11:20:05 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5dw001729; Sun, 5 Jul 2020 11:20:05 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5gG009327; Sun, 5 Jul 2020 11:20:05 +0300 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id 0658K5le009323; Sun, 5 Jul 2020 11:20:05 +0300 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com, Edward Srouji Subject: [PATCH V1 rdma-core 11/13] pyverbs: Support verbs import APIs Date: Sun, 5 Jul 2020 11:19:47 +0300 Message-Id: <1593937189-8744-12-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Edward Srouji Importing a device, PD and MR enables processes to share their context and then share PDs and MRs that is associated with. This commit supports importing/unimporting a device, PD and MR by wrapping the relevant verbs in the current Context, PD and MR objects. 
Reviewed-by: Ido Kalir Signed-off-by: Edward Srouji --- pyverbs/device.pyx | 12 ++++++++-- pyverbs/libibverbs.pxd | 5 +++++ pyverbs/mr.pxd | 1 + pyverbs/mr.pyx | 60 +++++++++++++++++++++++++++++++++++++++++--------- pyverbs/pd.pxd | 1 + pyverbs/pd.pyx | 37 +++++++++++++++++++++++++------ 6 files changed, 96 insertions(+), 20 deletions(-) diff --git a/pyverbs/device.pyx b/pyverbs/device.pyx index 88cd906..95e112b 100755 --- a/pyverbs/device.pyx +++ b/pyverbs/device.pyx @@ -87,6 +87,9 @@ cdef class Context(PyverbsCM): * *cmid* A CMID object. If not None, it means that the device was already opened by a CMID class, and only a pointer assignment is missing. + * *cmd_fd* + A command FD. If passed, the device will be imported from the + given cmd_fd using ibv_import_device. :return: None """ cdef int count @@ -107,10 +110,16 @@ cdef class Context(PyverbsCM): self.name = kwargs.get('name') provider_attr = kwargs.get('attr') cmid = kwargs.get('cmid') + cmd_fd = kwargs.get('cmd_fd') if cmid is not None: self.context = cmid.id.verbs cmid.ctx = self return + if cmd_fd is not None: + self.context = v.ibv_import_device(cmd_fd) + if self.context == NULL: + raise PyverbsRDMAErrno('Failed to import device') + return if self.name is None: raise PyverbsUserError('Device name must be provided') @@ -152,8 +161,7 @@ cdef class Context(PyverbsCM): self.xrcds, self.vars]) rc = v.ibv_close_device(self.context) if rc != 0: - raise PyverbsRDMAErrno('Failed to close device {dev}'. - format(dev=self.device.name)) + raise PyverbsRDMAErrno(f'Failed to close device {self.name}') self.context = NULL @property diff --git a/pyverbs/libibverbs.pxd b/pyverbs/libibverbs.pxd index 7a75511..dfd4924 100755 --- a/pyverbs/libibverbs.pxd +++ b/pyverbs/libibverbs.pxd @@ -601,6 +601,11 @@ cdef extern from 'infiniband/verbs.h': void ibv_wr_start(ibv_qp_ex *qp) int ibv_wr_complete(ibv_qp_ex *qp) void ibv_wr_abort(ibv_qp_ex *qp) + ibv_context *ibv_import_device(int cmd_fd) + ibv_mr *ibv_import_mr(ibv_pd *pd, uint32_t handle) + void ibv_unimport_mr(ibv_mr *mr) + ibv_pd *ibv_import_pd(ibv_context *context, uint32_t handle) + void ibv_unimport_pd(ibv_pd *pd) cdef extern from 'infiniband/driver.h': diff --git a/pyverbs/mr.pxd b/pyverbs/mr.pxd index 82ae79f..7c3bb8e 100644 --- a/pyverbs/mr.pxd +++ b/pyverbs/mr.pxd @@ -14,6 +14,7 @@ cdef class MR(PyverbsCM): cdef object is_huge cdef object is_user_addr cdef void *buf + cdef object _is_imported cpdef read(self, length, offset) cdef class MWBindInfo(PyverbsCM): diff --git a/pyverbs/mr.pyx b/pyverbs/mr.pyx index b7b2196..da566cb 100644 --- a/pyverbs/mr.pyx +++ b/pyverbs/mr.pyx @@ -27,7 +27,7 @@ cdef class MR(PyverbsCM): MR class represents ibv_mr. Buffer allocation in done in the c'tor. Freeing it is done in close(). """ - def __init__(self, PD pd not None, length, access, address=None): + def __init__(self, PD pd not None, length=0, access=0, address=None, **kwargs): """ Allocate a user-level buffer of length and register a Memory Region of the given length and access flags. @@ -37,6 +37,11 @@ cdef class MR(PyverbsCM): :param address: Memory address to register (Optional). If it's not provided, a memory will be allocated in the class initialization. + :param kwargs: Arguments: + * *handle* + A valid kernel handle for a MR object in the given PD. + If passed, the MR will be imported and associated with the + context that is associated with the given PD using ibv_import_mr. 
:return: The newly created MR on success """ super().__init__() @@ -52,7 +57,20 @@ cdef class MR(PyverbsCM): # uintptr_t is guaranteed to be large enough to hold any pointer. # In order to safely cast addr to void*, it is firstly cast to uintptr_t. self.buf = address - else: + + mr_handle = kwargs.get('handle') + # If a MR handle is passed import MR and finish + if mr_handle is not None: + self.mr = v.ibv_import_mr(pd.pd, mr_handle) + if self.mr == NULL: + raise PyverbsRDMAErrno('Failed to import MR') + self._is_imported = True + self.pd = pd + pd.add_ref(self) + return + + # Allocate a buffer + if not address: if self.is_huge: # Rounding up to multiple of HUGE_PAGE_SIZE self.mmap_length = length + (HUGE_PAGE_SIZE - length % HUGE_PAGE_SIZE) \ @@ -77,6 +95,10 @@ cdef class MR(PyverbsCM): self.logger.debug('Registered ibv_mr. Length: {l}, access flags {a}'. format(l=length, a=access)) + def unimport(self): + v.ibv_unimport_mr(self.mr) + self.close() + def __dealloc__(self): self.close() @@ -86,21 +108,24 @@ cdef class MR(PyverbsCM): MR may be deleted directly or indirectly by closing its context, which leaves the Python PD object without the underlying C object, so during destruction, need to check whether or not the C object exists. + In case of an imported MR no deregistration will be done, it's left + for the original MR, in order to prevent double dereg by the GC. :return: None """ if self.mr != NULL: self.logger.debug('Closing MR') - rc = v.ibv_dereg_mr(self.mr) - if rc != 0: - raise PyverbsRDMAError('Failed to dereg MR', rc) + if not self._is_imported: + rc = v.ibv_dereg_mr(self.mr) + if rc != 0: + raise PyverbsRDMAError('Failed to dereg MR', rc) + if not self.is_user_addr: + if self.is_huge: + munmap(self.buf, self.mmap_length) + else: + free(self.buf) self.mr = NULL self.pd = None - if not self.is_user_addr: - if self.is_huge: - munmap(self.buf, self.mmap_length) - else: - free(self.buf) - self.buf = NULL + self.buf = NULL def write(self, data, length): """ @@ -144,6 +169,19 @@ cdef class MR(PyverbsCM): def length(self): return self.mr.length + @property + def handle(self): + return self.mr.handle + + def __str__(self): + print_format = '{:22}: {:<20}\n' + return 'MR\n' + \ + print_format.format('lkey', self.lkey) + \ + print_format.format('rkey', self.rkey) + \ + print_format.format('length', self.length) + \ + print_format.format('buf', self.buf) + \ + print_format.format('handle', self.handle) + cdef class MWBindInfo(PyverbsCM): def __init__(self, MR mr not None, addr, length, mw_access_flags): diff --git a/pyverbs/pd.pxd b/pyverbs/pd.pxd index ae4324a..94d453e 100644 --- a/pyverbs/pd.pxd +++ b/pyverbs/pd.pxd @@ -19,6 +19,7 @@ cdef class PD(PyverbsCM): cdef object ahs cdef object qps cdef object parent_domains + cdef object _is_imported cdef class ParentDomainInitAttr(PyverbsObject): cdef v.ibv_parent_domain_init_attr init_attr diff --git a/pyverbs/pd.pyx b/pyverbs/pd.pyx index d54c4f8..2a35d11 100755 --- a/pyverbs/pd.pyx +++ b/pyverbs/pd.pyx @@ -20,19 +20,31 @@ from pyverbs.qp cimport QP cdef class PD(PyverbsCM): - def __init__(self, object creator not None): + def __init__(self, object creator not None, **kwargs): """ Initializes a PD object. A reference for the creating Context is kept so that Python's GC will destroy the objects in the right order. :param creator: The Context/CMID object creating the PD + :param kwargs: Arguments: + * *handle* + A valid kernel handle for a PD object in the given creator + (Context). 
If passed, the PD will be imported and associated + with the given handle in the given context using ibv_import_pd. """ super().__init__() + pd_handle = kwargs.get('handle') if issubclass(type(creator), Context): # Check if the ibv_pd* was initialized by an inheriting class if self.pd == NULL: - self.pd = v.ibv_alloc_pd((creator).context) + if pd_handle is not None: + self.pd = v.ibv_import_pd((creator).context, pd_handle) + self._is_imported = True + err_str = 'Failed to import PD' + else: + self.pd = v.ibv_alloc_pd((creator).context) + err_str = 'Failed to allocate PD' if self.pd == NULL: - raise PyverbsRDMAErrno('Failed to allocate PD') + raise PyverbsRDMAErrno(err_str) self.ctx = creator elif issubclass(type(creator), CMID): cmid = creator @@ -43,7 +55,7 @@ cdef class PD(PyverbsCM): raise PyverbsUserError('Cannot create PD from {type}' .format(type=type(creator))) self.ctx.add_ref(self) - self.logger.debug('PD: Allocated ibv_pd') + self.logger.debug('Created PD') self.srqs = weakref.WeakSet() self.mrs = weakref.WeakSet() self.mws = weakref.WeakSet() @@ -68,6 +80,10 @@ cdef class PD(PyverbsCM): raise PyverbsRDMAError('Failed to advise MR', rc) return rc + def unimport(self): + v.ibv_unimport_pd(self.pd) + self.close() + def __dealloc__(self): """ Closes the inner PD. @@ -81,15 +97,18 @@ cdef class PD(PyverbsCM): PD may be deleted directly or indirectly by closing its context, which leaves the Python PD object without the underlying C object, so during destruction, need to check whether or not the C object exists. + In case of an imported PD no deallocation will be done, it's left for + the original PD, in order to prevent double dealloc by the GC. :return: None """ if self.pd != NULL: self.logger.debug('Closing PD') close_weakrefs([self.parent_domains, self.qps, self.ahs, self.mws, self.mrs, self.srqs]) - rc = v.ibv_dealloc_pd(self.pd) - if rc != 0: - raise PyverbsRDMAError('Failed to dealloc PD', rc) + if not self._is_imported: + rc = v.ibv_dealloc_pd(self.pd) + if rc != 0: + raise PyverbsRDMAError('Failed to dealloc PD', rc) self.pd = NULL self.ctx = None @@ -109,6 +128,10 @@ cdef class PD(PyverbsCM): else: raise PyverbsError('Unrecognized object type') + @property + def handle(self): + return self.pd.handle + cdef void *pd_alloc(v.ibv_pd *pd, void *pd_context, size_t size, size_t alignment, v.uint64_t resource_type): From patchwork Sun Jul 5 08:19:48 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 11644201 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A88E718C2 for ; Sun, 5 Jul 2020 08:20:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 99807206B6 for ; Sun, 5 Jul 2020 08:20:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726495AbgGEIUM (ORCPT ); Sun, 5 Jul 2020 04:20:12 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33120 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726494AbgGEIUL (ORCPT ); Sun, 5 Jul 2020 04:20:11 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with SMTP; 5 Jul 2020 11:20:06 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5Gt001732; Sun, 5 Jul 2020 11:20:05 
+0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5aw009331; Sun, 5 Jul 2020 11:20:05 +0300 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id 0658K5gj009330; Sun, 5 Jul 2020 11:20:05 +0300 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com, Edward Srouji Subject: [PATCH V1 rdma-core 12/13] Documentation: Add usage example for verbs import Date: Sun, 5 Jul 2020 11:19:48 +0300 Message-Id: <1593937189-8744-13-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Edward Srouji Add documentation for shared PD and verbs import usage in Pyverbs. It includes a code snippet that demonstrates a usage example. Reviewed-by: Ido Kalir Signed-off-by: Edward Srouji --- Documentation/pyverbs.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Documentation/pyverbs.md b/Documentation/pyverbs.md index 77a5406..2516b10 100755 --- a/Documentation/pyverbs.md +++ b/Documentation/pyverbs.md @@ -626,3 +626,43 @@ ctx = Context(name='rocep0s8f0') uar = Mlx5UAR(ctx) uar.close() ``` + +##### Import device, PD and MR +Importing a device, PD and MR enables processes to share their context and then +share the PDs and MRs that are associated with it. +A process creates a device and then uses Linux system calls to dup +its 'cmd_fd' member, which lets another process obtain ownership. +Once the other process obtains the 'cmd_fd', it can import the device, then PD(s) and +MR(s), to share these objects. +Like in C, Pyverbs users are responsible for unimporting the imported objects +(which also closes the Pyverbs instance in this case) after they finish using +them, and they have to synchronize between the different processes in order to +coordinate the closure of the objects. +Unlike in C, closing the underlying objects is currently supported only via the +"original" object (meaning only by the process that created them) and not via +the imported object. This limitation exists because currently there is no +reference or relation between different Pyverbs objects in different processes, +but such support might be added in the future. +Here is a demonstration of importing a device, PD and MR in one process. +```python +from pyverbs.device import Context +from pyverbs.pd import PD +from pyverbs.mr import MR +import pyverbs.enums as e +import os + +ctx = Context(name='ibp0s8f0') +pd = PD(ctx) +mr = MR(pd, 100, e.IBV_ACCESS_LOCAL_WRITE) +cmd_fd_dup = os.dup(ctx.cmd_fd) +imported_ctx = Context(cmd_fd=cmd_fd_dup) +imported_pd = PD(imported_ctx, handle=pd.handle) +imported_mr = MR(imported_pd, handle=mr.handle) +# MRs can be created as usual on the imported PD +secondary_mr = MR(imported_pd, 100, e.IBV_ACCESS_REMOTE_READ) +# Must manually unimport the imported objects (which closes the objects and frees +# other resources that use them) before closing the "original" objects. +# This prevents unexpected behaviours caused by the GC.
+imported_mr.unimport() +imported_pd.unimport() +``` From patchwork Sun Jul 5 08:19:49 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 11644209 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 67522913 for ; Sun, 5 Jul 2020 08:20:17 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 56E3A206BE for ; Sun, 5 Jul 2020 08:20:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726565AbgGEIUQ (ORCPT ); Sun, 5 Jul 2020 04:20:16 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33123 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726572AbgGEIUM (ORCPT ); Sun, 5 Jul 2020 04:20:12 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with SMTP; 5 Jul 2020 11:20:06 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 0658K6W9001735; Sun, 5 Jul 2020 11:20:06 +0300 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id 0658K5qK009336; Sun, 5 Jul 2020 11:20:05 +0300 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id 0658K5D0009335; Sun, 5 Jul 2020 11:20:05 +0300 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: jgg@mellanox.com, yishaih@mellanox.com, maorg@mellanox.com, Edward Srouji Subject: [PATCH V1 rdma-core 13/13] tests: Add a shared PD Pyverbs test Date: Sun, 5 Jul 2020 11:19:49 +0300 Message-Id: <1593937189-8744-14-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> References: <1593937189-8744-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Edward Srouji The test creates a client and server sides of RC resources. Then the server resources are imported and used for RDMA write traffic. 
Reviewed-by: Ido Kalir Signed-off-by: Edward Srouji --- tests/CMakeLists.txt | 1 + tests/base.py | 11 ++++-- tests/test_shared_pd.py | 95 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 tests/test_shared_pd.py diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1708fb9..33ffc78 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -25,6 +25,7 @@ rdma_python_test(tests test_qpex.py test_rdmacm.py test_relaxed_ordering.py + test_shared_pd.py utils.py ) diff --git a/tests/base.py b/tests/base.py index 2824ca2..4311362 100755 --- a/tests/base.py +++ b/tests/base.py @@ -247,10 +247,17 @@ class BaseResources(object): :param ib_port: IB port of the device to use (default: 1) :param gid_index: Which GID index to use (default: 0) """ - self.ctx = Context(name=dev_name) + self.dev_name = dev_name self.gid_index = gid_index - self.pd = PD(self.ctx) self.ib_port = ib_port + self.create_context() + self.create_pd() + + def create_context(self): + self.ctx = Context(name=self.dev_name) + + def create_pd(self): + self.pd = PD(self.ctx) class TrafficResources(BaseResources): diff --git a/tests/test_shared_pd.py b/tests/test_shared_pd.py new file mode 100644 index 0000000..e533074 --- /dev/null +++ b/tests/test_shared_pd.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +# Copyright (c) 2020 Mellanox Technologies, Inc. All rights reserved. See COPYING file +""" +Test module for Shared PD. +""" +import unittest +import errno +import os + +from tests.test_qpex import QpExRCRDMAWrite +from tests.base import RDMATestCase +from pyverbs.device import Context +from pyverbs.pd import PD +from pyverbs.mr import MR +import pyverbs.enums as e +import tests.utils as u + + +def get_import_res_class(base_class): + """ + This function creates a class that inherits from base_class, which can be + any BaseResources type. Its purpose is to behave exactly as base_class does, + except for object creation: instead of creating a context, PD and MR, it + imports them. Hence the returned class must be initialized with (cmd_fd, pd_handle, + mr_handle, mr_addr, **kwargs), while kwargs are the arguments needed + (if any) for base_class. In addition it has an unimport_resources() method + which unimports all the resources and closes the imported PD object.
+ :param base_class: The base resources class to inherit from + :return: ImportResources(cmd_fd, pd_handle, mr_handle, mr_addr, **kwargs) + class + """ + class ImportResources(base_class): + def __init__(self, cmd_fd, pd_handle, mr_handle, mr_addr=None, **kwargs): + self.cmd_fd = cmd_fd + self.pd_handle = pd_handle + self.mr_handle = mr_handle + self.mr_addr = mr_addr + super(ImportResources, self).__init__(**kwargs) + + def create_context(self): + try: + self.ctx = Context(cmd_fd=self.cmd_fd) + except u.PyverbsRDMAError as ex: + if ex.error_code in [errno.EOPNOTSUPP, errno.EPROTONOSUPPORT]: + raise unittest.SkipTest('Importing a device is not supported') + raise ex + + def create_pd(self): + self.pd = PD(self.ctx, handle=self.pd_handle) + + def create_mr(self): + self.mr = MR(self.pd, handle=self.mr_handle, address=self.mr_addr) + + def unimport_resources(self): + self.mr.unimport() + self.pd.unimport() + self.pd.close() + + return ImportResources + + +class SharedPDTestCase(RDMATestCase): + def setUp(self): + super().setUp() + self.iters = 10 + self.server_res = None + self.imported_res = [] + + def tearDown(self): + for res in self.imported_res: + res.unimport_resources() + super().tearDown() + + def test_imported_rc_ex_rdma_write(self): + setup_params = {'dev_name': self.dev_name, 'ib_port': self.ib_port, + 'gid_index': self.gid_index} + self.server_res = QpExRCRDMAWrite(**setup_params) + cmd_fd_dup = os.dup(self.server_res.ctx.cmd_fd) + import_cls = get_import_res_class(QpExRCRDMAWrite) + server_import = import_cls( + cmd_fd_dup, self.server_res.pd.handle, self.server_res.mr.handle, + # The imported MR's address is NULL, so using the address of the + # "main" MR object to be able to validate the message + self.server_res.mr.buf, + **setup_params) + self.imported_res.append(server_import) + client = QpExRCRDMAWrite(**setup_params) + client.pre_run(server_import.psn, server_import.qpn) + server_import.pre_run(client.psn, client.qpn) + client.rkey = server_import.mr.rkey + server_import.rkey = client.mr.rkey + client.raddr = server_import.mr.buf + server_import.raddr = client.mr.buf + u.rdma_traffic(client, server_import, self.iters, self.gid_index, + self.ib_port, send_op=e.IBV_QP_EX_WITH_RDMA_WRITE)
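As a closing note, the new test should be runnable with the standard unittest loader from the rdma-core source tree; this invocation is an assumption (the series itself does not state one), and the test skips itself when device import is unsupported, as create_context() above shows.

```python
# Hypothetical invocation of the new test, assuming the 'tests' package is
# importable from the rdma-core source tree and an RDMA device is available;
# on EOPNOTSUPP/EPROTONOSUPPORT the test skips with
# 'Importing a device is not supported'.
import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    'tests.test_shared_pd.SharedPDTestCase.test_imported_rc_ex_rdma_write')
unittest.TextTestRunner(verbosity=2).run(suite)
```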