From patchwork Wed Dec 5 13:24:46 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 10714191 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A034F1057 for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 926B02CF37 for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 846802CF32; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 7FDE82CF32 for ; Wed, 5 Dec 2018 13:25:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727514AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:36140 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727491AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with ESMTPS (AES256-SHA encrypted); 5 Dec 2018 15:31:10 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP9Be000822; Wed, 5 Dec 2018 15:25:09 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP9I3011302; Wed, 5 Dec 2018 15:25:09 +0200 Received: (from yishaih@localhost) by 
vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id wB5DP95R011301; Wed, 5 Dec 2018 15:25:09 +0200 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: yishaih@mellanox.com, monis@mellanox.com, guyle@mellanox.com, aviadye@mellanox.com, jgg@mellanox.com, majd@mellanox.com Subject: [PATCH rdma-core 1/6] Update kernel headers Date: Wed, 5 Dec 2018 15:24:46 +0200 Message-Id: <1544016291-10815-2-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> References: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP To commit 90523ed0f2c6 ("IB/mlx5: Add advise_mr() support") Signed-off-by: Yishai Hadas --- kernel-headers/rdma/ib_user_ioctl_cmds.h | 18 ++++++++++++++++++ kernel-headers/rdma/ib_user_ioctl_verbs.h | 9 +++++++++ kernel-headers/rdma/ib_user_verbs.h | 2 +- kernel-headers/rdma/mlx5-abi.h | 2 ++ kernel-headers/rdma/rdma_netlink.h | 14 ++++++++++++++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/kernel-headers/rdma/ib_user_ioctl_cmds.h b/kernel-headers/rdma/ib_user_ioctl_cmds.h index 2c881aa..a931100 100644 --- a/kernel-headers/rdma/ib_user_ioctl_cmds.h +++ b/kernel-headers/rdma/ib_user_ioctl_cmds.h @@ -63,6 +63,16 @@ enum { UVERBS_ATTR_UHW_OUT, }; +enum uverbs_methods_device { + UVERBS_METHOD_INVOKE_WRITE, +}; + +enum uverbs_attrs_invoke_write_cmd_attr_ids { + UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_CORE_OUT, + UVERBS_ATTR_WRITE_CMD, +}; + enum uverbs_attrs_create_cq_cmd_attr_ids { UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_ATTR_CREATE_CQ_CQE, @@ -133,8 +143,16 @@ enum uverbs_attrs_reg_dm_mr_cmd_attr_ids { UVERBS_ATTR_REG_DM_MR_RESP_RKEY, }; +enum uverbs_attrs_advise_mr_cmd_attr_ids { + UVERBS_ATTR_ADVISE_MR_PD_HANDLE, + UVERBS_ATTR_ADVISE_MR_ADVICE, + UVERBS_ATTR_ADVISE_MR_FLAGS, + UVERBS_ATTR_ADVISE_MR_SGE_LIST, +}; + enum 
uverbs_methods_mr { UVERBS_METHOD_DM_MR_REG, + UVERBS_METHOD_ADVISE_MR, }; enum uverbs_attrs_create_counters_cmd_attr_ids { diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h index 6cdf192..80d5814 100644 --- a/kernel-headers/rdma/ib_user_ioctl_verbs.h +++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h @@ -157,4 +157,13 @@ enum ib_uverbs_read_counters_flags { IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0, }; +enum ib_uverbs_advise_mr_advice { + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH, + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, +}; + +enum ib_uverbs_advise_mr_flag { + IB_UVERBS_ADVISE_MR_FLAG_FLUSH = (1 << 0), +}; + #endif diff --git a/kernel-headers/rdma/ib_user_verbs.h b/kernel-headers/rdma/ib_user_verbs.h index c586fc4..480d9a6 100644 --- a/kernel-headers/rdma/ib_user_verbs.h +++ b/kernel-headers/rdma/ib_user_verbs.h @@ -46,7 +46,7 @@ #define IB_USER_VERBS_ABI_VERSION 6 #define IB_USER_VERBS_CMD_THRESHOLD 50 -enum { +enum ib_uverbs_write_cmds { IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, diff --git a/kernel-headers/rdma/mlx5-abi.h b/kernel-headers/rdma/mlx5-abi.h index 8fa9f90..87b3198 100644 --- a/kernel-headers/rdma/mlx5-abi.h +++ b/kernel-headers/rdma/mlx5-abi.h @@ -48,6 +48,7 @@ enum { MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6, MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7, MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8, + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9, }; enum { @@ -236,6 +237,7 @@ enum mlx5_ib_query_dev_resp_flags { /* Support 128B CQE compression */ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, + MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, }; enum mlx5_ib_tunnel_offloads { diff --git a/kernel-headers/rdma/rdma_netlink.h b/kernel-headers/rdma/rdma_netlink.h index f9c41bf..2f311c4 100644 --- a/kernel-headers/rdma/rdma_netlink.h +++ b/kernel-headers/rdma/rdma_netlink.h @@ -428,6 +428,20 
@@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_DRIVER_U64, /* u64 */ /* + * Unique ID for the resource needed to allow executing + * query command for the specific resource without need + * to dump and scan all resources. + */ + RDMA_NLDEV_ATTR_RES_ID, /* u64 */ + /* + * Parent ID points to the object from which this specific + * was created. It allows to group objects by their origin. + * For example, all PDs created from same CTX will have same + * parent ID. + */ + RDMA_NLDEV_ATTR_RES_PARENT_ID, /* u64 */ + + /* * Always the end */ RDMA_NLDEV_ATTR_MAX From patchwork Wed Dec 5 13:24:47 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 10714193 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D66B117DB for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C7C772CF37 for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id B98AE2CF38; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 141FA2CF33 for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727520AbeLENZQ (ORCPT ); Wed, 5 Dec 2018 08:25:16 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:36137 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727468AbeLENZP (ORCPT ); 
Wed, 5 Dec 2018 08:25:15 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with ESMTPS (AES256-SHA encrypted); 5 Dec 2018 15:31:10 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP92I000825; Wed, 5 Dec 2018 15:25:09 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP9oY011308; Wed, 5 Dec 2018 15:25:09 +0200 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id wB5DP9sc011307; Wed, 5 Dec 2018 15:25:09 +0200 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: yishaih@mellanox.com, monis@mellanox.com, guyle@mellanox.com, aviadye@mellanox.com, jgg@mellanox.com, majd@mellanox.com Subject: [PATCH rdma-core 2/6] verbs: Add helper for command interface Date: Wed, 5 Dec 2018 15:24:47 +0200 Message-Id: <1544016291-10815-3-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> References: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Moni Shoua Add fill_attr_in_ptr_array() to store array of elements in a command buffer. 
Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Yishai Hadas --- libibverbs/cmd_ioctl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libibverbs/cmd_ioctl.h b/libibverbs/cmd_ioctl.h index df3dc41..5587b86 100644 --- a/libibverbs/cmd_ioctl.h +++ b/libibverbs/cmd_ioctl.h @@ -376,6 +376,9 @@ static inline size_t _array_len(size_t size, size_t nelems) #define fill_attr_out_ptr_array(cmd, attr_id, ptr, nelems) \ fill_attr_out(cmd, attr_id, ptr, _array_len(sizeof(*ptr), nelems)) +#define fill_attr_in_ptr_array(cmd, attr_id, ptr, nelems) \ + fill_attr_in(cmd, attr_id, ptr, _array_len(sizeof(*ptr), nelems)) + static inline size_t __check_divide(size_t val, unsigned int div) { assert(val % div == 0); From patchwork Wed Dec 5 13:24:48 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 10714199 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A02111057 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 8E2E42CF32 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 8217B2CF34; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 7CFE42CF35 for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727521AbeLENZR (ORCPT ); Wed, 5 Dec 2018 08:25:17 -0500 Received: 
from mail-il-dmz.mellanox.com ([193.47.165.129]:36138 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727500AbeLENZQ (ORCPT ); Wed, 5 Dec 2018 08:25:16 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with ESMTPS (AES256-SHA encrypted); 5 Dec 2018 15:31:11 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP9ad000828; Wed, 5 Dec 2018 15:25:09 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP98r011312; Wed, 5 Dec 2018 15:25:09 +0200 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id wB5DP9mP011311; Wed, 5 Dec 2018 15:25:09 +0200 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: yishaih@mellanox.com, monis@mellanox.com, guyle@mellanox.com, aviadye@mellanox.com, jgg@mellanox.com, majd@mellanox.com Subject: [PATCH rdma-core 3/6] verbs: Introduce ibv_advise_mr verb Date: Wed, 5 Dec 2018 15:24:48 +0200 Message-Id: <1544016291-10815-4-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> References: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Moni Shoua Introduce a new verb named ibv_advise_mr(), it includes: - The application interface. - The command interface with the kernel. The purpose of this patch is to give an advice to the kernel about an address range belongs to a memory region. Performance - applications that are aware of future access patterns can use this verb in order to leverage this knowledge to improve system or application performance. One example is for pre-fetching a range part of an on-demand paging MR. 
On-demand-paging (ODP) is a technique to alleviate much of the shortcomings of memory registration. The main idea is that applications no longer pin down the underlying physical pages of the address space and need not track the validity of the mappings. Rather, the HCA requests the latest translations from the OS when pages are not present, and the OS invalidates translations which are no longer valid due to mapping changes. Pre-fetching ODP address ranges, therefore make them present before the actual IO is conducted. This would provide a way to reduce latency by overlapping paging-in and either compute time or IO to other ranges. Signed-off-by: Moni Shoua Signed-off-by: Aviad Yehezkel Reviewed-by: Guy Levi Signed-off-by: Yishai Hadas --- libibverbs/CMakeLists.txt | 1 + libibverbs/cmd_mr.c | 54 ++++++++++++++++ libibverbs/driver.h | 10 +++ libibverbs/dummy_ops.c | 11 ++++ libibverbs/libibverbs.map.in | 1 + libibverbs/man/CMakeLists.txt | 1 + libibverbs/man/ibv_advise_mr.3.md | 126 ++++++++++++++++++++++++++++++++++++++ libibverbs/verbs.h | 28 +++++++++ libibverbs/verbs_api.h | 6 ++ 9 files changed, 238 insertions(+) create mode 100644 libibverbs/cmd_mr.c create mode 100644 libibverbs/man/ibv_advise_mr.3.md diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt index ddf5995..2ddafd6 100644 --- a/libibverbs/CMakeLists.txt +++ b/libibverbs/CMakeLists.txt @@ -36,6 +36,7 @@ rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map" cmd_fallback.c cmd_flow_action.c cmd_ioctl.c + cmd_mr.c compat-1_0.c device.c dummy_ops.c diff --git a/libibverbs/cmd_mr.c b/libibverbs/cmd_mr.c new file mode 100644 index 0000000..aee63a8 --- /dev/null +++ b/libibverbs/cmd_mr.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include + +int ibv_cmd_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +{ + DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_MR, + UVERBS_METHOD_ADVISE_MR, + 4); + + fill_attr_in_obj(cmd, UVERBS_ATTR_ADVISE_MR_PD_HANDLE, pd->handle); + fill_attr_const_in(cmd, UVERBS_ATTR_ADVISE_MR_ADVICE, advice); + fill_attr_in_uint32(cmd, UVERBS_ATTR_ADVISE_MR_FLAGS, flags); + fill_attr_in_ptr_array(cmd, UVERBS_ATTR_ADVISE_MR_SGE_LIST, + sg_list, num_sge); + + return execute_ioctl(pd->context, cmd); +} diff --git a/libibverbs/driver.h b/libibverbs/driver.h index 9030dc8..b9f648c 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -218,6 +218,11 @@ struct verbs_counters { * Keep sorted. */ struct verbs_context_ops { + int (*advise_mr)(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges); struct ibv_dm *(*alloc_dm)(struct ibv_context *context, struct ibv_alloc_dm_attr *attr); struct ibv_mw *(*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type); @@ -440,6 +445,11 @@ int ibv_cmd_rereg_mr(struct verbs_mr *vmr, uint32_t flags, void *addr, size_t cmd_sz, struct ib_uverbs_rereg_mr_resp *resp, size_t resp_sz); int ibv_cmd_dereg_mr(struct verbs_mr *vmr); +int ibv_cmd_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge); int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type, struct ibv_mw *mw, struct ibv_alloc_mw *cmd, size_t cmd_size, diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c index 43f8430..a5e9380 100644 --- a/libibverbs/dummy_ops.c +++ b/libibverbs/dummy_ops.c @@ -33,6 +33,15 @@ #include "ibverbs.h" #include +static int advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges) +{ + return ENOSYS; +} + static struct ibv_dm 
*alloc_dm(struct ibv_context *context, struct ibv_alloc_dm_attr *attr) { @@ -436,6 +445,7 @@ static int resize_cq(struct ibv_cq *cq, int cqe) * Keep sorted. */ const struct verbs_context_ops verbs_dummy_ops = { + advise_mr, alloc_dm, alloc_mw, alloc_null_mr, @@ -550,6 +560,7 @@ void verbs_set_ops(struct verbs_context *vctx, } \ } while (0) + SET_OP(vctx, advise_mr); SET_OP(vctx, alloc_dm); SET_OP(ctx, alloc_mw); SET_OP(vctx, alloc_null_mr); diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index c489c76..4bffb1b 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -120,6 +120,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ { __ioctl_final_num_attrs; _verbs_init_and_alloc_context; execute_ioctl; + ibv_cmd_advise_mr; ibv_cmd_alloc_dm; ibv_cmd_alloc_mw; ibv_cmd_alloc_pd; diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt index 6efd671..4d5abef 100644 --- a/libibverbs/man/CMakeLists.txt +++ b/libibverbs/man/CMakeLists.txt @@ -1,4 +1,5 @@ rdma_man_pages( + ibv_advise_mr.3.md ibv_alloc_dm.3 ibv_alloc_mw.3 ibv_alloc_null_mr.3.md diff --git a/libibverbs/man/ibv_advise_mr.3.md b/libibverbs/man/ibv_advise_mr.3.md new file mode 100644 index 0000000..e2f51e6 --- /dev/null +++ b/libibverbs/man/ibv_advise_mr.3.md @@ -0,0 +1,126 @@ +--- +date: 2018-10-19 +footer: libibverbs +header: "Libibverbs Programmer's Manual" +layout: page +license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md' +section: 3 +title: IBV_ADVISE_MR +--- + +# NAME + +ibv_advise_mr - Gives advice or directions to the kernel about an + address range belongs to a memory region (MR). + +# SYNOPSIS + +```c +#include + +int ibv_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +``` + +# DESCRIPTION + +**ibv_advise_mr()** Give advice or directions to the kernel about an +address range belonging to a memory region (MR). 
+Applications that are aware of future access patterns can use this verb +in order to leverage this knowledge to improve system or +application performance. + +**Conventional advice values** + +*IBV_ADVISE_MR_ADVICE_PREFETCH* +: Pre-fetch a range of an on-demand paging MR. + Make pages present with read-only permission before the actual IO is conducted. + This would provide a way to reduce latency by overlapping paging-in + and either compute time or IO to other ranges. + +*IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE* +: Like IBV_ADVISE_MR_ADVICE_PREFETCH but with read-access + and write-access permission to the fetched memory. + +# ARGUMENTS +*pd* +: The protection domain (PD) associated with the MR. + +*advice* +: The requested advice value (as listed above). + +*flags* +: Describes the properties of the advise operation + **Conventional flag values** + *IBV_ADVISE_MR_FLAG_FLUSH* + : Request to be a synchronized operation. Return to the caller + after the operation is completed. + +*sg_list* +: Pointer to the s/g array + When using the IBV_ADVISE_MR_ADVICE_PREFETCH advice value, all the lkeys of all + the scatter gather elements (SGEs) must be associated with ODP MRs + (MRs that were registered with IBV_ACCESS_ON_DEMAND). + +*num_sge* +: Number of elements in the s/g array + +# RETURN VALUE + +**ibv_advise_mr()** returns 0 when the call was successful, or the value + of errno on failure (which indicates the failure reason). + +*ENOSYS* +: libibverbs or provider driver doesn't support the ibv_advise_mr() verb. + +*ENOTSUP* +: The advise operation isn't supported. + +*EFAULT* +: In one of the following: + o When the range requested is out of the MR bounds, or when parts of + it are not part of the process address space. + o One of the lkeys provided in the scatter gather list is invalid or + with wrong write access. + +*EINVAL* +: In one of the following: + o The PD is invalid. + o The flags are invalid.
+ +# NOTES + +An application may pre-fetch any address range within an ODP MR when using the +**IBV_ADVISE_MR_ADVICE_PREFETCH** or **IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE** advice. +Semantically, this operation is best-effort. That means the kernel does not +guarantee that the underlying pages are updated in the HCA or that the pre-fetched pages +will remain resident. + +When using **IBV_ADVISE_MR_ADVICE_PREFETCH** or **IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE** +advice, the operation will be done in the following stages: + o Page in the user pages to memory (pages aren't pinned). + o Get the DMA mapping of these user pages. + o Post the underlying page translations to the HCA. + +If **IBV_ADVISE_MR_FLAG_FLUSH** is specified then the underlying pages are +guaranteed to be updated in the HCA before returning SUCCESS. +Otherwise the driver can choose to postpone the posting of the new translations +to the HCA. +When performing a local RDMA access operation it is recommended to use the +IBV_ADVISE_MR_FLAG_FLUSH flag with one of the pre-fetch advices to +increase the probability that the page translations are valid in the HCA +and avoid future page faults.
+ +# SEE ALSO + +**ibv_reg_mr**(3), +**ibv_rereg_mr**(3), +**ibv_dereg_mr**(3) + +# AUTHOR + +Aviad Yehezkel + diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 62c6922..caf78f4 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -1786,6 +1786,11 @@ struct ibv_values_ex { struct verbs_context { /* "grows up" - new fields go here */ + int (*advise_mr)(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges); struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd); int (*read_counters)(struct ibv_counters *counters, uint64_t *counters_value, @@ -2210,6 +2215,29 @@ struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context); int ibv_destroy_comp_channel(struct ibv_comp_channel *channel); /** + * ibv_advise_mr - Gives advice about an address range in MRs + * @pd - protection domain of all MRs for which the advice is for + * @advice - type of advice + * @flags - advice modifiers + * @sg_list - an array of memory ranges + * @num_sge - number of elements in the array + */ +static inline int ibv_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +{ + struct verbs_context *vctx; + + vctx = verbs_get_ctx_op(pd->context, advise_mr); + if (!vctx) + return ENOSYS; + + return vctx->advise_mr(pd, advice, flags, sg_list, num_sge); +} + +/** * ibv_alloc_dm - Allocate device memory * @context - Context DM will be attached to * @attr - Attributes to allocate the DM with diff --git a/libibverbs/verbs_api.h b/libibverbs/verbs_api.h index 4ac1335..bdfd677 100644 --- a/libibverbs/verbs_api.h +++ b/libibverbs/verbs_api.h @@ -85,6 +85,12 @@ #define ibv_flow_action_esp_encap ib_uverbs_flow_action_esp_encap #define ibv_flow_action_esp ib_uverbs_flow_action_esp +#define ibv_advise_mr_advice ib_uverbs_advise_mr_advice +#define IBV_ADVISE_MR_ADVICE_PREFETCH IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH +#define 
IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE + +#define IBV_ADVISE_MR_FLAG_FLUSH IB_UVERBS_ADVISE_MR_FLAG_FLUSH + #define IBV_QPF_GRH_REQUIRED IB_UVERBS_QPF_GRH_REQUIRED #endif From patchwork Wed Dec 5 13:24:49 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 10714201 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E07F417DB for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D0CF02CF32 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id C50852CF34; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 6E99A2CF33 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727511AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:36153 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727496AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with ESMTPS (AES256-SHA encrypted); 5 Dec 2018 15:31:11 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP9dZ000831; Wed, 5 Dec 2018 15:25:09 +0200 
Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id wB5DP9x8011316; Wed, 5 Dec 2018 15:25:09 +0200 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id wB5DP9nm011315; Wed, 5 Dec 2018 15:25:09 +0200 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: yishaih@mellanox.com, monis@mellanox.com, guyle@mellanox.com, aviadye@mellanox.com, jgg@mellanox.com, majd@mellanox.com Subject: [PATCH rdma-core 4/6] mlx5: Support advise_mr verb Date: Wed, 5 Dec 2018 15:24:49 +0200 Message-Id: <1544016291-10815-5-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> References: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Moni Shoua Add an implementation to verb ibv_advise_mr(). 
Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Yishai Hadas --- providers/mlx5/mlx5.c | 1 + providers/mlx5/mlx5.h | 6 +++++- providers/mlx5/verbs.c | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c index 86939bf..0d71992 100644 --- a/providers/mlx5/mlx5.c +++ b/providers/mlx5/mlx5.c @@ -115,6 +115,7 @@ static const struct verbs_context_ops mlx5_ctx_common_ops = { .attach_mcast = mlx5_attach_mcast, .detach_mcast = mlx5_detach_mcast, + .advise_mr = mlx5_advise_mr, .alloc_dm = mlx5_alloc_dm, .alloc_parent_domain = mlx5_alloc_parent_domain, .alloc_td = mlx5_alloc_td, diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h index c812195..977bc0a 100644 --- a/providers/mlx5/mlx5.h +++ b/providers/mlx5/mlx5.h @@ -904,7 +904,11 @@ int mlx5_read_counters(struct ibv_counters *counters, uint64_t *counters_value, uint32_t ncounters, uint32_t flags); - +int mlx5_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sges); static inline void *mlx5_find_uidx(struct mlx5_context *ctx, uint32_t uidx) { int tind = uidx >> MLX5_UIDX_TABLE_SHIFT; diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c index 9e5bc47..0dc7807 100644 --- a/providers/mlx5/verbs.c +++ b/providers/mlx5/verbs.c @@ -493,6 +493,15 @@ free: return 0; } +int mlx5_advise_mr(struct ibv_pd *pd, + enum ibv_advise_mr_advice advice, + uint32_t flags, + struct ibv_sge *sg_list, + uint32_t num_sge) +{ + return ibv_cmd_advise_mr(pd, advice, flags, sg_list, num_sge); +} + struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) { struct ibv_mw *mw; From patchwork Wed Dec 5 13:24:50 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 10714197 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by 
pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 153B414E2 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 056972CF32 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id EE6462CF39; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A7AC92CF32 for ; Wed, 5 Dec 2018 13:25:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727515AbeLENZQ (ORCPT ); Wed, 5 Dec 2018 08:25:16 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:36154 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727486AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with ESMTPS (AES256-SHA encrypted); 5 Dec 2018 15:31:11 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wB5DPAYY000834; Wed, 5 Dec 2018 15:25:10 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id wB5DPAxk011320; Wed, 5 Dec 2018 15:25:10 +0200 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id wB5DPAgt011319; Wed, 5 Dec 2018 15:25:10 +0200 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: yishaih@mellanox.com, monis@mellanox.com, guyle@mellanox.com, aviadye@mellanox.com, jgg@mellanox.com, majd@mellanox.com Subject: [PATCH rdma-core 5/6] verbs: Device 
memory cannot be on demand in rc_pingpong Date: Wed, 5 Dec 2018 15:24:50 +0200 Message-Id: <1544016291-10815-6-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> References: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Moni Shoua If MR is of type DM it doesn't make sense to ask MR to be on-demand. Fixes: f06164d5ea8d ("verbs: Add device memory support in rc_pingpong example") Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Yishai Hadas --- libibverbs/examples/rc_pingpong.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libibverbs/examples/rc_pingpong.c b/libibverbs/examples/rc_pingpong.c index 089d482..d4c5df5 100644 --- a/libibverbs/examples/rc_pingpong.c +++ b/libibverbs/examples/rc_pingpong.c @@ -856,6 +856,11 @@ int main(int argc, char *argv[]) return 1; } + if (use_odp && use_dm) { + fprintf(stderr, "DM memory region can't be on demand\n"); + return 1; + } + if (use_ts) { ts.comp_recv_max_time_delta = 0; ts.comp_recv_min_time_delta = 0xffffffff; From patchwork Wed Dec 5 13:24:51 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 10714203 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2E1C51923 for ; Wed, 5 Dec 2018 13:25:19 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 1DC942CF32 for ; Wed, 5 Dec 2018 13:25:19 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 121D32CF33; Wed, 5 Dec 2018 13:25:19 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 
3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 9382E2CF37 for ; Wed, 5 Dec 2018 13:25:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727505AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:36162 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727498AbeLENZP (ORCPT ); Wed, 5 Dec 2018 08:25:15 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from yishaih@mellanox.com) with ESMTPS (AES256-SHA encrypted); 5 Dec 2018 15:31:11 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id wB5DPAfM000838; Wed, 5 Dec 2018 15:25:10 +0200 Received: from vnc17.mtl.labs.mlnx (vnc17.mtl.labs.mlnx [127.0.0.1]) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8) with ESMTP id wB5DPAdC011324; Wed, 5 Dec 2018 15:25:10 +0200 Received: (from yishaih@localhost) by vnc17.mtl.labs.mlnx (8.13.8/8.13.8/Submit) id wB5DPALQ011323; Wed, 5 Dec 2018 15:25:10 +0200 From: Yishai Hadas To: linux-rdma@vger.kernel.org Cc: yishaih@mellanox.com, monis@mellanox.com, guyle@mellanox.com, aviadye@mellanox.com, jgg@mellanox.com, majd@mellanox.com Subject: [PATCH rdma-core 6/6] verbs: Let caller pre-fetch a sub-region of ODP MR in rc_pingpong Date: Wed, 5 Dec 2018 15:24:51 +0200 Message-Id: <1544016291-10815-7-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.8.2.3 In-Reply-To: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> References: <1544016291-10815-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org 
X-Virus-Scanned: ClamAV using ClamSMTP From: Moni Shoua Add option (-P) to pre-fetch the memory that the application uses. This option is applicable only with ODP MR and it should prevent a page fault later on when the HCA tries to read or write from or to the memory. The pre-fetch is done once, just after creating the MR. Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Yishai Hadas --- libibverbs/examples/rc_pingpong.c | 29 ++++++++++++++++++++++++++++- libibverbs/man/ibv_rc_pingpong.1 | 7 +++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/libibverbs/examples/rc_pingpong.c b/libibverbs/examples/rc_pingpong.c index d4c5df5..8b2253d 100644 --- a/libibverbs/examples/rc_pingpong.c +++ b/libibverbs/examples/rc_pingpong.c @@ -57,6 +57,7 @@ enum { static int page_size; static int use_odp; +static int prefetch_mr; static int use_ts; static int validate_buf; static int use_dm; @@ -430,6 +431,22 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, goto clean_dm; } + if (prefetch_mr) { + struct ibv_sge sg_list; + int ret; + + sg_list.lkey = ctx->mr->lkey; + sg_list.addr = (uintptr_t)ctx->buf; + sg_list.length = size; + + ret = ibv_advise_mr(ctx->pd, IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE, + IB_UVERBS_ADVISE_MR_FLAG_FLUSH, + &sg_list, 1); + + if (ret) + fprintf(stderr, "Couldn't prefetch MR(%d). 
Continue anyway\n", ret); + } + if (use_ts) { struct ibv_cq_init_attr_ex attr_ex = { .cqe = rx_depth + 1, @@ -717,6 +734,7 @@ static void usage(const char *argv0) printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); printf(" -o, --odp use on demand paging\n"); + printf(" -P, --prefetch prefetch an ODP MR\n"); printf(" -t, --ts get CQE with timestamp\n"); printf(" -c, --chk validate received buffer\n"); printf(" -j, --dm use device memory\n"); @@ -764,13 +782,14 @@ int main(int argc, char *argv[]) { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, { .name = "odp", .has_arg = 0, .val = 'o' }, + { .name = "prefetch", .has_arg = 0, .val = 'P' }, { .name = "ts", .has_arg = 0, .val = 't' }, { .name = "chk", .has_arg = 0, .val = 'c' }, { .name = "dm", .has_arg = 0, .val = 'j' }, {} }; - c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:otcj", + c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:oPtcj", long_options, NULL); if (c == -1) @@ -832,6 +851,9 @@ int main(int argc, char *argv[]) case 'o': use_odp = 1; break; + case 'P': + prefetch_mr = 1; + break; case 't': use_ts = 1; break; @@ -861,6 +883,11 @@ int main(int argc, char *argv[]) return 1; } + if (!use_odp && prefetch_mr) { + fprintf(stderr, "prefetch is valid only with on-demand memory region\n"); + return 1; + } + if (use_ts) { ts.comp_recv_max_time_delta = 0; ts.comp_recv_min_time_delta = 0xffffffff; diff --git a/libibverbs/man/ibv_rc_pingpong.1 b/libibverbs/man/ibv_rc_pingpong.1 index ad5c834..5561fe5 100644 --- a/libibverbs/man/ibv_rc_pingpong.1 +++ b/libibverbs/man/ibv_rc_pingpong.1 @@ -8,12 +8,12 @@ ibv_rc_pingpong \- simple InfiniBand RC transport test .B ibv_rc_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-m size] [\-r rx depth] [\-n iters] [\-l sl] [\-e] [\-g gid index] -[\-o] [\-t] \fBHOSTNAME\fR +[\-o] [\-P] [\-t] \fBHOSTNAME\fR .B ibv_rc_pingpong [\-p port] [\-d device] [\-i ib port] [\-s 
size] [\-m size] [\-r rx depth] [\-n iters] [\-l sl] [\-e] [\-g gid index] -[\-o] [\-t] +[\-o] [\-P] [\-t] .SH DESCRIPTION .PP @@ -58,6 +58,9 @@ local port \fIGIDINDEX\fR \fB\-o\fR, \fB\-\-odp\fR use on demand paging .TP +\fB\-P\fR, \fB\-\-prefetch\fR +prefetch an ODP MR +.TP \fB\-t\fR, \fB\-\-ts\fR get CQE with timestamp .TP