From patchwork Wed Feb 20 14:57:33 2019
From: Yishai Hadas
To: linux-rdma@vger.kernel.org
Cc: yishaih@mellanox.com, monis@mellanox.com, artemyko@mellanox.com,
    jgg@mellanox.com, majd@mellanox.com
Subject: [PATCH rdma-core 1/6] verbs: Add xrc_odp_caps field to response of
 query_device
Date: Wed, 20 Feb 2019 16:57:33 +0200
Message-Id: <1550674658-13295-2-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1550674658-13295-1-git-send-email-yishaih@mellanox.com>

From: Moni Shoua

ODP support is described by a per-transport bit array; this patch adds such
an array for the XRC transport. In addition, ibv_devinfo is updated to
report this capability.
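
For reference, a minimal sketch of how an application might consume the new
field (hypothetical helper; it assumes an already-opened ibv_context and uses
only the verbs API touched by this series):

	/* Check whether the XRC transport supports ODP for send and
	 * receive operations on this device.
	 */
	static int xrc_odp_send_recv_supported(struct ibv_context *ctx)
	{
		struct ibv_device_attr_ex attrx;
		const uint32_t mask = IBV_ODP_SUPPORT_SEND |
				      IBV_ODP_SUPPORT_RECV;

		if (ibv_query_device_ex(ctx, NULL, &attrx))
			return 0;

		/* xrc_odp_caps is interpreted with the same
		 * ibv_odp_transport_cap_bits as the rc/uc/ud fields.
		 */
		return (attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) &&
		       (attrx.xrc_odp_caps & mask) == mask;
	}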
Signed-off-by: Moni Shoua
Signed-off-by: Yishai Hadas
---
 libibverbs/cmd.c                     | 9 +++++++++
 libibverbs/examples/devinfo.c        | 7 +++++--
 libibverbs/man/ibv_query_device_ex.3 | 1 +
 libibverbs/verbs.h                   | 1 +
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/libibverbs/cmd.c b/libibverbs/cmd.c
index 34c71e5..ec551e2 100644
--- a/libibverbs/cmd.c
+++ b/libibverbs/cmd.c
@@ -265,6 +265,15 @@ int ibv_cmd_query_device_ex(struct ibv_context *context,
 		}
 	}
 
+	if (attr_size >= offsetof(struct ibv_device_attr_ex, xrc_odp_caps) +
+			 sizeof(attr->xrc_odp_caps)) {
+		if (resp->response_length >=
+		    offsetof(struct ib_uverbs_ex_query_device_resp, xrc_odp_caps) +
+		    sizeof(resp->xrc_odp_caps)) {
+			attr->xrc_odp_caps = resp->xrc_odp_caps;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/libibverbs/examples/devinfo.c b/libibverbs/examples/devinfo.c
index 735adb1..75bdd8c 100644
--- a/libibverbs/examples/devinfo.c
+++ b/libibverbs/examples/devinfo.c
@@ -309,9 +309,10 @@ static void print_odp_trans_caps(uint32_t trans)
 	}
 }
 
-static void print_odp_caps(const struct ibv_odp_caps *caps)
+static void print_odp_caps(const struct ibv_device_attr_ex *device_attr)
 {
 	uint64_t unknown_general_caps = ~(IBV_ODP_SUPPORT);
+	const struct ibv_odp_caps *caps = &device_attr->odp_caps;
 
 	/* general odp caps */
 	printf("\tgeneral_odp_caps:\n");
@@ -328,6 +329,8 @@ static void print_odp_caps(const struct ibv_odp_caps *caps)
 	print_odp_trans_caps(caps->per_transport_caps.uc_odp_caps);
 	printf("\tud_odp_caps:\n");
 	print_odp_trans_caps(caps->per_transport_caps.ud_odp_caps);
+	printf("\txrc_odp_caps:\n");
+	print_odp_trans_caps(device_attr->xrc_odp_caps);
 }
 
 static void print_device_cap_flags_ex(uint64_t device_cap_flags_ex)
@@ -531,7 +534,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 	printf("\tmax_pkeys:\t\t\t%d\n", device_attr.orig_attr.max_pkeys);
 	printf("\tlocal_ca_ack_delay:\t\t%d\n", device_attr.orig_attr.local_ca_ack_delay);
 
-	print_odp_caps(&device_attr.odp_caps);
+	print_odp_caps(&device_attr);
 
 	if (device_attr.completion_timestamp_mask)
 		printf("\tcompletion timestamp_mask:\t\t\t0x%016" PRIx64 "\n",
 		       device_attr.completion_timestamp_mask);
diff --git a/libibverbs/man/ibv_query_device_ex.3 b/libibverbs/man/ibv_query_device_ex.3
index 15a430d..3ad9eec 100644
--- a/libibverbs/man/ibv_query_device_ex.3
+++ b/libibverbs/man/ibv_query_device_ex.3
@@ -37,6 +37,7 @@ struct ibv_tm_caps tm_caps; /* Tag matching capabilities
 struct ibv_cq_moderation_caps cq_mod_caps; /* CQ moderation max capabilities */
 uint64_t max_dm_size; /* Max Device Memory size (in bytes) available for allocation */
 struct ibv_pci_atomic_caps atomic_caps; /* PCI atomic operations capabilities, use enum ibv_pci_atomic_op_size */
+uint32_t xrc_odp_caps; /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */
 .in -8
 };

diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 4cc8720..94e4916 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -325,6 +325,7 @@ struct ibv_device_attr_ex {
 	struct ibv_cq_moderation_caps cq_mod_caps;
 	uint64_t max_dm_size;
 	struct ibv_pci_atomic_caps pci_atomic_caps;
+	uint32_t xrc_odp_caps;
 };
 
 enum ibv_mtu {

From patchwork Wed Feb 20 14:57:34 2019
From: Yishai Hadas
To: linux-rdma@vger.kernel.org
Cc: yishaih@mellanox.com, monis@mellanox.com, artemyko@mellanox.com,
    jgg@mellanox.com, majd@mellanox.com
Subject: [PATCH rdma-core 2/6] verbs: Add SRQ as ODP capability support
Date: Wed, 20 Feb 2019 16:57:34 +0200
Message-Id: <1550674658-13295-3-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1550674658-13295-1-git-send-email-yishaih@mellanox.com>

From: Moni Shoua

ODP support with an SRQ is considered a per-transport capability.
Applications need to check that SRQ with ODP is supported if an ODP MR is
accessed through a QP that has a shared receive queue. In addition,
ibv_devinfo is updated to report this capability.
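
A minimal sketch of the capability check described above (hypothetical
helper; the RC transport is shown, and the same test applies to the other
per-transport fields):

	/* Return nonzero if receive WQEs posted to an SRQ may point into
	 * an ODP MR on this device (RC transport).
	 */
	static int rc_srq_odp_supported(struct ibv_context *ctx)
	{
		struct ibv_device_attr_ex attrx;

		if (ibv_query_device_ex(ctx, NULL, &attrx))
			return 0;

		return (attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) &&
		       (attrx.odp_caps.per_transport_caps.rc_odp_caps &
			IBV_ODP_SUPPORT_SRQ_RECV);
	}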
Signed-off-by: Moni Shoua
Signed-off-by: Yishai Hadas
---
 libibverbs/examples/devinfo.c        | 5 ++++-
 libibverbs/man/ibv_query_device_ex.3 | 1 +
 libibverbs/verbs.h                   | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/libibverbs/examples/devinfo.c b/libibverbs/examples/devinfo.c
index 75bdd8c..e928b05 100644
--- a/libibverbs/examples/devinfo.c
+++ b/libibverbs/examples/devinfo.c
@@ -288,7 +288,8 @@ static void print_odp_trans_caps(uint32_t trans)
 					    IBV_ODP_SUPPORT_RECV |
 					    IBV_ODP_SUPPORT_WRITE |
 					    IBV_ODP_SUPPORT_READ |
-					    IBV_ODP_SUPPORT_ATOMIC);
+					    IBV_ODP_SUPPORT_ATOMIC |
+					    IBV_ODP_SUPPORT_SRQ_RECV);
 
 	if (!trans) {
 		printf("\t\t\t\t\tNO SUPPORT\n");
@@ -303,6 +304,8 @@ static void print_odp_trans_caps(uint32_t trans)
 			printf("\t\t\t\t\tSUPPORT_READ\n");
 		if (trans & IBV_ODP_SUPPORT_ATOMIC)
 			printf("\t\t\t\t\tSUPPORT_ATOMIC\n");
+		if (trans & IBV_ODP_SUPPORT_SRQ_RECV)
+			printf("\t\t\t\t\tSUPPORT_SRQ\n");
 		if (trans & unknown_transport_caps)
 			printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n",
 			       trans & unknown_transport_caps);
diff --git a/libibverbs/man/ibv_query_device_ex.3 b/libibverbs/man/ibv_query_device_ex.3
index 3ad9eec..f99f818 100644
--- a/libibverbs/man/ibv_query_device_ex.3
+++ b/libibverbs/man/ibv_query_device_ex.3
@@ -60,6 +60,7 @@ enum ibv_odp_transport_cap_bits {
 	IBV_ODP_SUPPORT_WRITE = 1 << 2, /* RDMA-Write operations support on-demand paging */
 	IBV_ODP_SUPPORT_READ = 1 << 3, /* RDMA-Read operations support on-demand paging */
 	IBV_ODP_SUPPORT_ATOMIC = 1 << 4, /* RDMA-Atomic operations support on-demand paging */
+	IBV_ODP_SUPPORT_SRQ_RECV = 1 << 5, /* SRQ receive operations support on-demand paging */
 };
 
 struct ibv_tso_caps {
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 94e4916..9561e39 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -204,6 +204,7 @@ enum ibv_odp_transport_cap_bits {
 	IBV_ODP_SUPPORT_WRITE = 1 << 2,
 	IBV_ODP_SUPPORT_READ = 1 << 3,
 	IBV_ODP_SUPPORT_ATOMIC = 1 << 4,
+	IBV_ODP_SUPPORT_SRQ_RECV = 1 << 5,
 };
 
 struct ibv_odp_caps {
From patchwork Wed Feb 20 14:57:35 2019
From: Yishai Hadas
To: linux-rdma@vger.kernel.org
Cc: yishaih@mellanox.com, monis@mellanox.com, artemyko@mellanox.com,
    jgg@mellanox.com, majd@mellanox.com
Subject: [PATCH rdma-core 3/6] mlx5: Introduce a wait queue for SRQ WQEs
Date: Wed, 20 Feb 2019 16:57:35 +0200
Message-Id: <1550674658-13295-4-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1550674658-13295-1-git-send-email-yishaih@mellanox.com>

From: Moni Shoua

When allocating the WQE buffer, try to allocate more space than required.
The extra space serves as a place where WQEs that were recently switched
from HW to SW ownership can cool down before being posted again. This is
useful for WQEs with ODP buffers that were consumed by the HW but not yet
handled by the page-fault handler in the kernel.

The wait queue is FIFO: with a wait queue of size N, a WQE leaves it after
N-1 other WQEs have entered. WQEs in the wait queue are in SW ownership but
are not counted as candidates for posting. Putting a WQE into the wait queue
therefore removes it from the pool available for posting; to compensate,
another WQE is taken out of the wait queue to replace it.

Having a wait queue is not mandatory. If the extra resources it requires
exceed the limits of the device, the SRQ operates without one.
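
In numbers, the sizing policy implemented below works like this (a sketch;
srq_layout() is illustrative only and round_up_pow2() stands in for
mlx5_round_up_power_of_two()):

	/* For a request of max_wr WQEs, reserve roughly the same amount
	 * again for the wait queue, falling back to no wait queue when
	 * the device limit would be exceeded.
	 */
	static void srq_layout(uint32_t max_wr, uint32_t dev_limit)
	{
		int have_wq = max_wr * 2 + 1 <= dev_limit;
		uint32_t total = round_up_pow2(have_wq ? max_wr * 2 + 1
						       : max_wr + 1);
		uint32_t tail = round_up_pow2(max_wr + 1) - 1;

		/* Application WQEs circulate in [0, tail]; with a wait
		 * queue, [tail + 1, total - 1] holds the WQEs that are
		 * cooling down.
		 */
		printf("total=%u app=[0..%u] waitq=%s\n", total, tail,
		       have_wq ? "present" : "none");
	}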
Signed-off-by: Moni Shoua
Reviewed-by: Artemy Kovalyov
Signed-off-by: Yishai Hadas
---
 providers/mlx5/mlx5.h  | 15 +++++++++++-
 providers/mlx5/srq.c   | 63 +++++++++++++++++++++++++++++++++++++++-----------
 providers/mlx5/verbs.c | 33 ++++++++++++++++++--------
 3 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index 75d599a..f315f63 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -415,6 +415,8 @@ struct mlx5_srq {
 	int wqe_shift;
 	int head;
 	int tail;
+	int waitq_head;
+	int waitq_tail;
 	__be32 *db;
 	uint16_t counter;
 	int wq_sig;
@@ -807,7 +809,8 @@ int mlx5_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr,
 int mlx5_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr);
 int mlx5_destroy_srq(struct ibv_srq *srq);
-int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq);
+int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
+		       uint32_t nwr);
 void mlx5_free_srq_wqe(struct mlx5_srq *srq, int ind);
 int mlx5_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr,
@@ -1017,4 +1020,14 @@ static inline uint8_t calc_sig(void *wqe, int size)
 	return ~res;
 }
 
+static inline int align_queue_size(long long req)
+{
+	return mlx5_round_up_power_of_two(req);
+}
+
+static inline bool srq_has_waitq(struct mlx5_srq *srq)
+{
+	return srq->waitq_head >= 0;
+}
+
 #endif /* MLX5_H */
diff --git a/providers/mlx5/srq.c b/providers/mlx5/srq.c
index 94528bb..a2d37d0 100644
--- a/providers/mlx5/srq.c
+++ b/providers/mlx5/srq.c
@@ -145,13 +145,29 @@ int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
 	return err;
 }
 
-int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
+/* Build a linked list on an array of SRQ WQEs.
+ * Since WQEs are always added to the tail and taken from the head
+ * it doesn't matter where the last WQE points to.
+ */
+static void set_srq_buf_ll(struct mlx5_srq *srq, int start, int end)
 {
 	struct mlx5_wqe_srq_next_seg *next;
+	int i;
+
+	for (i = start; i < end; ++i) {
+		next = get_wqe(srq, i);
+		next->next_wqe_index = htobe16(i + 1);
+	}
+}
+
+int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
+		       uint32_t max_wr)
+{
 	int size;
 	int buf_size;
-	int i;
 	struct mlx5_context *ctx;
+	uint32_t orig_max_wr = max_wr;
+	bool have_wq = true;
 
 	ctx = to_mctx(context);
 
@@ -160,9 +176,18 @@ int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
 		return -1;
 	}
 
-	srq->wrid = malloc(srq->max * sizeof *srq->wrid);
-	if (!srq->wrid)
-		return -1;
+	/* At first, try to allocate more WQEs than requested so the extra will
+	 * be used for the wait queue.
+	 */
+	max_wr = orig_max_wr * 2 + 1;
+
+	if (max_wr > ctx->max_srq_recv_wr) {
+		/* Device limits are smaller than required
+		 * to provide a wait queue, continue without.
+		 */
+		max_wr = orig_max_wr + 1;
+		have_wq = false;
+	}
 
 	size = sizeof(struct mlx5_wqe_srq_next_seg) +
 	       srq->max_gs * sizeof(struct mlx5_wqe_data_seg);
@@ -179,14 +204,28 @@ int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
 
 	srq->wqe_shift = mlx5_ilog2(size);
 
+	srq->max = align_queue_size(max_wr);
 	buf_size = srq->max * size;
 
 	if (mlx5_alloc_buf(&srq->buf, buf_size,
-			   to_mdev(context->device)->page_size)) {
-		free(srq->wrid);
+			   to_mdev(context->device)->page_size))
 		return -1;
+
+	srq->head = 0;
+	srq->tail = align_queue_size(orig_max_wr + 1) - 1;
+	if (have_wq) {
+		srq->waitq_head = srq->tail + 1;
+		srq->waitq_tail = srq->max - 1;
+	} else {
+		srq->waitq_head = -1;
+		srq->waitq_tail = -1;
 	}
 
+	srq->wrid = malloc(srq->max * sizeof(*srq->wrid));
+	if (!srq->wrid) {
+		mlx5_free_buf(&srq->buf);
+		return -1;
+	}
+
 	memset(srq->buf.buf, 0, buf_size);
 
 	/*
@@ -194,13 +233,9 @@ int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
 	 * linked into the list of free WQEs.
 	 */
 
-	for (i = 0; i < srq->max; ++i) {
-		next = get_wqe(srq, i);
-		next->next_wqe_index = htobe16((i + 1) & (srq->max - 1));
-	}
-
-	srq->head = 0;
-	srq->tail = srq->max - 1;
+	set_srq_buf_ll(srq, srq->head, srq->tail);
+	if (have_wq)
+		set_srq_buf_ll(srq, srq->waitq_head, srq->waitq_tail);
 
 	return 0;
 }
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index 7e1c125..2bccdf8 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -553,11 +553,6 @@ int mlx5_round_up_power_of_two(long long sz)
 	return (int)ret;
 }
 
-static int align_queue_size(long long req)
-{
-	return mlx5_round_up_power_of_two(req);
-}
-
 static int get_cqe_size(struct mlx5dv_cq_init_attr *mlx5cq_attr)
 {
 	char *env;
@@ -1016,11 +1011,10 @@ struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
 		goto err;
 	}
 
-	srq->max = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs = attr->attr.max_sge;
 	srq->counter = 0;
 
-	if (mlx5_alloc_srq_buf(pd->context, srq)) {
+	if (mlx5_alloc_srq_buf(pd->context, srq, attr->attr.max_wr)) {
 		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
 		goto err;
 	}
@@ -1041,11 +1035,22 @@ struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd,
 	attr->attr.max_sge = srq->max_gs;
 	pthread_mutex_lock(&ctx->srq_table_mutex);
+
+	/* Override max_wr to let kernel know about extra WQEs for the
+	 * wait queue.
+	 */
+	attr->attr.max_wr = srq->max - 1;
+
 	ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd),
 				 &resp.ibv_resp, sizeof(resp));
 	if (ret)
 		goto err_db;
 
+	/* Override kernel response that includes the wait queue with the real
+	 * number of WQEs that are applicable for the application.
+	 */
+	attr->attr.max_wr = srq->tail;
+
 	ret = mlx5_store_srq(ctx, resp.srqn, srq);
 	if (ret)
 		goto err_destroy;
@@ -2707,11 +2712,10 @@ struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
 		goto err;
 	}
 
-	msrq->max = align_queue_size(attr->attr.max_wr + 1);
 	msrq->max_gs = attr->attr.max_sge;
 	msrq->counter = 0;
 
-	if (mlx5_alloc_srq_buf(context, msrq)) {
+	if (mlx5_alloc_srq_buf(context, msrq, attr->attr.max_wr)) {
 		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
 		goto err;
 	}
@@ -2743,9 +2747,20 @@ struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context,
 		pthread_mutex_lock(&ctx->srq_table_mutex);
 	}
 
+	/* Override max_wr to let kernel know about extra WQEs for the
+	 * wait queue.
+	 */
+	attr->attr.max_wr = msrq->max - 1;
+
 	err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq),
 				    attr, &cmd.ibv_cmd, sizeof(cmd),
 				    &resp.ibv_resp, sizeof(resp));
+
+	/* Override kernel response that includes the wait queue with the real
+	 * number of WQEs that are applicable for the application.
+	 */
+	attr->attr.max_wr = msrq->tail;
+
 	if (err)
 		goto err_free_uidx;

From patchwork Wed Feb 20 14:57:36 2019
From: Yishai Hadas
To: linux-rdma@vger.kernel.org
Cc: yishaih@mellanox.com, monis@mellanox.com, artemyko@mellanox.com,
    jgg@mellanox.com, majd@mellanox.com
Subject: [PATCH rdma-core 4/6] mlx5: Handle ODP fault completion in SRQ
Date: Wed, 20 Feb 2019 16:57:36 +0200
Message-Id: <1550674658-13295-5-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1550674658-13295-1-git-send-email-yishaih@mellanox.com>

From: Moni Shoua

An SRQ WQE that has ODP buffers might be completed with an error and a
special syndrome. This indicates that the HW couldn't scatter the data to
the WQE buffers but had to consume the WQE anyway. This type of error
shouldn't be raised to the poller of the CQ but handled inside the driver.

The WQE for which the completion arrived needs to be re-posted to keep the
integrity of the SRQ from the application's point of view. The re-posted
WQE is taken from the SRQ head, which means that the completed WQE becomes
free. To prevent it from being posted again with different addresses, which
would interfere with the page-fault handler in the kernel, the completed
WQE is put in the wait queue to cool down.
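
In outline, the handling added below does the following per faulted WQE (a
sketch; link_at_tail() is a hypothetical stand-in for the inline re-linking
done in mlx5_complete_odp_fault()):

	static void complete_odp_fault_sketch(struct mlx5_srq *srq, int ind)
	{
		/* Move the faulted WQE to the wait-queue tail and pull the
		 * oldest cooled-down WQE back into the free list; without
		 * a wait queue, re-link the WQE at the SRQ tail instead.
		 */
		if (!srq_cooldown_wqe(srq, ind))
			link_at_tail(srq, ind);

		/* Copy the faulted WQE's data segments to the SRQ head and
		 * ring the doorbell, so the application still owns the same
		 * number of outstanding receives.
		 */
		srq_repost(srq, ind);
	}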
Signed-off-by: Moni Shoua
Reviewed-by: Artemy Kovalyov
Signed-off-by: Yishai Hadas
---
 providers/mlx5/cq.c     | 33 +++++++++++++++---
 providers/mlx5/mlx5.h   |  3 ++
 providers/mlx5/mlx5dv.h |  4 +++
 providers/mlx5/srq.c    | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/providers/mlx5/cq.c b/providers/mlx5/cq.c
index 6f5c9f1..b9b47df 100644
--- a/providers/mlx5/cq.c
+++ b/providers/mlx5/cq.c
@@ -49,7 +49,8 @@ enum {
 	CQ_OK		=  0,
 	CQ_EMPTY	= -1,
-	CQ_POLL_ERR	= -2
+	CQ_POLL_ERR	= -2,
+	CQ_POLL_NODATA	= ENOENT
 };
 
 enum {
@@ -659,6 +660,12 @@ static int handle_tag_matching(struct mlx5_cq *cq,
 	return CQ_OK;
 }
 
+static inline int is_odp_pfault_err(struct mlx5_err_cqe *ecqe)
+{
+	return ecqe->syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR &&
+	       ecqe->vendor_err_synd == MLX5_CQE_VENDOR_SYNDROME_ODP_PFAULT;
+}
+
 static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
 				 struct mlx5_cqe64 *cqe64,
 				 void *cqe,
@@ -682,10 +689,14 @@ static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
 	int idx;
 	uint8_t opcode;
 	struct mlx5_err_cqe *ecqe;
-	int err = 0;
+	int err;
 	struct mlx5_qp *mqp;
 	struct mlx5_context *mctx;
-	uint8_t is_srq = 0;
+	uint8_t is_srq;
+
+again:
+	is_srq = 0;
+	err = 0;
 
 	mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
 	qpn = be32toh(cqe64->sop_drop_qpn) & 0xffffff;
@@ -811,7 +822,8 @@ static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
 		wc->vendor_err = ecqe->vendor_err_synd;
 
 		if (unlikely(ecqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR &&
-			     ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR)) {
+			     ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR &&
+			     !is_odp_pfault_err(ecqe))) {
 			FILE *fp = mctx->dbg_fp;
 			fprintf(fp, PFX "%s: got completion with error:\n",
 				mctx->hostname);
@@ -844,6 +856,17 @@ static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
 		if (is_srq) {
 			wqe_ctr = be16toh(cqe64->wqe_counter);
+			if (is_odp_pfault_err(ecqe)) {
+				mlx5_complete_odp_fault(*cur_srq, wqe_ctr);
+				err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
+				/* CQ_POLL_NODATA indicates that the CQ was not
+				 * empty, but the polled CQE was handled
+				 * internally and should not be processed by
+				 * the caller.
+				 */
+				if (err == CQ_EMPTY)
+					return CQ_POLL_NODATA;
+				goto again;
+			}
+
 			if (lazy)
 				cq->ibv_cq.wr_id = (*cur_srq)->wrid[wqe_ctr];
 			else
@@ -1060,7 +1083,7 @@ static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_att
 	if (lock && err)
 		mlx5_spin_unlock(&cq->lock);
 
-	if (stall && err) {
+	if (stall && err == CQ_POLL_ERR) {
 		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
 			cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
 					       mlx5_stall_cq_poll_min);
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index f315f63..9129c0f 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -811,6 +811,7 @@ int mlx5_query_srq(struct ibv_srq *srq,
 int mlx5_destroy_srq(struct ibv_srq *srq);
 int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq,
 		       uint32_t nwr);
+void mlx5_complete_odp_fault(struct mlx5_srq *srq, int ind);
 void mlx5_free_srq_wqe(struct mlx5_srq *srq, int ind);
 int mlx5_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr,
@@ -1030,4 +1031,6 @@ static inline bool srq_has_waitq(struct mlx5_srq *srq)
 	return srq->waitq_head >= 0;
 }
 
+bool srq_cooldown_wqe(struct mlx5_srq *srq, int ind);
+
 #endif /* MLX5_H */
diff --git a/providers/mlx5/mlx5dv.h b/providers/mlx5/mlx5dv.h
index 796ea7b..e2788d8 100644
--- a/providers/mlx5/mlx5dv.h
+++ b/providers/mlx5/mlx5dv.h
@@ -512,6 +512,10 @@ enum {
 };
 
 enum {
+	MLX5_CQE_VENDOR_SYNDROME_ODP_PFAULT = 0x93,
+};
+
+enum {
 	MLX5_CQE_L2_OK = 1 << 0,
 	MLX5_CQE_L3_OK = 1 << 1,
 	MLX5_CQE_L4_OK = 1 << 2,
diff --git a/providers/mlx5/srq.c b/providers/mlx5/srq.c
index a2d37d0..71d74a7 100644
--- a/providers/mlx5/srq.c
+++ b/providers/mlx5/srq.c
@@ -82,6 +82,95 @@ void mlx5_free_srq_wqe(struct mlx5_srq *srq, int ind)
 	mlx5_spin_unlock(&srq->lock);
 }
 
+/* Take an index and put it last in the wait queue */
+static void srq_put_in_waitq(struct mlx5_srq *srq, int ind)
+{
+	struct mlx5_wqe_srq_next_seg *waitq_tail;
+
+	waitq_tail = get_wqe(srq, srq->waitq_tail);
+	waitq_tail->next_wqe_index = htobe16(ind);
+	srq->waitq_tail = ind;
+}
+
+/* Take the first WQE from the wait queue and put it at the SRQ tail */
+static void srq_get_from_waitq(struct mlx5_srq *srq)
+{
+	struct mlx5_wqe_srq_next_seg *tail;
+	struct mlx5_wqe_srq_next_seg *waitq_head;
+
+	tail = get_wqe(srq, srq->tail);
+	waitq_head = get_wqe(srq, srq->waitq_head);
+
+	tail->next_wqe_index = htobe16(srq->waitq_head);
+	srq->tail = srq->waitq_head;
+	srq->waitq_head = be16toh(waitq_head->next_wqe_index);
+}
+
+/* Put the given WQE that is in SW ownership at the end of the wait queue.
+ * Take a WQE from the wait queue and add it to WQEs in SW ownership instead.
+ */
+bool srq_cooldown_wqe(struct mlx5_srq *srq, int ind)
+{
+	if (!srq_has_waitq(srq))
+		return false;
+
+	srq_put_in_waitq(srq, ind);
+	srq_get_from_waitq(srq);
+	return true;
+}
+
+/* Post a WQE internally, based on a previous application post.
+ * Copy a given WQE's data segments to the SRQ head, advance the head
+ * and ring the HW doorbell.
+ */
+static void srq_repost(struct mlx5_srq *srq, int ind)
+{
+	struct mlx5_wqe_srq_next_seg *src, *dst;
+	struct mlx5_wqe_data_seg *src_scat, *dst_scat;
+	int i;
+
+	srq->wrid[srq->head] = srq->wrid[ind];
+
+	src = get_wqe(srq, ind);
+	dst = get_wqe(srq, srq->head);
+	src_scat = (struct mlx5_wqe_data_seg *)(src + 1);
+	dst_scat = (struct mlx5_wqe_data_seg *)(dst + 1);
+
+	for (i = 0; i < srq->max_gs; ++i) {
+		dst_scat[i] = src_scat[i];
+
+		if (dst_scat[i].lkey == htobe32(MLX5_INVALID_LKEY))
+			break;
+	}
+
+	srq->head = be16toh(dst->next_wqe_index);
+	srq->counter++;
+	/* Flush descriptors */
+	udma_to_device_barrier();
+	*srq->db = htobe32(srq->counter);
+}
+
+void mlx5_complete_odp_fault(struct mlx5_srq *srq, int ind)
+{
+	mlx5_spin_lock(&srq->lock);
+
+	if (!srq_cooldown_wqe(srq, ind)) {
+		struct mlx5_wqe_srq_next_seg *tail = get_wqe(srq, srq->tail);
+
+		/* Without a wait queue, put the page-faulted WQE
+		 * back at the SRQ tail. The repost is still possible, but
+		 * the risk of overriding the page-faulted WQE with a future
+		 * post_srq_recv() is now higher.
+		 */
+		tail->next_wqe_index = htobe16(ind);
+		srq->tail = ind;
+	}
+
+	srq_repost(srq, ind);
+
+	mlx5_spin_unlock(&srq->lock);
+}
+
 int mlx5_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
 		       struct ibv_recv_wr **bad_wr)

From patchwork Wed Feb 20 14:57:37 2019
From: Yishai Hadas
To: linux-rdma@vger.kernel.org
Cc: yishaih@mellanox.com, monis@mellanox.com, artemyko@mellanox.com,
    jgg@mellanox.com, majd@mellanox.com
Subject: [PATCH rdma-core 5/6] verbs: Add option to register ODP MR in
 ibv_srq_pingpong
Date: Wed, 20 Feb 2019 16:57:37 +0200
Message-Id: <1550674658-13295-6-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1550674658-13295-1-git-send-email-yishaih@mellanox.com>

From: Moni Shoua

The -o flag indicates that the MR should be registered for ODP. Since one
memory region is used for both send and receive operations, the flag applies
to send WQEs as well as to receive WQEs.

Signed-off-by: Moni Shoua
Signed-off-by: Yishai Hadas
---
 libibverbs/examples/srq_pingpong.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/libibverbs/examples/srq_pingpong.c b/libibverbs/examples/srq_pingpong.c
index 230e9bf..2a91fd3 100644
--- a/libibverbs/examples/srq_pingpong.c
+++ b/libibverbs/examples/srq_pingpong.c
@@ -56,6 +56,7 @@ enum {
 
 static int page_size;
 static int validate_buf;
+static int use_odp;
 
 struct pingpong_context {
 	struct ibv_context	*context;
@@ -352,6 +353,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 {
 	struct pingpong_context *ctx;
 	int i;
+	int access_flags = IBV_ACCESS_LOCAL_WRITE;
 
 	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
@@ -376,6 +378,24 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 			ibv_get_device_name(ib_dev));
 		goto clean_buffer;
 	}
 
+	if (use_odp) {
+		struct ibv_device_attr_ex attrx;
+		const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND |
+					      IBV_ODP_SUPPORT_RECV |
+					      IBV_ODP_SUPPORT_SRQ_RECV;
+
+		if (ibv_query_device_ex(ctx->context, NULL, &attrx)) {
+			fprintf(stderr, "Couldn't query device for its features\n");
+			goto clean_device;
+		}
+		if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) ||
+		    (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) {
+			fprintf(stderr, "The device isn't ODP capable or does not support RC send, receive and srq with ODP\n");
+			goto clean_device;
+		}
+		access_flags |= IBV_ACCESS_ON_DEMAND;
+	}
+
 	if (use_event) {
 		ctx->channel = ibv_create_comp_channel(ctx->context);
@@ -392,7 +412,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 		goto clean_comp_channel;
 	}
 
-	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
+	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags);
 	if (!ctx->mr) {
 		fprintf(stderr, "Couldn't register MR\n");
 		goto clean_pd;
@@ -617,6 +637,7 @@ static void usage(const char *argv0)
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
 	printf("  -g, --gid-idx=<gid index> local port gid index\n");
+	printf("  -o, --odp              use on demand paging\n");
 	printf("  -c, --chk              validate received buffer\n");
 }
 
@@ -664,12 +685,13 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "odp",      .has_arg = 0, .val = 'o' },
 			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
 			{ .name = "chk",      .has_arg = 0, .val = 'c' },
 			{}
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:c",
+		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eog:c",
 				long_options, NULL);
 		if (c == -1)
 			break;
@@ -735,6 +757,10 @@ int main(int argc, char *argv[])
 			gidx = strtol(optarg, NULL, 0);
 			break;
+		case 'o':
+			use_odp = 1;
+			break;
+
 		case 'c':
 			validate_buf = 1;
 			break;

From patchwork Wed Feb 20 14:57:38 2019
From: Yishai Hadas
To: linux-rdma@vger.kernel.org
Cc: yishaih@mellanox.com, monis@mellanox.com, artemyko@mellanox.com,
    jgg@mellanox.com, majd@mellanox.com
Subject: [PATCH rdma-core 6/6] verbs: Add option to register ODP MR in
 ibv_xsrq_pingpong
Date: Wed, 20 Feb 2019 16:57:38 +0200
Message-Id: <1550674658-13295-7-git-send-email-yishaih@mellanox.com>
In-Reply-To: <1550674658-13295-1-git-send-email-yishaih@mellanox.com>

From: Moni Shoua

The -o flag indicates that the MR should be registered for ODP. Since one
memory region is used for both send and receive operations, the flag applies
to send WQEs as well as to receive WQEs.
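
The core of the change is the access flag passed to ibv_reg_mr(); a minimal
sketch of the pattern used by both pingpong examples (assuming pd, buf and
size are already set up and the capability check above has passed):

	int access_flags = IBV_ACCESS_LOCAL_WRITE;

	if (use_odp)
		access_flags |= IBV_ACCESS_ON_DEMAND;	/* ODP-backed MR */

	mr = ibv_reg_mr(pd, buf, size, access_flags);

With the new option, e.g. "ibv_xsrq_pingpong -o" on the server and
"ibv_xsrq_pingpong -o <server>" on the client should exercise ODP on both
sides.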
Signed-off-by: Moni Shoua
Signed-off-by: Yishai Hadas
---
 libibverbs/examples/xsrq_pingpong.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/libibverbs/examples/xsrq_pingpong.c b/libibverbs/examples/xsrq_pingpong.c
index 4c0d825..0f60ca2 100644
--- a/libibverbs/examples/xsrq_pingpong.c
+++ b/libibverbs/examples/xsrq_pingpong.c
@@ -59,6 +59,7 @@
 #define TERMINATION_MSG_SIZE 4
 #define TERMINATION_MSG "END"
 static int page_size;
+static int use_odp;
 
 struct pingpong_dest {
 	union ibv_gid gid;
@@ -202,6 +203,7 @@ static int pp_init_ctx(char *ib_devname)
 	struct ibv_srq_init_attr_ex attr;
 	struct ibv_xrcd_init_attr xrcd_attr;
 	struct ibv_port_attr port_attr;
+	int access_flags = IBV_ACCESS_LOCAL_WRITE;
 
 	ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp);
 	ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp);
@@ -214,6 +216,24 @@ static int pp_init_ctx(char *ib_devname)
 		return 1;
 	}
 
+	if (use_odp) {
+		struct ibv_device_attr_ex attrx;
+		const uint32_t xrc_caps_mask = IBV_ODP_SUPPORT_SEND |
+					       IBV_ODP_SUPPORT_RECV |
+					       IBV_ODP_SUPPORT_SRQ_RECV;
+
+		if (ibv_query_device_ex(ctx.context, NULL, &attrx)) {
+			fprintf(stderr, "Couldn't query device for its features\n");
+			return 1;
+		}
+		if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) ||
+		    (attrx.xrc_odp_caps & xrc_caps_mask) != xrc_caps_mask) {
+			fprintf(stderr, "The device isn't ODP capable or does not support XRC send, receive and srq with ODP\n");
+			return 1;
+		}
+		access_flags |= IBV_ACCESS_ON_DEMAND;
+	}
+
 	if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) {
 		fprintf(stderr, "Failed to get port info\n");
 		return 1;
@@ -247,7 +267,7 @@ static int pp_init_ctx(char *ib_devname)
 		return 1;
 	}
 
-	ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE);
+	ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, access_flags);
 	if (!ctx.mr) {
 		fprintf(stderr, "Couldn't register MR\n");
 		return 1;
@@ -846,6 +866,7 @@ static void usage(const char *argv0)
 	printf("  -n, --num_tests=<num>  number of tests per client (default 5)\n");
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -o, --odp              use on demand paging\n");
 	printf("  -g, --gid-idx=<gid index> local port gid index\n");
 }
 
@@ -872,11 +893,12 @@ int main(int argc, char *argv[])
 			{ .name = "num_tests", .has_arg = 1, .val = 'n' },
 			{ .name = "sl",        .has_arg = 1, .val = 'l' },
 			{ .name = "events",    .has_arg = 0, .val = 'e' },
+			{ .name = "odp",       .has_arg = 0, .val = 'o' },
 			{ .name = "gid-idx",   .has_arg = 1, .val = 'g' },
 			{}
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:n:l:eg:c", long_options,
+		c = getopt_long(argc, argv, "p:d:i:s:m:n:l:eog:c", long_options,
 				NULL);
 		if (c == -1)
 			break;
@@ -924,6 +946,9 @@ int main(int argc, char *argv[])
 		case 'e':
 			ctx.use_event = 1;
 			break;
+		case 'o':
+			use_odp = 1;
+			break;
 		default:
 			usage(argv[0]);
 			return 1;