From patchwork Thu Dec 30 11:23:18 2021
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12701389
X-Patchwork-Delegate: jgg@ziepe.ca
From: Leon Romanovsky
To: Jason Gunthorpe
Cc: Aharon Landau, linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH rdma-next v1 1/7] RDMA/mlx5: Merge similar flows of allocating MR from the cache
Date: Thu, 30 Dec 2021 13:23:18 +0200
X-Mailing-List: linux-rdma@vger.kernel.org

From: Aharon Landau

When allocating an MR from the cache, the driver calls get_cache_mr() and, in case of failure, retries with create_cache_mr(). This is exactly the flow of mlx5_mr_cache_alloc(), so use it instead.
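For reference, the consolidated flow in mlx5_mr_cache_alloc() after this change looks roughly as follows (condensed from the hunks below; the miss accounting that used to live in get_cache_mr() moves into the empty-list branch):

struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
				       struct mlx5_cache_ent *ent,
				       int access_flags)
{
	struct mlx5_ib_mr *mr;

	/* Matches access in alloc_cache_mr() */
	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
		return ERR_PTR(-EOPNOTSUPP);

	spin_lock_irq(&ent->lock);
	if (list_empty(&ent->head)) {
		/* Cache miss: account for it and create an mkey synchronously */
		if (ent->limit) {
			queue_adjust_cache_locked(ent);
			ent->miss++;
		}
		spin_unlock_irq(&ent->lock);
		mr = create_cache_mr(ent);
		if (IS_ERR(mr))
			return mr;
	} else {
		/* Cache hit: pop the first available MR from the list */
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->available_mrs--;
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->lock);
		mlx5_clear_mr(mr);
	}
	return mr;
}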
Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +- drivers/infiniband/hw/mlx5/mr.c | 51 +++++----------------------- drivers/infiniband/hw/mlx5/odp.c | 11 ++++-- 3 files changed, 19 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 48eda21a358f..9c3cf6f26ad1 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1344,7 +1344,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags); + struct mlx5_cache_ent *ent, + int access_flags); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 157d862fb864..2cba55bb7825 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -566,25 +566,22 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } -/* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags) + struct mlx5_cache_ent *ent, + int access_flags) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || - entry >= ARRAY_SIZE(cache->ent))) - return ERR_PTR(-EINVAL); - /* Matches access in alloc_cache_mr() */ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); - ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { + if (ent->limit) { + queue_adjust_cache_locked(ent); + ent->miss++; + } spin_unlock_irq(&ent->lock); mr = create_cache_mr(ent); if (IS_ERR(mr)) @@ -598,32 +595,9 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, mlx5_clear_mr(mr); } - mr->access_flags = access_flags; return mr; } -/* Return a MR already available in the cache */ -static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) -{ - struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent = req_ent; - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - req_ent->miss++; - return NULL; -} - static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; @@ -959,16 +933,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } - mr = get_cache_mr(ent); - if (!mr) { - mr = create_cache_mr(ent); - /* - * The above already tried to do the same stuff as reg_create(), - * no reason to try it again. 
- */ - if (IS_ERR(mr)) - return mr; - } + mr = mlx5_mr_cache_alloc(dev, ent, access_flags); + if (IS_ERR(mr)) + return mr; mr->ibmr.pd = pd; mr->umem = umem; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91eb615b89ee..0972afc3e952 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -407,6 +407,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, unsigned long idx) { + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; struct mlx5_ib_mr *ret; @@ -418,13 +419,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - mr = mlx5_mr_cache_alloc( - mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); + mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY], + imr->access_flags); if (IS_ERR(mr)) { ib_umem_odp_release(odp); return mr; } + mr->access_flags = imr->access_flags; mr->ibmr.pd = imr->ibmr.pd; mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; @@ -493,12 +495,15 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags); + imr = mlx5_mr_cache_alloc(dev, + &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY], + access_flags); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); return imr; } + imr->access_flags = access_flags; imr->ibmr.pd = &pd->ibpd; imr->ibmr.iova = 0; imr->umem = &umem_odp->umem; From patchwork Thu Dec 30 11:23:19 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12701390 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 02C8FC433F5 for ; Thu, 30 Dec 2021 11:23:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238896AbhL3LXm (ORCPT ); Thu, 30 Dec 2021 06:23:42 -0500 Received: from ams.source.kernel.org ([145.40.68.75]:60640 "EHLO ams.source.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S235325AbhL3LXm (ORCPT ); Thu, 30 Dec 2021 06:23:42 -0500 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id 782F5B80B3A; Thu, 30 Dec 2021 11:23:40 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6BA7FC36AE9; Thu, 30 Dec 2021 11:23:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1640863419; bh=bS2EKpoVJc+DiSe++w2nFwHEZ5MGz5BZ5zTNfPsgExs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=a7UWFJyCREV3t1aa2xH35hhqq59pu25fy0hY+XBnbWI8NerEk8YZ9UTFCF6R17chq UsqelD7c4rJ7NwlNLFFRr88hz9ShoSXkzVtPgWWW5euXQENp7Qz0KG17p0UocGlPjG RXdnwy+dBd8a8+Sr7QnO2nlsHo0s3aRnsvLxtdvakvSBpfCj5JWHYBqdS7uyXqV58p afh7zbi9TDQImXXDO3HkOaR8uTCiRCnuyiBlrJFX5MAMQCx/WnFmXiIjN7NNVjWFqN JPQmSjoFhGU/4BmbmLFUvPfhGoh5CeVzBSpzxuG1NGrUzRj+QDr1itDC9X9jnt2IUp navQbeMPX0sWg== From: Leon Romanovsky To: Jason Gunthorpe Cc: Aharon Landau , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org 
Subject: [PATCH rdma-next v1 2/7] RDMA/mlx5: Replace cache list with Xarray Date: Thu, 30 Dec 2021 13:23:19 +0200 Message-Id: <58c847ceb443d1836fcf6c8602f2ccb5e84728d7.1640862842.git.leonro@nvidia.com> X-Mailer: git-send-email 2.33.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Aharon Landau The Xarray allows us to store the cached mkeys in memory efficient way and internal xa_lock is used to protect the indexes. It helps us to get rid of ent->lock as it is not required anymore. Entries are reserved in the Xarray using xa_cmpxchg before calling to the upcoming callbacks to avoid allocations in interrupt context. The xa_cmpxchg can sleep when using GFP_KERNEL, so we call it in a loop to ensure one reserved entry for each process trying to reserve. Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 13 +- drivers/infiniband/hw/mlx5/mr.c | 253 ++++++++++++++++----------- 2 files changed, 153 insertions(+), 113 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9c3cf6f26ad1..213894053bfe 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -755,11 +755,9 @@ struct umr_common { }; struct mlx5_cache_ent { - struct list_head head; - /* sync access to the cahce entry - */ - spinlock_t lock; - + struct xarray mkeys; + unsigned long stored; + unsigned long reserved; char name[4]; u32 order; @@ -771,18 +769,13 @@ struct mlx5_cache_ent { u8 fill_to_high_water:1; /* - * - available_mrs is the length of list head, ie the number of MRs - * available for immediate allocation. * - total_mrs is available_mrs plus all in use MRs that could be * returned to the cache. * - limit is the low water mark for available_mrs, 2* limit is the * upper water mark. - * - pending is the number of MRs currently being created */ u32 total_mrs; - u32 available_mrs; u32 limit; - u32 pending; /* Statistics */ u32 miss; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2cba55bb7825..8936b504ff99 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -147,14 +147,17 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = mr->cache_ent; struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; + void *old; if (status) { mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); kfree(mr); - spin_lock_irqsave(&ent->lock, flags); - ent->pending--; + xa_lock_irqsave(&ent->mkeys, flags); + ent->reserved--; + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old != NULL); WRITE_ONCE(dev->fill_delay, 1); - spin_unlock_irqrestore(&ent->lock, flags); + xa_unlock_irqrestore(&ent->mkeys, flags); mod_timer(&dev->delay_timer, jiffies + HZ); return; } @@ -166,14 +169,14 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) WRITE_ONCE(dev->cache.last_add, jiffies); - spin_lock_irqsave(&ent->lock, flags); - list_add_tail(&mr->list, &ent->head); - ent->available_mrs++; + xa_lock_irqsave(&ent->mkeys, flags); + old = __xa_store(&ent->mkeys, ent->stored, mr, GFP_ATOMIC); + WARN_ON(old != NULL); + ent->stored++; ent->total_mrs++; /* If we are doing fill_to_high_water then keep going. 
*/ queue_adjust_cache_locked(ent); - ent->pending--; - spin_unlock_irqrestore(&ent->lock, flags); + xa_unlock_irqrestore(&ent->mkeys, flags); } static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) @@ -196,12 +199,48 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) return mr; } +static int _push_reserve_mkey(struct mlx5_cache_ent *ent) +{ + unsigned long to_reserve; + void *old; + + while (true) { + to_reserve = ent->reserved; + old = __xa_cmpxchg(&ent->mkeys, to_reserve, NULL, XA_ZERO_ENTRY, + GFP_KERNEL); + + if (xa_is_err(old)) + return xa_err(old); + + if (to_reserve != ent->reserved || old != NULL) { + if (to_reserve > ent->reserved && old == NULL) + __xa_erase(&ent->mkeys, to_reserve); + continue; + } + + ent->reserved++; + break; + } + return 0; +} + +static int push_reserve_mkey(struct mlx5_cache_ent *ent) +{ + int ret; + + xa_lock_irq(&ent->mkeys); + ret = _push_reserve_mkey(ent); + xa_unlock_irq(&ent->mkeys); + + return ret; +} + /* Asynchronously schedule new MRs to be populated in the cache. */ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; - void *mkc; + void *mkc, *old; u32 *in; int err = 0; int i; @@ -215,31 +254,41 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) mr = alloc_cache_mr(ent, mkc); if (!mr) { err = -ENOMEM; - break; + goto err; } - spin_lock_irq(&ent->lock); - if (ent->pending >= MAX_PENDING_REG_MR) { + + xa_lock_irq(&ent->mkeys); + err = _push_reserve_mkey(ent); + if (err) + goto err_unlock; + if ((ent->reserved - ent->stored) > MAX_PENDING_REG_MR) { err = -EAGAIN; - spin_unlock_irq(&ent->lock); - kfree(mr); - break; + goto err_undo_reserve; } - ent->pending++; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); + err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, &ent->dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), &mr->cb_work); if (err) { - spin_lock_irq(&ent->lock); - ent->pending--; - spin_unlock_irq(&ent->lock); mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); - kfree(mr); - break; + xa_lock_irq(&ent->mkeys); + goto err_undo_reserve; } } + kfree(in); + return 0; + +err_undo_reserve: + ent->reserved--; + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old != NULL); +err_unlock: + xa_unlock_irq(&ent->mkeys); + kfree(mr); +err: kfree(in); return err; } @@ -271,9 +320,9 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) init_waitqueue_head(&mr->mmkey.wait); mr->mmkey.type = MLX5_MKEY_MR; WRITE_ONCE(ent->dev->cache.last_add, jiffies); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); ent->total_mrs++; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); kfree(in); return mr; free_mr: @@ -286,40 +335,42 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; + void *old; - lockdep_assert_held(&ent->lock); - if (list_empty(&ent->head)) + if (!ent->stored) return; - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; + ent->stored--; + mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, GFP_KERNEL); + WARN_ON(mr == NULL || xa_is_err(mr)); + ent->reserved--; + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old != NULL); ent->total_mrs--; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key); 
kfree(mr); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); } static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, bool limit_fill) + __acquires(&ent->lock) __releases(&ent->lock) { int err; - lockdep_assert_held(&ent->lock); - while (true) { if (limit_fill) target = ent->limit * 2; - if (target == ent->available_mrs + ent->pending) + if (target == ent->reserved) return 0; - if (target > ent->available_mrs + ent->pending) { - u32 todo = target - (ent->available_mrs + ent->pending); + if (target > ent->reserved) { + u32 todo = target - ent->reserved; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); err = add_keys(ent, todo); if (err == -EAGAIN) usleep_range(3000, 5000); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (err) { if (err != -EAGAIN) return err; @@ -347,12 +398,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf, * cannot free MRs that are in use. Compute the target value for * available_mrs. */ - spin_lock_irq(&ent->lock); - if (target < ent->total_mrs - ent->available_mrs) { + + xa_lock_irq(&ent->mkeys); + if (target < ent->total_mrs - ent->stored) { err = -EINVAL; goto err_unlock; } - target = target - (ent->total_mrs - ent->available_mrs); + target = target - (ent->total_mrs - ent->stored); if (target < ent->limit || target > ent->limit*2) { err = -EINVAL; goto err_unlock; @@ -360,12 +412,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf, err = resize_available_mrs(ent, target, false); if (err) goto err_unlock; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); return count; err_unlock: - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); return err; } @@ -405,10 +457,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, * Upon set we immediately fill the cache to high water mark implied by * the limit. */ - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); ent->limit = var; err = resize_available_mrs(ent, 0, true); - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); if (err) return err; return count; @@ -443,9 +495,9 @@ static bool someone_adding(struct mlx5_mr_cache *cache) struct mlx5_cache_ent *ent = &cache->ent[i]; bool ret; - spin_lock_irq(&ent->lock); - ret = ent->available_mrs < ent->limit; - spin_unlock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); + ret = ent->stored < ent->limit; + xa_unlock_irq(&ent->mkeys); if (ret) return true; } @@ -459,26 +511,24 @@ static bool someone_adding(struct mlx5_mr_cache *cache) */ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { - lockdep_assert_held(&ent->lock); - if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) return; - if (ent->available_mrs < ent->limit) { + if (ent->stored < ent->limit) { ent->fill_to_high_water = true; queue_work(ent->dev->cache.wq, &ent->work); } else if (ent->fill_to_high_water && - ent->available_mrs + ent->pending < 2 * ent->limit) { + ent->reserved < 2 * ent->limit) { /* * Once we start populating due to hitting a low water mark * continue until we pass the high water mark. 
*/ queue_work(ent->dev->cache.wq, &ent->work); - } else if (ent->available_mrs == 2 * ent->limit) { + } else if (ent->stored == 2 * ent->limit) { ent->fill_to_high_water = false; - } else if (ent->available_mrs > 2 * ent->limit) { + } else if (ent->stored > 2 * ent->limit) { /* Queue deletion of excess entries */ ent->fill_to_high_water = false; - if (ent->pending) + if (ent->stored != ent->reserved) queue_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(1000)); else @@ -492,22 +542,21 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) struct mlx5_mr_cache *cache = &dev->cache; int err; - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (ent->disabled) goto out; - if (ent->fill_to_high_water && - ent->available_mrs + ent->pending < 2 * ent->limit && + if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit && !READ_ONCE(dev->fill_delay)) { - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); err = add_keys(ent, 1); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (ent->disabled) goto out; if (err) { /* - * EAGAIN only happens if pending is positive, so we - * will be rescheduled from reg_mr_callback(). The only + * EAGAIN only happens if there are pending MRs, so we + * will be rescheduled when storing them. The only * failure path here is ENOMEM. */ if (err != -EAGAIN) { @@ -519,7 +568,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) msecs_to_jiffies(1000)); } } - } else if (ent->available_mrs > 2 * ent->limit) { + } else if (ent->stored > 2 * ent->limit) { bool need_delay; /* @@ -534,11 +583,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) * the garbage collection work to try to run in next cycle, in * order to free CPU resources to other tasks. */ - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); need_delay = need_resched() || someone_adding(cache) || !time_after(jiffies, READ_ONCE(cache->last_add) + 300 * HZ); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (ent->disabled) goto out; if (need_delay) @@ -547,7 +596,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) queue_adjust_cache_locked(ent); } out: - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); } static void delayed_cache_work_func(struct work_struct *work) @@ -571,27 +620,32 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int access_flags) { struct mlx5_ib_mr *mr; + void *old; /* Matches access in alloc_cache_mr() */ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { + xa_lock_irq(&ent->mkeys); + if (!ent->stored) { if (ent->limit) { queue_adjust_cache_locked(ent); ent->miss++; } - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); mr = create_cache_mr(ent); if (IS_ERR(mr)) return mr; } else { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; + ent->stored--; + mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, + GFP_KERNEL); + WARN_ON(mr == NULL || xa_is_err(mr)); + ent->reserved--; + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old != NULL); queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); mlx5_clear_mr(mr); } @@ -601,41 +655,35 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; + void *old; - 
spin_lock_irq(&ent->lock); - list_add_tail(&mr->list, &ent->head); - ent->available_mrs++; + xa_lock_irq(&ent->mkeys); + old = __xa_store(&ent->mkeys, ent->stored, mr, 0); + WARN_ON(old != NULL); + ent->stored++; queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); } static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - LIST_HEAD(del_list); cancel_delayed_work(&ent->dwork); - while (1) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - break; - } - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_move(&mr->list, &del_list); - ent->available_mrs--; + xa_lock_irq(&ent->mkeys); + while (ent->stored) { + ent->stored--; + mr = __xa_erase(&ent->mkeys, ent->stored); + WARN_ON(mr == NULL); ent->total_mrs--; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); - } - - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { - list_del(&mr->list); kfree(mr); + xa_lock_irq(&ent->mkeys); } + xa_unlock_irq(&ent->mkeys); } static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -665,7 +713,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) dir = debugfs_create_dir(ent->name, cache->root); debugfs_create_file("size", 0600, dir, ent, &size_fops); debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_u32("cur", 0400, dir, &ent->available_mrs); + debugfs_create_ulong("cur", 0400, dir, &ent->stored); debugfs_create_u32("miss", 0600, dir, &ent->miss); } } @@ -694,8 +742,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; - INIT_LIST_HEAD(&ent->head); - spin_lock_init(&ent->lock); + xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); ent->order = i + 2; ent->dev = dev; ent->limit = 0; @@ -721,9 +768,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); } mlx5_mr_cache_debugfs_init(dev); @@ -741,9 +788,9 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { struct mlx5_cache_ent *ent = &dev->cache.ent[i]; - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); ent->disabled = true; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); cancel_work_sync(&ent->work); cancel_delayed_work_sync(&ent->dwork); } @@ -1932,10 +1979,10 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) /* Stop DMA */ if (mr->cache_ent) { - if (revoke_mr(mr)) { - spin_lock_irq(&mr->cache_ent->lock); + if (revoke_mr(mr) || push_reserve_mkey(mr->cache_ent)) { + xa_lock_irq(&mr->cache_ent->mkeys); mr->cache_ent->total_mrs--; - spin_unlock_irq(&mr->cache_ent->lock); + xa_unlock_irq(&mr->cache_ent->mkeys); mr->cache_ent = NULL; } } From patchwork Thu Dec 30 11:23:20 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12701393 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from 
From: Leon Romanovsky
To: Jason Gunthorpe
Cc: Aharon Landau, linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH rdma-next v1 3/7] RDMA/mlx5: Store in the cache mkeys instead of mrs
Date: Thu, 30 Dec 2021 13:23:20 +0200
Message-Id: <0d3379edbf41911646cd7a20020bf5a28dcd603f.1640862842.git.leonro@nvidia.com>

From: Aharon Landau

Currently, the driver stores the entire mlx5_ib_mr struct in the cache, although the only use of the cached MR is the mkey. Store only the mkey in the cache.
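Since an mkey is just a 32-bit value, it fits directly in an Xarray value entry, so nothing has to be allocated per cached entry. A minimal sketch of the encoding this patch relies on; the helper names here are illustrative only and not part of the driver, while xa_mk_value()/xa_to_value() are the standard XArray value-entry helpers:

/*
 * Illustrative helpers: push/pop a bare u32 mkey as an XArray value entry.
 * The real code below additionally maintains the ent->reserved bookkeeping
 * and is called with the xa_lock already held.
 */
static void cache_push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey)
{
	void *old;

	old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey),
			 GFP_ATOMIC);
	WARN_ON(old != NULL);
	ent->stored++;
}

static u32 cache_pop_mkey_locked(struct mlx5_cache_ent *ent)
{
	void *entry;

	ent->stored--;
	entry = __xa_erase(&ent->mkeys, ent->stored);
	WARN_ON(entry == NULL || !xa_is_value(entry));
	return (u32)xa_to_value(entry);
}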
Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 25 ++--- drivers/infiniband/hw/mlx5/mr.c | 161 +++++++++++++-------------- 2 files changed, 83 insertions(+), 103 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 213894053bfe..cfc77d43c7a8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -641,6 +641,7 @@ struct mlx5_ib_mkey { unsigned int ndescs; struct wait_queue_head wait; refcount_t usecount; + struct mlx5_cache_ent *cache_ent; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) @@ -663,20 +664,9 @@ struct mlx5_ib_mr { struct ib_mr ibmr; struct mlx5_ib_mkey mmkey; - /* User MR data */ - struct mlx5_cache_ent *cache_ent; - /* Everything after cache_ent is zero'd when MR allocated */ struct ib_umem *umem; union { - /* Used only while the MR is in the cache */ - struct { - u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; - struct mlx5_async_work cb_work; - /* Cache list element */ - struct list_head list; - }; - /* Used only by kernel MRs (umem == NULL) */ struct { void *descs; @@ -716,12 +706,6 @@ struct mlx5_ib_mr { }; }; -/* Zero the fields in the mr that are variant depending on usage */ -static inline void mlx5_clear_mr(struct mlx5_ib_mr *mr) -{ - memset_after(mr, 0, cache_ent); -} - static inline bool is_odp_mr(struct mlx5_ib_mr *mr) { return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && @@ -785,6 +769,13 @@ struct mlx5_cache_ent { struct delayed_work dwork; }; +struct mlx5_async_create_mkey { + u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; + struct mlx5_async_work cb_work; + struct mlx5_cache_ent *ent; + u32 mkey; +}; + struct mlx5_mr_cache { struct workqueue_struct *wq; struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8936b504ff99..204c37a37421 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -88,15 +88,14 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET64(mkc, mkc, start_addr, start_addr); } -static void assign_mkey_variant(struct mlx5_ib_dev *dev, - struct mlx5_ib_mkey *mkey, u32 *in) +static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in) { u8 key = atomic_inc_return(&dev->mkey_var); void *mkc; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, mkey_7_0, key); - mkey->key = key; + *mkey = key; } static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, @@ -104,7 +103,7 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, { int ret; - assign_mkey_variant(dev, mkey, in); + assign_mkey_variant(dev, &mkey->key, in); ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); if (!ret) init_waitqueue_head(&mkey->wait); @@ -113,8 +112,7 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, } static int -mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, - struct mlx5_ib_mkey *mkey, +mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, u32 *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, int inlen, u32 *out, int outlen, struct mlx5_async_work *context) @@ -142,16 +140,16 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void create_mkey_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = - container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_async_create_mkey *mkey_out = + 
container_of(context, struct mlx5_async_create_mkey, cb_work); + struct mlx5_cache_ent *ent = mkey_out->ent; struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; void *old; if (status) { mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); - kfree(mr); + kfree(mkey_out); xa_lock_irqsave(&ent->mkeys, flags); ent->reserved--; old = __xa_erase(&ent->mkeys, ent->reserved); @@ -162,32 +160,24 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) return; } - mr->mmkey.type = MLX5_MKEY_MR; - mr->mmkey.key |= mlx5_idx_to_mkey( - MLX5_GET(create_mkey_out, mr->out, mkey_index)); - init_waitqueue_head(&mr->mmkey.wait); - + mkey_out->mkey |= mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); WRITE_ONCE(dev->cache.last_add, jiffies); xa_lock_irqsave(&ent->mkeys, flags); - old = __xa_store(&ent->mkeys, ent->stored, mr, GFP_ATOMIC); + old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey_out->mkey), + GFP_ATOMIC); WARN_ON(old != NULL); ent->stored++; ent->total_mrs++; /* If we are doing fill_to_high_water then keep going. */ queue_adjust_cache_locked(ent); xa_unlock_irqrestore(&ent->mkeys, flags); + kfree(mkey_out); } -static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) +static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) { - struct mlx5_ib_mr *mr; - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return NULL; - mr->cache_ent = ent; - set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); @@ -196,7 +186,6 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); MLX5_SET(mkc, mkc, log_page_size, ent->page); - return mr; } static int _push_reserve_mkey(struct mlx5_cache_ent *ent) @@ -239,7 +228,7 @@ static int push_reserve_mkey(struct mlx5_cache_ent *ent) static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mr *mr; + struct mlx5_async_create_mkey *async_out; void *mkc, *old; u32 *in; int err = 0; @@ -250,12 +239,15 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + set_cache_mkc(ent, mkc); for (i = 0; i < num; i++) { - mr = alloc_cache_mr(ent, mkc); - if (!mr) { + async_out = kzalloc(sizeof(struct mlx5_async_create_mkey), + GFP_KERNEL); + if (!async_out) { err = -ENOMEM; goto err; } + async_out->ent = ent; xa_lock_irq(&ent->mkeys); err = _push_reserve_mkey(ent); @@ -266,11 +258,11 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) goto err_undo_reserve; } xa_unlock_irq(&ent->mkeys); - - err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, + err = mlx5_ib_create_mkey_cb(ent->dev, &async_out->mkey, &ent->dev->async_ctx, in, inlen, - mr->out, sizeof(mr->out), - &mr->cb_work); + async_out->out, + sizeof(async_out->out), + &async_out->cb_work); if (err) { mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); xa_lock_irq(&ent->mkeys); @@ -287,68 +279,55 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) WARN_ON(old != NULL); err_unlock: xa_unlock_irq(&ent->mkeys); - kfree(mr); + kfree(async_out); err: kfree(in); return err; } /* Synchronously create a MR in the cache */ -static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) +static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) { size_t inlen = 
MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mr *mr; void *mkc; u32 *in; int err; in = kzalloc(inlen, GFP_KERNEL); if (!in) - return ERR_PTR(-ENOMEM); + return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + set_cache_mkc(ent, mkc); - mr = alloc_cache_mr(ent, mkc); - if (!mr) { - err = -ENOMEM; - goto free_in; - } - - err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen); + err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); if (err) - goto free_mr; + goto free_in; - init_waitqueue_head(&mr->mmkey.wait); - mr->mmkey.type = MLX5_MKEY_MR; WRITE_ONCE(ent->dev->cache.last_add, jiffies); xa_lock_irq(&ent->mkeys); ent->total_mrs++; xa_unlock_irq(&ent->mkeys); - kfree(in); - return mr; -free_mr: - kfree(mr); free_in: kfree(in); - return ERR_PTR(err); + return err; } static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { - struct mlx5_ib_mr *mr; - void *old; + void *old, *xa_mkey; if (!ent->stored) return; ent->stored--; - mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, GFP_KERNEL); - WARN_ON(mr == NULL || xa_is_err(mr)); + xa_mkey = + __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, GFP_KERNEL); + WARN_ON(xa_mkey == NULL || xa_is_err(xa_mkey)); ent->reserved--; old = __xa_erase(&ent->mkeys, ent->reserved); WARN_ON(old != NULL); ent->total_mrs--; xa_unlock_irq(&ent->mkeys); - mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key); - kfree(mr); + mlx5_core_destroy_mkey(ent->dev->mdev, (u32)xa_to_value(xa_mkey)); xa_lock_irq(&ent->mkeys); } @@ -620,12 +599,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int access_flags) { struct mlx5_ib_mr *mr; - void *old; + void *old, *xa_mkey; + int err; - /* Matches access in alloc_cache_mr() */ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + xa_lock_irq(&ent->mkeys); if (!ent->stored) { if (ent->limit) { @@ -633,32 +616,39 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, ent->miss++; } xa_unlock_irq(&ent->mkeys); - mr = create_cache_mr(ent); - if (IS_ERR(mr)) - return mr; + err = create_cache_mkey(ent, &mr->mmkey.key); + if (err) { + kfree(mr); + return ERR_PTR(err); + } } else { ent->stored--; - mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, - GFP_KERNEL); - WARN_ON(mr == NULL || xa_is_err(mr)); + xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, + GFP_KERNEL); + WARN_ON(xa_mkey == NULL || xa_is_err(xa_mkey)); ent->reserved--; old = __xa_erase(&ent->mkeys, ent->reserved); WARN_ON(old != NULL); queue_adjust_cache_locked(ent); xa_unlock_irq(&ent->mkeys); - mlx5_clear_mr(mr); + mr->mmkey.key = (u32)xa_to_value(xa_mkey); } + mr->mmkey.cache_ent = ent; + mr->mmkey.type = MLX5_MKEY_MR; + init_waitqueue_head(&mr->mmkey.wait); return mr; + } static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; void *old; xa_lock_irq(&ent->mkeys); - old = __xa_store(&ent->mkeys, ent->stored, mr, 0); + old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mr->mmkey.key), + 0); WARN_ON(old != NULL); ent->stored++; queue_adjust_cache_locked(ent); @@ -669,18 +659,17 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_ib_mr *mr; + void *xa_mkey; cancel_delayed_work(&ent->dwork); 
xa_lock_irq(&ent->mkeys); while (ent->stored) { ent->stored--; - mr = __xa_erase(&ent->mkeys, ent->stored); - WARN_ON(mr == NULL); + xa_mkey = __xa_erase(&ent->mkeys, ent->stored); + WARN_ON(xa_mkey == NULL); ent->total_mrs--; xa_unlock_irq(&ent->mkeys); - mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); - kfree(mr); + mlx5_core_destroy_mkey(dev->mdev, (u32)xa_to_value(xa_mkey)); xa_lock_irq(&ent->mkeys); } xa_unlock_irq(&ent->mkeys); @@ -1729,7 +1718,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); /* We only track the allocated sizes of MRs from the cache */ - if (!mr->cache_ent) + if (!mr->mmkey.cache_ent) return false; if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) return false; @@ -1738,7 +1727,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); if (WARN_ON(!*page_size)) return false; - return (1ULL << mr->cache_ent->order) >= + return (1ULL << mr->mmkey.cache_ent->order) >= ib_umem_num_dma_blocks(new_umem, *page_size); } @@ -1978,15 +1967,15 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } /* Stop DMA */ - if (mr->cache_ent) { - if (revoke_mr(mr) || push_reserve_mkey(mr->cache_ent)) { - xa_lock_irq(&mr->cache_ent->mkeys); - mr->cache_ent->total_mrs--; - xa_unlock_irq(&mr->cache_ent->mkeys); - mr->cache_ent = NULL; + if (mr->mmkey.cache_ent) { + if (revoke_mr(mr) || push_reserve_mkey(mr->mmkey.cache_ent)) { + xa_lock_irq(&mr->mmkey.cache_ent->mkeys); + mr->mmkey.cache_ent->total_mrs--; + xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); + mr->mmkey.cache_ent = NULL; } } - if (!mr->cache_ent) { + if (!mr->mmkey.cache_ent) { rc = destroy_mkey(to_mdev(mr->ibmr.device), mr); if (rc) return rc; @@ -2003,12 +1992,12 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) mlx5_ib_free_odp_mr(mr); } - if (mr->cache_ent) { + if (mr->mmkey.cache_ent) mlx5_mr_cache_free(dev, mr); - } else { + else mlx5_free_priv_descs(mr); - kfree(mr); - } + + kfree(mr); return 0; } From patchwork Thu Dec 30 11:23:21 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12701391 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2B0FBC433EF for ; Thu, 30 Dec 2021 11:23:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238939AbhL3LXs (ORCPT ); Thu, 30 Dec 2021 06:23:48 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54794 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S238938AbhL3LXo (ORCPT ); Thu, 30 Dec 2021 06:23:44 -0500 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8340BC061574; Thu, 30 Dec 2021 03:23:44 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 244826167E; Thu, 30 Dec 2021 11:23:44 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id B2952C36AEA; Thu, 30 Dec 2021 11:23:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/simple; d=kernel.org; s=k20201202; t=1640863423; bh=Jh6qCe/dAFhpxkqRJ0cJO14fGA2lUW/FT/5AYvDZkFI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FyjzyPI7X8/hASs7Z8ZGi2y+WsjCYdYJtQM6WKPqfojtmWEqC85Ix3b2k+uvsbhoJ ufNBV2AWh4eqCxqY2xhfwV2YwquAvTU3FjkaZBr8jrHyIJ0gFH9cQAv7Z8dzMfyN96 Muj9nAvG+qwcKh23E/CsKOk95jbTxf1wwT50242o6llRG+P03VO52+NvB7qNW2z+yO M543B4t2VTSn8GILiVkGKBYz9kq/OXDFkpDeXR7tXm4ft4mQayZ77ydD5Qok1mTSZT ZzC7H5ysFczMBfP65HbchR5v/GL81m+7vZz7IRF3fZzH0PZyAbnlefc1Ev5RmT72Df xlDQW1gDoD3uQ== From: Leon Romanovsky To: Jason Gunthorpe Cc: Aharon Landau , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 4/7] RDMA/mlx5: Reorder calls to pcie_relaxed_ordering_enabled() Date: Thu, 30 Dec 2021 13:23:21 +0200 Message-Id: X-Mailer: git-send-email 2.33.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Aharon Landau The mkc is the key for the mkey cache, hence, created in each attempt to get a cache mkey, while pcie_relaxed_ordering_enabled() is called during the setting of the mkc, but used only for cases where IB_ACCESS_RELAXED_ORDERING is set. pcie_relaxed_ordering_enabled() is an expensive call (26 us). Reorder the code so the driver will call it only when it is needed. Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 204c37a37421..182bdd537e43 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -68,7 +68,6 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); @@ -76,12 +75,13 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); + if ((acc & IB_ACCESS_RELAXED_ORDERING) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev)) { + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); + } MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); From patchwork Thu Dec 30 11:23:22 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12701392 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4B5FEC433F5 for ; Thu, 30 Dec 2021 11:23:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238937AbhL3LXv (ORCPT ); Thu, 30 Dec 2021 06:23:51 -0500 Received: 
From: Leon Romanovsky
To: Jason Gunthorpe
Cc: Aharon Landau, linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH rdma-next v1 5/7] RDMA/mlx5: Change the cache structure to an RB-tree
Date: Thu, 30 Dec 2021 13:23:22 +0200
Message-Id: <46970c6c09eef71128de04f02ed6afd2dc716443.1640862842.git.leonro@nvidia.com>

From: Aharon Landau

Currently, the cache structure is a linear array held within mlx5_ib_dev, which limits the number of entries. The existing entries are dedicated to mkeys of size 2^x with no access_flags, whereas later in the series we allow caching mkeys with different attributes.

In this patch, change the cache structure to an RB-tree of Xarrays of mkeys. The tree key is the mkc used to create the stored mkeys.
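The resulting ordering compares the raw mkc bytes first and uses ndescs as a tie-breaker, so entries created from the same mkc but with different sizes sit next to each other in the tree. Roughly, the insert path in the diff below does:

	struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
	size_t size = MLX5_ST_SZ_BYTES(mkc);
	struct mlx5_cache_ent *cur;
	int cmp;

	while (*new) {
		cur = rb_entry(*new, struct mlx5_cache_ent, node);
		parent = *new;
		cmp = memcmp(ent->mkc, cur->mkc, size);
		if (cmp < 0)				/* smaller mkc: go left */
			new = &((*new)->rb_left);
		else if (cmp > 0)			/* bigger mkc: go right */
			new = &((*new)->rb_right);
		else if (ent->ndescs < cur->ndescs)	/* same mkc: order by ndescs */
			new = &((*new)->rb_left);
		else if (ent->ndescs > cur->ndescs)
			new = &((*new)->rb_right);
		else
			return -EEXIST;			/* identical entry already cached */
	}
	rb_link_node(&ent->node, parent, new);
	rb_insert_color(&ent->node, &cache->cache_root);
	return 0;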
Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 22 +- drivers/infiniband/hw/mlx5/mr.c | 486 ++++++++++++++++++--------- drivers/infiniband/hw/mlx5/odp.c | 71 ++-- include/linux/mlx5/driver.h | 5 +- 4 files changed, 381 insertions(+), 203 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index cfc77d43c7a8..ce1f48cc8370 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -744,10 +744,7 @@ struct mlx5_cache_ent { unsigned long reserved; char name[4]; - u32 order; - u32 xlt; - u32 access_mode; - u32 page; + unsigned int ndescs; u8 disabled:1; u8 fill_to_high_water:1; @@ -767,6 +764,9 @@ struct mlx5_cache_ent { struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; + + struct rb_node node; + void *mkc; }; struct mlx5_async_create_mkey { @@ -778,7 +778,8 @@ struct mlx5_async_create_mkey { struct mlx5_mr_cache { struct workqueue_struct *wq; - struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; + struct rb_root cache_root; + struct mutex cache_lock; struct dentry *root; unsigned long last_add; }; @@ -1327,9 +1328,12 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags); +int mlx5_acc_flags_to_ent_flags(struct mlx5_ib_dev *dev, int access_flags); +void mlx5_set_cache_mkc(struct mlx5_ib_dev *dev, void *mkc, int access_flags, + unsigned int access_mode, unsigned int page_shift); +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int *in, + int inlen, unsigned int ndescs, + unsigned int access_mode); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); @@ -1353,7 +1357,6 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq); void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags); @@ -1372,7 +1375,6 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} -static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 182bdd537e43..631bb12697fd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -176,16 +176,16 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) kfree(mkey_out); } -static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) +void mlx5_set_cache_mkc(struct mlx5_ib_dev *dev, void *mkc, int access_flags, + unsigned int access_mode, unsigned int page_shift) { - set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); + set_mkc_access_pd_addr_fields(mkc, access_flags, 0, dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); - MLX5_SET(mkc, mkc, 
access_mode_4_2, (ent->access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); - MLX5_SET(mkc, mkc, log_page_size, ent->page); + MLX5_SET(mkc, mkc, log_page_size, page_shift); } static int _push_reserve_mkey(struct mlx5_cache_ent *ent) @@ -224,6 +224,19 @@ static int push_reserve_mkey(struct mlx5_cache_ent *ent) return ret; } +static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) +{ + if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) + return DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_mtt)); + if (access_mode == MLX5_MKC_ACCESS_MODE_KSM) + return DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_klm)); + + WARN_ON(1); + return 0; +} + /* Asynchronously schedule new MRs to be populated in the cache. */ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { @@ -239,7 +252,9 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_cache_mkc(ent, mkc); + memcpy(mkc, ent->mkc, MLX5_ST_SZ_BYTES(mkc)); + MLX5_SET(mkc, mkc, translations_octword_size, + get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs)); for (i = 0; i < num; i++) { async_out = kzalloc(sizeof(struct mlx5_async_create_mkey), GFP_KERNEL); @@ -285,33 +300,6 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return err; } -/* Synchronously create a MR in the cache */ -static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) -{ - size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - void *mkc; - u32 *in; - int err; - - in = kzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_cache_mkc(ent, mkc); - - err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); - if (err) - goto free_in; - - WRITE_ONCE(ent->dev->cache.last_add, jiffies); - xa_lock_irq(&ent->mkeys); - ent->total_mrs++; - xa_unlock_irq(&ent->mkeys); -free_in: - kfree(in); - return err; -} - static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { void *old, *xa_mkey; @@ -468,18 +456,22 @@ static const struct file_operations limit_fops = { static bool someone_adding(struct mlx5_mr_cache *cache) { - unsigned int i; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - struct mlx5_cache_ent *ent = &cache->ent[i]; - bool ret; + struct mlx5_cache_ent *ent; + struct rb_node *node; + bool ret; + mutex_lock(&cache->cache_lock); + for (node = rb_first(&cache->cache_root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ret = ent->stored < ent->limit; xa_unlock_irq(&ent->mkeys); - if (ret) + if (ret) { + mutex_unlock(&cache->cache_lock); return true; + } } + mutex_unlock(&cache->cache_lock); return false; } @@ -541,8 +533,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (err != -EAGAIN) { mlx5_ib_warn( dev, - "command failed order %d, err %d\n", - ent->order, err); + "command failed order %s, err %d\n", + ent->name, err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); } @@ -594,51 +586,177 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags) +static int mlx5_cache_ent_insert_locked(struct mlx5_mr_cache *cache, + struct 
mlx5_cache_ent *ent) +{ + struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL; + size_t size = MLX5_ST_SZ_BYTES(mkc); + struct mlx5_cache_ent *cur; + int cmp; + + /* Figure out where to put new node */ + while (*new) { + cur = rb_entry(*new, struct mlx5_cache_ent, node); + parent = *new; + cmp = memcmp(ent->mkc, cur->mkc, size); + if (cmp < 0) + new = &((*new)->rb_left); + if (cmp > 0) + new = &((*new)->rb_right); + if (cmp == 0) { + if (ent->ndescs < cur->ndescs) + new = &((*new)->rb_left); + if (ent->ndescs > cur->ndescs) + new = &((*new)->rb_right); + if (ent->ndescs == cur->ndescs) + return -EEXIST; + } + } + + /* Add new node and rebalance tree. */ + rb_link_node(&ent->node, parent, new); + rb_insert_color(&ent->node, &cache->cache_root); + + return 0; +} + +static struct mlx5_cache_ent * +mlx5_cache_find_smallest_ent(struct mlx5_mr_cache *cache, void *mkc, + unsigned int lower_bound, unsigned int upper_bound) { - struct mlx5_ib_mr *mr; - void *old, *xa_mkey; - int err; + struct rb_node *node = cache->cache_root.rb_node; + struct mlx5_cache_ent *cur, *smallest = NULL; + size_t size = MLX5_ST_SZ_BYTES(mkc); + int cmp; - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) - return ERR_PTR(-EOPNOTSUPP); + /* + * Find the smallest node within the boundaries. + */ + while (node) { + cur = rb_entry(node, struct mlx5_cache_ent, node); + cmp = memcmp(mkc, cur->mkc, size); + + if (cmp < 0) + node = node->rb_left; + if (cmp > 0) + node = node->rb_right; + if (cmp == 0) { + if ((upper_bound >= cur->ndescs) && + (cur->ndescs >= lower_bound)) + smallest = cur; + + if (cur->ndescs > lower_bound) + node = node->rb_left; + if (cur->ndescs < lower_bound) + node = node->rb_right; + if (cur->ndescs == lower_bound) + return cur; + } + } - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); + return smallest; +} + +static void mlx5_ent_get_mkey_locked(struct mlx5_cache_ent *ent, + struct mlx5_ib_mr *mr) +{ + void *xa_mkey, *old; + + ent->stored--; + xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, + GFP_KERNEL); + WARN_ON(xa_mkey == NULL || xa_is_err(xa_mkey)); + ent->reserved--; + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old != NULL); + queue_adjust_cache_locked(ent); + mr->mmkey.key = (u32)xa_to_value(xa_mkey); + mr->mmkey.cache_ent = ent; +} + +static bool mlx5_cache_get_mkey(struct mlx5_mr_cache *cache, void *mkc, + unsigned int ndescs, struct mlx5_ib_mr *mr) +{ + size_t size = MLX5_ST_SZ_BYTES(mkc); + struct mlx5_cache_ent *ent; + struct rb_node *node; + unsigned int order; + int cmp; + + order = order_base_2(ndescs) > 2 ? order_base_2(ndescs) : 2; + + mutex_lock(&cache->cache_lock); + ent = mlx5_cache_find_smallest_ent(cache, mkc, ndescs, 1 << order); + if (!ent) { + mutex_unlock(&cache->cache_lock); + return false; + } + + /* + * Find the smallest node in the range with available mkeys. 
+ */ + node = &ent->node; + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + cmp = memcmp(mkc, ent->mkc, size); + + if (cmp != 0 || ent->ndescs > (1 << order)) + break; + + xa_lock_irq(&ent->mkeys); + if (ent->stored) { + mutex_unlock(&cache->cache_lock); + mlx5_ent_get_mkey_locked(ent, mr); + xa_unlock_irq(&ent->mkeys); + + return true; + } - xa_lock_irq(&ent->mkeys); - if (!ent->stored) { if (ent->limit) { queue_adjust_cache_locked(ent); ent->miss++; } xa_unlock_irq(&ent->mkeys); - err = create_cache_mkey(ent, &mr->mmkey.key); - if (err) { - kfree(mr); - return ERR_PTR(err); - } - } else { - ent->stored--; - xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, - GFP_KERNEL); - WARN_ON(xa_mkey == NULL || xa_is_err(xa_mkey)); - ent->reserved--; - old = __xa_erase(&ent->mkeys, ent->reserved); - WARN_ON(old != NULL); - queue_adjust_cache_locked(ent); - xa_unlock_irq(&ent->mkeys); + node = rb_next(node); + } + + mutex_unlock(&cache->cache_lock); + + return false; +} + +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int *in, + int inlen, unsigned int ndescs, + unsigned int access_mode) +{ + struct mlx5_ib_mr *mr; + void *mkc; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - mr->mmkey.key = (u32)xa_to_value(xa_mkey); + if (!mlx5_cache_get_mkey(&dev->cache, mkc, ndescs, mr)) { + /* + * Can not use a cache mkey. + * Create an mkey with the exact needed size. + */ + MLX5_SET(mkc, mkc, translations_octword_size, + get_mkc_octo_size(access_mode, ndescs)); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); + if (err) + goto err; } - mr->mmkey.cache_ent = ent; mr->mmkey.type = MLX5_MKEY_MR; init_waitqueue_head(&mr->mmkey.wait); return mr; +err: + kfree(mr); + return ERR_PTR(err); } static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) @@ -655,10 +773,8 @@ static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) xa_unlock_irq(&ent->mkeys); } -static void clean_keys(struct mlx5_ib_dev *dev, int c) +static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; void *xa_mkey; cancel_delayed_work(&ent->dwork); @@ -684,27 +800,21 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) dev->cache.root = NULL; } -static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +static void mlx5_cache_ent_debugfs_init(struct mlx5_ib_dev *dev, + struct mlx5_cache_ent *ent, int order) { struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; struct dentry *dir; - int i; if (!mlx5_debugfs_root || dev->is_rep) return; - cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - sprintf(ent->name, "%d", ent->order); - dir = debugfs_create_dir(ent->name, cache->root); - debugfs_create_file("size", 0600, dir, ent, &size_fops); - debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_ulong("cur", 0400, dir, &ent->stored); - debugfs_create_u32("miss", 0600, dir, &ent->miss); - } + sprintf(ent->name, "%d", order); + dir = debugfs_create_dir(ent->name, cache->root); + debugfs_create_file("size", 0600, dir, ent, &size_fops); + debugfs_create_file("limit", 0600, dir, ent, &limit_fops); + debugfs_create_ulong("cur", 0400, dir, &ent->stored); + debugfs_create_u32("miss", 
0600, dir, &ent->miss); } static void delay_time_func(struct timer_list *t) @@ -714,69 +824,107 @@ static void delay_time_func(struct timer_list *t) WRITE_ONCE(dev->fill_delay, 0); } +static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev, + unsigned int order) +{ + struct mlx5_cache_ent *ent; + int ret; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return ERR_PTR(-ENOMEM); + + ent->mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL); + if (!ent->mkc) { + kfree(ent); + return ERR_PTR(-ENOMEM); + } + + ent->ndescs = 1 << order; + + xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); + ent->dev = dev; + + INIT_WORK(&ent->work, cache_work_func); + INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + + mlx5_cache_ent_debugfs_init(dev, ent, order); + + mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT, + PAGE_SHIFT); + mutex_lock(&dev->cache.cache_lock); + ret = mlx5_cache_ent_insert_locked(&dev->cache, ent); + mutex_unlock(&dev->cache.cache_lock); + if (ret) { + kfree(ent->mkc); + kfree(ent); + return ERR_PTR(ret); + } + return ent; +} + int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; + bool can_use_cache, need_cache; struct mlx5_cache_ent *ent; - int i; + int order, err; mutex_init(&dev->slow_path_mutex); + mutex_init(&dev->cache.cache_lock); + cache->cache_root = RB_ROOT; cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); return -ENOMEM; } + if (mlx5_debugfs_root && !dev->is_rep) + cache->root = debugfs_create_dir("mr_cache", + dev->mdev->priv.dbg_root); + + can_use_cache = !dev->is_rep && mlx5_ib_can_load_pas_with_umr(dev, 0); + need_cache = (dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && + mlx5_core_is_pf(dev->mdev); + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); - ent->order = i + 2; - ent->dev = dev; - ent->limit = 0; - - INIT_WORK(&ent->work, cache_work_func); - INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - - if (i > MR_CACHE_LAST_STD_ENTRY) { - mlx5_odp_init_mr_cache_entry(ent); - continue; - } + for (order = 2; order < MAX_MR_CACHE_ENTRIES + 2; order++) { + ent = mlx5_ib_create_cache_ent(dev, order); - if (ent->order > mr_cache_max_order(dev)) - continue; + if (IS_ERR(ent)) { + err = PTR_ERR(ent); + goto err; + } - ent->page = PAGE_SHIFT; - ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5_ib_can_load_pas_with_umr(dev, 0)) - ent->limit = dev->mdev->profile.mr_cache[i].limit; - else - ent->limit = 0; - xa_lock_irq(&ent->mkeys); - queue_adjust_cache_locked(ent); - xa_unlock_irq(&ent->mkeys); + if (can_use_cache && need_cache && + order <= mr_cache_max_order(dev)) { + ent->limit = + dev->mdev->profile.mr_cache[order - 2].limit; + xa_lock_irq(&ent->mkeys); + queue_adjust_cache_locked(ent); + xa_unlock_irq(&ent->mkeys); + } } - mlx5_mr_cache_debugfs_init(dev); - return 0; +err: + mlx5_mr_cache_cleanup(dev); + return err; } int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { - unsigned int i; + struct rb_root *root = &dev->cache.cache_root; + struct mlx5_cache_ent *ent; + struct rb_node *node; if (!dev->cache.wq) return 0; - for 
(i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - struct mlx5_cache_ent *ent = &dev->cache.ent[i]; - + mutex_lock(&dev->cache.cache_lock); + for (node = rb_first(root); node; node = rb_next(node)) { + ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ent->disabled = true; xa_unlock_irq(&ent->mkeys); @@ -787,8 +935,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) mlx5_mr_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) - clean_keys(dev, i); + node = rb_first(root); + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + node = rb_next(node); + clean_keys(dev, ent); + rb_erase(&ent->node, root); + kfree(ent->mkc); + kfree(ent); + } + mutex_unlock(&dev->cache.cache_lock); destroy_workqueue(dev->cache.wq); del_timer_sync(&dev->delay_timer); @@ -857,7 +1013,7 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) static int mr_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MR_CACHE_LAST_STD_ENTRY + 2; + return MAX_MR_CACHE_ENTRIES + 2; return MLX5_MAX_UMR_SHIFT; } @@ -904,18 +1060,6 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, - unsigned int order) -{ - struct mlx5_mr_cache *cache = &dev->cache; - - if (order < cache->ent[0].order) - return &cache->ent[0]; - order = order - cache->ent[0].order; - if (order > MR_CACHE_LAST_STD_ENTRY) - return NULL; - return &cache->ent[order]; -} static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u64 length, int access_flags, u64 iova) @@ -939,14 +1083,38 @@ static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, return PAGE_SIZE; } +int mlx5_acc_flags_to_ent_flags(struct mlx5_ib_dev *dev, int access_flags) +{ + int ret = 0; + + if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + ret |= IB_ACCESS_REMOTE_ATOMIC; + + if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + ret |= IB_ACCESS_RELAXED_ORDERING; + + if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + ret |= IB_ACCESS_RELAXED_ORDERING; + + return ret; +} + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_cache_ent *ent; + unsigned int page_size, ndescs; struct mlx5_ib_mr *mr; - unsigned int page_size; + void *mkc; + int inlen; + int *in; if (umem->is_dmabuf) page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); @@ -955,29 +1123,31 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); - ent = mr_cache_ent_from_order( - dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); - /* - * Matches access in alloc_cache_mr(). If the MR can't come from the - * cache then synchronously create an uncached one. 
- */ - if (!ent || ent->limit == 0 || - !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { - mutex_lock(&dev->slow_path_mutex); - mr = reg_create(pd, umem, iova, access_flags, page_size, false); - mutex_unlock(&dev->slow_path_mutex); - return mr; - } - mr = mlx5_mr_cache_alloc(dev, ent, access_flags); - if (IS_ERR(mr)) + ndescs = ib_umem_num_dma_blocks(umem, page_size); + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + mlx5_set_cache_mkc(dev, mkc, + mlx5_acc_flags_to_ent_flags(dev, access_flags), + MLX5_MKC_ACCESS_MODE_MTT, PAGE_SHIFT); + + mr = mlx5_mr_cache_alloc(dev, in, inlen, ndescs, + MLX5_MKC_ACCESS_MODE_MTT); + if (IS_ERR(mr)) { + kfree(in); return mr; + } mr->ibmr.pd = pd; mr->umem = umem; mr->page_shift = order_base_2(page_size); set_mr_fields(dev, mr, umem->length, access_flags, iova); + kfree(in); return mr; } @@ -1727,7 +1897,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); if (WARN_ON(!*page_size)) return false; - return (1ULL << mr->mmkey.cache_ent->order) >= + return (mr->mmkey.cache_ent->ndescs) >= ib_umem_num_dma_blocks(new_umem, *page_size); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 0972afc3e952..89aaf783fe25 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -411,6 +411,9 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; struct mlx5_ib_mr *ret; + void *mkc; + int inlen; + int *in; int err; odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), @@ -419,10 +422,23 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY], - imr->access_flags); + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + ib_umem_odp_release(odp); + return ERR_PTR(-ENOMEM); + } + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + mlx5_set_cache_mkc(dev, mkc, + mlx5_acc_flags_to_ent_flags(dev, imr->access_flags), + MLX5_MKC_ACCESS_MODE_MTT, PAGE_SHIFT); + + mr = mlx5_mr_cache_alloc(dev, in, inlen, MLX5_IMR_MTT_ENTRIES, + MLX5_MKC_ACCESS_MODE_MTT); if (IS_ERR(mr)) { ib_umem_odp_release(odp); + kfree(in); return mr; } @@ -470,12 +486,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); + kfree(in); return mr; out_lock: xa_unlock(&imr->implicit_children); out_mr: mlx5_ib_dereg_mr(&mr->ibmr, NULL); + kfree(in); return ret; } @@ -485,6 +503,9 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); struct ib_umem_odp *umem_odp; struct mlx5_ib_mr *imr; + void *mkc; + int inlen; + int *in; int err; if (!mlx5_ib_can_load_pas_with_umr(dev, @@ -495,11 +516,23 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, - &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY], - access_flags); + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + ib_umem_odp_release(umem_odp); + return ERR_PTR(-ENOMEM); + } + + mkc = MLX5_ADDR_OF(create_mkey_in, in, 
memory_key_mkey_entry); + mlx5_set_cache_mkc(dev, mkc, + mlx5_acc_flags_to_ent_flags(dev, access_flags), + MLX5_MKC_ACCESS_MODE_KSM, PAGE_SHIFT); + + imr = mlx5_mr_cache_alloc(dev, in, inlen, mlx5_imr_ksm_entries, + MLX5_MKC_ACCESS_MODE_KSM); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); + kfree(in); return imr; } @@ -528,10 +561,12 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, goto out_mr; mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr); + kfree(in); return imr; out_mr: mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); mlx5_ib_dereg_mr(&imr->ibmr, NULL); + kfree(in); return ERR_PTR(err); } @@ -1596,32 +1631,6 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) return err; } -void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) -{ - if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return; - - switch (ent->order - 2) { - case MLX5_IMR_MTT_CACHE_ENTRY: - ent->page = PAGE_SHIFT; - ent->xlt = MLX5_IMR_MTT_ENTRIES * - sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; - ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - ent->limit = 0; - break; - - case MLX5_IMR_KSM_CACHE_ENTRY: - ent->page = MLX5_KSM_PAGE_SHIFT; - ent->xlt = mlx5_imr_ksm_entries * - sizeof(struct mlx5_klm) / - MLX5_IB_UMR_OCTOWORD; - ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; - ent->limit = 0; - break; - } -} - static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a623ec635947..c33f71134136 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -699,10 +699,7 @@ enum { }; enum { - MR_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_MTT_CACHE_ENTRY, - MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MR_CACHE_ENTRIES + MAX_MR_CACHE_ENTRIES = 21, }; struct mlx5_profile { From patchwork Thu Dec 30 11:23:23 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12701395 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 31A49C433EF for ; Thu, 30 Dec 2021 11:24:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238945AbhL3LYE (ORCPT ); Thu, 30 Dec 2021 06:24:04 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54926 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S239010AbhL3LYD (ORCPT ); Thu, 30 Dec 2021 06:24:03 -0500 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ADB13C06173E; Thu, 30 Dec 2021 03:24:02 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 4A46A6168B; Thu, 30 Dec 2021 11:24:02 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id A9EA5C36AEC; Thu, 30 Dec 2021 11:24:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1640863441; bh=CPKISPGTbcoObFThEXqTFHDva0XDHTqBXOhYb8c6dGw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=SiDtfuVZBtabg6cj76h4Be0/mTQ4Hh3msR8gyGB/ceR89gw7jA94N1gLJcM0Eqzev Mxygbowsx/4aqP/kdj0nq4mb+PgBMst8JkTMnGJwJyi7IOh6Vj9KeMQhrIwc6ubWUq tssY+bFPq/1JKwwMrkEJvVsaB5R9z0fSjn7ywVesMJH8cfa3VHAiPlCJl8RkFTdrjC pdte3h6bJ8GGUvh5d7z2aah1T3I1tLRkxrytPmYw+4E8CBkDrMAmncnl2/KG/bLH3c H3bP+EgNbQQ0no6vcaOleDBIRUY+XqspnayEoBPoDPdh5YJDMx75ua5C+T31ZCMg27 kEY9Zl5oE4QKg== From: Leon Romanovsky To: Jason Gunthorpe Cc: Aharon Landau , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 6/7] RDMA/mlx5: Delay the deregistration of a non-cache mkey Date: Thu, 30 Dec 2021 13:23:23 +0200 Message-Id: X-Mailer: git-send-email 2.33.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Aharon Landau When restarting an application with many non-cached mkeys, all the mkeys will be destroyed and then recreated. This process takes a long time (about 20 seconds for deregistration and 28 seconds for registration of 100,000 MRs). To shorten the restart runtime, insert the mkeys temporarily into the cache and schedule a delayed work to destroy them later. If there is no fitting entry for these mkeys, create a temporary entry that fits them. If 30 seconds have passed and no user reclaimed the temporarily cached mkeys, the scheduled work will destroy the mkeys and the temporary entries. When restarting an application, the mkeys will still be in the cache when trying to register them again; therefore, the registration will be faster (4 seconds for deregistration and 5 seconds for registration of 100,000 MRs). Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 + drivers/infiniband/hw/mlx5/mr.c | 131 ++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ce1f48cc8370..8ebe1edce190 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -639,6 +639,7 @@ struct mlx5_ib_mkey { u32 key; enum mlx5_mkey_type type; unsigned int ndescs; + unsigned int access_mode; struct wait_queue_head wait; refcount_t usecount; struct mlx5_cache_ent *cache_ent; @@ -746,6 +747,7 @@ struct mlx5_cache_ent { char name[4]; unsigned int ndescs; + u8 is_tmp:1; u8 disabled:1; u8 fill_to_high_water:1; @@ -782,6 +784,7 @@ struct mlx5_mr_cache { struct mutex cache_lock; struct dentry *root; unsigned long last_add; + struct delayed_work remove_ent_dwork; }; struct mlx5_ib_port_resources { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 631bb12697fd..43e993b360d8 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -482,7 +482,7 @@ static bool someone_adding(struct mlx5_mr_cache *cache) */ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { - if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) + if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) return; if (ent->stored < ent->limit) { ent->fill_to_high_water = true; @@ -671,7 +671,16 @@ static void mlx5_ent_get_mkey_locked(struct mlx5_cache_ent *ent, WARN_ON(old != NULL); queue_adjust_cache_locked(ent); mr->mmkey.key = (u32)xa_to_value(xa_mkey); - mr->mmkey.cache_ent = ent; + + if (!ent->is_tmp) + mr->mmkey.cache_ent = ent; + else { + ent->total_mrs--; + cancel_delayed_work(&ent->dev->cache.remove_ent_dwork); + queue_delayed_work(ent->dev->cache.wq, + &ent->dev->cache.remove_ent_dwork, + msecs_to_jiffies(30 * 1000));
+ } } static bool mlx5_cache_get_mkey(struct mlx5_mr_cache *cache, void *mkc, @@ -750,8 +759,10 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int *in, if (err) goto err; } + mr->mmkey.ndescs = ndescs; mr->mmkey.type = MLX5_MKEY_MR; init_waitqueue_head(&mr->mmkey.wait); + mr->mmkey.access_mode = access_mode; return mr; err: @@ -863,6 +874,42 @@ static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev, return ent; } +static void remove_ent_work_func(struct work_struct *work) +{ + struct mlx5_mr_cache *cache; + struct mlx5_cache_ent *ent; + struct rb_node *cur; + + cache = container_of(work, struct mlx5_mr_cache, remove_ent_dwork.work); + mutex_lock(&cache->cache_lock); + cur = rb_last(&cache->cache_root); + while (cur) { + ent = rb_entry(cur, struct mlx5_cache_ent, node); + cur = rb_prev(cur); + mutex_unlock(&cache->cache_lock); + + xa_lock_irq(&ent->mkeys); + if (!ent->is_tmp || ent->total_mrs != ent->stored) { + if (ent->total_mrs != ent->stored) + queue_delayed_work(cache->wq, + &cache->remove_ent_dwork, + msecs_to_jiffies(30 * 1000)); + xa_unlock_irq(&ent->mkeys); + mutex_lock(&cache->cache_lock); + continue; + } + ent->disabled = true; + xa_unlock_irq(&ent->mkeys); + + clean_keys(ent->dev, ent); + mutex_lock(&cache->cache_lock); + rb_erase(&ent->node, &cache->cache_root); + kfree(ent->mkc); + kfree(ent); + } + mutex_unlock(&cache->cache_lock); +} + int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; @@ -873,6 +920,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) mutex_init(&dev->slow_path_mutex); mutex_init(&dev->cache.cache_lock); cache->cache_root = RB_ROOT; + INIT_DELAYED_WORK(&cache->remove_ent_dwork, remove_ent_work_func); cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); @@ -922,6 +970,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) if (!dev->cache.wq) return 0; + cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); mutex_lock(&dev->cache.cache_lock); for (node = rb_first(root); node; node = rb_next(node)) { ent = rb_entry(node, struct mlx5_cache_ent, node); @@ -2092,6 +2141,81 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) } } +static struct mlx5_cache_ent *mlx5_cache_create_tmp_ent(struct mlx5_ib_dev *dev, + void *mkc, + unsigned int ndescs) +{ + struct mlx5_cache_ent *ent; + int ret; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return ERR_PTR(-ENOMEM); + + xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); + ent->ndescs = ndescs; + ent->dev = dev; + ent->is_tmp = true; + + INIT_WORK(&ent->work, cache_work_func); + INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + + ent->mkc = mkc; + ret = mlx5_cache_ent_insert_locked(&dev->cache, ent); + if (ret) { + kfree(ent); + return ERR_PTR(ret); + } + + return ent; +} + +static void mlx5_cache_tmp_push_mkey(struct mlx5_ib_dev *dev, + struct mlx5_ib_mr *mr) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct ib_umem *umem = mr->umem; + struct mlx5_cache_ent *ent; + void *mkc; + + if (!umem || !mlx5_ib_can_load_pas_with_umr(dev, umem->length)) + return; + + mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL); + if (!mkc) + return; + + mlx5_set_cache_mkc(dev, mkc, + mlx5_acc_flags_to_ent_flags(dev, mr->access_flags), + mr->mmkey.access_mode, PAGE_SHIFT); + mutex_lock(&cache->cache_lock); + ent = mlx5_cache_find_smallest_ent(&dev->cache, mkc, mr->mmkey.ndescs, + mr->mmkey.ndescs); + if (!ent) { + ent = mlx5_cache_create_tmp_ent(dev, mkc, 
mr->mmkey.ndescs); + if (IS_ERR(ent)) { + mutex_unlock(&cache->cache_lock); + kfree(mkc); + return; + } + } else + kfree(mkc); + + xa_lock_irq(&ent->mkeys); + if (ent->disabled) { + xa_unlock_irq(&ent->mkeys); + mutex_unlock(&cache->cache_lock); + return; + } + ent->total_mrs++; + xa_unlock_irq(&ent->mkeys); + cancel_delayed_work(&cache->remove_ent_dwork); + queue_delayed_work(cache->wq, &cache->remove_ent_dwork, + msecs_to_jiffies(30 * 1000)); + mutex_unlock(&cache->cache_lock); + mr->mmkey.cache_ent = ent; +} + int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx5_ib_mr *mr = to_mmr(ibmr); @@ -2136,6 +2260,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) mr->sig = NULL; } + if (!mr->mmkey.cache_ent) + mlx5_cache_tmp_push_mkey(dev, mr); + /* Stop DMA */ if (mr->mmkey.cache_ent) { if (revoke_mr(mr) || push_reserve_mkey(mr->mmkey.cache_ent)) { From patchwork Thu Dec 30 11:23:24 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12701394 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3EA89C433EF for ; Thu, 30 Dec 2021 11:24:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S238922AbhL3LYA (ORCPT ); Thu, 30 Dec 2021 06:24:00 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54900 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S238945AbhL3LYA (ORCPT ); Thu, 30 Dec 2021 06:24:00 -0500 Received: from ams.source.kernel.org (ams.source.kernel.org [IPv6:2604:1380:4601:e00::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8D508C06173E; Thu, 30 Dec 2021 03:23:59 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id 26A76B81B77; Thu, 30 Dec 2021 11:23:58 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 192BFC36AEA; Thu, 30 Dec 2021 11:23:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1640863436; bh=UlzqUewHmi3ptYof/nBz9OvfZw61xGmQG2sgZLa2lS4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=DTOmdl96Wy6vvCkONjpMNQ2z3dkioX8q294IlqW7bm/ljUBC8SMZAuNqEqK2I9ACS cY+emOfbqI3OzjxW6/DhKP2L3IvcYB2VJRzx0HKoKOjnM4q9YkJrjWz5B1prNAC7Mq K9qNS2Ynn/+c5qGDLyZWxKXi5/vAex1PY+VO+ljUIZv6J5whPbTVSjP99zDJulchO/ eMkjjdLQo9+oYlUrpPk02HuBVY8hxpx2YU9XjBQf9RIV3OZZQ4c9FO6LVzal9XB/cC EzAo0qL9HtESOo7YDRyAVaSnnXRjEI0SkIPl9D2OIaEdSmz2TUD86vYKmnvvTEHhXf Xa3P9PQIQFglQ== From: Leon Romanovsky To: Jason Gunthorpe Cc: Aharon Landau , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 7/7] RDMA/mlx5: Rename the mkey cache variables and functions Date: Thu, 30 Dec 2021 13:23:24 +0200 Message-Id: <28648c91910327fe712f9f178e80948ecc2224c4.1640862842.git.leonro@nvidia.com> X-Mailer: git-send-email 2.33.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Aharon Landau After replacing the MR cache with an Mkey cache, rename the variables and functions to fit the new meaning. 
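For orientation, here is a condensed sketch of how a caller goes through the renamed interface after this patch. It is adapted from alloc_cacheable_mr() in the diff below, with error handling and MR field setup trimmed; the wrapper name example_alloc() is invented for the illustration, so treat this as a sketch rather than the driver's actual code.

/* Sketch only: allocate an MR through the renamed mkey cache API. */
static struct mlx5_ib_mr *example_alloc(struct mlx5_ib_dev *dev,
					unsigned int ndescs, int access_flags)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	int *in;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return ERR_PTR(-ENOMEM);

	/* Build the mkc template that keys the cache lookup. */
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	mlx5_set_cache_mkc(dev, mkc,
			   mlx5_acc_flags_to_ent_flags(dev, access_flags),
			   MLX5_MKC_ACCESS_MODE_MTT, PAGE_SHIFT);

	/* Reuses a cached mkey when one fits, otherwise creates a new one. */
	mr = mlx5_mkey_cache_alloc(dev, in, inlen, ndescs,
				   MLX5_MKC_ACCESS_MODE_MTT);
	kfree(in);
	return mr;
}
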
Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 4 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 20 +++--- drivers/infiniband/hw/mlx5/mr.c | 97 ++++++++++++++-------------- drivers/infiniband/hw/mlx5/odp.c | 8 +-- include/linux/mlx5/driver.h | 4 +- 5 files changed, 67 insertions(+), 66 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5ec8bd2f0b2f..74f32b563109 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4034,7 +4034,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { int err; - err = mlx5_mr_cache_cleanup(dev); + err = mlx5_mkey_cache_cleanup(dev); if (err) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); @@ -4131,7 +4131,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) dev->umrc.pd = pd; sema_init(&dev->umrc.sem, MAX_UMR_WR); - ret = mlx5_mr_cache_init(dev); + ret = mlx5_mkey_cache_init(dev); if (ret) { mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); goto error_4; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8ebe1edce190..93065492dcb8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -752,12 +752,12 @@ struct mlx5_cache_ent { u8 fill_to_high_water:1; /* - * - total_mrs is available_mrs plus all in use MRs that could be + * - total_mkeys is stored mkeys plus all in use mkeys that could be * returned to the cache. - * - limit is the low water mark for available_mrs, 2* limit is the + * - limit is the low water mark for available_mkeys, 2 * limit is the * upper water mark. */ - u32 total_mrs; + u32 total_mkeys; u32 limit; /* Statistics */ @@ -778,7 +778,7 @@ struct mlx5_async_create_mkey { u32 mkey; }; -struct mlx5_mr_cache { +struct mlx5_mkey_cache { struct workqueue_struct *wq; struct rb_root cache_root; struct mutex cache_lock; @@ -1081,7 +1081,7 @@ struct mlx5_ib_dev { struct mlx5_ib_resources devr; atomic_t mkey_var; - struct mlx5_mr_cache cache; + struct mlx5_mkey_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; @@ -1328,15 +1328,15 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); -int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); -int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); +int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); +int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_acc_flags_to_ent_flags(struct mlx5_ib_dev *dev, int access_flags); void mlx5_set_cache_mkc(struct mlx5_ib_dev *dev, void *mkc, int access_flags, unsigned int access_mode, unsigned int page_shift); -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int *in, - int inlen, unsigned int ndescs, - unsigned int access_mode); +struct mlx5_ib_mr *mlx5_mkey_cache_alloc(struct mlx5_ib_dev *dev, int *in, + int inlen, unsigned int ndescs, + unsigned int access_mode); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 43e993b360d8..827de5fa244d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -123,7 +123,7 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, u32 *mkey, create_mkey_callback, context); } -static 
int mr_cache_max_order(struct mlx5_ib_dev *dev); +static int mkey_cache_max_order(struct mlx5_ib_dev *dev); static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) @@ -169,7 +169,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) GFP_ATOMIC); WARN_ON(old != NULL); ent->stored++; - ent->total_mrs++; + ent->total_mkeys++; /* If we are doing fill_to_high_water then keep going. */ queue_adjust_cache_locked(ent); xa_unlock_irqrestore(&ent->mkeys, flags); @@ -300,7 +300,7 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return err; } -static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) +static void remove_cache_mkey_locked(struct mlx5_cache_ent *ent) { void *old, *xa_mkey; @@ -313,15 +313,15 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) ent->reserved--; old = __xa_erase(&ent->mkeys, ent->reserved); WARN_ON(old != NULL); - ent->total_mrs--; + ent->total_mkeys--; xa_unlock_irq(&ent->mkeys); mlx5_core_destroy_mkey(ent->dev->mdev, (u32)xa_to_value(xa_mkey)); xa_lock_irq(&ent->mkeys); } -static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, - bool limit_fill) - __acquires(&ent->lock) __releases(&ent->lock) +static int resize_available_mkeys(struct mlx5_cache_ent *ent, + unsigned int target, bool limit_fill) + __acquires(&ent->lock) __releases(&ent->lock) { int err; @@ -344,7 +344,7 @@ static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, } else return 0; } else { - remove_cache_mr_locked(ent); + remove_cache_mkey_locked(ent); } } } @@ -361,22 +361,22 @@ static ssize_t size_write(struct file *filp, const char __user *buf, return err; /* - * Target is the new value of total_mrs the user requests, however we + * Target is the new value of total_mkeys the user requests, however we * cannot free MRs that are in use. Compute the target value for - * available_mrs. + * available_mkeys. 
*/ xa_lock_irq(&ent->mkeys); - if (target < ent->total_mrs - ent->stored) { + if (target < ent->total_mkeys - ent->stored) { err = -EINVAL; goto err_unlock; } - target = target - (ent->total_mrs - ent->stored); + target = target - (ent->total_mkeys - ent->stored); if (target < ent->limit || target > ent->limit*2) { err = -EINVAL; goto err_unlock; } - err = resize_available_mrs(ent, target, false); + err = resize_available_mkeys(ent, target, false); if (err) goto err_unlock; xa_unlock_irq(&ent->mkeys); @@ -395,7 +395,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs); + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mkeys); if (err < 0) return err; @@ -426,7 +426,7 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, */ xa_lock_irq(&ent->mkeys); ent->limit = var; - err = resize_available_mrs(ent, 0, true); + err = resize_available_mkeys(ent, 0, true); xa_unlock_irq(&ent->mkeys); if (err) return err; @@ -454,7 +454,7 @@ static const struct file_operations limit_fops = { .read = limit_read, }; -static bool someone_adding(struct mlx5_mr_cache *cache) +static bool someone_adding(struct mlx5_mkey_cache *cache) { struct mlx5_cache_ent *ent; struct rb_node *node; @@ -510,7 +510,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; int err; xa_lock_irq(&ent->mkeys); @@ -563,7 +563,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) goto out; if (need_delay) queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); - remove_cache_mr_locked(ent); + remove_cache_mkey_locked(ent); queue_adjust_cache_locked(ent); } out: @@ -586,7 +586,7 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } -static int mlx5_cache_ent_insert_locked(struct mlx5_mr_cache *cache, +static int mlx5_cache_ent_insert_locked(struct mlx5_mkey_cache *cache, struct mlx5_cache_ent *ent) { struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL; @@ -621,7 +621,7 @@ static int mlx5_cache_ent_insert_locked(struct mlx5_mr_cache *cache, } static struct mlx5_cache_ent * -mlx5_cache_find_smallest_ent(struct mlx5_mr_cache *cache, void *mkc, +mlx5_cache_find_smallest_ent(struct mlx5_mkey_cache *cache, void *mkc, unsigned int lower_bound, unsigned int upper_bound) { struct rb_node *node = cache->cache_root.rb_node; @@ -675,7 +675,7 @@ static void mlx5_ent_get_mkey_locked(struct mlx5_cache_ent *ent, if (!ent->is_tmp) mr->mmkey.cache_ent = ent; else { - ent->total_mrs--; + ent->total_mkeys--; cancel_delayed_work(&ent->dev->cache.remove_ent_dwork); queue_delayed_work(ent->dev->cache.wq, &ent->dev->cache.remove_ent_dwork, @@ -683,7 +683,7 @@ static void mlx5_ent_get_mkey_locked(struct mlx5_cache_ent *ent, } } -static bool mlx5_cache_get_mkey(struct mlx5_mr_cache *cache, void *mkc, +static bool mlx5_cache_get_mkey(struct mlx5_mkey_cache *cache, void *mkc, unsigned int ndescs, struct mlx5_ib_mr *mr) { size_t size = MLX5_ST_SZ_BYTES(mkc); @@ -734,9 +734,9 @@ static bool mlx5_cache_get_mkey(struct mlx5_mr_cache *cache, void *mkc, return false; } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int *in, - int inlen, unsigned int ndescs, - unsigned int access_mode) +struct mlx5_ib_mr *mlx5_mkey_cache_alloc(struct mlx5_ib_dev *dev, int *in, + int inlen, 
unsigned int ndescs, + unsigned int access_mode) { struct mlx5_ib_mr *mr; void *mkc; @@ -770,7 +770,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int *in, return ERR_PTR(err); } -static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +static void mlx5_mkey_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; void *old; @@ -794,7 +794,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) ent->stored--; xa_mkey = __xa_erase(&ent->mkeys, ent->stored); WARN_ON(xa_mkey == NULL); - ent->total_mrs--; + ent->total_mkeys--; xa_unlock_irq(&ent->mkeys); mlx5_core_destroy_mkey(dev->mdev, (u32)xa_to_value(xa_mkey)); xa_lock_irq(&ent->mkeys); @@ -802,7 +802,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) xa_unlock_irq(&ent->mkeys); } -static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) { if (!mlx5_debugfs_root || dev->is_rep) return; @@ -814,7 +814,7 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) static void mlx5_cache_ent_debugfs_init(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent, int order) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; struct dentry *dir; if (!mlx5_debugfs_root || dev->is_rep) @@ -876,11 +876,12 @@ static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev, static void remove_ent_work_func(struct work_struct *work) { - struct mlx5_mr_cache *cache; + struct mlx5_mkey_cache *cache; struct mlx5_cache_ent *ent; struct rb_node *cur; - cache = container_of(work, struct mlx5_mr_cache, remove_ent_dwork.work); + cache = container_of(work, struct mlx5_mkey_cache, + remove_ent_dwork.work); mutex_lock(&cache->cache_lock); cur = rb_last(&cache->cache_root); while (cur) { @@ -889,8 +890,8 @@ static void remove_ent_work_func(struct work_struct *work) mutex_unlock(&cache->cache_lock); xa_lock_irq(&ent->mkeys); - if (!ent->is_tmp || ent->total_mrs != ent->stored) { - if (ent->total_mrs != ent->stored) + if (!ent->is_tmp || ent->total_mkeys != ent->stored) { + if (ent->total_mkeys != ent->stored) queue_delayed_work(cache->wq, &cache->remove_ent_dwork, msecs_to_jiffies(30 * 1000)); @@ -910,9 +911,9 @@ static void remove_ent_work_func(struct work_struct *work) mutex_unlock(&cache->cache_lock); } -int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) +int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; bool can_use_cache, need_cache; struct mlx5_cache_ent *ent; int order, err; @@ -937,7 +938,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); - for (order = 2; order < MAX_MR_CACHE_ENTRIES + 2; order++) { + for (order = 2; order < MAX_MKEY_CACHE_ENTRIES + 2; order++) { ent = mlx5_ib_create_cache_ent(dev, order); if (IS_ERR(ent)) { @@ -946,7 +947,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) } if (can_use_cache && need_cache && - order <= mr_cache_max_order(dev)) { + order <= mkey_cache_max_order(dev)) { ent->limit = dev->mdev->profile.mr_cache[order - 2].limit; xa_lock_irq(&ent->mkeys); @@ -957,11 +958,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; err: - mlx5_mr_cache_cleanup(dev); + mlx5_mkey_cache_cleanup(dev); return err; } -int 
mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) +int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) { struct rb_root *root = &dev->cache.cache_root; struct mlx5_cache_ent *ent; @@ -981,7 +982,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) cancel_delayed_work_sync(&ent->dwork); } - mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_mkey_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); node = rb_first(root); @@ -1059,10 +1060,10 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) return (npages + 1) / 2; } -static int mr_cache_max_order(struct mlx5_ib_dev *dev) +static int mkey_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MAX_MR_CACHE_ENTRIES + 2; + return MAX_MKEY_CACHE_ENTRIES + 2; return MLX5_MAX_UMR_SHIFT; } @@ -1184,8 +1185,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mlx5_acc_flags_to_ent_flags(dev, access_flags), MLX5_MKC_ACCESS_MODE_MTT, PAGE_SHIFT); - mr = mlx5_mr_cache_alloc(dev, in, inlen, ndescs, - MLX5_MKC_ACCESS_MODE_MTT); + mr = mlx5_mkey_cache_alloc(dev, in, inlen, ndescs, + MLX5_MKC_ACCESS_MODE_MTT); if (IS_ERR(mr)) { kfree(in); return mr; @@ -2173,7 +2174,7 @@ static struct mlx5_cache_ent *mlx5_cache_create_tmp_ent(struct mlx5_ib_dev *dev, static void mlx5_cache_tmp_push_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; struct ib_umem *umem = mr->umem; struct mlx5_cache_ent *ent; void *mkc; @@ -2207,7 +2208,7 @@ static void mlx5_cache_tmp_push_mkey(struct mlx5_ib_dev *dev, mutex_unlock(&cache->cache_lock); return; } - ent->total_mrs++; + ent->total_mkeys++; xa_unlock_irq(&ent->mkeys); cancel_delayed_work(&cache->remove_ent_dwork); queue_delayed_work(cache->wq, &cache->remove_ent_dwork, @@ -2267,7 +2268,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) if (mr->mmkey.cache_ent) { if (revoke_mr(mr) || push_reserve_mkey(mr->mmkey.cache_ent)) { xa_lock_irq(&mr->mmkey.cache_ent->mkeys); - mr->mmkey.cache_ent->total_mrs--; + mr->mmkey.cache_ent->total_mkeys--; xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); mr->mmkey.cache_ent = NULL; } @@ -2290,7 +2291,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } if (mr->mmkey.cache_ent) - mlx5_mr_cache_free(dev, mr); + mlx5_mkey_cache_free(dev, mr); else mlx5_free_priv_descs(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 89aaf783fe25..ddb5f77905d5 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -434,8 +434,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mlx5_acc_flags_to_ent_flags(dev, imr->access_flags), MLX5_MKC_ACCESS_MODE_MTT, PAGE_SHIFT); - mr = mlx5_mr_cache_alloc(dev, in, inlen, MLX5_IMR_MTT_ENTRIES, - MLX5_MKC_ACCESS_MODE_MTT); + mr = mlx5_mkey_cache_alloc(dev, in, inlen, MLX5_IMR_MTT_ENTRIES, + MLX5_MKC_ACCESS_MODE_MTT); if (IS_ERR(mr)) { ib_umem_odp_release(odp); kfree(in); @@ -528,8 +528,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, mlx5_acc_flags_to_ent_flags(dev, access_flags), MLX5_MKC_ACCESS_MODE_KSM, PAGE_SHIFT); - imr = mlx5_mr_cache_alloc(dev, in, inlen, mlx5_imr_ksm_entries, - MLX5_MKC_ACCESS_MODE_KSM); + imr = mlx5_mkey_cache_alloc(dev, in, inlen, mlx5_imr_ksm_entries, + MLX5_MKC_ACCESS_MODE_KSM); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); kfree(in); diff --git a/include/linux/mlx5/driver.h 
b/include/linux/mlx5/driver.h index c33f71134136..51b30c11116e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -699,7 +699,7 @@ enum { }; enum { - MAX_MR_CACHE_ENTRIES = 21, + MAX_MKEY_CACHE_ENTRIES = 21, }; struct mlx5_profile { @@ -708,7 +708,7 @@ struct mlx5_profile { struct { int size; int limit; - } mr_cache[MAX_MR_CACHE_ENTRIES]; + } mr_cache[MAX_MKEY_CACHE_ENTRIES]; }; struct mlx5_hca_cap {
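
Taken as a whole, the series keys the cache rb-tree on a byte-wise comparison of the mkc template and, within an equal template, on ndescs, so a lookup can settle on the smallest entry that still covers a request (see mlx5_cache_ent_insert_locked() and mlx5_cache_find_smallest_ent() above). Below is a minimal standalone sketch of that ordering, using simplified stand-in types rather than the driver's structures.

#include <string.h>

/* Stand-in for a cache entry key: an mkc template plus descriptor count.
 * The real template is MLX5_ST_SZ_BYTES(mkc) long; 8 bytes is only for
 * the sketch.
 */
struct ent_key {
	unsigned char mkc[8];
	unsigned int ndescs;
};

/* Two-level ordering used by the tree: mkc bytes first, then ndescs. */
static int ent_key_cmp(const struct ent_key *a, const struct ent_key *b)
{
	int cmp = memcmp(a->mkc, b->mkc, sizeof(a->mkc));

	if (cmp)
		return cmp;
	if (a->ndescs != b->ndescs)
		return a->ndescs < b->ndescs ? -1 : 1;
	return 0;	/* equal key: the insert path reports -EEXIST */
}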