From patchwork Tue Mar 10 08:22:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428691 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AEA8314B7 for ; Tue, 10 Mar 2020 08:22:50 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8ADB42467F for ; Tue, 10 Mar 2020 08:22:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828570; bh=wJ3/T6+tmJjOFxzSRoPkXeeiT35r4bpOGMRhZP0sbKc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=BC2gWTvNACEec9zmQ2pT0x75wqmx80x672Ljv4VjGUGHVTESq6kPng+xdjQQFG050 NQ/Eqs9X8bsmw0cungeQz9tPgPb6shn/TMYiplowPtUe2AcJawArJH0JE5zjregGTl 5sj558gs8JqKPxUeuU5WWYFMKNdg/XSlZcPEqKtY= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726390AbgCJIWu (ORCPT ); Tue, 10 Mar 2020 04:22:50 -0400 Received: from mail.kernel.org ([198.145.29.99]:44472 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIWt (ORCPT ); Tue, 10 Mar 2020 04:22:49 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id BA4DD24677; Tue, 10 Mar 2020 08:22:48 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828569; bh=wJ3/T6+tmJjOFxzSRoPkXeeiT35r4bpOGMRhZP0sbKc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=TuLSeMWAmbCO9B8QLbyMfczZ7AhCamdmroxSI5558ZQV8peUGmkXLjfSV9RRiRmbF +BJasQQIAcXMBcQkxyZji0pQQWG5SL0SzpVyZRyvsLGlMXPsUzwxh2oyKvk7buZPyo Pm9rsx7gDmS9yAr25f8KDfJsDbOvr6S+T8yotZ20= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Saeed Mahameed , Eli Cohen , linux-rdma@vger.kernel.org, netdev@vger.kernel.org Subject: [PATCH mlx5-next v1 01/12] {IB,net}/mlx5: Setup mkey variant before mr create command invocation Date: Tue, 10 Mar 2020 10:22:27 +0200 Message-Id: <20200310082238.239865-2-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Saeed Mahameed On reg_mr_callback() mlx5_ib is recalculating the mkey variant which is wrong and will lead to using a different key variant than the one submitted to firmware on create mkey command invocation. To fix this, we store the mkey variant before invoking the firmware command and use it later on completion (reg_mr_callback). 
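To make the layout concrete, here is a minimal userspace sketch of the composition the fix relies on: the firmware-assigned mkey index lands in the upper bits of the handle and the driver-chosen 8-bit variant (the value programmed into the mkey_7_0 field) sits in the low byte. The bit positions and sample values below are assumptions for illustration only, not taken from the driver headers.

#include <stdint.h>
#include <stdio.h>

static uint8_t next_variant;	/* models the per-device variant counter */

static uint32_t idx_to_mkey(uint32_t mkey_index)
{
	return mkey_index << 8;	/* assumed: index occupies bits 31:8 */
}

int main(void)
{
	/* Before issuing CREATE_MKEY: pick the variant and remember it. */
	uint8_t variant = next_variant++;
	uint32_t mkey = variant;	/* what the fixed code stores in mmkey.key */

	/* ...firmware executes the command and returns an index... */
	uint32_t fw_index = 0x001234;	/* hypothetical value for the sketch */

	/*
	 * Completion path after the fix: only OR in the index. The low byte
	 * already matches the variant programmed into mkey_7_0, so software
	 * and hardware agree on the full key.
	 */
	mkey |= idx_to_mkey(fw_index);

	printf("mkey = 0x%08x (variant 0x%02x)\n", mkey, variant);
	return 0;
}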
Signed-off-by: Saeed Mahameed Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 7 ++----- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6fa0a83c19de..45c3282dd5e1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -87,7 +87,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); struct mlx5_cache_ent *ent = &cache->ent[c]; - u8 key; unsigned long flags; spin_lock_irqsave(&ent->lock, flags); @@ -102,10 +101,8 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) } mr->mmkey.type = MLX5_MKEY_MR; - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); - key = dev->mdev->priv.mkey_key++; - spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; + mr->mmkey.key |= mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, mr->out, mkey_index)); cache->last_add = jiffies; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 42cc3c7ac5b6..770d13bb4f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -56,6 +56,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; if (callback) return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, @@ -68,7 +69,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); - mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", From patchwork Tue Mar 10 08:22:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428693 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 679EB14B7 for ; Tue, 10 Mar 2020 08:22:54 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 3F53B2467F for ; Tue, 10 Mar 2020 08:22:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828574; bh=alQc/oUpRowIN50fInd8eIFbuFI40HlW/Yyy9L+0Yn0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=DD325MPpV1PwyjfqVERmXQpGnMDo2bMv/2AqtFHPDrDuAieAhPGtBtOscZLLavV6D j9EzGSTdtD/x0Vz341ArkNwenEArIiaO8fkE5GxRmEB7WfiuC3OW4R/wsqzG0PPh7j DDPbND1QhkpiCQygCB0mqt/IosVsMMEZDA+rep0w= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726491AbgCJIWx (ORCPT ); Tue, 10 Mar 2020 04:22:53 -0400 Received: from mail.kernel.org ([198.145.29.99]:44514 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIWx (ORCPT ); Tue, 10 Mar 2020 04:22:53 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 
bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id DA0A524677; Tue, 10 Mar 2020 08:22:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828572; bh=alQc/oUpRowIN50fInd8eIFbuFI40HlW/Yyy9L+0Yn0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=AgaLm7ReNvIqO6t5bnx09o59rHqkBA/TB+/CWrnf8Mhm8nk63FGWlbxR5Zz4Yp5rr zt2uMrMtTPpBnfAcCn5w7l7yXQr02SLG9LvqKDW+03asFrfT28yerpsO6U0BY5NGtf DlcTKiRB/vL40SKQKXwuQmMQkkiCBeOsR4nWftkU= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Saeed Mahameed , linux-rdma@vger.kernel.org, netdev@vger.kernel.org Subject: [PATCH mlx5-next v1 02/12] {IB,net}/mlx5: Assign mkey variant in mlx5_ib only Date: Tue, 10 Mar 2020 10:22:28 +0200 Message-Id: <20200310082238.239865-3-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Saeed Mahameed mkey variant is not required for mlx5_core use, move the mkey variant counter to mlx5_ib. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++ drivers/infiniband/hw/mlx5/mr.c | 58 +++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/main.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/mr.c | 8 +-- include/linux/mlx5/driver.h | 4 -- 6 files changed, 55 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c89a81e89095..73127c52958f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6393,6 +6393,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); + spin_lock_init(&dev->mkey_lock); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 559070c8e234..9d00cf9ccb08 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -999,6 +999,11 @@ struct mlx5_ib_dev { /* sync used page count stats */ struct mlx5_ib_resources devr; + + /* protect mkey key part */ + spinlock_t mkey_lock; + u8 mkey_key; + struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45c3282dd5e1..1b83d00e8ecd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,6 +47,46 @@ enum { #define MLX5_UMR_ALIGN 2048 +static void +create_mkey_callback(int status, struct mlx5_async_work *context); + +static void +assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in) +{ + void *mkc; + u8 key; + + spin_lock_irq(&dev->mkey_lock); + key = dev->mkey_key++; + spin_unlock_irq(&dev->mkey_lock); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; +} + +static int +mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); +} + +static int +mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mkey, + struct 
mlx5_async_ctx *async_ctx, + u32 *in, int inlen, u32 *out, int outlen, + struct mlx5_async_work *context) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey_cb(dev->mdev, mkey, async_ctx, + in, inlen, out, outlen, + create_mkey_callback, context); +} + static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); @@ -79,7 +119,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void reg_mr_callback(int status, struct mlx5_async_work *context) +static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); @@ -160,10 +200,10 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, + err = mlx5_ib_create_mkey_cb(dev, &mr->mmkey, &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, &mr->cb_work); + &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -682,7 +722,6 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -704,7 +743,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) MLX5_SET(mkc, mkc, length64, 1); set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1094,7 +1133,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, get_octo_len(virt_addr, length, page_shift)); } - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1134,7 +1173,6 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -1157,7 +1195,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET64(mkc, mkc, len, length); set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1635,7 +1673,7 @@ static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_free_descs; @@ -1902,7 +1940,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); if (err) goto free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f554cfddcf4e..6b38ec72215a 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1282,7 +1282,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&priv->alloc_mutex); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); - spin_lock_init(&priv->mkey_lock); priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 770d13bb4f20..51814d023efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -49,14 +49,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, int err; u8 key; - spin_lock_irq(&dev->priv.mkey_lock); - key = dev->priv.mkey_key++; - spin_unlock_irq(&dev->priv.mkey_lock); - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - MLX5_SET(mkc, mkc, mkey_7_0, key); - mkey->key = key; if (callback) return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, @@ -66,6 +59,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, if (err) return err; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index be2f3689095e..cdae66a0c021 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -575,10 +575,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; From patchwork Tue Mar 10 08:22:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428695 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id F1C4E139A for ; Tue, 10 Mar 2020 08:22:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id D266C24680 for ; Tue, 10 Mar 2020 08:22:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828576; bh=5MGYsTRI+cFZ28Y1jxtnUR/dUjHDkzUGziMoWdeQQTo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=jGWbA8VsWSgc0O8EZlwP40ylUhvcPmD7kwsdsIGDcFJAGnMyalMFxlFQ04epwDELo S73P8VguSyP5uccePGeF6EkleKviWwr9jhGzHJP8EJ33m1pU6gYcepST8RTF1DI6S7 4WbomAprf3MhfzZfg+jTGDDml+2S8psZC7retae0= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726504AbgCJIW4 (ORCPT ); Tue, 10 Mar 2020 04:22:56 -0400 Received: from mail.kernel.org ([198.145.29.99]:44564 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIW4 (ORCPT ); Tue, 10 Mar 2020 04:22:56 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id EA40F2467D; Tue, 10 Mar 2020 08:22:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828575; bh=5MGYsTRI+cFZ28Y1jxtnUR/dUjHDkzUGziMoWdeQQTo=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=eMhnPDFx/d7MS+4jCg/rfLFIo5P3thP0xo+h/QDUx3jI93nXkkVJjR6xHqNLljENP wFAV5dxPfqck+0NLOByTzQRLbqHE+VKy+Y9s7LWvi2poTKlDrVk2lwm4yk/F2+07UR YiJgwW6NR+1MjIHJUMh6K5gyTWJiHyJZ7zzOZ04U= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Saeed Mahameed , linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 03/12] IB/mlx5: Replace spinlock protected write with atomic var Date: Tue, 10 Mar 2020 10:22:29 +0200 Message-Id: <20200310082238.239865-4-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Saeed Mahameed mkey variant calculation was spinlock protected to make it atomic, replace that with one atomic variable. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +---- drivers/infiniband/hw/mlx5/mr.c | 6 +----- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 73127c52958f..9da2787dfcb2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6393,7 +6393,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); - spin_lock_init(&dev->mkey_lock); + atomic_set(&dev->mkey_var, 0); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9d00cf9ccb08..a00b2e8c82f0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1000,10 +1000,7 @@ struct mlx5_ib_dev { */ struct mlx5_ib_resources devr; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - + atomic_t mkey_var; struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1b83d00e8ecd..70ae3372411a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -54,12 +54,8 @@ static void assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, u32 *in) { + u8 key = atomic_inc_return(&dev->mkey_var); void *mkc; - u8 key; - - spin_lock_irq(&dev->mkey_lock); - key = dev->mkey_key++; - spin_unlock_irq(&dev->mkey_lock); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, mkey_7_0, key); From patchwork Tue Mar 10 08:22:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428705 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 296D214B7 for ; Tue, 10 Mar 2020 08:23:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 048AB2467F for ; Tue, 10 Mar 2020 08:23:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828592; bh=f3RW8JQ3TBsAVo4v1BkqMBDNqR/3LDXAY26Fyo/+8cI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; 
b=gtEOxVqv/srMGV2I0iNPTW+If7EdtRbrZA5UCW9OnYGiVNTZwMahGX4fllcC7ttiG bD0yJtBq1e2pPZDDpGyz0kHqtqvxSP9MtHwWQNCBUZqKJDsbbk52ApKRvHQ+VuXXoO 6IE8KM7Oe6N+mVKLynJ+YXpKoECDE1h09zClti5E= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726423AbgCJIXL (ORCPT ); Tue, 10 Mar 2020 04:23:11 -0400 Received: from mail.kernel.org ([198.145.29.99]:44798 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXL (ORCPT ); Tue, 10 Mar 2020 04:23:11 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 5F4662467D; Tue, 10 Mar 2020 08:23:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828590; bh=f3RW8JQ3TBsAVo4v1BkqMBDNqR/3LDXAY26Fyo/+8cI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=JgugW1D/iiDhCx7THb1DclLTv08WrhJ22zQhHOcgANOI0n7oKpvzN2fOMGPdsqxuX JwwIOiyCl4tonPFCkuQeozBW7zk6ciQOtcr832+7tE0SLhGIqKhUBAa5y4EHHR+rjA jrPYiuSMI7y6O7RasfpHdJHj8IoR52K9F6BBSJQc= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Michael Guralnik , linux-rdma@vger.kernel.org, netdev@vger.kernel.org, Saeed Mahameed Subject: [PATCH mlx5-next v1 04/12] {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib Date: Tue, 10 Mar 2020 10:22:30 +0200 Message-Id: <20200310082238.239865-5-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Michael Guralnik As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the logic inside mlx5_ib and cleanup the code in mlx5_core. 
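The shape of the refactor, reduced to a standalone sketch: the driver-specific wrapper fills in the command fields it owns and forwards to a generic asynchronous executor, so the core layer no longer needs a create-mkey entry point that takes an optional callback. The opcode value, the variant and the struct layout below are placeholders for illustration, not the real command layout.

#include <stdio.h>

struct cmd { unsigned int opcode; unsigned char mkey_7_0; };

typedef void (*async_cbk_t)(int status, void *context);

/* Models the generic mlx5_cmd_exec_cb(): knows nothing about mkeys. */
static int generic_exec_cb(struct cmd *in, async_cbk_t cb, void *ctx)
{
	(void)in;	/* a real executor would post the command here */
	cb(0, ctx);	/* pretend the command completed successfully */
	return 0;
}

static void create_mkey_callback(int status, void *context)
{
	struct cmd *in = context;

	printf("status %d, opcode 0x%x, variant 0x%02x\n",
	       status, in->opcode, in->mkey_7_0);
}

/* Models mlx5_ib_create_mkey_cb() after the move. */
static int ib_create_mkey_cb(struct cmd *in)
{
	in->opcode = 0x200;	/* stands in for MLX5_CMD_OP_CREATE_MKEY */
	in->mkey_7_0 = 0x42;	/* variant assigned inside mlx5_ib */
	return generic_exec_cb(in, create_mkey_callback, in);
}

int main(void)
{
	struct cmd in = { 0 };

	return ib_create_mkey_cb(&in);
}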
Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++----------------- include/linux/mlx5/driver.h | 6 ------ 3 files changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 70ae3372411a..a1e6ab9b0bed 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -77,10 +77,10 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, u32 *in, int inlen, u32 *out, int outlen, struct mlx5_async_work *context) { + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); assign_mkey_variant(dev, mkey, in); - return mlx5_core_create_mkey_cb(dev->mdev, mkey, async_ctx, - in, inlen, out, outlen, - create_mkey_callback, context); + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, + create_mkey_callback, context); } static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 51814d023efb..fd3e6d217c3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,12 +36,9 @@ #include #include "mlx5_core.h" -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context) +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; u32 mkey_index; @@ -51,10 +48,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - if (callback) - return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, - callback, context); - err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); if (err) return err; @@ -70,15 +63,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mkey_index, key, mkey->key); return 0; } -EXPORT_SYMBOL(mlx5_core_create_mkey_cb); - -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen) -{ - return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, - NULL, 0, NULL, NULL); -} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cdae66a0c021..3f10a9633012 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -943,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); From patchwork Tue Mar 10 08:22:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428697 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by 
pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A2C8B14B7 for ; Tue, 10 Mar 2020 08:23:00 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 7B5ED2467F for ; Tue, 10 Mar 2020 08:23:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828580; bh=NubYovImRWYu+cuGw6hmJ5Ze85F46j7taC6tRdutL/k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=gQflARRkMHUT2TlKETNr0rZ4DcVC08vwn2LRVoc+JtQ6kgLYRCFXEXugG09Kni5KB hw9UKhuB6dzYaVoq7JGAarBUxXV2VXQhbEnJEHWqr1dxw/iQCHBBo6YUpJKFgB+8aD cdTAvsuy90AHIruHNi9qWRWBXtGr7IX4wox5486M= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726453AbgCJIXA (ORCPT ); Tue, 10 Mar 2020 04:23:00 -0400 Received: from mail.kernel.org ([198.145.29.99]:44618 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIW7 (ORCPT ); Tue, 10 Mar 2020 04:22:59 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 0820324677; Tue, 10 Mar 2020 08:22:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828578; bh=NubYovImRWYu+cuGw6hmJ5Ze85F46j7taC6tRdutL/k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=C+NyUUSiddUGGqtzJXWEe7uny+WxVQNZ9jYTK3v+cRhn5BgHcj8uVZ/y5JULs35qW iojb6Ga7o/OHZL3DSPSOu0Q8Pf3tVveHm4mW01q3gTXZpfZlQ2Kvs5Kvb007pSLFEF s8+Au9LOJC9RxZ+zbuMgNxu/6xfW1GRGbkiFQA08= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 05/12] RDMA/mlx5: Rename the tracking variables for the MR cache Date: Tue, 10 Mar 2020 10:22:31 +0200 Message-Id: <20200310082238.239865-6-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe The old names do not clearly indicate the intent. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 +++++++--- drivers/infiniband/hw/mlx5/mr.c | 54 ++++++++++++++-------------- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a00b2e8c82f0..f70e6928f088 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -704,15 +704,26 @@ struct mlx5_cache_ent { u32 access_mode; u32 page; - u32 size; - u32 cur; + /* + * - available_mrs is the length of list head, ie the number of MRs + * available for immediate allocation. + * - total_mrs is available_mrs plus all in use MRs that could be + * returned to the cache. + * - limit is the low water mark for available_mrs, 2* limit is the + * upper water mark. 
+ * - pending is the number of MRs currently being created + */ + u32 total_mrs; + u32 available_mrs; + u32 limit; + u32 pending; + + /* Statistics */ u32 miss; - u32 limit; struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; - int pending; struct completion compl; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a1e6ab9b0bed..9f5afa24896d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -144,8 +144,8 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); - ent->cur++; - ent->size++; + ent->available_mrs++; + ent->total_mrs++; spin_unlock_irqrestore(&ent->lock, flags); if (!completion_done(&ent->compl)) @@ -231,8 +231,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_move(&mr->list, &del_list); - ent->cur--; - ent->size--; + ent->available_mrs--; + ent->total_mrs--; spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } @@ -265,16 +265,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf, if (var < ent->limit) return -EINVAL; - if (var > ent->size) { + if (var > ent->total_mrs) { do { - err = add_keys(dev, c, var - ent->size); + err = add_keys(dev, c, var - ent->total_mrs); if (err && err != -EAGAIN) return err; usleep_range(3000, 5000); } while (err); - } else if (var < ent->size) { - remove_keys(dev, c, ent->size - var); + } else if (var < ent->total_mrs) { + remove_keys(dev, c, ent->total_mrs - var); } return count; @@ -287,7 +287,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs); if (err < 0) return err; @@ -320,13 +320,13 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; - if (var > ent->size) + if (var > ent->total_mrs) return -EINVAL; ent->limit = var; - if (ent->cur < ent->limit) { - err = add_keys(dev, c, 2 * ent->limit - ent->cur); + if (ent->available_mrs < ent->limit) { + err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs); if (err) return err; } @@ -360,7 +360,7 @@ static int someone_adding(struct mlx5_mr_cache *cache) int i; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - if (cache->ent[i].cur < cache->ent[i].limit) + if (cache->ent[i].available_mrs < cache->ent[i].limit) return 1; } @@ -378,9 +378,9 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) return; ent = &dev->cache.ent[i]; - if (ent->cur < 2 * ent->limit && !dev->fill_delay) { + if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) { err = add_keys(dev, i, 1); - if (ent->cur < 2 * ent->limit) { + if (ent->available_mrs < 2 * ent->limit) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", i + 2); @@ -395,7 +395,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) queue_work(cache->wq, &ent->work); } } - } else if (ent->cur > 2 * ent->limit) { + } else if (ent->available_mrs > 2 * ent->limit) { /* * The remove_keys() logic is performed as garbage collection * task. 
Such task is intended to be run when no other active @@ -411,7 +411,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { remove_keys(dev, i, 1); - if (ent->cur > ent->limit) + if (ent->available_mrs > ent->limit) queue_work(cache->wq, &ent->work); } else { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); @@ -462,9 +462,9 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); - ent->cur--; + ent->available_mrs--; spin_unlock_irq(&ent->lock); - if (ent->cur < ent->limit) + if (ent->available_mrs < ent->limit) queue_work(cache->wq, &ent->work); return mr; } @@ -497,9 +497,9 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); - ent->cur--; + ent->available_mrs--; spin_unlock_irq(&ent->lock); - if (ent->cur < ent->limit) + if (ent->available_mrs < ent->limit) queue_work(cache->wq, &ent->work); break; } @@ -531,7 +531,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->allocated_from_cache = false; destroy_mkey(dev, mr); ent = &cache->ent[c]; - if (ent->cur < ent->limit) + if (ent->available_mrs < ent->limit) queue_work(cache->wq, &ent->work); return; } @@ -539,8 +539,8 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); - ent->cur++; - if (ent->cur > 2 * ent->limit) + ent->available_mrs++; + if (ent->available_mrs > 2 * ent->limit) shrink = 1; spin_unlock_irq(&ent->lock); @@ -565,8 +565,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_move(&mr->list, &del_list); - ent->cur--; - ent->size--; + ent->available_mrs--; + ent->total_mrs--; spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } @@ -604,7 +604,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) dir = debugfs_create_dir(ent->name, cache->root); debugfs_create_file("size", 0600, dir, ent, &size_fops); debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_u32("cur", 0400, dir, &ent->cur); + debugfs_create_u32("cur", 0400, dir, &ent->available_mrs); debugfs_create_u32("miss", 0600, dir, &ent->miss); } } From patchwork Tue Mar 10 08:22:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428699 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2CE661800 for ; Tue, 10 Mar 2020 08:23:04 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id EFA9324677 for ; Tue, 10 Mar 2020 08:23:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828584; bh=hRsMMAB29RvBHLYslMpn/7ScjiUihzvnSB+X06BKYvQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=tywmafw32W1LU5/AH9xznn/+vPC0aX7C066bnimhjqa+1PHQKXjZaiCne1D+J8pO7 6kODoMUEVxAMs0GY86gp73MYyA487cXbopFrTleYnp1yCHH30UBNMZWTlSkd7XV5g4 22dOikNF57LNZPJ/q8H8kxNBdwo8DmHvcwqaO6Oo= Received: (majordomo@vger.kernel.org) by vger.kernel.org via 
listexpand id S1726220AbgCJIXD (ORCPT ); Tue, 10 Mar 2020 04:23:03 -0400 Received: from mail.kernel.org ([198.145.29.99]:44658 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXD (ORCPT ); Tue, 10 Mar 2020 04:23:03 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 1E80224677; Tue, 10 Mar 2020 08:23:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828581; bh=hRsMMAB29RvBHLYslMpn/7ScjiUihzvnSB+X06BKYvQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=CHZZmmiz5bmG5ycGjIQWGcceIVXgX8dUl75LQauD22iIm42fD8FFvxORbzNJYQoQr HjiXa3En0FruvtH3QotawV2v/bLVTYTmqsEHomx0CVgt6T263sU9Be/nlfY+HvAMAT i+iR14F3tDw/8dt40vT3Ubq6onFkMpNaaoDClQLs= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 06/12] RDMA/mlx5: Simplify how the MR cache bucket is located Date: Tue, 10 Mar 2020 10:22:32 +0200 Message-Id: <20200310082238.239865-7-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe There are many bad APIs here that are accepting a cache bucket index instead of a bucket pointer. Many of the callers already have a bucket pointer, so this results in a lot of confusing uses of order2idx(). Pass the struct mlx5_cache_ent into add_keys(), remove_keys(), and alloc_cached_mr(). Once the MR is in the cache, store the cache bucket pointer directly in the MR, replacing the 'bool allocated_from cache'. In the end there is only one place that needs to form index from order, alloc_mr_from_cache(). Increase the safety of this function by disallowing it from accessing cache entries in the ODP special area. 
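The order-to-bucket mapping that remains in alloc_mr_from_cache() can be sketched on its own; the minimum order and the number of standard entries below are made-up constants for the example, not the driver's real values.

#include <stdio.h>

#define MIN_CACHE_ORDER		2	/* order of bucket 0 (assumed) */
#define LAST_STD_ENTRY		15	/* last bucket for regular MRs (assumed) */

struct cache_ent { unsigned int order; };

static struct cache_ent cache[LAST_STD_ENTRY + 1];

/* Models mr_cache_ent_from_order(): the only place index math is left. */
static struct cache_ent *ent_from_order(unsigned int order)
{
	if (order < MIN_CACHE_ORDER)
		return &cache[0];		/* round small requests up */
	if (order - MIN_CACHE_ORDER > LAST_STD_ENTRY)
		return NULL;			/* too large for the cache */
	return &cache[order - MIN_CACHE_ORDER];
}

int main(void)
{
	unsigned int i;
	struct cache_ent *ent;

	for (i = 0; i <= LAST_STD_ENTRY; i++)
		cache[i].order = MIN_CACHE_ORDER + i;

	ent = ent_from_order(9);
	if (ent)
		printf("order 9 -> bucket of order %u\n", ent->order);
	return 0;
}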
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +- drivers/infiniband/hw/mlx5/mr.c | 160 +++++++++++---------------- drivers/infiniband/hw/mlx5/odp.c | 2 +- 3 files changed, 71 insertions(+), 98 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f70e6928f088..58369d38d627 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -622,8 +622,8 @@ struct mlx5_ib_mr { struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; - int order; - bool allocated_from_cache; + unsigned int order; + struct mlx5_cache_ent *cache_ent; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; @@ -1281,7 +1281,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + unsigned int entry); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9f5afa24896d..55e31f6effda 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -99,16 +99,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static int order2idx(struct mlx5_ib_dev *dev, int order) -{ - struct mlx5_mr_cache *cache = &dev->cache; - - if (order < cache->ent[0].order) - return 0; - else - return order - cache->ent[0].order; -} - static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) { return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= @@ -120,9 +110,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; - struct mlx5_mr_cache *cache = &dev->cache; - int c = order2idx(dev, mr->order); - struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_cache_ent *ent = mr->cache_ent; unsigned long flags; spin_lock_irqsave(&ent->lock, flags); @@ -140,7 +128,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); - cache->last_add = jiffies; + dev->cache.last_add = jiffies; spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); @@ -152,10 +140,8 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) complete(&ent->compl); } -static int add_keys(struct mlx5_ib_dev *dev, int c, int num) +static int add_keys(struct mlx5_cache_ent *ent, int num) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; void *mkc; @@ -180,8 +166,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->allocated_from_cache = true; - mr->dev = dev; + mr->cache_ent = ent; + mr->dev = ent->dev; MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); @@ -196,15 +182,15 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = 
mlx5_ib_create_mkey_cb(dev, &mr->mmkey, - &dev->async_ctx, in, inlen, - mr->out, sizeof(mr->out), - &mr->cb_work); + err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, + &ent->dev->async_ctx, in, inlen, + mr->out, sizeof(mr->out), + &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; spin_unlock_irq(&ent->lock); - mlx5_ib_warn(dev, "create mkey failed %d\n", err); + mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); kfree(mr); break; } @@ -214,10 +200,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) return err; } -static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) +static void remove_keys(struct mlx5_cache_ent *ent, int num) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -234,7 +218,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ent->available_mrs--; ent->total_mrs--; spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); } list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -247,18 +231,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - struct mlx5_ib_dev *dev = ent->dev; char lbuf[20] = {0}; u32 var; int err; - int c; count = min(count, sizeof(lbuf) - 1); if (copy_from_user(lbuf, buf, count)) return -EFAULT; - c = order2idx(dev, ent->order); - if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -267,14 +247,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf, if (var > ent->total_mrs) { do { - err = add_keys(dev, c, var - ent->total_mrs); + err = add_keys(ent, var - ent->total_mrs); if (err && err != -EAGAIN) return err; usleep_range(3000, 5000); } while (err); } else if (var < ent->total_mrs) { - remove_keys(dev, c, ent->total_mrs - var); + remove_keys(ent, ent->total_mrs - var); } return count; @@ -305,18 +285,14 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - struct mlx5_ib_dev *dev = ent->dev; char lbuf[20] = {0}; u32 var; int err; - int c; count = min(count, sizeof(lbuf) - 1); if (copy_from_user(lbuf, buf, count)) return -EFAULT; - c = order2idx(dev, ent->order); - if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -326,7 +302,7 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, ent->limit = var; if (ent->available_mrs < ent->limit) { - err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs); + err = add_keys(ent, 2 * ent->limit - ent->available_mrs); if (err) return err; } @@ -371,24 +347,22 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; struct mlx5_mr_cache *cache = &dev->cache; - int i = order2idx(dev, ent->order); int err; if (cache->stopped) return; - ent = &dev->cache.ent[i]; if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) { - err = add_keys(dev, i, 1); + err = add_keys(ent, 1); if (ent->available_mrs < 2 * ent->limit) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", - i + 2); + ent->order); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(3)); } else if (err) { mlx5_ib_warn(dev, "command failed order %d, err %d\n", - i + 2, err); + ent->order, err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); } else { @@ -410,7 +384,7 @@ 
static void __cache_work_func(struct mlx5_cache_ent *ent) */ if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_keys(dev, i, 1); + remove_keys(ent, 1); if (ent->available_mrs > ent->limit) queue_work(cache->wq, &ent->work); } else { @@ -435,17 +409,18 @@ static void cache_work_func(struct work_struct *work) __cache_work_func(ent); } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) +/* Allocate a special entry from the cache */ +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + unsigned int entry) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; int err; - if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { - mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); + if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || + entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); - } ent = &cache->ent[entry]; while (1) { @@ -453,7 +428,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - err = add_keys(dev, entry, 1); + err = add_keys(ent, 1); if (err && err != -EAGAIN) return ERR_PTR(err); @@ -471,26 +446,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) } } -static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) +static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent; - int last_umr_cache_entry; - int c; - int i; - - c = order2idx(dev, order); - last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); - if (c < 0 || c > last_umr_cache_entry) { - mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); - return NULL; - } + struct mlx5_cache_ent *ent = req_ent; - for (i = c; i <= last_umr_cache_entry; i++) { - ent = &cache->ent[i]; - - mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); + /* Try larger MR pools from the cache to satisfy the allocation */ + for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { + mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, + ent - dev->cache.ent); spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { @@ -500,43 +465,36 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) ent->available_mrs--; spin_unlock_irq(&ent->lock); if (ent->available_mrs < ent->limit) - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); break; } spin_unlock_irq(&ent->lock); - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); } if (!mr) - cache->ent[c].miss++; + req_ent->miss++; return mr; } void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; + struct mlx5_cache_ent *ent = mr->cache_ent; int shrink = 0; - int c; - if (!mr->allocated_from_cache) + if (!ent) return; - c = order2idx(dev, mr->order); - WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); - if (mlx5_mr_cache_invalidate(mr)) { - mr->allocated_from_cache = false; + mr->cache_ent = NULL; destroy_mkey(dev, mr); - ent = &cache->ent[c]; if (ent->available_mrs < ent->limit) - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); return; } - ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; 
@@ -545,7 +503,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) spin_unlock_irq(&ent->lock); if (shrink) - queue_work(cache->wq, &ent->work); + queue_work(dev->cache.wq, &ent->work); } static void clean_keys(struct mlx5_ib_dev *dev, int c) @@ -872,22 +830,38 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_ib_mr *alloc_mr_from_cache( - struct ib_pd *pd, struct ib_umem *umem, - u64 virt_addr, u64 len, int npages, - int page_shift, int order, int access_flags) +static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, + unsigned int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + + if (order < cache->ent[0].order) + return &cache->ent[0]; + order = order - cache->ent[0].order; + if (order > MR_CACHE_LAST_STD_ENTRY) + return NULL; + return &cache->ent[order]; +} + +static struct mlx5_ib_mr * +alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, + u64 len, int npages, int page_shift, unsigned int order, + int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); struct mlx5_ib_mr *mr; int err = 0; int i; + if (!ent) + return ERR_PTR(-E2BIG); for (i = 0; i < 1; i++) { - mr = alloc_cached_mr(dev, order); + mr = alloc_cached_mr(ent); if (mr) break; - err = add_keys(dev, order2idx(dev, order), 1); + err = add_keys(ent, 1); if (err && err != -EAGAIN) { mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); break; @@ -1470,7 +1444,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->allocated_from_cache) + if (mr->cache_ent) err = mlx5_mr_cache_invalidate(mr); else err = destroy_mkey(dev, mr); @@ -1486,7 +1460,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - mr->allocated_from_cache = false; + mr->cache_ent = NULL; } else { /* * Send a UMR WQE @@ -1573,8 +1547,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int allocated_from_cache = mr->allocated_from_cache; - if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) @@ -1589,7 +1561,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig = NULL; } - if (!allocated_from_cache) { + if (!mr->cache_ent) { destroy_mkey(dev, mr); mlx5_free_priv_descs(mr); } @@ -1606,7 +1578,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) else clean_mr(dev, mr); - if (mr->allocated_from_cache) + if (mr->cache_ent) mlx5_mr_cache_free(dev, mr); else kfree(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4216814ba871..224f480fc441 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) odp->private = NULL; mutex_unlock(&odp->umem_mutex); - if (!mr->allocated_from_cache) { + if (!mr->cache_ent) { mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); WARN_ON(mr->descs); } From patchwork Tue Mar 10 08:22:33 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428701 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4D598139A 
for ; Tue, 10 Mar 2020 08:23:06 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 2E34024677 for ; Tue, 10 Mar 2020 08:23:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828586; bh=QDO2fdmtqaEAPmiK89oIhnb2azmGDr9PkJi3hgM3iz0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=TiFlz0y16WYEX5n42+mztrQbSobSwvQVL+V02fliXph1nMmux2ZZxkqF93ZrgKYF2 oelqTh9ad/Ak+LEUuIXdwv/ZmCYbet8/yvDE7SrfahgL65Qq7apKZwN8Gz1bCezRch t+Bxfs+3f76pNbsXUvS+Z3Vz3Xnf9gHb24xEXxb8= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726436AbgCJIXF (ORCPT ); Tue, 10 Mar 2020 04:23:05 -0400 Received: from mail.kernel.org ([198.145.29.99]:44698 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXF (ORCPT ); Tue, 10 Mar 2020 04:23:05 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 2E9E42467F; Tue, 10 Mar 2020 08:23:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828584; bh=QDO2fdmtqaEAPmiK89oIhnb2azmGDr9PkJi3hgM3iz0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=M0DmsDs0o3rJn01XvlafDQ8RlQuxGy98rcyD3LTeQSPvkbvmfBgAObtYch00PwK/J Z98I4y0AWAHRDYN0yPle41xgTtBA3Je2ZuOfmikXIMek4tdBCSSiXZhTmx/u65Dmpw +mpgDoDuvlJ6u+GB1Q1acOHnv8JtGOIMbLnj8xtk= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Artemy Kovalyov , linux-rdma@vger.kernel.org, Yishai Hadas Subject: [PATCH rdma-next v1 07/12] RDMA/mlx5: Always remove MRs from the cache before destroying them Date: Tue, 10 Mar 2020 10:22:33 +0200 Message-Id: <20200310082238.239865-8-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe The cache bucket tracks the total number of MRs that exists, both inside and outside of the cache. Removing a MR from the cache (by setting cache_ent to NULL) without updating total_mrs will cause the tracking to leak and be inflated. Further fix the rereg_mr path to always destroy the MR. reg_create will always overwrite all the MR data in mlx5_ib_mr, so the MR must be completely destroyed, in all cases, before this function can be called. Detach the MR from the cache and unconditionally destroy it to avoid leaking HW mkeys. 
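The accounting rule behind the fix, as a standalone model: total_mrs counts every mkey the bucket has created and not yet destroyed, whether it currently sits in the free list or is held by a user, so any path that permanently removes an MR from the bucket must decrement it. The structures below are simplified stand-ins, not the driver's definitions.

#include <stdio.h>

struct cache_ent {
	unsigned int total_mrs;		/* in the free list + in use */
	unsigned int available_mrs;	/* in the free list only */
};

struct mr { struct cache_ent *cache_ent; };

/* Models detach_mr_from_cache(): this MR will never return to the bucket. */
static void detach_mr_from_cache(struct mr *mr)
{
	struct cache_ent *ent = mr->cache_ent;

	mr->cache_ent = NULL;
	ent->total_mrs--;	/* the step whose omission caused the leak;
				 * the driver does this under ent->lock */
}

int main(void)
{
	struct cache_ent ent = { .total_mrs = 1, .available_mrs = 0 };
	struct mr mr = { .cache_ent = &ent };

	detach_mr_from_cache(&mr);	/* caller then destroys the mkey */
	printf("total_mrs now %u\n", ent.total_mrs);
	return 0;
}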
Fixes: afd1417404fb ("IB/mlx5: Use direct mkey destroy command upon UMR unreg failure") Fixes: 56e11d628c5d ("IB/mlx5: Added support for re-registration of MRs") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 55e31f6effda..9b980ef326b4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -479,6 +479,16 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) return mr; } +static void detach_mr_from_cache(struct mlx5_ib_mr *mr) +{ + struct mlx5_cache_ent *ent = mr->cache_ent; + + mr->cache_ent = NULL; + spin_lock_irq(&ent->lock); + ent->total_mrs--; + spin_unlock_irq(&ent->lock); +} + void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; @@ -488,7 +498,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return; if (mlx5_mr_cache_invalidate(mr)) { - mr->cache_ent = NULL; + detach_mr_from_cache(mr); destroy_mkey(dev, mr); if (ent->available_mrs < ent->limit) queue_work(dev->cache.wq, &ent->work); @@ -1445,9 +1455,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * UMR can't be used - MKey needs to be replaced. */ if (mr->cache_ent) - err = mlx5_mr_cache_invalidate(mr); - else - err = destroy_mkey(dev, mr); + detach_mr_from_cache(mr); + err = destroy_mkey(dev, mr); if (err) goto err; @@ -1459,8 +1468,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, mr = to_mmr(ib_mr); goto err; } - - mr->cache_ent = NULL; } else { /* * Send a UMR WQE From patchwork Tue Mar 10 08:22:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428703 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B4A55139A for ; Tue, 10 Mar 2020 08:23:09 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8ED512467F for ; Tue, 10 Mar 2020 08:23:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828589; bh=zcYPx6sOvVONJLsQtwtEgf2IIcimthsjWIJMhcrCJNw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=GNg3OugrAcWEUxS6ogK78VcL7fl6rBg99j/Z9vrVRErDy5jKXOj/sPwpTAkT1o7/H mh9GbTDbBX5Ku+K4fV+3nexp4haeXzYAWPTYwigS8a9niNNAXCSuEOL3o8AtPWQgda 3ywrWacDBvournv7qaM4dk9BRKNXZeFEBL6G9wcw= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726385AbgCJIXJ (ORCPT ); Tue, 10 Mar 2020 04:23:09 -0400 Received: from mail.kernel.org ([198.145.29.99]:44756 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXI (ORCPT ); Tue, 10 Mar 2020 04:23:08 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 4C96724677; Tue, 10 Mar 2020 08:23:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828587; bh=zcYPx6sOvVONJLsQtwtEgf2IIcimthsjWIJMhcrCJNw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=rOVcl477iOEH31qM09Y1zq6DB/AY5MqsBmaXmKAK8xiMRseD38kWd6WXUKpvoJUQu xalssGrjo52eBUlU7Vn4wbgf8NO09SR6wogiZgMMk28M4rtlwse36+obdzS+D7VKse 0/7cOB8jnoE5iV26uTsytO6IMqb+3Ze2AnFNF6VE= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Eli Cohen , Jack Morgenstein , linux-rdma@vger.kernel.org, Or Gerlitz , Roland Dreier Subject: [PATCH rdma-next v1 08/12] RDMA/mlx5: Fix MR cache size and limit debugfs Date: Tue, 10 Mar 2020 10:22:34 +0200 Message-Id: <20200310082238.239865-9-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe The size_write function is supposed to adjust total_mrs to match the user's request, but lacks locking and safety checking. total_mrs can only be adjusted by at most available_mrs. MRs already assigned to users cannot be revoked. Ensure that the user provides a target value within the range of available_mrs and within the high/low water mark. limit_write has confusing and wrong sanity checking, and doesn't have the ability to deallocate on limit reduction. Since both functions use the same algorithm to adjust the available_mrs, consolidate it into one function and write it correctly. Fix the locking by holding the spinlock for all accesses to ent->X. Always fail if the user provides a malformed string. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 152 ++++++++++++++++++-------- 1 file changed, 88 insertions(+), 64 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9b980ef326b4..091e24c58e2c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -140,7 +140,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) complete(&ent->compl); } -static int add_keys(struct mlx5_cache_ent *ent, int num) +static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; @@ -200,30 +200,54 @@ static int add_keys(struct mlx5_cache_ent *ent, int num) return err; } -static void remove_keys(struct mlx5_cache_ent *ent, int num) +static void remove_cache_mr(struct mlx5_cache_ent *ent) { - struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; - LIST_HEAD(del_list); - int i; - for (i = 0; i < num; i++) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - break; - } - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_move(&mr->list, &del_list); - ent->available_mrs--; - ent->total_mrs--; + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + return; } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; + ent->total_mrs--; + spin_unlock_irq(&ent->lock); + mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); + kfree(mr); +} - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { - list_del(&mr->list); - kfree(mr); +static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, + bool limit_fill) +{ + int err; + + lockdep_assert_held(&ent->lock); + + while (true) { + if (limit_fill) + target = ent->limit * 
2; + if (target == ent->available_mrs + ent->pending) + return 0; + if (target > ent->available_mrs + ent->pending) { + u32 todo = target - (ent->available_mrs + ent->pending); + + spin_unlock_irq(&ent->lock); + err = add_keys(ent, todo); + if (err == -EAGAIN) + usleep_range(3000, 5000); + spin_lock_irq(&ent->lock); + if (err) { + if (err != -EAGAIN) + return err; + } else + return 0; + } else { + spin_unlock_irq(&ent->lock); + remove_cache_mr(ent); + spin_lock_irq(&ent->lock); + } } } @@ -231,33 +255,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - char lbuf[20] = {0}; - u32 var; + u32 target; int err; - count = min(count, sizeof(lbuf) - 1); - if (copy_from_user(lbuf, buf, count)) - return -EFAULT; - - if (sscanf(lbuf, "%u", &var) != 1) - return -EINVAL; - - if (var < ent->limit) - return -EINVAL; - - if (var > ent->total_mrs) { - do { - err = add_keys(ent, var - ent->total_mrs); - if (err && err != -EAGAIN) - return err; + err = kstrtou32_from_user(buf, count, 0, &target); + if (err) + return err; - usleep_range(3000, 5000); - } while (err); - } else if (var < ent->total_mrs) { - remove_keys(ent, ent->total_mrs - var); + /* + * Target is the new value of total_mrs the user requests, however we + * cannot free MRs that are in use. Compute the target value for + * available_mrs. + */ + spin_lock_irq(&ent->lock); + if (target < ent->total_mrs - ent->available_mrs) { + err = -EINVAL; + goto err_unlock; + } + target = target - (ent->total_mrs - ent->available_mrs); + if (target < ent->limit || target > ent->limit*2) { + err = -EINVAL; + goto err_unlock; } + err = resize_available_mrs(ent, target, false); + if (err) + goto err_unlock; + spin_unlock_irq(&ent->lock); return count; + +err_unlock: + spin_unlock_irq(&ent->lock); + return err; } static ssize_t size_read(struct file *filp, char __user *buf, size_t count, @@ -285,28 +314,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct mlx5_cache_ent *ent = filp->private_data; - char lbuf[20] = {0}; u32 var; int err; - count = min(count, sizeof(lbuf) - 1); - if (copy_from_user(lbuf, buf, count)) - return -EFAULT; - - if (sscanf(lbuf, "%u", &var) != 1) - return -EINVAL; - - if (var > ent->total_mrs) - return -EINVAL; + err = kstrtou32_from_user(buf, count, 0, &var); + if (err) + return err; + /* + * Upon set we immediately fill the cache to high water mark implied by + * the limit. + */ + spin_lock_irq(&ent->lock); ent->limit = var; - - if (ent->available_mrs < ent->limit) { - err = add_keys(ent, 2 * ent->limit - ent->available_mrs); - if (err) - return err; - } - + err = resize_available_mrs(ent, 0, true); + spin_unlock_irq(&ent->lock); + if (err) + return err; return count; } @@ -371,20 +395,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) } } else if (ent->available_mrs > 2 * ent->limit) { /* - * The remove_keys() logic is performed as garbage collection - * task. Such task is intended to be run when no other active - * processes are running. + * The remove_cache_mr() logic is performed as garbage + * collection task. Such task is intended to be run when no + * other active processes are running. * * The need_resched() will return TRUE if there are user tasks * to be activated in near future. * - * In such case, we don't execute remove_keys() and postpone - * the garbage collection work to try to run in next cycle, - * in order to free CPU resources to other tasks. 
+ * In such case, we don't execute remove_cache_mr() and postpone + * the garbage collection work to try to run in next cycle, in + * order to free CPU resources to other tasks. */ if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_keys(ent, 1); + remove_cache_mr(ent); if (ent->available_mrs > ent->limit) queue_work(cache->wq, &ent->work); } else { From patchwork Tue Mar 10 08:22:35 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428713 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3933914B7 for ; Tue, 10 Mar 2020 08:23:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 197E12467D for ; Tue, 10 Mar 2020 08:23:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828605; bh=CXvElbVthgshbxYCeAv5iFHcm/hNG5ksdPm5jH2v/Ik=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=q7buwkDFfJ0CAHYufucGEwmD33/f8NejsrtduFyLrIp/b5JwUnQFa52aa1Dyb9bSh w0S1ipOD4XgnAxtQthggetTu1ChbP4qVmJaBNRHa1dUbEr03poFztNBp6vdqloTis/ P53AfkZ+barTnoCASlThTB1klPT44ETUKFu0ejwM= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726466AbgCJIXY (ORCPT ); Tue, 10 Mar 2020 04:23:24 -0400 Received: from mail.kernel.org ([198.145.29.99]:44962 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXY (ORCPT ); Tue, 10 Mar 2020 04:23:24 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 3386324677; Tue, 10 Mar 2020 08:23:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828603; bh=CXvElbVthgshbxYCeAv5iFHcm/hNG5ksdPm5jH2v/Ik=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=MZCCleyAtMX7Q/Bopji3P3KdFsORgkDX1omoywSccVfQKQVkrRC/g8BaZK6WOqVBR hqDeensrg65hX+IyLRDx827m8sw1MwfFqObWvZVShqYgSpCEzuskW70UDr0MbZeePW LG2WvWadWemk2CFfBTPENqlJbeRVmpbhf2pEk38E= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 09/12] RDMA/mlx5: Lock access to ent->available_mrs/limit when doing queue_work Date: Tue, 10 Mar 2020 10:22:35 +0200 Message-Id: <20200310082238.239865-10-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe Accesses to these members need to be locked. There is no reason not to hold a spinlock while calling queue_work(), so move the tests into a helper and always call it under lock. The helper should be called when available_mrs is adjusted. 
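As a rough illustration of that pattern, here is a simplified userspace sketch, not the driver code; bucket, queue_adjust_locked and the work_queued flag are assumed names, a pthread mutex stands in for ent->lock, and a flag stands in for queue_work(). The counter is adjusted and the water marks are tested in a single critical section, so no path can change available_mrs without also making the corresponding refill/shrink decision. In the kernel this is safe because queue_work() does not sleep and may be called while holding a spinlock.

/* Simplified userspace model: adjust the counter and test the water marks
 * under one lock, then request background work (illustrative names only). */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct bucket {
        pthread_mutex_t lock;       /* stands in for ent->lock */
        unsigned int available_mrs;
        unsigned int limit;         /* low water mark; 2 * limit is the high mark */
        bool work_queued;           /* stands in for queue_work(&ent->work) */
};

/* Call with b->lock held, every time available_mrs is adjusted. */
static void queue_adjust_locked(struct bucket *b)
{
        if (b->available_mrs < b->limit || b->available_mrs > 2 * b->limit)
                b->work_queued = true;  /* ask the worker to refill or shrink */
}

/* Take one MR out of the bucket, checking the thresholds before unlocking. */
static void take_one(struct bucket *b)
{
        pthread_mutex_lock(&b->lock);
        if (b->available_mrs) {
                b->available_mrs--;
                queue_adjust_locked(b);
        }
        pthread_mutex_unlock(&b->lock);
}

int main(void)
{
        struct bucket b = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .available_mrs = 4,
                .limit = 4,
        };

        take_one(&b);   /* drops below the low water mark */
        printf("available=%u work_queued=%d\n", b.available_mrs, b.work_queued);
        return 0;
}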
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 40 ++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 091e24c58e2c..b46039d86b98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -134,6 +134,10 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) list_add_tail(&mr->list, &ent->head); ent->available_mrs++; ent->total_mrs++; + /* + * Creating is always done in response to some demand, so do not call + * queue_adjust_cache_locked(). + */ spin_unlock_irqrestore(&ent->lock, flags); if (!completion_done(&ent->compl)) @@ -367,6 +371,20 @@ static int someone_adding(struct mlx5_mr_cache *cache) return 0; } +/* + * Check if the bucket is outside the high/low water mark and schedule an async + * update. The cache refill has hysteresis, once the low water mark is hit it is + * refilled up to the high mark. + */ +static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) +{ + lockdep_assert_held(&ent->lock); + + if (ent->available_mrs < ent->limit || + ent->available_mrs > 2 * ent->limit) + queue_work(ent->dev->cache.wq, &ent->work); +} + static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; @@ -462,9 +480,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, list); list_del(&mr->list); ent->available_mrs--; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - if (ent->available_mrs < ent->limit) - queue_work(cache->wq, &ent->work); return mr; } } @@ -487,14 +504,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) list); list_del(&mr->list); ent->available_mrs--; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - if (ent->available_mrs < ent->limit) - queue_work(dev->cache.wq, &ent->work); break; } + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - - queue_work(dev->cache.wq, &ent->work); } if (!mr) @@ -516,7 +531,6 @@ static void detach_mr_from_cache(struct mlx5_ib_mr *mr) void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; - int shrink = 0; if (!ent) return; @@ -524,20 +538,14 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (mlx5_mr_cache_invalidate(mr)) { detach_mr_from_cache(mr); destroy_mkey(dev, mr); - if (ent->available_mrs < ent->limit) - queue_work(dev->cache.wq, &ent->work); return; } spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; - if (ent->available_mrs > 2 * ent->limit) - shrink = 1; + queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); - - if (shrink) - queue_work(dev->cache.wq, &ent->work); } static void clean_keys(struct mlx5_ib_dev *dev, int c) @@ -653,7 +661,9 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; - queue_work(cache->wq, &ent->work); + spin_lock_irq(&ent->lock); + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); } mlx5_mr_cache_debugfs_init(dev); From patchwork Tue Mar 10 08:22:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428707 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by 
pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2EAE214B7 for ; Tue, 10 Mar 2020 08:23:16 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 069CA2467F for ; Tue, 10 Mar 2020 08:23:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828596; bh=eJzpbLde5mtr4q2HnQK+Xf5A2S0BuxKX2rbXBHhzCWs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=cTRuiF6ImEtu3P9pESuHptUUM+sKVlgxp5g3b21ETZ5sv7WEISBu+zfthbdoqh0yq DOwl91mMbm83KS06IqzuQOmLg0mejOdntaZs2Mm6e7l096F3YGdsvOnDHcXLWIAvou wd2WP3H/lj7whUoYjgGFFtBIK5v+2EBFVTpXUW8c= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726521AbgCJIXP (ORCPT ); Tue, 10 Mar 2020 04:23:15 -0400 Received: from mail.kernel.org ([198.145.29.99]:44842 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXP (ORCPT ); Tue, 10 Mar 2020 04:23:15 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 7231B24677; Tue, 10 Mar 2020 08:23:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828594; bh=eJzpbLde5mtr4q2HnQK+Xf5A2S0BuxKX2rbXBHhzCWs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=u7SLZMXslHM8hFhSF8in/Q5EsN6UukuHDaGZFOES+GrzznBinYcfT/DGHwd5oyrPm 7VNuMU5oFZG/8IBmr9qR1swVBbMfqB30IakuOrzwHm6mrFbAI0nB5M9SODPJ6Hoc31 rqHXOBXfZkEQJCyaDnsXQogRLHwSGxmusk+JQY+M= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 10/12] RDMA/mlx5: Fix locking in MR cache work queue Date: Tue, 10 Mar 2020 10:22:36 +0200 Message-Id: <20200310082238.239865-11-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe All of the members of mlx5_cache_ent must be accessed while holding the spinlock, add the missing spinlock in the __cache_work_func(). Using cache->stopped and flush_workqueue() is an inherently racy way to shutdown self-scheduling work on a queue. Replace it with ent->disabled under lock, and always check disabled before queuing any new work. Use cancel_work_sync() to shutdown the queue. Use READ_ONCE/WRITE_ONCE for dev->last_add to manage concurrency as coherency is less important here. Split fill_delay from the bitfield. C bitfield updates are not atomic and this is just a mess. Use READ_ONCE/WRITE_ONCE, but this could also use test_bit()/set_bit(). Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/mr.c | 121 +++++++++++++++++---------- 2 files changed, 80 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 58369d38d627..21345d7bafcb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -704,6 +704,8 @@ struct mlx5_cache_ent { u32 access_mode; u32 page; + u8 disabled:1; + /* * - available_mrs is the length of list head, ie the number of MRs * available for immediate allocation. 
@@ -730,7 +732,6 @@ struct mlx5_cache_ent { struct mlx5_mr_cache { struct workqueue_struct *wq; struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; - int stopped; struct dentry *root; unsigned long last_add; }; @@ -1002,10 +1003,10 @@ struct mlx5_ib_dev { */ struct mutex cap_mask_mutex; u8 ib_active:1; - u8 fill_delay:1; u8 is_rep:1; u8 lag_active:1; u8 wc_support:1; + u8 fill_delay; struct umr_common umrc; /* sync used page count stats */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b46039d86b98..424ce3de3865 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -113,13 +113,13 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = mr->cache_ent; unsigned long flags; - spin_lock_irqsave(&ent->lock, flags); - ent->pending--; - spin_unlock_irqrestore(&ent->lock, flags); if (status) { mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); kfree(mr); - dev->fill_delay = 1; + spin_lock_irqsave(&ent->lock, flags); + ent->pending--; + WRITE_ONCE(dev->fill_delay, 1); + spin_unlock_irqrestore(&ent->lock, flags); mod_timer(&dev->delay_timer, jiffies + HZ); return; } @@ -128,12 +128,13 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); - dev->cache.last_add = jiffies; + WRITE_ONCE(dev->cache.last_add, jiffies); spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); ent->available_mrs++; ent->total_mrs++; + ent->pending--; /* * Creating is always done in response to some demand, so do not call * queue_adjust_cache_locked(). @@ -159,11 +160,6 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { - if (ent->pending >= MAX_PENDING_REG_MR) { - err = -EAGAIN; - break; - } - mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { err = -ENOMEM; @@ -184,6 +180,12 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) MLX5_SET(mkc, mkc, log_page_size, ent->page); spin_lock_irq(&ent->lock); + if (ent->pending >= MAX_PENDING_REG_MR) { + err = -EAGAIN; + spin_unlock_irq(&ent->lock); + kfree(mr); + break; + } ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, @@ -204,15 +206,13 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return err; } -static void remove_cache_mr(struct mlx5_cache_ent *ent) +static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); + lockdep_assert_held(&ent->lock); + if (list_empty(&ent->head)) return; - } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->available_mrs--; @@ -220,6 +220,7 @@ static void remove_cache_mr(struct mlx5_cache_ent *ent) spin_unlock_irq(&ent->lock); mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); kfree(mr); + spin_lock_irq(&ent->lock); } static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, @@ -248,9 +249,7 @@ static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, } else return 0; } else { - spin_unlock_irq(&ent->lock); - remove_cache_mr(ent); - spin_lock_irq(&ent->lock); + remove_cache_mr_locked(ent); } } } @@ -359,16 +358,21 @@ static const struct file_operations limit_fops = { .read = limit_read, }; -static int 
someone_adding(struct mlx5_mr_cache *cache) +static bool someone_adding(struct mlx5_mr_cache *cache) { - int i; + unsigned int i; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - if (cache->ent[i].available_mrs < cache->ent[i].limit) - return 1; - } + struct mlx5_cache_ent *ent = &cache->ent[i]; + bool ret; - return 0; + spin_lock_irq(&ent->lock); + ret = ent->available_mrs < ent->limit; + spin_unlock_irq(&ent->lock); + if (ret) + return true; + } + return false; } /* @@ -380,6 +384,8 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { lockdep_assert_held(&ent->lock); + if (ent->disabled) + return; if (ent->available_mrs < ent->limit || ent->available_mrs > 2 * ent->limit) queue_work(ent->dev->cache.wq, &ent->work); @@ -391,27 +397,42 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) struct mlx5_mr_cache *cache = &dev->cache; int err; - if (cache->stopped) - return; + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; - if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) { + if (ent->available_mrs + ent->pending < 2 * ent->limit && + !READ_ONCE(dev->fill_delay)) { + spin_unlock_irq(&ent->lock); err = add_keys(ent, 1); - if (ent->available_mrs < 2 * ent->limit) { + + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; + if (err) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", ent->order); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(3)); - } else if (err) { - mlx5_ib_warn(dev, "command failed order %d, err %d\n", - ent->order, err); + } else { + mlx5_ib_warn( + dev, + "command failed order %d, err %d\n", + ent->order, err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); - } else { - queue_work(cache->wq, &ent->work); } } + /* + * Once we start populating due to hitting a low water mark + * continue until we pass the high water mark. + */ + if (ent->available_mrs + ent->pending < 2 * ent->limit) + queue_work(cache->wq, &ent->work); } else if (ent->available_mrs > 2 * ent->limit) { + bool need_delay; + /* * The remove_cache_mr() logic is performed as garbage * collection task. Such task is intended to be run when no @@ -424,15 +445,20 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) * the garbage collection work to try to run in next cycle, in * order to free CPU resources to other tasks. 
*/ - if (!need_resched() && !someone_adding(cache) && - time_after(jiffies, cache->last_add + 300 * HZ)) { - remove_cache_mr(ent); - if (ent->available_mrs > ent->limit) - queue_work(cache->wq, &ent->work); - } else { + spin_unlock_irq(&ent->lock); + need_delay = need_resched() || someone_adding(cache) || + time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); + spin_lock_irq(&ent->lock); + if (ent->disabled) + goto out; + if (need_delay) queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); - } + remove_cache_mr_locked(ent); + queue_adjust_cache_locked(ent); } +out: + spin_unlock_irq(&ent->lock); } static void delayed_cache_work_func(struct work_struct *work) @@ -613,7 +639,7 @@ static void delay_time_func(struct timer_list *t) { struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer); - dev->fill_delay = 0; + WRITE_ONCE(dev->fill_delay, 0); } int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) @@ -673,13 +699,20 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { - int i; + unsigned int i; if (!dev->cache.wq) return 0; - dev->cache.stopped = 1; - flush_workqueue(dev->cache.wq); + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + struct mlx5_cache_ent *ent = &dev->cache.ent[i]; + + spin_lock_irq(&ent->lock); + ent->disabled = true; + spin_unlock_irq(&ent->lock); + cancel_work_sync(&ent->work); + cancel_delayed_work_sync(&ent->dwork); + } mlx5_mr_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); From patchwork Tue Mar 10 08:22:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428709 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D869E139A for ; Tue, 10 Mar 2020 08:23:18 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id B955D24680 for ; Tue, 10 Mar 2020 08:23:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828598; bh=Jnb97byeJITOmW06xcELb2I6usiUHYVwx7b8vuBlZec=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=rK47s1A0FzkDPOx1gFfqJnuT299vTUUPfF3g+bkr0vk1DNU0VS+pFJmtIPwKWNUaI vDc6pSTUI+QgjrPXq76Yrhwvd2qKwQ7TIalL1jIX07MQOPDDJzTSU+dgwXvnqsaO58 2hOb5s2UKPtQI8XB2gKG62bNQ6j937eOk1e9YwP4= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726523AbgCJIXS (ORCPT ); Tue, 10 Mar 2020 04:23:18 -0400 Received: from mail.kernel.org ([198.145.29.99]:44892 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXS (ORCPT ); Tue, 10 Mar 2020 04:23:18 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 8298A2467D; Tue, 10 Mar 2020 08:23:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828597; bh=Jnb97byeJITOmW06xcELb2I6usiUHYVwx7b8vuBlZec=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=WVKD4xMgDOfl05z74a+IgRqvEZiW9SYoPBrwKsQwQMxXbPTlbpKDo974EnHKyNKoG J6oOqHua7WaN1b0+Vx9GALs4cRxgDLuulY19ve3FN1UGZ1lGXm9NHkWSPI6SV/Dwhc SCDG9Vpkf01UHOGB4rseioKXZeKRMNqeWucoNJAU= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Subject: [PATCH 
rdma-next v1 11/12] RDMA/mlx5: Revise how the hysteresis scheme works for cache filling Date: Tue, 10 Mar 2020 10:22:37 +0200 Message-Id: <20200310082238.239865-12-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: <20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe Currently if the work queue is running then it is in 'hysteresis' mode and will fill until the cache reaches the high water mark. This implicit state is very tricky and doesn't interact with pending very well. Instead of self re-scheduling the work queue after the add_keys() has started to create the new MR, have the queue scheduled from reg_mr_callback() only after the requested MR has been added. This avoids the bad design of an in-rush of queue'd work doing back to back add_keys() until EAGAIN then sleeping. The add_keys() will be paced one at a time as they complete, slowly filling up the cache. Also, fix pending to be only manipulated under lock. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 41 ++++++++++++++++++---------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 21345d7bafcb..20f2bfeabee4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -705,6 +705,7 @@ struct mlx5_cache_ent { u32 page; u8 disabled:1; + u8 fill_to_high_water:1; /* * - available_mrs is the length of list head, ie the number of MRs diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 424ce3de3865..afacaf8981fa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -86,6 +86,7 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); +static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -134,11 +135,9 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) list_add_tail(&mr->list, &ent->head); ent->available_mrs++; ent->total_mrs++; + /* If we are doing fill_to_high_water then keep going. */ + queue_adjust_cache_locked(ent); ent->pending--; - /* - * Creating is always done in response to some demand, so do not call - * queue_adjust_cache_locked(). - */ spin_unlock_irqrestore(&ent->lock, flags); if (!completion_done(&ent->compl)) @@ -384,11 +383,29 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { lockdep_assert_held(&ent->lock); - if (ent->disabled) + if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) return; - if (ent->available_mrs < ent->limit || - ent->available_mrs > 2 * ent->limit) + if (ent->available_mrs < ent->limit) { + ent->fill_to_high_water = true; + queue_work(ent->dev->cache.wq, &ent->work); + } else if (ent->fill_to_high_water && + ent->available_mrs + ent->pending < 2 * ent->limit) { + /* + * Once we start populating due to hitting a low water mark + * continue until we pass the high water mark. 
+ */ queue_work(ent->dev->cache.wq, &ent->work); + } else if (ent->available_mrs == 2 * ent->limit) { + ent->fill_to_high_water = false; + } else if (ent->available_mrs > 2 * ent->limit) { + /* Queue deletion of excess entries */ + ent->fill_to_high_water = false; + if (ent->pending) + queue_delayed_work(ent->dev->cache.wq, &ent->dwork, + msecs_to_jiffies(1000)); + else + queue_work(ent->dev->cache.wq, &ent->work); + } } static void __cache_work_func(struct mlx5_cache_ent *ent) @@ -401,11 +418,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (ent->disabled) goto out; - if (ent->available_mrs + ent->pending < 2 * ent->limit && + if (ent->fill_to_high_water && + ent->available_mrs + ent->pending < 2 * ent->limit && !READ_ONCE(dev->fill_delay)) { spin_unlock_irq(&ent->lock); err = add_keys(ent, 1); - spin_lock_irq(&ent->lock); if (ent->disabled) goto out; @@ -424,12 +441,6 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) msecs_to_jiffies(1000)); } } - /* - * Once we start populating due to hitting a low water mark - * continue until we pass the high water mark. - */ - if (ent->available_mrs + ent->pending < 2 * ent->limit) - queue_work(cache->wq, &ent->work); } else if (ent->available_mrs > 2 * ent->limit) { bool need_delay; From patchwork Tue Mar 10 08:22:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 11428711 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0A17B14B7 for ; Tue, 10 Mar 2020 08:23:22 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id D82142467F for ; Tue, 10 Mar 2020 08:23:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828601; bh=efQUZgIxixWfdX4FEOTHwy1x0BrLC2a4TF0W9n7dgn0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=kzQqDZ7LVezdy0BhOgPn0+2/a9kl9Ff4Yngc9KxRwnrX0mabySMBWl0pONSaKlg+E SgX47WcjTfyJaShdtSkk286q9a9VMjJHrVudeYNyRLkGso4rF88EcC/i3w4I085SpG DFT/tQVwmWFexQapx1eSPoELleJDt1JWUQ9tMq+Y= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726420AbgCJIXV (ORCPT ); Tue, 10 Mar 2020 04:23:21 -0400 Received: from mail.kernel.org ([198.145.29.99]:44936 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725919AbgCJIXV (ORCPT ); Tue, 10 Mar 2020 04:23:21 -0400 Received: from localhost (unknown [193.47.165.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 921C624677; Tue, 10 Mar 2020 08:23:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1583828600; bh=efQUZgIxixWfdX4FEOTHwy1x0BrLC2a4TF0W9n7dgn0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ozPo/wosLwniwPynj3+ZLw84ku8JzoRVCC2WAPEdg3tPvyTIzm+cjXcY4ejP3hvCw ZXYmWs8WsETXbilRK5MbhdH7/7n40+6Csumh24upYOw9V5q9uIGgpWSPzVissJULgy X+Lg3RTfHSxtbwxZUVSSOWPMj/3ABf0scnSi/XKY= From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Subject: [PATCH rdma-next v1 12/12] RDMA/mlx5: Allow MRs to be created in the cache synchronously Date: Tue, 10 Mar 2020 10:22:38 +0200 Message-Id: <20200310082238.239865-13-leon@kernel.org> X-Mailer: git-send-email 2.24.1 In-Reply-To: 
<20200310082238.239865-1-leon@kernel.org> References: <20200310082238.239865-1-leon@kernel.org> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe If the cache is completely out of MRs, and we are running in cache mode, then directly, and synchronously, create an MR that is compatible with the cache bucket using a sleeping mailbox command. This ensures that the thread that is waiting for the MR absolutely will get one. When a MR allocated in this way becomes freed then it is compatible with the cache bucket and will be recycled back into it. Deletes the very buggy ent->compl scheme to create a synchronous MR allocation. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 147 ++++++++++++++++----------- 2 files changed, 87 insertions(+), 61 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 20f2bfeabee4..aae88311e714 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -727,7 +727,6 @@ struct mlx5_cache_ent { struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; - struct completion compl; }; struct mlx5_mr_cache { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index afacaf8981fa..a401931189b7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -139,14 +139,34 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) queue_adjust_cache_locked(ent); ent->pending--; spin_unlock_irqrestore(&ent->lock, flags); +} + +static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) +{ + struct mlx5_ib_mr *mr; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return NULL; + mr->order = ent->order; + mr->cache_ent = ent; + mr->dev = ent->dev; + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - if (!completion_done(&ent->compl)) - complete(&ent->compl); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, log_page_size, ent->page); + return mr; } +/* Asynchronously schedule new MRs to be populated in the cache. 
*/ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { - int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -159,25 +179,11 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = alloc_cache_mr(ent, mkc); if (!mr) { err = -ENOMEM; break; } - mr->order = ent->order; - mr->cache_ent = ent; - mr->dev = ent->dev; - - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (ent->access_mode >> 2) & 0x7); - - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); - MLX5_SET(mkc, mkc, log_page_size, ent->page); - spin_lock_irq(&ent->lock); if (ent->pending >= MAX_PENDING_REG_MR) { err = -EAGAIN; @@ -205,6 +211,44 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) return err; } +/* Synchronously create a MR in the cache */ +static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) +{ + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + mr = alloc_cache_mr(ent, mkc); + if (!mr) { + err = -ENOMEM; + goto free_in; + } + + err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto free_mr; + + mr->mmkey.type = MLX5_MKEY_MR; + WRITE_ONCE(ent->dev->cache.last_add, jiffies); + spin_lock_irq(&ent->lock); + ent->total_mrs++; + spin_unlock_irq(&ent->lock); + kfree(in); + return mr; +free_mr: + kfree(mr); +free_in: + kfree(in); + return ERR_PTR(err); +} + static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; @@ -427,12 +471,12 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) if (ent->disabled) goto out; if (err) { - if (err == -EAGAIN) { - mlx5_ib_dbg(dev, "returned eagain, order %d\n", - ent->order); - queue_delayed_work(cache->wq, &ent->dwork, - msecs_to_jiffies(3)); - } else { + /* + * EAGAIN only happens if pending is positive, so we + * will be rescheduled from reg_mr_callback(). The only + * failure path here is ENOMEM. 
+ */ + if (err != -EAGAIN) { mlx5_ib_warn( dev, "command failed order %d, err %d\n", @@ -495,36 +539,30 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - int err; if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); ent = &cache->ent[entry]; - while (1) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - - err = add_keys(ent, 1); - if (err && err != -EAGAIN) - return ERR_PTR(err); - - wait_for_completion(&ent->compl); - } else { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + mr = create_cache_mr(ent); + if (IS_ERR(mr)) return mr; - } + } else { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); } + return mr; } -static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent) +/* Return a MR already available in the cache */ +static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; @@ -676,7 +714,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->dev = dev; ent->limit = 0; - init_completion(&ent->compl); INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); @@ -939,26 +976,16 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order); struct mlx5_ib_mr *mr; - int err = 0; - int i; if (!ent) return ERR_PTR(-E2BIG); - for (i = 0; i < 1; i++) { - mr = alloc_cached_mr(ent); - if (mr) - break; - - err = add_keys(ent, 1); - if (err && err != -EAGAIN) { - mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); - break; - } + mr = get_cache_mr(ent); + if (!mr) { + mr = create_cache_mr(ent); + if (IS_ERR(mr)) + return mr; } - if (!mr) - return ERR_PTR(-EAGAIN); - mr->ibmr.pd = pd; mr->umem = umem; mr->access_flags = access_flags;