
[rdma-next,2/8] RDMA/mlx5: Generalize mlx5_cache_cache_mr() to fit all cacheable mkeys

Message ID 20220908205421.210048-3-michaelgur@nvidia.com (mailing list archive)
State Superseded
Series RDMA/mlx5: Switch MR cache to use RB-tree

Commit Message

Michael Guralnik Sept. 8, 2022, 8:54 p.m. UTC
From: Aharon Landau <aharonl@nvidia.com>

All mkeys that can use UMR can be cached.
Instead of using reg_create() for mkeys that are not available in the
cache, generalize mlx5_cache_cache_mr() to fit all cacheable mkeys.
When a specific mkey request cannot be satisfied by the available cached
mkeys, synchronously create an mkey with the requested properties.

In the following patches, I will introduce a mechanism to cache all
cacheable mkeys upon deregistering the MR.
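
In pseudo-C, the reworked allocation path boils down to the following
(a simplified sketch of mlx5_mr_cache_alloc() after this patch; see the
diff below for the real thing):

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);
	/* First try to pop a matching mkey from the cache */
	if (!mlx5_cache_get_mkey(dev, access_mode, access_flags, ndescs, mr)) {
		/* Cache miss: synchronously create an mkey of the exact size */
		err = create_cache_mkey(dev, access_mode, access_flags, ndescs,
					&mr->mmkey);
		if (err) {
			kfree(mr);
			return ERR_PTR(err);
		}
	}
	mr->mmkey.type = MLX5_MKEY_MR;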

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   6 +-
 drivers/infiniband/hw/mlx5/mr.c      | 146 +++++++++++++++------------
 drivers/infiniband/hw/mlx5/odp.c     |   9 +-
 3 files changed, 87 insertions(+), 74 deletions(-)

Comments

Jason Gunthorpe Sept. 9, 2022, 2:47 p.m. UTC | #1
On Thu, Sep 08, 2022 at 11:54:15PM +0300, Michael Guralnik wrote:
> From: Aharon Landau <aharonl@nvidia.com>
> 
> All mkeys that can use UMR can be cached.
> Instead of using reg_create() for mkeys that are not available in the
> cache, generalize mlx5_cache_cache_mr() to fit all cacheable mkeys.
> When a specific mkey request cannot be satisfied by the available cached
> mkeys, synchronously create an mkey with the requested properties.

This doesn't seem right to me.

The point of the cache mkeys is this:

> -	if (!ent || ent->limit == 0 ||
> -	    !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
                                        ^^^^^

Cached mkeys are created with 0 access_flags, so any request whose
access_flags need a change away from 0 that UMR cannot perform cannot
be served from the cache.

> -		mutex_lock(&dev->slow_path_mutex);
> -		mr = reg_create(pd, umem, iova, access_flags, page_size, false);
> -		mutex_unlock(&dev->slow_path_mutex);
> -		return mr;
> -	}
>  
> -	mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
> +	ndescs = ib_umem_num_dma_blocks(umem, page_size);
> +
> +	mr = mlx5_mr_cache_alloc(dev, MLX5_MKC_ACCESS_MODE_MTT,
> +				 access_flags, ndescs);

So passing non-zero access_flags to a cache function that has no way
to record the access_flags in the cache is quite illogical.

Before any of this can be reworked, the cache infrastructure has to be
fixed to record the exact details of the mkeys it is holding,
specifically the access flags.
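
Something along these lines, i.e. a per-mkey record of exactly how it
was created (a hypothetical sketch just to illustrate the idea, the
struct and field names are made up):

	/* Hypothetical: remember what each cached mkey was created with */
	struct mlx5r_cached_mkey_attr {
		u8 access_mode;
		unsigned int access_flags;
		unsigned int ndescs;
	};

A cache lookup would then only hand back an mkey whose recorded
attributes match the request, instead of assuming everything was
created with access_flags == 0.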

But even if that is done, I don't think it makes sense to eliminate
the reg_create. Arguably, once all mkeys are cacheable, we should
eliminate the mr_cache_alloc in the synchronous path instead.

Simply always call reg_create, and on destruction put the mkey into the
proper place based on its size and access_flags.
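
i.e. the dereg path would do something roughly like this (a hypothetical
sketch; mkey_cache_ent_for() is a made-up helper that would pick the
bucket from both size and access_flags):

	/* On MR destruction, park the mkey in the matching cache bucket */
	ent = mkey_cache_ent_for(dev, mr->mmkey.ndescs, mr->access_flags);
	if (!ent || push_mkey(ent, false, xa_mk_value(mr->mmkey.key)))
		mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);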

Jason

Patch

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d260132d8f45..cb8006b854e0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1312,9 +1312,9 @@  int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-				       struct mlx5_cache_ent *ent,
-				       int access_flags);
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, u8 access_mode,
+				       unsigned int access_flags,
+				       unsigned int ndescs);
 
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d8bdba0056a1..b8529f73b306 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -287,16 +287,18 @@  static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
 	return ret;
 }
 
-static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
+static void set_cache_mkc(struct mlx5_ib_dev *dev, u8 access_mode,
+			  unsigned int access_flags, unsigned int ndescs,
+			  void *mkc)
 {
-	set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
+	set_mkc_access_pd_addr_fields(mkc, access_flags, 0, dev->umrc.pd);
 	MLX5_SET(mkc, mkc, free, 1);
 	MLX5_SET(mkc, mkc, umr_en, 1);
-	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
-	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
+	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+	MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
 
 	MLX5_SET(mkc, mkc, translations_octword_size,
-		 get_mkc_octo_size(ent->access_mode, ent->ndescs));
+		 get_mkc_octo_size(access_mode, ndescs));
 	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
 }
 
@@ -315,7 +317,7 @@  static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 			return -ENOMEM;
 		mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
 				   memory_key_mkey_entry);
-		set_cache_mkc(ent, mkc);
+		set_cache_mkc(ent->dev, ent->access_mode, 0, ent->ndescs, mkc);
 		async_create->ent = ent;
 
 		err = push_mkey(ent, true, NULL);
@@ -340,8 +342,10 @@  static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
 	return err;
 }
 
-/* Synchronously create a MR in the cache */
-static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
+/* Synchronously create a cacheable mkey */
+static int create_cache_mkey(struct mlx5_ib_dev *dev, u8 access_mode,
+			     unsigned int access_flags, unsigned int ndescs,
+			     struct mlx5_ib_mkey *mkey)
 {
 	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	void *mkc;
@@ -352,14 +356,9 @@  static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
 	if (!in)
 		return -ENOMEM;
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
-	set_cache_mkc(ent, mkc);
-
-	err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
-	if (err)
-		goto free_in;
+	set_cache_mkc(dev, access_mode, access_flags, ndescs, mkc);
 
-	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
-free_in:
+	err = mlx5_ib_create_mkey(dev, mkey, in, inlen);
 	kfree(in);
 	return err;
 }
@@ -637,41 +636,80 @@  static void delayed_cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-				       struct mlx5_cache_ent *ent,
-				       int access_flags)
+static bool mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_mr *mr;
-	int err;
+	xa_lock_irq(&ent->mkeys);
+	if (!ent->stored) {
+		queue_adjust_cache_locked(ent);
+		ent->miss++;
+		xa_unlock_irq(&ent->mkeys);
+		return false;
+	}
+
+	mr->mmkey.key = pop_stored_mkey(ent);
+	mr->mmkey.ndescs = ent->ndescs;
+	mr->mmkey.cache_ent = ent;
+	queue_adjust_cache_locked(ent);
+	ent->in_use++;
+	xa_unlock_irq(&ent->mkeys);
+	return true;
+}
+
+static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
+							unsigned int order)
+{
+	struct mlx5_mkey_cache *cache = &dev->cache;
+
+	if (order < cache->ent[0].order)
+		return &cache->ent[0];
+	order = order - cache->ent[0].order;
+	if (order > MKEY_CACHE_LAST_STD_ENTRY)
+		return NULL;
+	return &cache->ent[order];
+}
+
+static bool mlx5_cache_get_mkey(struct mlx5_ib_dev *dev, u8 access_mode,
+				u8 access_flags, unsigned int ndescs,
+				struct mlx5_ib_mr *mr)
+{
+	struct mlx5_cache_ent *ent;
 
 	if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
-		return ERR_PTR(-EOPNOTSUPP);
+		return false;
+
+	if (access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+		ent = &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY];
+
+	ent = mkey_cache_ent_from_order(dev, order_base_2(ndescs));
+	if (!ent)
+		return false;
+
+	return mlx5_ent_get_mkey(ent, mr);
+}
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, u8 access_mode,
+				       unsigned int access_flags,
+				       unsigned int ndescs)
+{
+	struct mlx5_ib_mr *mr;
+	int err;
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	xa_lock_irq(&ent->mkeys);
-	ent->in_use++;
-
-	if (!ent->stored) {
-		queue_adjust_cache_locked(ent);
-		ent->miss++;
-		xa_unlock_irq(&ent->mkeys);
-		err = create_cache_mkey(ent, &mr->mmkey.key);
+	if (!mlx5_cache_get_mkey(dev, access_mode, access_flags, ndescs, mr)) {
+		/*
+		 * Didn't find an mkey in cache.
+		 * Create an mkey with the exact needed size.
+		 */
+		err = create_cache_mkey(dev, access_mode, access_flags, ndescs,
+					&mr->mmkey);
 		if (err) {
-			xa_lock_irq(&ent->mkeys);
-			ent->in_use--;
-			xa_unlock_irq(&ent->mkeys);
 			kfree(mr);
 			return ERR_PTR(err);
 		}
-	} else {
-		mr->mmkey.key = pop_stored_mkey(ent);
-		queue_adjust_cache_locked(ent);
-		xa_unlock_irq(&ent->mkeys);
 	}
-	mr->mmkey.cache_ent = ent;
 	mr->mmkey.type = MLX5_MKEY_MR;
 	init_waitqueue_head(&mr->mmkey.wait);
 	return mr;
@@ -876,19 +914,6 @@  static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
 	return MLX5_MAX_UMR_SHIFT;
 }
 
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
-							unsigned int order)
-{
-	struct mlx5_mkey_cache *cache = &dev->cache;
-
-	if (order < cache->ent[0].order)
-		return &cache->ent[0];
-	order = order - cache->ent[0].order;
-	if (order > MKEY_CACHE_LAST_STD_ENTRY)
-		return NULL;
-	return &cache->ent[order];
-}
-
 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			  u64 length, int access_flags, u64 iova)
 {
@@ -916,9 +941,8 @@  static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 					     int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct mlx5_cache_ent *ent;
+	unsigned int page_size, ndescs;
 	struct mlx5_ib_mr *mr;
-	unsigned int page_size;
 
 	if (umem->is_dmabuf)
 		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
@@ -927,21 +951,11 @@  static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 						     0, iova);
 	if (WARN_ON(!page_size))
 		return ERR_PTR(-EINVAL);
-	ent = mkey_cache_ent_from_order(
-		dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
-	/*
-	 * Matches access in alloc_cache_mr(). If the MR can't come from the
-	 * cache then synchronously create an uncached one.
-	 */
-	if (!ent || ent->limit == 0 ||
-	    !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
-		mutex_lock(&dev->slow_path_mutex);
-		mr = reg_create(pd, umem, iova, access_flags, page_size, false);
-		mutex_unlock(&dev->slow_path_mutex);
-		return mr;
-	}
 
-	mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
+	ndescs = ib_umem_num_dma_blocks(umem, page_size);
+
+	mr = mlx5_mr_cache_alloc(dev, MLX5_MKC_ACCESS_MODE_MTT,
+				 access_flags, ndescs);
 	if (IS_ERR(mr))
 		return mr;
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e9a29adef7dc..f7c9eeaa8e79 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -418,8 +418,8 @@  static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	if (IS_ERR(odp))
 		return ERR_CAST(odp);
 
-	mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY],
-				 imr->access_flags);
+	mr = mlx5_mr_cache_alloc(dev, MLX5_MKC_ACCESS_MODE_MTT,
+				 imr->access_flags, MLX5_IMR_MTT_ENTRIES);
 	if (IS_ERR(mr)) {
 		ib_umem_odp_release(odp);
 		return mr;
@@ -493,9 +493,8 @@  struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	if (IS_ERR(umem_odp))
 		return ERR_CAST(umem_odp);
 
-	imr = mlx5_mr_cache_alloc(dev,
-				  &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY],
-				  access_flags);
+	imr = mlx5_mr_cache_alloc(dev, MLX5_MKC_ACCESS_MODE_KSM, access_flags,
+				  mlx5_imr_ksm_entries);
 	if (IS_ERR(imr)) {
 		ib_umem_odp_release(umem_odp);
 		return imr;