[v2,rdma-next,4/6] RDMA/mlx5: Introduce mlx5r_cache_rb_key

Message ID 20221207085752.82458-5-michaelgur@nvidia.com (mailing list archive)
State Superseded
Series RDMA/mlx5: Switch MR cache to use RB-tree

Commit Message

Michael Guralnik Dec. 7, 2022, 8:57 a.m. UTC
Switch from using the mkey order to using the new struct as the key to
the RB tree of cache entries.
The key consists of all the mkey properties that UMR operations can't
modify. Use this key to define the cache entries and to search for and
create cache mkeys.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  32 +++--
 drivers/infiniband/hw/mlx5/mr.c      | 196 +++++++++++++++++++--------
 drivers/infiniband/hw/mlx5/odp.c     |  26 ++--
 3 files changed, 180 insertions(+), 74 deletions(-)

Comments

Jason Gunthorpe Dec. 8, 2022, 12:39 a.m. UTC | #1
On Wed, Dec 07, 2022 at 10:57:50AM +0200, Michael Guralnik wrote:
> Switch from using the mkey order to using the new struct as the key to
> the RB tree of cache entries.
> The key consists of all the mkey properties that UMR operations can't
> modify. Use this key to define the cache entries and to search for and
> create cache mkeys.
> 
> Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
> ---
>  drivers/infiniband/hw/mlx5/mlx5_ib.h |  32 +++--
>  drivers/infiniband/hw/mlx5/mr.c      | 196 +++++++++++++++++++--------
>  drivers/infiniband/hw/mlx5/odp.c     |  26 ++--
>  3 files changed, 180 insertions(+), 74 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> index 10e22fb01e1b..d795e9fc2c2f 100644
> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
> @@ -731,17 +731,26 @@ struct umr_common {
>  	unsigned int state;
>  };
>  
> +struct mlx5r_cache_rb_key {
> +	u8 ats:1;
> +	unsigned int access_mode;
> +	unsigned int access_flags;
> +	/*
> +	 * keep ndescs as the last member so entries with about the same ndescs
> +	 * will be close in the tree
> +	 */

? How does this happen? The compare function doesn't use memcmp..

I think this comment should go in the compare function because the
search function does this:

> -	return smallest;
> +	return (smallest &&
> +		smallest->rb_key.access_mode == rb_key.access_mode &&
> +		smallest->rb_key.access_flags == rb_key.access_flags &&
> +		smallest->rb_key.ats == rb_key.ats) ?
> +		       smallest :
> +		       NULL;

So it isn't that they have to be close in the tree, it is that
"smallest" has to find a matching mode/flags/ats before finding the
smallest ndescs of the matching list. Thus ndescs must always be the
last thing in the compare ladder.
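
For example, the relocated comment could sit directly above the ndescs
step in cache_ent_key_cmp() as posted in this patch (a sketch only, not
a replacement hunk):

static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
			     struct mlx5r_cache_rb_key key2)
{
	int res;

	res = key1.ats - key2.ats;
	if (res)
		return res;

	res = key1.access_mode - key2.access_mode;
	if (res)
		return res;

	res = key1.access_flags - key2.access_flags;
	if (res)
		return res;

	/*
	 * Keep ndescs as the last step of the compare ladder so a search
	 * first matches ats/access_mode/access_flags and only then picks
	 * the smallest ndescs among the matching entries.
	 */
	return key1.ndescs - key2.ndescs;
}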

> +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
> +				       int access_flags, int access_mode,
> +				       int ndescs)
> +{
> +	struct mlx5r_cache_rb_key rb_key = {
> +		.ndescs = ndescs,
> +		.access_mode = access_mode,
> +		.access_flags = get_unchangeable_access_flags(dev, access_flags)
> +	};
> +	struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
> +	if (!ent)

Missing newline

>  struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
> -					      int order)
> +					      struct mlx5r_cache_rb_key rb_key,
> +					      bool debugfs)
>  {
>  	struct mlx5_cache_ent *ent;
>  	int ret;
> @@ -808,7 +873,10 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
>  		return ERR_PTR(-ENOMEM);
>  
>  	xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
> -	ent->order = order;
> +	ent->rb_key.ats = rb_key.ats;
> +	ent->rb_key.access_mode = rb_key.access_mode;
> +	ent->rb_key.access_flags = rb_key.access_flags;
> +	ent->rb_key.ndescs = rb_key.ndescs;

ent->rb_key = rb_key

>  int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
>  {
> +	struct mlx5r_cache_rb_key rb_key = {
> +		.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
> +	};
>  	struct mlx5_mkey_cache *cache = &dev->cache;
>  	struct mlx5_cache_ent *ent;
>  	int i;
> @@ -838,19 +913,26 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
>  
>  	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
>  	timer_setup(&dev->delay_timer, delay_time_func, 0);
> +	mlx5_mkey_cache_debugfs_init(dev);
>  	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
> -		ent = mlx5r_cache_create_ent(dev, i);
> -
> -		if (i > MKEY_CACHE_LAST_STD_ENTRY) {
> -			mlx5_odp_init_mkey_cache_entry(ent);
> +		if (i > mkey_cache_max_order(dev))
>  			continue;

This is goofy, just make the for loop go from 2 to
mkey_cache_max_order() + 2 (and probably have the function do the + 2
internally)

Get rid of MAX_MKEY_CACHE_ENTRIES
> +
> +		if (i == MLX5_IMR_KSM_CACHE_ENTRY) {
> +			ent = mlx5_odp_init_mkey_cache_entry(dev);
> +			if (!ent)
> +				continue;

This too, just call mlx5_odp_init_mkey_cache_entry() outside the loop

And rename it to something like mlx5_odp_init_mkey_cache(), and don't
return ent.

Set ent->limit inside mlx5r_cache_create_ent()

And run over the whole rbtree in a final loop to do the final
queue_adjust_cache_locked() step.
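
Taken together with the loop-bound suggestion above, a rough sketch of
how the init path could look (the mlx5_odp_init_mkey_cache() name and
the error handling here are illustrative, not from the posted patch):

int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5r_cache_rb_key rb_key = {
		.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
	};
	struct mlx5_mkey_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct rb_node *node;
	int order;
	int ret;

	/* wq / async ctx / timer / debugfs setup as in the patch ... */

	/* ent->limit is set inside mlx5r_cache_create_ent() */
	for (order = 2; order <= mkey_cache_max_order(dev) + 2; order++) {
		rb_key.ndescs = 1 << order;
		ent = mlx5r_cache_create_ent(dev, rb_key, true);
		if (IS_ERR(ent))
			return PTR_ERR(ent);
	}

	ret = mlx5_odp_init_mkey_cache(dev);
	if (ret)
		return ret;

	/* Final pass over the whole tree once every entry exists */
	mutex_lock(&cache->rb_lock);
	for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
		ent = rb_entry(node, struct mlx5_cache_ent, node);
		xa_lock_irq(&ent->mkeys);
		queue_adjust_cache_locked(ent);
		xa_unlock_irq(&ent->mkeys);
	}
	mutex_unlock(&cache->rb_lock);

	return 0;
}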

> -void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
> +struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
>  {
> -	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
> -		return;
> -	ent->ndescs = mlx5_imr_ksm_entries;
> -	ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
> +	struct mlx5r_cache_rb_key rb_key = {
> +		.ats = 0,
> +		.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
> +		.access_flags = 0,
> +		.ndescs = mlx5_imr_ksm_entries,

Don't need to zero init things here
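
i.e. the initializer reduces to just the non-zero members:

	struct mlx5r_cache_rb_key rb_key = {
		.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
		.ndescs = mlx5_imr_ksm_entries,
	};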

Jason
Michael Guralnik Dec. 13, 2022, 12:12 p.m. UTC | #2
On 12/8/2022 2:39 AM, Jason Gunthorpe wrote:
> On Wed, Dec 07, 2022 at 10:57:50AM +0200, Michael Guralnik wrote:
>> Switch from using the mkey order to using the new struct as the key to
>> the RB tree of cache entries.
>> The key consists of all the mkey properties that UMR operations can't
>> modify. Use this key to define the cache entries and to search for and
>> create cache mkeys.
>>
>> Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
>> ---
>>   drivers/infiniband/hw/mlx5/mlx5_ib.h |  32 +++--
>>   drivers/infiniband/hw/mlx5/mr.c      | 196 +++++++++++++++++++--------
>>   drivers/infiniband/hw/mlx5/odp.c     |  26 ++--
>>   3 files changed, 180 insertions(+), 74 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
>> index 10e22fb01e1b..d795e9fc2c2f 100644
>> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
>> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
>> @@ -731,17 +731,26 @@ struct umr_common {
>>   	unsigned int state;
>>   };
>>   
>> +struct mlx5r_cache_rb_key {
>> +	u8 ats:1;
>> +	unsigned int access_mode;
>> +	unsigned int access_flags;
>> +	/*
>> +	 * keep ndescs as the last member so entries with about the same ndescs
>> +	 * will be close in the tree
>> +	 */
> ? How does this happen? The compare function doesn't use memcmp..
>
> I think this comment should go in the compare function because the
> search function does this:
>
>> -	return smallest;
>> +	return (smallest &&
>> +		smallest->rb_key.access_mode == rb_key.access_mode &&
>> +		smallest->rb_key.access_flags == rb_key.access_flags &&
>> +		smallest->rb_key.ats == rb_key.ats) ?
>> +		       smallest :
>> +		       NULL;
> So it isn't that they have to be close in the tree, it is that
> "smallest" has to find a matching mode/flags/ats before finding the
> smallest ndescs of the matching list. Thus ndescs must always be the
> last thing in the compare ladder.
Correct, I'll move the comment to the compare function.
A previous version of the compare function used memcmp, but we've since
changed it, so the comment is no longer relevant in the struct.
I'll fix this and the rest of the comments in v3.

Thanks
Michael

>
>> +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
>> +				       int access_flags, int access_mode,
>> +				       int ndescs)
>> +{
>> +	struct mlx5r_cache_rb_key rb_key = {
>> +		.ndescs = ndescs,
>> +		.access_mode = access_mode,
>> +		.access_flags = get_unchangeable_access_flags(dev, access_flags)
>> +	};
>> +	struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
>> +	if (!ent)
> Missing newline
>
>>   struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
>> -					      int order)
>> +					      struct mlx5r_cache_rb_key rb_key,
>> +					      bool debugfs)
>>   {
>>   	struct mlx5_cache_ent *ent;
>>   	int ret;
>> @@ -808,7 +873,10 @@ struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
>>   		return ERR_PTR(-ENOMEM);
>>   
>>   	xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
>> -	ent->order = order;
>> +	ent->rb_key.ats = rb_key.ats;
>> +	ent->rb_key.access_mode = rb_key.access_mode;
>> +	ent->rb_key.access_flags = rb_key.access_flags;
>> +	ent->rb_key.ndescs = rb_key.ndescs;
> ent->rb_key = rb_key
>
>>   int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
>>   {
>> +	struct mlx5r_cache_rb_key rb_key = {
>> +		.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
>> +	};
>>   	struct mlx5_mkey_cache *cache = &dev->cache;
>>   	struct mlx5_cache_ent *ent;
>>   	int i;
>> @@ -838,19 +913,26 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
>>   
>>   	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
>>   	timer_setup(&dev->delay_timer, delay_time_func, 0);
>> +	mlx5_mkey_cache_debugfs_init(dev);
>>   	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
>> -		ent = mlx5r_cache_create_ent(dev, i);
>> -
>> -		if (i > MKEY_CACHE_LAST_STD_ENTRY) {
>> -			mlx5_odp_init_mkey_cache_entry(ent);
>> +		if (i > mkey_cache_max_order(dev))
>>   			continue;
> This is goofy, just make the for loop go from 2 to
> mkey_cache_max_order() + 2 (and probably have the function do the + 2
> internally)
>
> Get rid of MAX_MKEY_CACHE_ENTRIES
>> +
>> +		if (i == MLX5_IMR_KSM_CACHE_ENTRY) {
>> +			ent = mlx5_odp_init_mkey_cache_entry(dev);
>> +			if (!ent)
>> +				continue;
> This too, just call mlx5_odp_init_mkey_cache_entry() outside the loop
>
> And rename it to something like mlx5_odp_init_mkey_cache(), and don't
> return ent.
>
> Set ent->limit inside mlx5r_cache_create_ent()
>
> And run over the whole rbtree in a final loop to do the final
> queue_adjust_cache_locked() step.
>
>> -void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
>> +struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
>>   {
>> -	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
>> -		return;
>> -	ent->ndescs = mlx5_imr_ksm_entries;
>> -	ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
>> +	struct mlx5r_cache_rb_key rb_key = {
>> +		.ats = 0,
>> +		.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
>> +		.access_flags = 0,
>> +		.ndescs = mlx5_imr_ksm_entries,
> Don't need to zero init things here
>
> Jason

Patch

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 10e22fb01e1b..d795e9fc2c2f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -731,17 +731,26 @@  struct umr_common {
 	unsigned int state;
 };
 
+struct mlx5r_cache_rb_key {
+	u8 ats:1;
+	unsigned int access_mode;
+	unsigned int access_flags;
+	/*
+	 * keep ndescs as the last member so entries with about the same ndescs
+	 * will be close in the tree
+	 */
+	unsigned int ndescs;
+};
+
 struct mlx5_cache_ent {
 	struct xarray		mkeys;
 	unsigned long		stored;
 	unsigned long		reserved;
 
 	char                    name[4];
-	u32                     order;
-	u32			access_mode;
-	unsigned int		ndescs;
 
 	struct rb_node		node;
+	struct mlx5r_cache_rb_key rb_key;
 
 	u8 disabled:1;
 	u8 fill_to_high_water:1;
@@ -1320,14 +1329,13 @@  int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
 struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-					      int order);
+					      struct mlx5r_cache_rb_key rb_key,
+					      bool debugfs);
 
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-				       struct mlx5_cache_ent *ent,
-				       int access_flags);
+				       int access_flags, int access_mode,
+				       int ndescs);
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev, u32 order,
-					     int access_flags);
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -1350,7 +1358,7 @@  int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
 void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
+struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev);
 void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
 			   struct mlx5_ib_mr *mr, int flags);
 
@@ -1369,7 +1377,11 @@  static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
 static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void)				    {}
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline struct mlx5_cache_ent *
+mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
+{
+	return NULL;
+}
 static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
 					 struct mlx5_ib_mr *mr, int flags) {}
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index f35022067769..6531e38ef4ec 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -292,11 +292,13 @@  static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
 	set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
 	MLX5_SET(mkc, mkc, free, 1);
 	MLX5_SET(mkc, mkc, umr_en, 1);
-	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
-	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
+	MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
+	MLX5_SET(mkc, mkc, access_mode_4_2,
+		(ent->rb_key.access_mode >> 2) & 0x7);
 
 	MLX5_SET(mkc, mkc, translations_octword_size,
-		 get_mkc_octo_size(ent->access_mode, ent->ndescs));
+		 get_mkc_octo_size(ent->rb_key.access_mode,
+				   ent->rb_key.ndescs));
 	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
 }
 
@@ -594,8 +596,8 @@  static void __cache_work_func(struct mlx5_cache_ent *ent)
 			if (err != -EAGAIN) {
 				mlx5_ib_warn(
 					dev,
-					"command failed order %d, err %d\n",
-					ent->order, err);
+					"add keys command failed, err %d\n",
+					err);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(1000));
 			}
@@ -641,22 +643,44 @@  static void delayed_cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
+static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
+			     struct mlx5r_cache_rb_key key2)
+{
+	int res;
+
+	res = key1.ats - key2.ats;
+	if (res)
+		return res;
+
+	res = key1.access_mode - key2.access_mode;
+	if (res)
+		return res;
+
+	res = key1.access_flags - key2.access_flags;
+	if (res)
+		return res;
+
+	return key1.ndescs - key2.ndescs;
+}
+
 static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
 				 struct mlx5_cache_ent *ent)
 {
 	struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
 	struct mlx5_cache_ent *cur;
+	int cmp;
 
 	mutex_lock(&cache->rb_lock);
 	/* Figure out where to put new node */
 	while (*new) {
 		cur = rb_entry(*new, struct mlx5_cache_ent, node);
 		parent = *new;
-		if (ent->order < cur->order)
+		cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key);
+		if (cmp > 0)
 			new = &((*new)->rb_left);
-		if (ent->order > cur->order)
+		if (cmp < 0)
 			new = &((*new)->rb_right);
-		if (ent->order == cur->order) {
+		if (cmp == 0) {
 			mutex_unlock(&cache->rb_lock);
 			return -EEXIST;
 		}
@@ -670,40 +694,45 @@  static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
 	return 0;
 }
 
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
-							unsigned int order)
+static struct mlx5_cache_ent *
+mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
+			   struct mlx5r_cache_rb_key rb_key)
 {
 	struct rb_node *node = dev->cache.rb_root.rb_node;
 	struct mlx5_cache_ent *cur, *smallest = NULL;
+	int cmp;
 
 	/*
 	 * Find the smallest ent with order >= requested_order.
 	 */
 	while (node) {
 		cur = rb_entry(node, struct mlx5_cache_ent, node);
-		if (cur->order > order) {
+		cmp = cache_ent_key_cmp(cur->rb_key, rb_key);
+		if (cmp > 0) {
 			smallest = cur;
 			node = node->rb_left;
 		}
-		if (cur->order < order)
+		if (cmp < 0)
 			node = node->rb_right;
-		if (cur->order == order)
+		if (cmp == 0)
 			return cur;
 	}
 
-	return smallest;
+	return (smallest &&
+		smallest->rb_key.access_mode == rb_key.access_mode &&
+		smallest->rb_key.access_flags == rb_key.access_flags &&
+		smallest->rb_key.ats == rb_key.ats) ?
+		       smallest :
+		       NULL;
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
-				       struct mlx5_cache_ent *ent,
-				       int access_flags)
+static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+					struct mlx5_cache_ent *ent,
+					int access_flags)
 {
 	struct mlx5_ib_mr *mr;
 	int err;
 
-	if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
-		return ERR_PTR(-EOPNOTSUPP);
-
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
@@ -734,12 +763,43 @@  struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
 	return mr;
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc_order(struct mlx5_ib_dev *dev,
-					     u32 order, int access_flags)
+static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev,
+					 int access_flags)
 {
-	struct mlx5_cache_ent *ent = mkey_cache_ent_from_order(dev, order);
+	int ret = 0;
+
+	if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	    MLX5_CAP_GEN(dev->mdev, atomic) &&
+	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+		ret |= IB_ACCESS_REMOTE_ATOMIC;
 
-	return mlx5_mr_cache_alloc(dev, ent, access_flags);
+	if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+		ret |= IB_ACCESS_RELAXED_ORDERING;
+
+	if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+	    MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+		ret |= IB_ACCESS_RELAXED_ORDERING;
+
+	return ret;
+}
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       int access_flags, int access_mode,
+				       int ndescs)
+{
+	struct mlx5r_cache_rb_key rb_key = {
+		.ndescs = ndescs,
+		.access_mode = access_mode,
+		.access_flags = get_unchangeable_access_flags(dev, access_flags)
+	};
+	struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
+	if (!ent)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return _mlx5_mr_cache_alloc(dev, ent, access_flags);
 }
 
 static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
@@ -766,28 +826,32 @@  static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 	dev->cache.fs_root = NULL;
 }
 
+static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
+					    struct mlx5_cache_ent *ent)
+{
+	int order = order_base_2(ent->rb_key.ndescs);
+	struct dentry *dir;
+
+	if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+		order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
+
+	sprintf(ent->name, "%d", order);
+	dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
+	debugfs_create_file("size", 0600, dir, ent, &size_fops);
+	debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
+	debugfs_create_ulong("cur", 0400, dir, &ent->stored);
+	debugfs_create_u32("miss", 0600, dir, &ent->miss);
+}
+
 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
 {
+	struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev);
 	struct mlx5_mkey_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent;
-	struct dentry *dir;
-	int i;
 
 	if (!mlx5_debugfs_root || dev->is_rep)
 		return;
 
-	dir = mlx5_debugfs_get_dev_root(dev->mdev);
-	cache->fs_root = debugfs_create_dir("mr_cache", dir);
-
-	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-		ent = mkey_cache_ent_from_order(dev, i);
-		sprintf(ent->name, "%d", ent->order);
-		dir = debugfs_create_dir(ent->name, cache->fs_root);
-		debugfs_create_file("size", 0600, dir, ent, &size_fops);
-		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
-		debugfs_create_ulong("cur", 0400, dir, &ent->stored);
-		debugfs_create_u32("miss", 0600, dir, &ent->miss);
-	}
+	cache->fs_root = debugfs_create_dir("mr_cache", dbg_root);
 }
 
 static void delay_time_func(struct timer_list *t)
@@ -798,7 +862,8 @@  static void delay_time_func(struct timer_list *t)
 }
 
 struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
-					      int order)
+					      struct mlx5r_cache_rb_key rb_key,
+					      bool debugfs)
 {
 	struct mlx5_cache_ent *ent;
 	int ret;
@@ -808,7 +873,10 @@  struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
 		return ERR_PTR(-ENOMEM);
 
 	xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
-	ent->order = order;
+	ent->rb_key.ats = rb_key.ats;
+	ent->rb_key.access_mode = rb_key.access_mode;
+	ent->rb_key.access_flags = rb_key.access_flags;
+	ent->rb_key.ndescs = rb_key.ndescs;
 	ent->dev = dev;
 
 	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
@@ -818,11 +886,18 @@  struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
 		kfree(ent);
 		return ERR_PTR(ret);
 	}
+
+	if (debugfs)
+		mlx5_mkey_cache_debugfs_add_ent(dev, ent);
+
 	return ent;
 }
 
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 {
+	struct mlx5r_cache_rb_key rb_key = {
+		.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+	};
 	struct mlx5_mkey_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	int i;
@@ -838,19 +913,26 @@  int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 
 	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
 	timer_setup(&dev->delay_timer, delay_time_func, 0);
+	mlx5_mkey_cache_debugfs_init(dev);
 	for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-		ent = mlx5r_cache_create_ent(dev, i);
-
-		if (i > MKEY_CACHE_LAST_STD_ENTRY) {
-			mlx5_odp_init_mkey_cache_entry(ent);
+		if (i > mkey_cache_max_order(dev))
 			continue;
+
+		if (i == MLX5_IMR_KSM_CACHE_ENTRY) {
+			ent = mlx5_odp_init_mkey_cache_entry(dev);
+			if (!ent)
+				continue;
+		}
+		else {
+			rb_key.ndescs = 1 << (i + 2);
+			ent = mlx5r_cache_create_ent(dev, rb_key, true);
 		}
 
-		if (ent->order > mkey_cache_max_order(dev))
-			continue;
+		if (IS_ERR(ent)) {
+			mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
+			return PTR_ERR(ent);
+		}
 
-		ent->ndescs = 1 << ent->order;
-		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
 		if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
 		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
 		    mlx5r_umr_can_load_pas(dev, 0))
@@ -862,8 +944,6 @@  int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 		xa_unlock_irq(&ent->mkeys);
 	}
 
-	mlx5_mkey_cache_debugfs_init(dev);
-
 	return 0;
 }
 
@@ -995,6 +1075,9 @@  static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 					     struct ib_umem *umem, u64 iova,
 					     int access_flags)
 {
+	struct mlx5r_cache_rb_key rb_key = {
+		.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+	};
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_cache_ent *ent;
 	struct mlx5_ib_mr *mr;
@@ -1007,8 +1090,11 @@  static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 						     0, iova);
 	if (WARN_ON(!page_size))
 		return ERR_PTR(-EINVAL);
-	ent = mkey_cache_ent_from_order(
-		dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
+
+	rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
+	rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
+	rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
+	ent = mkey_cache_ent_from_rb_key(dev, rb_key);
 	/*
 	 * Matches access in alloc_cache_mr(). If the MR can't come from the
 	 * cache then synchronously create an uncached one.
@@ -1022,7 +1108,7 @@  static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 		return mr;
 	}
 
-	mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
+	mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
 	if (IS_ERR(mr))
 		return mr;
 
@@ -1451,7 +1537,7 @@  static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
 		mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
 	if (WARN_ON(!*page_size))
 		return false;
-	return (1ULL << mr->mmkey.cache_ent->order) >=
+	return (mr->mmkey.cache_ent->rb_key.ndescs) >=
 	       ib_umem_num_dma_blocks(new_umem, *page_size);
 }
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index c41e091618ce..90de87ba3b96 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -406,7 +406,6 @@  static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 						unsigned long idx)
 {
-	int order = order_base_2(MLX5_IMR_MTT_ENTRIES);
 	struct mlx5_ib_dev *dev = mr_to_mdev(imr);
 	struct ib_umem_odp *odp;
 	struct mlx5_ib_mr *mr;
@@ -419,7 +418,9 @@  static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	if (IS_ERR(odp))
 		return ERR_CAST(odp);
 
-	mr = mlx5_mr_cache_alloc_order(dev, order, imr->access_flags);
+	mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
+				 MLX5_MKC_ACCESS_MODE_MTT,
+				 MLX5_IMR_MTT_ENTRIES);
 	if (IS_ERR(mr)) {
 		ib_umem_odp_release(odp);
 		return mr;
@@ -493,8 +494,8 @@  struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	if (IS_ERR(umem_odp))
 		return ERR_CAST(umem_odp);
 
-	imr = mlx5_mr_cache_alloc_order(dev, MLX5_IMR_KSM_CACHE_ENTRY,
-					access_flags);
+	imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+				  mlx5_imr_ksm_entries);
 	if (IS_ERR(imr)) {
 		ib_umem_odp_release(umem_odp);
 		return imr;
@@ -1587,12 +1588,19 @@  mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 	return err;
 }
 
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
+struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
 {
-	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-		return;
-	ent->ndescs = mlx5_imr_ksm_entries;
-	ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+	struct mlx5r_cache_rb_key rb_key = {
+		.ats = 0,
+		.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
+		.access_flags = 0,
+		.ndescs = mlx5_imr_ksm_entries,
+	};
+
+	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+		return NULL;
+
+	return mlx5r_cache_create_ent(dev, rb_key, true);
 }
 
 static const struct ib_device_ops mlx5_ib_dev_odp_ops = {