diff mbox series

[rdma-next,2/7] RDMA/mlx5: Fix cache entry update on dereg error

Message ID 97e979dff636f232ff4c83ce709c17c727da1fdb.1741875692.git.leon@kernel.org (mailing list archive)
State New
Headers show
Series Batch of mlx5_ib fixes | expand

Commit Message

Leon Romanovsky March 13, 2025, 2:29 p.m. UTC
From: Michael Guralnik <michaelgur@nvidia.com>

Fix double decrement of 'in_use' counter on push_mkey_locked() failure
while deregistering an MR.
If we fail to return an mkey to the cache in cache_ent_find_and_store(),
it will already have decremented the 'in_use' counter. Its caller,
mlx5_revoke_mr(), decrements it again, resulting in a double decrement.

A wrong value of the 'in_use' counter will be exposed through debugfs and
can also cause wrong resizing of the cache when users try to set the cache
entry size using the 'size' debugfs file.

To address this issue, the 'in_use' counter is no longer decremented
within cache_ent_find_and_store(); instead, mlx5_revoke_mr() now also
decrements it after a successful call to cache_ent_find_and_store().
The other success and failure flows, where the counter was already
decremented, remain unchanged.

Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys")
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Leon Romanovsky March 18, 2025, 10:28 a.m. UTC | #1
On Thu, Mar 13, 2025 at 04:29:49PM +0200, Leon Romanovsky wrote:
> From: Michael Guralnik <michaelgur@nvidia.com>
> 
> Fix double decrement of 'in_use' counter on push_mkey_locked() failure
> while deregistering an MR.
> If we fail to return an mkey to the cache in cache_ent_find_and_store()
> it'll update the 'in_use' counter. Its caller, revoke_mr(), also updates
> it, thus having double decrement.
> 
> Wrong value of 'in_use' counter will be exposed through debugfs and can
> also cause wrong resizing of the cache when users try to set cache
> entry size using the 'size' debugfs.
> 
> To address this issue, the 'in_use' counter is now decremented within
> mlx5_revoke_mr() also after a successful call to
> cache_ent_find_and_store() and not within cache_ent_find_and_store().
> Other success or failure flows remains unchanged where it was also
> decremented.
> 
> Fixes: 8c1185fef68c ("RDMA/mlx5: Change check for cacheable mkeys")
> Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
> Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/hw/mlx5/mr.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

<...>

> @@ -2042,6 +2041,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
>  		ent = mr->mmkey.cache_ent;
>  		/* upon storing to a clean temp entry - schedule its cleanup */
>  		spin_lock_irq(&ent->mkeys_queue.lock);
> +		ent->in_use--;

This needs a slightly different fix; I fixed it locally.
@@ -2033,6 +2032,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
        struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
        bool is_odp = is_odp_mr(mr);
+       bool from_cache = !!ent;
        int ret = 0;

        if (is_odp)
@@ -2042,6 +2042,8 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
                ent = mr->mmkey.cache_ent;
                /* upon storing to a clean temp entry - schedule its cleanup */
                spin_lock_irq(&ent->mkeys_queue.lock);
+               if (from_cache)
+                       ent->in_use--;
                if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
                        mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
                                         msecs_to_jiffies(30 * 1000));


>  		if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
>  			mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
>  					 msecs_to_jiffies(30 * 1000));
> -- 
> 2.48.1
> 
>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1ffa4b3d0f76..cbab0240c7e5 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1967,7 +1967,6 @@  static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
 
 	if (mr->mmkey.cache_ent) {
 		spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
-		mr->mmkey.cache_ent->in_use--;
 		goto end;
 	}
 
@@ -2042,6 +2041,7 @@  static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
 		ent = mr->mmkey.cache_ent;
 		/* upon storing to a clean temp entry - schedule its cleanup */
 		spin_lock_irq(&ent->mkeys_queue.lock);
+		ent->in_use--;
 		if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
 			mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
 					 msecs_to_jiffies(30 * 1000));