diff mbox series

[rdma-rc,06/10] RDMA/mlx5: Fix mkey cache possible deadlock on cleanup

Message ID babba5ce5a995ced9ea35133dbc938d2a19510d2.1685960567.git.leon@kernel.org (mailing list archive)
State Rejected
Headers show
Series Batch of uverbs and mlx5_ib fixes | expand

Commit Message

Leon Romanovsky June 5, 2023, 10:33 a.m. UTC
From: Michael Guralnik <michaelgur@nvidia.com>

Move cancellation of delayed cache work that adds or removes mkeys to a
separate iteration in the mkey cleanup so that we don't call
someone_adding() while holding the rb_lock.

Lockdep:
WARNING: possible circular locking dependency detected
 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted
 ------------------------------------------------------
 devlink/53872 is trying to acquire lock:
 ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900

 but task is already holding lock:
 ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]

 which lock already depends on the new lock.


 the existing dependency chain (in reverse order) is:

 -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}:
        __mutex_lock+0x14c/0x15c0
        delayed_cache_work_func+0x2d1/0x610 [mlx5_ib]
        process_one_work+0x7c2/0x1310
        worker_thread+0x59d/0xec0
        kthread+0x28f/0x330
        ret_from_fork+0x1f/0x30

 -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}:
        __lock_acquire+0x2d8a/0x5fe0
        lock_acquire+0x1c1/0x540
        __flush_work+0xe8/0x900
        __cancel_work_timer+0x2c7/0x3f0
        mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib]
        mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib]
        __mlx5_ib_remove+0x68/0x120 [mlx5_ib]
        mlx5r_remove+0x63/0x80 [mlx5_ib]
        auxiliary_bus_remove+0x52/0x70
        device_release_driver_internal+0x3c1/0x600
        bus_remove_device+0x2a5/0x560
        device_del+0x492/0xb80
        mlx5_detach_device+0x1a9/0x360 [mlx5_core]
        mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core]
        mlx5_devlink_reload_down+0x292/0x580 [mlx5_core]
        devlink_reload+0x439/0x590
        devlink_nl_cmd_reload+0xaef/0xff0
        genl_family_rcv_msg_doit.isra.0+0x1bd/0x290
        genl_rcv_msg+0x3ca/0x6c0
        netlink_rcv_skb+0x12c/0x360
        genl_rcv+0x24/0x40
        netlink_unicast+0x438/0x710
        netlink_sendmsg+0x7a1/0xca0
        sock_sendmsg+0xc5/0x190
        __sys_sendto+0x1bc/0x290
        __x64_sys_sendto+0xdc/0x1b0
        do_syscall_64+0x3d/0x90
        entry_SYSCALL_64_after_hwframe+0x46/0xb0

 other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&dev->cache.rb_lock);
                                lock((work_completion)(&(&ent->dwork)->work));
                                lock(&dev->cache.rb_lock);
   lock((work_completion)(&(&ent->dwork)->work));

  *** DEADLOCK ***

 6 locks held by devlink/53872:
  #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40
  #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0
  #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core]
  #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core]
  #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600
  #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib]

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

Comments

Jason Gunthorpe June 5, 2023, 4:54 p.m. UTC | #1
On Mon, Jun 05, 2023 at 01:33:22PM +0300, Leon Romanovsky wrote:

> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> index 1ce48e485c5b..f113656e4027 100644
> --- a/drivers/infiniband/hw/mlx5/mr.c
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -1033,7 +1033,15 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
>  		xa_lock_irq(&ent->mkeys);
>  		ent->disabled = true;
>  		xa_unlock_irq(&ent->mkeys);
> -		cancel_delayed_work_sync(&ent->dwork);
> +	}
> +
> +	/* Run the canceling of delayed works on the cache in a separate loop after
> +	 * disabling all entries to ensure someone_adding() will not try taking the
> +	 * rb_lock while flushing the workqueue.
> +	 */
> +	for (node = rb_first(root); node; node = rb_next(node)) {
> +		ent = rb_entry(node, struct mlx5_cache_ent, node);
> +		cancel_delayed_work(&ent->dwork);
>  	}
>
This goes on to kfree ent, so this can't drop the sync.

Jason
Leon Romanovsky June 6, 2023, 5:50 a.m. UTC | #2
On Mon, Jun 05, 2023 at 01:54:38PM -0300, Jason Gunthorpe wrote:
> On Mon, Jun 05, 2023 at 01:33:22PM +0300, Leon Romanovsky wrote:
> 
> > diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> > index 1ce48e485c5b..f113656e4027 100644
> > --- a/drivers/infiniband/hw/mlx5/mr.c
> > +++ b/drivers/infiniband/hw/mlx5/mr.c
> > @@ -1033,7 +1033,15 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
> >  		xa_lock_irq(&ent->mkeys);
> >  		ent->disabled = true;
> >  		xa_unlock_irq(&ent->mkeys);
> > -		cancel_delayed_work_sync(&ent->dwork);
> > +	}
> > +
> > +	/* Run the canceling of delayed works on the cache in a separate loop after
> > +	 * disabling all entries to ensure someone_adding() will not try taking the
> > +	 * rb_lock while flushing the workqueue.
> > +	 */
> > +	for (node = rb_first(root); node; node = rb_next(node)) {
> > +		ent = rb_entry(node, struct mlx5_cache_ent, node);
> > +		cancel_delayed_work(&ent->dwork);
> >  	}
> >
> This goes on to kfree ent, so this can't drop the sync.

With _sync, we will get the same code as it was before.
Let's put this patch aside.

Thanks

> 
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1ce48e485c5b..f113656e4027 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1033,7 +1033,15 @@  void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
 		xa_lock_irq(&ent->mkeys);
 		ent->disabled = true;
 		xa_unlock_irq(&ent->mkeys);
-		cancel_delayed_work_sync(&ent->dwork);
+	}
+
+	/* Run the canceling of delayed works on the cache in a separate loop after
+	 * disabling all entries to ensure someone_adding() will not try taking the
+	 * rb_lock while flushing the workqueue.
+	 */
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		ent = rb_entry(node, struct mlx5_cache_ent, node);
+		cancel_delayed_work(&ent->dwork);
 	}
 
 	mlx5_mkey_cache_debugfs_cleanup(dev);