diff mbox series

[RESEND] Avoid that SCSI device removal through sysfs triggers a deadlock

Message ID 20180725173828.2227-1-bart.vanassche@wdc.com (mailing list archive)
State Changes Requested
Headers show
Series [RESEND] Avoid that SCSI device removal through sysfs triggers a deadlock | expand

Commit Message

Bart Van Assche July 25, 2018, 5:38 p.m. UTC
This patch avoids that self-removal triggers the following deadlock:

Comments

Martin K. Petersen July 26, 2018, 1:47 a.m. UTC | #1
Bart,

> This patch avoids that self-removal triggers the following deadlock:

Somebody please review this so we can get it merged.
Johannes Thumshirn July 26, 2018, 11:46 a.m. UTC | #2
From my limited insight into this:

Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Jack Wang July 26, 2018, 12:50 p.m. UTC | #3
Bart Van Assche <bart.vanassche@wdc.com> 于2018年7月25日周三 下午7:39写道:
>
> This patch avoids that self-removal triggers the following deadlock:
>
> ======================================================
> WARNING: possible circular locking dependency detected
> 4.18.0-rc2-dbg+ #5 Not tainted
> ------------------------------------------------------
> modprobe/6539 is trying to acquire lock:
> 000000008323c4cd (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x45/0x90
>
> but task is already holding lock:
> 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod]
>
> which lock already depends on the new lock.
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (&shost->scan_mutex){+.+.}:
>        __mutex_lock+0xfe/0xc70
>        mutex_lock_nested+0x1b/0x20
>        scsi_remove_device+0x26/0x40 [scsi_mod]
>        sdev_store_delete+0x27/0x30 [scsi_mod]
>        dev_attr_store+0x3e/0x50
>        sysfs_kf_write+0x87/0xa0
>        kernfs_fop_write+0x190/0x230
>        __vfs_write+0xd2/0x3b0
>        vfs_write+0x101/0x270
>        ksys_write+0xab/0x120
>        __x64_sys_write+0x43/0x50
>        do_syscall_64+0x77/0x230
>        entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> -> #0 (kn->count#202){++++}:
>        lock_acquire+0xd2/0x260
>        __kernfs_remove+0x424/0x4a0
>        kernfs_remove_by_name_ns+0x45/0x90
>        remove_files.isra.1+0x3a/0x90
>        sysfs_remove_group+0x5c/0xc0
>        sysfs_remove_groups+0x39/0x60
>        device_remove_attrs+0x82/0xb0
>        device_del+0x251/0x580
>        __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
>        scsi_forget_host+0x37/0xb0 [scsi_mod]
>        scsi_remove_host+0x9b/0x150 [scsi_mod]
>        sdebug_driver_remove+0x4b/0x150 [scsi_debug]
>        device_release_driver_internal+0x241/0x360
>        device_release_driver+0x12/0x20
>        bus_remove_device+0x1bc/0x290
>        device_del+0x259/0x580
>        device_unregister+0x1a/0x70
>        sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
>        scsi_debug_exit+0x76/0xe8 [scsi_debug]
>        __x64_sys_delete_module+0x1c1/0x280
>        do_syscall_64+0x77/0x230
>        entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> other info that might help us debug this:
>
>  Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&shost->scan_mutex);
>                                lock(kn->count#202);
>                                lock(&shost->scan_mutex);
>   lock(kn->count#202);
>
>  *** DEADLOCK ***
>
> 2 locks held by modprobe/6539:
>  #0: 00000000efaf9298 (&dev->mutex){....}, at: device_release_driver_internal+0x68/0x360
>  #1: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod]
>
> stack backtrace:
> CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
> Call Trace:
>  dump_stack+0xa4/0xf5
>  print_circular_bug.isra.34+0x213/0x221
>  __lock_acquire+0x1a7e/0x1b50
>  lock_acquire+0xd2/0x260
>  __kernfs_remove+0x424/0x4a0
>  kernfs_remove_by_name_ns+0x45/0x90
>  remove_files.isra.1+0x3a/0x90
>  sysfs_remove_group+0x5c/0xc0
>  sysfs_remove_groups+0x39/0x60
>  device_remove_attrs+0x82/0xb0
>  device_del+0x251/0x580
>  __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
>  scsi_forget_host+0x37/0xb0 [scsi_mod]
>  scsi_remove_host+0x9b/0x150 [scsi_mod]
>  sdebug_driver_remove+0x4b/0x150 [scsi_debug]
>  device_release_driver_internal+0x241/0x360
>  device_release_driver+0x12/0x20
>  bus_remove_device+0x1bc/0x290
>  device_del+0x259/0x580
>  device_unregister+0x1a/0x70
>  sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
>  scsi_debug_exit+0x76/0xe8 [scsi_debug]
>  __x64_sys_delete_module+0x1c1/0x280
>  do_syscall_64+0x77/0x230
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html.
>
> Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of device_schedule_callback()")
> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Tejun Heo <tj@kernel.org>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: Johannes Thumshirn <jthumshirn@suse.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: <stable@vger.kernel.org>
Looks good to me!
Reviewed-by: Jack Wang <jinpu.wang@profitbricks.com>
Tejun Heo July 26, 2018, 1:35 p.m. UTC | #4
Hello,

ISTR giving the same feedback before.

On Wed, Jul 25, 2018 at 10:38:28AM -0700, Bart Van Assche wrote:
> +struct remove_dev_work {
> +	struct callback_head	head;
> +	struct scsi_device	*sdev;
> +};
> +
> +static void delete_sdev(struct callback_head *head)
> +{
> +	struct remove_dev_work *work = container_of(head, typeof(*work), head);
> +	struct scsi_device *sdev = work->sdev;
> +
> +	scsi_remove_device(sdev);
> +	kfree(work);
> +	scsi_device_put(sdev);
> +}
> +
>  static ssize_t
>  sdev_store_delete(struct device *dev, struct device_attribute *attr,
>  		  const char *buf, size_t count)
>  {
> -	if (device_remove_file_self(dev, attr))
> -		scsi_remove_device(to_scsi_device(dev));
> -	return count;
> -};
> +	struct scsi_device *sdev = to_scsi_device(dev);
> +	struct remove_dev_work *work;
> +	int ret;
> +
> +	ret = scsi_device_get(sdev);
> +	if (ret < 0)
> +		goto out;
> +	ret = -ENOMEM;
> +	work = kmalloc(sizeof(*work), GFP_KERNEL);
> +	if (!work)
> +		goto put;
> +	work->head.func = delete_sdev;
> +	work->sdev = sdev;
> +	ret = task_work_add(current, &work->head, false);
> +	if (ret < 0)
> +		goto free;
> +	ret = count;
> +
> +out:
> +	return ret;
> +
> +free:
> +	kfree(work);
> +
> +put:
> +	scsi_device_put(sdev);
> +	goto out;
> +}

Making removal asynchronous this way sometimes causes issues because
whether the user sees the device released or not is racy.
kernfs/sysfs have mechanisms to deal with these cases - remove_self
and kernfs_break_active_protection().  Have you looked at those?

Thanks.
Bart Van Assche July 26, 2018, 2:09 p.m. UTC | #5
On Thu, 2018-07-26 at 06:35 -0700, Tejun Heo wrote:
> Making removal asynchronous this way sometimes causes issues because
> whether the user sees the device released or not is racy.
> kernfs/sysfs have mechanisms to deal with these cases - remove_self
> and kernfs_break_active_protection().  Have you looked at those?

Hello Tejun,

The call stack in the patch description shows that sdev_store_delete() is
involved in the deadlock. The implementation of that function is as follows:

static ssize_t
sdev_store_delete(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t count)
{
	if (device_remove_file_self(dev, attr))
		scsi_remove_device(to_scsi_device(dev));
	return count;
};

device_remove_file_self() calls sysfs_remove_file_self() and that last
function calls kernfs_remove_self(). In other words, kernfs_remove_self()
is already being used. Please let me know if I misunderstood your comment.

Thanks,

Bart.
Tejun Heo July 26, 2018, 2:14 p.m. UTC | #6
Hello,

On Thu, Jul 26, 2018 at 02:09:41PM +0000, Bart Van Assche wrote:
> On Thu, 2018-07-26 at 06:35 -0700, Tejun Heo wrote:
> > Making removal asynchronous this way sometimes causes issues because
> > whether the user sees the device released or not is racy.
> > kernfs/sysfs have mechanisms to deal with these cases - remove_self
> > and kernfs_break_active_protection().  Have you looked at those?
> 
> Hello Tejun,
> 
> The call stack in the patch description shows that sdev_store_delete() is
> involved in the deadlock. The implementation of that function is as follows:
> 
> static ssize_t
> sdev_store_delete(struct device *dev, struct device_attribute *attr,
> 		  const char *buf, size_t count)
> {
> 	if (device_remove_file_self(dev, attr))
> 		scsi_remove_device(to_scsi_device(dev));
> 	return count;
> };
> 
> device_remove_file_self() calls sysfs_remove_file_self() and that last
> function calls kernfs_remove_self(). In other words, kernfs_remove_self()
> is already being used. Please let me know if I misunderstood your comment.

So, here, because scsi_remove_device() is the one involved in the
circular dependency, just breaking the dependency chain on the file
itself (self removal) isn't enough.  You can wrap the whole operation
with kernfs_break_active_protection() to also move
scsi_remove_device() invocation outside the kernfs synchronization.
This will need to be piped through sysfs but shouldn't be too complex.

Thanks.
Bart Van Assche July 26, 2018, 9:57 p.m. UTC | #7
On Thu, 2018-07-26 at 07:14 -0700, tj@kernel.org wrote:
> Hello,
> 
> On Thu, Jul 26, 2018 at 02:09:41PM +0000, Bart Van Assche wrote:
> > On Thu, 2018-07-26 at 06:35 -0700, Tejun Heo wrote:
> > > Making removal asynchronous this way sometimes causes issues because
> > > whether the user sees the device released or not is racy.
> > > kernfs/sysfs have mechanisms to deal with these cases - remove_self
> > > and kernfs_break_active_protection().  Have you looked at those?
> > 
> > Hello Tejun,
> > 
> > The call stack in the patch description shows that sdev_store_delete() is
> > involved in the deadlock. The implementation of that function is as follows:
> > 
> > static ssize_t
> > sdev_store_delete(struct device *dev, struct device_attribute *attr,
> > 		  const char *buf, size_t count)
> > {
> > 	if (device_remove_file_self(dev, attr))
> > 		scsi_remove_device(to_scsi_device(dev));
> > 	return count;
> > };
> > 
> > device_remove_file_self() calls sysfs_remove_file_self() and that last
> > function calls kernfs_remove_self(). In other words, kernfs_remove_self()
> > is already being used. Please let me know if I misunderstood your comment.
> 
> So, here, because scsi_remove_device() is the one involved in the
> circular dependency, just breaking the dependency chain on the file
> itself (self removal) isn't enough.  You can wrap the whole operation
> with kernfs_break_active_protection() to also move
> scsi_remove_device() invocation outside the kernfs synchronization.
> This will need to be piped through sysfs but shouldn't be too complex.

Hello Tejun,

I have tried to implement the above but my implementation triggered a kernel
warning. Can you have a look at the attached patches and see what I did wrong?

Thanks,

Bart.

The kernel warning I ran into is as follows:

kernfs_put: 6:0:0:0/delete: released with incorrect active_ref 2147483647
WARNING: CPU: 6 PID: 1014 at fs/kernfs/dir.c:527 kernfs_put+0x136/0x180
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
RIP: 0010:kernfs_put+0x136/0x180
Call Trace:
 evict+0xc1/0x190
 __dentry_kill+0xc4/0x150
 shrink_dentry_list+0x9e/0x1c0
 shrink_dcache_parent+0x63/0x70
 d_invalidate+0x42/0xb0
 lookup_fast+0x278/0x2a0
 walk_component+0x38/0x450
 link_path_walk+0x2a8/0x4f0
 path_openat+0x89/0x13a0
 do_filp_open+0x8c/0xf0
 do_sys_open+0x1a6/0x230
 do_syscall_64+0x4f/0x110
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
From f280e1cb31eda63c47db02574dfdfaee8a3f1dbc Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 26 Jul 2018 09:23:08 -0700
Subject: [PATCH 1/3] fs/sysfs: Introduce sysfs_remove_file_self_w_cb()

This patch makes it possible to execute more code than only the
__kernfs_remove() call while the active protection is dropped.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
---
 fs/sysfs/file.c       | 17 ++++++++++++++++-
 include/linux/sysfs.h | 16 ++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..db9386070571 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -429,7 +429,12 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
  *
  * See kernfs_remove_self() for details.
  */
-bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr)
+bool sysfs_remove_file_self_w_cb(struct kobject *kobj,
+				 const struct attribute *attr,
+				 void (*cb)(struct kobject *kobj,
+					    const struct attribute *attr,
+					    void *data),
+				 void *data)
 {
 	struct kernfs_node *parent = kobj->sd;
 	struct kernfs_node *kn;
@@ -440,11 +445,21 @@ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr)
 		return false;
 
 	ret = kernfs_remove_self(kn);
+	if (ret && cb) {
+		kernfs_break_active_protection(kn);
+		cb(kobj, attr, data);
+		kernfs_break_active_protection(kn);
+	}
 
 	kernfs_put(kn);
 	return ret;
 }
 
+bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr)
+{
+	return sysfs_remove_file_self_w_cb(kobj, attr, NULL, NULL);
+}
+
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
 {
 	int i;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b8bfdc173ec0..3c954d677736 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -239,6 +239,12 @@ int __must_check sysfs_chmod_file(struct kobject *kobj,
 				  const struct attribute *attr, umode_t mode);
 void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
 			  const void *ns);
+bool sysfs_remove_file_self_w_cb(struct kobject *kobj,
+				 const struct attribute *attr,
+				 void (*cb)(struct kobject *kobj,
+					    const struct attribute *attr,
+					    void *),
+				 void *data);
 bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr);
 void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr);
 
@@ -356,6 +362,16 @@ static inline void sysfs_remove_file_ns(struct kobject *kobj,
 {
 }
 
+static inline bool sysfs_remove_file_self_w_cb(struct kobject *kobj,
+				const struct attribute *attr,
+				void (*cb)(struct kobject *kobj,
+					   const struct attribute *attr,
+					   void *),
+				void *data)
+{
+	return false;
+}
+
 static inline bool sysfs_remove_file_self(struct kobject *kobj,
 					  const struct attribute *attr)
 {
Bart Van Assche July 29, 2018, 4:03 a.m. UTC | #8
On Thu, 2018-07-26 at 06:35 -0700, Tejun Heo wrote:
> Making removal asynchronous this way sometimes causes issues because
> whether the user sees the device released or not is racy.

Hello Tejun,

How could this change cause any user-visible changes? My understanding is
that any work queued with task_work_add() is executed before system call
execution leaves kernel context and returns back to user space context.

Thanks,

Bart.
Tejun Heo July 30, 2018, 2:13 p.m. UTC | #9
Hello, Bart.

On Thu, Jul 26, 2018 at 09:57:40PM +0000, Bart Van Assche wrote:
...
> @@ -440,11 +445,21 @@ bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr)
>  		return false;
>  
>  	ret = kernfs_remove_self(kn);
> +	if (ret && cb) {
> +		kernfs_break_active_protection(kn);
> +		cb(kobj, attr, data);
> +		kernfs_break_active_protection(kn);

unbreak?

Also, wouldn't it be better to just expose sysfs_break/unbreak and
then do sth like the following from scsi?

        kobject_get();
	sysfs_break_active_protection();
	do normal sysfs removal;
	sysfs_unbreak..();
	kobject_put();

Thanks.
Tejun Heo July 30, 2018, 2:17 p.m. UTC | #10
On Sun, Jul 29, 2018 at 04:03:41AM +0000, Bart Van Assche wrote:
> On Thu, 2018-07-26 at 06:35 -0700, Tejun Heo wrote:
> > Making removal asynchronous this way sometimes causes issues because
> > whether the user sees the device released or not is racy.
> 
> Hello Tejun,
> 
> How could this change cause any user-visible changes? My understanding is
> that any work queued with task_work_add() is executed before system call
> execution leaves kernel context and returns back to user space context.

Ah, you're right.  This isn't workqueue.  Hmm... yeah, needing to
allocate sth in removal path is a bit icky but, it should be okay I
guess.  I *think* the kernfs active break/unbreak is likely gonna be
cleaner tho.

Thanks.
Bart Van Assche July 30, 2018, 5:28 p.m. UTC | #11
On Mon, 2018-07-30 at 07:13 -0700, tj@kernel.org wrote:
> Also, wouldn't it be better to just expose sysfs_break/unbreak and
> then do sth like the following from scsi?
> 
>         kobject_get();
> 	sysfs_break_active_protection();
> 	do normal sysfs removal;
> 	sysfs_unbreak..();
> 	kobject_put();

Hello Tejun,

It's not clear to me how the sysfs_break_active_protection() should obtain
the struct kernfs_node pointer to the attribute. Calling that function before
device_remove_file_self() causes a double call to kernfs_break_active_protection(),
which is wrong. Calling kernfs_find_and_get(kobj->sd, attr->name) after the
attribute has been removed results in a NULL pointer because the attribute that
that call tries to look up has already been removed. Should I proceed with the
approach proposed in the patches attached to a previous e-mail?

Thanks,

Bart.
Tejun Heo July 30, 2018, 5:31 p.m. UTC | #12
Hello, Bart.

On Mon, Jul 30, 2018 at 05:28:11PM +0000, Bart Van Assche wrote:
> It's not clear to me how the sysfs_break_active_protection() should obtain
> the struct kernfs_node pointer to the attribute. Calling that function before
> device_remove_file_self() causes a double call to kernfs_break_active_protection(),
> which is wrong. Calling kernfs_find_and_get(kobj->sd, attr->name) after the

So, if you braek active protection explicitly, there's no need to call
remove_self().  It can just use regular remove.

> attribute has been removed results in a NULL pointer because the attribute that
> that call tries to look up has already been removed. Should I proceed with the
> approach proposed in the patches attached to a previous e-mail?

I don't have an objection for that.

Thanks.
Bart Van Assche July 30, 2018, 5:50 p.m. UTC | #13
On Mon, 2018-07-30 at 10:31 -0700, tj@kernel.org wrote:
> On Mon, Jul 30, 2018 at 05:28:11PM +0000, Bart Van Assche wrote:
> > It's not clear to me how the sysfs_break_active_protection() should obtain
> > the struct kernfs_node pointer to the attribute. Calling that function before
> > device_remove_file_self() causes a double call to kernfs_break_active_protection(),
> > which is wrong. Calling kernfs_find_and_get(kobj->sd, attr->name) after the
> 
> So, if you braek active protection explicitly, there's no need to call
> remove_self().  It can just use regular remove.

But how to avoid that scsi_remove_device(to_scsi_device(dev)) gets called
multiple times when using device_remove_self() and in case of concurrent
writes into the SCSI device "delete" sysfs attribute?

Thanks,

Bart.
Tejun Heo July 30, 2018, 5:57 p.m. UTC | #14
Hello,

On Mon, Jul 30, 2018 at 05:50:02PM +0000, Bart Van Assche wrote:
> On Mon, 2018-07-30 at 10:31 -0700, tj@kernel.org wrote:
> > On Mon, Jul 30, 2018 at 05:28:11PM +0000, Bart Van Assche wrote:
> > > It's not clear to me how the sysfs_break_active_protection() should obtain
> > > the struct kernfs_node pointer to the attribute. Calling that function before
> > > device_remove_file_self() causes a double call to kernfs_break_active_protection(),
> > > which is wrong. Calling kernfs_find_and_get(kobj->sd, attr->name) after the
> > 
> > So, if you braek active protection explicitly, there's no need to call
> > remove_self().  It can just use regular remove.
> 
> But how to avoid that scsi_remove_device(to_scsi_device(dev)) gets called
> multiple times when using device_remove_self() and in case of concurrent
> writes into the SCSI device "delete" sysfs attribute?

So, scsi_remove_device() internally protects using scan_mutex and if
the whole thing is wrapped with break_active_prot, I don't think you
need to call remove_file_self at all, right?

Thanks.
diff mbox series

Patch

======================================================
WARNING: possible circular locking dependency detected
4.18.0-rc2-dbg+ #5 Not tainted
------------------------------------------------------
modprobe/6539 is trying to acquire lock:
000000008323c4cd (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x45/0x90

but task is already holding lock:
00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&shost->scan_mutex){+.+.}:
       __mutex_lock+0xfe/0xc70
       mutex_lock_nested+0x1b/0x20
       scsi_remove_device+0x26/0x40 [scsi_mod]
       sdev_store_delete+0x27/0x30 [scsi_mod]
       dev_attr_store+0x3e/0x50
       sysfs_kf_write+0x87/0xa0
       kernfs_fop_write+0x190/0x230
       __vfs_write+0xd2/0x3b0
       vfs_write+0x101/0x270
       ksys_write+0xab/0x120
       __x64_sys_write+0x43/0x50
       do_syscall_64+0x77/0x230
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> #0 (kn->count#202){++++}:
       lock_acquire+0xd2/0x260
       __kernfs_remove+0x424/0x4a0
       kernfs_remove_by_name_ns+0x45/0x90
       remove_files.isra.1+0x3a/0x90
       sysfs_remove_group+0x5c/0xc0
       sysfs_remove_groups+0x39/0x60
       device_remove_attrs+0x82/0xb0
       device_del+0x251/0x580
       __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
       scsi_forget_host+0x37/0xb0 [scsi_mod]
       scsi_remove_host+0x9b/0x150 [scsi_mod]
       sdebug_driver_remove+0x4b/0x150 [scsi_debug]
       device_release_driver_internal+0x241/0x360
       device_release_driver+0x12/0x20
       bus_remove_device+0x1bc/0x290
       device_del+0x259/0x580
       device_unregister+0x1a/0x70
       sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
       scsi_debug_exit+0x76/0xe8 [scsi_debug]
       __x64_sys_delete_module+0x1c1/0x280
       do_syscall_64+0x77/0x230
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&shost->scan_mutex);
                               lock(kn->count#202);
                               lock(&shost->scan_mutex);
  lock(kn->count#202);

 *** DEADLOCK ***

2 locks held by modprobe/6539:
 #0: 00000000efaf9298 (&dev->mutex){....}, at: device_release_driver_internal+0x68/0x360
 #1: 00000000a6ec2c69 (&shost->scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 [scsi_mod]

stack backtrace:
CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0xa4/0xf5
 print_circular_bug.isra.34+0x213/0x221
 __lock_acquire+0x1a7e/0x1b50
 lock_acquire+0xd2/0x260
 __kernfs_remove+0x424/0x4a0
 kernfs_remove_by_name_ns+0x45/0x90
 remove_files.isra.1+0x3a/0x90
 sysfs_remove_group+0x5c/0xc0
 sysfs_remove_groups+0x39/0x60
 device_remove_attrs+0x82/0xb0
 device_del+0x251/0x580
 __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
 scsi_forget_host+0x37/0xb0 [scsi_mod]
 scsi_remove_host+0x9b/0x150 [scsi_mod]
 sdebug_driver_remove+0x4b/0x150 [scsi_debug]
 device_release_driver_internal+0x241/0x360
 device_release_driver+0x12/0x20
 bus_remove_device+0x1bc/0x290
 device_del+0x259/0x580
 device_unregister+0x1a/0x70
 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
 scsi_debug_exit+0x76/0xe8 [scsi_debug]
 __x64_sys_delete_module+0x1c1/0x280
 do_syscall_64+0x77/0x230
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of device_schedule_callback()")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
---
 drivers/scsi/scsi_sysfs.c | 48 +++++++++++++++++++++++++++++++++++----
 kernel/task_work.c        |  1 +
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index de122354d09a..c43f645900d4 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -12,6 +12,7 @@ 
 #include <linux/blkdev.h>
 #include <linux/device.h>
 #include <linux/pm_runtime.h>
+#include <linux/task_work.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
@@ -718,14 +719,53 @@  store_rescan_field (struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+struct remove_dev_work {
+	struct callback_head	head;
+	struct scsi_device	*sdev;
+};
+
+static void delete_sdev(struct callback_head *head)
+{
+	struct remove_dev_work *work = container_of(head, typeof(*work), head);
+	struct scsi_device *sdev = work->sdev;
+
+	scsi_remove_device(sdev);
+	kfree(work);
+	scsi_device_put(sdev);
+}
+
 static ssize_t
 sdev_store_delete(struct device *dev, struct device_attribute *attr,
 		  const char *buf, size_t count)
 {
-	if (device_remove_file_self(dev, attr))
-		scsi_remove_device(to_scsi_device(dev));
-	return count;
-};
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct remove_dev_work *work;
+	int ret;
+
+	ret = scsi_device_get(sdev);
+	if (ret < 0)
+		goto out;
+	ret = -ENOMEM;
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		goto put;
+	work->head.func = delete_sdev;
+	work->sdev = sdev;
+	ret = task_work_add(current, &work->head, false);
+	if (ret < 0)
+		goto free;
+	ret = count;
+
+out:
+	return ret;
+
+free:
+	kfree(work);
+
+put:
+	scsi_device_put(sdev);
+	goto out;
+}
 static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);
 
 static ssize_t
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 0fef395662a6..75dc496b9997 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -40,6 +40,7 @@  task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
 		set_notify_resume(task);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(task_work_add);
 
 /**
  * task_work_cancel - cancel a pending work added by task_work_add()