diff mbox series

scsi: fix race reading async_scan value

Message ID 20200811170238.72879-1-gedwards@ddn.com (mailing list archive)
State Deferred
Headers show
Series scsi: fix race reading async_scan value | expand

Commit Message

Greg Edwards Aug. 11, 2020, 5:02 p.m. UTC
Readers of async_scan are protected by scan_mutex, except one at the
beginning of scsi_prep_async_scan().  Threads can race reading the
async_scan value, which may result in two threads for the same Scsi_Host
going down the do_scan_async() path at the same time.  One of those
threads may then hit the following check in scsi_scan_host_selected()
after async_scan has been set back to zero:

        if (!shost->async_scan)
                scsi_complete_async_scans();

and a hung task is encountered:

[  370.197123] INFO: task kworker/u40:18:967 blocked for more than 122 seconds.
[  370.198550]       Not tainted 5.8.0 #1
[  370.199538] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  370.201492] kworker/u40:18  D13768   967      2 0x00004000
[  370.202699] Workqueue: events_unbound async_run_entry_fn
[  370.203905] Call Trace:
[  370.204668]  __schedule+0x229/0x690
[  370.205620]  ? vprintk_store+0x114/0x1d0
[  370.206627]  schedule+0x45/0xb0
[  370.207527]  schedule_timeout+0x10f/0x160
[  370.208558]  wait_for_completion+0x81/0xe0
[  370.209599]  scsi_complete_async_scans+0xe4/0x140
[  370.210722]  scsi_scan_host_selected+0x8f/0x100
[  370.211774]  do_scan_async+0x13/0x140
[  370.212746]  async_run_entry_fn+0x32/0xe0
[  370.213776]  process_one_work+0x1d2/0x390
[  370.214799]  worker_thread+0x48/0x3c0
[  370.215777]  ? rescuer_thread+0x3e0/0x3e0
[  370.216807]  kthread+0x116/0x130
[  370.217720]  ? kthread_create_worker_on_cpu+0x60/0x60
[  370.218906]  ret_from_fork+0x1f/0x30

The issue may be observed when hot plugging many LUNs to a virtio_scsi
HBA at once.  The virtio_scsi event queue is small, and can easily
overflow, with the target reporting VIRTIO_SCSI_T_EVENTS_MISSED.
Multiple threads may be processing different VIRTIO_SCSI_T_EVENTS_MISSED
events simultaneously, and waiting on the scan_mutex in
scsi_prep_async_scan() behind other successfully processed events adding
devices.

Signed-off-by: Greg Edwards <gedwards@ddn.com>
---
 drivers/scsi/scsi_scan.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index f2437a7570ce..7dd113556234 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1720,20 +1720,20 @@  static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 	if (strncmp(scsi_scan_type, "sync", 4) == 0)
 		return NULL;
 
+	mutex_lock(&shost->scan_mutex);
 	if (shost->async_scan) {
 		shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
-		return NULL;
+		goto err_unlock;
 	}
 
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
-		goto err;
+		goto err_unlock;
 	data->shost = scsi_host_get(shost);
 	if (!data->shost)
 		goto err;
 	init_completion(&data->prev_finished);
 
-	mutex_lock(&shost->scan_mutex);
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->async_scan = 1;
 	spin_unlock_irqrestore(shost->host_lock, flags);
@@ -1749,6 +1749,8 @@  static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 
  err:
 	kfree(data);
+ err_unlock:
+	mutex_unlock(&shost->scan_mutex);
 	return NULL;
 }