@@ -1720,20 +1720,20 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
if (strncmp(scsi_scan_type, "sync", 4) == 0)
return NULL;
+ mutex_lock(&shost->scan_mutex);
if (shost->async_scan) {
shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
- return NULL;
+ goto err_unlock;
}
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- goto err;
+ goto err_unlock;
data->shost = scsi_host_get(shost);
if (!data->shost)
goto err;
init_completion(&data->prev_finished);
- mutex_lock(&shost->scan_mutex);
spin_lock_irqsave(shost->host_lock, flags);
shost->async_scan = 1;
spin_unlock_irqrestore(shost->host_lock, flags);
@@ -1749,6 +1749,8 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
err:
kfree(data);
+ err_unlock:
+ mutex_unlock(&shost->scan_mutex);
return NULL;
}
Readers of async_scan are protected by scan_mutex, except for the one at
the beginning of scsi_prep_async_scan(). Threads can race reading the
async_scan value, which may result in two threads for the same Scsi_Host
going down the do_scan_async() path at the same time.

One of those threads may then hit the following check in
scsi_scan_host_selected() after async_scan has been set back to zero:

	if (!shost->async_scan)
		scsi_complete_async_scans();

and a hung task is encountered:

[  370.197123] INFO: task kworker/u40:18:967 blocked for more than 122 seconds.
[  370.198550]       Not tainted 5.8.0 #1
[  370.199538] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  370.201492] kworker/u40:18  D13768   967      2 0x00004000
[  370.202699] Workqueue: events_unbound async_run_entry_fn
[  370.203905] Call Trace:
[  370.204668]  __schedule+0x229/0x690
[  370.205620]  ? vprintk_store+0x114/0x1d0
[  370.206627]  schedule+0x45/0xb0
[  370.207527]  schedule_timeout+0x10f/0x160
[  370.208558]  wait_for_completion+0x81/0xe0
[  370.209599]  scsi_complete_async_scans+0xe4/0x140
[  370.210722]  scsi_scan_host_selected+0x8f/0x100
[  370.211774]  do_scan_async+0x13/0x140
[  370.212746]  async_run_entry_fn+0x32/0xe0
[  370.213776]  process_one_work+0x1d2/0x390
[  370.214799]  worker_thread+0x48/0x3c0
[  370.215777]  ? rescuer_thread+0x3e0/0x3e0
[  370.216807]  kthread+0x116/0x130
[  370.217720]  ? kthread_create_worker_on_cpu+0x60/0x60
[  370.218906]  ret_from_fork+0x1f/0x30

The issue may be observed when hot plugging many LUNs to a virtio_scsi
HBA at once. The virtio_scsi event queue is small and can easily
overflow, with the target reporting VIRTIO_SCSI_T_EVENTS_MISSED. Multiple
threads may be processing different VIRTIO_SCSI_T_EVENTS_MISSED events
simultaneously, and waiting on the scan_mutex in scsi_prep_async_scan()
behind other successfully processed events adding devices.

Signed-off-by: Greg Edwards <gedwards@ddn.com>
---
 drivers/scsi/scsi_scan.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)