diff mbox series

[RFC,5/6] scsi_transport_spi: Freeze request queues instead of quiescing

Message ID 20200831025357.32700-6-bvanassche@acm.org (mailing list archive)
State Changes Requested
Headers show
Series Fix a deadlock in the SCSI power management code | expand

Commit Message

Bart Van Assche Aug. 31, 2020, 2:53 a.m. UTC
Instead of quiescing the request queues involved in domain validation,
freeze these. As a result, the struct request_queue pm_only member is no
longer set during domain validation. That will allow to modify
scsi_execute() such that it stops setting the BLK_MQ_REQ_PREEMPT flag.
Three additional changes in this patch are that scsi_mq_alloc_queue() is
exported, that scsi_device_quiesce() is no longer exported and that
scsi_target_{quiesce,resume}() have been changed into
scsi_target_{freeze,unfreeze}().

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/scsi_lib.c           | 22 ++++++------
 drivers/scsi/scsi_priv.h          |  2 ++
 drivers/scsi/scsi_transport_spi.c | 56 ++++++++++++++++++++-----------
 include/scsi/scsi_device.h        |  6 ++--
 4 files changed, 51 insertions(+), 35 deletions(-)

Comments

Alan Stern Sept. 3, 2020, 1:39 a.m. UTC | #1
On Sun, Aug 30, 2020 at 07:53:56PM -0700, Bart Van Assche wrote:
> Instead of quiescing the request queues involved in domain validation,
> freeze these. As a result, the struct request_queue pm_only member is no
> longer set during domain validation. That will allow to modify
> scsi_execute() such that it stops setting the BLK_MQ_REQ_PREEMPT flag.
> Three additional changes in this patch are that scsi_mq_alloc_queue() is
> exported, that scsi_device_quiesce() is no longer exported and that
> scsi_target_{quiesce,resume}() have been changed into
> scsi_target_{freeze,unfreeze}().
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---

> --- a/drivers/scsi/scsi_transport_spi.c
> +++ b/drivers/scsi/scsi_transport_spi.c
> @@ -997,59 +997,75 @@ void
>  spi_dv_device(struct scsi_device *sdev)
>  {
>  	struct scsi_target *starget = sdev->sdev_target;
> +	struct request_queue *q1, *q2;
>  	u8 *buffer;
>  	const int len = SPI_MAX_ECHO_BUFFER_SIZE*2;
>  
>  	/*
> -	 * Because this function and the power management code both call
> -	 * scsi_device_quiesce(), it is not safe to perform domain validation
> -	 * while suspend or resume is in progress. Hence the
> -	 * lock/unlock_system_sleep() calls.
> +	 * Because creates a new request queue that is not visible to the rest
> +	 * of the system, domain validation must be serialized against suspend,
> +	 * resume and runtime power management. Hence the
> +	 * lock/unlock_system_sleep() and scsi_autopm_{get,put}_device() calls.
>  	 */
>  	lock_system_sleep();
>  
> +	if (scsi_autopm_get_device(sdev))
> +		goto unlock_system_sleep;
> +
>  	if (unlikely(spi_dv_in_progress(starget)))
> -		goto unlock;
> +		goto put_autopm;
>  
>  	if (unlikely(scsi_device_get(sdev)))
> -		goto unlock;
> +		goto put_autopm;
>  
>  	spi_dv_in_progress(starget) = 1;
>  
>  	buffer = kzalloc(len, GFP_KERNEL);
>  
>  	if (unlikely(!buffer))
> -		goto out_put;
> -
> -	/* We need to verify that the actual device will quiesce; the
> -	 * later target quiesce is just a nice to have */
> -	if (unlikely(scsi_device_quiesce(sdev)))
> -		goto out_free;
> -
> -	scsi_target_quiesce(starget);
> +		goto put_sdev;
>  
>  	spi_dv_pending(starget) = 1;

I'm not familiar with this code.  What happens if the test and 
assignment of spi_dv_in_progress() race between two threads?

It looks like the first to acquire the spi_dv_mutex will do a domain 
validation, then clear the spi_dv_in_progress and spi_dv_pending flags.
Then the second thread will perform another domain validation with 
those flags set to 0.  Is it supposed to work like that?

>  	mutex_lock(&spi_dv_mutex(starget));
>  
>  	starget_printk(KERN_INFO, starget, "Beginning Domain Validation\n");
>  
> -	spi_dv_device_internal(sdev, sdev->request_queue, buffer);
> +	/*
> +	 * Save the request queue pointer before it is overwritten by
> +	 * scsi_mq_alloc_queue().
> +	 */
> +	q1 = sdev->request_queue;
> +	q2 = scsi_mq_alloc_queue(sdev);
> +
> +	if (q2) {
> +		/*
> +		 * Restore the request queue pointer such that no other
> +		 * subsystem can submit SCSI commands to 'sdev'.
> +		 */

Too bad there's a little window here during which external commands can 
get sent to q2.

> +		sdev->request_queue = q1;
> +		scsi_target_freeze(starget);
> +		spi_dv_device_internal(sdev, q2, buffer);
> +		blk_cleanup_queue(q2);
> +		scsi_target_unfreeze(starget);
> +	}

No error message if the domain validation couldn't be performed?

Also, do you need to restore sdev->request_queue if the allocation 
failed?  It would be a little safer to move the restoration line 
immediately after the scsi_mq_alloc_queue call.

Ideally scsi_mq_alloc_queue would return q2 without assigning it to 
sdev->request_queue.

>  
>  	starget_printk(KERN_INFO, starget, "Ending Domain Validation\n");
>  
>  	mutex_unlock(&spi_dv_mutex(starget));
>  	spi_dv_pending(starget) = 0;
>  
> -	scsi_target_resume(starget);
> -
>  	spi_initial_dv(starget) = 1;
>  
> - out_free:
>  	kfree(buffer);
> - out_put:
> +
> +put_sdev:
>  	spi_dv_in_progress(starget) = 0;
>  	scsi_device_put(sdev);
> -unlock:
> +
> +put_autopm:
> +	scsi_autopm_put_device(sdev);
> +
> +unlock_system_sleep:
>  	unlock_system_sleep();
>  }
>  EXPORT_SYMBOL(spi_dv_device);

Alan Stern
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1d7135f61962..49eb8f2dffd8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1866,6 +1866,7 @@  struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
 	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue);
 	return sdev->request_queue;
 }
+EXPORT_SYMBOL_GPL(scsi_mq_alloc_queue);
 
 int scsi_mq_setup_tags(struct Scsi_Host *shost)
 {
@@ -2540,7 +2541,6 @@  scsi_device_quiesce(struct scsi_device *sdev)
 
 	return err;
 }
-EXPORT_SYMBOL(scsi_device_quiesce);
 
 /**
  *	scsi_device_resume - Restart user issued commands to a quiesced device.
@@ -2569,30 +2569,30 @@  void scsi_device_resume(struct scsi_device *sdev)
 EXPORT_SYMBOL(scsi_device_resume);
 
 static void
-device_quiesce_fn(struct scsi_device *sdev, void *data)
+device_freeze_fn(struct scsi_device *sdev, void *data)
 {
-	scsi_device_quiesce(sdev);
+	blk_mq_freeze_queue(sdev->request_queue);
 }
 
 void
-scsi_target_quiesce(struct scsi_target *starget)
+scsi_target_freeze(struct scsi_target *starget)
 {
-	starget_for_each_device(starget, NULL, device_quiesce_fn);
+	starget_for_each_device(starget, NULL, device_freeze_fn);
 }
-EXPORT_SYMBOL(scsi_target_quiesce);
+EXPORT_SYMBOL(scsi_target_freeze);
 
 static void
-device_resume_fn(struct scsi_device *sdev, void *data)
+device_unfreeze_fn(struct scsi_device *sdev, void *data)
 {
-	scsi_device_resume(sdev);
+	blk_mq_unfreeze_queue(sdev->request_queue);
 }
 
 void
-scsi_target_resume(struct scsi_target *starget)
+scsi_target_unfreeze(struct scsi_target *starget)
 {
-	starget_for_each_device(starget, NULL, device_resume_fn);
+	starget_for_each_device(starget, NULL, device_unfreeze_fn);
 }
-EXPORT_SYMBOL(scsi_target_resume);
+EXPORT_SYMBOL(scsi_target_unfreeze);
 
 /**
  * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index d12ada035961..6b9203df84c8 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -95,6 +95,8 @@  extern int scsi_mq_setup_tags(struct Scsi_Host *shost);
 extern void scsi_mq_destroy_tags(struct Scsi_Host *shost);
 extern void scsi_exit_queue(void);
 extern void scsi_evt_thread(struct work_struct *work);
+extern int scsi_device_quiesce(struct scsi_device *sdev);
+extern void scsi_device_resume(struct scsi_device *sdev);
 struct request_queue;
 struct request;
 
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 959990f66865..63bec8980b27 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -997,59 +997,75 @@  void
 spi_dv_device(struct scsi_device *sdev)
 {
 	struct scsi_target *starget = sdev->sdev_target;
+	struct request_queue *q1, *q2;
 	u8 *buffer;
 	const int len = SPI_MAX_ECHO_BUFFER_SIZE*2;
 
 	/*
-	 * Because this function and the power management code both call
-	 * scsi_device_quiesce(), it is not safe to perform domain validation
-	 * while suspend or resume is in progress. Hence the
-	 * lock/unlock_system_sleep() calls.
+	 * Because creates a new request queue that is not visible to the rest
+	 * of the system, domain validation must be serialized against suspend,
+	 * resume and runtime power management. Hence the
+	 * lock/unlock_system_sleep() and scsi_autopm_{get,put}_device() calls.
 	 */
 	lock_system_sleep();
 
+	if (scsi_autopm_get_device(sdev))
+		goto unlock_system_sleep;
+
 	if (unlikely(spi_dv_in_progress(starget)))
-		goto unlock;
+		goto put_autopm;
 
 	if (unlikely(scsi_device_get(sdev)))
-		goto unlock;
+		goto put_autopm;
 
 	spi_dv_in_progress(starget) = 1;
 
 	buffer = kzalloc(len, GFP_KERNEL);
 
 	if (unlikely(!buffer))
-		goto out_put;
-
-	/* We need to verify that the actual device will quiesce; the
-	 * later target quiesce is just a nice to have */
-	if (unlikely(scsi_device_quiesce(sdev)))
-		goto out_free;
-
-	scsi_target_quiesce(starget);
+		goto put_sdev;
 
 	spi_dv_pending(starget) = 1;
 	mutex_lock(&spi_dv_mutex(starget));
 
 	starget_printk(KERN_INFO, starget, "Beginning Domain Validation\n");
 
-	spi_dv_device_internal(sdev, sdev->request_queue, buffer);
+	/*
+	 * Save the request queue pointer before it is overwritten by
+	 * scsi_mq_alloc_queue().
+	 */
+	q1 = sdev->request_queue;
+	q2 = scsi_mq_alloc_queue(sdev);
+
+	if (q2) {
+		/*
+		 * Restore the request queue pointer such that no other
+		 * subsystem can submit SCSI commands to 'sdev'.
+		 */
+		sdev->request_queue = q1;
+		scsi_target_freeze(starget);
+		spi_dv_device_internal(sdev, q2, buffer);
+		blk_cleanup_queue(q2);
+		scsi_target_unfreeze(starget);
+	}
 
 	starget_printk(KERN_INFO, starget, "Ending Domain Validation\n");
 
 	mutex_unlock(&spi_dv_mutex(starget));
 	spi_dv_pending(starget) = 0;
 
-	scsi_target_resume(starget);
-
 	spi_initial_dv(starget) = 1;
 
- out_free:
 	kfree(buffer);
- out_put:
+
+put_sdev:
 	spi_dv_in_progress(starget) = 0;
 	scsi_device_put(sdev);
-unlock:
+
+put_autopm:
+	scsi_autopm_put_device(sdev);
+
+unlock_system_sleep:
 	unlock_system_sleep();
 }
 EXPORT_SYMBOL(spi_dv_device);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index ef6e96e12c7c..08f88ef04bc9 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -422,10 +422,8 @@  extern struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
 extern void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt);
 extern void sdev_evt_send_simple(struct scsi_device *sdev,
 			  enum scsi_device_event evt_type, gfp_t gfpflags);
-extern int scsi_device_quiesce(struct scsi_device *sdev);
-extern void scsi_device_resume(struct scsi_device *sdev);
-extern void scsi_target_quiesce(struct scsi_target *);
-extern void scsi_target_resume(struct scsi_target *);
+extern void scsi_target_freeze(struct scsi_target *);
+extern void scsi_target_unfreeze(struct scsi_target *);
 extern void scsi_scan_target(struct device *parent, unsigned int channel,
 			     unsigned int id, u64 lun,
 			     enum scsi_scan_mode rescan);