diff mbox series

[1/1] scsi: scsi_forget_host() shuffle

Message ID 20200529221442.8404-1-dgilbert@interlog.com (mailing list archive)
State Changes Requested
Headers show
Series [1/1] scsi: scsi_forget_host() shuffle | expand

Commit Message

Douglas Gilbert May 29, 2020, 10:14 p.m. UTC
This patch leaves me a bit uneasy but it does cure the crash
that occurs in this function. xarray iterators are pretty safe
_unless_ something deletes the parent node holding the
collection. The problem seems to be these nested loops do not
_explicitly_ remove the starget object. That is done magically
at the sdev level on the removal of the last sdev in a starget.
And that is half an iteration too soon! Hence the shuffle which
isn't a great solution. The magical starget removal is wrong IMO
and this will burn us elsewhere, I suspect.

Thes patch is on top of Hannes Reinecke's "[PATCHv4 0/5] scsi:
use xarray for devices and targets" patchset.

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/scsi_scan.c | 47 +++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 10 deletions(-)

Comments

Hannes Reinecke May 30, 2020, 9:30 a.m. UTC | #1
On 5/30/20 12:14 AM, Douglas Gilbert wrote:
> This patch leaves me a bit uneasy but it does cure the crash
> that occurs in this function. xarray iterators are pretty safe
> _unless_ something deletes the parent node holding the
> collection. The problem seems to be these nested loops do not
> _explicitly_ remove the starget object. That is done magically
> at the sdev level on the removal of the last sdev in a starget.
> And that is half an iteration too soon! Hence the shuffle which
> isn't a great solution. The magical starget removal is wrong IMO
> and this will burn us elsewhere, I suspect.
> 
> Thes patch is on top of Hannes Reinecke's "[PATCHv4 0/5] scsi:
> use xarray for devices and targets" patchset.
> 
> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
> ---
>   drivers/scsi/scsi_scan.c | 47 +++++++++++++++++++++++++++++++---------
>   1 file changed, 37 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 0a344653487d..e378f03d0297 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1858,25 +1858,52 @@ void scsi_scan_host(struct Scsi_Host *shost)
>   }
>   EXPORT_SYMBOL(scsi_scan_host);
>   
> +static void scsi_forget_host_inner(struct Scsi_Host *shost,
> +				   struct scsi_target *starg,
> +				   unsigned long *flagsp)
> +{
> +	struct scsi_device *sdev;
> +	struct scsi_device *prev_sdev = NULL;
> +	unsigned long lun_id;
> +
> +	xa_for_each(&starg->__devices, lun_id, sdev) {
> +		if (sdev->sdev_state == SDEV_DEL)
> +			continue;
> +		if (!prev_sdev) {
> +			prev_sdev = sdev;
> +			continue;
> +		}
> +		spin_unlock_irqrestore(shost->host_lock, *flagsp);
> +		__scsi_remove_device(prev_sdev);
> +		spin_lock_irqsave(shost->host_lock, *flagsp);
> +		prev_sdev = sdev;
> +	}
> +	if (prev_sdev) {
> +		spin_unlock_irqrestore(shost->host_lock, *flagsp);
> +		__scsi_remove_device(prev_sdev);
> +		spin_lock_irqsave(shost->host_lock, *flagsp);
> +	}
> +}
> +
> +/* N.B. Keeping iteration one step ahead of destruction point */
>   void scsi_forget_host(struct Scsi_Host *shost)
>   {
>   	struct scsi_target *starget;
> -	struct scsi_device *sdev;
> +	struct scsi_target *prev_starget = NULL;
>   	unsigned long flags;
> -	unsigned long tid = 0;
> +	unsigned long tid;
>   
>   	spin_lock_irqsave(shost->host_lock, flags);
>   	xa_for_each(&shost->__targets, tid, starget) {
> -		unsigned long lun_id = 0;
> -
> -		xa_for_each(&starget->__devices, lun_id, sdev) {
> -			if (sdev->sdev_state == SDEV_DEL)
> -				continue;
> -			spin_unlock_irqrestore(shost->host_lock, flags);
> -			__scsi_remove_device(sdev);
> -			spin_lock_irqsave(shost->host_lock, flags);
> +		if (!prev_starget) {
> +			prev_starget = starget;
> +			continue;
>   		}
> +		scsi_forget_host_inner(shost, prev_starget, &flags);
> +		prev_starget = starget;
>   	}
> +	if (prev_starget)
> +		scsi_forget_host_inner(shost, prev_starget, &flags);
>   	spin_unlock_irqrestore(shost->host_lock, flags);
>   }
>   
> 
This is probably easier:

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 0a344653487d..fb0b6710b138 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1868,7 +1868,7 @@ void scsi_forget_host(struct Scsi_Host *shost)
         spin_lock_irqsave(shost->host_lock, flags);
         xa_for_each(&shost->__targets, tid, starget) {
                 unsigned long lun_id = 0;
-
+               kref_get(&starget->reap_ref);
                 xa_for_each(&starget->__devices, lun_id, sdev) {
                         if (sdev->sdev_state == SDEV_DEL)
                                 continue;
@@ -1876,6 +1876,7 @@ void scsi_forget_host(struct Scsi_Host *shost)
                         __scsi_remove_device(sdev);
                         spin_lock_irqsave(shost->host_lock, flags);
                 }
+               scsi_target_reap(starget);
         }
         spin_unlock_irqrestore(shost->host_lock, flags);
  }

But yeah, this 'implicit target destroy' is an abomination.
Actually, I'm planning on using xa_cmpxchg when deleting elements to 
eliminate any confusion on who's responsible for removing it from the 
xarray; the current code has a design flaw in that it does the iteration 
first, and then after various layers and indirections it does the 
removal from the iteration.
What _should_ have happened is that you do an iteration which does 
implicitly removes it from the list, and _then_ go through the various 
hoops to actually remove the device.

But after the xarray series has stabilized :-)

Cheers,

Hannes
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 0a344653487d..e378f03d0297 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1858,25 +1858,52 @@  void scsi_scan_host(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_scan_host);
 
+static void scsi_forget_host_inner(struct Scsi_Host *shost,
+				   struct scsi_target *starg,
+				   unsigned long *flagsp)
+{
+	struct scsi_device *sdev;
+	struct scsi_device *prev_sdev = NULL;
+	unsigned long lun_id;
+
+	xa_for_each(&starg->__devices, lun_id, sdev) {
+		if (sdev->sdev_state == SDEV_DEL)
+			continue;
+		if (!prev_sdev) {
+			prev_sdev = sdev;
+			continue;
+		}
+		spin_unlock_irqrestore(shost->host_lock, *flagsp);
+		__scsi_remove_device(prev_sdev);
+		spin_lock_irqsave(shost->host_lock, *flagsp);
+		prev_sdev = sdev;
+	}
+	if (prev_sdev) {
+		spin_unlock_irqrestore(shost->host_lock, *flagsp);
+		__scsi_remove_device(prev_sdev);
+		spin_lock_irqsave(shost->host_lock, *flagsp);
+	}
+}
+
+/* N.B. Keeping iteration one step ahead of destruction point */
 void scsi_forget_host(struct Scsi_Host *shost)
 {
 	struct scsi_target *starget;
-	struct scsi_device *sdev;
+	struct scsi_target *prev_starget = NULL;
 	unsigned long flags;
-	unsigned long tid = 0;
+	unsigned long tid;
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	xa_for_each(&shost->__targets, tid, starget) {
-		unsigned long lun_id = 0;
-
-		xa_for_each(&starget->__devices, lun_id, sdev) {
-			if (sdev->sdev_state == SDEV_DEL)
-				continue;
-			spin_unlock_irqrestore(shost->host_lock, flags);
-			__scsi_remove_device(sdev);
-			spin_lock_irqsave(shost->host_lock, flags);
+		if (!prev_starget) {
+			prev_starget = starget;
+			continue;
 		}
+		scsi_forget_host_inner(shost, prev_starget, &flags);
+		prev_starget = starget;
 	}
+	if (prev_starget)
+		scsi_forget_host_inner(shost, prev_starget, &flags);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 }