diff mbox series

[1/2] scsi: smartpqi: grab scsi device ref in slave_configure()

Message ID 20200616153145.16949-1-mwilck@suse.com (mailing list archive)
State Changes Requested
Headers show
Series [1/2] scsi: smartpqi: grab scsi device ref in slave_configure() | expand

Commit Message

Martin Wilck June 16, 2020, 3:31 p.m. UTC
From: Martin Wilck <mwilck@suse.com>

We have observed kernel crashes caused by the smartpqi driver holding
a pointer to a struct scsi_device that had been removed already.
Fix this by getting and holding a ref for the device as long as
the driver uses it.

Use a lock to avoid races between device probing and removal.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 drivers/scsi/smartpqi/smartpqi.h      |   1 +
 drivers/scsi/smartpqi/smartpqi_init.c | 122 +++++++++++++++++++++-----
 2 files changed, 103 insertions(+), 20 deletions(-)

Comments

Seymour, Shane M June 16, 2020, 11:44 p.m. UTC | #1
Reviewed-by: Shane Seymour <shane.seymour@hpe.com>

> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org [mailto:linux-scsi-
> owner@vger.kernel.org] On Behalf Of mwilck@suse.com
> Sent: Wednesday, 17 June 2020 1:32 AM
> To: Don Brace <don.brace@microsemi.com>; Martin K. Petersen
> <martin.petersen@oracle.com>
> Cc: esc.storagedev@microsemi.com; linux-scsi@vger.kernel.org; Martin
> Wilck <mwilck@suse.com>
> Subject: [PATCH 1/2] scsi: smartpqi: grab scsi device ref in slave_configure()
> 
> From: Martin Wilck <mwilck@suse.com>
> 
> We have observed kernel crashes caused by the smartpqi driver holding
> a pointer to a struct scsi_device that had been removed already.
> Fix this by getting and holding a ref for the device as long as
> the driver uses it.
> 
> Use a lock to avoid races between device probing and removal.
> 
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  drivers/scsi/smartpqi/smartpqi.h      |   1 +
>  drivers/scsi/smartpqi/smartpqi_init.c | 122 +++++++++++++++++++++-----
>  2 files changed, 103 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/scsi/smartpqi/smartpqi.h
> b/drivers/scsi/smartpqi/smartpqi.h
> index 1129fe7a27ed..5801080c8dbc 100644
> --- a/drivers/scsi/smartpqi/smartpqi.h
> +++ b/drivers/scsi/smartpqi/smartpqi.h
> @@ -954,6 +954,7 @@ struct pqi_scsi_dev {
>  	struct raid_map *raid_map;	/* RAID bypass map */
> 
>  	struct pqi_sas_port *sas_port;
> +	spinlock_t sdev_lock;		/* protect access to sdev */
>  	struct scsi_device *sdev;
> 
>  	struct list_head scsi_device_list_entry;
> diff --git a/drivers/scsi/smartpqi/smartpqi_init.c
> b/drivers/scsi/smartpqi/smartpqi_init.c
> index cd157f11eb22..54a72f465f85 100644
> --- a/drivers/scsi/smartpqi/smartpqi_init.c
> +++ b/drivers/scsi/smartpqi/smartpqi_init.c
> @@ -1514,6 +1514,18 @@ static int pqi_add_device(struct pqi_ctrl_info
> *ctrl_info,
> 
>  #define PQI_PENDING_IO_TIMEOUT_SECS	20
> 
> +static inline struct scsi_device *
> +pqi_get_scsi_device(struct pqi_scsi_dev *device)
> +{
> +	unsigned long flags;
> +	struct scsi_device *sdev;
> +
> +	spin_lock_irqsave(&device->sdev_lock, flags);
> +	sdev = device->sdev;
> +	spin_unlock_irqrestore(&device->sdev_lock, flags);
> +	return sdev;
> +}
> +
>  static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info,
>  	struct pqi_scsi_dev *device)
>  {
> @@ -1530,9 +1542,26 @@ static inline void pqi_remove_device(struct
> pqi_ctrl_info *ctrl_info,
>  			device->target, device->lun,
>  			atomic_read(&device->scsi_cmds_outstanding));
> 
> -	if (pqi_is_logical_device(device))
> -		scsi_remove_device(device->sdev);
> -	else
> +	if (pqi_is_logical_device(device)) {
> +		struct scsi_device *sdev;
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&device->sdev_lock, flags);
> +		sdev = device->sdev;
> +		if (sdev)
> +			get_device(&sdev->sdev_gendev);
> +		spin_unlock_irqrestore(&device->sdev_lock, flags);
> +
> +		/*
> +		 * As scsi_remove_device() will call pqi_slave_destroy(),
> +		 * we can't keep the sdev_lock held. But we've taken a ref,
> +		 * so sdev will not go away under us.
> +		 */
> +		if (sdev) {
> +			scsi_remove_device(sdev);
> +			put_device(&sdev->sdev_gendev);
> +		}
> +	} else
>  		pqi_remove_sas_device(device);
>  }
> 
> @@ -1749,7 +1778,7 @@ static inline bool pqi_is_device_added(struct
> pqi_scsi_dev *device)
>  	if (device->is_expander_smp_device)
>  		return device->sas_port != NULL;
> 
> -	return device->sdev != NULL;
> +	return pqi_get_scsi_device(device) != NULL;
>  }
> 
>  static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
> @@ -1861,11 +1890,24 @@ static void pqi_update_device_list(struct
> pqi_ctrl_info *ctrl_info,
>  	 */
>  	list_for_each_entry(device, &ctrl_info->scsi_device_list,
>  		scsi_device_list_entry) {
> -		if (device->sdev && device->queue_depth !=
> -			device->advertised_queue_depth) {
> -			device->advertised_queue_depth = device-
> >queue_depth;
> -			scsi_change_queue_depth(device->sdev,
> -				device->advertised_queue_depth);
> +		struct scsi_device *sdev;
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&device->sdev_lock, flags);
> +		sdev = device->sdev;
> +		if (sdev)
> +			get_device(&sdev->sdev_gendev);
> +		spin_unlock_irqrestore(&device->sdev_lock, flags);
> +
> +		if (sdev) {
> +			if (device->queue_depth !=
> +			    device->advertised_queue_depth) {
> +				device->advertised_queue_depth =
> +					device->queue_depth;
> +				scsi_change_queue_depth(sdev,
> +					device->advertised_queue_depth);
> +			}
> +			put_device(&sdev->sdev_gendev);
>  		}
>  	}
> 
> @@ -2082,6 +2124,7 @@ static int pqi_update_scsi_devices(struct
> pqi_ctrl_info *ctrl_info)
>  			device = list_first_entry(&new_device_list_head,
>  				struct pqi_scsi_dev, new_device_list_entry);
> 
> +		spin_lock_init(&device->sdev_lock);
>  		memcpy(device->scsi3addr, scsi3addr, sizeof(device-
> >scsi3addr));
>  		device->is_physical_device = is_physical_device;
>  		if (is_physical_device) {
> @@ -5785,6 +5828,7 @@ static int pqi_slave_alloc(struct scsi_device *sdev)
>  	struct pqi_ctrl_info *ctrl_info;
>  	struct scsi_target *starget;
>  	struct sas_rphy *rphy;
> +	int ret;
> 
>  	ctrl_info = shost_to_hba(sdev->host);
> 
> @@ -5806,23 +5850,59 @@ static int pqi_slave_alloc(struct scsi_device
> *sdev)
> 
>  	if (device) {
>  		sdev->hostdata = device;
> -		device->sdev = sdev;
> -		if (device->queue_depth) {
> -			device->advertised_queue_depth = device-
> >queue_depth;
> -			scsi_change_queue_depth(sdev,
> -				device->advertised_queue_depth);
> -		}
> -		if (pqi_is_logical_device(device))
> -			pqi_disable_write_same(sdev);
> -		else
> -			sdev->allow_restart = 1;
> -	}
> +		ret = 0;
> +	} else
> +		ret = -ENXIO;
> 
>  	spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
> 
> +	return ret;
> +}
> +
> +static int pqi_slave_configure(struct scsi_device *sdev)
> +{
> +	struct pqi_scsi_dev *device = sdev->hostdata;
> +	unsigned long flags;
> +
> +	/*
> +	 * Grab a ref to the SCSI device, lest it be removed under us. It will
> +	 * be dropped in pqi_slave_destroy().
> +	 * Don't use scsi_device_get() here, we'd increment our own  use
> count
> +	 */
> +	if (!get_device(&sdev->sdev_gendev))
> +		return -ENXIO;
> +
> +	spin_lock_irqsave(&device->sdev_lock, flags);
> +	device->sdev = sdev;
> +	spin_unlock_irqrestore(&device->sdev_lock, flags);
> +
> +	if (device->queue_depth) {
> +		device->advertised_queue_depth = device->queue_depth;
> +		scsi_change_queue_depth(sdev,
> +					device->advertised_queue_depth);
> +	}
> +	if (pqi_is_logical_device(device))
> +		pqi_disable_write_same(sdev);
> +	else
> +		sdev->allow_restart = 1;
>  	return 0;
>  }
> 
> +static void pqi_slave_destroy(struct scsi_device *sdev)
> +{
> +	struct pqi_scsi_dev *device = sdev->hostdata;
> +	unsigned long flags;
> +
> +	if (!device)
> +		return;
> +
> +	spin_lock_irqsave(&device->sdev_lock, flags);
> +	sdev = device->sdev;
> +	device->sdev = NULL;
> +	spin_unlock_irqrestore(&device->sdev_lock, flags);
> +	put_device(&sdev->sdev_gendev);
> +}
> +
>  static int pqi_map_queues(struct Scsi_Host *shost)
>  {
>  	struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
> @@ -6501,6 +6581,8 @@ static struct scsi_host_template
> pqi_driver_template = {
>  	.eh_device_reset_handler = pqi_eh_device_reset_handler,
>  	.ioctl = pqi_ioctl,
>  	.slave_alloc = pqi_slave_alloc,
> +	.slave_configure = pqi_slave_configure,
> +	.slave_destroy = pqi_slave_destroy,
>  	.map_queues = pqi_map_queues,
>  	.sdev_attrs = pqi_sdev_attrs,
>  	.shost_attrs = pqi_shost_attrs,
> --
> 2.26.2
diff mbox series

Patch

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index 1129fe7a27ed..5801080c8dbc 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -954,6 +954,7 @@  struct pqi_scsi_dev {
 	struct raid_map *raid_map;	/* RAID bypass map */
 
 	struct pqi_sas_port *sas_port;
+	spinlock_t sdev_lock;		/* protect access to sdev */
 	struct scsi_device *sdev;
 
 	struct list_head scsi_device_list_entry;
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index cd157f11eb22..54a72f465f85 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -1514,6 +1514,18 @@  static int pqi_add_device(struct pqi_ctrl_info *ctrl_info,
 
 #define PQI_PENDING_IO_TIMEOUT_SECS	20
 
+static inline struct scsi_device *
+pqi_get_scsi_device(struct pqi_scsi_dev *device)
+{
+	unsigned long flags;
+	struct scsi_device *sdev;
+
+	spin_lock_irqsave(&device->sdev_lock, flags);
+	sdev = device->sdev;
+	spin_unlock_irqrestore(&device->sdev_lock, flags);
+	return sdev;
+}
+
 static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *device)
 {
@@ -1530,9 +1542,26 @@  static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info,
 			device->target, device->lun,
 			atomic_read(&device->scsi_cmds_outstanding));
 
-	if (pqi_is_logical_device(device))
-		scsi_remove_device(device->sdev);
-	else
+	if (pqi_is_logical_device(device)) {
+		struct scsi_device *sdev;
+		unsigned long flags;
+
+		spin_lock_irqsave(&device->sdev_lock, flags);
+		sdev = device->sdev;
+		if (sdev)
+			get_device(&sdev->sdev_gendev);
+		spin_unlock_irqrestore(&device->sdev_lock, flags);
+
+		/*
+		 * As scsi_remove_device() will call pqi_slave_destroy(),
+		 * we can't keep the sdev_lock held. But we've taken a ref,
+		 * so sdev will not go away under us.
+		 */
+		if (sdev) {
+			scsi_remove_device(sdev);
+			put_device(&sdev->sdev_gendev);
+		}
+	} else
 		pqi_remove_sas_device(device);
 }
 
@@ -1749,7 +1778,7 @@  static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
 	if (device->is_expander_smp_device)
 		return device->sas_port != NULL;
 
-	return device->sdev != NULL;
+	return pqi_get_scsi_device(device) != NULL;
 }
 
 static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
@@ -1861,11 +1890,24 @@  static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 	 */
 	list_for_each_entry(device, &ctrl_info->scsi_device_list,
 		scsi_device_list_entry) {
-		if (device->sdev && device->queue_depth !=
-			device->advertised_queue_depth) {
-			device->advertised_queue_depth = device->queue_depth;
-			scsi_change_queue_depth(device->sdev,
-				device->advertised_queue_depth);
+		struct scsi_device *sdev;
+		unsigned long flags;
+
+		spin_lock_irqsave(&device->sdev_lock, flags);
+		sdev = device->sdev;
+		if (sdev)
+			get_device(&sdev->sdev_gendev);
+		spin_unlock_irqrestore(&device->sdev_lock, flags);
+
+		if (sdev) {
+			if (device->queue_depth !=
+			    device->advertised_queue_depth) {
+				device->advertised_queue_depth =
+					device->queue_depth;
+				scsi_change_queue_depth(sdev,
+					device->advertised_queue_depth);
+			}
+			put_device(&sdev->sdev_gendev);
 		}
 	}
 
@@ -2082,6 +2124,7 @@  static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
 			device = list_first_entry(&new_device_list_head,
 				struct pqi_scsi_dev, new_device_list_entry);
 
+		spin_lock_init(&device->sdev_lock);
 		memcpy(device->scsi3addr, scsi3addr, sizeof(device->scsi3addr));
 		device->is_physical_device = is_physical_device;
 		if (is_physical_device) {
@@ -5785,6 +5828,7 @@  static int pqi_slave_alloc(struct scsi_device *sdev)
 	struct pqi_ctrl_info *ctrl_info;
 	struct scsi_target *starget;
 	struct sas_rphy *rphy;
+	int ret;
 
 	ctrl_info = shost_to_hba(sdev->host);
 
@@ -5806,23 +5850,59 @@  static int pqi_slave_alloc(struct scsi_device *sdev)
 
 	if (device) {
 		sdev->hostdata = device;
-		device->sdev = sdev;
-		if (device->queue_depth) {
-			device->advertised_queue_depth = device->queue_depth;
-			scsi_change_queue_depth(sdev,
-				device->advertised_queue_depth);
-		}
-		if (pqi_is_logical_device(device))
-			pqi_disable_write_same(sdev);
-		else
-			sdev->allow_restart = 1;
-	}
+		ret = 0;
+	} else
+		ret = -ENXIO;
 
 	spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
 
+	return ret;
+}
+
+static int pqi_slave_configure(struct scsi_device *sdev)
+{
+	struct pqi_scsi_dev *device = sdev->hostdata;
+	unsigned long flags;
+
+	/*
+	 * Grab a ref to the SCSI device, lest it be removed under us. It will
+	 * be dropped in pqi_slave_destroy().
+	 * Don't use scsi_device_get() here, we'd increment our own  use count
+	 */
+	if (!get_device(&sdev->sdev_gendev))
+		return -ENXIO;
+
+	spin_lock_irqsave(&device->sdev_lock, flags);
+	device->sdev = sdev;
+	spin_unlock_irqrestore(&device->sdev_lock, flags);
+
+	if (device->queue_depth) {
+		device->advertised_queue_depth = device->queue_depth;
+		scsi_change_queue_depth(sdev,
+					device->advertised_queue_depth);
+	}
+	if (pqi_is_logical_device(device))
+		pqi_disable_write_same(sdev);
+	else
+		sdev->allow_restart = 1;
 	return 0;
 }
 
+static void pqi_slave_destroy(struct scsi_device *sdev)
+{
+	struct pqi_scsi_dev *device = sdev->hostdata;
+	unsigned long flags;
+
+	if (!device)
+		return;
+
+	spin_lock_irqsave(&device->sdev_lock, flags);
+	sdev = device->sdev;
+	device->sdev = NULL;
+	spin_unlock_irqrestore(&device->sdev_lock, flags);
+	put_device(&sdev->sdev_gendev);
+}
+
 static int pqi_map_queues(struct Scsi_Host *shost)
 {
 	struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
@@ -6501,6 +6581,8 @@  static struct scsi_host_template pqi_driver_template = {
 	.eh_device_reset_handler = pqi_eh_device_reset_handler,
 	.ioctl = pqi_ioctl,
 	.slave_alloc = pqi_slave_alloc,
+	.slave_configure = pqi_slave_configure,
+	.slave_destroy = pqi_slave_destroy,
 	.map_queues = pqi_map_queues,
 	.sdev_attrs = pqi_sdev_attrs,
 	.shost_attrs = pqi_shost_attrs,