diff mbox series

[V2,1/2] scsi: core: fix failure handling of scsi_add_host_with_dma

Message ID 20210528011838.2122559-2-ming.lei@redhat.com (mailing list archive)
State Superseded
Headers show
Series scsi: two fixes in scsi_add_host_with_dma | expand

Commit Message

Ming Lei May 28, 2021, 1:18 a.m. UTC
When scsi_add_host_with_dma() return failure, the caller will call
scsi_host_put(shost) to release everything allocated for this host
instance. So we can't free allocated stuff in scsi_add_host_with_dma(),
otherwise double free will be caused.

Strictly speaking, these host resources allocation should have been
moved to scsi_host_alloc(), but the allocation may need driver's
info which can be built between calling scsi_host_alloc() and
scsi_add_host(), so just keep the allocations in
scsi_add_host_with_dma().

Fixes the problem by relying on host device's release handler to
release everything.

Cc: Bart Van Assche <bvanassche@acm.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/hosts.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

Comments

John Garry May 28, 2021, 8:34 a.m. UTC | #1
On 28/05/2021 02:18, Ming Lei wrote:
> When scsi_add_host_with_dma() return failure, the caller will call
> scsi_host_put(shost) to release everything allocated for this host
> instance. So we can't free allocated stuff in scsi_add_host_with_dma(),
> otherwise double free will be caused.
> 
> Strictly speaking, these host resources allocation should have been
> moved to scsi_host_alloc(), but the allocation may need driver's
> info which can be built between calling scsi_host_alloc() and
> scsi_add_host(), so just keep the allocations in
> scsi_add_host_with_dma().
> 
> Fixes the problem by relying on host device's release handler to
> release everything.
> 
> Cc: Bart Van Assche <bvanassche@acm.org>
> Cc: John Garry <john.garry@huawei.com>
> Cc: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>

It now looks like we have a memory leak:

unreferenced object 0xffff0410070a4e00 (size 128):
   comm "swapper/0", pid 1, jiffies 4294894100 (age 90.744s)
   hex dump (first 32 bytes):
68 6f 73 74 30 00 00 00 00 00 00 00 00 00 00 00  host0...........
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
   backtrace:
[<(____ptrval____)>] __kmalloc_track_caller+0x25c/0x380
[<(____ptrval____)>] kvasprintf+0xe8/0x1a4
[<(____ptrval____)>] kvasprintf_const+0xc8/0x17c
[<(____ptrval____)>] kobject_set_name_vargs+0x58/0xf4
[<(____ptrval____)>] dev_set_name+0xa4/0xe0
[<(____ptrval____)>] scsi_host_alloc+0x45c/0x5d0
[<(____ptrval____)>] hisi_sas_probe+0x40/0x570
[<(____ptrval____)>] hisi_sas_v2_probe+0x1c/0x2c
[<(____ptrval____)>] platform_probe+0x90/0x110
[<(____ptrval____)>] really_probe+0x148/0x744
[<(____ptrval____)>] driver_probe_device+0x8c/0xfc
[<(____ptrval____)>] device_driver_attach+0x11c/0x12c
[<(____ptrval____)>] __driver_attach+0xc8/0x1a0
[<(____ptrval____)>] bus_for_each_dev+0xe4/0x154
[<(____ptrval____)>] driver_attach+0x38/0x50
[<(____ptrval____)>] bus_add_driver+0x1bc/0x2c4
unreferenced object 0xffff001056581800 (size 256):
   comm "swapper/0", pid 1, jiffies 4294894398 (age 89.560s)
   hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 18 58 56 10 00 ff ff  ..........XV....
08 18 58 56 10 00 ff ff c0 f4 b4 10 00 80 ff ff  ..XV............
   backtrace:
[<(____ptrval____)>] kmem_cache_alloc+0x298/0x350
[<(____ptrval____)>] device_add+0x6d8/0xc4c
[<(____ptrval____)>] scsi_add_host_with_dma+0x370/0x5dc
[<(____ptrval____)>] hisi_sas_probe+0x418/0x570
[<(____ptrval____)>] hisi_sas_v2_probe+0x1c/0x2c
[<(____ptrval____)>] platform_probe+0x90/0x110
[<(____ptrval____)>] really_probe+0x148/0x744
[<(____ptrval____)>] driver_probe_device+0x8c/0xfc
[<(____ptrval____)>] device_driver_attach+0x11c/0x12c
[<(____ptrval[  101.941505] random: fast init done
____)>] __driver_attach+0xc8/0x1a0
[<(____ptrval____)>] bus_for_each_dev+0xe4/0x154
[<(____ptrval____)>] driver_attach+0x38/0x50
[<(____ptrval____)>] bus_add_driver+0x1bc/0x2c4
[<(____ptrval____)>] driver_register+0xe4/0x210
[<(____ptrval____)>] __platform_driver_register+0x48/0x60
[<(____ptrval____)>] hisi_sas_v2_driver_init+0x20/0x2c

I think that the release for the shost_dev dev name memory needs fixing.

In scsi_host_dev_release(), for my experiment, shost state is running, 
so we miss the kfree(dev_name(&shost->shost_dev)), I guess. Not sure on 
the proper fix.

> ---
>   drivers/scsi/hosts.c | 14 ++++++--------
>   1 file changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
> index 697c09ef259b..ea50856cb203 100644
> --- a/drivers/scsi/hosts.c
> +++ b/drivers/scsi/hosts.c
> @@ -278,23 +278,22 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
>   
>   		if (!shost->work_q) {
>   			error = -EINVAL;
> -			goto out_free_shost_data;
> +			goto out_del_dev;
>   		}
>   	}
>   
>   	error = scsi_sysfs_add_host(shost);
>   	if (error)
> -		goto out_destroy_host;
> +		goto out_del_dev;
>   
>   	scsi_proc_host_add(shost);
>   	scsi_autopm_put_host(shost);
>   	return error;
>   
> - out_destroy_host:
> -	if (shost->work_q)
> -		destroy_workqueue(shost->work_q);
> - out_free_shost_data:
> -	kfree(shost->shost_data);
> +	/*
> +	 * any host allocation in this function will be freed in
> +	 * scsi_host_dev_release, so not free them in the failure path

nit: "so do not free them"

> +	 */
>    out_del_dev:
>   	device_del(&shost->shost_dev);
>    out_del_gendev:
> @@ -304,7 +303,6 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
>   	pm_runtime_disable(&shost->shost_gendev);
>   	pm_runtime_set_suspended(&shost->shost_gendev);
>   	pm_runtime_put_noidle(&shost->shost_gendev);
> -	scsi_mq_destroy_tags(shost);
>    fail:
>   	return error;
>   }
> 

Thanks,
John
Ming Lei May 31, 2021, 1:22 a.m. UTC | #2
On Fri, May 28, 2021 at 09:34:44AM +0100, John Garry wrote:
> On 28/05/2021 02:18, Ming Lei wrote:
> > When scsi_add_host_with_dma() return failure, the caller will call
> > scsi_host_put(shost) to release everything allocated for this host
> > instance. So we can't free allocated stuff in scsi_add_host_with_dma(),
> > otherwise double free will be caused.
> > 
> > Strictly speaking, these host resources allocation should have been
> > moved to scsi_host_alloc(), but the allocation may need driver's
> > info which can be built between calling scsi_host_alloc() and
> > scsi_add_host(), so just keep the allocations in
> > scsi_add_host_with_dma().
> > 
> > Fixes the problem by relying on host device's release handler to
> > release everything.
> > 
> > Cc: Bart Van Assche <bvanassche@acm.org>
> > Cc: John Garry <john.garry@huawei.com>
> > Cc: Hannes Reinecke <hare@suse.de>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> 
> It now looks like we have a memory leak:
> 
> unreferenced object 0xffff0410070a4e00 (size 128):
>   comm "swapper/0", pid 1, jiffies 4294894100 (age 90.744s)
>   hex dump (first 32 bytes):
> 68 6f 73 74 30 00 00 00 00 00 00 00 00 00 00 00  host0...........
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>   backtrace:
> [<(____ptrval____)>] __kmalloc_track_caller+0x25c/0x380
> [<(____ptrval____)>] kvasprintf+0xe8/0x1a4
> [<(____ptrval____)>] kvasprintf_const+0xc8/0x17c
> [<(____ptrval____)>] kobject_set_name_vargs+0x58/0xf4
> [<(____ptrval____)>] dev_set_name+0xa4/0xe0
> [<(____ptrval____)>] scsi_host_alloc+0x45c/0x5d0
> [<(____ptrval____)>] hisi_sas_probe+0x40/0x570
> [<(____ptrval____)>] hisi_sas_v2_probe+0x1c/0x2c
> [<(____ptrval____)>] platform_probe+0x90/0x110
> [<(____ptrval____)>] really_probe+0x148/0x744
> [<(____ptrval____)>] driver_probe_device+0x8c/0xfc
> [<(____ptrval____)>] device_driver_attach+0x11c/0x12c
> [<(____ptrval____)>] __driver_attach+0xc8/0x1a0
> [<(____ptrval____)>] bus_for_each_dev+0xe4/0x154
> [<(____ptrval____)>] driver_attach+0x38/0x50
> [<(____ptrval____)>] bus_add_driver+0x1bc/0x2c4
> unreferenced object 0xffff001056581800 (size 256):
>   comm "swapper/0", pid 1, jiffies 4294894398 (age 89.560s)
>   hex dump (first 32 bytes):
> 00 00 00 00 00 00 00 00 08 18 58 56 10 00 ff ff  ..........XV....
> 08 18 58 56 10 00 ff ff c0 f4 b4 10 00 80 ff ff  ..XV............
>   backtrace:
> [<(____ptrval____)>] kmem_cache_alloc+0x298/0x350
> [<(____ptrval____)>] device_add+0x6d8/0xc4c
> [<(____ptrval____)>] scsi_add_host_with_dma+0x370/0x5dc
> [<(____ptrval____)>] hisi_sas_probe+0x418/0x570
> [<(____ptrval____)>] hisi_sas_v2_probe+0x1c/0x2c
> [<(____ptrval____)>] platform_probe+0x90/0x110
> [<(____ptrval____)>] really_probe+0x148/0x744
> [<(____ptrval____)>] driver_probe_device+0x8c/0xfc
> [<(____ptrval____)>] device_driver_attach+0x11c/0x12c
> [<(____ptrval[  101.941505] random: fast init done
> ____)>] __driver_attach+0xc8/0x1a0
> [<(____ptrval____)>] bus_for_each_dev+0xe4/0x154
> [<(____ptrval____)>] driver_attach+0x38/0x50
> [<(____ptrval____)>] bus_add_driver+0x1bc/0x2c4
> [<(____ptrval____)>] driver_register+0xe4/0x210
> [<(____ptrval____)>] __platform_driver_register+0x48/0x60
> [<(____ptrval____)>] hisi_sas_v2_driver_init+0x20/0x2c
> 
> I think that the release for the shost_dev dev name memory needs fixing.
> 
> In scsi_host_dev_release(), for my experiment, shost state is running, so we
> miss the kfree(dev_name(&shost->shost_dev)), I guess. Not sure on the proper
> fix.

kobject_cleanup() is responsible for freeing dev->kobj.name, so nothing
to do with freeing the device name.

The following delta patch may fix the issue, and we should have
wrap it into the 2nd patch, can you test it and see if the kmemleak
warning is fixed?

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 492b64f349e1..7f7e0b3f6a3c 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -298,6 +298,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
        put_device(&shost->shost_gendev);
        device_del(&shost->shost_dev);
  out_del_gendev:
+       put_device(shost->shost_gendev.parent);
        device_del(&shost->shost_gendev);
  out_disable_runtime_pm:
        device_disable_async_suspend(&shost->shost_gendev);



Thanks,
Ming
diff mbox series

Patch

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 697c09ef259b..ea50856cb203 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -278,23 +278,22 @@  int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 
 		if (!shost->work_q) {
 			error = -EINVAL;
-			goto out_free_shost_data;
+			goto out_del_dev;
 		}
 	}
 
 	error = scsi_sysfs_add_host(shost);
 	if (error)
-		goto out_destroy_host;
+		goto out_del_dev;
 
 	scsi_proc_host_add(shost);
 	scsi_autopm_put_host(shost);
 	return error;
 
- out_destroy_host:
-	if (shost->work_q)
-		destroy_workqueue(shost->work_q);
- out_free_shost_data:
-	kfree(shost->shost_data);
+	/*
+	 * any host allocation in this function will be freed in
+	 * scsi_host_dev_release, so not free them in the failure path
+	 */
  out_del_dev:
 	device_del(&shost->shost_dev);
  out_del_gendev:
@@ -304,7 +303,6 @@  int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 	pm_runtime_disable(&shost->shost_gendev);
 	pm_runtime_set_suspended(&shost->shost_gendev);
 	pm_runtime_put_noidle(&shost->shost_gendev);
-	scsi_mq_destroy_tags(shost);
  fail:
 	return error;
 }