diff mbox series

[1/9] md: fix error handling in md_alloc

Message ID 20220713113125.2232975-2-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/9] md: fix error handling in md_alloc | expand

Commit Message

Christoph Hellwig July 13, 2022, 11:31 a.m. UTC
Error handling in md_alloc is a mess.  Untangle it to just free the mddev
directly before add_disk is called and thus the gendisk is globally
visible.  After that clear the hold flag and let the mddev_put take care
of cleaning up the mddev through the usual mechanisms.

Fixes: 5e55e2f5fc95 ("[PATCH] md: convert compile time warnings into runtime warnings")
Fixes: 9be68dd7ac0e ("md: add error handling support for add_disk()")
Fixes: 7ad1069166c0 ("md: properly unwind when failing to add the kobject in md_alloc")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---

Note: the put_disk here is not fully correct on md-next, but will
do the right thing once merged with the block tree.

 drivers/md/md.c | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

Comments

Logan Gunthorpe July 13, 2022, 3:26 p.m. UTC | #1
On 2022-07-13 05:31, Christoph Hellwig wrote:
> Error handling in md_alloc is a mess.  Untangle it to just free the mddev
> directly before add_disk is called and thus the gendisk is globally
> visible.  After that clear the hold flag and let the mddev_put take care
> of cleaning up the mddev through the usual mechanisms.
> 
> Fixes: 5e55e2f5fc95 ("[PATCH] md: convert compile time warnings into runtime warnings")
> Fixes: 9be68dd7ac0e ("md: add error handling support for add_disk()")
> Fixes: 7ad1069166c0 ("md: properly unwind when failing to add the kobject in md_alloc")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> 
> Note: the put_disk here is not fully correct on md-next, but will
> do the right thing once merged with the block tree.
> 
>  drivers/md/md.c | 45 ++++++++++++++++++++++++++++++++-------------
>  1 file changed, 32 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index b64de313838f2..7affddade8b6b 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -791,6 +791,15 @@ static struct mddev *mddev_alloc(dev_t unit)
>  	return ERR_PTR(error);
>  }
>  
> +static void mddev_free(struct mddev *mddev)
> +{
> +	spin_lock(&all_mddevs_lock);
> +	list_del(&mddev->all_mddevs);
> +	spin_unlock(&all_mddevs_lock);
> +
> +	kfree(mddev);
> +}

Sadly, I still don't think this is correct. mddev_init() has already
called kobject_init() and according to the documentation it *must* be
cleaned up with a call to kobject_put(). That doesn't happen in this
path -- so I believe this will cause memory leaks.

I have also noticed this code base already makes that same mistake in
mddev_alloc(): freeing the mddev on the error path after kobject_init().
But that would be easy to fix up.

Logan
Logan Gunthorpe July 13, 2022, 4:31 p.m. UTC | #2
On 2022-07-13 09:26, Logan Gunthorpe wrote:
> 
> 
> 
> On 2022-07-13 05:31, Christoph Hellwig wrote:
>> Error handling in md_alloc is a mess.  Untangle it to just free the mddev
>> directly before add_disk is called and thus the gendisk is globally
>> visible.  After that clear the hold flag and let the mddev_put take care
>> of cleaning up the mddev through the usual mechanisms.
>>
>> Fixes: 5e55e2f5fc95 ("[PATCH] md: convert compile time warnings into runtime warnings")
>> Fixes: 9be68dd7ac0e ("md: add error handling support for add_disk()")
>> Fixes: 7ad1069166c0 ("md: properly unwind when failing to add the kobject in md_alloc")
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>>
>> Note: the put_disk here is not fully correct on md-next, but will
>> do the right thing once merged with the block tree.
>>
>>  drivers/md/md.c | 45 ++++++++++++++++++++++++++++++++-------------
>>  1 file changed, 32 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index b64de313838f2..7affddade8b6b 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -791,6 +791,15 @@ static struct mddev *mddev_alloc(dev_t unit)
>>  	return ERR_PTR(error);
>>  }
>>  
>> +static void mddev_free(struct mddev *mddev)
>> +{
>> +	spin_lock(&all_mddevs_lock);
>> +	list_del(&mddev->all_mddevs);
>> +	spin_unlock(&all_mddevs_lock);
>> +
>> +	kfree(mddev);
>> +}
> 
> Sadly, I still don't think this is correct. mddev_init() has already
> called kobject_init() and according to the documentation it *must* be
> cleaned up with a call to kobject_put(). That doesn't happen in this
> path -- so I believe this will cause memory leaks.

What about something along these lines:



diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78e2588ed43e..1d13840cec6d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5581,6 +5581,15 @@ md_attr_store(struct kobject *kobj, struct
attribute *at>
        return rv;
 }

+static void mddev_free(struct mddev *mddev)
+{
+       percpu_ref_exit(&mddev->writes_pending);
+       bioset_exit(&mddev->bio_set);
+       bioset_exit(&mddev->sync_set);
+
+       kfree(mddev);
+}
+
 static void md_free(struct kobject *ko)
 {
        struct mddev *mddev = container_of(ko, struct mddev, kobj);
@@ -5593,12 +5602,9 @@ static void md_free(struct kobject *ko)
        if (mddev->gendisk) {
                del_gendisk(mddev->gendisk);
                blk_cleanup_disk(mddev->gendisk);
+       } else {
+               mddev_free(mddev);
        }
-       percpu_ref_exit(&mddev->writes_pending);
-
-       bioset_exit(&mddev->bio_set);
-       bioset_exit(&mddev->sync_set);
-       kfree(mddev);
 }

 static const struct sysfs_ops md_sysfs_ops = {
@@ -7858,6 +7864,13 @@ static unsigned int md_check_events(struct
gendisk *disk>
        return ret;
 }

+static void md_free_disk(struct gendisk *disk)
+{
+       struct mddev *mddev = disk->private_data;
+
+       mddev_free(mddev);
+}
+
 const struct block_device_operations md_fops =
 {
        .owner          = THIS_MODULE,
@@ -7871,6 +7884,7 @@ const struct block_device_operations md_fops =
        .getgeo         = md_getgeo,
        .check_events   = md_check_events,
        .set_read_only  = md_set_read_only,
+       .free_disk      = md_free_disk,
 };
diff mbox series

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b64de313838f2..7affddade8b6b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -791,6 +791,15 @@  static struct mddev *mddev_alloc(dev_t unit)
 	return ERR_PTR(error);
 }
 
+static void mddev_free(struct mddev *mddev)
+{
+	spin_lock(&all_mddevs_lock);
+	list_del(&mddev->all_mddevs);
+	spin_unlock(&all_mddevs_lock);
+
+	kfree(mddev);
+}
+
 static const struct attribute_group md_redundancy_group;
 
 void mddev_unlock(struct mddev *mddev)
@@ -5664,8 +5673,8 @@  int md_alloc(dev_t dev, char *name)
 	mutex_lock(&disks_mutex);
 	mddev = mddev_alloc(dev);
 	if (IS_ERR(mddev)) {
-		mutex_unlock(&disks_mutex);
-		return PTR_ERR(mddev);
+		error = PTR_ERR(mddev);
+		goto out_unlock;
 	}
 
 	partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
@@ -5683,7 +5692,7 @@  int md_alloc(dev_t dev, char *name)
 			    strcmp(mddev2->gendisk->disk_name, name) == 0) {
 				spin_unlock(&all_mddevs_lock);
 				error = -EEXIST;
-				goto out_unlock_disks_mutex;
+				goto out_free_mddev;
 			}
 		spin_unlock(&all_mddevs_lock);
 	}
@@ -5696,7 +5705,7 @@  int md_alloc(dev_t dev, char *name)
 	error = -ENOMEM;
 	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
-		goto out_unlock_disks_mutex;
+		goto out_free_mddev;
 
 	disk->major = MAJOR(mddev->unit);
 	disk->first_minor = unit << shift;
@@ -5717,25 +5726,35 @@  int md_alloc(dev_t dev, char *name)
 	mddev->gendisk = disk;
 	error = add_disk(disk);
 	if (error)
-		goto out_cleanup_disk;
+		goto out_put_disk;
 
 	error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
-	if (error)
-		goto out_del_gendisk;
+	if (error) {
+		/*
+		 * The disk is already live at this point.  Clear the hold flag
+		 * and let mddev_put take care of the deletion, as it isn't any
+		 * different from a normal close on last release now.
+		 */
+		mddev->hold_active = 0;
+		goto done;
+	}
 
 	kobject_uevent(&mddev->kobj, KOBJ_ADD);
 	mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
 	mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
-	goto out_unlock_disks_mutex;
 
-out_del_gendisk:
-	del_gendisk(disk);
-out_cleanup_disk:
-	blk_cleanup_disk(disk);
-out_unlock_disks_mutex:
+done:
 	mutex_unlock(&disks_mutex);
 	mddev_put(mddev);
 	return error;
+
+out_put_disk:
+	put_disk(disk);
+out_free_mddev:
+	mddev_free(mddev);
+out_unlock:
+	mutex_unlock(&disks_mutex);
+	return error;
 }
 
 static void md_probe(dev_t dev)