diff mbox series

[7/7] block: store the holder kobject in bd_holder_disk

Message ID 20221030153120.1045101-8-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/7] block: clear ->slave_dir when dropping the main slave_dir reference | expand

Commit Message

Christoph Hellwig Oct. 30, 2022, 3:31 p.m. UTC
We hold a reference to the holder kobject for each bd_holder_disk,
so to make the code a bit more robust, use a reference to it instead
of the block_device.  As long as no one clears ->bd_holder_dir
before freeing the disk, this isn't strictly required, but it does
make the code clearer and more robust.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/holder.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

Comments

Yu Kuai Oct. 31, 2022, 1:52 a.m. UTC | #1
Hi

在 2022/10/30 23:31, Christoph Hellwig 写道:
> We hold a reference to the holder kobject for each bd_holder_disk,
> so to make the code a bit more robust, use a reference to it instead
> of the block_device.  As long as no one clears ->bd_holder_dir in
> before freeing the disk, this isn't strictly required, but it does
> make the code more clear and more robust.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/holder.c | 23 ++++++++++-------------
>   1 file changed, 10 insertions(+), 13 deletions(-)
> 
> diff --git a/block/holder.c b/block/holder.c
> index dd9327b43ce05..a8c355b9d0806 100644
> --- a/block/holder.c
> +++ b/block/holder.c
> @@ -4,7 +4,7 @@
>   
>   struct bd_holder_disk {
>   	struct list_head	list;
> -	struct block_device	*bdev;
> +	struct kobject		*holder_dir;
>   	int			refcnt;
>   };
>   
> @@ -14,7 +14,7 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
>   	struct bd_holder_disk *holder;
>   
>   	list_for_each_entry(holder, &disk->slave_bdevs, list)
> -		if (holder->bdev == bdev)
> +		if (holder->holder_dir == bdev->bd_holder_dir)
>   			return holder;
>   	return NULL;
>   }
> @@ -82,27 +82,24 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   	}
>   
>   	INIT_LIST_HEAD(&holder->list);
> -	holder->bdev = bdev;
>   	holder->refcnt = 1;
> +	holder->holder_dir = kobject_get(bdev->bd_holder_dir);

I wonder whether this is safe here: what if the kobject reference count is
already 0 and bd_holder_dir is about to be freed? In that case, kref_get()
inside kobject_get() will warn about a use-after-free (uaf), and
kobject_get() will return an address that is about to be freed.

Thanks,
Kuai
> +
>   	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
>   	if (ret)
> -		goto out_free_holder;
> -	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +		goto out_put_holder_dir;
> +	ret = add_symlink(holder->holder_dir, &disk_to_dev(disk)->kobj);
>   	if (ret)
>   		goto out_del_symlink;
>   	list_add(&holder->list, &disk->slave_bdevs);
>   
> -	/*
> -	 * del_gendisk drops the initial reference to bd_holder_dir, so we need
> -	 * to keep our own here to allow for cleanup past that point.
> -	 */
> -	kobject_get(bdev->bd_holder_dir);
>   	mutex_unlock(&disk->open_mutex);
>   	return 0;
>   
>   out_del_symlink:
>   	del_symlink(disk->slave_dir, bdev_kobj(bdev));
> -out_free_holder:
> +out_put_holder_dir:
> +	kobject_put(holder->holder_dir);
>   	kfree(holder);
>   out_unlock:
>   	mutex_unlock(&disk->open_mutex);
> @@ -131,8 +128,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   	holder = bd_find_holder_disk(bdev, disk);
>   	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
>   		del_symlink(disk->slave_dir, bdev_kobj(bdev));
> -		del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> -		kobject_put(bdev->bd_holder_dir);
> +		del_symlink(holder->holder_dir, &disk_to_dev(disk)->kobj);
> +		kobject_put(holder->holder_dir);
>   		list_del_init(&holder->list);
>   		kfree(holder);
>   	}
>
Christoph Hellwig Nov. 1, 2022, 10:49 a.m. UTC | #2
On Mon, Oct 31, 2022 at 09:52:04AM +0800, Yu Kuai wrote:
>>     	INIT_LIST_HEAD(&holder->list);
>> -	holder->bdev = bdev;
>>   	holder->refcnt = 1;
>> +	holder->holder_dir = kobject_get(bdev->bd_holder_dir);
>
> I wonder is this safe here, if kobject reference is 0 here and
> bd_holder_dir is about to be freed. Here in kobject_get, kref_get() will
> warn about uaf, and kobject_get will return a address that is about to
> be freed.

But how could the reference be 0 here?  The driver that calls
bd_link_disk_holder must have the block device open and thus hold a
reference to it.
Yu Kuai Nov. 1, 2022, 11:12 a.m. UTC | #3
Hi,

在 2022/11/01 18:49, Christoph Hellwig 写道:
> On Mon, Oct 31, 2022 at 09:52:04AM +0800, Yu Kuai wrote:
>>>      	INIT_LIST_HEAD(&holder->list);
>>> -	holder->bdev = bdev;
>>>    	holder->refcnt = 1;
>>> +	holder->holder_dir = kobject_get(bdev->bd_holder_dir);
>>
>> I wonder is this safe here, if kobject reference is 0 here and
>> bd_holder_dir is about to be freed. Here in kobject_get, kref_get() will
>> warn about uaf, and kobject_get will return a address that is about to
>> be freed.
> 
> But how could the reference be 0 here?  The driver that calls
> bd_link_disk_holder must have the block device open and thus hold a
> reference to it.

Like I said before, the caller of bd_link_disk_holder() gets the bdev via
blkdev_get_by_dev(), which does not grab a reference to holder_dir.
Grabbing the disk reference only prevents disk_release() from being called,
not del_gendisk(), and the holder_dir reference is dropped in del_gendisk(),
so it can be decreased to 0.

If you agree with the above explanation, here are the fixes I have tried:

1) move kobject_put(bd_holder_dir) from del_gendisk to disk_release;
however, there seem to be a lot of other dependencies.

2) protect the bd_holder_dir reference with open_mutex.

Thanks,
Kuai
> 
> .
>
Christoph Hellwig Nov. 1, 2022, 11:21 a.m. UTC | #4
On Tue, Nov 01, 2022 at 07:12:51PM +0800, Yu Kuai wrote:
>> But how could the reference be 0 here?  The driver that calls
>> bd_link_disk_holder must have the block device open and thus hold a
>> reference to it.
>
> Like I said before, the caller of bd_link_disk_holder() get bdev by
> blkdev_get_by_dev(), which do not grab reference of holder_dir, and
> grab disk reference can only prevent disk_release() to be called, not
> del_gendisk() while holder_dir reference is dropped in del_gendisk()
> and can be decreased to 0.

Oh, the bd_holder_dir reference, not the block_device one.  So yes,
I agree.

> If you agree with above explanation, I tried to fix this:
>
> 1) move kobject_put(bd_holder_dir) from del_gendisk to disk_release,
> there seems to be a lot of other dependencies.
>
> 2) protect bd_holder_dir reference by open_mutex.

I think simply switching the kobject_get in bd_link_disk_holder
into a kobject_get_unless_zero and unwinding if there is no reference
should be enough:

diff --git a/block/holder.c b/block/holder.c
index a8c355b9d0806..cd18064f6ff80 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -83,7 +83,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 
 	INIT_LIST_HEAD(&holder->list);
 	holder->refcnt = 1;
-	holder->holder_dir = kobject_get(bdev->bd_holder_dir);
+	if (!kobject_get_unless_zero(bdev->bd_holder_dir)) {
+		ret = -EBUSY;
+		goto out_free_holder;
+	}
+	holder->holder_dir = bdev->bd_holder_dir;
 
 	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
 	if (ret)
@@ -100,6 +104,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	del_symlink(disk->slave_dir, bdev_kobj(bdev));
 out_put_holder_dir:
 	kobject_put(holder->holder_dir);
+out_free_holder:
 	kfree(holder);
 out_unlock:
 	mutex_unlock(&disk->open_mutex);
Yu Kuai Nov. 1, 2022, 11:28 a.m. UTC | #5
Hi,

在 2022/11/01 19:21, Christoph Hellwig 写道:
> On Tue, Nov 01, 2022 at 07:12:51PM +0800, Yu Kuai wrote:
>>> But how could the reference be 0 here?  The driver that calls
>>> bd_link_disk_holder must have the block device open and thus hold a
>>> reference to it.
>>
>> Like I said before, the caller of bd_link_disk_holder() get bdev by
>> blkdev_get_by_dev(), which do not grab reference of holder_dir, and
>> grab disk reference can only prevent disk_release() to be called, not
>> del_gendisk() while holder_dir reference is dropped in del_gendisk()
>> and can be decreased to 0.
> 
> Oh, the bd_holder_dir reference, not the block_device one.  So yes,
> I agree.
> 
>> If you agree with above explanation, I tried to fix this:
>>
>> 1) move kobject_put(bd_holder_dir) from del_gendisk to disk_release,
>> there seems to be a lot of other dependencies.
>>
>> 2) protect bd_holder_dir reference by open_mutex.
> 
> I think simply switching the kobject_get in bd_link_disk_holder
> into a kobject_get_unless_zero and unwinding if there is no reference
> should be enough:
> 
> diff --git a/block/holder.c b/block/holder.c
> index a8c355b9d0806..cd18064f6ff80 100644
> --- a/block/holder.c
> +++ b/block/holder.c
> @@ -83,7 +83,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   
>   	INIT_LIST_HEAD(&holder->list);
>   	holder->refcnt = 1;
> -	holder->holder_dir = kobject_get(bdev->bd_holder_dir);
> +	if (!kobject_get_unless_zero(bdev->bd_holder_dir)) {
> +		ret = -EBUSY;
> +		goto out_free_holder;
> +	}
> +	holder->holder_dir = bdev->bd_holder_dir;

What if bd_holder_dir has already been freed here? Then a uaf can be
triggered. Thus bd_holder_dir needs to be reset in del_gendisk() if its
reference count drops to 0; however, the kobject APIs can't do that...

Thanks,
Kuai
>   
>   	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
>   	if (ret)
> @@ -100,6 +104,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>   	del_symlink(disk->slave_dir, bdev_kobj(bdev));
>   out_put_holder_dir:
>   	kobject_put(holder->holder_dir);
> +out_free_holder:
>   	kfree(holder);
>   out_unlock:
>   	mutex_unlock(&disk->open_mutex);
> 
> .
>
Christoph Hellwig Nov. 1, 2022, 1:18 p.m. UTC | #6
On Tue, Nov 01, 2022 at 07:28:17PM +0800, Yu Kuai wrote:
> What if bd_holder_dir is already freed here, then uaf can be triggered.
> Thus bd_holder_dir need to be resed in del_gendisk() if it's reference
> is dropped to 0, however, kobject apis can't do that...

Indeed.  I don't think we can simply move the dropping of the reference
as you suggested as that also implies taking it earlier, and the
device in the disk is only initialized in add_disk.

Now what I think we could do is:

 - hold open_mutex in bd_link_disk_holder as you suggested
 - check that the bdev inode is hashed inside open_mutex before doing
   the kobject_get
Yu Kuai Nov. 1, 2022, 1:29 p.m. UTC | #7
Hi,

在 2022/11/01 21:18, Christoph Hellwig 写道:
> On Tue, Nov 01, 2022 at 07:28:17PM +0800, Yu Kuai wrote:
>> What if bd_holder_dir is already freed here, then uaf can be triggered.
>> Thus bd_holder_dir need to be resed in del_gendisk() if it's reference
>> is dropped to 0, however, kobject apis can't do that...
> 
> Indeed.  I don't think we can simply move the dropping of the reference
> as you suggested as that also implies taking it earlier, and the
> device in the disk is only initialized in add_disk.
> 
> Now what I think we could do is:
> 
>   - hold open_mutex in bd_link_disk_holder as you suggested
>   - check that the bdev inode is hashed inside open_mutex before doing
>     the kobject_get

Yes, that sounds good. Checking whether the inode is hashed is better than
what I did in another thread, which was to introduce a new field.

Thanks,
Kuai
> 
> .
>
diff mbox series

Patch

diff --git a/block/holder.c b/block/holder.c
index dd9327b43ce05..a8c355b9d0806 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -4,7 +4,7 @@ 
 
 struct bd_holder_disk {
 	struct list_head	list;
-	struct block_device	*bdev;
+	struct kobject		*holder_dir;
 	int			refcnt;
 };
 
@@ -14,7 +14,7 @@  static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
 	struct bd_holder_disk *holder;
 
 	list_for_each_entry(holder, &disk->slave_bdevs, list)
-		if (holder->bdev == bdev)
+		if (holder->holder_dir == bdev->bd_holder_dir)
 			return holder;
 	return NULL;
 }
@@ -82,27 +82,24 @@  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	}
 
 	INIT_LIST_HEAD(&holder->list);
-	holder->bdev = bdev;
 	holder->refcnt = 1;
+	holder->holder_dir = kobject_get(bdev->bd_holder_dir);
+
 	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
 	if (ret)
-		goto out_free_holder;
-	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+		goto out_put_holder_dir;
+	ret = add_symlink(holder->holder_dir, &disk_to_dev(disk)->kobj);
 	if (ret)
 		goto out_del_symlink;
 	list_add(&holder->list, &disk->slave_bdevs);
 
-	/*
-	 * del_gendisk drops the initial reference to bd_holder_dir, so we need
-	 * to keep our own here to allow for cleanup past that point.
-	 */
-	kobject_get(bdev->bd_holder_dir);
 	mutex_unlock(&disk->open_mutex);
 	return 0;
 
 out_del_symlink:
 	del_symlink(disk->slave_dir, bdev_kobj(bdev));
-out_free_holder:
+out_put_holder_dir:
+	kobject_put(holder->holder_dir);
 	kfree(holder);
 out_unlock:
 	mutex_unlock(&disk->open_mutex);
@@ -131,8 +128,8 @@  void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	holder = bd_find_holder_disk(bdev, disk);
 	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
 		del_symlink(disk->slave_dir, bdev_kobj(bdev));
-		del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
-		kobject_put(bdev->bd_holder_dir);
+		del_symlink(holder->holder_dir, &disk_to_dev(disk)->kobj);
+		kobject_put(holder->holder_dir);
 		list_del_init(&holder->list);
 		kfree(holder);
 	}