diff mbox series

[2/2] btrfs: simplify waiting for encoded read endios

Message ID 22c7231a2ce9c0c7c187dff159be1c868d783765.1731316882.git.jth@kernel.org (mailing list archive)
State New
Headers show
Series btrfs: fix use-after-free in btrfs_encoded_read_endio | expand

Commit Message

Johannes Thumshirn Nov. 11, 2024, 9:28 a.m. UTC
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>

Simplifiy the I/O completion path for encoded reads by using a
completion instead of a wait_queue.

Furthermore skip taking an extra reference that is instantly
dropped anyways.

Suggested-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/inode.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

Comments

Damien Le Moal Nov. 11, 2024, 9:56 a.m. UTC | #1
On 11/11/24 18:28, Johannes Thumshirn wrote:
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> Simplifiy the I/O completion path for encoded reads by using a
> completion instead of a wait_queue.
> 
> Furthermore skip taking an extra reference that is instantly
> dropped anyways.
> 
> Suggested-by: Damien Le Moal <Damien.LeMoal@wdc.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/inode.c | 12 +++++-------
>  1 file changed, 5 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index cb8b23a3e56b..916c9d7ca112 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -9068,7 +9068,7 @@ static ssize_t btrfs_encoded_read_inline(
>  }
>  
>  struct btrfs_encoded_read_private {
> -	wait_queue_head_t wait;
> +	struct completion done;
>  	void *uring_ctx;
>  	atomic_t pending;
>  	blk_status_t status;
> @@ -9097,7 +9097,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
>  			btrfs_uring_read_extent_endio(priv->uring_ctx, err);
>  			kfree(priv);
>  		} else {
> -			wake_up(&priv->wait);
> +			complete(&priv->done);
>  		}
>  	}
>  }
> @@ -9116,7 +9116,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>  	if (!priv)
>  		return -ENOMEM;
>  
> -	init_waitqueue_head(&priv->wait);
> +	init_completion(&priv->done);
>  	atomic_set(&priv->pending, 1);
>  	priv->status = 0;
>  	priv->uring_ctx = uring_ctx;
> @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>  		disk_io_size -= bytes;
>  	} while (disk_io_size);
>  
> -	atomic_inc(&priv->pending);
>  	btrfs_submit_bbio(bbio, 0);
>  
>  	if (uring_ctx) {
> -		if (atomic_dec_return(&priv->pending) == 0) {
> +		if (atomic_read(&priv->pending) == 0) {

This does not look right... Testing this essentially means that you are doing
wait_for_completion(). So I would move this hunk after the call for
wait_for_completion(). That will also allow avoiding duplicating the cleanup
path getting ret and doing the kfree(priv).

>  			ret = blk_status_to_errno(READ_ONCE(priv->status));
>  			btrfs_uring_read_extent_endio(uring_ctx, ret);
>  			kfree(priv);
> @@ -9158,8 +9157,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>  
>  		return -EIOCBQUEUED;
>  	} else {
> -		if (atomic_dec_return(&priv->pending) != 0)
> -			io_wait_event(priv->wait, !atomic_read(&priv->pending));
> +		wait_for_completion(&priv->done);
>  		/* See btrfs_encoded_read_endio() for ordering. */
>  		ret = blk_status_to_errno(READ_ONCE(priv->status));
>  		kfree(priv);
Johannes Thumshirn Nov. 11, 2024, 10:13 a.m. UTC | #2
On 11.11.24 10:56, Damien Le Moal wrote:
>> @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>>   		disk_io_size -= bytes;
>>   	} while (disk_io_size);
>>   
>> -	atomic_inc(&priv->pending);
>>   	btrfs_submit_bbio(bbio, 0);
>>   
>>   	if (uring_ctx) {
>> -		if (atomic_dec_return(&priv->pending) == 0) {
>> +		if (atomic_read(&priv->pending) == 0) {
> 
> This does not look right... Testing this essentially means that you are doing
> wait_for_completion(). So I would move this hunk after the call for
> wait_for_completion(). That will also allow avoiding duplicating the cleanup
> path getting ret and doing the kfree(priv).
> 

Are you sure? Because we initialize as 1, then submit and test for 0. 
Until the bio completes we're returning -EIOCBQUEUED, once it completes 
it drops a reference and atomic_read() will return 0.

BUT I just saw, it's doing a double free:

In 'btrfs_encoded_read_regular_fill_pages':
if (uring_ctx) {
          if (atomic_read(&priv->pending) == 0) {
                  ret = blk_status_to_errno(READ_ONCE(priv->status));
                  btrfs_uring_read_extent_endio(uring_ctx, ret);
                  kfree(priv);
                  return ret;
          }

          return -EIOCBQUEUED;
}

and 'btrfs_encoded_read_endio':
bio_put(&bbio->bio);
if (atomic_dec_and_test(&priv->pending)) {
         int err = blk_status_to_errno(READ_ONCE(priv->status));

         if (priv->uring_ctx) {
                 btrfs_uring_read_extent_endio(priv->uring_ctx, err);
                 kfree(priv);
Mark Harmstone Nov. 11, 2024, 6:36 p.m. UTC | #3
On 11/11/24 09:28, Johannes Thumshirn wrote:
> > 
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> Simplifiy the I/O completion path for encoded reads by using a
> completion instead of a wait_queue.
> 
> Furthermore skip taking an extra reference that is instantly
> dropped anyways.
> 
> Suggested-by: Damien Le Moal <Damien.LeMoal@wdc.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   fs/btrfs/inode.c | 12 +++++-------
>   1 file changed, 5 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index cb8b23a3e56b..916c9d7ca112 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -9068,7 +9068,7 @@ static ssize_t btrfs_encoded_read_inline(
>   }
>   
>   struct btrfs_encoded_read_private {
> -	wait_queue_head_t wait;
> +	struct completion done;
>   	void *uring_ctx;
>   	atomic_t pending;
>   	blk_status_t status;
> @@ -9097,7 +9097,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
>   			btrfs_uring_read_extent_endio(priv->uring_ctx, err);
>   			kfree(priv);
>   		} else {
> -			wake_up(&priv->wait);
> +			complete(&priv->done);
>   		}
>   	}
>   }
> @@ -9116,7 +9116,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>   	if (!priv)
>   		return -ENOMEM;
>   
> -	init_waitqueue_head(&priv->wait);
> +	init_completion(&priv->done);
>   	atomic_set(&priv->pending, 1);
>   	priv->status = 0;
>   	priv->uring_ctx = uring_ctx;
> @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>   		disk_io_size -= bytes;
>   	} while (disk_io_size);
>   
> -	atomic_inc(&priv->pending);
>   	btrfs_submit_bbio(bbio, 0);

Is this safe for the io_uring path? The bio completion calls 
btrfs_encoded_read_endio, which frees priv if it decreases pending to 0.

>   
>   	if (uring_ctx) {
> -		if (atomic_dec_return(&priv->pending) == 0) {
> +		if (atomic_read(&priv->pending) == 0) {
>   			ret = blk_status_to_errno(READ_ONCE(priv->status));
>   			btrfs_uring_read_extent_endio(uring_ctx, ret);
>   			kfree(priv);
> @@ -9158,8 +9157,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
>   
>   		return -EIOCBQUEUED;
>   	} else {
> -		if (atomic_dec_return(&priv->pending) != 0)
> -			io_wait_event(priv->wait, !atomic_read(&priv->pending));
> +		wait_for_completion(&priv->done);

Probably a nitpick, but completion.txt implies that this ought to be 
wait_for_completion_io.

>   		/* See btrfs_encoded_read_endio() for ordering. */
>   		ret = blk_status_to_errno(READ_ONCE(priv->status));
>   		kfree(priv);

Mark
diff mbox series

Patch

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cb8b23a3e56b..916c9d7ca112 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9068,7 +9068,7 @@  static ssize_t btrfs_encoded_read_inline(
 }
 
 struct btrfs_encoded_read_private {
-	wait_queue_head_t wait;
+	struct completion done;
 	void *uring_ctx;
 	atomic_t pending;
 	blk_status_t status;
@@ -9097,7 +9097,7 @@  static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
 			btrfs_uring_read_extent_endio(priv->uring_ctx, err);
 			kfree(priv);
 		} else {
-			wake_up(&priv->wait);
+			complete(&priv->done);
 		}
 	}
 }
@@ -9116,7 +9116,7 @@  int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 	if (!priv)
 		return -ENOMEM;
 
-	init_waitqueue_head(&priv->wait);
+	init_completion(&priv->done);
 	atomic_set(&priv->pending, 1);
 	priv->status = 0;
 	priv->uring_ctx = uring_ctx;
@@ -9145,11 +9145,10 @@  int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 		disk_io_size -= bytes;
 	} while (disk_io_size);
 
-	atomic_inc(&priv->pending);
 	btrfs_submit_bbio(bbio, 0);
 
 	if (uring_ctx) {
-		if (atomic_dec_return(&priv->pending) == 0) {
+		if (atomic_read(&priv->pending) == 0) {
 			ret = blk_status_to_errno(READ_ONCE(priv->status));
 			btrfs_uring_read_extent_endio(uring_ctx, ret);
 			kfree(priv);
@@ -9158,8 +9157,7 @@  int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 
 		return -EIOCBQUEUED;
 	} else {
-		if (atomic_dec_return(&priv->pending) != 0)
-			io_wait_event(priv->wait, !atomic_read(&priv->pending));
+		wait_for_completion(&priv->done);
 		/* See btrfs_encoded_read_endio() for ordering. */
 		ret = blk_status_to_errno(READ_ONCE(priv->status));
 		kfree(priv);