Message ID | 22c7231a2ce9c0c7c187dff159be1c868d783765.1731316882.git.jth@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: fix use-after-free in btrfs_encoded_read_endio | expand |
On 11/11/24 18:28, Johannes Thumshirn wrote: > From: Johannes Thumshirn <johannes.thumshirn@wdc.com> > > Simplifiy the I/O completion path for encoded reads by using a > completion instead of a wait_queue. > > Furthermore skip taking an extra reference that is instantly > dropped anyways. > > Suggested-by: Damien Le Moal <Damien.LeMoal@wdc.com> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/inode.c | 12 +++++------- > 1 file changed, 5 insertions(+), 7 deletions(-) > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index cb8b23a3e56b..916c9d7ca112 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -9068,7 +9068,7 @@ static ssize_t btrfs_encoded_read_inline( > } > > struct btrfs_encoded_read_private { > - wait_queue_head_t wait; > + struct completion done; > void *uring_ctx; > atomic_t pending; > blk_status_t status; > @@ -9097,7 +9097,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) > btrfs_uring_read_extent_endio(priv->uring_ctx, err); > kfree(priv); > } else { > - wake_up(&priv->wait); > + complete(&priv->done); > } > } > } > @@ -9116,7 +9116,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, > if (!priv) > return -ENOMEM; > > - init_waitqueue_head(&priv->wait); > + init_completion(&priv->done); > atomic_set(&priv->pending, 1); > priv->status = 0; > priv->uring_ctx = uring_ctx; > @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, > disk_io_size -= bytes; > } while (disk_io_size); > > - atomic_inc(&priv->pending); > btrfs_submit_bbio(bbio, 0); > > if (uring_ctx) { > - if (atomic_dec_return(&priv->pending) == 0) { > + if (atomic_read(&priv->pending) == 0) { This does not look right... Testing this essentially means that you are doing wait_for_completion(). So I would move this hunk after the call for wait_for_completion(). That will also allow avoiding duplicating the cleanup path getting ret and doing the kfree(priv). > ret = blk_status_to_errno(READ_ONCE(priv->status)); > btrfs_uring_read_extent_endio(uring_ctx, ret); > kfree(priv); > @@ -9158,8 +9157,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, > > return -EIOCBQUEUED; > } else { > - if (atomic_dec_return(&priv->pending) != 0) > - io_wait_event(priv->wait, !atomic_read(&priv->pending)); > + wait_for_completion(&priv->done); > /* See btrfs_encoded_read_endio() for ordering. */ > ret = blk_status_to_errno(READ_ONCE(priv->status)); > kfree(priv);
On 11.11.24 10:56, Damien Le Moal wrote: >> @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, >> disk_io_size -= bytes; >> } while (disk_io_size); >> >> - atomic_inc(&priv->pending); >> btrfs_submit_bbio(bbio, 0); >> >> if (uring_ctx) { >> - if (atomic_dec_return(&priv->pending) == 0) { >> + if (atomic_read(&priv->pending) == 0) { > > This does not look right... Testing this essentially means that you are doing > wait_for_completion(). So I would move this hunk after the call for > wait_for_completion(). That will also allow avoiding duplicating the cleanup > path getting ret and doing the kfree(priv). > Are you sure? Because we initialize as 1, then submit and test for 0. Until the bio completes we're returning -EIOCBQUEUED, once it completes it drops a reference and atomic_read() will return 0. BUT I just saw, it's doing a double free: In 'btrfs_encoded_read_regular_fill_pages': if (uring_ctx) { if (atomic_read(&priv->pending) == 0) { ret = blk_status_to_errno(READ_ONCE(priv->status)); btrfs_uring_read_extent_endio(uring_ctx, ret); kfree(priv); return ret; } return -EIOCBQUEUED; } and 'btrfs_encoded_read_endio': bio_put(&bbio->bio); if (atomic_dec_and_test(&priv->pending)) { int err = blk_status_to_errno(READ_ONCE(priv->status)); if (priv->uring_ctx) { btrfs_uring_read_extent_endio(priv->uring_ctx, err); kfree(priv);
On 11/11/24 09:28, Johannes Thumshirn wrote: > > > From: Johannes Thumshirn <johannes.thumshirn@wdc.com> > > Simplifiy the I/O completion path for encoded reads by using a > completion instead of a wait_queue. > > Furthermore skip taking an extra reference that is instantly > dropped anyways. > > Suggested-by: Damien Le Moal <Damien.LeMoal@wdc.com> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/inode.c | 12 +++++------- > 1 file changed, 5 insertions(+), 7 deletions(-) > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index cb8b23a3e56b..916c9d7ca112 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -9068,7 +9068,7 @@ static ssize_t btrfs_encoded_read_inline( > } > > struct btrfs_encoded_read_private { > - wait_queue_head_t wait; > + struct completion done; > void *uring_ctx; > atomic_t pending; > blk_status_t status; > @@ -9097,7 +9097,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) > btrfs_uring_read_extent_endio(priv->uring_ctx, err); > kfree(priv); > } else { > - wake_up(&priv->wait); > + complete(&priv->done); > } > } > } > @@ -9116,7 +9116,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, > if (!priv) > return -ENOMEM; > > - init_waitqueue_head(&priv->wait); > + init_completion(&priv->done); > atomic_set(&priv->pending, 1); > priv->status = 0; > priv->uring_ctx = uring_ctx; > @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, > disk_io_size -= bytes; > } while (disk_io_size); > > - atomic_inc(&priv->pending); > btrfs_submit_bbio(bbio, 0); Is this safe for the io_uring path? The bio completion calls btrfs_encoded_read_endio, which frees priv if it decreases pending to 0. > > if (uring_ctx) { > - if (atomic_dec_return(&priv->pending) == 0) { > + if (atomic_read(&priv->pending) == 0) { > ret = blk_status_to_errno(READ_ONCE(priv->status)); > btrfs_uring_read_extent_endio(uring_ctx, ret); > kfree(priv); > @@ -9158,8 +9157,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, > > return -EIOCBQUEUED; > } else { > - if (atomic_dec_return(&priv->pending) != 0) > - io_wait_event(priv->wait, !atomic_read(&priv->pending)); > + wait_for_completion(&priv->done); Probably a nitpick, but completion.txt implies that this ought to be wait_for_completion_io. > /* See btrfs_encoded_read_endio() for ordering. */ > ret = blk_status_to_errno(READ_ONCE(priv->status)); > kfree(priv); Mark
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cb8b23a3e56b..916c9d7ca112 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9068,7 +9068,7 @@ static ssize_t btrfs_encoded_read_inline( } struct btrfs_encoded_read_private { - wait_queue_head_t wait; + struct completion done; void *uring_ctx; atomic_t pending; blk_status_t status; @@ -9097,7 +9097,7 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio) btrfs_uring_read_extent_endio(priv->uring_ctx, err); kfree(priv); } else { - wake_up(&priv->wait); + complete(&priv->done); } } } @@ -9116,7 +9116,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, if (!priv) return -ENOMEM; - init_waitqueue_head(&priv->wait); + init_completion(&priv->done); atomic_set(&priv->pending, 1); priv->status = 0; priv->uring_ctx = uring_ctx; @@ -9145,11 +9145,10 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, disk_io_size -= bytes; } while (disk_io_size); - atomic_inc(&priv->pending); btrfs_submit_bbio(bbio, 0); if (uring_ctx) { - if (atomic_dec_return(&priv->pending) == 0) { + if (atomic_read(&priv->pending) == 0) { ret = blk_status_to_errno(READ_ONCE(priv->status)); btrfs_uring_read_extent_endio(uring_ctx, ret); kfree(priv); @@ -9158,8 +9157,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, return -EIOCBQUEUED; } else { - if (atomic_dec_return(&priv->pending) != 0) - io_wait_event(priv->wait, !atomic_read(&priv->pending)); + wait_for_completion(&priv->done); /* See btrfs_encoded_read_endio() for ordering. */ ret = blk_status_to_errno(READ_ONCE(priv->status)); kfree(priv);