diff mbox series

[v2,2/3] btrfs: remove BUG() after failure to insert delayed dir index item

Message ID ee7caf888c95075685cd068d6e78f96be283b4b5.1693209858.git.fdmanana@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: updates to error path for delayed dir index insertion failure | expand

Commit Message

Filipe Manana Aug. 28, 2023, 8:06 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

Instead of calling BUG() when we fail to insert a delayed dir index item
into the delayed node's tree, we can just release all the resources we
have allocated/acquired before and return the error to the caller. This is
fine because all existing call chains undo anything they have done before
calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending
snapshots in the transaction commit path).

So remove the BUG() call and do proper error handling.

This relates to a syzbot report linked below, but does not fix it because
it only prevents hitting a BUG(), it does not fix the issue where somehow
we attempt to use twice the same index number for different index items.

Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 27 deletions(-)

Comments

Qu Wenruo Aug. 28, 2023, 8:42 a.m. UTC | #1
On 2023/8/28 16:06, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
>
> Instead of calling BUG() when we fail to insert a delayed dir index item
> into the delayed node's tree, we can just release all the resources we
> have allocated/acquired before and return the error to the caller. This is
> fine because all existing call chains undo anything they have done before
> calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending
> snapshots in the transaction commit path).
>
> So remove the BUG() call and do proper error handling.
>
> This relates to a syzbot report linked below, but does not fix it because
> it only prevents hitting a BUG(), it does not fix the issue where somehow
> we attempt to use twice the same index number for different index items.
>
> Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/
> Signed-off-by: Filipe Manana <fdmanana@suse.com>
> ---
>   fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++---------------
>   1 file changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
> index f9dae729811b..eb175ae52245 100644
> --- a/fs/btrfs/delayed-inode.c
> +++ b/fs/btrfs/delayed-inode.c
> @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
>   	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
>   }
>
> -/* Will return 0 or -ENOMEM */
> +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
> +{
> +	struct btrfs_fs_info *fs_info = trans->fs_info;
> +	const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
> +
> +	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
> +		return;
> +
> +	/*
> +	 * Adding the new dir index item does not require touching another
> +	 * leaf, so we can release 1 unit of metadata that was previously
> +	 * reserved when starting the transaction. This applies only to
> +	 * the case where we had a transaction start and excludes the
> +	 * transaction join case (when replaying log trees).
> +	 */
> +	trace_btrfs_space_reservation(fs_info, "transaction",
> +				      trans->transid, bytes, 0);

I know this is from the old code, but shouldn't we use "-bytes" instead?

Otherwise looks fine to me.

Thanks,
Qu

> +	btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
> +	ASSERT(trans->bytes_reserved >= bytes);
> +	trans->bytes_reserved -= bytes;
> +}
> +
> +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
>   int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
>   				   const char *name, int name_len,
>   				   struct btrfs_inode *dir,
> @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
>
>   	mutex_lock(&delayed_node->mutex);
>
> +	/*
> +	 * First attempt to insert the delayed item. This is to make the error
> +	 * handling path simpler in case we fail (-EEXIST). There's no risk of
> +	 * any other task coming in and running the delayed item before we do
> +	 * the metadata space reservation below, because we are holding the
> +	 * delayed node's mutex and that mutex must also be locked before the
> +	 * node's delayed items can be run.
> +	 */
> +	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
> +	if (unlikely(ret)) {
> +		btrfs_err(trans->fs_info,
> +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
> +			  name_len, name, index, btrfs_root_id(delayed_node->root),
> +			  delayed_node->inode_id, dir->index_cnt,
> +			  delayed_node->index_cnt, ret);
> +		btrfs_release_delayed_item(delayed_item);
> +		btrfs_release_dir_index_item_space(trans); > +		mutex_unlock(&delayed_node->mutex);
> +		goto release_node;
> +	}
> +
>   	if (delayed_node->index_item_leaves == 0 ||
>   	    delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
>   		delayed_node->curr_index_batch_size = data_len;
> @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
>   		 * impossible.
>   		 */
>   		if (WARN_ON(ret)) {
> -			mutex_unlock(&delayed_node->mutex);
>   			btrfs_release_delayed_item(delayed_item);
> +			mutex_unlock(&delayed_node->mutex);
>   			goto release_node;
>   		}
>
>   		delayed_node->index_item_leaves++;
> -	} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
> -		const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
> -
> -		/*
> -		 * Adding the new dir index item does not require touching another
> -		 * leaf, so we can release 1 unit of metadata that was previously
> -		 * reserved when starting the transaction. This applies only to
> -		 * the case where we had a transaction start and excludes the
> -		 * transaction join case (when replaying log trees).
> -		 */
> -		trace_btrfs_space_reservation(fs_info, "transaction",
> -					      trans->transid, bytes, 0);
> -		btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
> -		ASSERT(trans->bytes_reserved >= bytes);
> -		trans->bytes_reserved -= bytes;
> -	}
> -
> -	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
> -	if (unlikely(ret)) {
> -		btrfs_err(trans->fs_info,
> -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
> -			  name_len, name, index, btrfs_root_id(delayed_node->root),
> -			  delayed_node->inode_id, dir->index_cnt,
> -			  delayed_node->index_cnt, ret);
> -		BUG();
> +	} else {
> +		btrfs_release_dir_index_item_space(trans);
>   	}
>   	mutex_unlock(&delayed_node->mutex);
>
Filipe Manana Aug. 28, 2023, 8:53 a.m. UTC | #2
On Mon, Aug 28, 2023 at 9:42 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> On 2023/8/28 16:06, fdmanana@kernel.org wrote:
> > From: Filipe Manana <fdmanana@suse.com>
> >
> > Instead of calling BUG() when we fail to insert a delayed dir index item
> > into the delayed node's tree, we can just release all the resources we
> > have allocated/acquired before and return the error to the caller. This is
> > fine because all existing call chains undo anything they have done before
> > calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending
> > snapshots in the transaction commit path).
> >
> > So remove the BUG() call and do proper error handling.
> >
> > This relates to a syzbot report linked below, but does not fix it because
> > it only prevents hitting a BUG(), it does not fix the issue where somehow
> > we attempt to use twice the same index number for different index items.
> >
> > Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/
> > Signed-off-by: Filipe Manana <fdmanana@suse.com>
> > ---
> >   fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++---------------
> >   1 file changed, 47 insertions(+), 27 deletions(-)
> >
> > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
> > index f9dae729811b..eb175ae52245 100644
> > --- a/fs/btrfs/delayed-inode.c
> > +++ b/fs/btrfs/delayed-inode.c
> > @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
> >       btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
> >   }
> >
> > -/* Will return 0 or -ENOMEM */
> > +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
> > +{
> > +     struct btrfs_fs_info *fs_info = trans->fs_info;
> > +     const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
> > +
> > +     if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
> > +             return;
> > +
> > +     /*
> > +      * Adding the new dir index item does not require touching another
> > +      * leaf, so we can release 1 unit of metadata that was previously
> > +      * reserved when starting the transaction. This applies only to
> > +      * the case where we had a transaction start and excludes the
> > +      * transaction join case (when replaying log trees).
> > +      */
> > +     trace_btrfs_space_reservation(fs_info, "transaction",
> > +                                   trans->transid, bytes, 0);
>
> I know this is from the old code, but shouldn't we use "-bytes" instead?

Nop.
The last argument, a value of 0, indicates that it's a space release.
Allocations get a value of 1 for that last argument.


>
> Otherwise looks fine to me.
>
> Thanks,
> Qu
>
> > +     btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
> > +     ASSERT(trans->bytes_reserved >= bytes);
> > +     trans->bytes_reserved -= bytes;
> > +}
> > +
> > +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
> >   int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
> >                                  const char *name, int name_len,
> >                                  struct btrfs_inode *dir,
> > @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
> >
> >       mutex_lock(&delayed_node->mutex);
> >
> > +     /*
> > +      * First attempt to insert the delayed item. This is to make the error
> > +      * handling path simpler in case we fail (-EEXIST). There's no risk of
> > +      * any other task coming in and running the delayed item before we do
> > +      * the metadata space reservation below, because we are holding the
> > +      * delayed node's mutex and that mutex must also be locked before the
> > +      * node's delayed items can be run.
> > +      */
> > +     ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
> > +     if (unlikely(ret)) {
> > +             btrfs_err(trans->fs_info,
> > +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
> > +                       name_len, name, index, btrfs_root_id(delayed_node->root),
> > +                       delayed_node->inode_id, dir->index_cnt,
> > +                       delayed_node->index_cnt, ret);
> > +             btrfs_release_delayed_item(delayed_item);
> > +             btrfs_release_dir_index_item_space(trans); > +          mutex_unlock(&delayed_node->mutex);
> > +             goto release_node;
> > +     }
> > +
> >       if (delayed_node->index_item_leaves == 0 ||
> >           delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
> >               delayed_node->curr_index_batch_size = data_len;
> > @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
> >                * impossible.
> >                */
> >               if (WARN_ON(ret)) {
> > -                     mutex_unlock(&delayed_node->mutex);
> >                       btrfs_release_delayed_item(delayed_item);
> > +                     mutex_unlock(&delayed_node->mutex);
> >                       goto release_node;
> >               }
> >
> >               delayed_node->index_item_leaves++;
> > -     } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
> > -             const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
> > -
> > -             /*
> > -              * Adding the new dir index item does not require touching another
> > -              * leaf, so we can release 1 unit of metadata that was previously
> > -              * reserved when starting the transaction. This applies only to
> > -              * the case where we had a transaction start and excludes the
> > -              * transaction join case (when replaying log trees).
> > -              */
> > -             trace_btrfs_space_reservation(fs_info, "transaction",
> > -                                           trans->transid, bytes, 0);
> > -             btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
> > -             ASSERT(trans->bytes_reserved >= bytes);
> > -             trans->bytes_reserved -= bytes;
> > -     }
> > -
> > -     ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
> > -     if (unlikely(ret)) {
> > -             btrfs_err(trans->fs_info,
> > -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
> > -                       name_len, name, index, btrfs_root_id(delayed_node->root),
> > -                       delayed_node->inode_id, dir->index_cnt,
> > -                       delayed_node->index_cnt, ret);
> > -             BUG();
> > +     } else {
> > +             btrfs_release_dir_index_item_space(trans);
> >       }
> >       mutex_unlock(&delayed_node->mutex);
> >
Qu Wenruo Aug. 28, 2023, 9:18 a.m. UTC | #3
On 2023/8/28 16:53, Filipe Manana wrote:
> On Mon, Aug 28, 2023 at 9:42 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>>
>>
>>
>> On 2023/8/28 16:06, fdmanana@kernel.org wrote:
>>> From: Filipe Manana <fdmanana@suse.com>
>>>
>>> Instead of calling BUG() when we fail to insert a delayed dir index item
>>> into the delayed node's tree, we can just release all the resources we
>>> have allocated/acquired before and return the error to the caller. This is
>>> fine because all existing call chains undo anything they have done before
>>> calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending
>>> snapshots in the transaction commit path).
>>>
>>> So remove the BUG() call and do proper error handling.
>>>
>>> This relates to a syzbot report linked below, but does not fix it because
>>> it only prevents hitting a BUG(), it does not fix the issue where somehow
>>> we attempt to use twice the same index number for different index items.
>>>
>>> Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/
>>> Signed-off-by: Filipe Manana <fdmanana@suse.com>
>>> ---
>>>    fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++---------------
>>>    1 file changed, 47 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
>>> index f9dae729811b..eb175ae52245 100644
>>> --- a/fs/btrfs/delayed-inode.c
>>> +++ b/fs/btrfs/delayed-inode.c
>>> @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
>>>        btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
>>>    }
>>>
>>> -/* Will return 0 or -ENOMEM */
>>> +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
>>> +{
>>> +     struct btrfs_fs_info *fs_info = trans->fs_info;
>>> +     const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
>>> +
>>> +     if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
>>> +             return;
>>> +
>>> +     /*
>>> +      * Adding the new dir index item does not require touching another
>>> +      * leaf, so we can release 1 unit of metadata that was previously
>>> +      * reserved when starting the transaction. This applies only to
>>> +      * the case where we had a transaction start and excludes the
>>> +      * transaction join case (when replaying log trees).
>>> +      */
>>> +     trace_btrfs_space_reservation(fs_info, "transaction",
>>> +                                   trans->transid, bytes, 0);
>>
>> I know this is from the old code, but shouldn't we use "-bytes" instead?
>
> Nop.
> The last argument, a value of 0, indicates that it's a space release.
> Allocations get a value of 1 for that last argument.

Oh, my bad.

Then it looks good to me.

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu

>
>
>>
>> Otherwise looks fine to me.
>>
>> Thanks,
>> Qu
>>
>>> +     btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
>>> +     ASSERT(trans->bytes_reserved >= bytes);
>>> +     trans->bytes_reserved -= bytes;
>>> +}
>>> +
>>> +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
>>>    int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
>>>                                   const char *name, int name_len,
>>>                                   struct btrfs_inode *dir,
>>> @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
>>>
>>>        mutex_lock(&delayed_node->mutex);
>>>
>>> +     /*
>>> +      * First attempt to insert the delayed item. This is to make the error
>>> +      * handling path simpler in case we fail (-EEXIST). There's no risk of
>>> +      * any other task coming in and running the delayed item before we do
>>> +      * the metadata space reservation below, because we are holding the
>>> +      * delayed node's mutex and that mutex must also be locked before the
>>> +      * node's delayed items can be run.
>>> +      */
>>> +     ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
>>> +     if (unlikely(ret)) {
>>> +             btrfs_err(trans->fs_info,
>>> +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
>>> +                       name_len, name, index, btrfs_root_id(delayed_node->root),
>>> +                       delayed_node->inode_id, dir->index_cnt,
>>> +                       delayed_node->index_cnt, ret);
>>> +             btrfs_release_delayed_item(delayed_item);
>>> +             btrfs_release_dir_index_item_space(trans); > +          mutex_unlock(&delayed_node->mutex);
>>> +             goto release_node;
>>> +     }
>>> +
>>>        if (delayed_node->index_item_leaves == 0 ||
>>>            delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
>>>                delayed_node->curr_index_batch_size = data_len;
>>> @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
>>>                 * impossible.
>>>                 */
>>>                if (WARN_ON(ret)) {
>>> -                     mutex_unlock(&delayed_node->mutex);
>>>                        btrfs_release_delayed_item(delayed_item);
>>> +                     mutex_unlock(&delayed_node->mutex);
>>>                        goto release_node;
>>>                }
>>>
>>>                delayed_node->index_item_leaves++;
>>> -     } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
>>> -             const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
>>> -
>>> -             /*
>>> -              * Adding the new dir index item does not require touching another
>>> -              * leaf, so we can release 1 unit of metadata that was previously
>>> -              * reserved when starting the transaction. This applies only to
>>> -              * the case where we had a transaction start and excludes the
>>> -              * transaction join case (when replaying log trees).
>>> -              */
>>> -             trace_btrfs_space_reservation(fs_info, "transaction",
>>> -                                           trans->transid, bytes, 0);
>>> -             btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
>>> -             ASSERT(trans->bytes_reserved >= bytes);
>>> -             trans->bytes_reserved -= bytes;
>>> -     }
>>> -
>>> -     ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
>>> -     if (unlikely(ret)) {
>>> -             btrfs_err(trans->fs_info,
>>> -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
>>> -                       name_len, name, index, btrfs_root_id(delayed_node->root),
>>> -                       delayed_node->inode_id, dir->index_cnt,
>>> -                       delayed_node->index_cnt, ret);
>>> -             BUG();
>>> +     } else {
>>> +             btrfs_release_dir_index_item_space(trans);
>>>        }
>>>        mutex_unlock(&delayed_node->mutex);
>>>
diff mbox series

Patch

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index f9dae729811b..eb175ae52245 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1413,7 +1413,29 @@  void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
 	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
 }
 
-/* Will return 0 or -ENOMEM */
+static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+
+	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
+		return;
+
+	/*
+	 * Adding the new dir index item does not require touching another
+	 * leaf, so we can release 1 unit of metadata that was previously
+	 * reserved when starting the transaction. This applies only to
+	 * the case where we had a transaction start and excludes the
+	 * transaction join case (when replaying log trees).
+	 */
+	trace_btrfs_space_reservation(fs_info, "transaction",
+				      trans->transid, bytes, 0);
+	btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
+	ASSERT(trans->bytes_reserved >= bytes);
+	trans->bytes_reserved -= bytes;
+}
+
+/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 				   const char *name, int name_len,
 				   struct btrfs_inode *dir,
@@ -1455,6 +1477,27 @@  int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 
 	mutex_lock(&delayed_node->mutex);
 
+	/*
+	 * First attempt to insert the delayed item. This is to make the error
+	 * handling path simpler in case we fail (-EEXIST). There's no risk of
+	 * any other task coming in and running the delayed item before we do
+	 * the metadata space reservation below, because we are holding the
+	 * delayed node's mutex and that mutex must also be locked before the
+	 * node's delayed items can be run.
+	 */
+	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
+	if (unlikely(ret)) {
+		btrfs_err(trans->fs_info,
+"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
+			  name_len, name, index, btrfs_root_id(delayed_node->root),
+			  delayed_node->inode_id, dir->index_cnt,
+			  delayed_node->index_cnt, ret);
+		btrfs_release_delayed_item(delayed_item);
+		btrfs_release_dir_index_item_space(trans);
+		mutex_unlock(&delayed_node->mutex);
+		goto release_node;
+	}
+
 	if (delayed_node->index_item_leaves == 0 ||
 	    delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
 		delayed_node->curr_index_batch_size = data_len;
@@ -1472,37 +1515,14 @@  int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 		 * impossible.
 		 */
 		if (WARN_ON(ret)) {
-			mutex_unlock(&delayed_node->mutex);
 			btrfs_release_delayed_item(delayed_item);
+			mutex_unlock(&delayed_node->mutex);
 			goto release_node;
 		}
 
 		delayed_node->index_item_leaves++;
-	} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
-		const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
-
-		/*
-		 * Adding the new dir index item does not require touching another
-		 * leaf, so we can release 1 unit of metadata that was previously
-		 * reserved when starting the transaction. This applies only to
-		 * the case where we had a transaction start and excludes the
-		 * transaction join case (when replaying log trees).
-		 */
-		trace_btrfs_space_reservation(fs_info, "transaction",
-					      trans->transid, bytes, 0);
-		btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
-		ASSERT(trans->bytes_reserved >= bytes);
-		trans->bytes_reserved -= bytes;
-	}
-
-	ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
-	if (unlikely(ret)) {
-		btrfs_err(trans->fs_info,
-"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
-			  name_len, name, index, btrfs_root_id(delayed_node->root),
-			  delayed_node->inode_id, dir->index_cnt,
-			  delayed_node->index_cnt, ret);
-		BUG();
+	} else {
+		btrfs_release_dir_index_item_space(trans);
 	}
 	mutex_unlock(&delayed_node->mutex);