Message ID | ee7caf888c95075685cd068d6e78f96be283b4b5.1693209858.git.fdmanana@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: updates to error path for delayed dir index insertion failure | expand |
On 2023/8/28 16:06, fdmanana@kernel.org wrote: > From: Filipe Manana <fdmanana@suse.com> > > Instead of calling BUG() when we fail to insert a delayed dir index item > into the delayed node's tree, we can just release all the resources we > have allocated/acquired before and return the error to the caller. This is > fine because all existing call chains undo anything they have done before > calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending > snapshots in the transaction commit path). > > So remove the BUG() call and do proper error handling. > > This relates to a syzbot report linked below, but does not fix it because > it only prevents hitting a BUG(), it does not fix the issue where somehow > we attempt to use twice the same index number for different index items. > > Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ > Signed-off-by: Filipe Manana <fdmanana@suse.com> > --- > fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++--------------- > 1 file changed, 47 insertions(+), 27 deletions(-) > > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c > index f9dae729811b..eb175ae52245 100644 > --- a/fs/btrfs/delayed-inode.c > +++ b/fs/btrfs/delayed-inode.c > @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) > btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH); > } > > -/* Will return 0 or -ENOMEM */ > +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans) > +{ > + struct btrfs_fs_info *fs_info = trans->fs_info; > + const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); > + > + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) > + return; > + > + /* > + * Adding the new dir index item does not require touching another > + * leaf, so we can release 1 unit of metadata that was previously > + * reserved when starting the transaction. This applies only to > + * the case where we had a transaction start and excludes the > + * transaction join case (when replaying log trees). > + */ > + trace_btrfs_space_reservation(fs_info, "transaction", > + trans->transid, bytes, 0); I know this is from the old code, but shouldn't we use "-bytes" instead? Otherwise looks fine to me. Thanks, Qu > + btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); > + ASSERT(trans->bytes_reserved >= bytes); > + trans->bytes_reserved -= bytes; > +} > + > +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */ > int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, > const char *name, int name_len, > struct btrfs_inode *dir, > @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, > > mutex_lock(&delayed_node->mutex); > > + /* > + * First attempt to insert the delayed item. This is to make the error > + * handling path simpler in case we fail (-EEXIST). There's no risk of > + * any other task coming in and running the delayed item before we do > + * the metadata space reservation below, because we are holding the > + * delayed node's mutex and that mutex must also be locked before the > + * node's delayed items can be run. > + */ > + ret = __btrfs_add_delayed_item(delayed_node, delayed_item); > + if (unlikely(ret)) { > + btrfs_err(trans->fs_info, > +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", > + name_len, name, index, btrfs_root_id(delayed_node->root), > + delayed_node->inode_id, dir->index_cnt, > + delayed_node->index_cnt, ret); > + btrfs_release_delayed_item(delayed_item); > + btrfs_release_dir_index_item_space(trans); > + mutex_unlock(&delayed_node->mutex); > + goto release_node; > + } > + > if (delayed_node->index_item_leaves == 0 || > delayed_node->curr_index_batch_size + data_len > leaf_data_size) { > delayed_node->curr_index_batch_size = data_len; > @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, > * impossible. > */ > if (WARN_ON(ret)) { > - mutex_unlock(&delayed_node->mutex); > btrfs_release_delayed_item(delayed_item); > + mutex_unlock(&delayed_node->mutex); > goto release_node; > } > > delayed_node->index_item_leaves++; > - } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { > - const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); > - > - /* > - * Adding the new dir index item does not require touching another > - * leaf, so we can release 1 unit of metadata that was previously > - * reserved when starting the transaction. This applies only to > - * the case where we had a transaction start and excludes the > - * transaction join case (when replaying log trees). > - */ > - trace_btrfs_space_reservation(fs_info, "transaction", > - trans->transid, bytes, 0); > - btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); > - ASSERT(trans->bytes_reserved >= bytes); > - trans->bytes_reserved -= bytes; > - } > - > - ret = __btrfs_add_delayed_item(delayed_node, delayed_item); > - if (unlikely(ret)) { > - btrfs_err(trans->fs_info, > -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", > - name_len, name, index, btrfs_root_id(delayed_node->root), > - delayed_node->inode_id, dir->index_cnt, > - delayed_node->index_cnt, ret); > - BUG(); > + } else { > + btrfs_release_dir_index_item_space(trans); > } > mutex_unlock(&delayed_node->mutex); >
On Mon, Aug 28, 2023 at 9:42 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: > > > > On 2023/8/28 16:06, fdmanana@kernel.org wrote: > > From: Filipe Manana <fdmanana@suse.com> > > > > Instead of calling BUG() when we fail to insert a delayed dir index item > > into the delayed node's tree, we can just release all the resources we > > have allocated/acquired before and return the error to the caller. This is > > fine because all existing call chains undo anything they have done before > > calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending > > snapshots in the transaction commit path). > > > > So remove the BUG() call and do proper error handling. > > > > This relates to a syzbot report linked below, but does not fix it because > > it only prevents hitting a BUG(), it does not fix the issue where somehow > > we attempt to use twice the same index number for different index items. > > > > Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ > > Signed-off-by: Filipe Manana <fdmanana@suse.com> > > --- > > fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++--------------- > > 1 file changed, 47 insertions(+), 27 deletions(-) > > > > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c > > index f9dae729811b..eb175ae52245 100644 > > --- a/fs/btrfs/delayed-inode.c > > +++ b/fs/btrfs/delayed-inode.c > > @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) > > btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH); > > } > > > > -/* Will return 0 or -ENOMEM */ > > +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans) > > +{ > > + struct btrfs_fs_info *fs_info = trans->fs_info; > > + const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); > > + > > + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) > > + return; > > + > > + /* > > + * Adding the new dir index item does not require touching another > > + * leaf, so we can release 1 unit of metadata that was previously > > + * reserved when starting the transaction. This applies only to > > + * the case where we had a transaction start and excludes the > > + * transaction join case (when replaying log trees). > > + */ > > + trace_btrfs_space_reservation(fs_info, "transaction", > > + trans->transid, bytes, 0); > > I know this is from the old code, but shouldn't we use "-bytes" instead? Nop. The last argument, a value of 0, indicates that it's a space release. Allocations get a value of 1 for that last argument. > > Otherwise looks fine to me. > > Thanks, > Qu > > > + btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); > > + ASSERT(trans->bytes_reserved >= bytes); > > + trans->bytes_reserved -= bytes; > > +} > > + > > +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */ > > int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, > > const char *name, int name_len, > > struct btrfs_inode *dir, > > @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, > > > > mutex_lock(&delayed_node->mutex); > > > > + /* > > + * First attempt to insert the delayed item. This is to make the error > > + * handling path simpler in case we fail (-EEXIST). There's no risk of > > + * any other task coming in and running the delayed item before we do > > + * the metadata space reservation below, because we are holding the > > + * delayed node's mutex and that mutex must also be locked before the > > + * node's delayed items can be run. > > + */ > > + ret = __btrfs_add_delayed_item(delayed_node, delayed_item); > > + if (unlikely(ret)) { > > + btrfs_err(trans->fs_info, > > +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", > > + name_len, name, index, btrfs_root_id(delayed_node->root), > > + delayed_node->inode_id, dir->index_cnt, > > + delayed_node->index_cnt, ret); > > + btrfs_release_delayed_item(delayed_item); > > + btrfs_release_dir_index_item_space(trans); > + mutex_unlock(&delayed_node->mutex); > > + goto release_node; > > + } > > + > > if (delayed_node->index_item_leaves == 0 || > > delayed_node->curr_index_batch_size + data_len > leaf_data_size) { > > delayed_node->curr_index_batch_size = data_len; > > @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, > > * impossible. > > */ > > if (WARN_ON(ret)) { > > - mutex_unlock(&delayed_node->mutex); > > btrfs_release_delayed_item(delayed_item); > > + mutex_unlock(&delayed_node->mutex); > > goto release_node; > > } > > > > delayed_node->index_item_leaves++; > > - } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { > > - const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); > > - > > - /* > > - * Adding the new dir index item does not require touching another > > - * leaf, so we can release 1 unit of metadata that was previously > > - * reserved when starting the transaction. This applies only to > > - * the case where we had a transaction start and excludes the > > - * transaction join case (when replaying log trees). > > - */ > > - trace_btrfs_space_reservation(fs_info, "transaction", > > - trans->transid, bytes, 0); > > - btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); > > - ASSERT(trans->bytes_reserved >= bytes); > > - trans->bytes_reserved -= bytes; > > - } > > - > > - ret = __btrfs_add_delayed_item(delayed_node, delayed_item); > > - if (unlikely(ret)) { > > - btrfs_err(trans->fs_info, > > -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", > > - name_len, name, index, btrfs_root_id(delayed_node->root), > > - delayed_node->inode_id, dir->index_cnt, > > - delayed_node->index_cnt, ret); > > - BUG(); > > + } else { > > + btrfs_release_dir_index_item_space(trans); > > } > > mutex_unlock(&delayed_node->mutex); > >
On 2023/8/28 16:53, Filipe Manana wrote: > On Mon, Aug 28, 2023 at 9:42 AM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote: >> >> >> >> On 2023/8/28 16:06, fdmanana@kernel.org wrote: >>> From: Filipe Manana <fdmanana@suse.com> >>> >>> Instead of calling BUG() when we fail to insert a delayed dir index item >>> into the delayed node's tree, we can just release all the resources we >>> have allocated/acquired before and return the error to the caller. This is >>> fine because all existing call chains undo anything they have done before >>> calling btrfs_insert_delayed_dir_index() or BUG_ON (when creating pending >>> snapshots in the transaction commit path). >>> >>> So remove the BUG() call and do proper error handling. >>> >>> This relates to a syzbot report linked below, but does not fix it because >>> it only prevents hitting a BUG(), it does not fix the issue where somehow >>> we attempt to use twice the same index number for different index items. >>> >>> Link: https://lore.kernel.org/linux-btrfs/00000000000036e1290603e097e0@google.com/ >>> Signed-off-by: Filipe Manana <fdmanana@suse.com> >>> --- >>> fs/btrfs/delayed-inode.c | 74 +++++++++++++++++++++++++--------------- >>> 1 file changed, 47 insertions(+), 27 deletions(-) >>> >>> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c >>> index f9dae729811b..eb175ae52245 100644 >>> --- a/fs/btrfs/delayed-inode.c >>> +++ b/fs/btrfs/delayed-inode.c >>> @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) >>> btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH); >>> } >>> >>> -/* Will return 0 or -ENOMEM */ >>> +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans) >>> +{ >>> + struct btrfs_fs_info *fs_info = trans->fs_info; >>> + const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); >>> + >>> + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) >>> + return; >>> + >>> + /* >>> + * Adding the new dir index item does not require touching another >>> + * leaf, so we can release 1 unit of metadata that was previously >>> + * reserved when starting the transaction. This applies only to >>> + * the case where we had a transaction start and excludes the >>> + * transaction join case (when replaying log trees). >>> + */ >>> + trace_btrfs_space_reservation(fs_info, "transaction", >>> + trans->transid, bytes, 0); >> >> I know this is from the old code, but shouldn't we use "-bytes" instead? > > Nop. > The last argument, a value of 0, indicates that it's a space release. > Allocations get a value of 1 for that last argument. Oh, my bad. Then it looks good to me. Reviewed-by: Qu Wenruo <wqu@suse.com> Thanks, Qu > > >> >> Otherwise looks fine to me. >> >> Thanks, >> Qu >> >>> + btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); >>> + ASSERT(trans->bytes_reserved >= bytes); >>> + trans->bytes_reserved -= bytes; >>> +} >>> + >>> +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */ >>> int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, >>> const char *name, int name_len, >>> struct btrfs_inode *dir, >>> @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, >>> >>> mutex_lock(&delayed_node->mutex); >>> >>> + /* >>> + * First attempt to insert the delayed item. This is to make the error >>> + * handling path simpler in case we fail (-EEXIST). There's no risk of >>> + * any other task coming in and running the delayed item before we do >>> + * the metadata space reservation below, because we are holding the >>> + * delayed node's mutex and that mutex must also be locked before the >>> + * node's delayed items can be run. >>> + */ >>> + ret = __btrfs_add_delayed_item(delayed_node, delayed_item); >>> + if (unlikely(ret)) { >>> + btrfs_err(trans->fs_info, >>> +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", >>> + name_len, name, index, btrfs_root_id(delayed_node->root), >>> + delayed_node->inode_id, dir->index_cnt, >>> + delayed_node->index_cnt, ret); >>> + btrfs_release_delayed_item(delayed_item); >>> + btrfs_release_dir_index_item_space(trans); > + mutex_unlock(&delayed_node->mutex); >>> + goto release_node; >>> + } >>> + >>> if (delayed_node->index_item_leaves == 0 || >>> delayed_node->curr_index_batch_size + data_len > leaf_data_size) { >>> delayed_node->curr_index_batch_size = data_len; >>> @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, >>> * impossible. >>> */ >>> if (WARN_ON(ret)) { >>> - mutex_unlock(&delayed_node->mutex); >>> btrfs_release_delayed_item(delayed_item); >>> + mutex_unlock(&delayed_node->mutex); >>> goto release_node; >>> } >>> >>> delayed_node->index_item_leaves++; >>> - } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { >>> - const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); >>> - >>> - /* >>> - * Adding the new dir index item does not require touching another >>> - * leaf, so we can release 1 unit of metadata that was previously >>> - * reserved when starting the transaction. This applies only to >>> - * the case where we had a transaction start and excludes the >>> - * transaction join case (when replaying log trees). >>> - */ >>> - trace_btrfs_space_reservation(fs_info, "transaction", >>> - trans->transid, bytes, 0); >>> - btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); >>> - ASSERT(trans->bytes_reserved >= bytes); >>> - trans->bytes_reserved -= bytes; >>> - } >>> - >>> - ret = __btrfs_add_delayed_item(delayed_node, delayed_item); >>> - if (unlikely(ret)) { >>> - btrfs_err(trans->fs_info, >>> -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", >>> - name_len, name, index, btrfs_root_id(delayed_node->root), >>> - delayed_node->inode_id, dir->index_cnt, >>> - delayed_node->index_cnt, ret); >>> - BUG(); >>> + } else { >>> + btrfs_release_dir_index_item_space(trans); >>> } >>> mutex_unlock(&delayed_node->mutex); >>>
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f9dae729811b..eb175ae52245 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1413,7 +1413,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH); } -/* Will return 0 or -ENOMEM */ +static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); + + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) + return; + + /* + * Adding the new dir index item does not require touching another + * leaf, so we can release 1 unit of metadata that was previously + * reserved when starting the transaction. This applies only to + * the case where we had a transaction start and excludes the + * transaction join case (when replaying log trees). + */ + trace_btrfs_space_reservation(fs_info, "transaction", + trans->transid, bytes, 0); + btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); + ASSERT(trans->bytes_reserved >= bytes); + trans->bytes_reserved -= bytes; +} + +/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, const char *name, int name_len, struct btrfs_inode *dir, @@ -1455,6 +1477,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); + /* + * First attempt to insert the delayed item. This is to make the error + * handling path simpler in case we fail (-EEXIST). There's no risk of + * any other task coming in and running the delayed item before we do + * the metadata space reservation below, because we are holding the + * delayed node's mutex and that mutex must also be locked before the + * node's delayed items can be run. + */ + ret = __btrfs_add_delayed_item(delayed_node, delayed_item); + if (unlikely(ret)) { + btrfs_err(trans->fs_info, +"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", + name_len, name, index, btrfs_root_id(delayed_node->root), + delayed_node->inode_id, dir->index_cnt, + delayed_node->index_cnt, ret); + btrfs_release_delayed_item(delayed_item); + btrfs_release_dir_index_item_space(trans); + mutex_unlock(&delayed_node->mutex); + goto release_node; + } + if (delayed_node->index_item_leaves == 0 || delayed_node->curr_index_batch_size + data_len > leaf_data_size) { delayed_node->curr_index_batch_size = data_len; @@ -1472,37 +1515,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, * impossible. */ if (WARN_ON(ret)) { - mutex_unlock(&delayed_node->mutex); btrfs_release_delayed_item(delayed_item); + mutex_unlock(&delayed_node->mutex); goto release_node; } delayed_node->index_item_leaves++; - } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { - const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); - - /* - * Adding the new dir index item does not require touching another - * leaf, so we can release 1 unit of metadata that was previously - * reserved when starting the transaction. This applies only to - * the case where we had a transaction start and excludes the - * transaction join case (when replaying log trees). - */ - trace_btrfs_space_reservation(fs_info, "transaction", - trans->transid, bytes, 0); - btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL); - ASSERT(trans->bytes_reserved >= bytes); - trans->bytes_reserved -= bytes; - } - - ret = __btrfs_add_delayed_item(delayed_node, delayed_item); - if (unlikely(ret)) { - btrfs_err(trans->fs_info, -"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d", - name_len, name, index, btrfs_root_id(delayed_node->root), - delayed_node->inode_id, dir->index_cnt, - delayed_node->index_cnt, ret); - BUG(); + } else { + btrfs_release_dir_index_item_space(trans); } mutex_unlock(&delayed_node->mutex);