
[v2,12/15] btrfs: move log_new_dir_dentries() above btrfs_log_inode()

Message ID a0d3441b4ae6b66dd6046533679aa947bc18e793.1661165149.git.fdmanana@suse.com (mailing list archive)
State New, archived
Series btrfs: some updates to delayed items and inode logging

Commit Message

Filipe Manana Aug. 22, 2022, 10:51 a.m. UTC
From: Filipe Manana <fdmanana@suse.com>

The static function log_new_dir_dentries() is currently defined below
btrfs_log_inode(), but an upcoming patch introduces a new function that
is called by btrfs_log_inode() and needs to call log_new_dir_dentries().
So move log_new_dir_dentries() to a location between btrfs_log_inode()
and need_log_inode() (the latter is called by log_new_dir_dentries()).

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/tree-log.c | 334 ++++++++++++++++++++++----------------------
 1 file changed, 167 insertions(+), 167 deletions(-)
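
A note for context (not part of the patch itself): a static C function
has to be declared before any call to it, so defining
log_new_dir_dentries() above its upcoming caller avoids the need for a
separate forward declaration. A minimal, hypothetical sketch (invented
names, not code from tree-log.c) of this ordering constraint:

/*
 * Defining the static helper first lets the caller below use it without
 * a separate forward declaration, which is what moving
 * log_new_dir_dentries() above its new caller achieves.
 */
static int helper(int x)
{
	return x * 2;
}

static int new_caller(int x)
{
	/* helper() is already defined above, no forward declaration needed. */
	return helper(x) + 1;
}

Moving the definition rather than adding a prototype is the typical
choice in btrfs code, keeping the file free of redundant declarations.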

Patch

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 15a4a150cb27..94ff4a4598dc 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5320,6 +5320,173 @@  static bool need_log_inode(const struct btrfs_trans_handle *trans,
 	return true;
 }
 
+struct btrfs_dir_list {
+	u64 ino;
+	struct list_head list;
+};
+
+/*
+ * Log the inodes of the new dentries of a directory.
+ * See process_dir_items_leaf() for details about why it is needed.
+ * This is a recursive operation - if an existing dentry corresponds to a
+ * directory, that directory's new entries are logged too (same behaviour as
+ * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
+ * the dentries point to we do not acquire their VFS lock, otherwise lockdep
+ * complains about the following circular lock dependency / possible deadlock:
+ *
+ *        CPU0                                        CPU1
+ *        ----                                        ----
+ * lock(&type->i_mutex_dir_key#3/2);
+ *                                            lock(sb_internal#2);
+ *                                            lock(&type->i_mutex_dir_key#3/2);
+ * lock(&sb->s_type->i_mutex_key#14);
+ *
+ * Where sb_internal is the lock (a counter that works as a lock) acquired by
+ * sb_start_intwrite() in btrfs_start_transaction().
+ * Not acquiring the VFS lock of the inodes is still safe because:
+ *
+ * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
+ *    that while logging the inode new references (names) are added or removed
+ *    from the inode, leaving the logged inode item with a link count that does
+ *    not match the number of logged inode reference items. This is fine because
+ *    at log replay time we compute the real number of links and correct the
+ *    link count in the inode item (see replay_one_buffer() and
+ *    link_to_fixup_dir());
+ *
+ * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
+ *    while logging the inode's items new index items (key type
+ *    BTRFS_DIR_INDEX_KEY) are added to fs/subvol tree and the logged inode item
+ *    has a size that doesn't match the sum of the lengths of all the logged
+ *    names - this is ok, not a problem, because at log replay time we set the
+ *    directory's i_size to the correct value (see replay_one_name() and
+ *    do_overwrite_item()).
+ */
+static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
+				struct btrfs_inode *start_inode,
+				struct btrfs_log_ctx *ctx)
+{
+	struct btrfs_root *root = start_inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_path *path;
+	LIST_HEAD(dir_list);
+	struct btrfs_dir_list *dir_elem;
+	u64 ino = btrfs_ino(start_inode);
+	int ret = 0;
+
+	/*
+	 * If we are logging a new name, as part of a link or rename operation,
+	 * don't bother logging new dentries, as we just want to log the names
+	 * of an inode and that any new parents exist.
+	 */
+	if (ctx->logging_new_name)
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	while (true) {
+		struct extent_buffer *leaf;
+		struct btrfs_key min_key;
+		bool continue_curr_inode = true;
+		int nritems;
+		int i;
+
+		min_key.objectid = ino;
+		min_key.type = BTRFS_DIR_INDEX_KEY;
+		min_key.offset = 0;
+again:
+		btrfs_release_path(path);
+		ret = btrfs_search_forward(root, &min_key, path, trans->transid);
+		if (ret < 0) {
+			break;
+		} else if (ret > 0) {
+			ret = 0;
+			goto next;
+		}
+
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
+		for (i = path->slots[0]; i < nritems; i++) {
+			struct btrfs_dir_item *di;
+			struct btrfs_key di_key;
+			struct inode *di_inode;
+			int log_mode = LOG_INODE_EXISTS;
+			int type;
+
+			btrfs_item_key_to_cpu(leaf, &min_key, i);
+			if (min_key.objectid != ino ||
+			    min_key.type != BTRFS_DIR_INDEX_KEY) {
+				continue_curr_inode = false;
+				break;
+			}
+
+			di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
+			type = btrfs_dir_type(leaf, di);
+			if (btrfs_dir_transid(leaf, di) < trans->transid)
+				continue;
+			btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
+			if (di_key.type == BTRFS_ROOT_ITEM_KEY)
+				continue;
+
+			btrfs_release_path(path);
+			di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
+			if (IS_ERR(di_inode)) {
+				ret = PTR_ERR(di_inode);
+				goto out;
+			}
+
+			if (!need_log_inode(trans, BTRFS_I(di_inode))) {
+				btrfs_add_delayed_iput(di_inode);
+				break;
+			}
+
+			ctx->log_new_dentries = false;
+			if (type == BTRFS_FT_DIR)
+				log_mode = LOG_INODE_ALL;
+			ret = btrfs_log_inode(trans, BTRFS_I(di_inode),
+					      log_mode, ctx);
+			btrfs_add_delayed_iput(di_inode);
+			if (ret)
+				goto out;
+			if (ctx->log_new_dentries) {
+				dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
+				if (!dir_elem) {
+					ret = -ENOMEM;
+					goto out;
+				}
+				dir_elem->ino = di_key.objectid;
+				list_add_tail(&dir_elem->list, &dir_list);
+			}
+			break;
+		}
+
+		if (continue_curr_inode && min_key.offset < (u64)-1) {
+			min_key.offset++;
+			goto again;
+		}
+
+next:
+		if (list_empty(&dir_list))
+			break;
+
+		dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list, list);
+		ino = dir_elem->ino;
+		list_del(&dir_elem->list);
+		kfree(dir_elem);
+	}
+out:
+	btrfs_free_path(path);
+	if (ret) {
+		struct btrfs_dir_list *next;
+
+		list_for_each_entry_safe(dir_elem, next, &dir_list, list)
+			kfree(dir_elem);
+	}
+
+	return ret;
+}
+
 struct btrfs_ino_list {
 	u64 ino;
 	u64 parent;
@@ -5956,173 +6123,6 @@  static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-struct btrfs_dir_list {
-	u64 ino;
-	struct list_head list;
-};
-
-/*
- * Log the inodes of the new dentries of a directory.
- * See process_dir_items_leaf() for details about why it is needed.
- * This is a recursive operation - if an existing dentry corresponds to a
- * directory, that directory's new entries are logged too (same behaviour as
- * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
- * the dentries point to we do not acquire their VFS lock, otherwise lockdep
- * complains about the following circular lock dependency / possible deadlock:
- *
- *        CPU0                                        CPU1
- *        ----                                        ----
- * lock(&type->i_mutex_dir_key#3/2);
- *                                            lock(sb_internal#2);
- *                                            lock(&type->i_mutex_dir_key#3/2);
- * lock(&sb->s_type->i_mutex_key#14);
- *
- * Where sb_internal is the lock (a counter that works as a lock) acquired by
- * sb_start_intwrite() in btrfs_start_transaction().
- * Not acquiring the VFS lock of the inodes is still safe because:
- *
- * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
- *    that while logging the inode new references (names) are added or removed
- *    from the inode, leaving the logged inode item with a link count that does
- *    not match the number of logged inode reference items. This is fine because
- *    at log replay time we compute the real number of links and correct the
- *    link count in the inode item (see replay_one_buffer() and
- *    link_to_fixup_dir());
- *
- * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
- *    while logging the inode's items new index items (key type
- *    BTRFS_DIR_INDEX_KEY) are added to fs/subvol tree and the logged inode item
- *    has a size that doesn't match the sum of the lengths of all the logged
- *    names - this is ok, not a problem, because at log replay time we set the
- *    directory's i_size to the correct value (see replay_one_name() and
- *    do_overwrite_item()).
- */
-static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
-				struct btrfs_inode *start_inode,
-				struct btrfs_log_ctx *ctx)
-{
-	struct btrfs_root *root = start_inode->root;
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_path *path;
-	LIST_HEAD(dir_list);
-	struct btrfs_dir_list *dir_elem;
-	u64 ino = btrfs_ino(start_inode);
-	int ret = 0;
-
-	/*
-	 * If we are logging a new name, as part of a link or rename operation,
-	 * don't bother logging new dentries, as we just want to log the names
-	 * of an inode and that any new parents exist.
-	 */
-	if (ctx->logging_new_name)
-		return 0;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
-	while (true) {
-		struct extent_buffer *leaf;
-		struct btrfs_key min_key;
-		bool continue_curr_inode = true;
-		int nritems;
-		int i;
-
-		min_key.objectid = ino;
-		min_key.type = BTRFS_DIR_INDEX_KEY;
-		min_key.offset = 0;
-again:
-		btrfs_release_path(path);
-		ret = btrfs_search_forward(root, &min_key, path, trans->transid);
-		if (ret < 0) {
-			break;
-		} else if (ret > 0) {
-			ret = 0;
-			goto next;
-		}
-
-		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
-		for (i = path->slots[0]; i < nritems; i++) {
-			struct btrfs_dir_item *di;
-			struct btrfs_key di_key;
-			struct inode *di_inode;
-			int log_mode = LOG_INODE_EXISTS;
-			int type;
-
-			btrfs_item_key_to_cpu(leaf, &min_key, i);
-			if (min_key.objectid != ino ||
-			    min_key.type != BTRFS_DIR_INDEX_KEY) {
-				continue_curr_inode = false;
-				break;
-			}
-
-			di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
-			type = btrfs_dir_type(leaf, di);
-			if (btrfs_dir_transid(leaf, di) < trans->transid)
-				continue;
-			btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
-			if (di_key.type == BTRFS_ROOT_ITEM_KEY)
-				continue;
-
-			btrfs_release_path(path);
-			di_inode = btrfs_iget(fs_info->sb, di_key.objectid, root);
-			if (IS_ERR(di_inode)) {
-				ret = PTR_ERR(di_inode);
-				goto out;
-			}
-
-			if (!need_log_inode(trans, BTRFS_I(di_inode))) {
-				btrfs_add_delayed_iput(di_inode);
-				break;
-			}
-
-			ctx->log_new_dentries = false;
-			if (type == BTRFS_FT_DIR)
-				log_mode = LOG_INODE_ALL;
-			ret = btrfs_log_inode(trans, BTRFS_I(di_inode),
-					      log_mode, ctx);
-			btrfs_add_delayed_iput(di_inode);
-			if (ret)
-				goto out;
-			if (ctx->log_new_dentries) {
-				dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
-				if (!dir_elem) {
-					ret = -ENOMEM;
-					goto out;
-				}
-				dir_elem->ino = di_key.objectid;
-				list_add_tail(&dir_elem->list, &dir_list);
-			}
-			break;
-		}
-
-		if (continue_curr_inode && min_key.offset < (u64)-1) {
-			min_key.offset++;
-			goto again;
-		}
-
-next:
-		if (list_empty(&dir_list))
-			break;
-
-		dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list, list);
-		ino = dir_elem->ino;
-		list_del(&dir_elem->list);
-		kfree(dir_elem);
-	}
-out:
-	btrfs_free_path(path);
-	if (ret) {
-		struct btrfs_dir_list *next;
-
-		list_for_each_entry_safe(dir_elem, next, &dir_list, list)
-			kfree(dir_elem);
-	}
-
-	return ret;
-}
-
 static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
 				 struct btrfs_inode *inode,
 				 struct btrfs_log_ctx *ctx)