From patchwork Fri Sep 8 17:20:18 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377726 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 771DDEE49BF for ; Fri, 8 Sep 2023 17:20:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S244570AbjIHRUr (ORCPT ); Fri, 8 Sep 2023 13:20:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51448 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244475AbjIHRUq (ORCPT ); Fri, 8 Sep 2023 13:20:46 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3DB0F199F for ; Fri, 8 Sep 2023 10:20:43 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 571D7C433C8 for ; Fri, 8 Sep 2023 17:20:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193642; bh=eX/m8ZC4DKItBFwkr5XLXzmlOp6ggKJZZzQhndSBjB8=; h=From:To:Subject:Date:In-Reply-To:References:From; b=oeasi+XNScDvSxKhcVYxm9JSwmLNQdErkuSjbs78GlUpV483lj8GG9mylTJ9vPvuB vQXjwYawEqbTPNVAJPu94R/3TiTBdWeLFcNE/jMMOYWSyxvQAB5F8ltqJIuaJj1Jgu 9wAYbzjVi2jZ50Kv/tz5LFXlwn6U1WBtuj8b/j9ETuOrNXqh+eDT3LB194RbQDVna1 LsaO8Sv9MJ3e7Hrs/Ps85dC9O5dFxUM0bodIyGoCcAvkSun8A/ERuQElpNs4wKT5+0 Sn/bxe2CTtQ6ZPS7/P5/Djr2I943xzh9tt8HGLgLO/4msyaWXvAQYD7W6IDT2d5j0m D+l/g1uKEcQmA== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 01/21] btrfs: fix race when refilling delayed refs block reserve Date: Fri, 8 Sep 2023 18:20:18 +0100 Message-Id: <54f5c6756b3f9a25e4db3131c2a1c75920ad2911.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana If we have two (or more) tasks attempting to refill the delayed refs block reserve we can end up with the delayed block reserve being over reserved, that is, with a reserved space greater than its size. If this happens, we are holding to more reserved space than necessary for a while. The race happens like this: 1) The delayed refs block reserve has a size of 8M and a reserved space of 6M for example; 2) Task A calls btrfs_delayed_refs_rsv_refill(); 3) Task B also calls btrfs_delayed_refs_rsv_refill(); 4) Task A sees there's a 2M difference between the size and the reserved space of the delayed refs rsv, so it will reserve 2M of space by calling btrfs_reserve_metadata_bytes(); 5) Task B also sees that 2M difference, and like task A, it reserves another 2M of metadata space; 6) Both task A and task B increase the reserved space of block reserve by 2M, by calling btrfs_block_rsv_add_bytes(), so the block reserve ends up with a size of 8M and a reserved space of 10M; 7) The extra, over reserved space will eventually be freed by some task calling btrfs_delayed_refs_rsv_release() -> btrfs_block_rsv_release() -> block_rsv_release_bytes(), as there we will detect the over reserve and release that space. So fix this by checking if we still need to add space to the delayed refs block reserve after reserving the metadata space, and if we don't, just release that space immediately. Signed-off-by: Filipe Manana --- fs/btrfs/delayed-ref.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6a13cf00218b..1043f66cc130 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -163,6 +163,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1); u64 num_bytes = 0; + u64 refilled_bytes; + u64 to_free; int ret = -ENOSPC; spin_lock(&block_rsv->lock); @@ -178,9 +180,38 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); if (ret) return ret; - btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false); - trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, num_bytes, 1); + + /* + * We may have raced with someone else, so check again if we the block + * reserve is still not full and release any excess space. + */ + spin_lock(&block_rsv->lock); + if (block_rsv->reserved < block_rsv->size) { + u64 needed = block_rsv->size - block_rsv->reserved; + + if (num_bytes >= needed) { + block_rsv->reserved += needed; + block_rsv->full = true; + to_free = num_bytes - needed; + refilled_bytes = needed; + } else { + block_rsv->reserved += num_bytes; + to_free = 0; + refilled_bytes = num_bytes; + } + } else { + to_free = num_bytes; + refilled_bytes = 0; + } + spin_unlock(&block_rsv->lock); + + if (to_free > 0) + btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info, + to_free); + + if (refilled_bytes > 0) + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0, + refilled_bytes, 1); return 0; } From patchwork Fri Sep 8 17:20:19 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377725 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 608BAEE801F for ; Fri, 8 Sep 2023 17:20:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S244622AbjIHRUv (ORCPT ); Fri, 8 Sep 2023 13:20:51 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51458 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244595AbjIHRUs (ORCPT ); Fri, 8 Sep 2023 13:20:48 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 43987CE6 for ; Fri, 8 Sep 2023 10:20:44 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5B513C433C9 for ; Fri, 8 Sep 2023 17:20:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193643; bh=jA9A5cGAuCBRpJRqXqg/35Vi9L6epnbirlaDi0GjMaI=; h=From:To:Subject:Date:In-Reply-To:References:From; b=DEu4ugEwPRUaxONXwQuVFGLmvjeQdG6WW+fybWbe/a6uUSQg1hNGr1xluOjng1Rl7 QFloXGtiEtcoJ7wSNRzfcsP0yIYBv6AlDWFe7uzfyvWkwG+YtR490JA/f1d4hKKR40 vSUqRNOSt78sCCDSwduFw4in/StU2K6epixSFOHJGKqcq0RZWQ+pe4kWOhzMjP/2ou 2OOejyT/3bEE7YzfIwrcNanno8e5AJZ+l4n0owVUilN1gBJIpBtsy1OBQBFvQBAehY AqvmWg35N6b5bDaTrxgaaXed5+SwPCCkttkdNn2RV9AbbopJaLibKtMnCvxpqdsFr/ 2cpZ00ndoEKTw== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 02/21] btrfs: prevent transaction block reserve underflow when starting transaction Date: Fri, 8 Sep 2023 18:20:19 +0100 Message-Id: <8043421e01104d45fa71cc2b7de7e3668558605e.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana When starting a transaction, with a non-zero number of items, we reserve metadata space for that number of items and for delayed refs by doing a call to btrfs_block_rsv_add(), with the transaction block reserve passed as the block reserve argument. This reserves metadata space and adds it to the transaction block reserve. Later we migrate the space we reserved for delayed references from the transaction block reserve into the delayed refs block reserve, by calling btrfs_migrate_to_delayed_refs_rsv(). btrfs_migrate_to_delayed_refs_rsv() decrements the number of bytes to migrate from the source block reserve, and this however may result in an underflow in case the space added to the transaction block reserve ended up being used by another task that has not reserved enough space for its own use - examples are tasks doing reflinks or hole punching because they end up calling btrfs_replace_file_extents() -> btrfs_drop_extents() and may need to modify/COW a variable number of leaves/paths, so they keep trying to use space from the transaction block reserve when they need to COW an extent buffer, and may end up trying to use more space then they have reserved (1 unit/path only for removing file extent items). This can be avoided by simply reserving space first without adding it to the transaction block reserve, then add the space for delayed refs to the delayed refs block reserve and finally add the remaining reserved space to the transaction block reserve. This also makes the code a bit shorter and simpler. So just do that. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/delayed-ref.c | 9 +-------- fs/btrfs/delayed-ref.h | 1 - fs/btrfs/transaction.c | 5 +++-- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 1043f66cc130..9fe4ccca50a0 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -103,24 +103,17 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) * Transfer bytes to our delayed refs rsv. * * @fs_info: the filesystem - * @src: source block rsv to transfer from * @num_bytes: number of bytes to transfer * - * This transfers up to the num_bytes amount from the src rsv to the + * This transfers up to the num_bytes amount, previously reserved, to the * delayed_refs_rsv. Any extra bytes are returned to the space info. */ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, u64 num_bytes) { struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; u64 to_free = 0; - spin_lock(&src->lock); - src->reserved -= num_bytes; - src->size -= num_bytes; - spin_unlock(&src->lock); - spin_lock(&delayed_refs_rsv->lock); if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { u64 delta = delayed_refs_rsv->size - diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index b8e14b0ba5f1..fd9bf2b709c0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -407,7 +407,6 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, u64 num_bytes); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ab09542f2170..1985ab543ad2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -625,14 +625,15 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_block_rsv_add(fs_info, rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush); if (ret) goto reserve_fail; if (delayed_refs_bytes) { - btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv, + btrfs_migrate_to_delayed_refs_rsv(fs_info, delayed_refs_bytes); num_bytes -= delayed_refs_bytes; } + btrfs_block_rsv_add_bytes(rsv, num_bytes, true); if (rsv->space_info->force_alloc) do_chunk_alloc = true; From patchwork Fri Sep 8 17:20:20 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377730 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A753CEE57CD for ; Fri, 8 Sep 2023 17:20:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245219AbjIHRUx (ORCPT ); Fri, 8 Sep 2023 13:20:53 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51464 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244687AbjIHRUu (ORCPT ); Fri, 8 Sep 2023 13:20:50 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 482881FCD for ; Fri, 8 Sep 2023 10:20:45 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5FBD0C433CA for ; Fri, 8 Sep 2023 17:20:44 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193644; bh=QhsAYN1/B4SuD/xagbZC1aqcx7Yf80RfluZulI3MC/4=; h=From:To:Subject:Date:In-Reply-To:References:From; b=jlK2pjR91+HtXUVWqOJSQaCxdzD8qn8cAjo9YUHuKROpDxWvAxa0vQ7Ztb4mRG4RE vbcjvpcqN0Qrly9V4/AtNneY1NR/XqcjOxsv1H9fvIfMdjh3g1dM8U0NDfBth/eq7h 3gJMjFPWqfbelPoJyzRArx7iuLmxG78UbCu7GCQ11SdPdJ1CKkWtAp1+f8qQG9vSz+ Rrrd8EPv4Gs1125zCXumzbqyNZTkYKXia/OFrvfKfHTQT314FHJfKtE6xZAjS0vYLf c3pPZJ+7UzNs1NjJ9XxwzrnNo7gTTe8raegjXUYT/hxF1AAUXfWfbVnGSUidHFx59j H2TSaRL5o6XrQ== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 03/21] btrfs: pass a space_info argument to btrfs_reserve_metadata_bytes() Date: Fri, 8 Sep 2023 18:20:20 +0100 Message-Id: <9c419540b416841b3661e837f310299cf64a6ba9.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana We are passing a block reserve argument to btrfs_reserve_metadata_bytes() which is not really used, all we need is to pass the space_info associated to the block reserve, we don't change the block reserve at all. Not only it's pointless to pass the block reserve, it's also confusing as one might think that the reserved bytes will end up being added to the passed block reserve, when that's not the case. The pattern for reserving space and adding it to a block reserve is to first reserve space with btrfs_reserve_metadata_bytes() and if that succeeds, then add the space to a block reserve by calling btrfs_block_rsv_add_bytes(). Also the reverse of btrfs_reserve_metadata_bytes(), which is btrfs_space_info_free_bytes_may_use(), takes a space_info argument and not a block reserve, so one more reason to pass a space_info and not a block reserve to btrfs_reserve_metadata_bytes(). So change btrfs_reserve_metadata_bytes() and its callers to pass a space_info argument instead of a block reserve argument. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/block-rsv.c | 12 +++++++----- fs/btrfs/delalloc-space.c | 3 ++- fs/btrfs/delayed-ref.c | 6 +++--- fs/btrfs/space-info.c | 12 +++++------- fs/btrfs/space-info.h | 2 +- fs/btrfs/transaction.c | 3 ++- 6 files changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 77684c5e0c8b..6ccd91bbff3e 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -221,7 +221,8 @@ int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info, if (num_bytes == 0) return 0; - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, + num_bytes, flush); if (!ret) btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true); @@ -261,7 +262,8 @@ int btrfs_block_rsv_refill(struct btrfs_fs_info *fs_info, if (!ret) return 0; - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, + num_bytes, flush); if (!ret) { btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false); return 0; @@ -517,8 +519,8 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, block_rsv->type, ret); } try_reserve: - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize, - BTRFS_RESERVE_NO_FLUSH); + ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, + blocksize, BTRFS_RESERVE_NO_FLUSH); if (!ret) return block_rsv; /* @@ -539,7 +541,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, * one last time to force a reservation if there's enough actual space * on disk to make the reservation. */ - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize, + ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, blocksize, BTRFS_RESERVE_FLUSH_EMERGENCY); if (!ret) return block_rsv; diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 427abaf608b8..a764db67c033 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -346,7 +346,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, noflush); if (ret) return ret; - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, meta_reserve, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, + meta_reserve, flush); if (ret) { btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); return ret; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9fe4ccca50a0..0991f66471ff 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -154,6 +154,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush) { struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_space_info *space_info = block_rsv->space_info; u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1); u64 num_bytes = 0; u64 refilled_bytes; @@ -170,7 +171,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, if (!num_bytes) return 0; - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush); if (ret) return ret; @@ -199,8 +200,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); if (to_free > 0) - btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info, - to_free); + btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free); if (refilled_bytes > 0) trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d7e8cd4f140c..ca7c702172fd 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -1742,7 +1742,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * Try to reserve metadata bytes from the block_rsv's space. * * @fs_info: the filesystem - * @block_rsv: block_rsv we're allocating for + * @space_info: the space_info we're allocating for * @orig_bytes: number of bytes we want * @flush: whether or not we can flush to make our reservation * @@ -1754,21 +1754,19 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * space already. */ int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, + struct btrfs_space_info *space_info, u64 orig_bytes, enum btrfs_reserve_flush_enum flush) { int ret; - ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush); + ret = __reserve_bytes(fs_info, space_info, orig_bytes, flush); if (ret == -ENOSPC) { trace_btrfs_space_reservation(fs_info, "space_info:enospc", - block_rsv->space_info->flags, - orig_bytes, 1); + space_info->flags, orig_bytes, 1); if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) - btrfs_dump_space_info(fs_info, block_rsv->space_info, - orig_bytes, 0); + btrfs_dump_space_info(fs_info, space_info, orig_bytes, 0); } return ret; } diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 0bb9d14e60a8..ef0fac9c70ae 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -212,7 +212,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, struct btrfs_space_info *info, u64 bytes, int dump_block_groups); int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, + struct btrfs_space_info *space_info, u64 orig_bytes, enum btrfs_reserve_flush_enum flush); void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1985ab543ad2..ee63d92d66b4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -625,7 +625,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_reserve_metadata_bytes(fs_info, rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, rsv->space_info, + num_bytes, flush); if (ret) goto reserve_fail; if (delayed_refs_bytes) { From patchwork Fri Sep 8 17:20:21 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377724 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 867B6EE57DF for ; Fri, 8 Sep 2023 17:20:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245083AbjIHRUw (ORCPT ); Fri, 8 Sep 2023 13:20:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51472 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244788AbjIHRUu (ORCPT ); Fri, 8 Sep 2023 13:20:50 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 49F2C1FCF for ; Fri, 8 Sep 2023 10:20:46 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6263AC433C9 for ; Fri, 8 Sep 2023 17:20:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193645; bh=WC5XyTgi0HiQ+ZF+zyXgvKQhnjNTXPdeR1yAUkvVffY=; h=From:To:Subject:Date:In-Reply-To:References:From; b=f/cNBCvMmT8BWjBOlnKU7jLK8mu7GbEwmcITyLpeZPXPvbWoj98CKSRIjwIbn6KWX SYzUni4vukL/P4lRxOZ9XOwl6NDACPRx1ijFmsBnE+B9zaQFt4boYw14jNWhJDX2Kv dW8Oni5Ynbhkm96K5DRhwMFYgA8k14Uh6LeTRQbBcVyj3JxI2z3eMOWQOTFpU0Uc8L kmQojbP7ZDsNJL1SRR157nmkI8F6telaTmHs7Wh7+d+RseLlX1u+x+Ksn369jPg0dM YsxpReuttJY+XfqD2xWt+HlthjVPZEXjsJh/7W0nHGbm20blwwcAlrHA4PbnHlF/4q buTzqY0XK2zVQ== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 04/21] btrfs: remove unnecessary logic when running new delayed references Date: Fri, 8 Sep 2023 18:20:21 +0100 Message-Id: <03c71b956f576a30904349327a672b7a7d4df450.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana When running delayed references, at btrfs_run_delayed_refs(), we have this logic to run any new delayed references that might have been added just after we ran all delayed references. This logic grabs the first delayed reference, then locks it to wait for any contention on it before running all new delayed references. This however is pointless and not necessary because at __btrfs_run_delayed_refs() when we start running delayed references, we pick the first reference with btrfs_obtain_ref_head() and then we will lock it (with btrfs_delayed_ref_lock()). So remove the duplicate and unnecessary logic at btrfs_run_delayed_refs(). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 07d3896e6824..929fbb620d68 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2135,9 +2135,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_delayed_ref_head *head; int ret; int run_all = count == (unsigned long)-1; @@ -2166,25 +2164,16 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, btrfs_create_pending_block_groups(trans); spin_lock(&delayed_refs->lock); - node = rb_first_cached(&delayed_refs->href_root); - if (!node) { + if (RB_EMPTY_ROOT(&delayed_refs->href_root.rb_root)) { spin_unlock(&delayed_refs->lock); - goto out; + return 0; } - head = rb_entry(node, struct btrfs_delayed_ref_head, - href_node); - refcount_inc(&head->refs); spin_unlock(&delayed_refs->lock); - /* Mutex was contended, block until it's released and retry. */ - mutex_lock(&head->mutex); - mutex_unlock(&head->mutex); - - btrfs_put_delayed_ref_head(head); cond_resched(); goto again; } -out: + return 0; } From patchwork Fri Sep 8 17:20:22 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377727 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 16AA6EE14D8 for ; Fri, 8 Sep 2023 17:20:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S244788AbjIHRUx (ORCPT ); Fri, 8 Sep 2023 13:20:53 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51484 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244689AbjIHRUu (ORCPT ); Fri, 8 Sep 2023 13:20:50 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4B6901FD5 for ; Fri, 8 Sep 2023 10:20:47 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 64E55C433C8 for ; Fri, 8 Sep 2023 17:20:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193646; bh=0NQR0PFuMgzDjpJKs1IOmYS7XDZV+BTPRN8OFVvCerU=; h=From:To:Subject:Date:In-Reply-To:References:From; b=OjO7DsxEliXZGQfSJdrL3GqGHOrR1PLdDkXkDxsq8QhtsWpK76AjBHpDdI0vD8J6L m9JqeM5l5njMypbW8e8/S3+jD8d4Xbzk61o5c6ZhLA6wwTmG9qYOfFaSzQPKv8s3zO re3eZdQ+Zar5D8XT3FDA+dgagFB6Wjo/CQfixS6xOD9wMCuAzshQD2x1ObQlilwZy2 bQaN7W7EL1qOS/JIb8aCMqGT4lOu1uLgkf8N3PwJXQYQbNAxLOk/it3TveUgTCxdg9 8fUEO15kPAIL8ebdfD/9iSM7chZ5ia0drHADxPoCXTHUm7euVZHysKnRZY2vEhMkfi ggJ4GFpxuSlIw== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 05/21] btrfs: remove the refcount warning/check at btrfs_put_delayed_ref() Date: Fri, 8 Sep 2023 18:20:22 +0100 Message-Id: <3d8c3c336dbda73eeb118f1ecc9cee796a1eaf28.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana At btrfs_put_delayed_ref(), it's pointless to have a WARN_ON() to check if the refcount of the delayed ref is zero. Such check is already done by the refcount_t module and refcount_dec_and_test(), which loudly complains if we try to decrement a reference count that is currently 0. The WARN_ON() dates back to the time when used a regular atomic_t type for the reference counter, before we switched to the refcount_t type. The main goal of the refcount_t type/module is precisely to catch such types of bugs and loudly complain if they happen. This also reduces a bit the module's text size. Before this change: $ size fs/btrfs/btrfs.ko text data bss dec hex filename 1612483 167145 16864 1796492 1b698c fs/btrfs/btrfs.ko After this change: $ size fs/btrfs/btrfs.ko text data bss dec hex filename 1612371 167073 16864 1796308 1b68d4 fs/btrfs/btrfs.ko Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/delayed-ref.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index fd9bf2b709c0..46a1421cd99d 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -338,7 +338,6 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) { - WARN_ON(refcount_read(&ref->refs) == 0); if (refcount_dec_and_test(&ref->refs)) { WARN_ON(!RB_EMPTY_NODE(&ref->ref_node)); switch (ref->type) { From patchwork Fri Sep 8 17:20:23 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377728 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 55D20EEB560 for ; Fri, 8 Sep 2023 17:20:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245304AbjIHRUy (ORCPT ); Fri, 8 Sep 2023 13:20:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56992 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244595AbjIHRUw (ORCPT ); Fri, 8 Sep 2023 13:20:52 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5EC10CE6 for ; Fri, 8 Sep 2023 10:20:48 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 67A6CC433C9 for ; Fri, 8 Sep 2023 17:20:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193648; bh=gCXc2CaFF7Jt4URwSAn9CIJjkNy0IXpKpcarbaKkiD0=; h=From:To:Subject:Date:In-Reply-To:References:From; b=PzuNa6jTCy3huBnds3HxgEK71WL2Eq1RnsWeFAb09ocetzNED9n2ryNyzC5ivhFwq EC0y4DhxQToQYXYt7uf/3pzkbOSTsd+Q3F+/jXV1JBxhCo8tIQHgwp8KFFfMn4D+fW nGHCygmCjkd55DgBP/sP1O3xdWGHbJfgd2P72TrwDB+2xASXEOjshooV5YoZkveFoz kD1z/4r/KSgkstpaTwresUJpCVwtRtJEaP3V+4hK9dSjA76Eu0pAHiRqoLckwHQSfE Q/cP2TzWrs+n53txLNPaATlSGsRGOU9lHemfYwrVZzhYBtYOcr3l3BdWZ6+1PWu6Fr PPacKV7avC5uQ== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 06/21] btrfs: return -EUCLEAN for delayed tree ref with a ref count not equals to 1 Date: Fri, 8 Sep 2023 18:20:23 +0100 Message-Id: <55ab327daf38130664d2a4558a14dc1b04a76d2a.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana When running a delayed tree reference, if we find a ref count different from 1, we return -EIO. This isn't an IO error, as it indicates either a bug in the delayed refs code or a memory corruption, so change the error code from -EIO to -EUCLEAN. Also tag the branch as 'unlikely' as this is not expected to ever happen, and change the error message to print the tree block's bytenr without the parenthesis (and there was a missing space between the 'block' word and the opening parenthesis), for consistency as that's the style we used everywhere else. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 929fbb620d68..8fca9c2b8917 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1699,12 +1699,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, parent = ref->parent; ref_root = ref->root; - if (node->ref_mod != 1) { + if (unlikely(node->ref_mod != 1)) { btrfs_err(trans->fs_info, - "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu", + "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu", node->bytenr, node->ref_mod, node->action, ref_root, parent); - return -EIO; + return -EUCLEAN; } if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { BUG_ON(!extent_op || !extent_op->update_flags); From patchwork Fri Sep 8 17:20:24 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377729 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E1037EEB562 for ; Fri, 8 Sep 2023 17:20:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245370AbjIHRUz (ORCPT ); Fri, 8 Sep 2023 13:20:55 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57000 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S245159AbjIHRUw (ORCPT ); Fri, 8 Sep 2023 13:20:52 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 69DF5CE6 for ; Fri, 8 Sep 2023 10:20:49 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6A1B9C433C8 for ; Fri, 8 Sep 2023 17:20:48 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193649; bh=ch6hzQCf5+/4tM6R9la/DCt2U4s80l5B2XAUzv8/kfU=; h=From:To:Subject:Date:In-Reply-To:References:From; b=R5ImJ/byTjS18Aw2J29vcyy0P0Gk87dRwg1cTAC6EA5KW7oaKMbUYsGXXxPIjK/Mk pqxLtGxePQQTRV6XcFjcNrKFmop9N0fNz1MMMkYgLaCyoevR0AhKxhJ2Krd4azNWoc kr/tUI47qT7ZMEsQYbM9+6uk8t9oi55xVqbnak8MYNds+oe4/kVVjqk8E4olQC0VZ0 McNEjii1w7D/G8sHxIwc00/8SegSetpXDb20EJ8ftVTunYTh/b/fMm+8JAcosyBSf0 ijUfOS8xBNsJNgBlowSP4+JVFIKR/2n6j+wIMnuzG8ECCPSZDfKUAomSXDRbias9ZR K0piDE1cqXZ2w== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 07/21] btrfs: remove redundant BUG_ON() from __btrfs_inc_extent_ref() Date: Fri, 8 Sep 2023 18:20:24 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana At __btrfs_inc_extent_ref() we are doing a BUG_ON() if we are dealing with a tree block reference that has a reference count that is different from 1, but we have already dealt with this case at run_delayed_tree_ref(), making it useless. So remove the BUG_ON(). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8fca9c2b8917..cf503f2972a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1514,15 +1514,14 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* now insert the actual backref */ - if (owner < BTRFS_FIRST_FREE_OBJECTID) { - BUG_ON(refs_to_add != 1); + if (owner < BTRFS_FIRST_FREE_OBJECTID) ret = insert_tree_block_ref(trans, path, bytenr, parent, root_objectid); - } else { + else ret = insert_extent_data_ref(trans, path, bytenr, parent, root_objectid, owner, offset, refs_to_add); - } + if (ret) btrfs_abort_transaction(trans, ret); out: From patchwork Fri Sep 8 17:20:25 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377731 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 697DEEEB563 for ; Fri, 8 Sep 2023 17:20:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245396AbjIHRUz (ORCPT ); Fri, 8 Sep 2023 13:20:55 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57012 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244687AbjIHRUy (ORCPT ); Fri, 8 Sep 2023 13:20:54 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 522EBCE6 for ; Fri, 8 Sep 2023 10:20:50 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6CE84C433CB for ; Fri, 8 Sep 2023 17:20:49 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193650; bh=TxZmlvyZ6C3+PQhT/HuAYK6BHwt4zLCulzjUm+W5JL4=; h=From:To:Subject:Date:In-Reply-To:References:From; b=D32ko8QhbcGlJr4hwrcmX2aZhw2BzHWCtM0LNy8Sz3vilSW1CaGb/ag1IMe5EHD3d /Kk+Cj2ZFEOm5SSpDn9kwDeeLQ0gJakNSFDsl+HMq2Z4G+LD9r/VfMgrNuiMJAGITP B45UK/8tu5CVCwjYI+9rbaxgi+R98Ly94f3ecR7PUJqvxwbBwPpXXNZtASUOnPDQ24 TpQAL2Lz0e3Lu1MeCg7t7ABCjM+1S0J3jWz9kIaf9NkXjUMr7Xpte/ATdru+9R3WSf zsei+gKo+LH+NNW7Hya0uqQ+uGdYox6S/v9xbuG+GcWcnAEmc/dPjdFanlw9LGtvEP BAvoTrepwJWtA== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 08/21] btrfs: remove refs_to_add argument from __btrfs_inc_extent_ref() Date: Fri, 8 Sep 2023 18:20:25 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana Currently the 'refs_to_add' argument of __btrfs_inc_extent_ref() always matches the value of node->ref_mod, so remove the argument and use node->ref_mod at __btrfs_inc_extent_ref(). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cf503f2972a1..16e511f3d24b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1465,8 +1465,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, * always passed as 0. For data extents it is the fileoffset * this extent belongs to. * - * @refs_to_add Number of references to add - * * @extent_op Pointer to a structure, holding information necessary when * updating a tree block's flags * @@ -1474,7 +1472,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int refs_to_add, + u64 owner, u64 offset, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_path *path; @@ -1484,6 +1482,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 bytenr = node->bytenr; u64 num_bytes = node->num_bytes; u64 refs; + int refs_to_add = node->ref_mod; int ret; path = btrfs_alloc_path(); @@ -1562,7 +1561,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, ref->objectid, ref->offset, - node->ref_mod, extent_op); + extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, node, parent, ref_root, ref->objectid, @@ -1710,7 +1709,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ret = alloc_reserved_tree_block(trans, node, extent_op); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, - ref->level, 0, 1, extent_op); + ref->level, 0, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, node, parent, ref_root, ref->level, 0, 1, extent_op); From patchwork Fri Sep 8 17:20:26 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377733 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id DE790EEB561 for ; Fri, 8 Sep 2023 17:20:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245404AbjIHRU4 (ORCPT ); Fri, 8 Sep 2023 13:20:56 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57028 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S245341AbjIHRUz (ORCPT ); Fri, 8 Sep 2023 13:20:55 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 53BDD199F for ; Fri, 8 Sep 2023 10:20:51 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6FF3BC433C9 for ; Fri, 8 Sep 2023 17:20:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193651; bh=oSekCeaXH8j4TNAJnFikiKQcu1ew2ny8AW4C0vzQa7o=; h=From:To:Subject:Date:In-Reply-To:References:From; b=Qf/928iagRgzI6+bMMwMykDfOoYTzqNbObxnERzqkpu24gqp2FFNApzo499zkO92q iquUfD6nLOTzfLJa2ueEaOM7ancc5nRI8QjcNyEWSnjmZcJzmroePbsuaQT/xPAr33 mOiHlRJ+acxpYlDmq5reY5C5h4Yjr9nnWGtf0s+7+hbKvPB7o9a1WlUagt51AXflp2 Q2VVJd2X8gcpf4ZS1LQt1xlQr3K9b5matYpCeVEQxNQh1WmoTx6EZZBhRu01vqJruJ YRKbqTaxm6Uj2zcB1BpRTR0yMogt4lg+ofVVFhf70hT6kzu7hvx+l6w0ZKkC3S3Cyc wTp4XmllHGhhQ== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 09/21] btrfs: remove refs_to_drop argument from __btrfs_free_extent() Date: Fri, 8 Sep 2023 18:20:26 +0100 Message-Id: <70661a0f9e1799b1213071a7313e2ec612c7177b.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana Currently the 'refs_to_drop' argument of __btrfs_free_extent() always matches the value of node->ref_mod, so remove the argument and use node->ref_mod at __btrfs_free_extent(). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 16e511f3d24b..de63e873be3a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,7 +49,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, u64 owner_objectid, - u64 owner_offset, int refs_to_drop, + u64 owner_offset, struct btrfs_delayed_extent_op *extra_op); static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, struct extent_buffer *leaf, @@ -1565,8 +1565,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, node, parent, ref_root, ref->objectid, - ref->offset, node->ref_mod, - extent_op); + ref->offset, extent_op); } else { BUG(); } @@ -1712,7 +1711,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ref->level, 0, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, node, parent, ref_root, - ref->level, 0, 1, extent_op); + ref->level, 0, extent_op); } else { BUG(); } @@ -2927,7 +2926,7 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans, static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, u64 owner_objectid, - u64 owner_offset, int refs_to_drop, + u64 owner_offset, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_fs_info *info = trans->fs_info; @@ -2942,6 +2941,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, int extent_slot = 0; int found_extent = 0; int num_to_del = 1; + int refs_to_drop = node->ref_mod; u32 item_size; u64 refs; u64 bytenr = node->bytenr; From patchwork Fri Sep 8 17:20:27 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377732 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2F28EEEB565 for ; Fri, 8 Sep 2023 17:20:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245406AbjIHRU4 (ORCPT ); Fri, 8 Sep 2023 13:20:56 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57042 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S245399AbjIHRUz (ORCPT ); Fri, 8 Sep 2023 13:20:55 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 59550199F for ; Fri, 8 Sep 2023 10:20:52 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7349AC433CD for ; Fri, 8 Sep 2023 17:20:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193652; bh=LdoCaAHdILvGMagXDNJCdW6byGwG+gSiARwnBDld4c0=; h=From:To:Subject:Date:In-Reply-To:References:From; b=UfpHZWzr1ewttu8V1cs0fv+J05UdcORFaDMWxAOJdv06Eo0/xR3KiY5oJeXN8heMh DcA+8LJTDTxae3BfS/X8Jvqkwhd9L3NsZjDxw51vBDGhJTwwdxoK0qtpRNCesRSWaN MaUCO5vn9J/YY2hdNsTm7f4vIBFELo0ceJe1RIveqTqd7uB3KFG/zYgxLwqPbwDiV4 /qrtiEjO+0BV3GfJWE4WN7VupQKQi+jslA7SAf4RDs9ItIOtb9VbOwbjo3WiDt0QHs UGu3W3kMc2IPs8MI+9/KgwPEcli1cTahrC1DgiHoTHSqk18f5lJFP4tYAZUaKPQyfp fik9MnmkmwUkw== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 10/21] btrfs: initialize key where it's used when running delayed data ref Date: Fri, 8 Sep 2023 18:20:27 +0100 Message-Id: <5b936afa138b6e190a322987e7d6c9b2ad7230f1.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana At run_delayed_data_ref() we are always initializing a key but the key is only needed and used if we are inserting a new extent. So move the declaration and initialization of the key to 'if' branch where it's used. Also rename the key from 'ins' to 'key', as it's a more clear name. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index de63e873be3a..16c7122bdfb5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1535,15 +1535,10 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_delayed_data_ref *ref; - struct btrfs_key ins; u64 parent = 0; u64 ref_root = 0; u64 flags = 0; - ins.objectid = node->bytenr; - ins.offset = node->num_bytes; - ins.type = BTRFS_EXTENT_ITEM_KEY; - ref = btrfs_delayed_node_to_data_ref(node); trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action); @@ -1552,11 +1547,18 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ref_root = ref->root; if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { + struct btrfs_key key; + if (extent_op) flags |= extent_op->flags_to_set; + + key.objectid = node->bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = node->num_bytes; + ret = alloc_reserved_file_extent(trans, parent, ref_root, flags, ref->objectid, - ref->offset, &ins, + ref->offset, &key, node->ref_mod); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, From patchwork Fri Sep 8 17:20:28 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377734 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1D203EEB562 for ; Fri, 8 Sep 2023 17:20:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245408AbjIHRU6 (ORCPT ); Fri, 8 Sep 2023 13:20:58 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57050 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S245399AbjIHRU5 (ORCPT ); Fri, 8 Sep 2023 13:20:57 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5F772199F for ; Fri, 8 Sep 2023 10:20:53 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 77CAFC433C9 for ; Fri, 8 Sep 2023 17:20:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193653; bh=/9zl7CzhPubtrhywg73kyoR9t3NquCE4wttMRFycPMk=; h=From:To:Subject:Date:In-Reply-To:References:From; b=S7vvDLKouN8IDTLpvTFG+OJgaA6VmYlEIpX4rkRewVDRG85A1dGlrt/VtOA4aWXhQ 9RdMpltj/Z00FnsIyw6Kgr0BF4tcwDMdY+fYcdfEOd5lO73+sxO/9UBkspPtH+mGzv 3NXm7MgFJb32EFOGBVyMozQ2nVEQc5+zMjslt5VVi3uAw6yeEZPVTX5IbA8dgQM1qR Jnf0JRKn2SmprP+N6rCF3TBVlkfdGcSFijp3hIVKH6XHrnbyR5VEi3N4nV4Xi2bzbt ugDBRfxoRTukMuTmj0lCgkcH/GtNbl62sgp5VznPnJdnc4WSMjGPyz6XUWLR8v8JSK Sh4R2xG1C2ogQ== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 11/21] btrfs: remove pointless 'ref_root' variable from run_delayed_data_ref() Date: Fri, 8 Sep 2023 18:20:28 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana The 'ref_root' variable, at run_delayed_data_ref(), is not really needed as we can always use ref->root directly, plus its initialization to 0 is completely pointless as we assign it ref->root before its first use. So just drop that variable and use ref->root directly. This may help avoid some warnings with clang tools such as the one reported/fixed by commit 966de47ff0c9 ("btrfs: remove redundant initialization of variables in log_new_ancestors"). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 16c7122bdfb5..21049609c5fc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1536,7 +1536,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_delayed_data_ref *ref; u64 parent = 0; - u64 ref_root = 0; u64 flags = 0; ref = btrfs_delayed_node_to_data_ref(node); @@ -1544,7 +1543,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; - ref_root = ref->root; if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { struct btrfs_key key; @@ -1556,17 +1554,17 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = node->num_bytes; - ret = alloc_reserved_file_extent(trans, parent, ref_root, + ret = alloc_reserved_file_extent(trans, parent, ref->root, flags, ref->objectid, ref->offset, &key, node->ref_mod); } else if (node->action == BTRFS_ADD_DELAYED_REF) { - ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, + ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root, ref->objectid, ref->offset, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { ret = __btrfs_free_extent(trans, node, parent, - ref_root, ref->objectid, + ref->root, ref->objectid, ref->offset, extent_op); } else { BUG(); From patchwork Fri Sep 8 17:20:29 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377735 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id DCEB1EEB560 for ; Fri, 8 Sep 2023 17:20:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245412AbjIHRU6 (ORCPT ); Fri, 8 Sep 2023 13:20:58 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57054 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S239072AbjIHRU5 (ORCPT ); Fri, 8 Sep 2023 13:20:57 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 629521FCD for ; Fri, 8 Sep 2023 10:20:54 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7C064C433CA for ; Fri, 8 Sep 2023 17:20:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193654; bh=jwuYcvop/9E7P2ZX6LtQ5vOMGKiV+/0ZJkxyn7k1S/Y=; h=From:To:Subject:Date:In-Reply-To:References:From; b=nEw3jNDpswQUdrxrEDsBhCS2q01b3IxwT2gu5ZgF1jXcZ0mld67+gD6pFJtIch/2N h2TTQ+coG3Kfs3xzVhFBAjRlqjp2KrRDY1ciT0Y1GLYgbdCJPF5VAdyB81LVL8WcW2 IaxdzHSsiOrsZ1xpjVR5Q5pINeYZYp96RqLz1xz5SbpEPGdFw64D8PGOv8ZzK+P/nZ hXrwDkOXe9GmR0CZ+rJaYwXmaVm2QsLvRnmK9XYka6r25MBK2v3GplQ/Otik0CfKEN qSANzVK8Yxp85CpdMjneJRYOcqJvq2Fo4jO/Q49qX2+InffxZQUI6Qn4R6vw1EmkLq hOGen8BgJIUUQ== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 12/21] btrfs: log message if extent item not found when running delayed extent op Date: Fri, 8 Sep 2023 18:20:29 +0100 Message-Id: <89c5e2e0918dc291f85d8024898e9c142ea6b1e4.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana When running a delayed extent operation, if we don't find the extent item in the extent tree we just return -EIO without any logged message. This indicates some bug or possibly a memory or fs corruption, so the return value should not be -EIO but -EUCLEAN instead, and since it's not expected to ever happen, print an informative error message so that if it happens we have some idea of what went wrong, where to look at. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 21049609c5fc..167d0438da6e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1653,7 +1653,10 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, goto again; } } else { - err = -EIO; + err = -EUCLEAN; + btrfs_err(fs_info, + "missing extent item for extent %llu num_bytes %llu level %d", + head->bytenr, head->num_bytes, extent_op->level); goto out; } } From patchwork Fri Sep 8 17:20:30 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377736 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8CE9EEEB563 for ; Fri, 8 Sep 2023 17:20:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245414AbjIHRU7 (ORCPT ); Fri, 8 Sep 2023 13:20:59 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57060 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S245411AbjIHRU6 (ORCPT ); Fri, 8 Sep 2023 13:20:58 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 66756199F for ; Fri, 8 Sep 2023 10:20:55 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 80ECFC433C8 for ; Fri, 8 Sep 2023 17:20:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193655; bh=Sr76n7opmfLz2+fc8EwIiwSQ3SrcHWpeL4ymY3yoX2U=; h=From:To:Subject:Date:In-Reply-To:References:From; b=b+F2lFZMyPxmvPrbqUXENzfmuOWUNiJE3NZdzJ2HfRFNtBWIbSnV0A8qLW1xHypqi yVN6wLkk/t46UviwUJzsur3df+T07KgUvxrJShS8i2HlygYeZVnIOu+6RR265xGOPh Y0XWMoe3TVGOdGdpc/Fp1APFX9rnq8AWz/JnLIkupJy/oFG4OSt/3kSFz+FhbH9bS7 tyFc5ZDLEB7Zz/ONKAuVw+XWzUju48fmd8yCR6TUsqZOzBt+j+oFawTxQWgby+Lmfv 6jwMDW31+dmCWezND8sZ2+cs7seb3gDGTsfq2Xh8Bqcp1NpD29N/tZGNtZli37I0IX POxIiFm7IzGBg== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 13/21] btrfs: use a single variable for return value at run_delayed_extent_op() Date: Fri, 8 Sep 2023 18:20:30 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana Instead of using a 'ret' and an 'err' variable at run_delayed_extent_op() for tracking the return value, use a single one ('ret'). This simplifies the code, makes it comply with most of the existing code and it's less prone for logic errors as time has proven over and over in the btrfs code. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 167d0438da6e..e33c4b393922 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1602,7 +1602,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; u32 item_size; int ret; - int err = 0; int metadata = 1; if (TRANS_ABORTED(trans)) @@ -1629,10 +1628,8 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, again: ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { - err = ret; goto out; - } - if (ret > 0) { + } else if (ret > 0) { if (metadata) { if (path->slots[0] > 0) { path->slots[0]--; @@ -1653,7 +1650,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, goto again; } } else { - err = -EUCLEAN; + ret = -EUCLEAN; btrfs_err(fs_info, "missing extent item for extent %llu num_bytes %llu level %d", head->bytenr, head->num_bytes, extent_op->level); @@ -1665,11 +1662,11 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, item_size = btrfs_item_size(leaf, path->slots[0]); if (unlikely(item_size < sizeof(*ei))) { - err = -EUCLEAN; + ret = -EUCLEAN; btrfs_err(fs_info, "unexpected extent item size, has %u expect >= %zu", item_size, sizeof(*ei)); - btrfs_abort_transaction(trans, err); + btrfs_abort_transaction(trans, ret); goto out; } @@ -1679,7 +1676,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); - return err; + return ret; } static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, From patchwork Fri Sep 8 17:20:31 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377737 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 91CC1EEB561 for ; Fri, 8 Sep 2023 17:20:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245411AbjIHRVB (ORCPT ); Fri, 8 Sep 2023 13:21:01 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57088 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229600AbjIHRVA (ORCPT ); Fri, 8 Sep 2023 13:21:00 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 69AC9CE6 for ; Fri, 8 Sep 2023 10:20:56 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 85C3AC433C9 for ; Fri, 8 Sep 2023 17:20:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193656; bh=XtqW2I24Gk9XFj3/2eOuoMq87j9Maazd4694Qj1HyQE=; h=From:To:Subject:Date:In-Reply-To:References:From; b=Mk0+QA6zz/BOaPEqJMv5U2PNqSghLbLNkhc2mLQhmZDMqoozbWQDhp9I7saDKC3zq uMDmIYrfaZV8t4gIUTU44ey4TowHHRRoEsoGFOkz8WnxuTfIafQ8LXdSf/jpNbcGMs XXgoVfdS4qnD+BIg9OnAQC8mdQdNHToT1HDgqdFtgaW3jeB0tPKC86Eq3IlOZVsa+M T/USLS7W1+xstczDFVYgswc1MNRZlVXJ2VxUKG+uTtZudxyXKaAHGQWB1prXncYSZI 5d4tOwVimdr5VytziOW7k2bhCSaBAhlhDdl2OsX8hGTDFKFXnuGSn61/965YBFPSou SiA2CUfXGS4Cw== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 14/21] btrfs: use a single variable for return value at lookup_inline_extent_backref() Date: Fri, 8 Sep 2023 18:20:31 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana At lookup_inline_extent_backref(), instead of using a 'ret' and an 'err' variable for tracking the return value, use a single one ('ret'). This simplifies the code, makes it comply with most of the existing code and it's less prone for logic errors as time has proven over and over in the btrfs code. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e33c4b393922..e57061106860 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -789,7 +789,6 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, int type; int want; int ret; - int err = 0; bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); int needed; @@ -816,10 +815,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, again: ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); - if (ret < 0) { - err = ret; + if (ret < 0) goto out; - } /* * We may be a newly converted file system which still has the old fat @@ -846,7 +843,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, } if (ret && !insert) { - err = -ENOENT; + ret = -ENOENT; goto out; } else if (WARN_ON(ret)) { btrfs_print_leaf(path->nodes[0]); @@ -854,18 +851,18 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, "extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu", bytenr, num_bytes, parent, root_objectid, owner, offset); - err = -EIO; + ret = -EIO; goto out; } leaf = path->nodes[0]; item_size = btrfs_item_size(leaf, path->slots[0]); if (unlikely(item_size < sizeof(*ei))) { - err = -EUCLEAN; + ret = -EUCLEAN; btrfs_err(fs_info, "unexpected extent item size, has %llu expect >= %zu", item_size, sizeof(*ei)); - btrfs_abort_transaction(trans, err); + btrfs_abort_transaction(trans, ret); goto out; } @@ -885,11 +882,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, else needed = BTRFS_REF_TYPE_BLOCK; - err = -ENOENT; + ret = -ENOENT; while (1) { if (ptr >= end) { if (ptr > end) { - err = -EUCLEAN; + ret = -EUCLEAN; btrfs_print_leaf(path->nodes[0]); btrfs_crit(fs_info, "overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu", @@ -900,7 +897,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, iref = (struct btrfs_extent_inline_ref *)ptr; type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); if (type == BTRFS_REF_TYPE_INVALID) { - err = -EUCLEAN; + ret = -EUCLEAN; goto out; } @@ -916,7 +913,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, dref = (struct btrfs_extent_data_ref *)(&iref->offset); if (match_extent_data_ref(leaf, dref, root_objectid, owner, offset)) { - err = 0; + ret = 0; break; } if (hash_extent_data_ref_item(leaf, dref) < @@ -927,14 +924,14 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); if (parent > 0) { if (parent == ref_offset) { - err = 0; + ret = 0; break; } if (ref_offset < parent) break; } else { if (root_objectid == ref_offset) { - err = 0; + ret = 0; break; } if (ref_offset < root_objectid) @@ -943,10 +940,10 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, } ptr += btrfs_extent_inline_ref_size(type); } - if (err == -ENOENT && insert) { + if (ret == -ENOENT && insert) { if (item_size + extra_size >= BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { - err = -EAGAIN; + ret = -EAGAIN; goto out; } /* @@ -958,7 +955,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, if (find_next_key(path, 0, &key) == 0 && key.objectid == bytenr && key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { - err = -EAGAIN; + ret = -EAGAIN; goto out; } } @@ -969,7 +966,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, path->search_for_extension = 0; btrfs_unlock_up_safe(path, 1); } - return err; + return ret; } /* From patchwork Fri Sep 8 17:20:32 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377738 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id AA75BEEB562 for ; Fri, 8 Sep 2023 17:21:00 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245416AbjIHRVC (ORCPT ); Fri, 8 Sep 2023 13:21:02 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57094 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233161AbjIHRVA (ORCPT ); Fri, 8 Sep 2023 13:21:00 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6B820199F for ; Fri, 8 Sep 2023 10:20:57 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 89B76C433C8 for ; Fri, 8 Sep 2023 17:20:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193657; bh=g6XNS7/EsJySe6oWOGbizXbEj0VN2mldNvkXEtl2OkE=; h=From:To:Subject:Date:In-Reply-To:References:From; b=CuQpK9PalqOoYOQiPalfP6L/mG86BbiLg8kmyM+rHGMb0T6in6YtbxqWgNJ/sCj1j uD5qpY6qnn3JMC9Vx8NqX2R2p0Ge7Z/Idvl2aTNkZh8/1urwby8udR2kP3AOZHeF/9 sXMtmz87V6Aoqd17F2BRc12a5/bnkzCjIDJJccjh7L8KTrOth9sgAuEV15B8fEvYX4 UzPU4kaQwPm9Qinv5axELFaZ2smmdlvXA50+eC8Yahekm5uR1ngkrydb3P6XAGhJ5k 6booLxdiSeY/y2skDrrNHT+43xx3ClvF5qWwTLL7cHTsC+vp3lBCgwkaLmnqj/H5/I xo2KaX57u4eJg== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 15/21] btrfs: return -EUCLEAN if extent item is missing when searching inline backref Date: Fri, 8 Sep 2023 18:20:32 +0100 Message-Id: <20d52d8e4ff00c2f64907e83bc379669cc20bb8b.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana At lookup_inline_extent_backref() when trying to insert an inline backref, if we don't find the extent item we log an error and then return -EIO. This error code is confusing because there was actually no IO error, and this means we have some corruption, either caused by a bug or something like a memory bitflip for example. So change the error code from -EIO to -EUCLEAN. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e57061106860..756589195ed7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -851,7 +851,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, "extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu", bytenr, num_bytes, parent, root_objectid, owner, offset); - ret = -EIO; + ret = -EUCLEAN; goto out; } From patchwork Fri Sep 8 17:20:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377739 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2464CEEB560 for ; Fri, 8 Sep 2023 17:21:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S239735AbjIHRVD (ORCPT ); Fri, 8 Sep 2023 13:21:03 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58622 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229600AbjIHRVC (ORCPT ); Fri, 8 Sep 2023 13:21:02 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6DCDD199F for ; Fri, 8 Sep 2023 10:20:58 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8C09AC433D9 for ; Fri, 8 Sep 2023 17:20:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193658; bh=p4DpYQJU4IDKKFFOXXn+x0V0ymG8TqWRwOgMEQPyZcc=; h=From:To:Subject:Date:In-Reply-To:References:From; b=fPqnuyo06B2RbN5SE0Zk6kyAvr3IMp7DZHjd/XhA5iHUo/AzwUibQzJzhtV7r3Fy1 5NAI/teh2m5lv8NoHk6ssAluy9VC6b7ib6f/+a8ojdKZD1ekQilVCUx5bsni56usZ2 p1CrjON7ImZ9m/2CZ4rbYQ0t+TeMVJVx++NlIXDgb1YwO1UDAO4wPIFJdZcCI6yjM+ oP9qXs4alnHHs1AmxXn+X3QHfGmEPrYgncBfa0cxvCCbEoL4Ucd2czgHyPGJmQP/AD L2vqOKZ1Jp2n6Kt63PXGFYK7zpLIQsJixgpgyrJLQziFFHE4bijRs1HZ7QJKH93Yd2 AlPhiQWquIUBA== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 16/21] btrfs: simplify check for extent item overrun at lookup_inline_extent_backref() Date: Fri, 8 Sep 2023 18:20:33 +0100 Message-Id: <85ad072aedd963064108b0bdc643a7b11140aac6.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana At lookup_inline_extent_backref() we can simplify the check for an overrun of the extent item by making the while loop's condition to be "ptr < end" and then check after the loop if an overrun happened ("ptr > end"). This reduces indentation and makes the loop condition more clear. So move the check out of the loop and change the loop condition accordingly, while also adding the 'unlikely' tag to the check since it's not supposed to be triggered. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 756589195ed7..b27e0a6878b3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -883,17 +883,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, needed = BTRFS_REF_TYPE_BLOCK; ret = -ENOENT; - while (1) { - if (ptr >= end) { - if (ptr > end) { - ret = -EUCLEAN; - btrfs_print_leaf(path->nodes[0]); - btrfs_crit(fs_info, -"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu", - path->slots[0], root_objectid, owner, offset, parent); - } - break; - } + while (ptr < end) { iref = (struct btrfs_extent_inline_ref *)ptr; type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); if (type == BTRFS_REF_TYPE_INVALID) { @@ -940,6 +930,16 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, } ptr += btrfs_extent_inline_ref_size(type); } + + if (unlikely(ptr > end)) { + ret = -EUCLEAN; + btrfs_print_leaf(path->nodes[0]); + btrfs_crit(fs_info, +"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu", + path->slots[0], root_objectid, owner, offset, parent); + goto out; + } + if (ret == -ENOENT && insert) { if (item_size + extra_size >= BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { From patchwork Fri Sep 8 17:20:34 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377740 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 817CDEEB561 for ; Fri, 8 Sep 2023 17:21:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245421AbjIHRVE (ORCPT ); Fri, 8 Sep 2023 13:21:04 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58640 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229600AbjIHRVE (ORCPT ); Fri, 8 Sep 2023 13:21:04 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 79CEB1FCD for ; Fri, 8 Sep 2023 10:20:59 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 90A5FC433C8 for ; Fri, 8 Sep 2023 17:20:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193659; bh=XOntC1jRBHX65IMSdn4ph6MJX2hkTmZzK2ys5eLMDA4=; h=From:To:Subject:Date:In-Reply-To:References:From; b=k17+0YuPAqIkHiTPkCYHCNj8IAbr8lMZqeISHqvEAXxLoJsRr3B34IHuoqm3v0XLH 7dQRyRg5mEZu4JKAgRzOPXVzvnkt9jx5kLPe1TF/qTIm8D91qeUAmtTTUhgWcZ9sWC aFdlN1UM+QlaMLQ8lVOrgUbGI+GgZANuim2P0zx/Nnt/VHzR7NGq5cwHxtNO+qDzme TOIk8hJicSNdkZxLwp/ahqh1QMzY+tUlm0wC+1W//Fa0Mk0ABqvNG6CYMfZJIJs0fQ QmLvXoAssv4eyA+b97wtQ10VUpjSLbUiKFiPNIID2k8FRG8wCHcpZQaKzpwlmg+vzB UsCiO2p41fdLw== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 17/21] btrfs: allow to run delayed refs by bytes to be released instead of count Date: Fri, 8 Sep 2023 18:20:34 +0100 Message-Id: <84017ea76addadb4f74aeb93ad1e63b7f1a78be6.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana When running delayed references, through btrfs_run_delayed_refs(), we can specify how many to run, run all existing delayed references and keep running delayed references while we can find any. This is controlled with the value of the 'count' argument, where a value of 0 means to run all delayed references that exist by the time btrfs_run_delayed_refs() is called, (unsigned long)-1 means to keep running delayed references while we are able find any, and any other value to run that exact number of delayed references. Typically a specific value other than 0 or -1 is used when flushing space to try to release a certain amount of bytes for a ticket. In this case we just simply calculate how many delayed reference heads correspond to a specific amount of bytes, with calc_delayed_refs_nr(). However that only takes into account the space reserved for the reference heads themselves, and does not account for the space reserved for deleting checksums from the csum tree (see add_delayed_ref_head() and update_existing_head_ref()) in case we are going to delete a data extent. This means we may end up running more delayed references than necessary in case we process delayed references for deleting a data extent. So change the logic of btrfs_run_delayed_refs() to take a bytes argument to specify how many bytes of delayed references to run/release, using the special values of 0 to mean all existing delayed references and U64_MAX (or (u64)-1) to keep running delayed references while we can find any. This prevents running more delayed references than necessary, when we have delayed references for deleting data extents, but also makes the upcoming changes/patches simpler and it's preparatory work for them. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/block-group.c | 3 +-- fs/btrfs/extent-tree.c | 53 +++++++++++++++++++++++++++--------------- fs/btrfs/extent-tree.h | 4 ++-- fs/btrfs/space-info.c | 17 ++------------ fs/btrfs/transaction.c | 8 +++---- 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index b2e5107b7cec..fb506ee51d2c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3474,8 +3474,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache_save_setup(cache, trans, path); if (!ret) - ret = btrfs_run_delayed_refs(trans, - (unsigned long) -1); + ret = btrfs_run_delayed_refs(trans, U64_MAX); if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { cache->io_ctl.inode = NULL; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b27e0a6878b3..475903a8d772 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1815,7 +1815,7 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans, return ret ? ret : 1; } -void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, +u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head) { @@ -1833,10 +1833,13 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, } btrfs_delayed_refs_rsv_release(fs_info, nr_items); + + return btrfs_calc_delayed_ref_bytes(fs_info, nr_items); } static int cleanup_ref_head(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_head *head) + struct btrfs_delayed_ref_head *head, + u64 *bytes_released) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1881,7 +1884,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, } } - btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); + *bytes_released = btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); trace_run_delayed_ref_head(fs_info, head, 0); btrfs_delayed_ref_unlock(head); @@ -2002,15 +2005,22 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, * Returns -ENOMEM or -EIO on failure and will abort the transaction. */ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - unsigned long nr) + u64 min_bytes) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_head *locked_ref = NULL; int ret; unsigned long count = 0; + unsigned long max_count = 0; + u64 bytes_processed = 0; delayed_refs = &trans->transaction->delayed_refs; + if (min_bytes == 0) { + max_count = delayed_refs->num_heads_ready; + min_bytes = U64_MAX; + } + do { if (!locked_ref) { locked_ref = btrfs_obtain_ref_head(trans); @@ -2046,11 +2056,14 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ return ret; } else if (!ret) { + u64 bytes_released = 0; + /* * Success, perform the usual cleanup of a processed * head */ - ret = cleanup_ref_head(trans, locked_ref); + ret = cleanup_ref_head(trans, locked_ref, &bytes_released); + bytes_processed += bytes_released; if (ret > 0 ) { /* We dropped our lock, we need to loop. */ ret = 0; @@ -2067,7 +2080,9 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, locked_ref = NULL; cond_resched(); - } while ((nr != -1 && count < nr) || locked_ref); + } while ((min_bytes != U64_MAX && bytes_processed < min_bytes) || + (max_count > 0 && count < max_count) || + locked_ref); return 0; } @@ -2116,22 +2131,25 @@ static u64 find_middle(struct rb_root *root) #endif /* - * this starts processing the delayed reference count updates and - * extent insertions we have queued up so far. count can be - * 0, which means to process everything in the tree at the start - * of the run (but not newly added entries), or it can be some target - * number you'd like to process. + * This starts processing the delayed reference count updates and extent + * insertions we have queued up so far. + * + * @trans: Transaction handle. + * @min_bytes: How many bytes of delayed references to process. After this + * many bytes we stop processing delayed references if there are + * any more. If 0 it means to run all existing delayed references, + * but not new ones added after running all existing ones. + * Use (u64)-1 (U64_MAX) to run all existing delayed references + * plus any new ones that are added. * * Returns 0 on success or if called with an aborted transaction * Returns <0 on error and aborts the transaction */ -int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - unsigned long count) +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, u64 min_bytes) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; int ret; - int run_all = count == (unsigned long)-1; /* We'll clean this up in btrfs_cleanup_transaction */ if (TRANS_ABORTED(trans)) @@ -2141,20 +2159,17 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, return 0; delayed_refs = &trans->transaction->delayed_refs; - if (count == 0) - count = delayed_refs->num_heads_ready; - again: #ifdef SCRAMBLE_DELAYED_REFS delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); #endif - ret = __btrfs_run_delayed_refs(trans, count); + ret = __btrfs_run_delayed_refs(trans, min_bytes); if (ret < 0) { btrfs_abort_transaction(trans, ret); return ret; } - if (run_all) { + if (min_bytes == U64_MAX) { btrfs_create_pending_block_groups(trans); spin_lock(&delayed_refs->lock); diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h index 88c249c37516..64436f15c4eb 100644 --- a/fs/btrfs/extent-tree.h +++ b/fs/btrfs/extent-tree.h @@ -91,8 +91,8 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, enum btrfs_inline_ref_type is_data); u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); -int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count); -void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, u64 min_bytes); +u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head); int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index ca7c702172fd..5443d76c83cb 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -556,18 +556,6 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info, return nr; } -static inline u64 calc_delayed_refs_nr(const struct btrfs_fs_info *fs_info, - u64 to_reclaim) -{ - const u64 bytes = btrfs_calc_delayed_ref_bytes(fs_info, 1); - u64 nr; - - nr = div64_u64(to_reclaim, bytes); - if (!nr) - nr = 1; - return nr; -} - #define EXTENT_SIZE_PER_ITEM SZ_256K /* @@ -749,10 +737,9 @@ static void flush_space(struct btrfs_fs_info *fs_info, break; } if (state == FLUSH_DELAYED_REFS_NR) - nr = calc_delayed_refs_nr(fs_info, num_bytes); + btrfs_run_delayed_refs(trans, num_bytes); else - nr = 0; - btrfs_run_delayed_refs(trans, nr); + btrfs_run_delayed_refs(trans, 0); btrfs_end_transaction(trans); break; case ALLOC_CHUNK: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ee63d92d66b4..06050aa520dc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1330,7 +1330,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) } /* Now flush any delayed refs generated by updating all of the roots */ - ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, U64_MAX); if (ret) return ret; @@ -1345,7 +1345,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) * so we want to keep this flushing in this loop to make sure * everything gets run. */ - ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, U64_MAX); if (ret) return ret; } @@ -1563,7 +1563,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, * for now flush the delayed refs to narrow the race window where the * qgroup counters could end up wrong. */ - ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, U64_MAX); if (ret) { btrfs_abort_transaction(trans, ret); return ret; @@ -2397,7 +2397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto unlock_reloc; - ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); + ret = btrfs_run_delayed_refs(trans, U64_MAX); if (ret) goto unlock_reloc; From patchwork Fri Sep 8 17:20:35 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377742 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8BF7BEEB563 for ; Fri, 8 Sep 2023 17:21:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245429AbjIHRVF (ORCPT ); Fri, 8 Sep 2023 13:21:05 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58652 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S245427AbjIHRVF (ORCPT ); Fri, 8 Sep 2023 13:21:05 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7AB49199F for ; Fri, 8 Sep 2023 10:21:00 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 94353C433CA for ; Fri, 8 Sep 2023 17:20:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193660; bh=+1cka6dCMGEwwoMx9QTYn4Pm108pRcl0Yfcs7RqOWS0=; h=From:To:Subject:Date:In-Reply-To:References:From; b=YKBUigwgslLIFc1+MZuvIrbVh+faRr8MP3bRvQgvhds3/TM0UTsNaawIC7Czw8NjA +65Xb0Id7tLCLvkBikXL7Ln7aiHBvLHiY4ERLlq1wPQ9BTBnWtsHWRevPj/JyYAPbK TmaieL85GAjW0LhogVHEDeZqWeD5cd/eZ35T+FD+minApZBpJI0h5Vza4bMsvlI98y pSDipo5CUTHX+Nrqo/gd2d2r0OuQdmxhv5/VAiDwPpOEgp8bDPwQLS0IDRiqG3GCtG L09PEznS0VfbHDt+Ni9z8CjmgEFrWurQOnIUZuzySHMyKi8FXCyrlXStBiMpYo/Ypw nMTTlglbgmJ/A== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 18/21] btrfs: reserve space for delayed refs on a per ref basis Date: Fri, 8 Sep 2023 18:20:35 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana Currently when reserving space for delayed refs we do it on a per ref head basis. This is generally enough because most back refs for an extent end up being inlined in the extent item - with the default leaf size of 16K we can have at most 33 inline back refs (this is calculated by the macro BTRFS_MAX_EXTENT_ITEM_SIZE()). The amount of bytes reserved for each ref head is given by btrfs_calc_delayed_ref_bytes(), which basically corresponds to a single path for insertion into the extent tree plus another path for insertion into the free space tree if it's enabled. However if we have reached the limit of inline refs or we have a mix of inline and non-inline refs, then we will need to insert a non-inline ref and update the existing extent item to update the total number of references for the extent. This implies we need reserved space for two insertion paths in the extent tree, but we only reserved for one path. The extent item and the non-inline ref item may be located in different leaves, or even if they are located in the same leaf, after updating the extent item and before inserting the non-inline ref item, the extent buffers in the btree path may have been written (due to memory pressure for e.g.), in which case we need to COW the entire path again. In this case since we have not reserved enough space for the delayed refs block reserve, we will use the global block reserve. If we are in a situation where the fs has no more unallocated space enough to allocate a new metadata block group and available space in the existing metadata block groups is close to the maximum size of the global block reserve (512M), we may end up consuming too much of the free metadata space to the point where we can't commit any future transaction because it will fail, with -ENOSPC, during its commit when trying to allocate an extent for some COW operation (running delayed refs generated by running delayed refs or COWing the root tree's root node at commit_cowonly_roots() for example). Such dramatic scenario can happen if we have many delayed refs that require the insertion of non-inline ref items, due to too many reflinks or snapshots. We also have situations where we use the global block reserve because we could not in advance know that we will need space to update some trees (block group creation for example), so this all adds up to increase the chances of exhausting the global block reserve and making any future transaction commit to fail with -ENOSPC and turn the fs into RO mode, or fail the mount operation in case the mount needs to start and commit a transaction, such as when we have orphans to cleanup for example - such case was reported and hit by someone running a SLE (SUSE Linux Enterprise) distribution for example - where the fs had no more unallocated space that could be used to allocate a new metadata block group, and the available metadata space was about 1.5M, not enough to commit a transaction to cleanup an orphan inode (or do relocation of data block groups that were far from being full). So reserve space for delayed refs by individual refs and not by ref heads, as we may need to COW multiple extent tree paths due to non-inline ref items. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/delayed-ref.c | 32 ++++++++++++++++++++++---------- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 29 +++++++++++++++-------------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 0991f66471ff..067176ac5a2b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -422,7 +422,8 @@ int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs, return 0; } -static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs, +static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *ref) { @@ -433,9 +434,11 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs, list_del(&ref->add_list); btrfs_put_delayed_ref(ref); atomic_dec(&delayed_refs->num_entries); + btrfs_delayed_refs_rsv_release(fs_info, 1); } -static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs, +static bool merge_ref(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *ref, u64 seq) @@ -464,10 +467,10 @@ static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs, mod = -next->ref_mod; } - drop_delayed_ref(delayed_refs, head, next); + drop_delayed_ref(fs_info, delayed_refs, head, next); ref->ref_mod += mod; if (ref->ref_mod == 0) { - drop_delayed_ref(delayed_refs, head, ref); + drop_delayed_ref(fs_info, delayed_refs, head, ref); done = true; } else { /* @@ -505,7 +508,7 @@ void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); if (seq && ref->seq >= seq) continue; - if (merge_ref(delayed_refs, head, ref, seq)) + if (merge_ref(fs_info, delayed_refs, head, ref, seq)) goto again; } } @@ -584,10 +587,11 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, * Return true if the ref was merged into an existing one (and therefore can be * freed by the caller). */ -static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root, +static bool insert_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, struct btrfs_delayed_ref_node *ref) { + struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs; struct btrfs_delayed_ref_node *exist; int mod; @@ -598,6 +602,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root, list_add_tail(&ref->add_list, &href->ref_add_list); atomic_inc(&root->num_entries); spin_unlock(&href->lock); + trans->delayed_ref_updates++; return false; } @@ -626,7 +631,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root, /* remove existing tail if its ref_mod is zero */ if (exist->ref_mod == 0) - drop_delayed_ref(root, href, exist); + drop_delayed_ref(trans->fs_info, root, href, exist); spin_unlock(&href->lock); return true; } @@ -695,6 +700,8 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, /* * If we are going to from a positive ref mod to a negative or vice * versa we need to make sure to adjust pending_csums accordingly. + * We reserve bytes for csum deletion when adding or updating a ref head + * see add_delayed_ref_head() for more details. */ if (existing->is_data) { u64 csum_leaves = @@ -819,6 +826,12 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); head_ref = existing; } else { + /* + * We reserve the amount of bytes needed to delete csums when + * adding the ref head and not when adding individual drop refs + * since the csum items are deleted only after running the last + * delayed drop ref (the data extent's ref count drops to 0). + */ if (head_ref->is_data && head_ref->ref_mod < 0) { delayed_refs->pending_csums += head_ref->num_bytes; trans->delayed_ref_updates += @@ -828,7 +841,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, delayed_refs->num_heads++; delayed_refs->num_heads_ready++; atomic_inc(&delayed_refs->num_entries); - trans->delayed_ref_updates++; } if (qrecord_inserted_ret) *qrecord_inserted_ret = qrecord_inserted; @@ -959,7 +971,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, head_ref = add_delayed_ref_head(trans, head_ref, record, action, &qrecord_inserted); - merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node); + merged = insert_delayed_ref(trans, head_ref, &ref->node); spin_unlock(&delayed_refs->lock); /* @@ -1051,7 +1063,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, head_ref = add_delayed_ref_head(trans, head_ref, record, action, &qrecord_inserted); - merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node); + merged = insert_delayed_ref(trans, head_ref, &ref->node); spin_unlock(&delayed_refs->lock); /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0a96ea8c1d3a..f4fcdbf312f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4609,6 +4609,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, list_del(&ref->add_list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); + btrfs_delayed_refs_rsv_release(fs_info, 1); } if (head->must_insert_reserved) pin_bytes = true; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 475903a8d772..79aa3e68fd34 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1819,22 +1819,24 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head) { - int nr_items = 1; /* Dropping this ref head update. */ - /* * We had csum deletions accounted for in our delayed refs rsv, we need * to drop the csum leaves for this update from our delayed_refs_rsv. */ if (head->total_ref_mod < 0 && head->is_data) { + int nr_items; + spin_lock(&delayed_refs->lock); delayed_refs->pending_csums -= head->num_bytes; spin_unlock(&delayed_refs->lock); - nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); - } + nr_items = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); + + btrfs_delayed_refs_rsv_release(fs_info, nr_items); - btrfs_delayed_refs_rsv_release(fs_info, nr_items); + return btrfs_calc_delayed_ref_bytes(fs_info, nr_items); + } - return btrfs_calc_delayed_ref_bytes(fs_info, nr_items); + return 0; } static int cleanup_ref_head(struct btrfs_trans_handle *trans, @@ -1884,7 +1886,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, } } - *bytes_released = btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); + *bytes_released += btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); trace_run_delayed_ref_head(fs_info, head, 0); btrfs_delayed_ref_unlock(head); @@ -1926,7 +1928,8 @@ static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head( } static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_head *locked_ref) + struct btrfs_delayed_ref_head *locked_ref, + u64 *bytes_released) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; @@ -1982,7 +1985,8 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, ret = run_one_delayed_ref(trans, ref, extent_op, must_insert_reserved); - + btrfs_delayed_refs_rsv_release(fs_info, 1); + *bytes_released += btrfs_calc_delayed_ref_bytes(fs_info, 1); btrfs_free_delayed_extent_op(extent_op); if (ret) { unselect_delayed_ref_head(delayed_refs, locked_ref); @@ -2048,7 +2052,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, spin_lock(&locked_ref->lock); btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref); - ret = btrfs_run_delayed_refs_for_head(trans, locked_ref); + ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, &bytes_processed); if (ret < 0 && ret != -EAGAIN) { /* * Error, btrfs_run_delayed_refs_for_head already @@ -2056,14 +2060,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ return ret; } else if (!ret) { - u64 bytes_released = 0; - /* * Success, perform the usual cleanup of a processed * head */ - ret = cleanup_ref_head(trans, locked_ref, &bytes_released); - bytes_processed += bytes_released; + ret = cleanup_ref_head(trans, locked_ref, &bytes_processed); if (ret > 0 ) { /* We dropped our lock, we need to loop. */ ret = 0; From patchwork Fri Sep 8 17:20:36 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377741 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id B4431EEB564 for ; Fri, 8 Sep 2023 17:21:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245427AbjIHRVG (ORCPT ); Fri, 8 Sep 2023 13:21:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58644 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229600AbjIHRVF (ORCPT ); Fri, 8 Sep 2023 13:21:05 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 79F54CE6 for ; Fri, 8 Sep 2023 10:21:01 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 97D2DC433C9 for ; Fri, 8 Sep 2023 17:21:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193661; bh=OVeCedQ8YLMYiu2q+p/D/EBzjoqooSwoLr+efHiEdLU=; h=From:To:Subject:Date:In-Reply-To:References:From; b=OM6SwbGw61f3cXmRxAbTXrpPH2VyHn7P/QNa3bqHpank9B+OcoAxFiNIe9GjPY+nX 3kHI9uKKXBOmX5fuBsu06qBtBhHuvZys0oSqw63GJm3OWM0S2Jt0jH8v7IBZPWb4ZL GJS3/eUpaGy99y8cwNRPaoQBqkJvBe83JYFJfdT2+P/NhAyEfobZWWpXOwp7GXC6Rq WwxMtT6SdPlwetcyHRnx2qO50GC/jO5fgzuavZ0xYkRAyVx5FSRErgZ5tz3HJBoBXz Eo2H51NWp8PfyQxjyEl2MmIRCPvx8Da0XBvpQXpwpQEJsoFPj5BpObHCXJ9NEvT2/B aaMxacAr3Avig== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 19/21] btrfs: remove pointless initialization at btrfs_delayed_refs_rsv_release() Date: Fri, 8 Sep 2023 18:20:36 +0100 Message-Id: <6c4681f572a1ff834b241f8d8e4b148724ad5611.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana There's no point in initializing to 0 the local variable 'released' as we don't use it before the next assignment to it. So remove the initialization. This may help avoid some warnings with clang tools such as the one reported/fixed by commit 966de47ff0c9 ("btrfs: remove redundant initialization of variables in log_new_ancestors"). Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/delayed-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 067176ac5a2b..19dd74b236c6 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -66,7 +66,7 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) { struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; const u64 num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr); - u64 released = 0; + u64 released; released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL); if (released) From patchwork Fri Sep 8 17:20:37 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377743 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5BA0BEEB560 for ; Fri, 8 Sep 2023 17:21:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245431AbjIHRVH (ORCPT ); Fri, 8 Sep 2023 13:21:07 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58712 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229600AbjIHRVH (ORCPT ); Fri, 8 Sep 2023 13:21:07 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 80CFDCE6 for ; Fri, 8 Sep 2023 10:21:02 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9D2D5C433CC for ; Fri, 8 Sep 2023 17:21:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193662; bh=iiIj/mmMxvcXkrAyPyk/3HcJyusZrfys+cxpkND6b8M=; h=From:To:Subject:Date:In-Reply-To:References:From; b=a1oj9vGoE5fvlVyGkzIWVZFgj1USt8rktft8Uzkmz0KkNqUJfIKBFCQUYWzU39+VF uOsdDqZz0KcLzHdIIJuS/4pPuBXorTramOdaLqby3gYGJ4oGjrd3ugF4GjzYoMOE3U oClRuCdtIKB0J3t2CapW1cwS5JbxieorHliFkL/AbarOw+b4OmO0VAw1yUD3LPssPO d/1jJjIYleVBuGeuvTusEahi2r6Yfz+MrNjP+Ag62LKnBSmnxAZgHVPFCCplIKYwXI ZkxU666cnrCYlipnxp9hAySBKqiDestWqqvDZEx1nnsurNoG4xRFW/r5E80k17K7HW Aem/MEtb8sE9w== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 20/21] btrfs: stop doing excessive space reservation for csum deletion Date: Fri, 8 Sep 2023 18:20:37 +0100 Message-Id: X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana Currently when reserving space for deleting the csum items for a data extent, when adding or updating a delayed ref head, we determine how many leaves of csum items we can have and then pass that number to the helper btrfs_calc_delayed_ref_bytes(). This helper is used for calculating space for all tree modifications we need when running delayed references, however the amount of space it computes is excessive for deleting csum items because: 1) It uses btrfs_calc_insert_metadata_size() which is excessive because we only need to delete csum items from the csum tree, we don't need to insert any items, so btrfs_calc_metadata_size() is all we need (as it computes space needed to delete an item); 2) If the free space tree is enabled, it doubles the amount of space, which is pointless for csum deletion since we don't need to touch the free space tree or any other tree other than the csum tree. So improve on this by tracking how many csum deletions we have and using a new helper to calculate space for csum deletions (just a wrapper around btrfs_calc_metadata_size() with a comment). This reduces the amount of space we need to reserve for csum deletions by a factor of 4, and it helps reduce the number of times we have to block space reservations and have the reclaim task enter the space flushing algorihm (flush delayed items, flush delayed refs, etc) in order to satisfy tickets. For example this results in a total time decrease when unlinking (or truncating) files with many extents, as we end up having to block on space metadata reservations less often. Example test: $ cat test.sh #!/bin/bash DEV=/dev/nullb0 MNT=/mnt/test umount $DEV &> /dev/null mkfs.btrfs -f $DEV # Use compression to quickly create files with a lot of extents # (each with a size of 128K). mount -o compress=lzo $DEV $MNT # 100G gives at least 983040 extents with a size of 128K. xfs_io -f -c "pwrite -S 0xab -b 1M 0 120G" $MNT/foobar # Flush all delalloc and clear all metadata from memory. umount $MNT mount -o compress=lzo $DEV $MNT start=$(date +%s%N) rm -f $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "rm took $dur milliseconds" umount $MNT Before this change rm took: 7504 milliseconds After this change rm took: 6574 milliseconds (-12.4%) Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/block-group.c | 8 ++++---- fs/btrfs/delayed-ref.c | 33 ++++++++++++++++++++------------- fs/btrfs/delayed-ref.h | 13 ++++++++++++- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 1 + 7 files changed, 45 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index fb506ee51d2c..82c77dbad2e8 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1286,7 +1286,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, /* Once for the lookup reference */ btrfs_put_block_group(block_group); if (remove_rsv) - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); btrfs_free_path(path); return ret; } @@ -2709,7 +2709,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) /* Already aborted the transaction if it failed. */ next: - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); list_del_init(&block_group->bg_list); clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); } @@ -3370,7 +3370,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) if (should_put) btrfs_put_block_group(cache); if (drop_reserve) - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); /* * Avoid blocking other tasks for too long. It might even save * us from writing caches for block groups that are going to be @@ -3517,7 +3517,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) /* If its not on the io list, we need to put the block group */ if (should_put) btrfs_put_block_group(cache); - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); spin_lock(&cur_trans->dirty_bgs_lock); } spin_unlock(&cur_trans->dirty_bgs_lock); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 19dd74b236c6..a6680e8756a1 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -57,17 +57,21 @@ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info) * Release a ref head's reservation. * * @fs_info: the filesystem - * @nr: number of items to drop + * @nr_refs: number of delayed refs to drop + * @nr_csums: number of csum items to drop * * Drops the delayed ref head's count from the delayed refs rsv and free any * excess reservation we had. */ -void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums) { struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; - const u64 num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr); + u64 num_bytes; u64 released; + num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs); + num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); + released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL); if (released) trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", @@ -77,8 +81,9 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) /* * Adjust the size of the delayed refs rsv. * - * This is to be called anytime we may have adjusted trans->delayed_ref_updates, - * it'll calculate the additional size and add it to the delayed_refs_rsv. + * This is to be called anytime we may have adjusted trans->delayed_ref_updates + * or trans->delayed_ref_csum_deletions, it'll calculate the additional size and + * add it to the delayed_refs_rsv. */ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) { @@ -86,17 +91,19 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; u64 num_bytes; - if (!trans->delayed_ref_updates) - return; + num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates); + num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, + trans->delayed_ref_csum_deletions); - num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, - trans->delayed_ref_updates); + if (num_bytes == 0) + return; spin_lock(&delayed_rsv->lock); delayed_rsv->size += num_bytes; delayed_rsv->full = false; spin_unlock(&delayed_rsv->lock); trans->delayed_ref_updates = 0; + trans->delayed_ref_csum_deletions = 0; } /* @@ -434,7 +441,7 @@ static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info, list_del(&ref->add_list); btrfs_put_delayed_ref(ref); atomic_dec(&delayed_refs->num_entries); - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); } static bool merge_ref(struct btrfs_fs_info *fs_info, @@ -710,11 +717,11 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, if (existing->total_ref_mod >= 0 && old_ref_mod < 0) { delayed_refs->pending_csums -= existing->num_bytes; - btrfs_delayed_refs_rsv_release(fs_info, csum_leaves); + btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves); } if (existing->total_ref_mod < 0 && old_ref_mod >= 0) { delayed_refs->pending_csums += existing->num_bytes; - trans->delayed_ref_updates += csum_leaves; + trans->delayed_ref_csum_deletions += csum_leaves; } } @@ -834,7 +841,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, */ if (head_ref->is_data && head_ref->ref_mod < 0) { delayed_refs->pending_csums += head_ref->num_bytes; - trans->delayed_ref_updates += + trans->delayed_ref_csum_deletions += btrfs_csum_bytes_to_leaves(trans->fs_info, head_ref->num_bytes); } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 46a1421cd99d..1bbabfebb329 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -277,6 +277,17 @@ static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_in return num_bytes; } +static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *fs_info, + int num_csum_items) +{ + /* + * Deleting csum items does not result in new nodes/leaves and does not + * require changing the free space tree, only the csum tree, so this is + * all we need. + */ + return btrfs_calc_metadata_size(fs_info, num_csum_items); +} + static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, int action, u64 bytenr, u64 len, u64 parent) { @@ -401,7 +412,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head( int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); -void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr); +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums); void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f4fcdbf312f4..bf8dc572b9d2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4609,7 +4609,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, list_del(&ref->add_list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); } if (head->must_insert_reserved) pin_bytes = true; @@ -4807,7 +4807,7 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, spin_unlock(&cur_trans->dirty_bgs_lock); btrfs_put_block_group(cache); - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); spin_lock(&cur_trans->dirty_bgs_lock); } spin_unlock(&cur_trans->dirty_bgs_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 79aa3e68fd34..caef12198cf6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1824,16 +1824,16 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, * to drop the csum leaves for this update from our delayed_refs_rsv. */ if (head->total_ref_mod < 0 && head->is_data) { - int nr_items; + int nr_csums; spin_lock(&delayed_refs->lock); delayed_refs->pending_csums -= head->num_bytes; spin_unlock(&delayed_refs->lock); - nr_items = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); + nr_csums = btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); - btrfs_delayed_refs_rsv_release(fs_info, nr_items); + btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums); - return btrfs_calc_delayed_ref_bytes(fs_info, nr_items); + return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); } return 0; @@ -1985,7 +1985,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, ret = run_one_delayed_ref(trans, ref, extent_op, must_insert_reserved); - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); *bytes_released += btrfs_calc_delayed_ref_bytes(fs_info, 1); btrfs_free_delayed_extent_op(extent_op); if (ret) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 06050aa520dc..4453d8113b37 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2081,7 +2081,7 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) struct btrfs_block_group *block_group, *tmp; list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { - btrfs_delayed_refs_rsv_release(fs_info, 1); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); list_del_init(&block_group->bg_list); } } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 8e9fa23bd7fe..474ce34ed02e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -119,6 +119,7 @@ struct btrfs_trans_handle { u64 bytes_reserved; u64 chunk_bytes_reserved; unsigned long delayed_ref_updates; + unsigned long delayed_ref_csum_deletions; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; From patchwork Fri Sep 8 17:20:38 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Filipe Manana X-Patchwork-Id: 13377744 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4CACBEEB561 for ; Fri, 8 Sep 2023 17:21:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S245433AbjIHRVJ (ORCPT ); Fri, 8 Sep 2023 13:21:09 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58726 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229600AbjIHRVI (ORCPT ); Fri, 8 Sep 2023 13:21:08 -0400 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8BB16A1 for ; Fri, 8 Sep 2023 10:21:03 -0700 (PDT) Received: by smtp.kernel.org (Postfix) with ESMTPSA id A2381C433C8 for ; Fri, 8 Sep 2023 17:21:02 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1694193663; bh=KBzwVFH3+xugvQIZVsarU2ldCJoAgH/4VERJy9clLdM=; h=From:To:Subject:Date:In-Reply-To:References:From; b=U0+NbFWJyujUNu8milrcnpvYejAumD5aeehFwx5554A2Cqcq3CJ3h8w4AcXkJuUff QjRU7dylZReZHXuCAwx5w3iXwtxdp6bDz8zdMqrJL2m6rWh8MPLitrCVqOIfAelHVw i5Qtb9OlIHlwmZgjpBkXO1lYW0YerrgODiTDJsTAZGsR3hAc1/SYbkI0xvPlO1Qw8k wTtWPF4EZ1eFhXn7lJiba2fbAHT4u03wIFlqdZ4ZIHrrmnP8sTEdQzUPMa7L4KFS0U wZOOZobFlTsd5Esykpc/N/fQkSe74c7ybwHs3nEg0hMYcYHO81T9w+oqLMFKkMdd/V yCkMxi7x3rY/Q== From: fdmanana@kernel.org To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 21/21] btrfs: always reserve space for delayed refs when starting transaction Date: Fri, 8 Sep 2023 18:20:38 +0100 Message-Id: <2e3540a727d0d75682f4f70e5de76c503e33049f.1694192469.git.fdmanana@suse.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org From: Filipe Manana When starting a transaction (or joining an existing one with btrfs_start_transaction()), we reserve space for the number of items we want to insert in a btree, but we don't do it for the delayed refs we will generate while using the transaction to modify (COW) extent buffers in a btree or allocate new extent buffers. Basically how it works: 1) When we start a transaction we reserve space for the number of items the caller wants to be inserted/modified/deleted in a btree. This space goes to the transaction block reserve; 2) If the delayed refs block reserve is not full, its size is greater than the amount of its reserved space, and the flush method is BTRFS_RESERVE_FLUSH_ALL, then we attempt to reserve more space for it corresponding to the number of items the caller wants to insert/modify/delete in a btree; 3) The size of the delayed refs block reserve is increased when a task creates delayed refs after COWing an extent buffer, allocating a new one or deleting (freeing) an extent buffer. This happens after the the task started or joined a transaction, whenever it calls btrfs_update_delayed_refs_rsv(); 4) The delayed refs block reserve is then refilled by anyone calling btrfs_delayed_refs_rsv_refill(), either during unlink/truncate operations or when someone else calls btrfs_start_transaction() with a 0 number of items and flush method BTRFS_RESERVE_FLUSH_ALL; 5) As a task COWs or allocates extent buffers, it consumes space from the transaction block reserve. When the task releases its transaction handle (btrfs_end_transaction()) or it attempts to commit the transaction, it releases any remaining space in the transaction block reserve that it did not use, as not all space may have been used (due to pessimistic space calculation) by calling btrfs_block_rsv_release() which will try to add that unused space to the delayed refs block reserve (if its current size is greater than its reserved space). That transferred space may not be enough to completely fulfill the delayed refs block reserve. Plus we have some tasks that will attempt do modify as many leaves as they can before getting -ENOSPC (and then reserving more space and retrying), such as hole punching and extent cloning which call btrfs_replace_file_extents(). Such tasks can generate therefore a high number of delayed refs, for both metadata and data (we can't know in advance how many file extent items we will find in a range and therefore how many delayed refs for dropping references on data extents we will generate); 6) If a transaction starts its commit before the delayeds refs block reserve is refilled, for example by the transaction kthread or by someone who called btrfs_join_transaction() before starting the commit, then when running delayed references if we don't have enough reserved space in the delayed refs block reserve, we will consume space from the global block reserve. Now this doesn't make a lot of sense because: 1) We should reserve space for delayed references when starting the transaction, since we have no guarantees the delayed refs block reserve will be refilled; 2) If no refill happens then we will consume from the global block reserve when running delayed refs during the transaction commit; 3) If we have a bunch of tasks calling btrfs_start_transaction() with a number of items greater than zero and at the time the delayed refs reserve is full, then we don't reserve any space at btrfs_start_transaction() for the delayed refs that will be generated by a task, and we can therefore end up using a lot of space from the global reserve when running the delayed refs during a transaction commit; 4) There are also other operations that result in bumping the size of the delayed refs reserve, such as creating and deleting block groups, as well as the need to update a block group item because we allocated or freed an extent from the respective block group; 5) If we have a significant gap betweent the delayed refs reserve's size and its reserved space, two very bad things may happen: 1) The reserved space of the global reserve may not be enough and we fail the transaction commit with -ENOSPC when running delayed refs; 2) If the available space in the global reserve is enough it may result in nearly exhausting it. If the fs has no more unallocated device space for allocating a new block group and all the available space in existing metadata block groups is not far from the global reserve's size before we started the transaction commit, we may end up in a situation where after the transaction commit we have too little available metadata space, and any future transaction commit will fail with -ENOSPC, because although we were able to reserve space to start the transaction, we were not able to commit it, as running delayed refs generates some more delayed refs (to update the extent tree for example) - this includes not even being able to commit a transaction that was started with the goal of unlinking a file, removing an empty data block group or doing reclaim/balance, so there's no way to release metadata space. In the worst case the next time we mount the filesystem we may also fail with -ENOSPC due to failure to commit a transaction to cleanup orphan inodes. This later case was reported and hit by someone running a SLE (SUSE Linux Enterprise) distribution for example - where the fs had no more unallocated space that could be used to allocate a new metadata block group, and the available metadata space was about 1.5M, not enough to commit a transaction to cleanup an orphan inode (or do relocation of data block groups that were far from being full). So improve on this situation by always reserving space for delayed refs when calling start_transaction(), and if the flush method is BTRFS_RESERVE_FLUSH_ALL, also try to refill the delayeds refs block reserve if it's not full. The space reserved for the delayed refs is added to a local block reserve that is part of the transaction handle, and when a task updates the delayed refs block reserve size, after creating a delayed ref, the space is transferred from that local reserve to the global delayed refs reserve (fs_info->delayed_refs_rsv). In case the local reserve does not have enough space, which may happen for tasks that generate a variable and potentially large number of delayed refs (such as the hole punching and extent cloning cases mentioned before), we transfer any available space and then rely on the current behaviour of hoping some other task refills the delayed refs reserve or fallback to the global block reserve. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana --- fs/btrfs/block-rsv.c | 6 +- fs/btrfs/delayed-ref.c | 21 ++++++- fs/btrfs/transaction.c | 135 ++++++++++++++++++++++++++++++++--------- fs/btrfs/transaction.h | 2 + 4 files changed, 132 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 6ccd91bbff3e..6a8f9629bbbd 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -281,10 +281,10 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *target = NULL; /* - * If we are the delayed_rsv then push to the global rsv, otherwise dump - * into the delayed rsv if it is not full. + * If we are a delayed block reserve then push to the global rsv, + * otherwise dump into the global delayed reserve if it is not full. */ - if (block_rsv == delayed_rsv) + if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS) target = global_rsv; else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv)) target = delayed_rsv; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index a6680e8756a1..383d5be22941 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -89,7 +89,9 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv; u64 num_bytes; + u64 reserved_bytes; num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates); num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, @@ -98,9 +100,26 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) if (num_bytes == 0) return; + /* + * Try to take num_bytes from the transaction's local delayed reserve. + * If not possible, try to take as much as it's available. If the local + * reserve doesn't have enough reserved space, the delayed refs reserve + * will be refilled next time btrfs_delayed_refs_rsv_refill() is called + * by someone or if a transaction commit is triggered before that, the + * global block reserve will be used. We want to minimize using the + * global block reserve for cases we can account for in advance, to + * avoid exhausting it and reach -ENOSPC during a transaction commit. + */ + spin_lock(&local_rsv->lock); + reserved_bytes = min(num_bytes, local_rsv->reserved); + local_rsv->reserved -= reserved_bytes; + local_rsv->full = (local_rsv->reserved >= local_rsv->size); + spin_unlock(&local_rsv->lock); + spin_lock(&delayed_rsv->lock); delayed_rsv->size += num_bytes; - delayed_rsv->full = false; + delayed_rsv->reserved += reserved_bytes; + delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size); spin_unlock(&delayed_rsv->lock); trans->delayed_ref_updates = 0; trans->delayed_ref_csum_deletions = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4453d8113b37..ac347de1ffb8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -555,6 +555,69 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root) return true; } +static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info, + enum btrfs_reserve_flush_enum flush, + u64 num_bytes, + u64 *delayed_refs_bytes) +{ + struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info; + u64 extra_delayed_refs_bytes = 0; + u64 bytes; + int ret; + + /* + * If there's a gap between the size of the delayed refs reserve and + * its reserved space, than some tasks have added delayed refs or bumped + * its size otherwise (due to block group creation or removal, or block + * group item update). Also try to allocate that gap in order to prevent + * using (and possibly abusing) the global reserve when committing the + * transaction. + */ + if (flush == BTRFS_RESERVE_FLUSH_ALL && + !btrfs_block_rsv_full(delayed_refs_rsv)) { + spin_lock(&delayed_refs_rsv->lock); + if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) + extra_delayed_refs_bytes = delayed_refs_rsv->size - + delayed_refs_rsv->reserved; + spin_unlock(&delayed_refs_rsv->lock); + } + + bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes; + + /* + * We want to reserve all the bytes we may need all at once, so we only + * do 1 enospc flushing cycle per transaction start. + */ + ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); + if (ret == 0) { + if (extra_delayed_refs_bytes > 0) + btrfs_migrate_to_delayed_refs_rsv(fs_info, + extra_delayed_refs_bytes); + return 0; + } + + if (extra_delayed_refs_bytes > 0) { + bytes -= extra_delayed_refs_bytes; + ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); + if (ret == 0) + return 0; + } + + /* + * If we are an emergency flush, which can steal from the global block + * reserve, then attempt to not reserve space for the delayed refs, as + * we will consume space for them from the global block reserve. + */ + if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) { + bytes -= *delayed_refs_bytes; + *delayed_refs_bytes = 0; + ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); + } + + return ret; +} + static struct btrfs_trans_handle * start_transaction(struct btrfs_root *root, unsigned int num_items, unsigned int type, enum btrfs_reserve_flush_enum flush, @@ -562,10 +625,12 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv; struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; u64 num_bytes = 0; u64 qgroup_reserved = 0; + u64 delayed_refs_bytes = 0; bool reloc_reserved = false; bool do_chunk_alloc = false; int ret; @@ -588,9 +653,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, * the appropriate flushing if need be. */ if (num_items && root != fs_info->chunk_root) { - struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv; - u64 delayed_refs_bytes = 0; - qgroup_reserved = num_items * fs_info->nodesize; /* * Use prealloc for now, as there might be a currently running @@ -602,20 +664,16 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, if (ret) return ERR_PTR(ret); + num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); /* - * We want to reserve all the bytes we may need all at once, so - * we only do 1 enospc flushing cycle per transaction start. We - * accomplish this by simply assuming we'll do num_items worth - * of delayed refs updates in this trans handle, and refill that - * amount for whatever is missing in the reserve. + * If we plan to insert/update/delete "num_items" from a btree, + * we will also generate delayed refs for extent buffers in the + * respective btree paths, so reserve space for the delayed refs + * that will be generated by the caller as it modifies btrees. + * Try to reserve them to avoid excessive use of the global + * block reserve. */ - num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); - if (flush == BTRFS_RESERVE_FLUSH_ALL && - !btrfs_block_rsv_full(delayed_refs_rsv)) { - delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, - num_items); - num_bytes += delayed_refs_bytes; - } + delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, num_items); /* * Do the reservation for the relocation root creation @@ -625,18 +683,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, reloc_reserved = true; } - ret = btrfs_reserve_metadata_bytes(fs_info, rsv->space_info, - num_bytes, flush); + ret = btrfs_reserve_trans_metadata(fs_info, flush, num_bytes, + &delayed_refs_bytes); if (ret) goto reserve_fail; - if (delayed_refs_bytes) { - btrfs_migrate_to_delayed_refs_rsv(fs_info, - delayed_refs_bytes); - num_bytes -= delayed_refs_bytes; - } - btrfs_block_rsv_add_bytes(rsv, num_bytes, true); - if (rsv->space_info->force_alloc) + btrfs_block_rsv_add_bytes(trans_rsv, num_bytes, true); + + if (trans_rsv->space_info->force_alloc) do_chunk_alloc = true; } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL && !btrfs_block_rsv_full(delayed_refs_rsv)) { @@ -696,6 +750,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, h->type = type; INIT_LIST_HEAD(&h->new_bgs); + btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS); smp_mb(); if (cur_trans->state >= TRANS_STATE_COMMIT_START && @@ -708,8 +763,17 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, if (num_bytes) { trace_btrfs_space_reservation(fs_info, "transaction", h->transid, num_bytes, 1); - h->block_rsv = &fs_info->trans_block_rsv; + h->block_rsv = trans_rsv; h->bytes_reserved = num_bytes; + if (delayed_refs_bytes > 0) { + trace_btrfs_space_reservation(fs_info, + "local_delayed_refs_rsv", + h->transid, + delayed_refs_bytes, 1); + h->delayed_refs_bytes_reserved = delayed_refs_bytes; + btrfs_block_rsv_add_bytes(&h->delayed_rsv, delayed_refs_bytes, true); + delayed_refs_bytes = 0; + } h->reloc_reserved = reloc_reserved; } @@ -765,8 +829,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, kmem_cache_free(btrfs_trans_handle_cachep, h); alloc_fail: if (num_bytes) - btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, - num_bytes, NULL); + btrfs_block_rsv_release(fs_info, trans_rsv, num_bytes, NULL); + if (delayed_refs_bytes) + btrfs_space_info_free_bytes_may_use(fs_info, trans_rsv->space_info, + delayed_refs_bytes); reserve_fail: btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); return ERR_PTR(ret); @@ -987,11 +1053,14 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) if (!trans->block_rsv) { ASSERT(!trans->bytes_reserved); + ASSERT(!trans->delayed_refs_bytes_reserved); return; } - if (!trans->bytes_reserved) + if (!trans->bytes_reserved) { + ASSERT(!trans->delayed_refs_bytes_reserved); return; + } ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); trace_btrfs_space_reservation(fs_info, "transaction", @@ -999,6 +1068,16 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) btrfs_block_rsv_release(fs_info, trans->block_rsv, trans->bytes_reserved, NULL); trans->bytes_reserved = 0; + + if (!trans->delayed_refs_bytes_reserved) + return; + + trace_btrfs_space_reservation(fs_info, "local_delayed_refs_rsv", + trans->transid, + trans->delayed_refs_bytes_reserved, 0); + btrfs_block_rsv_release(fs_info, &trans->delayed_rsv, + trans->delayed_refs_bytes_reserved, NULL); + trans->delayed_refs_bytes_reserved = 0; } static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 474ce34ed02e..4dc68d7ec955 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -117,6 +117,7 @@ enum { struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; + u64 delayed_refs_bytes_reserved; u64 chunk_bytes_reserved; unsigned long delayed_ref_updates; unsigned long delayed_ref_csum_deletions; @@ -139,6 +140,7 @@ struct btrfs_trans_handle { bool in_fsync; struct btrfs_fs_info *fs_info; struct list_head new_bgs; + struct btrfs_block_rsv delayed_rsv; }; /*