diff mbox series

[v3] btrfs-progs: add rudimentary log checking

Message ID 20240715143830.2067478-1-maharmstone@fb.com (mailing list archive)
State New, archived
Headers show
Series [v3] btrfs-progs: add rudimentary log checking | expand

Commit Message

Mark Harmstone July 15, 2024, 2:38 p.m. UTC
Currently the transaction log is more or less ignored by btrfs check,
meaning that it's possible for a FS with a corrupt log to pass btrfs
check, but be immediately corrupted by the kernel when it's mounted.

This patch adds a check that if there's an inode in the log, any pending
non-compressed writes also have corresponding csum entries.

Signed-off-by: Mark Harmstone <maharmstone@fb.com>
---
Changes:
v2:
helper to load log root
handle compressed extents
loop logic improvements
fix bug in check_log_csum

v3:
added test
added explanatory comment to check_log_csum
changed length operation to -=

 check/main.c                                  | 304 +++++++++++++++++-
 .../063-log-missing-csum/default.img.xz       | Bin 0 -> 1288 bytes
 tests/fsck-tests/063-log-missing-csum/test.sh |  14 +
 3 files changed, 306 insertions(+), 12 deletions(-)
 create mode 100644 tests/fsck-tests/063-log-missing-csum/default.img.xz
 create mode 100755 tests/fsck-tests/063-log-missing-csum/test.sh

Comments

Boris Burkov July 24, 2024, 5:57 p.m. UTC | #1
On Mon, Jul 15, 2024 at 03:38:24PM +0100, Mark Harmstone wrote:
> Currently the transaction log is more or less ignored by btrfs check,
> meaning that it's possible for a FS with a corrupt log to pass btrfs
> check, but be immediately corrupted by the kernel when it's mounted.
> 
> This patch adds a check that if there's an inode in the log, any pending
> non-compressed writes also have corresponding csum entries.
> 
> Signed-off-by: Mark Harmstone <maharmstone@fb.com>

Reviewed-by: Boris Burkov <boris@bur.io>

> ---
> Changes:
> v2:
> helper to load log root
> handle compressed extents
> loop logic improvements
> fix bug in check_log_csum
> 
> v3:
> added test
> added explanatory comment to check_log_csum
> changed length operation to -=
> 
>  check/main.c                                  | 304 +++++++++++++++++-
>  .../063-log-missing-csum/default.img.xz       | Bin 0 -> 1288 bytes
>  tests/fsck-tests/063-log-missing-csum/test.sh |  14 +
>  3 files changed, 306 insertions(+), 12 deletions(-)
>  create mode 100644 tests/fsck-tests/063-log-missing-csum/default.img.xz
>  create mode 100755 tests/fsck-tests/063-log-missing-csum/test.sh
> 
> diff --git a/check/main.c b/check/main.c
> index 83c721d3..eaae3042 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -9787,6 +9787,274 @@ static int zero_log_tree(struct btrfs_root *root)
>  	return ret;
>  }
>  
> +/* Searches the given root for checksums in the range [addr, addr+length].
> + * Returns 1 if found, 0 if not found, and < 0 for an error. */
> +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length)
> +{
> +	struct btrfs_path path = { 0 };
> +	struct btrfs_key key;
> +	struct extent_buffer *leaf;
> +	u16 csum_size = gfs_info->csum_size;
> +	u16 num_entries;
> +	u64 data_len;
> +	int ret;
> +
> +	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
> +	key.type = BTRFS_EXTENT_CSUM_KEY;
> +	key.offset = addr;
> +
> +	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (ret > 0 && path.slots[0])
> +		path.slots[0]--;
> +
> +	ret = 0;
> +
> +	while (1) {
> +		leaf = path.nodes[0];
> +		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +			ret = btrfs_next_leaf(root, &path);
> +			if (ret) {
> +				if (ret > 0)
> +					ret = 0;
> +
> +				break;
> +			}
> +			leaf = path.nodes[0];
> +		}
> +
> +		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +		if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
> +			break;
> +
> +		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
> +		    key.type != BTRFS_EXTENT_CSUM_KEY)
> +			goto next;
> +
> +		if (key.offset >= addr + length)
> +			break;
> +
> +		num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
> +		data_len = num_entries * gfs_info->sectorsize;
> +
> +		if (addr >= key.offset && addr <= key.offset + data_len) {
> +			u64 end = min(addr + length, key.offset + data_len);
> +
> +			length -= (end - addr);
> +			addr = end;
> +
> +			if (length == 0)
> +				break;
> +		}
> +
> +next:
> +		path.slots[0]++;
> +	}
> +
> +	btrfs_release_path(&path);
> +
> +	if (ret >= 0)
> +		ret = length == 0 ? 1 : 0;
> +
> +	return ret;
> +}
> +
> +static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache,
> +			  struct walk_control *wc)
> +{
> +	struct btrfs_path path = { 0 };
> +	struct btrfs_key key;
> +	struct extent_buffer *leaf;
> +	int ret, err = 0;
> +	u64 last_csum_inode = 0;
> +
> +	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
> +	key.type = BTRFS_INODE_ITEM_KEY;
> +	key.offset = 0;
> +	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> +	if (ret < 0)
> +		return 1;
> +
> +	while (1) {
> +		leaf = path.nodes[0];
> +		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +			ret = btrfs_next_leaf(root, &path);
> +			if (ret) {
> +				if (ret < 0)
> +					err = 1;
> +
> +				break;
> +			}
> +			leaf = path.nodes[0];
> +		}
> +		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
> +			break;
> +
> +		if (key.type == BTRFS_INODE_ITEM_KEY) {
> +			struct btrfs_inode_item *item;
> +
> +			item = btrfs_item_ptr(leaf, path.slots[0],
> +					      struct btrfs_inode_item);
> +
> +			if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM))
> +				last_csum_inode = key.objectid;
> +		} else if (key.type == BTRFS_EXTENT_DATA_KEY &&
> +			   key.objectid == last_csum_inode) {
> +			struct btrfs_file_extent_item *fi;
> +			u64 addr, length;
> +
> +			fi = btrfs_item_ptr(leaf, path.slots[0],
> +					    struct btrfs_file_extent_item);
> +
> +			if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
> +				goto next;
> +
> +			addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
> +				btrfs_file_extent_offset(leaf, fi);
> +			length = btrfs_file_extent_num_bytes(leaf, fi);
> +
> +			ret = check_log_csum(root, addr, length);
> +			if (ret < 0) {
> +				err = 1;
> +				break;
> +			}
> +
> +			if (!ret) {
> +				error("csum missing in log (root %llu, inode %llu, "
> +				      "offset %llu, address 0x%llx, length %llu)",
> +				      root->objectid, last_csum_inode, key.offset,
> +				      addr, length);
> +				err = 1;
> +			}
> +		}
> +
> +next:
> +		path.slots[0]++;
> +	}
> +
> +	btrfs_release_path(&path);
> +
> +	return err;
> +}
> +
> +static int load_log_root(u64 root_id, struct btrfs_path *path,
> +			 struct btrfs_root *tmp_root)
> +{
> +	struct extent_buffer *l;
> +	struct btrfs_tree_parent_check check = { 0 };
> +
> +	btrfs_setup_root(tmp_root, gfs_info, root_id);
> +
> +	l = path->nodes[0];
> +	read_extent_buffer(l, &tmp_root->root_item,
> +			btrfs_item_ptr_offset(l, path->slots[0]),
> +			sizeof(tmp_root->root_item));
> +
> +	tmp_root->root_key.objectid = root_id;
> +	tmp_root->root_key.type = BTRFS_ROOT_ITEM_KEY;
> +	tmp_root->root_key.offset = 0;
> +
> +	check.owner_root = btrfs_root_id(tmp_root);
> +	check.transid = btrfs_root_generation(&tmp_root->root_item);
> +	check.level = btrfs_root_level(&tmp_root->root_item);
> +
> +	tmp_root->node = read_tree_block(gfs_info,
> +					 btrfs_root_bytenr(&tmp_root->root_item),
> +					 &check);
> +	if (IS_ERR(tmp_root->node)) {
> +		tmp_root->node = NULL;
> +		return 1;
> +	}
> +
> +	if (btrfs_header_level(tmp_root->node) != btrfs_root_level(&tmp_root->root_item)) {
> +		error("root [%llu %llu] level %d does not match %d",
> +			tmp_root->root_key.objectid,
> +			tmp_root->root_key.offset,
> +			btrfs_header_level(tmp_root->node),
> +			btrfs_root_level(&tmp_root->root_item));
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int check_log(struct cache_tree *root_cache)
> +{
> +	struct btrfs_path path = { 0 };
> +	struct walk_control wc = { 0 };
> +	struct btrfs_key key;
> +	struct extent_buffer *leaf;
> +	struct btrfs_root *log_root = gfs_info->log_root_tree;
> +	int ret;
> +	int err = 0;
> +
> +	cache_tree_init(&wc.shared);
> +
> +	key.objectid = BTRFS_TREE_LOG_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = 0;
> +	ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
> +	if (ret < 0) {
> +		err = 1;
> +		goto out;
> +	}
> +
> +	while (1) {
> +		leaf = path.nodes[0];
> +		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +			ret = btrfs_next_leaf(log_root, &path);
> +			if (ret) {
> +				if (ret < 0)
> +					err = 1;
> +				break;
> +			}
> +			leaf = path.nodes[0];
> +		}
> +		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +		if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
> +		    key.type > BTRFS_ROOT_ITEM_KEY)
> +			break;
> +
> +		if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
> +		    key.type == BTRFS_ROOT_ITEM_KEY &&
> +		    fs_root_objectid(key.offset)) {
> +			struct btrfs_root tmp_root;
> +
> +			memset(&tmp_root, 0, sizeof(tmp_root));
> +
> +			ret = load_log_root(key.offset, &path, &tmp_root);
> +			if (ret) {
> +				err = 1;
> +				goto next;
> +			}
> +
> +			ret = check_log_root(&tmp_root, root_cache, &wc);
> +			if (ret)
> +				err = 1;
> +
> +next:
> +			if (tmp_root.node)
> +				free_extent_buffer(tmp_root.node);
> +		}
> +
> +		path.slots[0]++;
> +	}
> +out:
> +	btrfs_release_path(&path);
> +	if (err)
> +		free_extent_cache_tree(&wc.shared);
> +	if (!cache_tree_empty(&wc.shared))
> +		fprintf(stderr, "warning line %d\n", __LINE__);
> +
> +	return err;
> +}
> +
>  static void free_roots_info_cache(void)
>  {
>  	if (!roots_info_cache)
> @@ -10585,9 +10853,21 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  		goto close_out;
>  	}
>  
> +	if (gfs_info->log_root_tree) {
> +		fprintf(stderr, "[1/8] checking log\n");
> +		ret = check_log(&root_cache);
> +
> +		if (ret)
> +			error("errors found in log");
> +		err |= !!ret;
> +	} else {
> +		fprintf(stderr,
> +		"[1/8] checking log skipped (none written)\n");
> +	}
> +
>  	if (!init_extent_tree) {
>  		if (!g_task_ctx.progress_enabled) {
> -			fprintf(stderr, "[1/7] checking root items\n");
> +			fprintf(stderr, "[2/8] checking root items\n");
>  		} else {
>  			g_task_ctx.tp = TASK_ROOT_ITEMS;
>  			task_start(g_task_ctx.info, &g_task_ctx.start_time,
> @@ -10622,11 +10902,11 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  			}
>  		}
>  	} else {
> -		fprintf(stderr, "[1/7] checking root items... skipped\n");
> +		fprintf(stderr, "[2/8] checking root items... skipped\n");
>  	}
>  
>  	if (!g_task_ctx.progress_enabled) {
> -		fprintf(stderr, "[2/7] checking extents\n");
> +		fprintf(stderr, "[3/8] checking extents\n");
>  	} else {
>  		g_task_ctx.tp = TASK_EXTENTS;
>  		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10644,9 +10924,9 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  
>  	if (!g_task_ctx.progress_enabled) {
>  		if (is_free_space_tree)
> -			fprintf(stderr, "[3/7] checking free space tree\n");
> +			fprintf(stderr, "[4/8] checking free space tree\n");
>  		else
> -			fprintf(stderr, "[3/7] checking free space cache\n");
> +			fprintf(stderr, "[4/8] checking free space cache\n");
>  	} else {
>  		g_task_ctx.tp = TASK_FREE_SPACE;
>  		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10664,7 +10944,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  	 */
>  	no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES);
>  	if (!g_task_ctx.progress_enabled) {
> -		fprintf(stderr, "[4/7] checking fs roots\n");
> +		fprintf(stderr, "[5/8] checking fs roots\n");
>  	} else {
>  		g_task_ctx.tp = TASK_FS_ROOTS;
>  		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10680,10 +10960,10 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  
>  	if (!g_task_ctx.progress_enabled) {
>  		if (check_data_csum)
> -			fprintf(stderr, "[5/7] checking csums against data\n");
> +			fprintf(stderr, "[6/8] checking csums against data\n");
>  		else
>  			fprintf(stderr,
> -		"[5/7] checking only csums items (without verifying data)\n");
> +		"[6/8] checking only csums items (without verifying data)\n");
>  	} else {
>  		g_task_ctx.tp = TASK_CSUMS;
>  		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10702,7 +10982,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  	/* For low memory mode, check_fs_roots_v2 handles root refs */
>          if (check_mode != CHECK_MODE_LOWMEM) {
>  		if (!g_task_ctx.progress_enabled) {
> -			fprintf(stderr, "[6/7] checking root refs\n");
> +			fprintf(stderr, "[7/8] checking root refs\n");
>  		} else {
>  			g_task_ctx.tp = TASK_ROOT_REFS;
>  			task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10717,7 +10997,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  		}
>  	} else {
>  		fprintf(stderr,
> -	"[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
> +	"[7/8] checking root refs done with fs roots in lowmem mode, skipping\n");
>  	}
>  
>  	while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) {
> @@ -10749,7 +11029,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  
>  	if (gfs_info->quota_enabled) {
>  		if (!g_task_ctx.progress_enabled) {
> -			fprintf(stderr, "[7/7] checking quota groups\n");
> +			fprintf(stderr, "[8/8] checking quota groups\n");
>  		} else {
>  			g_task_ctx.tp = TASK_QGROUPS;
>  			task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10772,7 +11052,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>  		ret = 0;
>  	} else {
>  		fprintf(stderr,
> -		"[7/7] checking quota groups skipped (not enabled on this FS)\n");
> +		"[8/8] checking quota groups skipped (not enabled on this FS)\n");
>  	}
>  
>  	if (!list_empty(&gfs_info->recow_ebs)) {
> diff --git a/tests/fsck-tests/063-log-missing-csum/default.img.xz b/tests/fsck-tests/063-log-missing-csum/default.img.xz
> new file mode 100644
> index 0000000000000000000000000000000000000000..c9b4f420ac23866cd142428daf21739efda0762d
> GIT binary patch
> literal 1288
> zcmV+j1^4>>H+ooF000E$*0e?f03iVu0001VFXf}+)Bgm;T>wRyj;C3^v%$$4d1wo3
> zjjaF1$8Jv*pMMm%#Ch6IM%}7&=@^TvKIIdYjZ@t7T($M|Hz%Cr>ZwJ6sj_bRsY^a*
> zIr#q#U>$p<Go1bOhu5#Q<?@08{(UO6n<sqYIY$<RXz3}#&sBuS|AUrsgQ{%?z1>tr
> zW^k~hR~HJs$P?zuw_V+%>i2AU4x-C~^RmH%0#2o7VY;D>d@D<+CC=J4l?Bs2d@5yC
> z&pKh_V}nG$A*f{hGHlfKeT*E3NA1Q(Nt;TxvV6m2A_RVpD=`*t<8b6_KTom1S=|eG
> z>OMhPCcl7*3tYx)YhtFm-3)~e_SPBasmqw|$jnE_-QZfatuNh^bkJ5yW{ZX7NmD)i
> zLKCQYs5y!To5G>tIYzSNigXu|<l&x%JzxMA5aW-t+nod|B!efqr+_#STUo@Xu>wY=
> z*;zh8-nLPC+GYq6HU^A<Qjkh41{(MMB=p?8i7|;p<cL9o>Q7;56uclyq=uCMb1GmZ
> zuq5@iwv1^<TQdlj9#Xdq$Vj!1d~GO$P#HmKmPt*t2iPme{8~y6{{T++2nv+X7)Fnf
> z`$pgevh)w!LR&hx2a|{sW@ac43(Sl>??<SGJB`t-nDXvSDSY*87<ziAenKb9p4UlP
> z*(x(s)OBgD_f*R-*yBUaiD3U<uhy+{P{T&x@o^!9gXa@=Kts_p@u`Tb)rQcu{)tzQ
> zhCXcu`CGkyKuQRy(M0c#fPDWJwyhC`w2mizWqVSf==Sk-nywG!nJzNT5aM0tVb}Va
> zK?^BQbY*CPLeJtG``VF4F@BVWBpSVWDFk`J!cOhagXH=GB1GApQ?zsg8%y=J$Fgl1
> z)+=jFc)g}pALW^HG}wkoY7fOfbZ274b~+dEoA?l%Xzz`P-F)vb1^UziIz|no!02bS
> zWPfo?`TiKe8aIuiPilXte381GZ4tQc6=Y*KS0rehLE^7!W+MOr0AEFmQgw<KSH}Ix
> ze4qlov3aIg5e(Oh1%CwTZN^`UI!7g(U;iDFt(>e4XDB-M)l0stJd3pf<XujqB2}MW
> z6EY1#$l>s;T1IYy#{A9ljl@Ba_*dh)Qfq@8Y1A#q%p|d;USrIAZ*n1`%ho*!-RYOY
> z4j%X?D?CoiaW8A*7aue82GA9jmMGg6Bv&mO#v6uKVw1V~Uu01wXbz(RJN_|xw{`8K
> zSfdyQCJOdIwdN%h+Jr{$9!6otgQ8^`lkU4u;5wJlsy#P<D{Nc39m@=*OHZ533tvD<
> zfb=c>-IWOj%>%mQ@Ja!#!QIOVuluA}ecSFg;*v@<bR@g*YX>1-?kl9WGes{ulrRWq
> zTCW@Om{iybhK+m8SiS%n^)Kgk21T`IEW#){Czvz;Fix;5bdkB$BFXimx9iBaRZ9r=
> zV(H0OD8mK94(j^6gF8)|^i-78H`hR#ebBvFq}>vK6-ni7j1S>*viG^nA7<~Bn2KW0
> zw%D36qxTcpO;c2bYwf)K?p#^LFL4Ifq8?kP?#;(Jo8*(twFPRBj*u~h%Ej$X6JI2E
> z<L5)YZY#@aM-2sxodox#)=G1ycE6A6j?am&HLbKPv@FI{(HAJ#D*V@xG^U`bnRtH<
> y%w+Y9$G2sBKv@6)0000tuOhH&%TnS10pSUNs0#q29h=Ir#Ao{g000001X)@}7Iu~Z
> 
> literal 0
> HcmV?d00001
> 
> diff --git a/tests/fsck-tests/063-log-missing-csum/test.sh b/tests/fsck-tests/063-log-missing-csum/test.sh
> new file mode 100755
> index 00000000..40a48508
> --- /dev/null
> +++ b/tests/fsck-tests/063-log-missing-csum/test.sh
> @@ -0,0 +1,14 @@
> +#!/bin/bash
> +#
> +# Verify that check can detect missing log csum items.
> +
> +source "$TEST_TOP/common" || exit
> +
> +check_prereq btrfs
> +
> +check_image() {
> +	run_mustfail "missing log csum items not detected" \
> +		"$TOP/btrfs" check "$1"
> +}
> +
> +check_all_images
> -- 
> 2.45.2
>
Filipe Manana Oct. 2, 2024, 3:06 p.m. UTC | #2
On Mon, Jul 15, 2024 at 3:38 PM Mark Harmstone <maharmstone@fb.com> wrote:
>
> Currently the transaction log is more or less ignored by btrfs check,
> meaning that it's possible for a FS with a corrupt log to pass btrfs
> check, but be immediately corrupted by the kernel when it's mounted.
>
> This patch adds a check that if there's an inode in the log, any pending
> non-compressed writes also have corresponding csum entries.
>
> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
> ---
> Changes:
> v2:
> helper to load log root
> handle compressed extents
> loop logic improvements
> fix bug in check_log_csum
>
> v3:
> added test
> added explanatory comment to check_log_csum
> changed length operation to -=
>
>  check/main.c                                  | 304 +++++++++++++++++-
>  .../063-log-missing-csum/default.img.xz       | Bin 0 -> 1288 bytes
>  tests/fsck-tests/063-log-missing-csum/test.sh |  14 +
>  3 files changed, 306 insertions(+), 12 deletions(-)
>  create mode 100644 tests/fsck-tests/063-log-missing-csum/default.img.xz
>  create mode 100755 tests/fsck-tests/063-log-missing-csum/test.sh
>
> diff --git a/check/main.c b/check/main.c
> index 83c721d3..eaae3042 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -9787,6 +9787,274 @@ static int zero_log_tree(struct btrfs_root *root)
>         return ret;
>  }
>
> +/* Searches the given root for checksums in the range [addr, addr+length].
> + * Returns 1 if found, 0 if not found, and < 0 for an error. */
> +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length)
> +{
> +       struct btrfs_path path = { 0 };
> +       struct btrfs_key key;
> +       struct extent_buffer *leaf;
> +       u16 csum_size = gfs_info->csum_size;
> +       u16 num_entries;
> +       u64 data_len;
> +       int ret;
> +
> +       key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
> +       key.type = BTRFS_EXTENT_CSUM_KEY;
> +       key.offset = addr;
> +
> +       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> +       if (ret < 0)
> +               return ret;
> +
> +       if (ret > 0 && path.slots[0])
> +               path.slots[0]--;
> +
> +       ret = 0;
> +
> +       while (1) {
> +               leaf = path.nodes[0];
> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +                       ret = btrfs_next_leaf(root, &path);
> +                       if (ret) {
> +                               if (ret > 0)
> +                                       ret = 0;
> +
> +                               break;
> +                       }
> +                       leaf = path.nodes[0];
> +               }
> +
> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +               if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
> +                       break;
> +
> +               if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
> +                   key.type != BTRFS_EXTENT_CSUM_KEY)
> +                       goto next;
> +
> +               if (key.offset >= addr + length)
> +                       break;
> +
> +               num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
> +               data_len = num_entries * gfs_info->sectorsize;
> +
> +               if (addr >= key.offset && addr <= key.offset + data_len) {
> +                       u64 end = min(addr + length, key.offset + data_len);
> +
> +                       length -= (end - addr);
> +                       addr = end;
> +
> +                       if (length == 0)
> +                               break;
> +               }
> +
> +next:
> +               path.slots[0]++;
> +       }
> +
> +       btrfs_release_path(&path);
> +
> +       if (ret >= 0)
> +               ret = length == 0 ? 1 : 0;
> +
> +       return ret;
> +}
> +
> +static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache,
> +                         struct walk_control *wc)
> +{
> +       struct btrfs_path path = { 0 };
> +       struct btrfs_key key;
> +       struct extent_buffer *leaf;
> +       int ret, err = 0;
> +       u64 last_csum_inode = 0;
> +
> +       key.objectid = BTRFS_FIRST_FREE_OBJECTID;
> +       key.type = BTRFS_INODE_ITEM_KEY;
> +       key.offset = 0;
> +       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> +       if (ret < 0)
> +               return 1;
> +
> +       while (1) {
> +               leaf = path.nodes[0];
> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +                       ret = btrfs_next_leaf(root, &path);
> +                       if (ret) {
> +                               if (ret < 0)
> +                                       err = 1;
> +
> +                               break;
> +                       }
> +                       leaf = path.nodes[0];
> +               }
> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +               if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
> +                       break;
> +
> +               if (key.type == BTRFS_INODE_ITEM_KEY) {
> +                       struct btrfs_inode_item *item;
> +
> +                       item = btrfs_item_ptr(leaf, path.slots[0],
> +                                             struct btrfs_inode_item);
> +
> +                       if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM))
> +                               last_csum_inode = key.objectid;
> +               } else if (key.type == BTRFS_EXTENT_DATA_KEY &&
> +                          key.objectid == last_csum_inode) {
> +                       struct btrfs_file_extent_item *fi;
> +                       u64 addr, length;
> +
> +                       fi = btrfs_item_ptr(leaf, path.slots[0],
> +                                           struct btrfs_file_extent_item);
> +
> +                       if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
> +                               goto next;
> +
> +                       addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
> +                               btrfs_file_extent_offset(leaf, fi);
> +                       length = btrfs_file_extent_num_bytes(leaf, fi);
> +
> +                       ret = check_log_csum(root, addr, length);
> +                       if (ret < 0) {
> +                               err = 1;
> +                               break;
> +                       }
> +
> +                       if (!ret) {
> +                               error("csum missing in log (root %llu, inode %llu, "
> +                                     "offset %llu, address 0x%llx, length %llu)",
> +                                     root->objectid, last_csum_inode, key.offset,
> +                                     addr, length);

This is causing some false failures when running fstests, like test
btrfs/056 for example.
There it's attempting to lookup csums for file extents representing
holes (disk_bytenr == 0), which don't exist.

Also this change assumes that for every file extent item we must have
a csum item logged, which is not always the case.
For example, before the following kernel commit:

commit 7f30c07288bb9e20463182d0db56416025f85e08
Author: Filipe Manana <fdmanana@suse.com>
Date:   Thu Feb 17 12:12:03 2022 +0000

    btrfs: stop copying old file extents when doing a full fsync

We could log old file extent items, from past transactions, but
because they were from past transactions, we didn't log the csums
because they aren't needed.

So on older kernels that triggers a false alarm too.

Thanks.

> +                               err = 1;
> +                       }
> +               }
> +
> +next:
> +               path.slots[0]++;
> +       }
> +
> +       btrfs_release_path(&path);
> +
> +       return err;
> +}
> +
> +static int load_log_root(u64 root_id, struct btrfs_path *path,
> +                        struct btrfs_root *tmp_root)
> +{
> +       struct extent_buffer *l;
> +       struct btrfs_tree_parent_check check = { 0 };
> +
> +       btrfs_setup_root(tmp_root, gfs_info, root_id);
> +
> +       l = path->nodes[0];
> +       read_extent_buffer(l, &tmp_root->root_item,
> +                       btrfs_item_ptr_offset(l, path->slots[0]),
> +                       sizeof(tmp_root->root_item));
> +
> +       tmp_root->root_key.objectid = root_id;
> +       tmp_root->root_key.type = BTRFS_ROOT_ITEM_KEY;
> +       tmp_root->root_key.offset = 0;
> +
> +       check.owner_root = btrfs_root_id(tmp_root);
> +       check.transid = btrfs_root_generation(&tmp_root->root_item);
> +       check.level = btrfs_root_level(&tmp_root->root_item);
> +
> +       tmp_root->node = read_tree_block(gfs_info,
> +                                        btrfs_root_bytenr(&tmp_root->root_item),
> +                                        &check);
> +       if (IS_ERR(tmp_root->node)) {
> +               tmp_root->node = NULL;
> +               return 1;
> +       }
> +
> +       if (btrfs_header_level(tmp_root->node) != btrfs_root_level(&tmp_root->root_item)) {
> +               error("root [%llu %llu] level %d does not match %d",
> +                       tmp_root->root_key.objectid,
> +                       tmp_root->root_key.offset,
> +                       btrfs_header_level(tmp_root->node),
> +                       btrfs_root_level(&tmp_root->root_item));
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +static int check_log(struct cache_tree *root_cache)
> +{
> +       struct btrfs_path path = { 0 };
> +       struct walk_control wc = { 0 };
> +       struct btrfs_key key;
> +       struct extent_buffer *leaf;
> +       struct btrfs_root *log_root = gfs_info->log_root_tree;
> +       int ret;
> +       int err = 0;
> +
> +       cache_tree_init(&wc.shared);
> +
> +       key.objectid = BTRFS_TREE_LOG_OBJECTID;
> +       key.type = BTRFS_ROOT_ITEM_KEY;
> +       key.offset = 0;
> +       ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
> +       if (ret < 0) {
> +               err = 1;
> +               goto out;
> +       }
> +
> +       while (1) {
> +               leaf = path.nodes[0];
> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
> +                       ret = btrfs_next_leaf(log_root, &path);
> +                       if (ret) {
> +                               if (ret < 0)
> +                                       err = 1;
> +                               break;
> +                       }
> +                       leaf = path.nodes[0];
> +               }
> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
> +
> +               if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
> +                   key.type > BTRFS_ROOT_ITEM_KEY)
> +                       break;
> +
> +               if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
> +                   key.type == BTRFS_ROOT_ITEM_KEY &&
> +                   fs_root_objectid(key.offset)) {
> +                       struct btrfs_root tmp_root;
> +
> +                       memset(&tmp_root, 0, sizeof(tmp_root));
> +
> +                       ret = load_log_root(key.offset, &path, &tmp_root);
> +                       if (ret) {
> +                               err = 1;
> +                               goto next;
> +                       }
> +
> +                       ret = check_log_root(&tmp_root, root_cache, &wc);
> +                       if (ret)
> +                               err = 1;
> +
> +next:
> +                       if (tmp_root.node)
> +                               free_extent_buffer(tmp_root.node);
> +               }
> +
> +               path.slots[0]++;
> +       }
> +out:
> +       btrfs_release_path(&path);
> +       if (err)
> +               free_extent_cache_tree(&wc.shared);
> +       if (!cache_tree_empty(&wc.shared))
> +               fprintf(stderr, "warning line %d\n", __LINE__);
> +
> +       return err;
> +}
> +
>  static void free_roots_info_cache(void)
>  {
>         if (!roots_info_cache)
> @@ -10585,9 +10853,21 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>                 goto close_out;
>         }
>
> +       if (gfs_info->log_root_tree) {
> +               fprintf(stderr, "[1/8] checking log\n");
> +               ret = check_log(&root_cache);
> +
> +               if (ret)
> +                       error("errors found in log");
> +               err |= !!ret;
> +       } else {
> +               fprintf(stderr,
> +               "[1/8] checking log skipped (none written)\n");
> +       }
> +
>         if (!init_extent_tree) {
>                 if (!g_task_ctx.progress_enabled) {
> -                       fprintf(stderr, "[1/7] checking root items\n");
> +                       fprintf(stderr, "[2/8] checking root items\n");
>                 } else {
>                         g_task_ctx.tp = TASK_ROOT_ITEMS;
>                         task_start(g_task_ctx.info, &g_task_ctx.start_time,
> @@ -10622,11 +10902,11 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>                         }
>                 }
>         } else {
> -               fprintf(stderr, "[1/7] checking root items... skipped\n");
> +               fprintf(stderr, "[2/8] checking root items... skipped\n");
>         }
>
>         if (!g_task_ctx.progress_enabled) {
> -               fprintf(stderr, "[2/7] checking extents\n");
> +               fprintf(stderr, "[3/8] checking extents\n");
>         } else {
>                 g_task_ctx.tp = TASK_EXTENTS;
>                 task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10644,9 +10924,9 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>
>         if (!g_task_ctx.progress_enabled) {
>                 if (is_free_space_tree)
> -                       fprintf(stderr, "[3/7] checking free space tree\n");
> +                       fprintf(stderr, "[4/8] checking free space tree\n");
>                 else
> -                       fprintf(stderr, "[3/7] checking free space cache\n");
> +                       fprintf(stderr, "[4/8] checking free space cache\n");
>         } else {
>                 g_task_ctx.tp = TASK_FREE_SPACE;
>                 task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10664,7 +10944,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>          */
>         no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES);
>         if (!g_task_ctx.progress_enabled) {
> -               fprintf(stderr, "[4/7] checking fs roots\n");
> +               fprintf(stderr, "[5/8] checking fs roots\n");
>         } else {
>                 g_task_ctx.tp = TASK_FS_ROOTS;
>                 task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10680,10 +10960,10 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>
>         if (!g_task_ctx.progress_enabled) {
>                 if (check_data_csum)
> -                       fprintf(stderr, "[5/7] checking csums against data\n");
> +                       fprintf(stderr, "[6/8] checking csums against data\n");
>                 else
>                         fprintf(stderr,
> -               "[5/7] checking only csums items (without verifying data)\n");
> +               "[6/8] checking only csums items (without verifying data)\n");
>         } else {
>                 g_task_ctx.tp = TASK_CSUMS;
>                 task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10702,7 +10982,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>         /* For low memory mode, check_fs_roots_v2 handles root refs */
>          if (check_mode != CHECK_MODE_LOWMEM) {
>                 if (!g_task_ctx.progress_enabled) {
> -                       fprintf(stderr, "[6/7] checking root refs\n");
> +                       fprintf(stderr, "[7/8] checking root refs\n");
>                 } else {
>                         g_task_ctx.tp = TASK_ROOT_REFS;
>                         task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10717,7 +10997,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>                 }
>         } else {
>                 fprintf(stderr,
> -       "[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
> +       "[7/8] checking root refs done with fs roots in lowmem mode, skipping\n");
>         }
>
>         while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) {
> @@ -10749,7 +11029,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>
>         if (gfs_info->quota_enabled) {
>                 if (!g_task_ctx.progress_enabled) {
> -                       fprintf(stderr, "[7/7] checking quota groups\n");
> +                       fprintf(stderr, "[8/8] checking quota groups\n");
>                 } else {
>                         g_task_ctx.tp = TASK_QGROUPS;
>                         task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
> @@ -10772,7 +11052,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>                 ret = 0;
>         } else {
>                 fprintf(stderr,
> -               "[7/7] checking quota groups skipped (not enabled on this FS)\n");
> +               "[8/8] checking quota groups skipped (not enabled on this FS)\n");
>         }
>
>         if (!list_empty(&gfs_info->recow_ebs)) {
> diff --git a/tests/fsck-tests/063-log-missing-csum/default.img.xz b/tests/fsck-tests/063-log-missing-csum/default.img.xz
> new file mode 100644
> index 0000000000000000000000000000000000000000..c9b4f420ac23866cd142428daf21739efda0762d
> GIT binary patch
> literal 1288
> zcmV+j1^4>>H+ooF000E$*0e?f03iVu0001VFXf}+)Bgm;T>wRyj;C3^v%$$4d1wo3
> zjjaF1$8Jv*pMMm%#Ch6IM%}7&=@^TvKIIdYjZ@t7T($M|Hz%Cr>ZwJ6sj_bRsY^a*
> zIr#q#U>$p<Go1bOhu5#Q<?@08{(UO6n<sqYIY$<RXz3}#&sBuS|AUrsgQ{%?z1>tr
> zW^k~hR~HJs$P?zuw_V+%>i2AU4x-C~^RmH%0#2o7VY;D>d@D<+CC=J4l?Bs2d@5yC
> z&pKh_V}nG$A*f{hGHlfKeT*E3NA1Q(Nt;TxvV6m2A_RVpD=`*t<8b6_KTom1S=|eG
> z>OMhPCcl7*3tYx)YhtFm-3)~e_SPBasmqw|$jnE_-QZfatuNh^bkJ5yW{ZX7NmD)i
> zLKCQYs5y!To5G>tIYzSNigXu|<l&x%JzxMA5aW-t+nod|B!efqr+_#STUo@Xu>wY=
> z*;zh8-nLPC+GYq6HU^A<Qjkh41{(MMB=p?8i7|;p<cL9o>Q7;56uclyq=uCMb1GmZ
> zuq5@iwv1^<TQdlj9#Xdq$Vj!1d~GO$P#HmKmPt*t2iPme{8~y6{{T++2nv+X7)Fnf
> z`$pgevh)w!LR&hx2a|{sW@ac43(Sl>??<SGJB`t-nDXvSDSY*87<ziAenKb9p4UlP
> z*(x(s)OBgD_f*R-*yBUaiD3U<uhy+{P{T&x@o^!9gXa@=Kts_p@u`Tb)rQcu{)tzQ
> zhCXcu`CGkyKuQRy(M0c#fPDWJwyhC`w2mizWqVSf==Sk-nywG!nJzNT5aM0tVb}Va
> zK?^BQbY*CPLeJtG``VF4F@BVWBpSVWDFk`J!cOhagXH=GB1GApQ?zsg8%y=J$Fgl1
> z)+=jFc)g}pALW^HG}wkoY7fOfbZ274b~+dEoA?l%Xzz`P-F)vb1^UziIz|no!02bS
> zWPfo?`TiKe8aIuiPilXte381GZ4tQc6=Y*KS0rehLE^7!W+MOr0AEFmQgw<KSH}Ix
> ze4qlov3aIg5e(Oh1%CwTZN^`UI!7g(U;iDFt(>e4XDB-M)l0stJd3pf<XujqB2}MW
> z6EY1#$l>s;T1IYy#{A9ljl@Ba_*dh)Qfq@8Y1A#q%p|d;USrIAZ*n1`%ho*!-RYOY
> z4j%X?D?CoiaW8A*7aue82GA9jmMGg6Bv&mO#v6uKVw1V~Uu01wXbz(RJN_|xw{`8K
> zSfdyQCJOdIwdN%h+Jr{$9!6otgQ8^`lkU4u;5wJlsy#P<D{Nc39m@=*OHZ533tvD<
> zfb=c>-IWOj%>%mQ@Ja!#!QIOVuluA}ecSFg;*v@<bR@g*YX>1-?kl9WGes{ulrRWq
> zTCW@Om{iybhK+m8SiS%n^)Kgk21T`IEW#){Czvz;Fix;5bdkB$BFXimx9iBaRZ9r=
> zV(H0OD8mK94(j^6gF8)|^i-78H`hR#ebBvFq}>vK6-ni7j1S>*viG^nA7<~Bn2KW0
> zw%D36qxTcpO;c2bYwf)K?p#^LFL4Ifq8?kP?#;(Jo8*(twFPRBj*u~h%Ej$X6JI2E
> z<L5)YZY#@aM-2sxodox#)=G1ycE6A6j?am&HLbKPv@FI{(HAJ#D*V@xG^U`bnRtH<
> y%w+Y9$G2sBKv@6)0000tuOhH&%TnS10pSUNs0#q29h=Ir#Ao{g000001X)@}7Iu~Z
>
> literal 0
> HcmV?d00001
>
> diff --git a/tests/fsck-tests/063-log-missing-csum/test.sh b/tests/fsck-tests/063-log-missing-csum/test.sh
> new file mode 100755
> index 00000000..40a48508
> --- /dev/null
> +++ b/tests/fsck-tests/063-log-missing-csum/test.sh
> @@ -0,0 +1,14 @@
> +#!/bin/bash
> +#
> +# Verify that check can detect missing log csum items.
> +
> +source "$TEST_TOP/common" || exit
> +
> +check_prereq btrfs
> +
> +check_image() {
> +       run_mustfail "missing log csum items not detected" \
> +               "$TOP/btrfs" check "$1"
> +}
> +
> +check_all_images
> --
> 2.45.2
>
>
Mark Harmstone Oct. 2, 2024, 4:23 p.m. UTC | #3
Thanks Filipe, I'll look into it.

Mark

On 2/10/24 16:06, Filipe Manana wrote:
> > 
> On Mon, Jul 15, 2024 at 3:38 PM Mark Harmstone <maharmstone@fb.com> wrote:
>>
>> Currently the transaction log is more or less ignored by btrfs check,
>> meaning that it's possible for a FS with a corrupt log to pass btrfs
>> check, but be immediately corrupted by the kernel when it's mounted.
>>
>> This patch adds a check that if there's an inode in the log, any pending
>> non-compressed writes also have corresponding csum entries.
>>
>> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
>> ---
>> Changes:
>> v2:
>> helper to load log root
>> handle compressed extents
>> loop logic improvements
>> fix bug in check_log_csum
>>
>> v3:
>> added test
>> added explanatory comment to check_log_csum
>> changed length operation to -=
>>
>>   check/main.c                                  | 304 +++++++++++++++++-
>>   .../063-log-missing-csum/default.img.xz       | Bin 0 -> 1288 bytes
>>   tests/fsck-tests/063-log-missing-csum/test.sh |  14 +
>>   3 files changed, 306 insertions(+), 12 deletions(-)
>>   create mode 100644 tests/fsck-tests/063-log-missing-csum/default.img.xz
>>   create mode 100755 tests/fsck-tests/063-log-missing-csum/test.sh
>>
>> diff --git a/check/main.c b/check/main.c
>> index 83c721d3..eaae3042 100644
>> --- a/check/main.c
>> +++ b/check/main.c
>> @@ -9787,6 +9787,274 @@ static int zero_log_tree(struct btrfs_root *root)
>>          return ret;
>>   }
>>
>> +/* Searches the given root for checksums in the range [addr, addr+length].
>> + * Returns 1 if found, 0 if not found, and < 0 for an error. */
>> +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length)
>> +{
>> +       struct btrfs_path path = { 0 };
>> +       struct btrfs_key key;
>> +       struct extent_buffer *leaf;
>> +       u16 csum_size = gfs_info->csum_size;
>> +       u16 num_entries;
>> +       u64 data_len;
>> +       int ret;
>> +
>> +       key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
>> +       key.type = BTRFS_EXTENT_CSUM_KEY;
>> +       key.offset = addr;
>> +
>> +       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       if (ret > 0 && path.slots[0])
>> +               path.slots[0]--;
>> +
>> +       ret = 0;
>> +
>> +       while (1) {
>> +               leaf = path.nodes[0];
>> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
>> +                       ret = btrfs_next_leaf(root, &path);
>> +                       if (ret) {
>> +                               if (ret > 0)
>> +                                       ret = 0;
>> +
>> +                               break;
>> +                       }
>> +                       leaf = path.nodes[0];
>> +               }
>> +
>> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
>> +
>> +               if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
>> +                       break;
>> +
>> +               if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
>> +                   key.type != BTRFS_EXTENT_CSUM_KEY)
>> +                       goto next;
>> +
>> +               if (key.offset >= addr + length)
>> +                       break;
>> +
>> +               num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
>> +               data_len = num_entries * gfs_info->sectorsize;
>> +
>> +               if (addr >= key.offset && addr <= key.offset + data_len) {
>> +                       u64 end = min(addr + length, key.offset + data_len);
>> +
>> +                       length -= (end - addr);
>> +                       addr = end;
>> +
>> +                       if (length == 0)
>> +                               break;
>> +               }
>> +
>> +next:
>> +               path.slots[0]++;
>> +       }
>> +
>> +       btrfs_release_path(&path);
>> +
>> +       if (ret >= 0)
>> +               ret = length == 0 ? 1 : 0;
>> +
>> +       return ret;
>> +}
>> +
>> +static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache,
>> +                         struct walk_control *wc)
>> +{
>> +       struct btrfs_path path = { 0 };
>> +       struct btrfs_key key;
>> +       struct extent_buffer *leaf;
>> +       int ret, err = 0;
>> +       u64 last_csum_inode = 0;
>> +
>> +       key.objectid = BTRFS_FIRST_FREE_OBJECTID;
>> +       key.type = BTRFS_INODE_ITEM_KEY;
>> +       key.offset = 0;
>> +       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
>> +       if (ret < 0)
>> +               return 1;
>> +
>> +       while (1) {
>> +               leaf = path.nodes[0];
>> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
>> +                       ret = btrfs_next_leaf(root, &path);
>> +                       if (ret) {
>> +                               if (ret < 0)
>> +                                       err = 1;
>> +
>> +                               break;
>> +                       }
>> +                       leaf = path.nodes[0];
>> +               }
>> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
>> +
>> +               if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
>> +                       break;
>> +
>> +               if (key.type == BTRFS_INODE_ITEM_KEY) {
>> +                       struct btrfs_inode_item *item;
>> +
>> +                       item = btrfs_item_ptr(leaf, path.slots[0],
>> +                                             struct btrfs_inode_item);
>> +
>> +                       if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM))
>> +                               last_csum_inode = key.objectid;
>> +               } else if (key.type == BTRFS_EXTENT_DATA_KEY &&
>> +                          key.objectid == last_csum_inode) {
>> +                       struct btrfs_file_extent_item *fi;
>> +                       u64 addr, length;
>> +
>> +                       fi = btrfs_item_ptr(leaf, path.slots[0],
>> +                                           struct btrfs_file_extent_item);
>> +
>> +                       if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
>> +                               goto next;
>> +
>> +                       addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
>> +                               btrfs_file_extent_offset(leaf, fi);
>> +                       length = btrfs_file_extent_num_bytes(leaf, fi);
>> +
>> +                       ret = check_log_csum(root, addr, length);
>> +                       if (ret < 0) {
>> +                               err = 1;
>> +                               break;
>> +                       }
>> +
>> +                       if (!ret) {
>> +                               error("csum missing in log (root %llu, inode %llu, "
>> +                                     "offset %llu, address 0x%llx, length %llu)",
>> +                                     root->objectid, last_csum_inode, key.offset,
>> +                                     addr, length);
> 
> This is causing some false failures when running fstests, like test
> btrfs/056 for example.
> There it's attempting to lookup csums for file extents representing
> holes (disk_bytenr == 0), which don't exist.
> 
> Also this change assumes that for every file extent item we must have
> a csum item logged, which is not always the case.
> For example, before the following kernel commit:
> 
> commit 7f30c07288bb9e20463182d0db56416025f85e08
> Author: Filipe Manana <fdmanana@suse.com>
> Date:   Thu Feb 17 12:12:03 2022 +0000
> 
>      btrfs: stop copying old file extents when doing a full fsync
> 
> We could log old file extent items, from past transactions, but
> because they were from past transactions, we didn't log the csums
> because they aren't needed.
> 
> So on older kernels that triggers a false alarm too.
> 
> Thanks.
> 
>> +                               err = 1;
>> +                       }
>> +               }
>> +
>> +next:
>> +               path.slots[0]++;
>> +       }
>> +
>> +       btrfs_release_path(&path);
>> +
>> +       return err;
>> +}
>> +
>> +static int load_log_root(u64 root_id, struct btrfs_path *path,
>> +                        struct btrfs_root *tmp_root)
>> +{
>> +       struct extent_buffer *l;
>> +       struct btrfs_tree_parent_check check = { 0 };
>> +
>> +       btrfs_setup_root(tmp_root, gfs_info, root_id);
>> +
>> +       l = path->nodes[0];
>> +       read_extent_buffer(l, &tmp_root->root_item,
>> +                       btrfs_item_ptr_offset(l, path->slots[0]),
>> +                       sizeof(tmp_root->root_item));
>> +
>> +       tmp_root->root_key.objectid = root_id;
>> +       tmp_root->root_key.type = BTRFS_ROOT_ITEM_KEY;
>> +       tmp_root->root_key.offset = 0;
>> +
>> +       check.owner_root = btrfs_root_id(tmp_root);
>> +       check.transid = btrfs_root_generation(&tmp_root->root_item);
>> +       check.level = btrfs_root_level(&tmp_root->root_item);
>> +
>> +       tmp_root->node = read_tree_block(gfs_info,
>> +                                        btrfs_root_bytenr(&tmp_root->root_item),
>> +                                        &check);
>> +       if (IS_ERR(tmp_root->node)) {
>> +               tmp_root->node = NULL;
>> +               return 1;
>> +       }
>> +
>> +       if (btrfs_header_level(tmp_root->node) != btrfs_root_level(&tmp_root->root_item)) {
>> +               error("root [%llu %llu] level %d does not match %d",
>> +                       tmp_root->root_key.objectid,
>> +                       tmp_root->root_key.offset,
>> +                       btrfs_header_level(tmp_root->node),
>> +                       btrfs_root_level(&tmp_root->root_item));
>> +               return 1;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int check_log(struct cache_tree *root_cache)
>> +{
>> +       struct btrfs_path path = { 0 };
>> +       struct walk_control wc = { 0 };
>> +       struct btrfs_key key;
>> +       struct extent_buffer *leaf;
>> +       struct btrfs_root *log_root = gfs_info->log_root_tree;
>> +       int ret;
>> +       int err = 0;
>> +
>> +       cache_tree_init(&wc.shared);
>> +
>> +       key.objectid = BTRFS_TREE_LOG_OBJECTID;
>> +       key.type = BTRFS_ROOT_ITEM_KEY;
>> +       key.offset = 0;
>> +       ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
>> +       if (ret < 0) {
>> +               err = 1;
>> +               goto out;
>> +       }
>> +
>> +       while (1) {
>> +               leaf = path.nodes[0];
>> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
>> +                       ret = btrfs_next_leaf(log_root, &path);
>> +                       if (ret) {
>> +                               if (ret < 0)
>> +                                       err = 1;
>> +                               break;
>> +                       }
>> +                       leaf = path.nodes[0];
>> +               }
>> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
>> +
>> +               if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
>> +                   key.type > BTRFS_ROOT_ITEM_KEY)
>> +                       break;
>> +
>> +               if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
>> +                   key.type == BTRFS_ROOT_ITEM_KEY &&
>> +                   fs_root_objectid(key.offset)) {
>> +                       struct btrfs_root tmp_root;
>> +
>> +                       memset(&tmp_root, 0, sizeof(tmp_root));
>> +
>> +                       ret = load_log_root(key.offset, &path, &tmp_root);
>> +                       if (ret) {
>> +                               err = 1;
>> +                               goto next;
>> +                       }
>> +
>> +                       ret = check_log_root(&tmp_root, root_cache, &wc);
>> +                       if (ret)
>> +                               err = 1;
>> +
>> +next:
>> +                       if (tmp_root.node)
>> +                               free_extent_buffer(tmp_root.node);
>> +               }
>> +
>> +               path.slots[0]++;
>> +       }
>> +out:
>> +       btrfs_release_path(&path);
>> +       if (err)
>> +               free_extent_cache_tree(&wc.shared);
>> +       if (!cache_tree_empty(&wc.shared))
>> +               fprintf(stderr, "warning line %d\n", __LINE__);
>> +
>> +       return err;
>> +}
>> +
>>   static void free_roots_info_cache(void)
>>   {
>>          if (!roots_info_cache)
>> @@ -10585,9 +10853,21 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>                  goto close_out;
>>          }
>>
>> +       if (gfs_info->log_root_tree) {
>> +               fprintf(stderr, "[1/8] checking log\n");
>> +               ret = check_log(&root_cache);
>> +
>> +               if (ret)
>> +                       error("errors found in log");
>> +               err |= !!ret;
>> +       } else {
>> +               fprintf(stderr,
>> +               "[1/8] checking log skipped (none written)\n");
>> +       }
>> +
>>          if (!init_extent_tree) {
>>                  if (!g_task_ctx.progress_enabled) {
>> -                       fprintf(stderr, "[1/7] checking root items\n");
>> +                       fprintf(stderr, "[2/8] checking root items\n");
>>                  } else {
>>                          g_task_ctx.tp = TASK_ROOT_ITEMS;
>>                          task_start(g_task_ctx.info, &g_task_ctx.start_time,
>> @@ -10622,11 +10902,11 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>                          }
>>                  }
>>          } else {
>> -               fprintf(stderr, "[1/7] checking root items... skipped\n");
>> +               fprintf(stderr, "[2/8] checking root items... skipped\n");
>>          }
>>
>>          if (!g_task_ctx.progress_enabled) {
>> -               fprintf(stderr, "[2/7] checking extents\n");
>> +               fprintf(stderr, "[3/8] checking extents\n");
>>          } else {
>>                  g_task_ctx.tp = TASK_EXTENTS;
>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
>> @@ -10644,9 +10924,9 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>
>>          if (!g_task_ctx.progress_enabled) {
>>                  if (is_free_space_tree)
>> -                       fprintf(stderr, "[3/7] checking free space tree\n");
>> +                       fprintf(stderr, "[4/8] checking free space tree\n");
>>                  else
>> -                       fprintf(stderr, "[3/7] checking free space cache\n");
>> +                       fprintf(stderr, "[4/8] checking free space cache\n");
>>          } else {
>>                  g_task_ctx.tp = TASK_FREE_SPACE;
>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
>> @@ -10664,7 +10944,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>           */
>>          no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES);
>>          if (!g_task_ctx.progress_enabled) {
>> -               fprintf(stderr, "[4/7] checking fs roots\n");
>> +               fprintf(stderr, "[5/8] checking fs roots\n");
>>          } else {
>>                  g_task_ctx.tp = TASK_FS_ROOTS;
>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
>> @@ -10680,10 +10960,10 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>
>>          if (!g_task_ctx.progress_enabled) {
>>                  if (check_data_csum)
>> -                       fprintf(stderr, "[5/7] checking csums against data\n");
>> +                       fprintf(stderr, "[6/8] checking csums against data\n");
>>                  else
>>                          fprintf(stderr,
>> -               "[5/7] checking only csums items (without verifying data)\n");
>> +               "[6/8] checking only csums items (without verifying data)\n");
>>          } else {
>>                  g_task_ctx.tp = TASK_CSUMS;
>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
>> @@ -10702,7 +10982,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>          /* For low memory mode, check_fs_roots_v2 handles root refs */
>>           if (check_mode != CHECK_MODE_LOWMEM) {
>>                  if (!g_task_ctx.progress_enabled) {
>> -                       fprintf(stderr, "[6/7] checking root refs\n");
>> +                       fprintf(stderr, "[7/8] checking root refs\n");
>>                  } else {
>>                          g_task_ctx.tp = TASK_ROOT_REFS;
>>                          task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
>> @@ -10717,7 +10997,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>                  }
>>          } else {
>>                  fprintf(stderr,
>> -       "[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
>> +       "[7/8] checking root refs done with fs roots in lowmem mode, skipping\n");
>>          }
>>
>>          while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) {
>> @@ -10749,7 +11029,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>
>>          if (gfs_info->quota_enabled) {
>>                  if (!g_task_ctx.progress_enabled) {
>> -                       fprintf(stderr, "[7/7] checking quota groups\n");
>> +                       fprintf(stderr, "[8/8] checking quota groups\n");
>>                  } else {
>>                          g_task_ctx.tp = TASK_QGROUPS;
>>                          task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
>> @@ -10772,7 +11052,7 @@ static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
>>                  ret = 0;
>>          } else {
>>                  fprintf(stderr,
>> -               "[7/7] checking quota groups skipped (not enabled on this FS)\n");
>> +               "[8/8] checking quota groups skipped (not enabled on this FS)\n");
>>          }
>>
>>          if (!list_empty(&gfs_info->recow_ebs)) {
>> diff --git a/tests/fsck-tests/063-log-missing-csum/default.img.xz b/tests/fsck-tests/063-log-missing-csum/default.img.xz
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..c9b4f420ac23866cd142428daf21739efda0762d
>> GIT binary patch
>> literal 1288
>> zcmV+j1^4>>H+ooF000E$*0e?f03iVu0001VFXf}+)Bgm;T>wRyj;C3^v%$$4d1wo3
>> zjjaF1$8Jv*pMMm%#Ch6IM%}7&=@^TvKIIdYjZ@t7T($M|Hz%Cr>ZwJ6sj_bRsY^a*
>> zIr#q#U>$p<Go1bOhu5#Q<?@08{(UO6n<sqYIY$<RXz3}#&sBuS|AUrsgQ{%?z1>tr
>> zW^k~hR~HJs$P?zuw_V+%>i2AU4x-C~^RmH%0#2o7VY;D>d@D<+CC=J4l?Bs2d@5yC
>> z&pKh_V}nG$A*f{hGHlfKeT*E3NA1Q(Nt;TxvV6m2A_RVpD=`*t<8b6_KTom1S=|eG
>> z>OMhPCcl7*3tYx)YhtFm-3)~e_SPBasmqw|$jnE_-QZfatuNh^bkJ5yW{ZX7NmD)i
>> zLKCQYs5y!To5G>tIYzSNigXu|<l&x%JzxMA5aW-t+nod|B!efqr+_#STUo@Xu>wY=
>> z*;zh8-nLPC+GYq6HU^A<Qjkh41{(MMB=p?8i7|;p<cL9o>Q7;56uclyq=uCMb1GmZ
>> zuq5@iwv1^<TQdlj9#Xdq$Vj!1d~GO$P#HmKmPt*t2iPme{8~y6{{T++2nv+X7)Fnf
>> z`$pgevh)w!LR&hx2a|{sW@ac43(Sl>??<SGJB`t-nDXvSDSY*87<ziAenKb9p4UlP
>> z*(x(s)OBgD_f*R-*yBUaiD3U<uhy+{P{T&x@o^!9gXa@=Kts_p@u`Tb)rQcu{)tzQ
>> zhCXcu`CGkyKuQRy(M0c#fPDWJwyhC`w2mizWqVSf==Sk-nywG!nJzNT5aM0tVb}Va
>> zK?^BQbY*CPLeJtG``VF4F@BVWBpSVWDFk`J!cOhagXH=GB1GApQ?zsg8%y=J$Fgl1
>> z)+=jFc)g}pALW^HG}wkoY7fOfbZ274b~+dEoA?l%Xzz`P-F)vb1^UziIz|no!02bS
>> zWPfo?`TiKe8aIuiPilXte381GZ4tQc6=Y*KS0rehLE^7!W+MOr0AEFmQgw<KSH}Ix
>> ze4qlov3aIg5e(Oh1%CwTZN^`UI!7g(U;iDFt(>e4XDB-M)l0stJd3pf<XujqB2}MW
>> z6EY1#$l>s;T1IYy#{A9ljl@Ba_*dh)Qfq@8Y1A#q%p|d;USrIAZ*n1`%ho*!-RYOY
>> z4j%X?D?CoiaW8A*7aue82GA9jmMGg6Bv&mO#v6uKVw1V~Uu01wXbz(RJN_|xw{`8K
>> zSfdyQCJOdIwdN%h+Jr{$9!6otgQ8^`lkU4u;5wJlsy#P<D{Nc39m@=*OHZ533tvD<
>> zfb=c>-IWOj%>%mQ@Ja!#!QIOVuluA}ecSFg;*v@<bR@g*YX>1-?kl9WGes{ulrRWq
>> zTCW@Om{iybhK+m8SiS%n^)Kgk21T`IEW#){Czvz;Fix;5bdkB$BFXimx9iBaRZ9r=
>> zV(H0OD8mK94(j^6gF8)|^i-78H`hR#ebBvFq}>vK6-ni7j1S>*viG^nA7<~Bn2KW0
>> zw%D36qxTcpO;c2bYwf)K?p#^LFL4Ifq8?kP?#;(Jo8*(twFPRBj*u~h%Ej$X6JI2E
>> z<L5)YZY#@aM-2sxodox#)=G1ycE6A6j?am&HLbKPv@FI{(HAJ#D*V@xG^U`bnRtH<
>> y%w+Y9$G2sBKv@6)0000tuOhH&%TnS10pSUNs0#q29h=Ir#Ao{g000001X)@}7Iu~Z
>>
>> literal 0
>> HcmV?d00001
>>
>> diff --git a/tests/fsck-tests/063-log-missing-csum/test.sh b/tests/fsck-tests/063-log-missing-csum/test.sh
>> new file mode 100755
>> index 00000000..40a48508
>> --- /dev/null
>> +++ b/tests/fsck-tests/063-log-missing-csum/test.sh
>> @@ -0,0 +1,14 @@
>> +#!/bin/bash
>> +#
>> +# Verify that check can detect missing log csum items.
>> +
>> +source "$TEST_TOP/common" || exit
>> +
>> +check_prereq btrfs
>> +
>> +check_image() {
>> +       run_mustfail "missing log csum items not detected" \
>> +               "$TOP/btrfs" check "$1"
>> +}
>> +
>> +check_all_images
>> --
>> 2.45.2
>>
>>
Mark Harmstone Oct. 4, 2024, 3:23 p.m. UTC | #4
This PR fixes the issues in btrfs/056: 
https://github.com/kdave/btrfs-progs/pull/904

Mark

On 2/10/24 17:23, Mark Harmstone wrote:
> Thanks Filipe, I'll look into it.
> 
> Mark
> 
> On 2/10/24 16:06, Filipe Manana wrote:
>> >>
>> On Mon, Jul 15, 2024 at 3:38 PM Mark Harmstone <maharmstone@fb.com> 
>> wrote:
>>>
>>> Currently the transaction log is more or less ignored by btrfs check,
>>> meaning that it's possible for a FS with a corrupt log to pass btrfs
>>> check, but be immediately corrupted by the kernel when it's mounted.
>>>
>>> This patch adds a check that if there's an inode in the log, any pending
>>> non-compressed writes also have corresponding csum entries.
>>>
>>> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
>>> ---
>>> Changes:
>>> v2:
>>> helper to load log root
>>> handle compressed extents
>>> loop logic improvements
>>> fix bug in check_log_csum
>>>
>>> v3:
>>> added test
>>> added explanatory comment to check_log_csum
>>> changed length operation to -=
>>>
>>>   check/main.c                                  | 304 +++++++++++++++++-
>>>   .../063-log-missing-csum/default.img.xz       | Bin 0 -> 1288 bytes
>>>   tests/fsck-tests/063-log-missing-csum/test.sh |  14 +
>>>   3 files changed, 306 insertions(+), 12 deletions(-)
>>>   create mode 100644 
>>> tests/fsck-tests/063-log-missing-csum/default.img.xz
>>>   create mode 100755 tests/fsck-tests/063-log-missing-csum/test.sh
>>>
>>> diff --git a/check/main.c b/check/main.c
>>> index 83c721d3..eaae3042 100644
>>> --- a/check/main.c
>>> +++ b/check/main.c
>>> @@ -9787,6 +9787,274 @@ static int zero_log_tree(struct btrfs_root 
>>> *root)
>>>          return ret;
>>>   }
>>>
>>> +/* Searches the given root for checksums in the range [addr, 
>>> addr+length].
>>> + * Returns 1 if found, 0 if not found, and < 0 for an error. */
>>> +static int check_log_csum(struct btrfs_root *root, u64 addr, u64 
>>> length)
>>> +{
>>> +       struct btrfs_path path = { 0 };
>>> +       struct btrfs_key key;
>>> +       struct extent_buffer *leaf;
>>> +       u16 csum_size = gfs_info->csum_size;
>>> +       u16 num_entries;
>>> +       u64 data_len;
>>> +       int ret;
>>> +
>>> +       key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
>>> +       key.type = BTRFS_EXTENT_CSUM_KEY;
>>> +       key.offset = addr;
>>> +
>>> +       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
>>> +       if (ret < 0)
>>> +               return ret;
>>> +
>>> +       if (ret > 0 && path.slots[0])
>>> +               path.slots[0]--;
>>> +
>>> +       ret = 0;
>>> +
>>> +       while (1) {
>>> +               leaf = path.nodes[0];
>>> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
>>> +                       ret = btrfs_next_leaf(root, &path);
>>> +                       if (ret) {
>>> +                               if (ret > 0)
>>> +                                       ret = 0;
>>> +
>>> +                               break;
>>> +                       }
>>> +                       leaf = path.nodes[0];
>>> +               }
>>> +
>>> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
>>> +
>>> +               if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
>>> +                       break;
>>> +
>>> +               if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
>>> +                   key.type != BTRFS_EXTENT_CSUM_KEY)
>>> +                       goto next;
>>> +
>>> +               if (key.offset >= addr + length)
>>> +                       break;
>>> +
>>> +               num_entries = btrfs_item_size(leaf, path.slots[0]) / 
>>> csum_size;
>>> +               data_len = num_entries * gfs_info->sectorsize;
>>> +
>>> +               if (addr >= key.offset && addr <= key.offset + 
>>> data_len) {
>>> +                       u64 end = min(addr + length, key.offset + 
>>> data_len);
>>> +
>>> +                       length -= (end - addr);
>>> +                       addr = end;
>>> +
>>> +                       if (length == 0)
>>> +                               break;
>>> +               }
>>> +
>>> +next:
>>> +               path.slots[0]++;
>>> +       }
>>> +
>>> +       btrfs_release_path(&path);
>>> +
>>> +       if (ret >= 0)
>>> +               ret = length == 0 ? 1 : 0;
>>> +
>>> +       return ret;
>>> +}
>>> +
>>> +static int check_log_root(struct btrfs_root *root, struct cache_tree 
>>> *root_cache,
>>> +                         struct walk_control *wc)
>>> +{
>>> +       struct btrfs_path path = { 0 };
>>> +       struct btrfs_key key;
>>> +       struct extent_buffer *leaf;
>>> +       int ret, err = 0;
>>> +       u64 last_csum_inode = 0;
>>> +
>>> +       key.objectid = BTRFS_FIRST_FREE_OBJECTID;
>>> +       key.type = BTRFS_INODE_ITEM_KEY;
>>> +       key.offset = 0;
>>> +       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
>>> +       if (ret < 0)
>>> +               return 1;
>>> +
>>> +       while (1) {
>>> +               leaf = path.nodes[0];
>>> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
>>> +                       ret = btrfs_next_leaf(root, &path);
>>> +                       if (ret) {
>>> +                               if (ret < 0)
>>> +                                       err = 1;
>>> +
>>> +                               break;
>>> +                       }
>>> +                       leaf = path.nodes[0];
>>> +               }
>>> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
>>> +
>>> +               if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
>>> +                       break;
>>> +
>>> +               if (key.type == BTRFS_INODE_ITEM_KEY) {
>>> +                       struct btrfs_inode_item *item;
>>> +
>>> +                       item = btrfs_item_ptr(leaf, path.slots[0],
>>> +                                             struct btrfs_inode_item);
>>> +
>>> +                       if (!(btrfs_inode_flags(leaf, item) & 
>>> BTRFS_INODE_NODATASUM))
>>> +                               last_csum_inode = key.objectid;
>>> +               } else if (key.type == BTRFS_EXTENT_DATA_KEY &&
>>> +                          key.objectid == last_csum_inode) {
>>> +                       struct btrfs_file_extent_item *fi;
>>> +                       u64 addr, length;
>>> +
>>> +                       fi = btrfs_item_ptr(leaf, path.slots[0],
>>> +                                           struct 
>>> btrfs_file_extent_item);
>>> +
>>> +                       if (btrfs_file_extent_type(leaf, fi) != 
>>> BTRFS_FILE_EXTENT_REG)
>>> +                               goto next;
>>> +
>>> +                       addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
>>> +                               btrfs_file_extent_offset(leaf, fi);
>>> +                       length = btrfs_file_extent_num_bytes(leaf, fi);
>>> +
>>> +                       ret = check_log_csum(root, addr, length);
>>> +                       if (ret < 0) {
>>> +                               err = 1;
>>> +                               break;
>>> +                       }
>>> +
>>> +                       if (!ret) {
>>> +                               error("csum missing in log (root 
>>> %llu, inode %llu, "
>>> +                                     "offset %llu, address 0x%llx, 
>>> length %llu)",
>>> +                                     root->objectid, 
>>> last_csum_inode, key.offset,
>>> +                                     addr, length);
>>
>> This is causing some false failures when running fstests, like test
>> btrfs/056 for example.
>> There it's attempting to lookup csums for file extents representing
>> holes (disk_bytenr == 0), which don't exist.
>>
>> Also this change assumes that for every file extent item we must have
>> a csum item logged, which is not always the case.
>> For example, before the following kernel commit:
>>
>> commit 7f30c07288bb9e20463182d0db56416025f85e08
>> Author: Filipe Manana <fdmanana@suse.com>
>> Date:   Thu Feb 17 12:12:03 2022 +0000
>>
>>      btrfs: stop copying old file extents when doing a full fsync
>>
>> We could log old file extent items, from past transactions, but
>> because they were from past transactions, we didn't log the csums
>> because they aren't needed.
>>
>> So on older kernels that triggers a false alarm too.
>>
>> Thanks.
>>
>>> +                               err = 1;
>>> +                       }
>>> +               }
>>> +
>>> +next:
>>> +               path.slots[0]++;
>>> +       }
>>> +
>>> +       btrfs_release_path(&path);
>>> +
>>> +       return err;
>>> +}
>>> +
>>> +static int load_log_root(u64 root_id, struct btrfs_path *path,
>>> +                        struct btrfs_root *tmp_root)
>>> +{
>>> +       struct extent_buffer *l;
>>> +       struct btrfs_tree_parent_check check = { 0 };
>>> +
>>> +       btrfs_setup_root(tmp_root, gfs_info, root_id);
>>> +
>>> +       l = path->nodes[0];
>>> +       read_extent_buffer(l, &tmp_root->root_item,
>>> +                       btrfs_item_ptr_offset(l, path->slots[0]),
>>> +                       sizeof(tmp_root->root_item));
>>> +
>>> +       tmp_root->root_key.objectid = root_id;
>>> +       tmp_root->root_key.type = BTRFS_ROOT_ITEM_KEY;
>>> +       tmp_root->root_key.offset = 0;
>>> +
>>> +       check.owner_root = btrfs_root_id(tmp_root);
>>> +       check.transid = btrfs_root_generation(&tmp_root->root_item);
>>> +       check.level = btrfs_root_level(&tmp_root->root_item);
>>> +
>>> +       tmp_root->node = read_tree_block(gfs_info,
>>> +                                        
>>> btrfs_root_bytenr(&tmp_root->root_item),
>>> +                                        &check);
>>> +       if (IS_ERR(tmp_root->node)) {
>>> +               tmp_root->node = NULL;
>>> +               return 1;
>>> +       }
>>> +
>>> +       if (btrfs_header_level(tmp_root->node) != 
>>> btrfs_root_level(&tmp_root->root_item)) {
>>> +               error("root [%llu %llu] level %d does not match %d",
>>> +                       tmp_root->root_key.objectid,
>>> +                       tmp_root->root_key.offset,
>>> +                       btrfs_header_level(tmp_root->node),
>>> +                       btrfs_root_level(&tmp_root->root_item));
>>> +               return 1;
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int check_log(struct cache_tree *root_cache)
>>> +{
>>> +       struct btrfs_path path = { 0 };
>>> +       struct walk_control wc = { 0 };
>>> +       struct btrfs_key key;
>>> +       struct extent_buffer *leaf;
>>> +       struct btrfs_root *log_root = gfs_info->log_root_tree;
>>> +       int ret;
>>> +       int err = 0;
>>> +
>>> +       cache_tree_init(&wc.shared);
>>> +
>>> +       key.objectid = BTRFS_TREE_LOG_OBJECTID;
>>> +       key.type = BTRFS_ROOT_ITEM_KEY;
>>> +       key.offset = 0;
>>> +       ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
>>> +       if (ret < 0) {
>>> +               err = 1;
>>> +               goto out;
>>> +       }
>>> +
>>> +       while (1) {
>>> +               leaf = path.nodes[0];
>>> +               if (path.slots[0] >= btrfs_header_nritems(leaf)) {
>>> +                       ret = btrfs_next_leaf(log_root, &path);
>>> +                       if (ret) {
>>> +                               if (ret < 0)
>>> +                                       err = 1;
>>> +                               break;
>>> +                       }
>>> +                       leaf = path.nodes[0];
>>> +               }
>>> +               btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
>>> +
>>> +               if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
>>> +                   key.type > BTRFS_ROOT_ITEM_KEY)
>>> +                       break;
>>> +
>>> +               if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
>>> +                   key.type == BTRFS_ROOT_ITEM_KEY &&
>>> +                   fs_root_objectid(key.offset)) {
>>> +                       struct btrfs_root tmp_root;
>>> +
>>> +                       memset(&tmp_root, 0, sizeof(tmp_root));
>>> +
>>> +                       ret = load_log_root(key.offset, &path, 
>>> &tmp_root);
>>> +                       if (ret) {
>>> +                               err = 1;
>>> +                               goto next;
>>> +                       }
>>> +
>>> +                       ret = check_log_root(&tmp_root, root_cache, 
>>> &wc);
>>> +                       if (ret)
>>> +                               err = 1;
>>> +
>>> +next:
>>> +                       if (tmp_root.node)
>>> +                               free_extent_buffer(tmp_root.node);
>>> +               }
>>> +
>>> +               path.slots[0]++;
>>> +       }
>>> +out:
>>> +       btrfs_release_path(&path);
>>> +       if (err)
>>> +               free_extent_cache_tree(&wc.shared);
>>> +       if (!cache_tree_empty(&wc.shared))
>>> +               fprintf(stderr, "warning line %d\n", __LINE__);
>>> +
>>> +       return err;
>>> +}
>>> +
>>>   static void free_roots_info_cache(void)
>>>   {
>>>          if (!roots_info_cache)
>>> @@ -10585,9 +10853,21 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>                  goto close_out;
>>>          }
>>>
>>> +       if (gfs_info->log_root_tree) {
>>> +               fprintf(stderr, "[1/8] checking log\n");
>>> +               ret = check_log(&root_cache);
>>> +
>>> +               if (ret)
>>> +                       error("errors found in log");
>>> +               err |= !!ret;
>>> +       } else {
>>> +               fprintf(stderr,
>>> +               "[1/8] checking log skipped (none written)\n");
>>> +       }
>>> +
>>>          if (!init_extent_tree) {
>>>                  if (!g_task_ctx.progress_enabled) {
>>> -                       fprintf(stderr, "[1/7] checking root items\n");
>>> +                       fprintf(stderr, "[2/8] checking root items\n");
>>>                  } else {
>>>                          g_task_ctx.tp = TASK_ROOT_ITEMS;
>>>                          task_start(g_task_ctx.info, 
>>> &g_task_ctx.start_time,
>>> @@ -10622,11 +10902,11 @@ static int cmd_check(const struct 
>>> cmd_struct *cmd, int argc, char **argv)
>>>                          }
>>>                  }
>>>          } else {
>>> -               fprintf(stderr, "[1/7] checking root items... 
>>> skipped\n");
>>> +               fprintf(stderr, "[2/8] checking root items... 
>>> skipped\n");
>>>          }
>>>
>>>          if (!g_task_ctx.progress_enabled) {
>>> -               fprintf(stderr, "[2/7] checking extents\n");
>>> +               fprintf(stderr, "[3/8] checking extents\n");
>>>          } else {
>>>                  g_task_ctx.tp = TASK_EXTENTS;
>>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, 
>>> &g_task_ctx.item_count);
>>> @@ -10644,9 +10924,9 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>
>>>          if (!g_task_ctx.progress_enabled) {
>>>                  if (is_free_space_tree)
>>> -                       fprintf(stderr, "[3/7] checking free space 
>>> tree\n");
>>> +                       fprintf(stderr, "[4/8] checking free space 
>>> tree\n");
>>>                  else
>>> -                       fprintf(stderr, "[3/7] checking free space 
>>> cache\n");
>>> +                       fprintf(stderr, "[4/8] checking free space 
>>> cache\n");
>>>          } else {
>>>                  g_task_ctx.tp = TASK_FREE_SPACE;
>>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, 
>>> &g_task_ctx.item_count);
>>> @@ -10664,7 +10944,7 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>           */
>>>          no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES);
>>>          if (!g_task_ctx.progress_enabled) {
>>> -               fprintf(stderr, "[4/7] checking fs roots\n");
>>> +               fprintf(stderr, "[5/8] checking fs roots\n");
>>>          } else {
>>>                  g_task_ctx.tp = TASK_FS_ROOTS;
>>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, 
>>> &g_task_ctx.item_count);
>>> @@ -10680,10 +10960,10 @@ static int cmd_check(const struct 
>>> cmd_struct *cmd, int argc, char **argv)
>>>
>>>          if (!g_task_ctx.progress_enabled) {
>>>                  if (check_data_csum)
>>> -                       fprintf(stderr, "[5/7] checking csums against 
>>> data\n");
>>> +                       fprintf(stderr, "[6/8] checking csums against 
>>> data\n");
>>>                  else
>>>                          fprintf(stderr,
>>> -               "[5/7] checking only csums items (without verifying 
>>> data)\n");
>>> +               "[6/8] checking only csums items (without verifying 
>>> data)\n");
>>>          } else {
>>>                  g_task_ctx.tp = TASK_CSUMS;
>>>                  task_start(g_task_ctx.info, &g_task_ctx.start_time, 
>>> &g_task_ctx.item_count);
>>> @@ -10702,7 +10982,7 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>          /* For low memory mode, check_fs_roots_v2 handles root refs */
>>>           if (check_mode != CHECK_MODE_LOWMEM) {
>>>                  if (!g_task_ctx.progress_enabled) {
>>> -                       fprintf(stderr, "[6/7] checking root refs\n");
>>> +                       fprintf(stderr, "[7/8] checking root refs\n");
>>>                  } else {
>>>                          g_task_ctx.tp = TASK_ROOT_REFS;
>>>                          task_start(g_task_ctx.info, 
>>> &g_task_ctx.start_time, &g_task_ctx.item_count);
>>> @@ -10717,7 +10997,7 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>                  }
>>>          } else {
>>>                  fprintf(stderr,
>>> -       "[6/7] checking root refs done with fs roots in lowmem mode, 
>>> skipping\n");
>>> +       "[7/8] checking root refs done with fs roots in lowmem mode, 
>>> skipping\n");
>>>          }
>>>
>>>          while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) {
>>> @@ -10749,7 +11029,7 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>
>>>          if (gfs_info->quota_enabled) {
>>>                  if (!g_task_ctx.progress_enabled) {
>>> -                       fprintf(stderr, "[7/7] checking quota 
>>> groups\n");
>>> +                       fprintf(stderr, "[8/8] checking quota 
>>> groups\n");
>>>                  } else {
>>>                          g_task_ctx.tp = TASK_QGROUPS;
>>>                          task_start(g_task_ctx.info, 
>>> &g_task_ctx.start_time, &g_task_ctx.item_count);
>>> @@ -10772,7 +11052,7 @@ static int cmd_check(const struct cmd_struct 
>>> *cmd, int argc, char **argv)
>>>                  ret = 0;
>>>          } else {
>>>                  fprintf(stderr,
>>> -               "[7/7] checking quota groups skipped (not enabled on 
>>> this FS)\n");
>>> +               "[8/8] checking quota groups skipped (not enabled on 
>>> this FS)\n");
>>>          }
>>>
>>>          if (!list_empty(&gfs_info->recow_ebs)) {
>>> diff --git a/tests/fsck-tests/063-log-missing-csum/default.img.xz 
>>> b/tests/fsck-tests/063-log-missing-csum/default.img.xz
>>> new file mode 100644
>>> index 
>>> 0000000000000000000000000000000000000000..c9b4f420ac23866cd142428daf21739efda0762d
>>> GIT binary patch
>>> literal 1288
>>> zcmV+j1^4>>H+ooF000E$*0e?f03iVu0001VFXf}+)Bgm;T>wRyj;C3^v%$$4d1wo3
>>> zjjaF1$8Jv*pMMm%#Ch6IM%}7&=@^TvKIIdYjZ@t7T($M|Hz%Cr>ZwJ6sj_bRsY^a*
>>> zIr#q#U>$p<Go1bOhu5#Q<?@08{(UO6n<sqYIY$<RXz3}#&sBuS|AUrsgQ{%?z1>tr
>>> zW^k~hR~HJs$P?zuw_V+%>i2AU4x-C~^RmH%0#2o7VY;D>d@D<+CC=J4l?Bs2d@5yC
>>> z&pKh_V}nG$A*f{hGHlfKeT*E3NA1Q(Nt;TxvV6m2A_RVpD=`*t<8b6_KTom1S=|eG
>>> z>OMhPCcl7*3tYx)YhtFm-3)~e_SPBasmqw|$jnE_-QZfatuNh^bkJ5yW{ZX7NmD)i
>>> zLKCQYs5y!To5G>tIYzSNigXu|<l&x%JzxMA5aW-t+nod|B!efqr+_#STUo@Xu>wY=
>>> z*;zh8-nLPC+GYq6HU^A<Qjkh41{(MMB=p?8i7|;p<cL9o>Q7;56uclyq=uCMb1GmZ
>>> zuq5@iwv1^<TQdlj9#Xdq$Vj!1d~GO$P#HmKmPt*t2iPme{8~y6{{T++2nv+X7)Fnf
>>> z`$pgevh)w!LR&hx2a|{sW@ac43(Sl>??<SGJB`t-nDXvSDSY*87<ziAenKb9p4UlP
>>> z*(x(s)OBgD_f*R-*yBUaiD3U<uhy+{P{T&x@o^!9gXa@=Kts_p@u`Tb)rQcu{)tzQ
>>> zhCXcu`CGkyKuQRy(M0c#fPDWJwyhC`w2mizWqVSf==Sk-nywG!nJzNT5aM0tVb}Va
>>> zK?^BQbY*CPLeJtG``VF4F@BVWBpSVWDFk`J!cOhagXH=GB1GApQ?zsg8%y=J$Fgl1
>>> z)+=jFc)g}pALW^HG}wkoY7fOfbZ274b~+dEoA?l%Xzz`P-F)vb1^UziIz|no!02bS
>>> zWPfo?`TiKe8aIuiPilXte381GZ4tQc6=Y*KS0rehLE^7!W+MOr0AEFmQgw<KSH}Ix
>>> ze4qlov3aIg5e(Oh1%CwTZN^`UI!7g(U;iDFt(>e4XDB-M)l0stJd3pf<XujqB2}MW
>>> z6EY1#$l>s;T1IYy#{A9ljl@Ba_*dh)Qfq@8Y1A#q%p|d;USrIAZ*n1`%ho*!-RYOY
>>> z4j%X?D?CoiaW8A*7aue82GA9jmMGg6Bv&mO#v6uKVw1V~Uu01wXbz(RJN_|xw{`8K
>>> zSfdyQCJOdIwdN%h+Jr{$9!6otgQ8^`lkU4u;5wJlsy#P<D{Nc39m@=*OHZ533tvD<
>>> zfb=c>-IWOj%>%mQ@Ja!#!QIOVuluA}ecSFg;*v@<bR@g*YX>1-?kl9WGes{ulrRWq
>>> zTCW@Om{iybhK+m8SiS%n^)Kgk21T`IEW#){Czvz;Fix;5bdkB$BFXimx9iBaRZ9r=
>>> zV(H0OD8mK94(j^6gF8)|^i-78H`hR#ebBvFq}>vK6-ni7j1S>*viG^nA7<~Bn2KW0
>>> zw%D36qxTcpO;c2bYwf)K?p#^LFL4Ifq8?kP?#;(Jo8*(twFPRBj*u~h%Ej$X6JI2E
>>> z<L5)YZY#@aM-2sxodox#)=G1ycE6A6j?am&HLbKPv@FI{(HAJ#D*V@xG^U`bnRtH<
>>> y%w+Y9$G2sBKv@6)0000tuOhH&%TnS10pSUNs0#q29h=Ir#Ao{g000001X)@}7Iu~Z
>>>
>>> literal 0
>>> HcmV?d00001
>>>
>>> diff --git a/tests/fsck-tests/063-log-missing-csum/test.sh 
>>> b/tests/fsck-tests/063-log-missing-csum/test.sh
>>> new file mode 100755
>>> index 00000000..40a48508
>>> --- /dev/null
>>> +++ b/tests/fsck-tests/063-log-missing-csum/test.sh
>>> @@ -0,0 +1,14 @@
>>> +#!/bin/bash
>>> +#
>>> +# Verify that check can detect missing log csum items.
>>> +
>>> +source "$TEST_TOP/common" || exit
>>> +
>>> +check_prereq btrfs
>>> +
>>> +check_image() {
>>> +       run_mustfail "missing log csum items not detected" \
>>> +               "$TOP/btrfs" check "$1"
>>> +}
>>> +
>>> +check_all_images
>>> -- 
>>> 2.45.2
>>>
>>>
>
diff mbox series

Patch

diff --git a/check/main.c b/check/main.c
index 83c721d3..eaae3042 100644
--- a/check/main.c
+++ b/check/main.c
@@ -9787,6 +9787,274 @@  static int zero_log_tree(struct btrfs_root *root)
 	return ret;
 }
 
+/* Searches the given root for checksums in the range [addr, addr+length].
+ * Returns 1 if found, 0 if not found, and < 0 for an error. */
+static int check_log_csum(struct btrfs_root *root, u64 addr, u64 length)
+{
+	struct btrfs_path path = { 0 };
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	u16 csum_size = gfs_info->csum_size;
+	u16 num_entries;
+	u64 data_len;
+	int ret;
+
+	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+	key.type = BTRFS_EXTENT_CSUM_KEY;
+	key.offset = addr;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	if (ret > 0 && path.slots[0])
+		path.slots[0]--;
+
+	ret = 0;
+
+	while (1) {
+		leaf = path.nodes[0];
+		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, &path);
+			if (ret) {
+				if (ret > 0)
+					ret = 0;
+
+				break;
+			}
+			leaf = path.nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+
+		if (key.objectid > BTRFS_EXTENT_CSUM_OBJECTID)
+			break;
+
+		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+		    key.type != BTRFS_EXTENT_CSUM_KEY)
+			goto next;
+
+		if (key.offset >= addr + length)
+			break;
+
+		num_entries = btrfs_item_size(leaf, path.slots[0]) / csum_size;
+		data_len = num_entries * gfs_info->sectorsize;
+
+		if (addr >= key.offset && addr <= key.offset + data_len) {
+			u64 end = min(addr + length, key.offset + data_len);
+
+			length -= (end - addr);
+			addr = end;
+
+			if (length == 0)
+				break;
+		}
+
+next:
+		path.slots[0]++;
+	}
+
+	btrfs_release_path(&path);
+
+	if (ret >= 0)
+		ret = length == 0 ? 1 : 0;
+
+	return ret;
+}
+
+static int check_log_root(struct btrfs_root *root, struct cache_tree *root_cache,
+			  struct walk_control *wc)
+{
+	struct btrfs_path path = { 0 };
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	int ret, err = 0;
+	u64 last_csum_inode = 0;
+
+	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		return 1;
+
+	while (1) {
+		leaf = path.nodes[0];
+		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, &path);
+			if (ret) {
+				if (ret < 0)
+					err = 1;
+
+				break;
+			}
+			leaf = path.nodes[0];
+		}
+		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+
+		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID)
+			break;
+
+		if (key.type == BTRFS_INODE_ITEM_KEY) {
+			struct btrfs_inode_item *item;
+
+			item = btrfs_item_ptr(leaf, path.slots[0],
+					      struct btrfs_inode_item);
+
+			if (!(btrfs_inode_flags(leaf, item) & BTRFS_INODE_NODATASUM))
+				last_csum_inode = key.objectid;
+		} else if (key.type == BTRFS_EXTENT_DATA_KEY &&
+			   key.objectid == last_csum_inode) {
+			struct btrfs_file_extent_item *fi;
+			u64 addr, length;
+
+			fi = btrfs_item_ptr(leaf, path.slots[0],
+					    struct btrfs_file_extent_item);
+
+			if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+				goto next;
+
+			addr = btrfs_file_extent_disk_bytenr(leaf, fi) +
+				btrfs_file_extent_offset(leaf, fi);
+			length = btrfs_file_extent_num_bytes(leaf, fi);
+
+			ret = check_log_csum(root, addr, length);
+			if (ret < 0) {
+				err = 1;
+				break;
+			}
+
+			if (!ret) {
+				error("csum missing in log (root %llu, inode %llu, "
+				      "offset %llu, address 0x%llx, length %llu)",
+				      root->objectid, last_csum_inode, key.offset,
+				      addr, length);
+				err = 1;
+			}
+		}
+
+next:
+		path.slots[0]++;
+	}
+
+	btrfs_release_path(&path);
+
+	return err;
+}
+
+static int load_log_root(u64 root_id, struct btrfs_path *path,
+			 struct btrfs_root *tmp_root)
+{
+	struct extent_buffer *l;
+	struct btrfs_tree_parent_check check = { 0 };
+
+	btrfs_setup_root(tmp_root, gfs_info, root_id);
+
+	l = path->nodes[0];
+	read_extent_buffer(l, &tmp_root->root_item,
+			btrfs_item_ptr_offset(l, path->slots[0]),
+			sizeof(tmp_root->root_item));
+
+	tmp_root->root_key.objectid = root_id;
+	tmp_root->root_key.type = BTRFS_ROOT_ITEM_KEY;
+	tmp_root->root_key.offset = 0;
+
+	check.owner_root = btrfs_root_id(tmp_root);
+	check.transid = btrfs_root_generation(&tmp_root->root_item);
+	check.level = btrfs_root_level(&tmp_root->root_item);
+
+	tmp_root->node = read_tree_block(gfs_info,
+					 btrfs_root_bytenr(&tmp_root->root_item),
+					 &check);
+	if (IS_ERR(tmp_root->node)) {
+		tmp_root->node = NULL;
+		return 1;
+	}
+
+	if (btrfs_header_level(tmp_root->node) != btrfs_root_level(&tmp_root->root_item)) {
+		error("root [%llu %llu] level %d does not match %d",
+			tmp_root->root_key.objectid,
+			tmp_root->root_key.offset,
+			btrfs_header_level(tmp_root->node),
+			btrfs_root_level(&tmp_root->root_item));
+		return 1;
+	}
+
+	return 0;
+}
+
+static int check_log(struct cache_tree *root_cache)
+{
+	struct btrfs_path path = { 0 };
+	struct walk_control wc = { 0 };
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct btrfs_root *log_root = gfs_info->log_root_tree;
+	int ret;
+	int err = 0;
+
+	cache_tree_init(&wc.shared);
+
+	key.objectid = BTRFS_TREE_LOG_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, log_root, &key, &path, 0, 0);
+	if (ret < 0) {
+		err = 1;
+		goto out;
+	}
+
+	while (1) {
+		leaf = path.nodes[0];
+		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(log_root, &path);
+			if (ret) {
+				if (ret < 0)
+					err = 1;
+				break;
+			}
+			leaf = path.nodes[0];
+		}
+		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
+
+		if (key.objectid > BTRFS_TREE_LOG_OBJECTID ||
+		    key.type > BTRFS_ROOT_ITEM_KEY)
+			break;
+
+		if (key.objectid == BTRFS_TREE_LOG_OBJECTID &&
+		    key.type == BTRFS_ROOT_ITEM_KEY &&
+		    fs_root_objectid(key.offset)) {
+			struct btrfs_root tmp_root;
+
+			memset(&tmp_root, 0, sizeof(tmp_root));
+
+			ret = load_log_root(key.offset, &path, &tmp_root);
+			if (ret) {
+				err = 1;
+				goto next;
+			}
+
+			ret = check_log_root(&tmp_root, root_cache, &wc);
+			if (ret)
+				err = 1;
+
+next:
+			if (tmp_root.node)
+				free_extent_buffer(tmp_root.node);
+		}
+
+		path.slots[0]++;
+	}
+out:
+	btrfs_release_path(&path);
+	if (err)
+		free_extent_cache_tree(&wc.shared);
+	if (!cache_tree_empty(&wc.shared))
+		fprintf(stderr, "warning line %d\n", __LINE__);
+
+	return err;
+}
+
 static void free_roots_info_cache(void)
 {
 	if (!roots_info_cache)
@@ -10585,9 +10853,21 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 		goto close_out;
 	}
 
+	if (gfs_info->log_root_tree) {
+		fprintf(stderr, "[1/8] checking log\n");
+		ret = check_log(&root_cache);
+
+		if (ret)
+			error("errors found in log");
+		err |= !!ret;
+	} else {
+		fprintf(stderr,
+		"[1/8] checking log skipped (none written)\n");
+	}
+
 	if (!init_extent_tree) {
 		if (!g_task_ctx.progress_enabled) {
-			fprintf(stderr, "[1/7] checking root items\n");
+			fprintf(stderr, "[2/8] checking root items\n");
 		} else {
 			g_task_ctx.tp = TASK_ROOT_ITEMS;
 			task_start(g_task_ctx.info, &g_task_ctx.start_time,
@@ -10622,11 +10902,11 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 			}
 		}
 	} else {
-		fprintf(stderr, "[1/7] checking root items... skipped\n");
+		fprintf(stderr, "[2/8] checking root items... skipped\n");
 	}
 
 	if (!g_task_ctx.progress_enabled) {
-		fprintf(stderr, "[2/7] checking extents\n");
+		fprintf(stderr, "[3/8] checking extents\n");
 	} else {
 		g_task_ctx.tp = TASK_EXTENTS;
 		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
@@ -10644,9 +10924,9 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 
 	if (!g_task_ctx.progress_enabled) {
 		if (is_free_space_tree)
-			fprintf(stderr, "[3/7] checking free space tree\n");
+			fprintf(stderr, "[4/8] checking free space tree\n");
 		else
-			fprintf(stderr, "[3/7] checking free space cache\n");
+			fprintf(stderr, "[4/8] checking free space cache\n");
 	} else {
 		g_task_ctx.tp = TASK_FREE_SPACE;
 		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
@@ -10664,7 +10944,7 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 	 */
 	no_holes = btrfs_fs_incompat(gfs_info, NO_HOLES);
 	if (!g_task_ctx.progress_enabled) {
-		fprintf(stderr, "[4/7] checking fs roots\n");
+		fprintf(stderr, "[5/8] checking fs roots\n");
 	} else {
 		g_task_ctx.tp = TASK_FS_ROOTS;
 		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
@@ -10680,10 +10960,10 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 
 	if (!g_task_ctx.progress_enabled) {
 		if (check_data_csum)
-			fprintf(stderr, "[5/7] checking csums against data\n");
+			fprintf(stderr, "[6/8] checking csums against data\n");
 		else
 			fprintf(stderr,
-		"[5/7] checking only csums items (without verifying data)\n");
+		"[6/8] checking only csums items (without verifying data)\n");
 	} else {
 		g_task_ctx.tp = TASK_CSUMS;
 		task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
@@ -10702,7 +10982,7 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 	/* For low memory mode, check_fs_roots_v2 handles root refs */
         if (check_mode != CHECK_MODE_LOWMEM) {
 		if (!g_task_ctx.progress_enabled) {
-			fprintf(stderr, "[6/7] checking root refs\n");
+			fprintf(stderr, "[7/8] checking root refs\n");
 		} else {
 			g_task_ctx.tp = TASK_ROOT_REFS;
 			task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
@@ -10717,7 +10997,7 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 		}
 	} else {
 		fprintf(stderr,
-	"[6/7] checking root refs done with fs roots in lowmem mode, skipping\n");
+	"[7/8] checking root refs done with fs roots in lowmem mode, skipping\n");
 	}
 
 	while (opt_check_repair && !list_empty(&gfs_info->recow_ebs)) {
@@ -10749,7 +11029,7 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 
 	if (gfs_info->quota_enabled) {
 		if (!g_task_ctx.progress_enabled) {
-			fprintf(stderr, "[7/7] checking quota groups\n");
+			fprintf(stderr, "[8/8] checking quota groups\n");
 		} else {
 			g_task_ctx.tp = TASK_QGROUPS;
 			task_start(g_task_ctx.info, &g_task_ctx.start_time, &g_task_ctx.item_count);
@@ -10772,7 +11052,7 @@  static int cmd_check(const struct cmd_struct *cmd, int argc, char **argv)
 		ret = 0;
 	} else {
 		fprintf(stderr,
-		"[7/7] checking quota groups skipped (not enabled on this FS)\n");
+		"[8/8] checking quota groups skipped (not enabled on this FS)\n");
 	}
 
 	if (!list_empty(&gfs_info->recow_ebs)) {
diff --git a/tests/fsck-tests/063-log-missing-csum/default.img.xz b/tests/fsck-tests/063-log-missing-csum/default.img.xz
new file mode 100644
index 0000000000000000000000000000000000000000..c9b4f420ac23866cd142428daf21739efda0762d
GIT binary patch
literal 1288
zcmV+j1^4>>H+ooF000E$*0e?f03iVu0001VFXf}+)Bgm;T>wRyj;C3^v%$$4d1wo3
zjjaF1$8Jv*pMMm%#Ch6IM%}7&=@^TvKIIdYjZ@t7T($M|Hz%Cr>ZwJ6sj_bRsY^a*
zIr#q#U>$p<Go1bOhu5#Q<?@08{(UO6n<sqYIY$<RXz3}#&sBuS|AUrsgQ{%?z1>tr
zW^k~hR~HJs$P?zuw_V+%>i2AU4x-C~^RmH%0#2o7VY;D>d@D<+CC=J4l?Bs2d@5yC
z&pKh_V}nG$A*f{hGHlfKeT*E3NA1Q(Nt;TxvV6m2A_RVpD=`*t<8b6_KTom1S=|eG
z>OMhPCcl7*3tYx)YhtFm-3)~e_SPBasmqw|$jnE_-QZfatuNh^bkJ5yW{ZX7NmD)i
zLKCQYs5y!To5G>tIYzSNigXu|<l&x%JzxMA5aW-t+nod|B!efqr+_#STUo@Xu>wY=
z*;zh8-nLPC+GYq6HU^A<Qjkh41{(MMB=p?8i7|;p<cL9o>Q7;56uclyq=uCMb1GmZ
zuq5@iwv1^<TQdlj9#Xdq$Vj!1d~GO$P#HmKmPt*t2iPme{8~y6{{T++2nv+X7)Fnf
z`$pgevh)w!LR&hx2a|{sW@ac43(Sl>??<SGJB`t-nDXvSDSY*87<ziAenKb9p4UlP
z*(x(s)OBgD_f*R-*yBUaiD3U<uhy+{P{T&x@o^!9gXa@=Kts_p@u`Tb)rQcu{)tzQ
zhCXcu`CGkyKuQRy(M0c#fPDWJwyhC`w2mizWqVSf==Sk-nywG!nJzNT5aM0tVb}Va
zK?^BQbY*CPLeJtG``VF4F@BVWBpSVWDFk`J!cOhagXH=GB1GApQ?zsg8%y=J$Fgl1
z)+=jFc)g}pALW^HG}wkoY7fOfbZ274b~+dEoA?l%Xzz`P-F)vb1^UziIz|no!02bS
zWPfo?`TiKe8aIuiPilXte381GZ4tQc6=Y*KS0rehLE^7!W+MOr0AEFmQgw<KSH}Ix
ze4qlov3aIg5e(Oh1%CwTZN^`UI!7g(U;iDFt(>e4XDB-M)l0stJd3pf<XujqB2}MW
z6EY1#$l>s;T1IYy#{A9ljl@Ba_*dh)Qfq@8Y1A#q%p|d;USrIAZ*n1`%ho*!-RYOY
z4j%X?D?CoiaW8A*7aue82GA9jmMGg6Bv&mO#v6uKVw1V~Uu01wXbz(RJN_|xw{`8K
zSfdyQCJOdIwdN%h+Jr{$9!6otgQ8^`lkU4u;5wJlsy#P<D{Nc39m@=*OHZ533tvD<
zfb=c>-IWOj%>%mQ@Ja!#!QIOVuluA}ecSFg;*v@<bR@g*YX>1-?kl9WGes{ulrRWq
zTCW@Om{iybhK+m8SiS%n^)Kgk21T`IEW#){Czvz;Fix;5bdkB$BFXimx9iBaRZ9r=
zV(H0OD8mK94(j^6gF8)|^i-78H`hR#ebBvFq}>vK6-ni7j1S>*viG^nA7<~Bn2KW0
zw%D36qxTcpO;c2bYwf)K?p#^LFL4Ifq8?kP?#;(Jo8*(twFPRBj*u~h%Ej$X6JI2E
z<L5)YZY#@aM-2sxodox#)=G1ycE6A6j?am&HLbKPv@FI{(HAJ#D*V@xG^U`bnRtH<
y%w+Y9$G2sBKv@6)0000tuOhH&%TnS10pSUNs0#q29h=Ir#Ao{g000001X)@}7Iu~Z

literal 0
HcmV?d00001

diff --git a/tests/fsck-tests/063-log-missing-csum/test.sh b/tests/fsck-tests/063-log-missing-csum/test.sh
new file mode 100755
index 00000000..40a48508
--- /dev/null
+++ b/tests/fsck-tests/063-log-missing-csum/test.sh
@@ -0,0 +1,14 @@ 
+#!/bin/bash
+#
+# Verify that check can detect missing log csum items.
+
+source "$TEST_TOP/common" || exit
+
+check_prereq btrfs
+
+check_image() {
+	run_mustfail "missing log csum items not detected" \
+		"$TOP/btrfs" check "$1"
+}
+
+check_all_images