diff mbox series

[2/3] btrfs-progs: convert: rework file extent iteration to handle unwritten extents

Message ID bc92cbbcf4c530b18b27a4989767188ba8ea4f96.1715051693.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs-progs: convert: proper ext4 unwritten extents handling | expand

Commit Message

Qu Wenruo May 7, 2024, 3:22 a.m. UTC
[BUG]
There is a bug report that btrfs-convert can not handle unwritten
extents (EXT2_EXTENT_FLAGS_UNINIT set, which is pretty much the same as
BTRFS_FILE_EXTENT_PREALLOC), which can cause the converted image to have
incorrect contents.

[CAUSE]
Currently we use ext2fs_block_iterate2() to go through all data extents
of an ext2 inode, but it doesn't provide any info on whether a range is
unwritten or not.

Thus btrfs-convert just treats unwritten extents as regular extents,
and reads their contents from disk instead of setting the contents to
zero.

[FIX]
Instead of ext2fs_block_iterate2(), follow debugfs' "dump_extents"
command and use the ext2fs_extent_*() helpers to go through every data
extent of the inode, provided the inode has the EXT4_EXTENTS_FL flag
set.

Now we can properly get the info of which extents are unwritten, and use
holes to replace those unwritten extents.

Reported-by: Yordan <y16267966@gmail.com>
Link: https://lore.kernel.org/all/d34c7d77a7f00c93bea6a4d6e83c7caf.mailbg@mail.bg/
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 convert/source-ext2.c | 116 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 109 insertions(+), 7 deletions(-)

Comments

Yordan May 7, 2024, 7:48 a.m. UTC | #1
Converted both, the small and the big 24G, images properly - no failed
file checksums anymore.

Thanks, Yordan.
diff mbox series

Patch

diff --git a/convert/source-ext2.c b/convert/source-ext2.c
index bba81e4034fd..029fa198fc24 100644
--- a/convert/source-ext2.c
+++ b/convert/source-ext2.c
@@ -303,6 +303,88 @@  static int ext2_block_iterate_proc(ext2_filsys fs, blk_t *blocknr,
 	return 0;
 }
 
+/*
+ * Feed one ext2 file extent to block_iterate_proc(), one sector at a time.
+ * A preallocated extent is passed with disk block 0, i.e. treated as a hole:
+ * its space can't be utilized as it would also be shared by the ext2 image.
+ * Returns 0, or the first negative value returned by block_iterate_proc().
+ */
+static int iterate_one_file_extent(struct blk_iterate_data *data,
+				   u64 filepos, u64 len, u64 disk_bytenr,
+				   bool prealloced)
+{
+	const int sectorsize = data->trans->fs_info->sectorsize;
+	const int sectorbits = ilog2(sectorsize);
+
+	UASSERT(len > 0);
+	/* u64 offset: an int counter could overflow before reaching len. */
+	for (u64 offset = 0; offset < len; offset += sectorsize) {
+		u64 disk_block = 0;
+		int ret;
+
+		if (!prealloced)
+			disk_block = (disk_bytenr + offset) >> sectorbits;
+
+		ret = block_iterate_proc(disk_block,
+					 (filepos + offset) >> sectorbits, data);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Walk the extent tree of @ext2_ino and pass every leaf extent, with its
+ * EXT2_EXTENT_FLAGS_UNINIT state, to iterate_one_file_extent().
+ * Returns 0, -EIO on a libext2fs error, or the helper's negative return.
+ */
+static int iterate_file_extents(struct blk_iterate_data *data,
+				ext2_filsys ext2fs, ext2_ino_t ext2_ino,
+				u32 convert_flags)
+{
+	ext2_extent_handle_t handle = NULL;
+	struct ext2fs_extent extent;
+	const int sectorbits = ilog2(data->trans->fs_info->sectorsize);
+	int op = EXT2_EXTENT_ROOT;
+	errcode_t errcode;
+	int ret = 0;
+
+	errcode = ext2fs_extent_open(ext2fs, ext2_ino, &handle);
+	if (errcode) {
+		error("failed to open ext2 inode %u: %s", ext2_ino,
+		      error_message(errcode));
+		return -EIO;
+	}
+	while (1) {
+		errcode = ext2fs_extent_get(handle, op, &extent);
+		if (errcode == EXT2_ET_EXTENT_NO_NEXT)
+			break;
+		if (errcode) {
+			error("failed to get extent of ext2 inode %u: %s",
+			      ext2_ino, error_message(errcode));
+			data->errcode = errcode;
+			ret = -EIO;
+			break;
+		}
+		op = EXT2_EXTENT_NEXT;
+		/* Only first visits of leaf extents are converted. */
+		if ((extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) ||
+		    !(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF))
+			continue;
+
+		/* Widen before shifting: e_len is only 32 bits wide. */
+		ret = iterate_one_file_extent(data,
+				(u64)extent.e_lblk << sectorbits,
+				(u64)extent.e_len << sectorbits,
+				(u64)extent.e_pblk << sectorbits,
+				extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT);
+		if (ret < 0)
+			break;
+	}
+	ext2fs_extent_free(handle);
+	return ret;
+}
+
 /*
  * traverse file's data blocks, record these data blocks as file extents.
  */
@@ -315,6 +397,7 @@  static int ext2_create_file_extents(struct btrfs_trans_handle *trans,
 	int ret;
 	char *buffer = NULL;
 	errcode_t err;
+	struct ext2_inode ext2_inode = { 0 };
 	u32 last_block;
 	u32 sectorsize = root->fs_info->sectorsize;
 	u64 inode_size = btrfs_stack_inode_size(btrfs_inode);
@@ -323,10 +406,32 @@  static int ext2_create_file_extents(struct btrfs_trans_handle *trans,
 	init_blk_iterate_data(&data, trans, root, btrfs_inode, objectid,
 			convert_flags & CONVERT_FLAG_DATACSUM);
 
-	err = ext2fs_block_iterate2(ext2_fs, ext2_ino, BLOCK_FLAG_DATA_ONLY,
-				    NULL, ext2_block_iterate_proc, &data);
-	if (err)
-		goto error;
+	err = ext2fs_read_inode(ext2_fs, ext2_ino, &ext2_inode);
+	if (err) {
+		error("failed to read ext2 inode %u: %s",
+			ext2_ino, error_message(err));
+		return -EIO;
+	}
+	/*
+	 * For inodes without extent block maps, go with the older
+	 * ext2fs_block_iterate2().
+	 * Otherwise use ext2fs_extent_*() based solution, as that can provide
+	 * UNINIT extent flags.
+	 */
+	if ((ext2_inode.i_flags & EXT4_EXTENTS_FL) == 0) {
+		err = ext2fs_block_iterate2(ext2_fs, ext2_ino,
+					    BLOCK_FLAG_DATA_ONLY, NULL,
+					    ext2_block_iterate_proc, &data);
+		if (err) {
+			error("ext2fs_block_iterate2: %s", error_message(err));
+			return -EIO;
+		}
+	} else {
+		ret = iterate_file_extents(&data, ext2_fs, ext2_ino,
+					   convert_flags);
+		if (ret < 0)
+			goto fail;
+	}
 	ret = data.errcode;
 	if (ret)
 		goto fail;
@@ -366,9 +471,6 @@  static int ext2_create_file_extents(struct btrfs_trans_handle *trans,
 fail:
 	free(buffer);
 	return ret;
-error:
-	error("ext2fs_block_iterate2: %s", error_message(err));
-	return -1;
 }
 
 static int ext2_create_symlink(struct btrfs_trans_handle *trans,