diff mbox series

[RFC,02/11] btrfs: introduce a new experimental compat RO flag, WRITE_INTENT_BITMAP

Message ID d9aba266923812e2de3a67f0e21c01fa50dd49d0.1657004556.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: introduce write-intent bitmaps for RAID56 | expand

Commit Message

Qu Wenruo July 5, 2022, 7:39 a.m. UTC
The new flag is for the upcoming write-intent bitmap, which mainly addresses
the RAID56 write-hole by doing a mandatory scrub of partially written
stripes at mount time.

The feature is still under development; this patch is mostly a placeholder
for the extra reserved bytes needed by the write-intent bitmap.

We will utilize the newly introduced EXTRA_SUPER_RESERVED compat RO flag
to enlarge the reserved bytes to at least (1MiB + 64KiB), and use that
64KiB (exact value is not yet fully determined) for the write-intent bitmap.

Only one extra check is introduced, to ensure we have enough space to
place the write-intent bitmap at 1MiB physical offset.

This patch is only a placeholder for the upcoming on-disk format
change; no real write-intent functionality is implemented yet.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/ctree.h           |  9 +++++++++
 fs/btrfs/disk-io.c         | 20 ++++++++++++++++++++
 fs/btrfs/volumes.c         | 14 +++++++++++++-
 include/uapi/linux/btrfs.h |  7 +++++++
 4 files changed, 49 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 12019904f1cf..908a735a66cf 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -317,11 +317,20 @@  static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 #define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
 #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
 
+#ifdef CONFIG_BTRFS_DEBUG
+#define BTRFS_FEATURE_COMPAT_RO_SUPP			\
+	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
+	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
+	 BTRFS_FEATURE_COMPAT_RO_VERITY |		\
+	 BTRFS_FEATURE_COMPAT_RO_EXTRA_SUPER_RESERVED |	\
+	 BTRFS_FEATURE_COMPAT_RO_WRITE_INTENT_BITMAP)
+#else
 #define BTRFS_FEATURE_COMPAT_RO_SUPP			\
 	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
 	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
 	 BTRFS_FEATURE_COMPAT_RO_VERITY |		\
 	 BTRFS_FEATURE_COMPAT_RO_EXTRA_SUPER_RESERVED)
+#endif
 
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1df2da2509ca..967c020c380a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2830,6 +2830,26 @@  static int validate_super(struct btrfs_fs_info *fs_info,
 			    BTRFS_DEVICE_RANGE_RESERVED);
 		ret = -EINVAL;
 	}
+	if (btrfs_super_compat_ro_flags(sb) &
+	    BTRFS_FEATURE_COMPAT_RO_WRITE_INTENT_BITMAP) {
+		/* Write intent bitmap requires extra reserve. */
+		if (!(btrfs_super_compat_ro_flags(sb) &
+		      BTRFS_FEATURE_COMPAT_RO_EXTRA_SUPER_RESERVED)) {
+			btrfs_err(fs_info,
+"WRITE_INTENT_BITMAP feature enabled, but missing EXTRA_SUPER_RESERVED feature");
+			ret = -EINVAL;
+		}
+		/*
+		 * Write intent bitmap is always located at 1MiB.
+		 * Extra check like the length check against the reserved space
+		 * will happen at bitmap load time.
+		 */
+		if (btrfs_super_reserved_bytes(sb) < BTRFS_DEVICE_RANGE_RESERVED) {
+			btrfs_err(fs_info,
+			"not enough reserved space for write intent bitmap");
+			ret = -EINVAL;
+		}
+	}
 
 	/*
 	 * The generation is a global counter, we'll trust it more than the others
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2a4ac905e39f..4882c616768c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8016,11 +8016,23 @@  static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 	 *
 	 * So here, we just give a warning and continue the mount.
 	 */
-	if (physical_offset < super_reserved)
+	if (physical_offset < super_reserved) {
 		btrfs_warn(fs_info,
 		"devid %llu physical %llu len %llu inside the reserved space",
 			   devid, physical_offset, physical_len);
 
+		/* Clear feature bits relying on the new reserved_bytes. */
+		if (btrfs_fs_compat_ro(fs_info, WRITE_INTENT_BITMAP)) {
+			struct btrfs_super_block *sb = fs_info->super_copy;
+
+			btrfs_warn(fs_info,
+	"disabling write intent bitmap due to the lack of reserved space.");
+			btrfs_set_super_compat_ro_flags(sb,
+				btrfs_super_compat_ro_flags(sb) &
+				~BTRFS_FEATURE_COMPAT_RO_WRITE_INTENT_BITMAP);
+		}
+	}
+
 	for (i = 0; i < map->num_stripes; i++) {
 		if (map->stripes[i].dev->devid == devid &&
 		    map->stripes[i].physical == physical_offset) {
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 4a0c9f4f55d1..38c74a50323e 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -300,6 +300,13 @@  struct btrfs_ioctl_fs_info_args {
  */
 #define BTRFS_FEATURE_COMPAT_RO_EXTRA_SUPER_RESERVED	(1ULL << 3)
 
+/*
+ * Allow btrfs to have per-device write-intent bitmap.
+ * Will be utilized to close the RAID56 write-hole (by forced scrub for dirty
+ * partially written stripes at mount time).
+ */
+#define BTRFS_FEATURE_COMPAT_RO_WRITE_INTENT_BITMAP	(1ULL << 4)
+
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)