diff mbox series

[3/3] btrfs: add RAID1 preferred read device feature

Message ID 6c2ae62fbae41873ebc52f89c4e506b897ebed82.1727368214.git.anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series raid1 balancing methods | expand

Commit Message

Anand Jain Sept. 27, 2024, 9:55 a.m. UTC
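When a mirrored device holds stale data, this feature lets you choose which
device RAID1 reads are served from. It is mainly intended for testing and is
only available when CONFIG_BTRFS_DEBUG is enabled. For example, to prefer
reads from devid 2: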

echo "devid:2" > /sys/fs/btrfs/<UUID>/read_policy

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c   | 92 +++++++++++++++++++++++++++++++++++++---------
 fs/btrfs/volumes.c | 20 ++++++++++
 fs/btrfs/volumes.h |  5 +++
 3 files changed, 100 insertions(+), 17 deletions(-)
diff mbox series

Patch

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 15abf931726c..e32999ea761d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1306,7 +1306,7 @@  static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
 BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
 
 #ifdef CONFIG_BTRFS_DEBUG
-static const char * const btrfs_read_policy_name[] = { "pid", "rotation", "latency" };
+static const char * const btrfs_read_policy_name[] = { "pid", "rotation", "latency", "devid" };
 #else
 static const char * const btrfs_read_policy_name[] = { "pid" };
 #endif
@@ -1320,14 +1320,22 @@  static ssize_t btrfs_read_policy_show(struct kobject *kobj,
 	int i;
 
 	for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
-		if (policy == i)
-			ret += sysfs_emit_at(buf, ret, "%s[%s]",
-					 (ret == 0 ? "" : " "),
-					 btrfs_read_policy_name[i]);
-		else
-			ret += sysfs_emit_at(buf, ret, "%s%s",
-					 (ret == 0 ? "" : " "),
-					 btrfs_read_policy_name[i]);
+		if (ret != 0)
+			ret += sysfs_emit_at(buf, ret, " ");
+
+		if (i == policy)
+			ret += sysfs_emit_at(buf, ret, "[");
+
+		ret += sysfs_emit_at(buf, ret, "%s", btrfs_read_policy_name[i]);
+
+#ifdef CONFIG_BTRFS_DEBUG
+		if (i == BTRFS_READ_POLICY_DEVID)
+			ret += sysfs_emit_at(buf, ret, ":%llu",
+							fs_devices->read_devid);
+#endif
+
+		if (i == policy)
+			ret += sysfs_emit_at(buf, ret, "]");
 	}
 
 	ret += sysfs_emit_at(buf, ret, "\n");
@@ -1340,21 +1348,71 @@  static ssize_t btrfs_read_policy_store(struct kobject *kobj,
 				       const char *buf, size_t len)
 {
 	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+	char *value;
+#ifdef CONFIG_BTRFS_DEBUG
+	u64 devid = 0;
+#endif
+	int index = -1;
 	int i;
+	bool changed = false;
+
+	value = strchr(buf, ':');
+	if (value) {
+		*value = '\0';
+		value = value + 1;
+	}
 
 	for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
 		if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
-			if (i != READ_ONCE(fs_devices->read_policy)) {
-				WRITE_ONCE(fs_devices->read_policy, i);
-				btrfs_info(fs_devices->fs_info,
-					   "read policy set to '%s'",
-					   btrfs_read_policy_name[i]);
-			}
-			return len;
+			index = i;
+			break;
+		}
+	}
+
+	if (index == -1)
+		return -EINVAL;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	/* Extract values from input in devid:value format */
+	if (index == BTRFS_READ_POLICY_DEVID) {
+		BTRFS_DEV_LOOKUP_ARGS(args);
+
+		if (value == NULL || kstrtou64(value, 10, &devid))
+			return -EINVAL;
+
+		args.devid = devid;
+		if (btrfs_find_device(fs_devices, &args) == NULL)
+			return -EINVAL;
+
+		if (READ_ONCE(fs_devices->read_devid) != devid) {
+			WRITE_ONCE(fs_devices->read_devid, devid);
+			changed = true;
 		}
 	}
+#endif
+
+	if (index != READ_ONCE(fs_devices->read_policy)) {
+		WRITE_ONCE(fs_devices->read_policy, index);
+		changed = true;
+	}
+
+	if (changed) {
+#ifdef CONFIG_BTRFS_DEBUG
+		if (devid)
+			btrfs_info(fs_devices->fs_info,
+				   "read policy set to '%s:%llu'",
+				   btrfs_read_policy_name[index], devid);
+		else
+			btrfs_info(fs_devices->fs_info,
+				   "read policy set to '%s'",
+				   btrfs_read_policy_name[index]);
+#else
+		btrfs_info(fs_devices->fs_info, "read policy set to '%s'",
+			   btrfs_read_policy_name[index]);
+#endif
+	}
 
-	return -EINVAL;
+	return len;
 }
 BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 20bc62d85b3b..c49ca48e7b2e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5863,6 +5863,23 @@  int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 }
 
 #ifdef CONFIG_BTRFS_DEBUG
+static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first,
+				int num_stripe)
+{
+	int last = first + num_stripe;
+	int stripe_index;
+
+	for (stripe_index = first; stripe_index < last; stripe_index++) {
+		struct btrfs_device *device = map->stripes[stripe_index].dev;
+
+		if (device->devid == READ_ONCE(device->fs_devices->read_devid))
+			return stripe_index;
+	}
+
+	/* Preferred devid is not among these stripes, fall back to the first */
+	return first;
+}
+
 static int btrfs_best_stripe(struct btrfs_fs_info *fs_info,
 			     struct btrfs_chunk_map *map, int first,
 			     int num_stripe)
@@ -5980,6 +5997,9 @@  static int find_live_mirror(struct btrfs_fs_info *fs_info,
 		preferred_mirror = btrfs_best_stripe(fs_info, map, first,
 								num_stripes);
 		break;
+	case BTRFS_READ_POLICY_DEVID:
+		preferred_mirror = btrfs_read_preferred(map, first, num_stripes);
+		break;
 #endif
 	}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 09920ef76a9b..9850edaafe8c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -308,6 +308,8 @@  enum btrfs_read_policy {
 	BTRFS_READ_POLICY_ROTATION,
 	/* Use the lowest-latency device dynamically */
 	BTRFS_READ_POLICY_LATENCY,
+	/* Read from the specific device */
+	BTRFS_READ_POLICY_DEVID,
 #endif
 	BTRFS_NR_READ_POLICY,
 };
@@ -440,6 +442,9 @@  struct btrfs_fs_devices {
 	/* read counter for the filesystem */ 
 	atomic_t total_reads;
 
+	/* Device to be used for reading in case of RAID1 */
+	u64 read_devid;
+
 	/* Checksum mode - offload it or do it synchronously. */
 	enum btrfs_offload_csum_mode offload_csum_mode;
 #endif