@@ -1306,7 +1306,7 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
#ifdef CONFIG_BTRFS_EXPERIMENTAL
-static const char * const btrfs_read_policy_name[] = { "pid", "round-robin" };
+static const char * const btrfs_read_policy_name[] = { "pid", "round-robin", "devid" };
#else
static const char * const btrfs_read_policy_name[] = { "pid" };
#endif
@@ -1375,8 +1375,11 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
if (i == BTRFS_READ_POLICY_RR)
ret += sysfs_emit_at(buf, ret, ":%d",
fs_devices->min_contiguous_read);
-#endif
+ if (i == BTRFS_READ_POLICY_DEVID)
+ ret += sysfs_emit_at(buf, ret, ":%llu",
+ fs_devices->read_devid);
+#endif
if (i == policy)
ret += sysfs_emit_at(buf, ret, "]");
}
@@ -1425,6 +1428,33 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
return len;
}
+
+ if (index == BTRFS_READ_POLICY_DEVID) {
+
+ if (value != -1) {
+ BTRFS_DEV_LOOKUP_ARGS(args);
+
+ /* Validate input devid */
+ args.devid = value;
+ if (btrfs_find_device(fs_devices, &args) == NULL)
+ return -EINVAL;
+ } else {
+ /* Set default devid to the devid of the latest device */
+ value = fs_devices->latest_dev->devid;
+ }
+
+ if (index != READ_ONCE(fs_devices->read_policy) ||
+ (value != READ_ONCE(fs_devices->read_devid))) {
+ WRITE_ONCE(fs_devices->read_policy, index);
+ WRITE_ONCE(fs_devices->read_devid, value);
+
+ btrfs_info(fs_devices->fs_info, "read policy set to '%s:%llu'",
+ btrfs_read_policy_name[index], value);
+
+ }
+
+ return len;
+ }
#endif
if (index != READ_ONCE(fs_devices->read_policy)) {
WRITE_ONCE(fs_devices->read_policy, index);
@@ -1331,6 +1331,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/* Set min_contiguous_read to a default 256kib */
fs_devices->min_contiguous_read = 256 * 1024;
+ fs_devices->read_devid = latest_dev->devid;
#endif
return 0;
@@ -5964,6 +5965,23 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
}
#ifdef CONFIG_BTRFS_EXPERIMENTAL
+/* Pick the stripe in [first, first + num_stripe) whose devid is read_devid. */
+static int btrfs_read_preferred(struct btrfs_chunk_map *map, int first,
+				int num_stripe)
+{
+	int idx = first;
+
+	while (idx < first + num_stripe) {
+		const struct btrfs_device *dev = map->stripes[idx].dev;
+
+		if (READ_ONCE(dev->fs_devices->read_devid) == dev->devid)
+			return idx;
+		idx++;
+	}
+	/* No stripe sits on the preferred device: fall back to the first one. */
+	return first;
+}
+
struct stripe_mirror {
u64 devid;
int num;
@@ -6047,6 +6065,9 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
case BTRFS_READ_POLICY_RR:
preferred_mirror = btrfs_read_rr(map, first, num_stripes);
break;
+ case BTRFS_READ_POLICY_DEVID:
+ preferred_mirror = btrfs_read_preferred(map, first, num_stripes);
+ break;
#endif
}
@@ -306,6 +306,8 @@ enum btrfs_read_policy {
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/* Balancing raid1 reads across all striped devices (round-robin) */
BTRFS_READ_POLICY_RR,
+ /* Read from the specific device */
+ BTRFS_READ_POLICY_DEVID,
#endif
BTRFS_NR_READ_POLICY,
};
@@ -440,6 +442,9 @@ struct btrfs_fs_devices {
/* Min contiguous reads before switching to next device. */
int min_contiguous_read;
+ /* Device to be used for reading in case of RAID1. */
+ u64 read_devid;
+
/* Checksum mode - offload it or do it synchronously. */
enum btrfs_offload_csum_mode offload_csum_mode;
#endif
When there's stale data on a mirrored device, this feature lets you choose
which device to read from. It is mainly used for testing.

Usage:

  echo "devid:<devid-value>" > /sys/fs/btrfs/<UUID>/read_policy

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c   | 34 ++++++++++++++++++++++++++++++++--
 fs/btrfs/volumes.c | 21 +++++++++++++++++++++
 fs/btrfs/volumes.h |  5 +++++
 3 files changed, 58 insertions(+), 2 deletions(-)