@@ -338,8 +338,28 @@ static int prop_readmirror_validate(struct inode *inode, const char *value,
if (!len)
return 0;
- if (!strncmp("pid", value, 3))
+ if (!strncmp("pid", value, 3)) {
return 0;
+ } else if (!strncmp("devid", value, 5)) {
+ u64 devid;
+ char *value_dup;
+
+ if (len <= 5)
+ return -EINVAL;
+
+ value_dup = kstrndup(value, len, GFP_KERNEL);
+ if (!value_dup)
+ return -ENOMEM;
+ if (kstrtoull(value_dup + 5, 10, &devid)) {
+ kfree(value_dup);
+ return -EINVAL;
+ }
+ kfree(value_dup);
+
+ if (btrfs_find_device(root->fs_info->fs_devices, devid,
+ NULL, NULL, false))
+ return 0;
+ }
return -EINVAL;
}
@@ -349,10 +369,33 @@ static int prop_readmirror_apply(struct inode *inode, const char *value,
{
struct btrfs_fs_devices *fs_devices = btrfs_sb(inode->i_sb)->fs_devices;
- if (!value)
+ if (!value) {
fs_devices->readmirror_policy = BTRFS_READMIRROR_DEFAULT;
- else if (!strncmp("pid", value, 3))
+ } else if (!strncmp("pid", value, 3)) {
fs_devices->readmirror_policy = BTRFS_READMIRROR_PID;
+ } else if (!strncmp("devid", value, 5)) {
+ u64 devid;
+ char *value_dup;
+ struct btrfs_device *device;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ value_dup = kstrndup(value, len, GFP_KERNEL);
+ if (!value_dup)
+ return -ENOMEM;
+ if (kstrtoull(value_dup + 5, 10, &devid)) {
+ kfree(value_dup);
+ return -EINVAL;
+ }
+ kfree(value_dup);
+
+ fs_devices->readmirror_policy = BTRFS_READMIRROR_DEVID;
+ device = btrfs_find_device(root->fs_info->fs_devices, devid,
+ NULL, NULL, false);
+ if (!device)
+ return -ENODEV;
+
+ device->type = BTRFS_DEVICE_TYPE_READ_OPTIMIZED;
+ }
return 0;
}
@@ -5553,6 +5553,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
int preferred_mirror;
int tolerance;
struct btrfs_device *srcdev;
+ bool found = false;
ASSERT((map->type &
(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
@@ -5563,6 +5564,21 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
num_stripes = map->num_stripes;
switch(fs_info->fs_devices->readmirror_policy) {
+ case BTRFS_READMIRROR_DEVID:
+ /* skip raid10 for now */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ for (i = first; i < first + num_stripes; i++) {
+ if (map->stripes[i].dev->type ==
+ BTRFS_DEVICE_TYPE_READ_OPTIMIZED) {
+ preferred_mirror = i;
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ /* fall through */
case BTRFS_READMIRROR_PID:
/* fall through */
case BTRFS_READMIRROR_DEFAULT:
@@ -208,9 +208,12 @@ struct btrfs_device {
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
+#define BTRFS_DEVICE_TYPE_READ_OPTIMIZED 1ULL
+
enum btrfs_readmirror_policy {
BTRFS_READMIRROR_DEFAULT,
BTRFS_READMIRROR_PID,
+ BTRFS_READMIRROR_DEVID, /* prefer the READ_OPTIMIZED device (RAID1 only) */
};
struct btrfs_fs_devices {
Introduce the devid readmirror property, which directs all read IO to the
specified device. For example:

  btrfs property set <mnt> readmirror devid<n>

As of now, readmirror by devid supports only RAID1. RAID10 support has to
leverage the device grouping feature to facilitate setting readmirror by
device set.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/props.c   | 49 ++++++++++++++++++++++++++++++++++++++++++++++---
 fs/btrfs/volumes.c | 16 ++++++++++++++++
 fs/btrfs/volumes.h |  3 +++
 3 files changed, 65 insertions(+), 3 deletions(-)