@@ -11,6 +11,7 @@
#include "ctree.h"
#include "xattr.h"
#include "compression.h"
+#include "volumes.h"
#define BTRFS_PROP_HANDLERS_HT_BITS 8
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
@@ -326,6 +327,45 @@ static const char *prop_compression_extract(struct inode *inode)
return NULL;
}
+/*
+ * Validate a value for the "readmirror" property.
+ *
+ * The property is only accepted on inodes whose root has objectid
+ * BTRFS_FS_TREE_OBJECTID; any other root yields -EINVAL.
+ *
+ * Accepted values:
+ *   - a zero-length value
+ *   - exactly "pid"
+ *
+ * @value is an xattr payload described by @len and is not assumed to be
+ * NUL-terminated, so the length is checked before any byte is compared.
+ * This keeps short values from being read past their end and rejects
+ * values that merely start with "pid" (e.g. "pidx").
+ *
+ * Returns 0 if the value is acceptable, -EINVAL otherwise.
+ */
+static int prop_readmirror_validate(struct inode *inode, const char *value,
+				    size_t len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID)
+		return -EINVAL;
+
+	if (!len)
+		return 0;
+
+	/* Exact match only: the length must be that of "pid". */
+	if (len == 3 && !strncmp("pid", value, 3))
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Apply a "readmirror" property value.
+ *
+ * The policy is stored in the fs_devices reached through the inode's super
+ * block, so it is not a per-inode setting.
+ *
+ * A NULL or zero-length @value resets the policy to
+ * BTRFS_READMIRROR_DEFAULT; prop_readmirror_validate() accepts a
+ * zero-length value, so it must be handled here as well. A value starting
+ * with "pid" selects BTRFS_READMIRROR_PID. @value is not assumed to be
+ * NUL-terminated, so it is only compared when @len covers the compared
+ * bytes.
+ *
+ * Any other value leaves the current policy unchanged.
+ *
+ * Always returns 0.
+ */
+static int prop_readmirror_apply(struct inode *inode, const char *value,
+				 size_t len)
+{
+	struct btrfs_fs_devices *fs_devices = btrfs_sb(inode->i_sb)->fs_devices;
+
+	if (!value || !len)
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_DEFAULT;
+	else if (len >= 3 && !strncmp("pid", value, 3))
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_PID;
+
+	return 0;
+}
+
+static const char *prop_readmirror_extract(struct inode *inode)
+{
+ /*
+ * The readmirror policy is stored in fs_devices and applies to the
+ * whole FS rather than to a single inode, so inheritance is not
+ * applicable: there is never a value to report, always return NULL.
+ */
+ return NULL;
+}
+
static struct prop_handler prop_handlers[] = {
{
.xattr_name = XATTR_BTRFS_PREFIX "compression",
@@ -334,6 +374,13 @@ static const char *prop_compression_extract(struct inode *inode)
.extract = prop_compression_extract,
.inheritable = 1
},
+ {
+ .xattr_name = XATTR_BTRFS_PREFIX "readmirror",
+ .validate = prop_readmirror_validate,
+ .apply = prop_readmirror_apply,
+ .extract = prop_readmirror_extract,
+ .inheritable = 0
+ },
};
static int inherit_props(struct btrfs_trans_handle *trans,
@@ -5562,7 +5562,16 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
else
num_stripes = map->num_stripes;
- preferred_mirror = first + current->pid % num_stripes;
+ switch(fs_info->fs_devices->readmirror_policy) {
+ case BTRFS_READMIRROR_PID:
+ /* fall through */
+ case BTRFS_READMIRROR_DEFAULT:
+ /* fall through */
+ default:
+ /* readmirror as per thread pid */
+ preferred_mirror = first + current->pid % num_stripes;
+ break;
+ }
if (dev_replace_is_ongoing &&
fs_info->dev_replace.cont_reading_from_srcdev_mode ==
@@ -208,6 +208,11 @@ struct btrfs_device {
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
+enum btrfs_readmirror_policy {
+ BTRFS_READMIRROR_DEFAULT, /* no policy set; find_live_mirror() handles it like PID */
+ BTRFS_READMIRROR_PID, /* preferred mirror derived from the reading thread's pid */
+};
+
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
u8 metadata_uuid[BTRFS_FSID_SIZE];
@@ -254,6 +259,8 @@ struct btrfs_fs_devices {
struct kobject fsid_kobj;
struct kobject *device_dir_kobj;
struct completion kobj_unregister;
+
+ int readmirror_policy;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
The function call chain __btrfs_map_block()->find_live_mirror() uses the
thread %pid to determine the %mirror_num for a read when mirror_num=0 is
passed in the argument.

This pid-based mirror_num extrapolation has the following disadvantages:

- A single-process large read IO will read from only one disk.
- In the worst case, all processes reading from the FS could have either
  all-odd or all-even pids, so the read IO gets skewed.
- There is no deterministic way of knowing/controlling which copy will be
  used for reading.
- Performance may vary for a given multi-process workload run at
  different times.

So we need other types of readmirror policies.

This patch introduces a framework so that we can add more policies, and
converts the existing %pid policy into a configurable parameter using the
property interface. For example:

  btrfs property set /btrfs readmirror pid
  btrfs property set /btrfs readmirror ""

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/props.c   | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.c | 11 ++++++++++-
 fs/btrfs/volumes.h |  7 +++++++
 3 files changed, 64 insertions(+), 1 deletion(-)