diff mbox series

[RFC,2/5] btrfs: add readmirror pid property

Message ID 1552989624-29577-3-git-send-email-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series readmirror feature | expand

Commit Message

Anand Jain March 19, 2019, 10 a.m. UTC
The function call chain __btrfs_map_block()->find_live_mirror() uses the
thread %pid to determine the %mirror_num for a read when mirror_num=0 is
passed in as the argument.

This pid-based mirror_num selection has the following disadvantages:
 - A large read IO from a single process will read from only one disk.
 - In the worst case, all processes reading from the FS could have
   only odd or only even pids, so the read IO gets skewed.
 - There is no deterministic way of knowing/controlling which copy will
   be used for reading.
 - Performance may vary for a given multi-process workload run at
   different times.

So we need other types of readmirror policies.

This patch introduces a framework so that we can add more policies, and
converts the existing %pid policy into a configurable parameter using a
property.

 For example:
  btrfs property set /btrfs readmirror pid
  btrfs property set /btrfs readmirror ""

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/props.c   | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.c | 11 ++++++++++-
 fs/btrfs/volumes.h |  7 +++++++
 3 files changed, 64 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 1a13f10a6ef5..776cdf099f93 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -11,6 +11,7 @@ 
 #include "ctree.h"
 #include "xattr.h"
 #include "compression.h"
+#include "volumes.h"
 
 #define BTRFS_PROP_HANDLERS_HT_BITS 8
 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
@@ -326,6 +327,45 @@  static const char *prop_compression_extract(struct inode *inode)
 	return NULL;
 }
 
+static int prop_readmirror_validate(struct inode *inode, const char *value,
+				    size_t len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID)
+		return -EINVAL;
+	/* An empty value is valid. */
+	if (!len)
+		return 0;
+	/* Check len as well, so that e.g. "pidfoo" or "pi" is rejected. */
+	if (len == 3 && !strncmp("pid", value, 3))
+		return 0;
+	/* Anything else is not a known readmirror policy. */
+	return -EINVAL;
+}
+
+static int prop_readmirror_apply(struct inode *inode, const char *value,
+				 size_t len)
+{
+	struct btrfs_fs_devices *fs_devices = btrfs_sb(inode->i_sb)->fs_devices;
+
+	if (!value || !len)
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_DEFAULT;
+	else if (len == 3 && !strncmp("pid", value, 3))
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_PID;
+	/* Unrecognized values leave the current policy unchanged. */
+	return 0;
+}
+
+static const char *prop_readmirror_extract(struct inode *inode)
+{
+	/*
+	 * The readmirror policy applies to the whole FS, so inheritance is
+	 * not applicable and there is no value to hand out here.
+	 */
+	return NULL;
+}
+
 static struct prop_handler prop_handlers[] = {
 	{
 		.xattr_name = XATTR_BTRFS_PREFIX "compression",
@@ -334,6 +374,13 @@  static const char *prop_compression_extract(struct inode *inode)
 		.extract = prop_compression_extract,
 		.inheritable = 1
 	},
+	{
+		.xattr_name = XATTR_BTRFS_PREFIX "readmirror",
+		.validate = prop_readmirror_validate,
+		.apply = prop_readmirror_apply,
+		.extract = prop_readmirror_extract,
+		.inheritable = 0
+	},
 };
 
 static int inherit_props(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9024eee889b9..e5072d46e181 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5562,7 +5562,16 @@  static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	else
 		num_stripes = map->num_stripes;
 
-	preferred_mirror = first + current->pid % num_stripes;
+	switch(fs_info->fs_devices->readmirror_policy) {
+	case BTRFS_READMIRROR_PID:
+		/* fall through */
+	case BTRFS_READMIRROR_DEFAULT:
+		/* fall through */
+	default:
+		/* readmirror as per thread pid */
+		preferred_mirror = first + current->pid % num_stripes;
+		break;
+	}
 
 	if (dev_replace_is_ongoing &&
 	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3ad9d58d1b66..27dce9242b55 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -208,6 +208,11 @@  struct btrfs_device {
 BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
 BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
 
+enum btrfs_readmirror_policy {
+	BTRFS_READMIRROR_DEFAULT,	/* no value set; acts like PID */
+	BTRFS_READMIRROR_PID,		/* pick mirror by thread pid */
+};
+
 struct btrfs_fs_devices {
 	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
 	u8 metadata_uuid[BTRFS_FSID_SIZE];
@@ -254,6 +259,8 @@  struct btrfs_fs_devices {
 	struct kobject fsid_kobj;
 	struct kobject *device_dir_kobj;
 	struct completion kobj_unregister;
+
+	int readmirror_policy;
 };
 
 #define BTRFS_BIO_INLINE_CSUM_SIZE	64