diff mbox series

[1/4] btrfs: add read_policy latency

Message ID ae5e526c1549d4e6f602c09d8235aa406c5a1404.1603884539.git.anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series [1/4] btrfs: add read_policy latency | expand

Commit Message

Anand Jain Oct. 28, 2020, 1:26 p.m. UTC
The read policy type latency routes the read IO based on the historical
average wait time experienced by the read IOs through the individual
device factored by 1/10 of inflight commands in the queue. The factor
1/10 is because generally the block device queue depth is more than 1,
so there can be commands in the queue even before the previous commands
have been completed. This patch obtains the historical read IO stats from
the kernel block layer.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v1: Drop part_stat_read_all instead use part_stat_read
    Drop inflight
    
 fs/btrfs/sysfs.c   |  3 ++-
 fs/btrfs/volumes.c | 39 ++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/volumes.h |  1 +
 3 files changed, 41 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4dbf90ff088a..88cbf7b2edf0 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -906,7 +906,8 @@  static bool btrfs_strmatch(const char *given, const char *golden)
 	return false;
 }
 
-static const char * const btrfs_read_policy_name[] = { "pid" };
+/* Must follow the order as in enum btrfs_read_policy */
+static const char * const btrfs_read_policy_name[] = { "pid", "latency" };
 
 static ssize_t btrfs_read_policy_show(struct kobject *kobj,
 				      struct kobj_attribute *a, char *buf)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6bf487626f23..48587009b656 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -14,6 +14,7 @@ 
 #include <linux/semaphore.h>
 #include <linux/uuid.h>
 #include <linux/list_sort.h>
+#include <linux/part_stat.h>
 #include "misc.h"
 #include "ctree.h"
 #include "extent_map.h"
@@ -5468,6 +5469,39 @@  int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	return ret;
 }
 
+static int btrfs_find_best_stripe(struct btrfs_fs_info *fs_info,
+				  struct map_lookup *map, int first,
+				  int num_stripe)
+{
+	u64 est_wait = 0;
+	int best_stripe = 0;
+	int index;
+
+	for (index = first; index < first + num_stripe; index++) {
+		u64 read_wait;
+		u64 avg_wait = 0;
+		unsigned long read_ios;
+		struct btrfs_device *device = map->stripes[index].dev;
+
+		read_wait = part_stat_read(device->bdev->bd_part, nsecs[READ]);
+		read_ios = part_stat_read(device->bdev->bd_part, ios[READ]);
+
+		if (read_wait && read_ios && read_wait >= read_ios)
+			avg_wait = div_u64(read_wait, read_ios);
+		else
+			btrfs_info_rl(device->fs_devices->fs_info,
+			"devid: %llu avg_wait ZERO read_wait %llu read_ios %lu",
+				      device->devid, read_wait, read_ios);
+
+		if (est_wait == 0 || est_wait > avg_wait) {
+			est_wait = avg_wait;
+			best_stripe = index;
+		}
+	}
+
+	return best_stripe;
+}
+
 static int find_live_mirror(struct btrfs_fs_info *fs_info,
 			    struct map_lookup *map, int first,
 			    int dev_replace_is_ongoing)
@@ -5498,6 +5532,10 @@  static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	case BTRFS_READ_POLICY_PID:
 		preferred_mirror = first + current->pid % num_stripes;
 		break;
+	case BTRFS_READ_POLICY_LATENCY:
+		preferred_mirror = btrfs_find_best_stripe(fs_info, map, first,
+							  num_stripes);
+		break;
 	}
 
 	if (dev_replace_is_ongoing &&
@@ -6114,7 +6152,6 @@  static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 		u32 factor = map->num_stripes / map->sub_stripes;
-
 		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
 		stripe_index *= map->sub_stripes;
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 97f075516696..24db586a9837 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -217,6 +217,7 @@  enum btrfs_chunk_allocation_policy {
  */
 enum btrfs_read_policy {
 	BTRFS_READ_POLICY_PID,
+	BTRFS_READ_POLICY_LATENCY,
 	BTRFS_NR_READ_POLICY,
 };