[01/13] btrfs: use a stable rolling avg for delayed refs avg

Message ID 20200313212330.149024-2-josef@toxicpanda.com (mailing list archive)
State New, archived
Series Throttle delayed refs based on time

Commit Message

Josef Bacik March 13, 2020, 9:23 p.m. UTC
From: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>

The way we currently calculate the average delayed ref runtime is very
jittery.  We compute a weighted average in which the previously
calculated average counts for 3/4 and the latest runtime measurement
counts for 1/4.
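
For reference, the old update step amounts to the following (a
standalone userspace sketch of the arithmetic; the function name and
plain uint64_t types are stand-ins for the kernel code):

	#include <stdint.h>

	/* Old scheme: new_avg = (3 * old_avg + runtime) / 4 */
	static uint64_t update_avg_old(uint64_t avg, uint64_t runtime)
	{
		return (avg * 3 + runtime) >> 2;	/* div by 4 */
	}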

This jitter leads to wild swings in latency when we are generating a
lot of delayed refs.  Fix this by smoothing the calculation over a much
larger window: accumulate total runtime and the number of refs run, and
once we have at least 1000 seconds of data and more than 1000 refs,
decay both accumulators with a 0.75 decay rate.
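
A minimal sketch of the new accumulation-and-decay scheme (stand-in
struct and function names; the real update happens in
__btrfs_run_delayed_refs under delayed_refs->lock, as in the hunk
below):

	#include <stdint.h>

	#define NSEC_PER_SEC	1000000000ULL

	struct ref_stats {
		uint64_t runtime;	/* accumulated runtime, in ns */
		uint64_t nr_run;	/* accumulated number of refs run */
	};

	/* Feed one batch of refs in, return the new average ns per ref. */
	static uint64_t update_avg(struct ref_stats *s, uint64_t runtime,
				   uint64_t nr_refs)
	{
		uint64_t avg;

		s->nr_run += nr_refs;
		s->runtime += runtime;
		/* The kernel seeds the counters at mount; guard only for this sketch. */
		avg = s->nr_run ? s->runtime / s->nr_run : 0;

		/* Decay both accumulators by 3/4 once enough data builds up. */
		if (s->runtime >= NSEC_PER_SEC * 1000ULL && s->nr_run > 1000) {
			s->runtime = (s->runtime * 3) >> 2;
			s->nr_run = (s->nr_run * 3) >> 2;
		}
		return avg;
	}

Seeding the accumulators with 1 second / 64 refs at mount time (as the
disk-io.c hunk does) keeps the initial average at NSEC_PER_SEC / 64,
the same starting point the old code used.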

Signed-off-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/ctree.h       |  7 +++++++
 fs/btrfs/disk-io.c     |  3 +++
 fs/btrfs/extent-tree.c | 19 +++++++++++++++++--
 3 files changed, 27 insertions(+), 2 deletions(-)

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2ccb2a090782..992ce47977b8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -620,7 +620,14 @@  struct btrfs_fs_info {
 
 	u64 generation;
 	u64 last_trans_committed;
+
+	/*
+	 * Track cumulative delayed ref runtime and ref count so that delayed
+	 * ref throttling can use a stable rolling average per-ref runtime.
+	 */
 	u64 avg_delayed_ref_runtime;
+	u64 delayed_ref_runtime;
+	u64 delayed_ref_nr_run;
 
 	/*
 	 * this is updated to the current trans every time a full commit
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 772cf0fa7c55..b5846552666e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2734,6 +2734,9 @@  void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	fs_info->tree_mod_log = RB_ROOT;
 	fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 	fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
+	fs_info->delayed_ref_runtime = NSEC_PER_SEC;
+	fs_info->delayed_ref_nr_run = 64;
+
 	/* readahead state */
 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
 	spin_lock_init(&fs_info->reada_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2925b3ad77a1..645ae95f465e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2082,8 +2082,23 @@  static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		 * to avoid large swings in the average.
 		 */
 		spin_lock(&delayed_refs->lock);
-		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
-		fs_info->avg_delayed_ref_runtime = avg >> 2;	/* div by 4 */
+		fs_info->delayed_ref_nr_run += actual_count;
+		fs_info->delayed_ref_runtime += runtime;
+		avg = div64_u64(fs_info->delayed_ref_runtime,
+				fs_info->delayed_ref_nr_run);
+
+		/*
+		 * Once we've built up a fair bit of data, start decaying
+		 * everything by 3/4.
+		 */
+		if (fs_info->delayed_ref_runtime >= (NSEC_PER_SEC * 1000ULL) &&
+		    fs_info->delayed_ref_nr_run > 1000) {
+			fs_info->delayed_ref_runtime *= 3;
+			fs_info->delayed_ref_runtime >>= 2;
+			fs_info->delayed_ref_nr_run *= 3;
+			fs_info->delayed_ref_nr_run >>= 2;
+		}
+		fs_info->avg_delayed_ref_runtime = avg;
 		spin_unlock(&delayed_refs->lock);
 	}
 	return 0;