[13/13] btrfs: throttle snapshot delete on delayed refs

Message ID 20200313212330.149024-14-josef@toxicpanda.com
State New, archived
Series Throttle delayed refs based on time

Commit Message

Josef Bacik March 13, 2020, 9:23 p.m. UTC
One of the largest generators of delayed refs is snapshot delete.  This
is because we'll walk down to a shared node/leaf and drop all of the
references to the lower layer in that node/leaf.  With our default
nodesize of 16KiB this can be hundreds of delayed refs, which can easily
put us over our threshold for running delayed refs.
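
To put a number on it, here is a userspace back-of-envelope for how many
pointers (and thus delayed refs) one full node holds.  The struct sizes
are my own assumptions about the on-disk format, they are not taken from
this patch:

	#include <stdio.h>

	int main(void)
	{
		const unsigned int nodesize = 16384; /* default mkfs.btrfs nodesize */
		const unsigned int header = 101;     /* sizeof(struct btrfs_header) */
		const unsigned int key_ptr = 33;     /* sizeof(struct btrfs_key_ptr) */

		/* Mirrors the BTRFS_NODEPTRS_PER_BLOCK() calculation. */
		printf("ptrs per node: %u\n", (nodesize - header) / key_ptr);
		return 0;
	}

This prints 493, so dropping a single fully populated shared node can
queue nearly five hundred delayed refs in one go.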

Instead, check whether we need to throttle ourselves, and if we do, break
out with -EAGAIN.  When this happens we do not want to do the
walk_up_tree, because we need to keep processing the node we're on.
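
Condensed into a runnable toy, the new loop behaves like this.  The stubs
stand in for the real walkers, none of this is btrfs code:

	#include <errno.h>
	#include <stdio.h>

	/* Pretend the throttle fires on the first pass only. */
	static int walk_down_tree(int pass)
	{
		return pass == 0 ? -EAGAIN : 0;
	}

	/* Pretend one walk-up finishes the drop (> 0 means done). */
	static int walk_up_tree(void)
	{
		return 1;
	}

	int main(void)
	{
		int pass, ret, err = 0;

		for (pass = 0; ; pass++) {
			ret = walk_down_tree(pass);
			if (ret < 0 && ret != -EAGAIN) {
				err = ret;
				break;
			} else if (ret != -EAGAIN) {
				ret = walk_up_tree();
				if (ret < 0) {
					err = ret;
					break;
				}
			} else {
				/* Throttled: skip walk_up_tree, stay on this node. */
				printf("throttled, looping on the same node\n");
				ret = 0;
			}
			if (ret > 0)	/* the walk is complete */
				break;
		}
		return err;
	}

The first pass hits the throttle and loops without walking up; the second
pass walks up and finishes.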

We also have to get rid of our BUG_ON(drop_level == 0) everywhere,
because we can now legitimately stop at level 0.  Since we already have
the ability to restart snapshot deletions from an arbitrary key this
works out fine.
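
For reference, the resume path we lean on is already there in
btrfs_drop_snapshot.  Condensed (this is a fragment of the existing
kernel code shown in the hunk below, not anything new):

	btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
	level = root_item->drop_level;	/* may now legitimately be 0 */

	path->lowest_level = level;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->lowest_level = 0;

Setting lowest_level before the search re-seats the path at exactly the
block we stopped on, whether that is a node or a leaf.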

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/extent-tree.c | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

Patch

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e490ce994d1d..718c99e5674f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4662,6 +4662,7 @@  struct walk_control {
 	int reada_slot;
 	int reada_count;
 	int restarted;
+	int drop_subtree;
 };
 
 #define DROP_REFERENCE	1
@@ -4766,6 +4767,21 @@  static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 	int ret;
 
+	/*
+	 * We only want to break if we aren't yet at the end of our leaf/node.
+	 * The reason for this is if we're at DROP_REFERENCE we'll grab the
+	 * current slot's key for the drop_progress.  If we're at the end this
+	 * will obviously go wrong.  We are also not going to generate many more
+	 * delayed refs at this point, so allowing us to continue will not hurt
+	 * us.
+	 */
+	if (!wc->drop_subtree &&
+	    (path->slots[level] < btrfs_header_nritems(path->nodes[level])) &&
+	    btrfs_should_throttle_delayed_refs(fs_info,
+					       &trans->transaction->delayed_refs,
+					       true))
+		return -EAGAIN;
+
 	if (wc->stage == UPDATE_BACKREF &&
 	    btrfs_header_owner(eb) != root->root_key.objectid)
 		return 1;
@@ -5198,6 +5214,8 @@  static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		ret = walk_down_proc(trans, root, path, wc, lookup_info);
 		if (ret > 0)
 			break;
+		if (ret < 0)
+			return ret;
 
 		if (level == 0)
 			break;
@@ -5332,7 +5350,6 @@  int btrfs_drop_snapshot(struct btrfs_root *root,
 		       sizeof(wc->update_progress));
 
 		level = root_item->drop_level;
-		BUG_ON(level == 0);
 		path->lowest_level = level;
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		path->lowest_level = 0;
@@ -5381,19 +5398,23 @@  int btrfs_drop_snapshot(struct btrfs_root *root,
 	wc->update_ref = update_ref;
 	wc->keep_locks = 0;
 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
+	wc->drop_subtree = 0;
 
 	while (1) {
 
 		ret = walk_down_tree(trans, root, path, wc);
-		if (ret < 0) {
-			err = ret;
-			break;
-		}
-
-		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
-		if (ret < 0) {
+		if (ret < 0 && ret != -EAGAIN) {
 			err = ret;
 			break;
+		} else if (ret != -EAGAIN) {
+			ret = walk_up_tree(trans, root, path, wc,
+					   BTRFS_MAX_LEVEL);
+			if (ret < 0) {
+				err = ret;
+				break;
+			}
+		} else {
+			ret = 0;
 		}
 
 		if (ret > 0) {
@@ -5411,7 +5432,6 @@  int btrfs_drop_snapshot(struct btrfs_root *root,
 				      &wc->drop_progress);
 		root_item->drop_level = wc->drop_level;
 
-		BUG_ON(wc->level == 0);
 		if (btrfs_should_end_transaction(trans) ||
 		    (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
 			ret = btrfs_update_root(trans, tree_root,
@@ -5544,6 +5564,7 @@  int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	wc->stage = DROP_REFERENCE;
 	wc->update_ref = 0;
 	wc->keep_locks = 1;
+	wc->drop_subtree = 1;
 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
 
 	while (1) {