[06/22] btrfs: handle empty block_group removal

Message ID	41ac461c9007ece384635e600b4ab76f8e757af4.1574282259.git.dennis@kernel.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=4jq6=ZM=vger.kernel.org=linux-btrfs-owner@kernel.org> From: Dennis Zhou <dennis@kernel.org> To: David Sterba <dsterba@suse.com>, Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>, Omar Sandoval <osandov@osandov.com> Cc: kernel-team@fb.com, linux-btrfs@vger.kernel.org, Dennis Zhou <dennis@kernel.org> Subject: [PATCH 06/22] btrfs: handle empty block_group removal Date: Wed, 20 Nov 2019 16:51:05 -0500 Message-Id: <41ac461c9007ece384635e600b4ab76f8e757af4.1574282259.git.dennis@kernel.org> In-Reply-To: <cover.1574282259.git.dennis@kernel.org> References: <cover.1574282259.git.dennis@kernel.org> In-Reply-To: <cover.1574282259.git.dennis@kernel.org> References: <cover.1574282259.git.dennis@kernel.org> Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk
Series	btrfs: async discard support \| expand [v3,00/22] btrfs: async discard support [01/22] bitmap: genericize percpu bitmap region iterators [02/22] btrfs: rename DISCARD opt to DISCARD_SYNC [03/22] btrfs: keep track of which extents have been discarded [04/22] btrfs: keep track of cleanliness of the bitmap [05/22] btrfs: add the beginning of async discard, discard workqueue [06/22] btrfs: handle empty block_group removal [07/22] btrfs: discard one region at a time in async discard [08/22] btrfs: add removal calls for sysfs debug/ [09/22] btrfs: make UUID/debug have its own kobject [10/22] btrfs: add discard sysfs directory [11/22] btrfs: track discardable extents for async discard [12/22] btrfs: keep track of discardable_bytes [13/22] btrfs: calculate discard delay based on number of extents [14/22] btrfs: add bps discard rate limit [15/22] btrfs: limit max discard size for async discard [16/22] btrfs: make max async discard size tunable [17/22] btrfs: have multiple discard lists [18/22] btrfs: only keep track of data extents for async discard [19/22] btrfs: keep track of discard reuse stats [20/22] btrfs: add async discard header [21/22] btrfs: increase the metadata allowance for the free_space_cache [22/22] btrfs: make smaller extents more likely to go into bitmaps

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index d2bc46c365f4..aea2b2637d65 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1250,6 +1250,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; + bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC); int ret = 0; if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) @@ -1259,6 +1260,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) while (!list_empty(&fs_info->unused_bgs)) { u64 start, end; int trimming; + bool async_trimmed; block_group = list_first_entry(&fs_info->unused_bgs, struct btrfs_block_group, @@ -1280,9 +1282,23 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); + + /* + * Async discard moves the final block group discard to be prior + * to the unused_bgs code path. Therefore, if it's not fully + * trimmed, punt it back to the async discard lists. + */ + async_trimmed = (!btrfs_test_opt(fs_info, DISCARD_ASYNC) || + btrfs_is_free_space_trimmed(block_group)); + if (block_group->reserved || block_group->pinned || block_group->used || block_group->ro || - list_is_singular(&block_group->list)) { + list_is_singular(&block_group->list) || + !async_trimmed) { + /* Requeue if we failed because of async discard. */ + if (!async_trimmed) + btrfs_discard_queue_work(&fs_info->discard_ctl, + block_group); /* * We want to bail if we made new allocations or have * outstanding allocations in this block group. We do @@ -1365,6 +1381,17 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); + /* + * The normal path here is an unused block group is passed here, + * then trimming is handled in the transaction commit path. 
+ * Async discard interposes before this to do the trimming + * before coming down the unused block group path as trimming + * will no longer be done later in the transaction commit path. + */ + if (!async_trim_enabled && + btrfs_test_opt(fs_info, DISCARD_ASYNC)) + goto flip_async; + /* DISCARD can flip during remount */ trimming = btrfs_test_opt(fs_info, DISCARD_SYNC); @@ -1409,6 +1436,13 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); } spin_unlock(&fs_info->unused_bgs_lock); + return; + +flip_async: + btrfs_end_transaction(trans); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + btrfs_put_block_group(block_group); + btrfs_discard_punt_unused_bgs_list(fs_info); } void btrfs_mark_bg_unused(struct btrfs_block_group *bg) @@ -1613,6 +1647,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); set_free_space_tree_thresholds(cache); + cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED; + atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); init_rwsem(&cache->data_rwsem); @@ -1779,7 +1815,11 @@ static int read_one_block_group(struct btrfs_fs_info *info, inc_block_group_ro(cache, 1); } else if (cache->used == 0) { ASSERT(list_empty(&cache->bg_list)); - btrfs_mark_bg_unused(cache); + if (btrfs_test_opt(info, DISCARD_ASYNC)) + btrfs_add_to_discard_unused_list(&info->discard_ctl, + cache); + else + btrfs_mark_bg_unused(cache); } return 0; error: @@ -2742,8 +2782,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, * dirty list to avoid races between cleaner kthread and space * cache writeout. 
*/ - if (!alloc && old_val == 0) - btrfs_mark_bg_unused(cache); + if (!alloc && old_val == 0) { + if (!btrfs_test_opt(info, DISCARD_ASYNC)) + btrfs_mark_bg_unused(cache); + } btrfs_put_block_group(cache); total -= num_bytes; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d5ce8054f074..68340d65a8b6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -443,9 +443,14 @@ struct btrfs_full_stripe_locks_tree { /* Discard control. */ /* * Async discard uses multiple lists to differentiate the discard filter - * parameters. + * parameters. Index 0 is for completely free block groups where we need to + * ensure the entire block group is trimmed without being lossy. Indices + * afterwards represent monotonically decreasing discard filter sizes to + * prioritize what should be discarded next. */ -#define BTRFS_NR_DISCARD_LISTS 1 +#define BTRFS_NR_DISCARD_LISTS 2 +#define BTRFS_DISCARD_INDEX_UNUSED 0 +#define BTRFS_DISCARD_INDEX_START 1 struct btrfs_discard_ctl { struct workqueue_struct *discard_workers; diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 15d54de3d682..e0f48d6de9a1 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -26,13 +26,33 @@ void btrfs_add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, { spin_lock(&discard_ctl->lock); - if (list_empty(&block_group->discard_list)) + if (list_empty(&block_group->discard_list) || + block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { + if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) + block_group->discard_index = BTRFS_DISCARD_INDEX_START; block_group->discard_eligible_time = (ktime_get_ns() + BTRFS_DISCARD_DELAY); + } list_move_tail(&block_group->discard_list, btrfs_get_discard_list(discard_ctl, block_group)); +spin_unlock(&discard_ctl->lock); +} + +void btrfs_add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl, + struct btrfs_block_group *block_group) +{ + spin_lock(&discard_ctl->lock); + + if (!list_empty(&block_group->discard_list)) + 
list_del_init(&block_group->discard_list); + + block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED; + block_group->discard_eligible_time = ktime_get_ns(); + list_add_tail(&block_group->discard_list, + &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]); + spin_unlock(&discard_ctl->lock); } @@ -151,7 +171,11 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) return; - btrfs_add_to_discard_list(discard_ctl, block_group); + if (block_group->used == 0) + btrfs_add_to_discard_unused_list(discard_ctl, block_group); + else + btrfs_add_to_discard_list(discard_ctl, block_group); + if (!delayed_work_pending(&discard_ctl->work)) btrfs_discard_schedule_work(discard_ctl, false); } @@ -196,6 +220,30 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, spin_unlock(&discard_ctl->lock); } +/** + * btrfs_finish_discard_pass - determine next step of a block_group + * @discard_ctl: discard control + * @block_group: block_group of interest + * + * This determines the next step for a block group after it's finished going + * through a pass on a discard list. If it is unused and fully trimmed, we can + * mark it unused and send it to the unused_bgs path. Otherwise, pass it onto + * the appropriate filter list or let it fall off. 
+ */ +static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, + struct btrfs_block_group *block_group) +{ + remove_from_discard_list(discard_ctl, block_group); + + if (block_group->used == 0) { + if (btrfs_is_free_space_trimmed(block_group)) + btrfs_mark_bg_unused(block_group); + else + btrfs_add_to_discard_unused_list(discard_ctl, + block_group); + } +} + /** * btrfs_discard_workfn - discard work function * @work: work @@ -217,7 +265,7 @@ static void btrfs_discard_workfn(struct work_struct *work) btrfs_trim_block_group(block_group, &trimmed, block_group->start, btrfs_block_group_end(block_group), 0); - remove_from_discard_list(discard_ctl, block_group); + btrfs_finish_discard_pass(discard_ctl, block_group); btrfs_discard_schedule_work(discard_ctl, false); } @@ -238,6 +286,65 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); } +/** + * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists + * @fs_info: fs_info of interest + * + * The unused_bgs list needs to be punted to the discard lists because the + * order of operations is changed. In the normal synchronous discard path, the + * block groups are trimmed via a single large trim in transaction commit. This + * is ultimately what we are trying to avoid with asynchronous discard. Thus, + * it must be done before going down the unused_bgs path. + */ +void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_group *block_group, *next; + + spin_lock(&fs_info->unused_bgs_lock); + + /* We enabled async discard, so punt all to the queue.
*/ + list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, + bg_list) { + list_del_init(&block_group->bg_list); + btrfs_add_to_discard_unused_list(&fs_info->discard_ctl, + block_group); + } + + spin_unlock(&fs_info->unused_bgs_lock); +} + +/** + * btrfs_discard_purge_list - purge discard lists + * @discard_ctl: discard control + * + * If we are disabling async discard, we may have intercepted block groups that + * are completely free and ready for the unused_bgs path. As discarding will + * now happen in transaction commit or not at all, we can safely mark the + * corresponding block groups as unused and they will be sent on their merry + * way to the unused_bgs list. + */ +static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl) +{ + struct btrfs_block_group *block_group, *next; + int i; + + spin_lock(&discard_ctl->lock); + + for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { + list_for_each_entry_safe(block_group, next, + &discard_ctl->discard_list[i], + discard_list) { + list_del_init(&block_group->discard_list); + spin_unlock(&discard_ctl->lock); + if (block_group->used == 0) + btrfs_mark_bg_unused(block_group); + spin_lock(&discard_ctl->lock); + } + } + + spin_unlock(&discard_ctl->lock); +} + void btrfs_discard_resume(struct btrfs_fs_info *fs_info) { if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) { @@ -245,6 +352,8 @@ void btrfs_discard_resume(struct btrfs_fs_info *fs_info) return; } + btrfs_discard_punt_unused_bgs_list(fs_info); + set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); } @@ -270,4 +379,6 @@ void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info) { btrfs_discard_stop(fs_info); cancel_delayed_work_sync(&fs_info->discard_ctl.work); + + btrfs_discard_purge_list(&fs_info->discard_ctl); } diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h index 439ca8c51877..b61ea684b48d 100644 --- a/fs/btrfs/discard.h +++ b/fs/btrfs/discard.h @@ -7,9 +7,13 @@ struct btrfs_fs_info; struct btrfs_discard_ctl; struct btrfs_block_group; +/* 
List operations. */ void btrfs_add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, struct btrfs_block_group *block_group); +void btrfs_add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl, + struct btrfs_block_group *block_group); +/* Work operations. */ void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, struct btrfs_block_group *block_group); void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, @@ -18,6 +22,8 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, bool override); bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl); +/* Setup/Cleanup operations. */ +void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info); void btrfs_discard_resume(struct btrfs_fs_info *fs_info); void btrfs_discard_stop(struct btrfs_fs_info *fs_info); void btrfs_discard_init(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 72933996e743..55d3d139c113 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2684,6 +2684,37 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) } +/** + * btrfs_is_free_space_trimmed - see if everything is trimmed + * @block_group: block_group of interest + * + * Walk @block_group's free space rb_tree to determine if everything is trimmed. 
+ */ +bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *info; + struct rb_node *node; + bool ret = true; + + spin_lock(&ctl->tree_lock); + node = rb_first(&ctl->free_space_offset); + + while (node) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + + if (!btrfs_free_space_trimmed(info)) { + ret = false; + break; + } + + node = rb_next(node); + } + + spin_unlock(&ctl->tree_lock); + return ret; +} + u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, u64 offset, u64 bytes, u64 empty_size, u64 *max_extent_size) @@ -2770,6 +2801,9 @@ int btrfs_return_cluster_to_free_space( ret = __btrfs_return_cluster_to_free_space(block_group, cluster); spin_unlock(&ctl->tree_lock); + btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, + block_group); + /* finally drop our ref */ btrfs_put_block_group(block_group); return ret; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 29d16f58b40b..49ff6d6f333b 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -117,6 +117,7 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group, u64 bytenr, u64 size); void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl); void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group); +bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group); u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, u64 offset, u64 bytes, u64 empty_size, u64 *max_extent_size); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 21de630b0730..49b7afa38895 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -8,6 +8,7 @@ #include <linux/sched/mm.h> #include <crypto/hash.h> #include "ctree.h" +#include "discard.h" #include "volumes.h" #include "disk-io.h" #include "ordered-data.h" @@ -3659,7 +3660,11 @@ int scrub_enumerate_chunks(struct 
scrub_ctx *sctx, if (!cache->removed && !cache->ro && cache->reserved == 0 && cache->used == 0) { spin_unlock(&cache->lock); - btrfs_mark_bg_unused(cache); + if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) + btrfs_add_to_discard_unused_list( + &fs_info->discard_ctl, cache); + else + btrfs_mark_bg_unused(cache); } else { spin_unlock(&cache->lock); }

[06/22] btrfs: handle empty block_group removal

Commit Message

Patch