Message ID | 1434375680-4115-6-git-send-email-jeffm@suse.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
On Mon, Jun 15, 2015 at 2:41 PM, <jeffm@suse.com> wrote: > From: Jeff Mahoney <jeffm@suse.com> > > When we clear the dirty bits in btrfs_delete_unused_bgs for extents > in the empty block group, it results in btrfs_finish_extent_commit being > unable to discard the freed extents. > > The block group removal patch added an alternate path to forget extents > other than btrfs_finish_extent_commit. As a result, any extents that > would be freed when the block group is removed aren't discarded. In my > test run, with a large copy of mixed sized files followed by removal, it > left nearly 2/3 of extents undiscarded. > > To clean up the block groups, we add the removed block group onto a list > that will be discarded after transaction commit. > > Signed-off-by: Jeff Mahoney <jeffm@suse.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Tested-by: Filipe Manana <fdmanana@suse.com> > --- > fs/btrfs/ctree.h | 3 ++ > fs/btrfs/extent-tree.c | 68 ++++++++++++++++++++++++++++++++++++++++++--- > fs/btrfs/free-space-cache.c | 57 +++++++++++++++++++++---------------- > fs/btrfs/super.c | 2 +- > fs/btrfs/transaction.c | 2 ++ > fs/btrfs/transaction.h | 2 ++ > 6 files changed, 105 insertions(+), 29 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 6f364e1..780acf1 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -3438,6 +3438,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, > struct btrfs_root *root, u64 group_start, > struct extent_map *em); > void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); > +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache); > +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache); > void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, > struct btrfs_root *root); > u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); > @@ -4068,6 +4070,7 @@ __printf(5, 6) > void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, > unsigned int line, int errno, const char *fmt, ...); > > +const char *btrfs_decode_error(int errno); > > void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, > struct btrfs_root *root, const char *function, > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 24b48df..3598440 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -6103,20 +6103,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, > struct btrfs_root *root) > { > struct btrfs_fs_info *fs_info = root->fs_info; > + struct btrfs_block_group_cache *block_group, *tmp; > + struct list_head *deleted_bgs; > struct extent_io_tree *unpin; > u64 start; > u64 end; > int ret; > > - if (trans->aborted) > - return 0; > - > if (fs_info->pinned_extents == &fs_info->freed_extents[0]) > unpin = &fs_info->freed_extents[1]; > else > unpin = &fs_info->freed_extents[0]; > > - while (1) { > + while (!trans->aborted) { > mutex_lock(&fs_info->unused_bg_unpin_mutex); > ret = find_first_extent_bit(unpin, 0, &start, &end, > EXTENT_DIRTY, NULL); > @@ -6135,6 +6134,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, > cond_resched(); > } > > + /* > + * Transaction is finished. We don't need the lock anymore. We > + * do need to clean up the block groups in case of a transaction > + * abort. > + */ > + deleted_bgs = &trans->transaction->deleted_bgs; > + list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) { > + u64 trimmed = 0; > + > + ret = -EROFS; > + if (!trans->aborted) > + ret = btrfs_discard_extent(root, > + block_group->key.objectid, > + block_group->key.offset, > + &trimmed); > + > + list_del_init(&block_group->bg_list); > + btrfs_put_block_group_trimming(block_group); > + btrfs_put_block_group(block_group); > + > + if (ret) { > + const char *errstr = btrfs_decode_error(ret); > + btrfs_warn(fs_info, > + "Discard failed while removing blockgroup: errno=%d %s\n", > + ret, errstr); > + } > + } > + > return 0; > } > > @@ -9914,6 +9941,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, > * currently running transaction might finish and a new one start, > * allowing for new block groups to be created that can reuse the same > * physical device locations unless we take this special care. > + * > + * There may also be an implicit trim operation if the file system > + * is mounted with -odiscard. The same protections must remain > + * in place until the extents have been discarded completely when > + * the transaction commit has completed. > */ > remove_em = (atomic_read(&block_group->trimming) == 0); > /* > @@ -9988,6 +10020,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) > spin_lock(&fs_info->unused_bgs_lock); > while (!list_empty(&fs_info->unused_bgs)) { > u64 start, end; > + int trimming; > > block_group = list_first_entry(&fs_info->unused_bgs, > struct btrfs_block_group_cache, > @@ -10085,12 +10118,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) > spin_unlock(&block_group->lock); > spin_unlock(&space_info->lock); > > + /* DISCARD can flip during remount */ > + trimming = btrfs_test_opt(root, DISCARD); > + > + /* Implicit trim during transaction commit. */ > + if (trimming) > + btrfs_get_block_group_trimming(block_group); > + > /* > * Btrfs_remove_chunk will abort the transaction if things go > * horribly wrong. > */ > ret = btrfs_remove_chunk(trans, root, > block_group->key.objectid); > + > + if (ret) { > + if (trimming) > + btrfs_put_block_group_trimming(block_group); > + goto end_trans; > + } > + > + /* > + * If we're not mounted with -odiscard, we can just forget > + * about this block group. Otherwise we'll need to wait > + * until transaction commit to do the actual discard. > + */ > + if (trimming) { > + WARN_ON(!list_empty(&block_group->bg_list)); > + spin_lock(&trans->transaction->deleted_bgs_lock); > + list_move(&block_group->bg_list, > + &trans->transaction->deleted_bgs); > + spin_unlock(&trans->transaction->deleted_bgs_lock); > + btrfs_get_block_group(block_group); > + } > end_trans: > btrfs_end_transaction(trans, root); > next: > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c > index 9dbe5b5..c79253e 100644 > --- a/fs/btrfs/free-space-cache.c > +++ b/fs/btrfs/free-space-cache.c > @@ -3274,35 +3274,23 @@ next: > return ret; > } > > -int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, > - u64 *trimmed, u64 start, u64 end, u64 minlen) > +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) > { > - int ret; > + atomic_inc(&cache->trimming); > +} > > - *trimmed = 0; > +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) > +{ > + struct extent_map_tree *em_tree; > + struct extent_map *em; > + bool cleanup; > > spin_lock(&block_group->lock); > - if (block_group->removed) { > - spin_unlock(&block_group->lock); > - return 0; > - } > - atomic_inc(&block_group->trimming); > + cleanup = (atomic_dec_and_test(&block_group->trimming) && > + block_group->removed); > spin_unlock(&block_group->lock); > > - ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); > - if (ret) > - goto out; > - > - ret = trim_bitmaps(block_group, trimmed, start, end, minlen); > -out: > - spin_lock(&block_group->lock); > - if (atomic_dec_and_test(&block_group->trimming) && > - block_group->removed) { > - struct extent_map_tree *em_tree; > - struct extent_map *em; > - > - spin_unlock(&block_group->lock); > - > + if (cleanup) { > lock_chunks(block_group->fs_info->chunk_root); > em_tree = &block_group->fs_info->mapping_tree.map_tree; > write_lock(&em_tree->lock); > @@ -3326,10 +3314,31 @@ out: > * this block group have left 1 entry each one. Free them. > */ > __btrfs_remove_free_space_cache(block_group->free_space_ctl); > - } else { > + } > +} > + > +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, > + u64 *trimmed, u64 start, u64 end, u64 minlen) > +{ > + int ret; > + > + *trimmed = 0; > + > + spin_lock(&block_group->lock); > + if (block_group->removed) { > spin_unlock(&block_group->lock); > + return 0; > } > + btrfs_get_block_group_trimming(block_group); > + spin_unlock(&block_group->lock); > + > + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); > + if (ret) > + goto out; > > + ret = trim_bitmaps(block_group, trimmed, start, end, minlen); > +out: > + btrfs_put_block_group_trimming(block_group); > return ret; > } > > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index 2ccd8d4..a80da03 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type; > > static int btrfs_remount(struct super_block *sb, int *flags, char *data); > > -static const char *btrfs_decode_error(int errno) > +const char *btrfs_decode_error(int errno) > { > char *errstr = "unknown"; > > diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c > index 5628e25..2005262 100644 > --- a/fs/btrfs/transaction.c > +++ b/fs/btrfs/transaction.c > @@ -256,6 +256,8 @@ loop: > mutex_init(&cur_trans->cache_write_mutex); > cur_trans->num_dirty_bgs = 0; > spin_lock_init(&cur_trans->dirty_bgs_lock); > + INIT_LIST_HEAD(&cur_trans->deleted_bgs); > + spin_lock_init(&cur_trans->deleted_bgs_lock); > list_add_tail(&cur_trans->list, &fs_info->trans_list); > extent_io_tree_init(&cur_trans->dirty_pages, > fs_info->btree_inode->i_mapping); > diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h > index 0b24755..14325f2 100644 > --- a/fs/btrfs/transaction.h > +++ b/fs/btrfs/transaction.h > @@ -74,6 +74,8 @@ struct btrfs_transaction { > */ > struct mutex cache_write_mutex; > spinlock_t dirty_bgs_lock; > + struct list_head deleted_bgs; > + spinlock_t deleted_bgs_lock; > struct btrfs_delayed_ref_root delayed_refs; > int aborted; > int dirty_bg_run; > -- > 2.4.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f364e1..780acf1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3438,6 +3438,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start, struct extent_map *em); void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache); +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache); void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); @@ -4068,6 +4070,7 @@ __printf(5, 6) void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); +const char *btrfs_decode_error(int errno); void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *function, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 24b48df..3598440 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6103,20 +6103,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *block_group, *tmp; + struct list_head *deleted_bgs; struct extent_io_tree *unpin; u64 start; u64 end; int ret; - if (trans->aborted) - return 0; - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) unpin = &fs_info->freed_extents[1]; else unpin = &fs_info->freed_extents[0]; - while (1) { + while (!trans->aborted) { mutex_lock(&fs_info->unused_bg_unpin_mutex); ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, NULL); @@ -6135,6 +6134,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, cond_resched(); } + /* + * Transaction is finished. We don't need the lock anymore. We + * do need to clean up the block groups in case of a transaction + * abort. + */ + deleted_bgs = &trans->transaction->deleted_bgs; + list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) { + u64 trimmed = 0; + + ret = -EROFS; + if (!trans->aborted) + ret = btrfs_discard_extent(root, + block_group->key.objectid, + block_group->key.offset, + &trimmed); + + list_del_init(&block_group->bg_list); + btrfs_put_block_group_trimming(block_group); + btrfs_put_block_group(block_group); + + if (ret) { + const char *errstr = btrfs_decode_error(ret); + btrfs_warn(fs_info, + "Discard failed while removing blockgroup: errno=%d %s\n", + ret, errstr); + } + } + return 0; } @@ -9914,6 +9941,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * currently running transaction might finish and a new one start, * allowing for new block groups to be created that can reuse the same * physical device locations unless we take this special care. + * + * There may also be an implicit trim operation if the file system + * is mounted with -odiscard. The same protections must remain + * in place until the extents have been discarded completely when + * the transaction commit has completed. */ remove_em = (atomic_read(&block_group->trimming) == 0); /* @@ -9988,6 +10020,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { u64 start, end; + int trimming; block_group = list_first_entry(&fs_info->unused_bgs, struct btrfs_block_group_cache, @@ -10085,12 +10118,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); + /* DISCARD can flip during remount */ + trimming = btrfs_test_opt(root, DISCARD); + + /* Implicit trim during transaction commit. */ + if (trimming) + btrfs_get_block_group_trimming(block_group); + /* * Btrfs_remove_chunk will abort the transaction if things go * horribly wrong. */ ret = btrfs_remove_chunk(trans, root, block_group->key.objectid); + + if (ret) { + if (trimming) + btrfs_put_block_group_trimming(block_group); + goto end_trans; + } + + /* + * If we're not mounted with -odiscard, we can just forget + * about this block group. Otherwise we'll need to wait + * until transaction commit to do the actual discard. + */ + if (trimming) { + WARN_ON(!list_empty(&block_group->bg_list)); + spin_lock(&trans->transaction->deleted_bgs_lock); + list_move(&block_group->bg_list, + &trans->transaction->deleted_bgs); + spin_unlock(&trans->transaction->deleted_bgs_lock); + btrfs_get_block_group(block_group); + } end_trans: btrfs_end_transaction(trans, root); next: diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9dbe5b5..c79253e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3274,35 +3274,23 @@ next: return ret; } -int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, - u64 *trimmed, u64 start, u64 end, u64 minlen) +void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) { - int ret; + atomic_inc(&cache->trimming); +} - *trimmed = 0; +void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) +{ + struct extent_map_tree *em_tree; + struct extent_map *em; + bool cleanup; spin_lock(&block_group->lock); - if (block_group->removed) { - spin_unlock(&block_group->lock); - return 0; - } - atomic_inc(&block_group->trimming); + cleanup = (atomic_dec_and_test(&block_group->trimming) && + block_group->removed); spin_unlock(&block_group->lock); - ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); - if (ret) - goto out; - - ret = trim_bitmaps(block_group, trimmed, start, end, minlen); -out: - spin_lock(&block_group->lock); - if (atomic_dec_and_test(&block_group->trimming) && - block_group->removed) { - struct extent_map_tree *em_tree; - struct extent_map *em; - - spin_unlock(&block_group->lock); - + if (cleanup) { lock_chunks(block_group->fs_info->chunk_root); em_tree = &block_group->fs_info->mapping_tree.map_tree; write_lock(&em_tree->lock); @@ -3326,10 +3314,31 @@ out: * this block group have left 1 entry each one. Free them. */ __btrfs_remove_free_space_cache(block_group->free_space_ctl); - } else { + } +} + +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + int ret; + + *trimmed = 0; + + spin_lock(&block_group->lock); + if (block_group->removed) { spin_unlock(&block_group->lock); + return 0; } + btrfs_get_block_group_trimming(block_group); + spin_unlock(&block_group->lock); + + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); + if (ret) + goto out; + ret = trim_bitmaps(block_group, trimmed, start, end, minlen); +out: + btrfs_put_block_group_trimming(block_group); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2ccd8d4..a80da03 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); -static const char *btrfs_decode_error(int errno) +const char *btrfs_decode_error(int errno) { char *errstr = "unknown"; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5628e25..2005262 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -256,6 +256,8 @@ loop: mutex_init(&cur_trans->cache_write_mutex); cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); + INIT_LIST_HEAD(&cur_trans->deleted_bgs); + spin_lock_init(&cur_trans->deleted_bgs_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0b24755..14325f2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -74,6 +74,8 @@ struct btrfs_transaction { */ struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; + struct list_head deleted_bgs; + spinlock_t deleted_bgs_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; int dirty_bg_run;