@@ -2730,7 +2730,8 @@ enum btrfs_flush_state {
FLUSH_DELALLOC = 5,
FLUSH_DELALLOC_WAIT = 6,
ALLOC_CHUNK = 7,
- COMMIT_TRANS = 8,
+ ALLOC_CHUNK_FORCE = 8,
+ COMMIT_TRANS = 9,
};
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
@@ -4914,6 +4914,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
btrfs_end_transaction(trans);
break;
case ALLOC_CHUNK:
+ case ALLOC_CHUNK_FORCE:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -4921,7 +4922,9 @@ static void flush_space(struct btrfs_fs_info *fs_info,
}
ret = do_chunk_alloc(trans,
btrfs_metadata_alloc_profile(fs_info),
- CHUNK_ALLOC_NO_FORCE);
+ (state == ALLOC_CHUNK) ?
+ CHUNK_ALLOC_NO_FORCE :
+ CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
if (ret > 0 || ret == -ENOSPC)
ret = 0;
@@ -5057,6 +5060,19 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
commit_cycles--;
}
+ /*
+ * We don't want to force a chunk allocation until we've tried
+ * pretty hard to reclaim space. Think of the case where we
+ * free'd up a bunch of space and so have a lot of pinned space
+ * to reclaim. We would rather use that than possibly create a
+ * underutilized metadata chunk. So if this is our first run
+ * through the flushing state machine skip ALLOC_CHUNK_FORCE and
+ * commit the transaction. If nothing has changed the next go
+ * around then we can force a chunk allocation.
+ */
+ if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+ flush_state++;
+
if (flush_state > COMMIT_TRANS) {
commit_cycles++;
if (commit_cycles > 2) {
@@ -1051,6 +1051,7 @@ TRACE_EVENT(btrfs_trigger_flush,
{ FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR"}, \
{ FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS"}, \
{ ALLOC_CHUNK, "ALLOC_CHUNK"}, \
+ { ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE"}, \
{ COMMIT_TRANS, "COMMIT_TRANS"})
TRACE_EVENT(btrfs_flush_space,
With my change to no longer take into account the global reserve for metadata allocation chunks we have this side-effect for mixed block group fs'es where we are no longer allocating enough chunks for the data/metadata requirements. To deal with this add a ALLOC_CHUNK_FORCE step to the flushing state machine. This will only get used if we've already made a full loop through the flushing machinery and tried committing the transaction. If we have then we can try and force a chunk allocation since we likely need it to make progress. This resolves the issues I was seeing with the mixed bg tests in xfstests with my previous patch. Signed-off-by: Josef Bacik <josef@toxicpanda.com> --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 18 +++++++++++++++++- include/trace/events/btrfs.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-)