@@ -206,6 +206,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
INIT_LIST_HEAD(&space_info->ro_bgs);
INIT_LIST_HEAD(&space_info->tickets);
INIT_LIST_HEAD(&space_info->priority_tickets);
+ space_info->clamp = 1;
ret = btrfs_sysfs_add_space_info_type(info, space_info);
if (ret)
@@ -801,13 +802,13 @@ static inline int need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
* because this doesn't quite work how we want. If we had more than 50%
* of the space_info used by bytes_used and we had 0 available we'd just
* constantly run the background flusher. Instead we want it to kick in
- * if our reclaimable space exceeds 50% of our available free space.
+ * if our reclaimable space exceeds our clamped free space.
*/
thresh = calc_available_free_space(fs_info, space_info,
BTRFS_RESERVE_FLUSH_ALL);
thresh += (space_info->total_bytes - space_info->bytes_used -
space_info->bytes_reserved - space_info->bytes_readonly);
- thresh >>= 1;
+ thresh >>= space_info->clamp;
used = space_info->bytes_may_use + space_info->bytes_pinned;
@@ -1006,6 +1007,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
struct btrfs_block_rsv *delayed_refs_rsv;
struct btrfs_block_rsv *global_rsv;
struct btrfs_block_rsv *trans_rsv;
+ int loops = 0;
fs_info = container_of(work, struct btrfs_fs_info,
preempt_reclaim_work);
@@ -1022,6 +1024,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
u64 to_reclaim, block_rsv_size;
u64 global_rsv_size = global_rsv->reserved;
+ loops++;
+
/*
* If we're just full of pinned, commit the transaction. We
* don't call flush_space(COMMIT_TRANS) here because that has
@@ -1100,6 +1104,10 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
cond_resched();
spin_lock(&space_info->lock);
}
+
+ /* We only went through once, back off our clamping. */
+ if (loops == 1 && !space_info->reclaim_size)
+ space_info->clamp = max(1, space_info->clamp - 1);
spin_unlock(&space_info->lock);
}
@@ -1499,6 +1507,14 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
list_add_tail(&ticket.list,
&space_info->priority_tickets);
}
+
+ /*
+ * We were forced to add a reserve ticket, so our preemptive
+ * flushing is unable to keep up. Clamp down on the threshold
+ * for the preemptive flushing in order to keep up with the
+ * workload.
+ */
+ space_info->clamp = min(space_info->clamp + 1, 8);
trace_btrfs_add_reserve_ticket(fs_info, space_info->flags,
flush, orig_bytes);
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
@@ -22,6 +22,9 @@ struct btrfs_space_info {
the space info if we had an ENOSPC in the
allocator. */
+ int clamp; /* Used to scale our threshold for preemptive
+ flushing. */
+
unsigned int full:1; /* indicates that we cannot allocate any more
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
Starting preemptive flushing at 50% of available free space is a good start, but some workloads are particularly abusive and can quickly overwhelm the preemptive flushing code and drive us into using tickets. Handle this by clamping down on our threshold for starting and continuing to run preemptive flushing. This is particularly important for our overcommit case, as we can really drive the file system into overages and then it's more difficult to pull it back as we start to actually fill up the file system. Signed-off-by: Josef Bacik <josef@toxicpanda.com> --- fs/btrfs/space-info.c | 20 ++++++++++++++++++-- fs/btrfs/space-info.h | 3 +++ 2 files changed, 21 insertions(+), 2 deletions(-)