Message ID | 20200630135921.745612-14-josef@toxicpanda.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Change data reservations to use the ticketing infra | expand |
On 30.06.20 г. 16:59 ч., Josef Bacik wrote: > Data space flushing currently unconditionally commits the transaction > twice in a row, and the last time it checks if there's enough pinned > extents to satisfy its reservation before deciding to commit the > transaction for the 3rd and final time. > > Encode this logic into may_commit_transaction(). In the next patch we > will pass in U64_MAX for bytes_needed the first two times, and the final > time we will pass in the actual bytes we need so the normal logic will > apply. > > This patch exists solely to make the logical changes I will make to the > flushing state machine separate to make it easier to bisect any > performance related regressions. > > Reviewed-by: Nikolay Borisov <nborisov@suse.com> > Tested-by: Nikolay Borisov <nborisov@suse.com> > Signed-off-by: Josef Bacik <josef@toxicpanda.com> On a second pass through I'm now somewhat reluctant to merge this code. may_commit_transaction has grown more logic which pertains solely to metadata. As such I think we should separate that logic (i.e. current may_commit_transaction) and any further adjustments we might want to make for data space info. For example all the delayed refs/trans reservation checks etc. make absolutely no sense for data space info. > --- > fs/btrfs/space-info.c | 24 +++++++++++++++++++----- > 1 file changed, 19 insertions(+), 5 deletions(-) > > diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c > index e041b1d58e28..fb63ddc31540 100644 > --- a/fs/btrfs/space-info.c > +++ b/fs/btrfs/space-info.c > @@ -579,21 +579,33 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, > * will return -ENOSPC. 
> */ > static int may_commit_transaction(struct btrfs_fs_info *fs_info, > - struct btrfs_space_info *space_info) > + struct btrfs_space_info *space_info, > + u64 bytes_needed) > { > struct reserve_ticket *ticket = NULL; > struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; > struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; > struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv; > struct btrfs_trans_handle *trans; > - u64 bytes_needed; > u64 reclaim_bytes = 0; > u64 cur_free_bytes = 0; > + bool do_commit = false; > > trans = (struct btrfs_trans_handle *)current->journal_info; > if (trans) > return -EAGAIN; > > + /* > + * If we are data and have passed in U64_MAX we just want to > + * unconditionally commit the transaction to match the previous data > + * flushing behavior. > + */ > + if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && > + bytes_needed == U64_MAX) { > + do_commit = true; > + goto check_pinned; > + } > + > spin_lock(&space_info->lock); > cur_free_bytes = btrfs_space_info_used(space_info, true); > if (cur_free_bytes < space_info->total_bytes) > @@ -607,7 +619,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, > else if (!list_empty(&space_info->tickets)) > ticket = list_first_entry(&space_info->tickets, > struct reserve_ticket, list); > - bytes_needed = (ticket) ? ticket->bytes : 0; > + bytes_needed = (ticket) ? ticket->bytes : bytes_needed; > > if (bytes_needed > cur_free_bytes) > bytes_needed -= cur_free_bytes; > @@ -618,6 +630,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, > if (!bytes_needed) > return 0; > > +check_pinned: > trans = btrfs_join_transaction(fs_info->extent_root); > if (IS_ERR(trans)) > return PTR_ERR(trans); > @@ -627,7 +640,8 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, > * we have block groups that are going to be freed, allowing us to > * possibly do a chunk allocation the next loop through. 
> */ > - if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || > + if (do_commit || > + test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || > __percpu_counter_compare(&space_info->total_bytes_pinned, > bytes_needed, > BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) > @@ -743,7 +757,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, > btrfs_wait_on_delayed_iputs(fs_info); > break; > case COMMIT_TRANS: > - ret = may_commit_transaction(fs_info, space_info); > + ret = may_commit_transaction(fs_info, space_info, num_bytes); > break; > default: > ret = -ENOSPC; >
On 7/7/20 10:39 AM, Nikolay Borisov wrote: > > > On 30.06.20 г. 16:59 ч., Josef Bacik wrote: >> Data space flushing currently unconditionally commits the transaction >> twice in a row, and the last time it checks if there's enough pinned >> extents to satisfy its reservation before deciding to commit the >> transaction for the 3rd and final time. >> >> Encode this logic into may_commit_transaction(). In the next patch we >> will pass in U64_MAX for bytes_needed the first two times, and the final >> time we will pass in the actual bytes we need so the normal logic will >> apply. >> >> This patch exists solely to make the logical changes I will make to the >> flushing state machine separate to make it easier to bisect any >> performance related regressions. >> >> Reviewed-by: Nikolay Borisov <nborisov@suse.com> >> Tested-by: Nikolay Borisov <nborisov@suse.com> >> Signed-off-by: Josef Bacik <josef@toxicpanda.com> > > On a second pass through I'm now somewhat reluctant to merge this code. > may_commit_transaction has grown more logic which pertains solely to > metadata. As such I think we should separate that logic (i.e. current > may_commit_transaction) and any further adjustments we might want to > make for data space info. For example all the delayed refs/trans > reservation checks etc. make absolutely no sense for data space info. > Right, which is why there's this check /* * See if there is some space in the delayed insertion reservation for * this reservation. */ if (space_info != delayed_rsv->space_info) goto enospc; before all of those checks. The logic is the same, minus the delayed ref stuff, which is kept separate. If you want I can make the comment a bit more clear, it's not very verbose. Thanks, Josef
On 7/7/20 10:39 AM, Nikolay Borisov wrote: > > > On 30.06.20 г. 16:59 ч., Josef Bacik wrote: >> Data space flushing currently unconditionally commits the transaction >> twice in a row, and the last time it checks if there's enough pinned >> extents to satisfy its reservation before deciding to commit the >> transaction for the 3rd and final time. >> >> Encode this logic into may_commit_transaction(). In the next patch we >> will pass in U64_MAX for bytes_needed the first two times, and the final >> time we will pass in the actual bytes we need so the normal logic will >> apply. >> >> This patch exists solely to make the logical changes I will make to the >> flushing state machine separate to make it easier to bisect any >> performance related regressions. >> >> Reviewed-by: Nikolay Borisov <nborisov@suse.com> >> Tested-by: Nikolay Borisov <nborisov@suse.com> >> Signed-off-by: Josef Bacik <josef@toxicpanda.com> > > On a second pass through I'm now somewhat reluctant to merge this code. > may_commit_transaction has grown more logic which pertains solely to > metadata. As such I think we should separate that logic (i.e. current > may_commit_transaction) and any further adjustments we might want to > make for data space info. For example all the delayed refs/trans > reservation checks etc. make absolutely no sense for data space info. Oooh I forgot another thing, the reason I did this is because I was trying to maintain the behavior while converting, and then change the behavior in subsequent patches. I'm doing this specifically because we may have performance regressions, and I didn't want a bisect to show up at "Giant patch that changes all the things". I want it to show up at the patch that actually changed the behavior, so we have a better idea of where to look. So this intermediate step is a bit wonky for sure, but the end result at the end of the series is less wonky. But I'm still going to update that comment. Thanks, Josef
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index e041b1d58e28..fb63ddc31540 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -579,21 +579,33 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, * will return -ENOSPC. */ static int may_commit_transaction(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info) + struct btrfs_space_info *space_info, + u64 bytes_needed) { struct reserve_ticket *ticket = NULL; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv; struct btrfs_trans_handle *trans; - u64 bytes_needed; u64 reclaim_bytes = 0; u64 cur_free_bytes = 0; + bool do_commit = false; trans = (struct btrfs_trans_handle *)current->journal_info; if (trans) return -EAGAIN; + /* + * If we are data and have passed in U64_MAX we just want to + * unconditionally commit the transaction to match the previous data + * flushing behavior. + */ + if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && + bytes_needed == U64_MAX) { + do_commit = true; + goto check_pinned; + } + spin_lock(&space_info->lock); cur_free_bytes = btrfs_space_info_used(space_info, true); if (cur_free_bytes < space_info->total_bytes) @@ -607,7 +619,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, else if (!list_empty(&space_info->tickets)) ticket = list_first_entry(&space_info->tickets, struct reserve_ticket, list); - bytes_needed = (ticket) ? ticket->bytes : 0; + bytes_needed = (ticket) ? 
ticket->bytes : bytes_needed; if (bytes_needed > cur_free_bytes) bytes_needed -= cur_free_bytes; @@ -618,6 +630,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, if (!bytes_needed) return 0; +check_pinned: trans = btrfs_join_transaction(fs_info->extent_root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -627,7 +640,8 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, * we have block groups that are going to be freed, allowing us to * possibly do a chunk allocation the next loop through. */ - if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || + if (do_commit || + test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || __percpu_counter_compare(&space_info->total_bytes_pinned, bytes_needed, BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) @@ -743,7 +757,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, btrfs_wait_on_delayed_iputs(fs_info); break; case COMMIT_TRANS: - ret = may_commit_transaction(fs_info, space_info); + ret = may_commit_transaction(fs_info, space_info, num_bytes); break; default: ret = -ENOSPC;