[13/23] btrfs: add the data transaction commit logic into may_commit_transaction
diff mbox series

Message ID 20200630135921.745612-14-josef@toxicpanda.com
State New
Headers show
Series
  • Change data reservations to use the ticketing infra
Related show

Commit Message

Josef Bacik June 30, 2020, 1:59 p.m. UTC
Data space flushing currently unconditionally commits the transaction
twice in a row, and the last time it checks if there's enough pinned
extents to satisfy it's reservation before deciding to commit the
transaction for the 3rd and final time.

Encode this logic into may_commit_transaction().  In the next patch we
will pass in U64_MAX for bytes_needed the first two times, and the final
time we will pass in the actual bytes we need so the normal logic will
apply.

This patch exists soley to make the logical changes I will make to the
flushing state machine separate to make it easier to bisect any
performance related regressions.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Tested-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/space-info.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

Comments

Nikolay Borisov July 7, 2020, 2:39 p.m. UTC | #1
On 30.06.20 г. 16:59 ч., Josef Bacik wrote:
> Data space flushing currently unconditionally commits the transaction
> twice in a row, and the last time it checks if there's enough pinned
> extents to satisfy it's reservation before deciding to commit the
> transaction for the 3rd and final time.
> 
> Encode this logic into may_commit_transaction().  In the next patch we
> will pass in U64_MAX for bytes_needed the first two times, and the final
> time we will pass in the actual bytes we need so the normal logic will
> apply.
> 
> This patch exists soley to make the logical changes I will make to the
> flushing state machine separate to make it easier to bisect any
> performance related regressions.
> 
> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
> Tested-by: Nikolay Borisov <nborisov@suse.com>
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>

On a second pass through I'm now somewhat reluctant to merge this code.
may_commit_transaction has grown more logic which pertain solely to
metadata. As such I think we should separate that logic (i.e current
may_commit_transaction) and any further adjustments we might want to
make for data space info. For example all the delayed refs/trans
reservation checks etc. make absolutely no sense for data space info.

> ---
>  fs/btrfs/space-info.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index e041b1d58e28..fb63ddc31540 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -579,21 +579,33 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
>   * will return -ENOSPC.
>   */
>  static int may_commit_transaction(struct btrfs_fs_info *fs_info,
> -				  struct btrfs_space_info *space_info)
> +				  struct btrfs_space_info *space_info,
> +				  u64 bytes_needed)
>  {
>  	struct reserve_ticket *ticket = NULL;
>  	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
>  	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
>  	struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
>  	struct btrfs_trans_handle *trans;
> -	u64 bytes_needed;
>  	u64 reclaim_bytes = 0;
>  	u64 cur_free_bytes = 0;
> +	bool do_commit = false;
>  
>  	trans = (struct btrfs_trans_handle *)current->journal_info;
>  	if (trans)
>  		return -EAGAIN;
>  
> +	/*
> +	 * If we are data and have passed in U64_MAX we just want to
> +	 * unconditionally commit the transaction to match the previous data
> +	 * flushing behavior.
> +	 */
> +	if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
> +	   bytes_needed == U64_MAX) {
> +		do_commit = true;
> +		goto check_pinned;
> +	}
> +
>  	spin_lock(&space_info->lock);
>  	cur_free_bytes = btrfs_space_info_used(space_info, true);
>  	if (cur_free_bytes < space_info->total_bytes)
> @@ -607,7 +619,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
>  	else if (!list_empty(&space_info->tickets))
>  		ticket = list_first_entry(&space_info->tickets,
>  					  struct reserve_ticket, list);
> -	bytes_needed = (ticket) ? ticket->bytes : 0;
> +	bytes_needed = (ticket) ? ticket->bytes : bytes_needed;
>  
>  	if (bytes_needed > cur_free_bytes)
>  		bytes_needed -= cur_free_bytes;
> @@ -618,6 +630,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
>  	if (!bytes_needed)
>  		return 0;
>  
> +check_pinned:
>  	trans = btrfs_join_transaction(fs_info->extent_root);
>  	if (IS_ERR(trans))
>  		return PTR_ERR(trans);
> @@ -627,7 +640,8 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
>  	 * we have block groups that are going to be freed, allowing us to
>  	 * possibly do a chunk allocation the next loop through.
>  	 */
> -	if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
> +	if (do_commit ||
> +	    test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
>  	    __percpu_counter_compare(&space_info->total_bytes_pinned,
>  				     bytes_needed,
>  				     BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
> @@ -743,7 +757,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
>  		btrfs_wait_on_delayed_iputs(fs_info);
>  		break;
>  	case COMMIT_TRANS:
> -		ret = may_commit_transaction(fs_info, space_info);
> +		ret = may_commit_transaction(fs_info, space_info, num_bytes);
>  		break;
>  	default:
>  		ret = -ENOSPC;
>
Josef Bacik July 7, 2020, 2:54 p.m. UTC | #2
On 7/7/20 10:39 AM, Nikolay Borisov wrote:
> 
> 
> On 30.06.20 г. 16:59 ч., Josef Bacik wrote:
>> Data space flushing currently unconditionally commits the transaction
>> twice in a row, and the last time it checks if there's enough pinned
>> extents to satisfy it's reservation before deciding to commit the
>> transaction for the 3rd and final time.
>>
>> Encode this logic into may_commit_transaction().  In the next patch we
>> will pass in U64_MAX for bytes_needed the first two times, and the final
>> time we will pass in the actual bytes we need so the normal logic will
>> apply.
>>
>> This patch exists soley to make the logical changes I will make to the
>> flushing state machine separate to make it easier to bisect any
>> performance related regressions.
>>
>> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
>> Tested-by: Nikolay Borisov <nborisov@suse.com>
>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> 
> On a second pass through I'm now somewhat reluctant to merge this code.
> may_commit_transaction has grown more logic which pertain solely to
> metadata. As such I think we should separate that logic (i.e current
> may_commit_transaction) and any further adjustments we might want to
> make for data space info. For example all the delayed refs/trans
> reservation checks etc. make absolutely no sense for data space info.
> 

Right, which is why there's this check

         /*
          * See if there is some space in the delayed insertion reservation for
          * this reservation.
          */
         if (space_info != delayed_rsv->space_info)
                 goto enospc;

before all of those checks.  The logic is the same, minus the delayed ref stuff, 
which is kept separate.  If you want I can make the comment a bit more clear, 
it's not very verbose.  Thanks,

Josef
Josef Bacik July 7, 2020, 3:01 p.m. UTC | #3
On 7/7/20 10:39 AM, Nikolay Borisov wrote:
> 
> 
> On 30.06.20 г. 16:59 ч., Josef Bacik wrote:
>> Data space flushing currently unconditionally commits the transaction
>> twice in a row, and the last time it checks if there's enough pinned
>> extents to satisfy it's reservation before deciding to commit the
>> transaction for the 3rd and final time.
>>
>> Encode this logic into may_commit_transaction().  In the next patch we
>> will pass in U64_MAX for bytes_needed the first two times, and the final
>> time we will pass in the actual bytes we need so the normal logic will
>> apply.
>>
>> This patch exists soley to make the logical changes I will make to the
>> flushing state machine separate to make it easier to bisect any
>> performance related regressions.
>>
>> Reviewed-by: Nikolay Borisov <nborisov@suse.com>
>> Tested-by: Nikolay Borisov <nborisov@suse.com>
>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> 
> On a second pass through I'm now somewhat reluctant to merge this code.
> may_commit_transaction has grown more logic which pertain solely to
> metadata. As such I think we should separate that logic (i.e current
> may_commit_transaction) and any further adjustments we might want to
> make for data space info. For example all the delayed refs/trans
> reservation checks etc. make absolutely no sense for data space info.

Oooh I forgot another thing, the reason I did this is because I was trying to 
maintain the behavior while converting, and then change the behavior in 
subsequent patches.  I'm doing this specifically because we may have performance 
regressions, and I didn't want a bisect to show up at "Giant patch that changes 
all the things".  I want it to show up at the patch that actually changed the 
behavior, so we have a better idea of where to look.  So this intermediate step 
is a bit wonky for sure, but the end result at the end of the series is less 
wonky.  But I'm still going to update that comment.  Thanks,

Josef

Patch
diff mbox series

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index e041b1d58e28..fb63ddc31540 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -579,21 +579,33 @@  static void shrink_delalloc(struct btrfs_fs_info *fs_info,
  * will return -ENOSPC.
  */
 static int may_commit_transaction(struct btrfs_fs_info *fs_info,
-				  struct btrfs_space_info *space_info)
+				  struct btrfs_space_info *space_info,
+				  u64 bytes_needed)
 {
 	struct reserve_ticket *ticket = NULL;
 	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
 	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
 	struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
 	struct btrfs_trans_handle *trans;
-	u64 bytes_needed;
 	u64 reclaim_bytes = 0;
 	u64 cur_free_bytes = 0;
+	bool do_commit = false;
 
 	trans = (struct btrfs_trans_handle *)current->journal_info;
 	if (trans)
 		return -EAGAIN;
 
+	/*
+	 * If we are data and have passed in U64_MAX we just want to
+	 * unconditionally commit the transaction to match the previous data
+	 * flushing behavior.
+	 */
+	if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
+	   bytes_needed == U64_MAX) {
+		do_commit = true;
+		goto check_pinned;
+	}
+
 	spin_lock(&space_info->lock);
 	cur_free_bytes = btrfs_space_info_used(space_info, true);
 	if (cur_free_bytes < space_info->total_bytes)
@@ -607,7 +619,7 @@  static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 	else if (!list_empty(&space_info->tickets))
 		ticket = list_first_entry(&space_info->tickets,
 					  struct reserve_ticket, list);
-	bytes_needed = (ticket) ? ticket->bytes : 0;
+	bytes_needed = (ticket) ? ticket->bytes : bytes_needed;
 
 	if (bytes_needed > cur_free_bytes)
 		bytes_needed -= cur_free_bytes;
@@ -618,6 +630,7 @@  static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 	if (!bytes_needed)
 		return 0;
 
+check_pinned:
 	trans = btrfs_join_transaction(fs_info->extent_root);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
@@ -627,7 +640,8 @@  static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 	 * we have block groups that are going to be freed, allowing us to
 	 * possibly do a chunk allocation the next loop through.
 	 */
-	if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
+	if (do_commit ||
+	    test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
 	    __percpu_counter_compare(&space_info->total_bytes_pinned,
 				     bytes_needed,
 				     BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
@@ -743,7 +757,7 @@  static void flush_space(struct btrfs_fs_info *fs_info,
 		btrfs_wait_on_delayed_iputs(fs_info);
 		break;
 	case COMMIT_TRANS:
-		ret = may_commit_transaction(fs_info, space_info);
+		ret = may_commit_transaction(fs_info, space_info, num_bytes);
 		break;
 	default:
 		ret = -ENOSPC;