
[v3,2/2] btrfs: qgroup: use xarray to track dirty extents in transaction.

Message ID 20240607143021.122220-2-sunjunchao2870@gmail.com (mailing list archive)
State New
Series [v3,1/2] btrfs: qgroup: use goto style to handle error in add_delayed_ref().

Commit Message

Junchao Sun June 7, 2024, 2:30 p.m. UTC
Changes since v2:
  - Separate the cleanup code into its own patch.
  - Call qgroup_mark_inconsistent() when record insertion fails.
  - Return a negative value when record insertion fails.

Changes since v1:
  - Use xa_load() to update an existing entry instead of a double
    xa_store().
  - Rename goto labels.
  - Remove unnecessary calls to xa_init().

Using an xarray to track dirty extents reduces the size of
struct btrfs_qgroup_extent_record from 64 bytes to 40 bytes. The
xarray is also more cache-line friendly, and it reduces the
complexity of the insertion and search code compared to the rb-tree.
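
For illustration, here is a minimal sketch (not part of this patch;
example_record, example_insert and example_lookup are hypothetical
names) of why the xarray API is simpler: lookup and insert are single
calls keyed by bytenr, instead of a hand-rolled tree walk plus
rb_link_node() and rb_insert_color(), and the record no longer needs
an embedded struct rb_node, which is where the size saving comes from:

#include <linux/gfp.h>
#include <linux/xarray.h>

/* Hypothetical stand-in for struct btrfs_qgroup_extent_record. */
struct example_record {
	unsigned long bytenr;
	u64 num_bytes;
};

static DEFINE_XARRAY(example_extents);

static int example_insert(struct example_record *rec)
{
	/* xa_store() returns the previous entry or an encoded error. */
	void *old = xa_store(&example_extents, rec->bytenr, rec, GFP_NOFS);

	if (xa_is_err(old))
		return xa_err(old);	/* e.g. -ENOMEM */
	return old ? 1 : 0;		/* 1: an entry already existed */
}

static struct example_record *example_lookup(unsigned long bytenr)
{
	/* Returns NULL if nothing is stored at that index. */
	return xa_load(&example_extents, bytenr);
}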

Another change introduced here concerns error handling.
Before this patch, btrfs_qgroup_trace_extent_nolock() always
succeeded. With this patch it calls xa_store(), which can fail, so
mark the qgroup as inconsistent if an error happens and then free
the preallocated memory. Also, memory is now preallocated before
taking the spin_lock(); if the preallocation fails, the error
handling is the same as in the existing code.
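
A minimal sketch of that reserve-then-store pattern (the helper name
example_trace_extent is hypothetical; the real code lives in
btrfs_qgroup_trace_extent() and btrfs_qgroup_trace_extent_nolock()):
xa_reserve() preallocates the slot with GFP_NOFS while sleeping is
still allowed, so the later __xa_store() done under the lock can use
GFP_ATOMIC and should already find the slot in place:

#include <linux/gfp.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(dirty_extents);

static int example_trace_extent(unsigned long bytenr, void *record)
{
	void *old;
	int ret;

	/* Preallocate the slot while we may still sleep. */
	ret = xa_reserve(&dirty_extents, bytenr, GFP_NOFS);
	if (ret)
		return ret;		/* typically -ENOMEM */

	/*
	 * Store under the xarray's own lock, as the patch's nolock
	 * helper does (the patch additionally holds delayed_refs->lock
	 * around this); __xa_store() requires xa_lock to be held.
	 */
	xa_lock(&dirty_extents);
	old = __xa_store(&dirty_extents, bytenr, record, GFP_ATOMIC);
	xa_unlock(&dirty_extents);

	if (xa_is_err(old)) {
		/* Drop the now-unused reservation. */
		xa_release(&dirty_extents, bytenr);
		return xa_err(old);
	}
	return 0;
}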

This patch passed the xfstests qgroup tests (./check -g qgroup) as
well as checkpatch.

Suggested-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Junchao Sun <sunjunchao2870@gmail.com>
---
 fs/btrfs/delayed-ref.c | 14 +++++++--
 fs/btrfs/delayed-ref.h |  2 +-
 fs/btrfs/qgroup.c      | 66 ++++++++++++++++++++----------------------
 fs/btrfs/transaction.c |  5 ++--
 4 files changed, 46 insertions(+), 41 deletions(-)

Comments

Qu Wenruo June 17, 2024, 3:11 a.m. UTC | #1
On 2024/6/8 00:00, Junchao Sun wrote:
> Changes since v2:
>    - Separate the cleanup code into its own patch.
>    - Call qgroup_mark_inconsistent() when record insertion fails.
>    - Return a negative value when record insertion fails.
>
> Changes since v1:
>    - Use xa_load() to update an existing entry instead of a double
>      xa_store().
>    - Rename goto labels.
>    - Remove unnecessary calls to xa_init().

Forgot to mention: you should not put the changelog into the commit message.

You do not need to resend; I'll remove them when pushing into the
for-next branch (along with extra testing, etc.).


>
> Using an xarray to track dirty extents reduces the size of
> struct btrfs_qgroup_extent_record from 64 bytes to 40 bytes. The
> xarray is also more cache-line friendly, and it reduces the
> complexity of the insertion and search code compared to the rb-tree.
>
> Another change introduced here concerns error handling.
> Before this patch, btrfs_qgroup_trace_extent_nolock() always
> succeeded. With this patch it calls xa_store(), which can fail, so
> mark the qgroup as inconsistent if an error happens and then free
> the preallocated memory. Also, memory is now preallocated before
> taking the spin_lock(); if the preallocation fails, the error
> handling is the same as in the existing code.
>
> This patch passed the xfstests qgroup tests (./check -g qgroup) as
> well as checkpatch.
>
> Suggested-by: Qu Wenruo <wqu@suse.com>
> Signed-off-by: Junchao Sun <sunjunchao2870@gmail.com>

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu


Patch

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 1a41ab991738..ec78d4c7581c 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -891,10 +891,13 @@  add_delayed_ref_head(struct btrfs_trans_handle *trans,
 
 	/* Record qgroup extent info if provided */
 	if (qrecord) {
-		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
-					delayed_refs, qrecord))
+		int ret = btrfs_qgroup_trace_extent_nolock(trans->fs_info,
+					delayed_refs, qrecord);
+		if (ret) {
+			/* Clean up if insertion fails or item exists. */
+			xa_release(&delayed_refs->dirty_extents, qrecord->bytenr);
 			kfree(qrecord);
-		else
+		} else
 			qrecord_inserted = true;
 	}
 
@@ -1048,6 +1051,9 @@  static int add_delayed_ref(struct btrfs_trans_handle *trans,
 		record = kzalloc(sizeof(*record), GFP_NOFS);
 		if (!record)
 			goto free_head_ref;
+		if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
+			       generic_ref->bytenr, GFP_NOFS))
+			goto free_record;
 	}
 
 	init_delayed_ref_common(fs_info, node, generic_ref);
@@ -1084,6 +1090,8 @@  static int add_delayed_ref(struct btrfs_trans_handle *trans,
 		return btrfs_qgroup_trace_extent_post(trans, record);
 	return 0;
 
+free_record:
+	kfree(record);
 free_head_ref:
 	kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 free_node:
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 04b180ebe1fe..a81d6f2aa799 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -201,7 +201,7 @@  struct btrfs_delayed_ref_root {
 	struct rb_root_cached href_root;
 
 	/* dirty extent records */
-	struct rb_root dirty_extent_root;
+	struct xarray dirty_extents;
 
 	/* this spin lock protects the rbtree and the entries inside */
 	spinlock_t lock;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fc2a7ea26354..f75ed67a8edf 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1902,16 +1902,14 @@  int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
  *
  * Return 0 for success insert
  * Return >0 for existing record, caller can free @record safely.
- * Error is not possible
+ * Return <0 for insertion failed, caller can free @record safely.
  */
 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
 				struct btrfs_delayed_ref_root *delayed_refs,
 				struct btrfs_qgroup_extent_record *record)
 {
-	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
-	struct rb_node *parent_node = NULL;
-	struct btrfs_qgroup_extent_record *entry;
-	u64 bytenr = record->bytenr;
+	struct btrfs_qgroup_extent_record *existing, *ret;
+	unsigned long bytenr = record->bytenr;
 
 	if (!btrfs_qgroup_full_accounting(fs_info))
 		return 1;
@@ -1919,26 +1917,24 @@  int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
 	lockdep_assert_held(&delayed_refs->lock);
 	trace_btrfs_qgroup_trace_extent(fs_info, record);
 
-	while (*p) {
-		parent_node = *p;
-		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
-				 node);
-		if (bytenr < entry->bytenr) {
-			p = &(*p)->rb_left;
-		} else if (bytenr > entry->bytenr) {
-			p = &(*p)->rb_right;
-		} else {
-			if (record->data_rsv && !entry->data_rsv) {
-				entry->data_rsv = record->data_rsv;
-				entry->data_rsv_refroot =
-					record->data_rsv_refroot;
-			}
-			return 1;
+	xa_lock(&delayed_refs->dirty_extents);
+	existing = xa_load(&delayed_refs->dirty_extents, bytenr);
+	if (existing) {
+		if (record->data_rsv && !existing->data_rsv) {
+			existing->data_rsv = record->data_rsv;
+			existing->data_rsv_refroot = record->data_rsv_refroot;
 		}
+		xa_unlock(&delayed_refs->dirty_extents);
+		return 1;
+	}
+
+	ret = __xa_store(&delayed_refs->dirty_extents, record->bytenr, record, GFP_ATOMIC);
+	xa_unlock(&delayed_refs->dirty_extents);
+	if (xa_is_err(ret)) {
+		qgroup_mark_inconsistent(fs_info);
+		return xa_err(ret);
 	}
 
-	rb_link_node(&record->node, parent_node, p);
-	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
 	return 0;
 }
 
@@ -2045,6 +2041,11 @@  int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 	if (!record)
 		return -ENOMEM;
 
+	if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, bytenr, GFP_NOFS)) {
+		kfree(record);
+		return -ENOMEM;
+	}
+
 	delayed_refs = &trans->transaction->delayed_refs;
 	record->bytenr = bytenr;
 	record->num_bytes = num_bytes;
@@ -2053,7 +2054,9 @@  int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 	spin_lock(&delayed_refs->lock);
 	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
 	spin_unlock(&delayed_refs->lock);
-	if (ret > 0) {
+	if (ret) {
+		/* Clean up if insertion fails or item exists. */
+		xa_release(&delayed_refs->dirty_extents, record->bytenr);
 		kfree(record);
 		return 0;
 	}
@@ -2922,7 +2925,7 @@  int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
 	struct btrfs_qgroup_extent_record *record;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct ulist *new_roots = NULL;
-	struct rb_node *node;
+	unsigned long index;
 	u64 num_dirty_extents = 0;
 	u64 qgroup_to_skip;
 	int ret = 0;
@@ -2932,10 +2935,7 @@  int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
 
 	delayed_refs = &trans->transaction->delayed_refs;
 	qgroup_to_skip = delayed_refs->qgroup_to_skip;
-	while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
-		record = rb_entry(node, struct btrfs_qgroup_extent_record,
-				  node);
-
+	xa_for_each(&delayed_refs->dirty_extents, index, record) {
 		num_dirty_extents++;
 		trace_btrfs_qgroup_account_extents(fs_info, record);
 
@@ -3001,7 +3001,7 @@  int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
 		ulist_free(record->old_roots);
 		ulist_free(new_roots);
 		new_roots = NULL;
-		rb_erase(node, &delayed_refs->dirty_extent_root);
+		xa_erase(&delayed_refs->dirty_extents, index);
 		kfree(record);
 
 	}
@@ -4788,15 +4788,13 @@  int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
 {
 	struct btrfs_qgroup_extent_record *entry;
-	struct btrfs_qgroup_extent_record *next;
-	struct rb_root *root;
+	unsigned long index;
 
-	root = &trans->delayed_refs.dirty_extent_root;
-	rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
+	xa_for_each(&trans->delayed_refs.dirty_extents, index, entry) {
 		ulist_free(entry->old_roots);
 		kfree(entry);
 	}
-	*root = RB_ROOT;
+	xa_destroy(&trans->delayed_refs.dirty_extents);
 }
 
 void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3388c836b9a5..c473c049d37f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -143,8 +143,7 @@  void btrfs_put_transaction(struct btrfs_transaction *transaction)
 		BUG_ON(!list_empty(&transaction->list));
 		WARN_ON(!RB_EMPTY_ROOT(
 				&transaction->delayed_refs.href_root.rb_root));
-		WARN_ON(!RB_EMPTY_ROOT(
-				&transaction->delayed_refs.dirty_extent_root));
+		WARN_ON(!xa_empty(&transaction->delayed_refs.dirty_extents));
 		if (transaction->delayed_refs.pending_csums)
 			btrfs_err(transaction->fs_info,
 				  "pending csums is %llu",
@@ -351,7 +350,7 @@  static noinline int join_transaction(struct btrfs_fs_info *fs_info,
 	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
 
 	cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
-	cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
+	xa_init(&cur_trans->delayed_refs.dirty_extents);
 	atomic_set(&cur_trans->delayed_refs.num_entries, 0);
 
 	/*