diff mbox

[v3.1,2/3] btrfs: relocation: Fix leaking qgroups numbers on data extents

Message ID 20160815023652.5348-3-quwenruo@cn.fujitsu.com (mailing list archive)
State Accepted
Headers show

Commit Message

Qu Wenruo Aug. 15, 2016, 2:36 a.m. UTC
This patch fixes a REGRESSION introduced in 4.2, caused by the big quota
rework.

When balancing data extents, qgroup will leak all its numbers for
relocated data extents.

The relocation is done in the following steps for data extents:
1) Create data reloc tree and inode
2) Copy all data extents to data reloc tree
   And commit transaction
3) Create tree reloc tree(special snapshot) for any related subvolumes
4) Replace file extent in tree reloc tree with new extents in data reloc
   tree
   And commit transaction
5) Merge tree reloc tree with original fs, by swapping tree blocks

For 1)~4), since tree reloc tree and data reloc tree doesn't count to
qgroup, everything is OK.

But for 5), the swapping of tree blocks will only info qgroup to track
metadata extents.

If metadata extents contain file extents, qgroup number for file extents
will get lost, leading to corrupted qgroup accounting.

The fix is, before commit transaction of step 5), manually info qgroup to
track all file extents in data reloc tree.
Since at commit transaction time, the tree swapping is done, and qgroup
will account these data extents correctly.

Cc: Mark Fasheh <mfasheh@suse.de>
Reported-by: Mark Fasheh <mfasheh@suse.de>
Reported-by: Filipe Manana <fdmanana@gmail.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/btrfs/relocation.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 103 insertions(+), 6 deletions(-)

Comments

Goldwyn Rodrigues Oct. 5, 2016, 1:53 a.m. UTC | #1
On 08/14/2016 09:36 PM, Qu Wenruo wrote:
> This patch fixes a REGRESSION introduced in 4.2, caused by the big quota
> rework.
> 
> When balancing data extents, qgroup will leak all its numbers for
> relocated data extents.
> 
> The relocation is done in the following steps for data extents:
> 1) Create data reloc tree and inode
> 2) Copy all data extents to data reloc tree
>    And commit transaction
> 3) Create tree reloc tree(special snapshot) for any related subvolumes
> 4) Replace file extent in tree reloc tree with new extents in data reloc
>    tree
>    And commit transaction
> 5) Merge tree reloc tree with original fs, by swapping tree blocks
> 
> For 1)~4), since tree reloc tree and data reloc tree doesn't count to
> qgroup, everything is OK.
> 
> But for 5), the swapping of tree blocks will only info qgroup to track
> metadata extents.
> 
> If metadata extents contain file extents, qgroup number for file extents
> will get lost, leading to corrupted qgroup accounting.
> 
> The fix is, before commit transaction of step 5), manually info qgroup to
> track all file extents in data reloc tree.
> Since at commit transaction time, the tree swapping is done, and qgroup
> will account these data extents correctly.
> 
> Cc: Mark Fasheh <mfasheh@suse.de>
> Reported-by: Mark Fasheh <mfasheh@suse.de>
> Reported-by: Filipe Manana <fdmanana@gmail.com>
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/btrfs/relocation.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 103 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index b26a5ae..27480ef 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -31,6 +31,7 @@
>  #include "async-thread.h"
>  #include "free-space-cache.h"
>  #include "inode-map.h"
> +#include "qgroup.h"
>  
>  /*
>   * backref_node, mapping_node and tree_block start with this
> @@ -3916,6 +3917,90 @@ int prepare_to_relocate(struct reloc_control *rc)
>  	return 0;
>  }
>  
> +/*
> + * Qgroup fixer for data chunk relocation.
> + * The data relocation is done in the following steps
> + * 1) Copy data extents into data reloc tree
> + * 2) Create tree reloc tree(special snapshot) for related subvolumes
> + * 3) Modify file extents in tree reloc tree
> + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
> + *
> + * The problem is, data and tree reloc tree are not accounted to qgroup,
> + * and 4) will only info qgroup to track tree blocks change, not file extents
> + * in the tree blocks.
> + *
> + * The good news is, related data extents are all in data reloc tree, so we
> + * only need to info qgroup to track all file extents in data reloc tree
> + * before commit trans.
> + */
> +static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
> +					     struct reloc_control *rc)
> +{
> +	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
> +	struct inode *inode = rc->data_inode;
> +	struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
> +	struct btrfs_path *path;
> +	struct btrfs_key key;
> +	int ret = 0;
> +
> +	if (!fs_info->quota_enabled)
> +		return 0;
> +
> +	/*
> +	 * Only for stage where we update data pointers the qgroup fix is
> +	 * valid.
> +	 * For MOVING_DATA stage, we will miss the timing of swapping tree
> +	 * blocks, and won't fix it.
> +	 */
> +	if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
> +		return 0;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +	key.objectid = btrfs_ino(inode);
> +	key.type = BTRFS_EXTENT_DATA_KEY;
> +	key.offset = 0;
> +
> +	ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
> +	if (ret < 0)
> +		goto out;
> +
> +	lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
> +	while (1) {
> +		struct btrfs_file_extent_item *fi;
> +
> +		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
> +		if (key.objectid > btrfs_ino(inode))
> +			break;
> +		if (key.type != BTRFS_EXTENT_DATA_KEY)
> +			goto next;
> +		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
> +				    struct btrfs_file_extent_item);
> +		if (btrfs_file_extent_type(path->nodes[0], fi) !=
> +				BTRFS_FILE_EXTENT_REG)
> +			goto next;
> +		ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
> +			btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
> +			btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
> +			GFP_NOFS);
> +		if (ret < 0)
> +			break;
> +next:
> +		ret = btrfs_next_item(data_reloc_root, path);
> +		if (ret < 0)
> +			break;
> +		if (ret > 0) {
> +			ret = 0;
> +			break;
> +		}
> +	}
> +	unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
> +out:
> +	btrfs_free_path(path);
> +	return ret;
> +}
> +
>  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  {
>  	struct rb_root blocks = RB_ROOT;
> @@ -4102,10 +4187,16 @@ restart:
>  
>  	/* get rid of pinned extents */
>  	trans = btrfs_join_transaction(rc->extent_root);
> -	if (IS_ERR(trans))
> +	if (IS_ERR(trans)) {
>  		err = PTR_ERR(trans);
> -	else
> -		btrfs_commit_transaction(trans, rc->extent_root);
> +		goto out_free;
> +	}
> +	err = qgroup_fix_relocated_data_extents(trans, rc);
> +	if (err < 0) {
> +		btrfs_abort_transaction(trans, err);
> +		goto out_free;
> +	}
> +	btrfs_commit_transaction(trans, rc->extent_root);
>  out_free:
>  	btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
>  	btrfs_free_path(path);
> @@ -4468,10 +4559,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
>  	unset_reloc_control(rc);
>  
>  	trans = btrfs_join_transaction(rc->extent_root);
> -	if (IS_ERR(trans))
> +	if (IS_ERR(trans)) {
>  		err = PTR_ERR(trans);
> -	else
> -		err = btrfs_commit_transaction(trans, rc->extent_root);
> +		goto out_free;
> +	}
> +	err = qgroup_fix_relocated_data_extents(trans, rc);
> +	if (err < 0) {
> +		btrfs_abort_transaction(trans, err);
> +		goto out_free;
> +	}
> +	err = btrfs_commit_transaction(trans, rc->extent_root);
>  out_free:
>  	kfree(rc);
>  out:

Hi Qu,

This function call to qgroup_fix_relocated_data_extents()
in btrfs_recover_relocation() fails (null pointer reference)
because rc->data_inode is null.

On reading the code it seems we are not moving data around. Do we really
to call qgroup_fix_relocated_data_extents() in btrfs_recover_relocation()?

Without the last hunk, I tried creating a btrfs device by crashing the
system in the middle of a btrfs balance and tried checking for qgroup
inconsistencies, but could not find any. However, I am not sure if my
testcase is complete, though I tried it multiple times.
Qu Wenruo Oct. 6, 2016, 7:06 a.m. UTC | #2
At 10/05/2016 09:53 AM, Goldwyn Rodrigues wrote:
>
>
> On 08/14/2016 09:36 PM, Qu Wenruo wrote:
>> This patch fixes a REGRESSION introduced in 4.2, caused by the big quota
>> rework.
>>
>> When balancing data extents, qgroup will leak all its numbers for
>> relocated data extents.
>>
>> The relocation is done in the following steps for data extents:
>> 1) Create data reloc tree and inode
>> 2) Copy all data extents to data reloc tree
>>    And commit transaction
>> 3) Create tree reloc tree(special snapshot) for any related subvolumes
>> 4) Replace file extent in tree reloc tree with new extents in data reloc
>>    tree
>>    And commit transaction
>> 5) Merge tree reloc tree with original fs, by swapping tree blocks
>>
>> For 1)~4), since tree reloc tree and data reloc tree doesn't count to
>> qgroup, everything is OK.
>>
>> But for 5), the swapping of tree blocks will only info qgroup to track
>> metadata extents.
>>
>> If metadata extents contain file extents, qgroup number for file extents
>> will get lost, leading to corrupted qgroup accounting.
>>
>> The fix is, before commit transaction of step 5), manually info qgroup to
>> track all file extents in data reloc tree.
>> Since at commit transaction time, the tree swapping is done, and qgroup
>> will account these data extents correctly.
>>
>> Cc: Mark Fasheh <mfasheh@suse.de>
>> Reported-by: Mark Fasheh <mfasheh@suse.de>
>> Reported-by: Filipe Manana <fdmanana@gmail.com>
>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
>> Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
>> ---
>>  fs/btrfs/relocation.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 103 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
>> index b26a5ae..27480ef 100644
>> --- a/fs/btrfs/relocation.c
>> +++ b/fs/btrfs/relocation.c
>> @@ -31,6 +31,7 @@
>>  #include "async-thread.h"
>>  #include "free-space-cache.h"
>>  #include "inode-map.h"
>> +#include "qgroup.h"
>>
>>  /*
>>   * backref_node, mapping_node and tree_block start with this
>> @@ -3916,6 +3917,90 @@ int prepare_to_relocate(struct reloc_control *rc)
>>  	return 0;
>>  }
>>
>> +/*
>> + * Qgroup fixer for data chunk relocation.
>> + * The data relocation is done in the following steps
>> + * 1) Copy data extents into data reloc tree
>> + * 2) Create tree reloc tree(special snapshot) for related subvolumes
>> + * 3) Modify file extents in tree reloc tree
>> + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
>> + *
>> + * The problem is, data and tree reloc tree are not accounted to qgroup,
>> + * and 4) will only info qgroup to track tree blocks change, not file extents
>> + * in the tree blocks.
>> + *
>> + * The good news is, related data extents are all in data reloc tree, so we
>> + * only need to info qgroup to track all file extents in data reloc tree
>> + * before commit trans.
>> + */
>> +static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
>> +					     struct reloc_control *rc)
>> +{
>> +	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
>> +	struct inode *inode = rc->data_inode;
>> +	struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
>> +	struct btrfs_path *path;
>> +	struct btrfs_key key;
>> +	int ret = 0;
>> +
>> +	if (!fs_info->quota_enabled)
>> +		return 0;
>> +
>> +	/*
>> +	 * Only for stage where we update data pointers the qgroup fix is
>> +	 * valid.
>> +	 * For MOVING_DATA stage, we will miss the timing of swapping tree
>> +	 * blocks, and won't fix it.
>> +	 */
>> +	if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
>> +		return 0;
>> +
>> +	path = btrfs_alloc_path();
>> +	if (!path)
>> +		return -ENOMEM;
>> +	key.objectid = btrfs_ino(inode);
>> +	key.type = BTRFS_EXTENT_DATA_KEY;
>> +	key.offset = 0;
>> +
>> +	ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
>> +	if (ret < 0)
>> +		goto out;
>> +
>> +	lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
>> +	while (1) {
>> +		struct btrfs_file_extent_item *fi;
>> +
>> +		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
>> +		if (key.objectid > btrfs_ino(inode))
>> +			break;
>> +		if (key.type != BTRFS_EXTENT_DATA_KEY)
>> +			goto next;
>> +		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
>> +				    struct btrfs_file_extent_item);
>> +		if (btrfs_file_extent_type(path->nodes[0], fi) !=
>> +				BTRFS_FILE_EXTENT_REG)
>> +			goto next;
>> +		ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
>> +			btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
>> +			btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
>> +			GFP_NOFS);
>> +		if (ret < 0)
>> +			break;
>> +next:
>> +		ret = btrfs_next_item(data_reloc_root, path);
>> +		if (ret < 0)
>> +			break;
>> +		if (ret > 0) {
>> +			ret = 0;
>> +			break;
>> +		}
>> +	}
>> +	unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
>> +out:
>> +	btrfs_free_path(path);
>> +	return ret;
>> +}
>> +
>>  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>>  {
>>  	struct rb_root blocks = RB_ROOT;
>> @@ -4102,10 +4187,16 @@ restart:
>>
>>  	/* get rid of pinned extents */
>>  	trans = btrfs_join_transaction(rc->extent_root);
>> -	if (IS_ERR(trans))
>> +	if (IS_ERR(trans)) {
>>  		err = PTR_ERR(trans);
>> -	else
>> -		btrfs_commit_transaction(trans, rc->extent_root);
>> +		goto out_free;
>> +	}
>> +	err = qgroup_fix_relocated_data_extents(trans, rc);
>> +	if (err < 0) {
>> +		btrfs_abort_transaction(trans, err);
>> +		goto out_free;
>> +	}
>> +	btrfs_commit_transaction(trans, rc->extent_root);
>>  out_free:
>>  	btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
>>  	btrfs_free_path(path);
>> @@ -4468,10 +4559,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
>>  	unset_reloc_control(rc);
>>
>>  	trans = btrfs_join_transaction(rc->extent_root);
>> -	if (IS_ERR(trans))
>> +	if (IS_ERR(trans)) {
>>  		err = PTR_ERR(trans);
>> -	else
>> -		err = btrfs_commit_transaction(trans, rc->extent_root);
>> +		goto out_free;
>> +	}
>> +	err = qgroup_fix_relocated_data_extents(trans, rc);
>> +	if (err < 0) {
>> +		btrfs_abort_transaction(trans, err);
>> +		goto out_free;
>> +	}
>> +	err = btrfs_commit_transaction(trans, rc->extent_root);
>>  out_free:
>>  	kfree(rc);
>>  out:
>
> Hi Qu,
>
> This function call to qgroup_fix_relocated_data_extents()
> in btrfs_recover_relocation() fails (null pointer reference)
> because rc->data_inode is null.

Right, since rc->data_inode is only initialized in 
btrfs_relocate_block_group().
Here we will access NULL pointer and cause problem.

Would you please provide some images to trigger the NULL pointer error?

I created several images to test the btrfs_recover_relocation() routine 
using the method I mentioned below, but they didn't trigger NULL pointer 
as they didn't meet (rc->stage == UPDATE_DATA_PTRS and 
rc->extents_found) check.

>
> On reading the code it seems we are not moving data around. Do we really
> to call qgroup_fix_relocated_data_extents() in btrfs_recover_relocation()?

Normally, we need to call qgroup_fix_relocated_data_extents() after 
calling merge_reloc_roots().

If not, if merge_reloc_roots() swapped tree blocks containing data 
extents, we will leak data space of that block group again.

Since the problem is not about moving(copying) data, but swapping tree 
blocks, which changed owner of tree blocks and data extents inside it.

In fact I just created such image, which can leads to corrupted qgroup 
since we can't re-dirty extents at btrfs_recover_relocation().

(And found a btrfsck bug which leads to segfault checking the image)

You can download the image and try.
https://drive.google.com/file/d/0BxpkL3ehzX3pV1E3VkpfbmJrWmc/view?usp=sharing


>
> Without the last hunk, I tried creating a btrfs device by crashing the
> system in the middle of a btrfs balance and tried checking for qgroup
> inconsistencies, but could not find any. However, I am not sure if my
> testcase is complete, though I tried it multiple times.

It's a little hard to create such image by just crashing the system 
while balance.
Instead, we need to grab the on-disk data at special time point.

Normally I'd just add 30s sleep and pr_info() before the final 
btrfs_commit_transaction() in relocate_block_group(), just like patch below:

------
@@ -4207,6 +4210,11 @@ restart:
                         err = ret;
                 goto out_free;
         }
+       if (rc->stage == UPDATE_DATA_PTRS && rc->extents_found) {
+               pr_info("sleep 30s\n");
+               msleep(30 * 1000);
+               pr_info("sleep done\n");
+       }
         btrfs_commit_transaction(trans, rc->extent_root);
  out_free:
         btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
------

When the sleep happens, dd the image to some other place. And mount 
it(original disk must be removed).


Another trick is, to bump tree levels to 2, which can expose quite a lot 
of new bugs.
I use the following small script with above kernel hack to dump the image:
------
#!/bin/bash

dev=/dev/vdb5
mnt=/mnt/test/
trace_dir=/sys/kernel/debug/tracing
fsstress=/home/adam/xfstests/ltp/fsstress

create_files () {
	prefix=$1
	size=$2
	nr=$3
	dir=$4

	for i in $(seq $nr); do
		filename=$(printf "%s_%05d" "$prefix" $i)
		xfs_io -f -c "pwrite 0 $size" $dir/$filename > /dev/null
	done
}

umount $mnt &> /dev/null
mkfs.btrfs $dev -f -b 512M -n 4k
mount -o nospace_cache $dev $mnt


create_files inline 2K 1000 $mnt
create_files large 4M 5 $mnt

sync

btrfs quota enable $mnt
btrfs quota rescan -w $mnt
sync
btrfs qgroup show -prce --raw $mnt

btrfs balance start -d $mnt # dump the image during the sleep window
sync
btrfs qgroup show -prce --raw $mnt

------

Unfortunately, I only tested my old patch with tree level 0 images, 
which doesn't exposed the qgroup leak in btrfs_recover_relocation().
(In that case, tree reloc tree are not modified at all, all data extents 
are just pointing to old extents)

I'll create a fix which doesn't use data_inode, but manually searching 
data reloc tree to fix the problem.

Thanks,
Qu
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b26a5ae..27480ef 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@ 
 #include "async-thread.h"
 #include "free-space-cache.h"
 #include "inode-map.h"
+#include "qgroup.h"
 
 /*
  * backref_node, mapping_node and tree_block start with this
@@ -3916,6 +3917,90 @@  int prepare_to_relocate(struct reloc_control *rc)
 	return 0;
 }
 
+/*
+ * Qgroup fixer for data chunk relocation.
+ * The data relocation is done in the following steps
+ * 1) Copy data extents into data reloc tree
+ * 2) Create tree reloc tree(special snapshot) for related subvolumes
+ * 3) Modify file extents in tree reloc tree
+ * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
+ *
+ * The problem is, data and tree reloc tree are not accounted to qgroup,
+ * and 4) will only info qgroup to track tree blocks change, not file extents
+ * in the tree blocks.
+ *
+ * The good news is, related data extents are all in data reloc tree, so we
+ * only need to info qgroup to track all file extents in data reloc tree
+ * before commit trans.
+ */
+static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
+					     struct reloc_control *rc)
+{
+	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+	struct inode *inode = rc->data_inode;
+	struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	if (!fs_info->quota_enabled)
+		return 0;
+
+	/*
+	 * Only for stage where we update data pointers the qgroup fix is
+	 * valid.
+	 * For MOVING_DATA stage, we will miss the timing of swapping tree
+	 * blocks, and won't fix it.
+	 */
+	if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+	while (1) {
+		struct btrfs_file_extent_item *fi;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid > btrfs_ino(inode))
+			break;
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			goto next;
+		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(path->nodes[0], fi) !=
+				BTRFS_FILE_EXTENT_REG)
+			goto next;
+		ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
+			btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
+			btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
+			GFP_NOFS);
+		if (ret < 0)
+			break;
+next:
+		ret = btrfs_next_item(data_reloc_root, path);
+		if (ret < 0)
+			break;
+		if (ret > 0) {
+			ret = 0;
+			break;
+		}
+	}
+	unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
 	struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4187,16 @@  restart:
 
 	/* get rid of pinned extents */
 	trans = btrfs_join_transaction(rc->extent_root);
-	if (IS_ERR(trans))
+	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
-	else
-		btrfs_commit_transaction(trans, rc->extent_root);
+		goto out_free;
+	}
+	err = qgroup_fix_relocated_data_extents(trans, rc);
+	if (err < 0) {
+		btrfs_abort_transaction(trans, err);
+		goto out_free;
+	}
+	btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
 	btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
 	btrfs_free_path(path);
@@ -4468,10 +4559,16 @@  int btrfs_recover_relocation(struct btrfs_root *root)
 	unset_reloc_control(rc);
 
 	trans = btrfs_join_transaction(rc->extent_root);
-	if (IS_ERR(trans))
+	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
-	else
-		err = btrfs_commit_transaction(trans, rc->extent_root);
+		goto out_free;
+	}
+	err = qgroup_fix_relocated_data_extents(trans, rc);
+	if (err < 0) {
+		btrfs_abort_transaction(trans, err);
+		goto out_free;
+	}
+	err = btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
 	kfree(rc);
 out: