diff mbox

[2/2] Btrfs: qgroup: Introduce a may_use to account space_info->bytes_may_use.

Message ID be147b22171d958c2a346437356448e94691c520.1418124311.git.yangds.fnst@cn.fujitsu.com (mailing list archive)
State Superseded
Headers show

Commit Message

Yang Dongsheng Dec. 9, 2014, 11:27 a.m. UTC
Currently, for pre_alloc or delay_alloc, the bytes will be accounted
in space_info by the three guys.
space_info->bytes_may_use --- space_info->reserved --- space_info->used.
But on the other hand, in qgroup, there are only two counters to account the
bytes, qgroup->reserved and qgroup->excl. And qg->reserved accounts
bytes in space_info->bytes_may_use and qg->excl accounts bytes in
space_info->used. So the bytes in space_info->reserved is not accounted
in qgroup. If so, there is a window we can exceed the quota limit when
bytes is in space_info->reserved.

Example:
	# btrfs quota enable /mnt
	# btrfs qgroup limit -e 10M /mnt
	# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
	# sync
	# btrfs qgroup show -pcre /mnt
qgroupid rfer     excl     max_rfer max_excl parent  child
-------- ----     ----     -------- -------- ------  -----
0/5      20987904 20987904 0        10485760 ---     ---

qg->excl is 20987904 larger than max_excl 10485760.

This patch introduce a new counter named may_use to qgroup, then
there are three counters in qgroup to account bytes in space_info
as below.
space_info->bytes_may_use --- space_info->reserved --- space_info->used.
qgroup->may_use           --- qgroup->reserved     --- qgroup->excl

With this patch applied:
	# btrfs quota enable /mnt
	# btrfs qgroup limit -e 10M /mnt
	# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
fallocate: /mnt/data9: fallocate failed: Disk quota exceeded
fallocate: /mnt/data10: fallocate failed: Disk quota exceeded
fallocate: /mnt/data11: fallocate failed: Disk quota exceeded
fallocate: /mnt/data12: fallocate failed: Disk quota exceeded
fallocate: /mnt/data13: fallocate failed: Disk quota exceeded
fallocate: /mnt/data14: fallocate failed: Disk quota exceeded
fallocate: /mnt/data15: fallocate failed: Disk quota exceeded
fallocate: /mnt/data16: fallocate failed: Disk quota exceeded
fallocate: /mnt/data17: fallocate failed: Disk quota exceeded
fallocate: /mnt/data18: fallocate failed: Disk quota exceeded
fallocate: /mnt/data19: fallocate failed: Disk quota exceeded
	# sync
	# btrfs qgroup show -pcre /mnt
qgroupid rfer    excl    max_rfer max_excl parent  child
-------- ----    ----    -------- -------- ------  -----
0/5      9453568 9453568 0        10485760 ---     ---

Reported-by: Cyril SCETBON <cyril.scetbon@free.fr>
Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
---
 fs/btrfs/extent-tree.c | 25 ++++++++++++++++++-
 fs/btrfs/inode.c       | 22 +++++++++++++++-
 fs/btrfs/qgroup.c      | 68 +++++++++++++++++++++++++++++++++++++++++++++++---
 fs/btrfs/qgroup.h      |  4 +++
 4 files changed, 113 insertions(+), 6 deletions(-)

Comments

Josef Bacik Dec. 9, 2014, 3:55 p.m. UTC | #1
On 12/09/2014 06:27 AM, Dongsheng Yang wrote:
> Currently, for pre_alloc or delay_alloc, the bytes will be accounted
> in space_info by the three guys.
> space_info->bytes_may_use --- space_info->reserved --- space_info->used.
> But on the other hand, in qgroup, there are only two counters to account the
> bytes, qgroup->reserved and qgroup->excl. And qg->reserved accounts
> bytes in space_info->bytes_may_use and qg->excl accounts bytes in
> space_info->used. So the bytes in space_info->reserved is not accounted
> in qgroup. If so, there is a window we can exceed the quota limit when
> bytes is in space_info->reserved.
>
> Example:
> 	# btrfs quota enable /mnt
> 	# btrfs qgroup limit -e 10M /mnt
> 	# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
> 	# sync
> 	# btrfs qgroup show -pcre /mnt
> qgroupid rfer     excl     max_rfer max_excl parent  child
> -------- ----     ----     -------- -------- ------  -----
> 0/5      20987904 20987904 0        10485760 ---     ---
>
> qg->excl is 20987904 larger than max_excl 10485760.
>
> This patch introduce a new counter named may_use to qgroup, then
> there are three counters in qgroup to account bytes in space_info
> as below.
> space_info->bytes_may_use --- space_info->reserved --- space_info->used.
> qgroup->may_use           --- qgroup->reserved     --- qgroup->excl
>
> With this patch applied:
> 	# btrfs quota enable /mnt
> 	# btrfs qgroup limit -e 10M /mnt
> 	# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
> fallocate: /mnt/data9: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data10: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data11: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data12: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data13: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data14: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data15: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data16: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data17: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data18: fallocate failed: Disk quota exceeded
> fallocate: /mnt/data19: fallocate failed: Disk quota exceeded
> 	# sync
> 	# btrfs qgroup show -pcre /mnt
> qgroupid rfer    excl    max_rfer max_excl parent  child
> -------- ----    ----    -------- -------- ------  -----
> 0/5      9453568 9453568 0        10485760 ---     ---
>
> Reported-by: Cyril SCETBON <cyril.scetbon@free.fr>
> Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
> ---
>   fs/btrfs/extent-tree.c | 25 ++++++++++++++++++-
>   fs/btrfs/inode.c       | 22 +++++++++++++++-
>   fs/btrfs/qgroup.c      | 68 +++++++++++++++++++++++++++++++++++++++++++++++---
>   fs/btrfs/qgroup.h      |  4 +++
>   4 files changed, 113 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 014b7f2..9eaf268 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -5500,8 +5500,13 @@ static int pin_down_extent(struct btrfs_root *root,
>
>   	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
>   			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
> -	if (reserved)
> +	if (reserved) {
> +		if (root->fs_info->quota_enabled)

You already have this check in btrfs_qgroup_update_reserved_bytes, just 
call it unconditionally everywhere in this patch.  Otherwise this looks 
good, thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Dongsheng Dec. 10, 2014, 8:09 a.m. UTC | #2
On 12/09/2014 11:55 PM, Josef Bacik wrote:
> On 12/09/2014 06:27 AM, Dongsheng Yang wrote:
>> Currently, for pre_alloc or delay_alloc, the bytes will be accounted
>> in space_info by the three guys.
>> space_info->bytes_may_use --- space_info->reserved --- space_info->used.
>> But on the other hand, in qgroup, there are only two counters to 
>> account the
>> bytes, qgroup->reserved and qgroup->excl. And qg->reserved accounts
>> bytes in space_info->bytes_may_use and qg->excl accounts bytes in
>> space_info->used. So the bytes in space_info->reserved is not accounted
>> in qgroup. If so, there is a window we can exceed the quota limit when
>> bytes is in space_info->reserved.
>>
>> Example:
>>     # btrfs quota enable /mnt
>>     # btrfs qgroup limit -e 10M /mnt
>>     # for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
>>     # sync
>>     # btrfs qgroup show -pcre /mnt
>> qgroupid rfer     excl     max_rfer max_excl parent  child
>> -------- ----     ----     -------- -------- ------  -----
>> 0/5      20987904 20987904 0        10485760 ---     ---
>>
>> qg->excl is 20987904 larger than max_excl 10485760.
>>
>> This patch introduce a new counter named may_use to qgroup, then
>> there are three counters in qgroup to account bytes in space_info
>> as below.
>> space_info->bytes_may_use --- space_info->reserved --- space_info->used.
>> qgroup->may_use           --- qgroup->reserved     --- qgroup->excl
>>
>> With this patch applied:
>>     # btrfs quota enable /mnt
>>     # btrfs qgroup limit -e 10M /mnt
>>     # for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
>> fallocate: /mnt/data9: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data10: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data11: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data12: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data13: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data14: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data15: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data16: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data17: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data18: fallocate failed: Disk quota exceeded
>> fallocate: /mnt/data19: fallocate failed: Disk quota exceeded
>>     # sync
>>     # btrfs qgroup show -pcre /mnt
>> qgroupid rfer    excl    max_rfer max_excl parent  child
>> -------- ----    ----    -------- -------- ------  -----
>> 0/5      9453568 9453568 0        10485760 ---     ---
>>
>> Reported-by: Cyril SCETBON <cyril.scetbon@free.fr>
>> Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
>> ---
>>   fs/btrfs/extent-tree.c | 25 ++++++++++++++++++-
>>   fs/btrfs/inode.c       | 22 +++++++++++++++-
>>   fs/btrfs/qgroup.c      | 68 
>> +++++++++++++++++++++++++++++++++++++++++++++++---
>>   fs/btrfs/qgroup.h      |  4 +++
>>   4 files changed, 113 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 014b7f2..9eaf268 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -5500,8 +5500,13 @@ static int pin_down_extent(struct btrfs_root 
>> *root,
>>
>>       set_extent_dirty(root->fs_info->pinned_extents, bytenr,
>>                bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
>> -    if (reserved)
>> +    if (reserved) {
>> +        if (root->fs_info->quota_enabled)
>
> You already have this check in btrfs_qgroup_update_reserved_bytes, 
> just call it unconditionally everywhere in this patch.  Otherwise this 
> looks good, thanks,

Thanx, I will update it in V2.
>
> Josef
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> .
>

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 014b7f2..9eaf268 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5500,8 +5500,13 @@  static int pin_down_extent(struct btrfs_root *root,
 
 	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
 			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
-	if (reserved)
+	if (reserved) {
+		if (root->fs_info->quota_enabled)
+			btrfs_qgroup_update_reserved_bytes(root->fs_info,
+							   root->root_key.objectid,
+							   num_bytes, -1);
 		trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
+	}
 	return 0;
 }
 
@@ -6230,6 +6235,10 @@  void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
 		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
 		pin = 0;
+		if (root->fs_info->quota_enabled)
+			btrfs_qgroup_update_reserved_bytes(root->fs_info,
+							   root->root_key.objectid,
+							   buf->len, -1);
 	}
 out:
 	if (pin)
@@ -6964,7 +6973,12 @@  static int __btrfs_free_reserved_extent(struct btrfs_root *root,
 	else {
 		btrfs_add_free_space(cache, start, len);
 		btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+		if (root->fs_info->quota_enabled)
+			btrfs_qgroup_update_reserved_bytes(root->fs_info,
+							   root->root_key.objectid,
+							   len, -1);
 	}
+
 	btrfs_put_block_group(cache);
 
 	trace_btrfs_reserved_extent_free(root, start, len);
@@ -7200,6 +7214,10 @@  int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	BUG_ON(ret); /* logic error */
 	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
 					 0, owner, offset, ins, 1);
+	if (root->fs_info->quota_enabled)
+		btrfs_qgroup_update_reserved_bytes(root->fs_info,
+						   root->root_key.objectid,
+						   ins->offset, 1);
 	btrfs_put_block_group(block_group);
 	return ret;
 }
@@ -7346,6 +7364,11 @@  struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 		return ERR_PTR(ret);
 	}
 
+	if (root->fs_info->quota_enabled)
+		btrfs_qgroup_update_reserved_bytes(root->fs_info,
+						   root_objectid,
+						   ins.offset, 1);
+
 	buf = btrfs_init_new_buffer(trans, root, ins.objectid,
 				    blocksize, level);
 	BUG_ON(IS_ERR(buf)); /* -ENOMEM */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d23362f..4faf794 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -59,6 +59,7 @@ 
 #include "backref.h"
 #include "hash.h"
 #include "props.h"
+#include "qgroup.h"
 
 struct btrfs_iget_args {
 	struct btrfs_key *location;
@@ -739,7 +740,10 @@  retry:
 			}
 			goto out_free;
 		}
-
+		if (root->fs_info->quota_enabled)
+			btrfs_qgroup_update_reserved_bytes(root->fs_info,
+							   root->root_key.objectid,
+							   ins.offset, 1);
 		/*
 		 * here we're doing allocation and writeback of the
 		 * compressed pages
@@ -951,6 +955,11 @@  static noinline int cow_file_range(struct inode *inode,
 		if (ret < 0)
 			goto out_unlock;
 
+		if (root->fs_info->quota_enabled)
+			btrfs_qgroup_update_reserved_bytes(root->fs_info,
+							   root->root_key.objectid,
+							   ins.offset, 1);
+
 		em = alloc_extent_map();
 		if (!em) {
 			ret = -ENOMEM;
@@ -6745,6 +6754,11 @@  static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 		return ERR_PTR(ret);
 	}
 
+	if (root->fs_info->quota_enabled)
+		btrfs_qgroup_update_reserved_bytes(root->fs_info,
+						   root->root_key.objectid,
+						   ins.offset, 1);
+
 	return em;
 }
 
@@ -9266,6 +9280,12 @@  static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 				btrfs_end_transaction(trans, root);
 			break;
 		}
+
+		if (root->fs_info->quota_enabled)
+			btrfs_qgroup_update_reserved_bytes(root->fs_info,
+							   root->root_key.objectid,
+							   ins.offset, 1);
+
 		btrfs_drop_extent_cache(inode, cur_offset,
 					cur_offset + ins.offset -1, 0);
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 48b60db..d147082 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -72,6 +72,7 @@  struct btrfs_qgroup {
 	/*
 	 * reservation tracking
 	 */
+	u64 may_use;
 	u64 reserved;
 
 	/*
@@ -1414,6 +1415,8 @@  static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 	WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
 	qgroup->excl += sign * oper->num_bytes;
 	qgroup->excl_cmpr += sign * oper->num_bytes;
+	if (sign > 0)
+		qgroup->reserved -= oper->num_bytes;
 
 	qgroup_dirty(fs_info, qgroup);
 
@@ -1434,6 +1437,8 @@  static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 		qgroup->excl += sign * oper->num_bytes;
 		if (sign < 0)
 			WARN_ON(qgroup->excl < oper->num_bytes);
+		if (sign > 0)
+			qgroup->reserved -= oper->num_bytes;
 		qgroup->excl_cmpr += sign * oper->num_bytes;
 		qgroup_dirty(fs_info, qgroup);
 
@@ -2359,6 +2364,61 @@  out:
 	return ret;
 }
 
+int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info,
+					    u64 ref_root,
+					    u64 num_bytes,
+					    int sign)
+{
+	struct btrfs_root *quota_root;
+	struct btrfs_qgroup *qgroup;
+	int ret = 0;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+
+	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+		return 0;
+
+	if (num_bytes == 0)
+		return 0;
+
+	spin_lock(&fs_info->qgroup_lock);
+	quota_root = fs_info->quota_root;
+	if (!quota_root)
+		goto out;
+
+	qgroup = find_qgroup_rb(fs_info, ref_root);
+	if (!qgroup)
+		goto out;
+
+	ulist_reinit(fs_info->qgroup_ulist);
+	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
+			(uintptr_t)qgroup, GFP_ATOMIC);
+	if (ret < 0)
+		goto out;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
+		struct btrfs_qgroup *qg;
+		struct btrfs_qgroup_list *glist;
+
+		qg = u64_to_ptr(unode->aux);
+
+		qg->reserved += sign * num_bytes;
+
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			ret = ulist_add(fs_info->qgroup_ulist,
+					glist->group->qgroupid,
+					(uintptr_t)glist->group, GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+out:
+	spin_unlock(&fs_info->qgroup_lock);
+	return ret;
+}
+
 /*
  * reserve some space for a qgroup and all its parents. The reservation takes
  * place with start_transaction or dealloc_reserve, similar to ENOSPC
@@ -2407,14 +2467,14 @@  int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 		qg = u64_to_ptr(unode->aux);
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
-		    qg->reserved + (s64)qg->rfer + num_bytes >
+		    qg->reserved + qg->may_use + (s64)qg->rfer + num_bytes >
 		    qg->max_rfer) {
 			ret = -EDQUOT;
 			goto out;
 		}
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
-		    qg->reserved + (s64)qg->excl + num_bytes >
+		    qg->reserved + qg->may_use + (s64)qg->excl + num_bytes >
 		    qg->max_excl) {
 			ret = -EDQUOT;
 			goto out;
@@ -2438,7 +2498,7 @@  int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 
 		qg = u64_to_ptr(unode->aux);
 
-		qg->reserved += num_bytes;
+		qg->may_use += num_bytes;
 	}
 
 out:
@@ -2484,7 +2544,7 @@  void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 
 		qg = u64_to_ptr(unode->aux);
 
-		qg->reserved -= num_bytes;
+		qg->may_use -= num_bytes;
 
 		list_for_each_entry(glist, &qg->groups, next_group) {
 			ret = ulist_add(fs_info->qgroup_ulist,
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 18cc68c..31de88e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -95,6 +95,10 @@  int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
 			 struct btrfs_qgroup_inherit *inherit);
+int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info,
+				       u64 ref_root,
+				       u64 num_bytes,
+				       int sign);
 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);