[v2,06/17] btrfs-progs: lowmem check: introduce force_cow_in_new_chunk()
diff mbox

Message ID 20171220045731.19343-7-suy.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

Su Yue Dec. 20, 2017, 4:57 a.m. UTC
Introduce create_chunk_and_block_block_group() to allocate new chunk
and corresponding block group.

The new function force_cow_in_new_chunk() first allocates new chunk
and records its start.
Then it modifies all metadata block groups cached and full.
Finally it marks the new block group uncached and unfree.
In the next CoW, extents states will be updated automatically by
cache_block_group().

Suggested-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
---
 cmds-check.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

Comments

Qu Wenruo Dec. 20, 2017, 5:41 a.m. UTC | #1
On 2017年12月20日 12:57, Su Yue wrote:
> Introduce create_chunk_and_block_block_group() to allocate new chunk
> and corresponding block group.
> 
> The new function force_cow_in_new_chunk() first allocates new chunk
> and records its start.
> Then it modifies all metadata block groups cached and full.
> Finally it marks the new block group uncached and unfree.
> In the next CoW, extents states will be updated automatically by
> cache_block_group().
> 
> Suggested-by: Qu Wenruo <wqu@suse.com>
> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
> ---
>  cmds-check.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 80 insertions(+)
> 
> diff --git a/cmds-check.c b/cmds-check.c
> index d98d4bda7357..311c8a9f45e8 100644
> --- a/cmds-check.c
> +++ b/cmds-check.c
> @@ -10911,6 +10911,86 @@ out:
>  	return ret;
>  }
>  
> +static int create_chunk_and_block_group(struct btrfs_fs_info *fs_info,
> +					u64 flags, u64 *start, u64 *nbytes)
> +{
> +	struct btrfs_trans_handle *trans;
> +	struct btrfs_root *root = fs_info->extent_root;
> +	int ret;
> +
> +	if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
> +		return -EINVAL;
> +
> +	trans = btrfs_start_transaction(root, 1);
> +	if (IS_ERR(trans)) {
> +		ret = PTR_ERR(trans);
> +		error("error starting transaction %s", strerror(-ret));
> +		return ret;
> +	}
> +	ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
> +	if (ret) {
> +		error("fail to allocate new chunk %s", strerror(-ret));
> +		goto out;
> +	}
> +	ret = btrfs_make_block_group(trans, fs_info, 0, flags,
> +			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
> +	if (ret) {
> +		error("fail to make block group for chunk %llu %llu %s",
> +		      *start, *nbytes, strerror(-ret));
> +		goto out;
> +	}
> +out:
> +	btrfs_commit_transaction(trans, root);
> +	return ret;
> +}
> +
> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_block_group_cache *bg;
> +	u64 start;
> +	u64 nbytes;
> +	u64 alloc_profile;
> +	u64 flags;
> +	int ret;
> +
> +	alloc_profile = (fs_info->avail_metadata_alloc_bits &
> +			 fs_info->metadata_alloc_profile);
> +	flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
> +	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
> +		flags |= BTRFS_BLOCK_GROUP_DATA;
> +
> +	ret = create_chunk_and_block_group(fs_info, flags, &start, &nbytes);

Why bother allocating a new chunk by yourself?

Just mark all block groups full and that's all.

Any later tree block allocation like btrfs_search_slot() with @cow = 1
will trigger chunk allocation automatically.

Thanks,
Qu

> +	if (!ret)
> +		printf("Create new chunk %llu %llu\n", start, nbytes);
> +	else
> +		goto err;
> +
> +	flags = BTRFS_BLOCK_GROUP_METADATA;
> +	/* Mark all metadata block groups cached and full in free space*/
> +	ret = modify_block_groups_cache(fs_info, flags, 1);
> +	if (ret)
> +		goto clear_bg_cache;
> +
> +	bg = btrfs_lookup_block_group(fs_info, start);
> +	if (!bg) {
> +		ret = -ENOENT;
> +		error("fail to look up block group %llu %llu", start, nbytes);
> +		goto clear_bg_cache;
> +	}
> +
> +	/* Clear block group cache just allocated */
> +	ret = modify_block_group_cache(fs_info, bg, 0);
> +	if (ret)
> +		goto clear_bg_cache;
> +
> +	return 0;
> +
> +clear_bg_cache:
> +	modify_block_groups_cache(fs_info, flags, 0);
> +err:
> +	return ret;
> +}
> +
>  static int check_extent_refs(struct btrfs_root *root,
>  			     struct cache_tree *extent_cache)
>  {
>
Su Yue Dec. 20, 2017, 8:21 a.m. UTC | #2
On 12/20/2017 01:41 PM, Qu Wenruo wrote:
> 
> 
> On 2017年12月20日 12:57, Su Yue wrote:
>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>> and corresponding block group.
>>
>> The new function force_cow_in_new_chunk() first allocates new chunk
>> and records its start.
>> Then it modifies all metadata block groups cached and full.
>> Finally it marks the new block group uncached and unfree.
>> In the next CoW, extents states will be updated automatically by
>> cache_block_group().
>>
>> Suggested-by: Qu Wenruo <wqu@suse.com>
>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>> ---
>>   cmds-check.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 80 insertions(+)
>>
>> diff --git a/cmds-check.c b/cmds-check.c
>> index d98d4bda7357..311c8a9f45e8 100644
>> --- a/cmds-check.c
>> +++ b/cmds-check.c
>> @@ -10911,6 +10911,86 @@ out:
>>   	return ret;
>>   }
>>   
>> +static int create_chunk_and_block_group(struct btrfs_fs_info *fs_info,
>> +					u64 flags, u64 *start, u64 *nbytes)
>> +{
>> +	struct btrfs_trans_handle *trans;
>> +	struct btrfs_root *root = fs_info->extent_root;
>> +	int ret;
>> +
>> +	if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>> +		return -EINVAL;
>> +
>> +	trans = btrfs_start_transaction(root, 1);
>> +	if (IS_ERR(trans)) {
>> +		ret = PTR_ERR(trans);
>> +		error("error starting transaction %s", strerror(-ret));
>> +		return ret;
>> +	}
>> +	ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>> +	if (ret) {
>> +		error("fail to allocate new chunk %s", strerror(-ret));
>> +		goto out;
>> +	}
>> +	ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>> +			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>> +	if (ret) {
>> +		error("fail to make block group for chunk %llu %llu %s",
>> +		      *start, *nbytes, strerror(-ret));
>> +		goto out;
>> +	}
>> +out:
>> +	btrfs_commit_transaction(trans, root);
>> +	return ret;
>> +}
>> +
>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>> +{
>> +	struct btrfs_block_group_cache *bg;
>> +	u64 start;
>> +	u64 nbytes;
>> +	u64 alloc_profile;
>> +	u64 flags;
>> +	int ret;
>> +
>> +	alloc_profile = (fs_info->avail_metadata_alloc_bits &
>> +			 fs_info->metadata_alloc_profile);
>> +	flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>> +	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>> +		flags |= BTRFS_BLOCK_GROUP_DATA;
>> +
>> +	ret = create_chunk_and_block_group(fs_info, flags, &start, &nbytes);
> 
> Why bother allocating a new chunk by yourself?
> 
> Just mark all block groups full and that's all.
> 
> Any later tree block allocation like btrfs_search_slot() with @cow = 1
> will trigger chunk allocation automatically.
> 

Tried to let it happen but BUG_ON with -ENOSPC.
Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called 
while doing CoW?
In progs, allocation of new chunk during CoW depends @root->ref_cows.
However, @root->ref_cows should be set only if @root is root of a fs
trees.

Thanks,
Su

> Thanks,
> Qu
> 
>> +	if (!ret)
>> +		printf("Create new chunk %llu %llu\n", start, nbytes);
>> +	else
>> +		goto err;
>> +
>> +	flags = BTRFS_BLOCK_GROUP_METADATA;
>> +	/* Mark all metadata block groups cached and full in free space*/
>> +	ret = modify_block_groups_cache(fs_info, flags, 1);
>> +	if (ret)
>> +		goto clear_bg_cache;
>> +
>> +	bg = btrfs_lookup_block_group(fs_info, start);
>> +	if (!bg) {
>> +		ret = -ENOENT;
>> +		error("fail to look up block group %llu %llu", start, nbytes);
>> +		goto clear_bg_cache;
>> +	}
>> +
>> +	/* Clear block group cache just allocated */
>> +	ret = modify_block_group_cache(fs_info, bg, 0);
>> +	if (ret)
>> +		goto clear_bg_cache;
>> +
>> +	return 0;
>> +
>> +clear_bg_cache:
>> +	modify_block_groups_cache(fs_info, flags, 0);
>> +err:
>> +	return ret;
>> +}
>> +
>>   static int check_extent_refs(struct btrfs_root *root,
>>   			     struct cache_tree *extent_cache)
>>   {
>>
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Dec. 20, 2017, 8:37 a.m. UTC | #3
On 2017年12月20日 16:21, Su Yue wrote:
> 
> 
> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>
>>
>> On 2017年12月20日 12:57, Su Yue wrote:
>>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>>> and corresponding block group.
>>>
>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>> and records its start.
>>> Then it modifies all metadata block groups cached and full.
>>> Finally it marks the new block group uncached and unfree.
>>> In the next CoW, extents states will be updated automatically by
>>> cache_block_group().
>>>
>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>> ---
>>>   cmds-check.c | 80
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 80 insertions(+)
>>>
>>> diff --git a/cmds-check.c b/cmds-check.c
>>> index d98d4bda7357..311c8a9f45e8 100644
>>> --- a/cmds-check.c
>>> +++ b/cmds-check.c
>>> @@ -10911,6 +10911,86 @@ out:
>>>       return ret;
>>>   }
>>>   +static int create_chunk_and_block_group(struct btrfs_fs_info
>>> *fs_info,
>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>> +{
>>> +    struct btrfs_trans_handle *trans;
>>> +    struct btrfs_root *root = fs_info->extent_root;
>>> +    int ret;
>>> +
>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>> +        return -EINVAL;
>>> +
>>> +    trans = btrfs_start_transaction(root, 1);
>>> +    if (IS_ERR(trans)) {
>>> +        ret = PTR_ERR(trans);
>>> +        error("error starting transaction %s", strerror(-ret));
>>> +        return ret;
>>> +    }
>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>> +    if (ret) {
>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>> +        goto out;
>>> +    }
>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>>> +    if (ret) {
>>> +        error("fail to make block group for chunk %llu %llu %s",
>>> +              *start, *nbytes, strerror(-ret));
>>> +        goto out;
>>> +    }
>>> +out:
>>> +    btrfs_commit_transaction(trans, root);
>>> +    return ret;
>>> +}
>>> +
>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>> +{
>>> +    struct btrfs_block_group_cache *bg;
>>> +    u64 start;
>>> +    u64 nbytes;
>>> +    u64 alloc_profile;
>>> +    u64 flags;
>>> +    int ret;
>>> +
>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>> +             fs_info->metadata_alloc_profile);
>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>> +
>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>> &nbytes);
>>
>> Why bother allocating a new chunk by yourself?
>>
>> Just mark all block groups full and that's all.
>>
>> Any later tree block allocation like btrfs_search_slot() with @cow = 1
>> will trigger chunk allocation automatically.
>>
> 
> Tried to let it happen but BUG_ON with -ENOSPC.

Then fix it.

It's not a normal behavior in this case.

Thanks,
Qu

> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called
> while doing CoW?> In progs, allocation of new chunk during CoW depends @root->ref_cows.
> However, @root->ref_cows should be set only if @root is root of a fs
> trees.
> 
> Thanks,
> Su
> 
>> Thanks,
>> Qu
>>
>>> +    if (!ret)
>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>> +    else
>>> +        goto err;
>>> +
>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>> +    /* Mark all metadata block groups cached and full in free space*/
>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>> +    if (ret)
>>> +        goto clear_bg_cache;
>>> +
>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>> +    if (!bg) {
>>> +        ret = -ENOENT;
>>> +        error("fail to look up block group %llu %llu", start, nbytes);
>>> +        goto clear_bg_cache;
>>> +    }
>>> +
>>> +    /* Clear block group cache just allocated */
>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>> +    if (ret)
>>> +        goto clear_bg_cache;
>>> +
>>> +    return 0;
>>> +
>>> +clear_bg_cache:
>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>> +err:
>>> +    return ret;
>>> +}
>>> +
>>>   static int check_extent_refs(struct btrfs_root *root,
>>>                    struct cache_tree *extent_cache)
>>>   {
>>>
>>
> 
> 
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Dec. 21, 2017, 6:51 a.m. UTC | #4
On 2017年12月20日 16:37, Qu Wenruo wrote:
> 
> 
> On 2017年12月20日 16:21, Su Yue wrote:
>>
>>
>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>
>>>
>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>>>> and corresponding block group.
>>>>
>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>> and records its start.
>>>> Then it modifies all metadata block groups cached and full.
>>>> Finally it marks the new block group uncached and unfree.
>>>> In the next CoW, extents states will be updated automatically by
>>>> cache_block_group().
>>>>
>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>> ---
>>>>   cmds-check.c | 80
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 80 insertions(+)
>>>>
>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>> --- a/cmds-check.c
>>>> +++ b/cmds-check.c
>>>> @@ -10911,6 +10911,86 @@ out:
>>>>       return ret;
>>>>   }
>>>>   +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>> *fs_info,
>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>> +{
>>>> +    struct btrfs_trans_handle *trans;
>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>> +    int ret;
>>>> +
>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    trans = btrfs_start_transaction(root, 1);
>>>> +    if (IS_ERR(trans)) {
>>>> +        ret = PTR_ERR(trans);
>>>> +        error("error starting transaction %s", strerror(-ret));
>>>> +        return ret;
>>>> +    }
>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>> +    if (ret) {
>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>> +        goto out;
>>>> +    }
>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>>>> +    if (ret) {
>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>> +              *start, *nbytes, strerror(-ret));
>>>> +        goto out;
>>>> +    }
>>>> +out:
>>>> +    btrfs_commit_transaction(trans, root);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>> +{
>>>> +    struct btrfs_block_group_cache *bg;
>>>> +    u64 start;
>>>> +    u64 nbytes;
>>>> +    u64 alloc_profile;
>>>> +    u64 flags;
>>>> +    int ret;
>>>> +
>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>> +             fs_info->metadata_alloc_profile);
>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>> +
>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>> &nbytes);
>>>
>>> Why bother allocating a new chunk by yourself?
>>>
>>> Just mark all block groups full and that's all.
>>>
>>> Any later tree block allocation like btrfs_search_slot() with @cow = 1
>>> will trigger chunk allocation automatically.
>>>
>>
>> Tried to let it happen but BUG_ON with -ENOSPC.
> 
> Then fix it.
> 
> It's not a normal behavior in this case.
> 
> Thanks,
> Qu

And I think the fix to allow btrfs_reserve_extent() to allocate new
chunk will solve a lot of strange BUG_ON().

it just occurred to me that, a lot of use cases relies on the assumption
that btrfs_reserve_extent() will try to allocate new chunks.

Especially for case like convert, certain btrfs check --repair, some
rescue tools.

So this would be a quite nice start point for such fix.

Thanks,
Qu

> 
>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called
>> while doing CoW?> In progs, allocation of new chunk during CoW depends @root->ref_cows.
>> However, @root->ref_cows should be set only if @root is root of a fs
>> trees.
>>
>> Thanks,
>> Su
>>
>>> Thanks,
>>> Qu
>>>
>>>> +    if (!ret)
>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>> +    else
>>>> +        goto err;
>>>> +
>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>> +    /* Mark all metadata block groups cached and full in free space*/
>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>> +    if (ret)
>>>> +        goto clear_bg_cache;
>>>> +
>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>> +    if (!bg) {
>>>> +        ret = -ENOENT;
>>>> +        error("fail to look up block group %llu %llu", start, nbytes);
>>>> +        goto clear_bg_cache;
>>>> +    }
>>>> +
>>>> +    /* Clear block group cache just allocated */
>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>> +    if (ret)
>>>> +        goto clear_bg_cache;
>>>> +
>>>> +    return 0;
>>>> +
>>>> +clear_bg_cache:
>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>> +err:
>>>> +    return ret;
>>>> +}
>>>> +
>>>>   static int check_extent_refs(struct btrfs_root *root,
>>>>                    struct cache_tree *extent_cache)
>>>>   {
>>>>
>>>
>>
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Su Yue Dec. 21, 2017, 7:06 a.m. UTC | #5
On 12/21/2017 02:51 PM, Qu Wenruo wrote:
> 
> 
> On 2017年12月20日 16:37, Qu Wenruo wrote:
>>
>>
>> On 2017年12月20日 16:21, Su Yue wrote:
>>>
>>>
>>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>>
>>>>
>>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>>>>> and corresponding block group.
>>>>>
>>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>>> and records its start.
>>>>> Then it modifies all metadata block groups cached and full.
>>>>> Finally it marks the new block group uncached and unfree.
>>>>> In the next CoW, extents states will be updated automatically by
>>>>> cache_block_group().
>>>>>
>>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>>> ---
>>>>>    cmds-check.c | 80
>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>    1 file changed, 80 insertions(+)
>>>>>
>>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>>> --- a/cmds-check.c
>>>>> +++ b/cmds-check.c
>>>>> @@ -10911,6 +10911,86 @@ out:
>>>>>        return ret;
>>>>>    }
>>>>>    +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>>> *fs_info,
>>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>>> +{
>>>>> +    struct btrfs_trans_handle *trans;
>>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>>> +    int ret;
>>>>> +
>>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    trans = btrfs_start_transaction(root, 1);
>>>>> +    if (IS_ERR(trans)) {
>>>>> +        ret = PTR_ERR(trans);
>>>>> +        error("error starting transaction %s", strerror(-ret));
>>>>> +        return ret;
>>>>> +    }
>>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>>> +    if (ret) {
>>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>>> +        goto out;
>>>>> +    }
>>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>>>>> +    if (ret) {
>>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>>> +              *start, *nbytes, strerror(-ret));
>>>>> +        goto out;
>>>>> +    }
>>>>> +out:
>>>>> +    btrfs_commit_transaction(trans, root);
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>>> +{
>>>>> +    struct btrfs_block_group_cache *bg;
>>>>> +    u64 start;
>>>>> +    u64 nbytes;
>>>>> +    u64 alloc_profile;
>>>>> +    u64 flags;
>>>>> +    int ret;
>>>>> +
>>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>>> +             fs_info->metadata_alloc_profile);
>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>>> +
>>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>>> &nbytes);
>>>>
>>>> Why bother allocating a new chunk by yourself?
>>>>
>>>> Just mark all block groups full and that's all.
>>>>
>>>> Any later tree block allocation like btrfs_search_slot() with @cow = 1
>>>> will trigger chunk allocation automatically.
>>>>
>>>
>>> Tried to let it happen but BUG_ON with -ENOSPC.
>>
>> Then fix it.
>>
>> It's not a normal behavior in this case.
>>
>> Thanks,
>> Qu
> 
> And I think the fix to allow btrfs_reserve_extent() to allocate new
> chunk will solve a lot of strange BUG_ON().
> 
> it just occurred to me that, a lot of use cases relies on the assumption
> that btrfs_reserve_extent() will try to allocate new chunks.
> Yes, it has many dependency so I considered to do chunk allocation
manually in the patchset. If fix is not good enough, many funtions
of btrfs-progs will behave abnormal.
Since you ask it, I will go to fix it.

Thanks,
Su
> Especially for case like convert, certain btrfs check --repair, some
> rescue tools.
> 
> So this would be a quite nice start point for such fix.
> 
> Thanks,
> Qu
> 
>>
>>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called
>>> while doing CoW?> In progs, allocation of new chunk during CoW depends @root->ref_cows.
>>> However, @root->ref_cows should be set only if @root is root of a fs
>>> trees.
>>>
>>> Thanks,
>>> Su
>>>
>>>> Thanks,
>>>> Qu
>>>>
>>>>> +    if (!ret)
>>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>>> +    else
>>>>> +        goto err;
>>>>> +
>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>>> +    /* Mark all metadata block groups cached and full in free space*/
>>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>>> +    if (ret)
>>>>> +        goto clear_bg_cache;
>>>>> +
>>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>>> +    if (!bg) {
>>>>> +        ret = -ENOENT;
>>>>> +        error("fail to look up block group %llu %llu", start, nbytes);
>>>>> +        goto clear_bg_cache;
>>>>> +    }
>>>>> +
>>>>> +    /* Clear block group cache just allocated */
>>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>>> +    if (ret)
>>>>> +        goto clear_bg_cache;
>>>>> +
>>>>> +    return 0;
>>>>> +
>>>>> +clear_bg_cache:
>>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>>> +err:
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>    static int check_extent_refs(struct btrfs_root *root,
>>>>>                     struct cache_tree *extent_cache)
>>>>>    {
>>>>>
>>>>
>>>
>>>
>>> -- 
>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Su Yue Dec. 21, 2017, 7:09 a.m. UTC | #6
On 12/21/2017 02:51 PM, Qu Wenruo wrote:
> 
> 
> On 2017年12月20日 16:37, Qu Wenruo wrote:
>>
>>
>> On 2017年12月20日 16:21, Su Yue wrote:
>>>
>>>
>>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>>
>>>>
>>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>>>>> and corresponding block group.
>>>>>
>>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>>> and records its start.
>>>>> Then it modifies all metadata block groups cached and full.
>>>>> Finally it marks the new block group uncached and unfree.
>>>>> In the next CoW, extents states will be updated automatically by
>>>>> cache_block_group().
>>>>>
>>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>>> ---
>>>>>    cmds-check.c | 80
>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>    1 file changed, 80 insertions(+)
>>>>>
>>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>>> --- a/cmds-check.c
>>>>> +++ b/cmds-check.c
>>>>> @@ -10911,6 +10911,86 @@ out:
>>>>>        return ret;
>>>>>    }
>>>>>    +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>>> *fs_info,
>>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>>> +{
>>>>> +    struct btrfs_trans_handle *trans;
>>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>>> +    int ret;
>>>>> +
>>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    trans = btrfs_start_transaction(root, 1);
>>>>> +    if (IS_ERR(trans)) {
>>>>> +        ret = PTR_ERR(trans);
>>>>> +        error("error starting transaction %s", strerror(-ret));
>>>>> +        return ret;
>>>>> +    }
>>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>>> +    if (ret) {
>>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>>> +        goto out;
>>>>> +    }
>>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>>>>> +    if (ret) {
>>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>>> +              *start, *nbytes, strerror(-ret));
>>>>> +        goto out;
>>>>> +    }
>>>>> +out:
>>>>> +    btrfs_commit_transaction(trans, root);
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>>> +{
>>>>> +    struct btrfs_block_group_cache *bg;
>>>>> +    u64 start;
>>>>> +    u64 nbytes;
>>>>> +    u64 alloc_profile;
>>>>> +    u64 flags;
>>>>> +    int ret;
>>>>> +
>>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>>> +             fs_info->metadata_alloc_profile);
>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>>> +
>>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>>> &nbytes);
>>>>
>>>> Why bother allocating a new chunk by yourself?
>>>>
>>>> Just mark all block groups full and that's all.
>>>>
>>>> Any later tree block allocation like btrfs_search_slot() with @cow = 1
>>>> will trigger chunk allocation automatically.
>>>>
>>>
>>> Tried to let it happen but BUG_ON with -ENOSPC.
>>
>> Then fix it.
>>
>> It's not a normal behavior in this case.
>>
>> Thanks,
>> Qu
> 
> And I think the fix to allow btrfs_reserve_extent() to allocate new
> chunk will solve a lot of strange BUG_ON().
> 
> it just occurred to me that, a lot of use cases relies on the assumption
> that btrfs_reserve_extent() will try to allocate new chunks.
> 
> Especially for case like convert, certain btrfs check --repair, some
> rescue tools.
> 
Sorry for the previous wrong format mail.

Yes, it has many dependency so I considered to do chunk allocation
manually in the patchset. If fix is not good enough, many funtions
of btrfs-progs will behave abnormal.
Since you ask it, I will go to fix it.

Thanks,
Su

> So this would be a quite nice start point for such fix.
> 
> Thanks,
> Qu
> 
>>
>>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called
>>> while doing CoW?> In progs, allocation of new chunk during CoW depends @root->ref_cows.
>>> However, @root->ref_cows should be set only if @root is root of a fs
>>> trees.
>>>
>>> Thanks,
>>> Su
>>>
>>>> Thanks,
>>>> Qu
>>>>
>>>>> +    if (!ret)
>>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>>> +    else
>>>>> +        goto err;
>>>>> +
>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>>> +    /* Mark all metadata block groups cached and full in free space*/
>>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>>> +    if (ret)
>>>>> +        goto clear_bg_cache;
>>>>> +
>>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>>> +    if (!bg) {
>>>>> +        ret = -ENOENT;
>>>>> +        error("fail to look up block group %llu %llu", start, nbytes);
>>>>> +        goto clear_bg_cache;
>>>>> +    }
>>>>> +
>>>>> +    /* Clear block group cache just allocated */
>>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>>> +    if (ret)
>>>>> +        goto clear_bg_cache;
>>>>> +
>>>>> +    return 0;
>>>>> +
>>>>> +clear_bg_cache:
>>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>>> +err:
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>    static int check_extent_refs(struct btrfs_root *root,
>>>>>                     struct cache_tree *extent_cache)
>>>>>    {
>>>>>
>>>>
>>>
>>>
>>> -- 
>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Dec. 21, 2017, 7:12 a.m. UTC | #7
On 2017年12月21日 15:09, Su Yue wrote:
> 
> 
> On 12/21/2017 02:51 PM, Qu Wenruo wrote:
>>
>>
>> On 2017年12月20日 16:37, Qu Wenruo wrote:
>>>
>>>
>>> On 2017年12月20日 16:21, Su Yue wrote:
>>>>
>>>>
>>>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>>>
>>>>>
>>>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>>>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>>>>>> and corresponding block group.
>>>>>>
>>>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>>>> and records its start.
>>>>>> Then it modifies all metadata block groups cached and full.
>>>>>> Finally it marks the new block group uncached and unfree.
>>>>>> In the next CoW, extents states will be updated automatically by
>>>>>> cache_block_group().
>>>>>>
>>>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>>>> ---
>>>>>>    cmds-check.c | 80
>>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>    1 file changed, 80 insertions(+)
>>>>>>
>>>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>>>> --- a/cmds-check.c
>>>>>> +++ b/cmds-check.c
>>>>>> @@ -10911,6 +10911,86 @@ out:
>>>>>>        return ret;
>>>>>>    }
>>>>>>    +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>>>> *fs_info,
>>>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>>>> +{
>>>>>> +    struct btrfs_trans_handle *trans;
>>>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    trans = btrfs_start_transaction(root, 1);
>>>>>> +    if (IS_ERR(trans)) {
>>>>>> +        ret = PTR_ERR(trans);
>>>>>> +        error("error starting transaction %s", strerror(-ret));
>>>>>> +        return ret;
>>>>>> +    }
>>>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>>>> +    if (ret) {
>>>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>>>>>> +    if (ret) {
>>>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>>>> +              *start, *nbytes, strerror(-ret));
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +out:
>>>>>> +    btrfs_commit_transaction(trans, root);
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>>>> +{
>>>>>> +    struct btrfs_block_group_cache *bg;
>>>>>> +    u64 start;
>>>>>> +    u64 nbytes;
>>>>>> +    u64 alloc_profile;
>>>>>> +    u64 flags;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>>>> +             fs_info->metadata_alloc_profile);
>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>>>> +
>>>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>>>> &nbytes);
>>>>>
>>>>> Why bother allocating a new chunk by yourself?
>>>>>
>>>>> Just mark all block groups full and that's all.
>>>>>
>>>>> Any later tree block allocation like btrfs_search_slot() with @cow = 1
>>>>> will trigger chunk allocation automatically.
>>>>>
>>>>
>>>> Tried to let it happen but BUG_ON with -ENOSPC.
>>>
>>> Then fix it.
>>>
>>> It's not a normal behavior in this case.
>>>
>>> Thanks,
>>> Qu
>>
>> And I think the fix to allow btrfs_reserve_extent() to allocate new
>> chunk will solve a lot of strange BUG_ON().
>>
>> it just occurred to me that, a lot of use cases relies on the assumption
>> that btrfs_reserve_extent() will try to allocate new chunks.
>>
>> Especially for case like convert, certain btrfs check --repair, some
>> rescue tools.
>>
> Sorry for the previous wrong format mail.
> 
> Yes, it has many dependency so I considered to do chunk allocation
> manually in the patchset. If fix is not good enough, many funtions
> of btrfs-progs will behave abnormal.
> Since you ask it, I will go to fix it.

Manually allocation in advance has its advantage, like we can determine
if there is enough space for new chunk instead of checking every return
value with ENOSPC.


However in current case, your metadata usage is limited to the new chunk
only.
If there extent tree has quite a lot of problem, and the chunk allocated
is small (if using single profile and small fs), it can easily hit
ENOSPC again, since btrfs doesn't allocate new chunk for later metadata
write.

So here, we still need to allow btrfs allocate new meta chunk, even we
pre-allocate one meta chunk in advance.

Thanks,
Qu
> 
> Thanks,
> Su
> 
>> So this would be a quite nice start point for such fix.
>>
>> Thanks,
>> Qu
>>
>>>
>>>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called
>>>> while doing CoW?> In progs, allocation of new chunk during CoW
>>>> depends @root->ref_cows.
>>>> However, @root->ref_cows should be set only if @root is root of a fs
>>>> trees.
>>>>
>>>> Thanks,
>>>> Su
>>>>
>>>>> Thanks,
>>>>> Qu
>>>>>
>>>>>> +    if (!ret)
>>>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>>>> +    else
>>>>>> +        goto err;
>>>>>> +
>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>>>> +    /* Mark all metadata block groups cached and full in free
>>>>>> space*/
>>>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>>>> +    if (ret)
>>>>>> +        goto clear_bg_cache;
>>>>>> +
>>>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>>>> +    if (!bg) {
>>>>>> +        ret = -ENOENT;
>>>>>> +        error("fail to look up block group %llu %llu", start,
>>>>>> nbytes);
>>>>>> +        goto clear_bg_cache;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Clear block group cache just allocated */
>>>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>>>> +    if (ret)
>>>>>> +        goto clear_bg_cache;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +
>>>>>> +clear_bg_cache:
>>>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>>>> +err:
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>>    static int check_extent_refs(struct btrfs_root *root,
>>>>>>                     struct cache_tree *extent_cache)
>>>>>>    {
>>>>>>
>>>>>
>>>>
>>>>
>>>> -- 
>>>> To unsubscribe from this list: send the line "unsubscribe
>>>> linux-btrfs" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>
> 
>
Su Yue Dec. 26, 2017, 8:17 a.m. UTC | #8
On 12/21/2017 03:12 PM, Qu Wenruo wrote:
> 
> 
> On 2017年12月21日 15:09, Su Yue wrote:
>>
>>
>> On 12/21/2017 02:51 PM, Qu Wenruo wrote:
>>>
>>>
>>> On 2017年12月20日 16:37, Qu Wenruo wrote:
>>>>
>>>>
>>>> On 2017年12月20日 16:21, Su Yue wrote:
>>>>>
>>>>>
>>>>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>>>>
>>>>>>
>>>>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>>>>> Introduce create_chunk_and_block_block_group() to allocate new chunk
>>>>>>> and corresponding block group.
>>>>>>>
>>>>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>>>>> and records its start.
>>>>>>> Then it modifies all metadata block groups cached and full.
>>>>>>> Finally it marks the new block group uncached and unfree.
>>>>>>> In the next CoW, extents states will be updated automatically by
>>>>>>> cache_block_group().
>>>>>>>
>>>>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>>>>> ---
>>>>>>>     cmds-check.c | 80
>>>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>     1 file changed, 80 insertions(+)
>>>>>>>
>>>>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>>>>> --- a/cmds-check.c
>>>>>>> +++ b/cmds-check.c
>>>>>>> @@ -10911,6 +10911,86 @@ out:
>>>>>>>         return ret;
>>>>>>>     }
>>>>>>>     +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>>>>> *fs_info,
>>>>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>>>>> +{
>>>>>>> +    struct btrfs_trans_handle *trans;
>>>>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    trans = btrfs_start_transaction(root, 1);
>>>>>>> +    if (IS_ERR(trans)) {
>>>>>>> +        ret = PTR_ERR(trans);
>>>>>>> +        error("error starting transaction %s", strerror(-ret));
>>>>>>> +        return ret;
>>>>>>> +    }
>>>>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>>>>> +    if (ret) {
>>>>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>>>>> +        goto out;
>>>>>>> +    }
>>>>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
>>>>>>> +    if (ret) {
>>>>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>>>>> +              *start, *nbytes, strerror(-ret));
>>>>>>> +        goto out;
>>>>>>> +    }
>>>>>>> +out:
>>>>>>> +    btrfs_commit_transaction(trans, root);
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>>>>> +{
>>>>>>> +    struct btrfs_block_group_cache *bg;
>>>>>>> +    u64 start;
>>>>>>> +    u64 nbytes;
>>>>>>> +    u64 alloc_profile;
>>>>>>> +    u64 flags;
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>>>>> +             fs_info->metadata_alloc_profile);
>>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>>>>> +
>>>>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>>>>> &nbytes);
>>>>>>
>>>>>> Why bother allocating a new chunk by yourself?
>>>>>>
>>>>>> Just mark all block groups full and that's all.
>>>>>>
>>>>>> Any later tree block allocation like btrfs_search_slot() with @cow = 1
>>>>>> will trigger chunk allocation automatically.
>>>>>>
>>>>>
>>>>> Tried to let it happen but BUG_ON with -ENOSPC.
>>>>
>>>> Then fix it.
>>>>
>>>> It's not a normal behavior in this case.
>>>>
>>>> Thanks,
>>>> Qu
>>>
>>> And I think the fix to allow btrfs_reserve_extent() to allocate new
>>> chunk will solve a lot of strange BUG_ON().
>>>
>>> it just occurred to me that, a lot of use cases relies on the assumption
>>> that btrfs_reserve_extent() will try to allocate new chunks.
>>>
>>> Especially for case like convert, certain btrfs check --repair, some
>>> rescue tools.
>>>
>> Sorry for the previous wrong format mail.
>>
>> Yes, it has many dependency so I considered to do chunk allocation
>> manually in the patchset. If fix is not good enough, many funtions
>> of btrfs-progs will behave abnormal.
>> Since you ask it, I will go to fix it.
> 
> Manually allocation in advance has its advantage, like we can determine
> if there is enough space for new chunk instead of checking every return
> value with ENOSPC.
> 
> 
> However in current case, your metadata usage is limited to the new chunk
> only.
> If there extent tree has quite a lot of problem, and the chunk allocated
> is small (if using single profile and small fs), it can easily hit
> ENOSPC again, since btrfs doesn't allocate new chunk for later metadata
> write.
> 
SAD. After I tried to implement above nice idea, infinite recursive
brings me back to the reality.

Here is the reason why btrfs_reserve_extent can not allocate chunk
by itself if ENOSPC hints:

btrfs_cow_block
...
   btrfs_reserve_extent
     btrfs_alloc_chunk
       btrfs_alloc_dev_extent
         btrfs_insert_empty_item
           ...
            btrfs_cow_block

Thanks,
Su

> So here, we still need to allow btrfs allocate new meta chunk, even we
> pre-allocate one meta chunk in advance.
> 
> Thanks,
> Qu
>>
>> Thanks,
>> Su
>>
>>> So this would be a quite nice start point for such fix.
>>>
>>> Thanks,
>>> Qu
>>>
>>>>
>>>>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is called
>>>>> while doing CoW?> In progs, allocation of new chunk during CoW
>>>>> depends @root->ref_cows.
>>>>> However, @root->ref_cows should be set only if @root is root of a fs
>>>>> trees.
>>>>>
>>>>> Thanks,
>>>>> Su
>>>>>
>>>>>> Thanks,
>>>>>> Qu
>>>>>>
>>>>>>> +    if (!ret)
>>>>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>>>>> +    else
>>>>>>> +        goto err;
>>>>>>> +
>>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>>>>> +    /* Mark all metadata block groups cached and full in free
>>>>>>> space*/
>>>>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>>>>> +    if (ret)
>>>>>>> +        goto clear_bg_cache;
>>>>>>> +
>>>>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>>>>> +    if (!bg) {
>>>>>>> +        ret = -ENOENT;
>>>>>>> +        error("fail to look up block group %llu %llu", start,
>>>>>>> nbytes);
>>>>>>> +        goto clear_bg_cache;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    /* Clear block group cache just allocated */
>>>>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>>>>> +    if (ret)
>>>>>>> +        goto clear_bg_cache;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +
>>>>>>> +clear_bg_cache:
>>>>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>>>>> +err:
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>     static int check_extent_refs(struct btrfs_root *root,
>>>>>>>                      struct cache_tree *extent_cache)
>>>>>>>     {
>>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>> -- 
>>>>> To unsubscribe from this list: send the line "unsubscribe
>>>>> linux-btrfs" in
>>>>> the body of a message to majordomo@vger.kernel.org
>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>>
>>
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Dec. 26, 2017, 10:28 a.m. UTC | #9
On 2017年12月26日 16:17, Su Yue wrote:
> 
> 
> On 12/21/2017 03:12 PM, Qu Wenruo wrote:
>>
>>
>> On 2017年12月21日 15:09, Su Yue wrote:
>>>
>>>
>>> On 12/21/2017 02:51 PM, Qu Wenruo wrote:
>>>>
>>>>
>>>> On 2017年12月20日 16:37, Qu Wenruo wrote:
>>>>>
>>>>>
>>>>> On 2017年12月20日 16:21, Su Yue wrote:
>>>>>>
>>>>>>
>>>>>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>>>>>> Introduce create_chunk_and_block_block_group() to allocate new
>>>>>>>> chunk
>>>>>>>> and corresponding block group.
>>>>>>>>
>>>>>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>>>>>> and records its start.
>>>>>>>> Then it modifies all metadata block groups cached and full.
>>>>>>>> Finally it marks the new block group uncached and unfree.
>>>>>>>> In the next CoW, extents states will be updated automatically by
>>>>>>>> cache_block_group().
>>>>>>>>
>>>>>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>>>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>>>>>> ---
>>>>>>>>     cmds-check.c | 80
>>>>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>     1 file changed, 80 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>>>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>>>>>> --- a/cmds-check.c
>>>>>>>> +++ b/cmds-check.c
>>>>>>>> @@ -10911,6 +10911,86 @@ out:
>>>>>>>>         return ret;
>>>>>>>>     }
>>>>>>>>     +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>>>>>> *fs_info,
>>>>>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>>>>>> +{
>>>>>>>> +    struct btrfs_trans_handle *trans;
>>>>>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    trans = btrfs_start_transaction(root, 1);
>>>>>>>> +    if (IS_ERR(trans)) {
>>>>>>>> +        ret = PTR_ERR(trans);
>>>>>>>> +        error("error starting transaction %s", strerror(-ret));
>>>>>>>> +        return ret;
>>>>>>>> +    }
>>>>>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>>>>>> +    if (ret) {
>>>>>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>>>>>> +        goto out;
>>>>>>>> +    }
>>>>>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>>>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start,
>>>>>>>> *nbytes);
>>>>>>>> +    if (ret) {
>>>>>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>>>>>> +              *start, *nbytes, strerror(-ret));
>>>>>>>> +        goto out;
>>>>>>>> +    }
>>>>>>>> +out:
>>>>>>>> +    btrfs_commit_transaction(trans, root);
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>>>>>> +{
>>>>>>>> +    struct btrfs_block_group_cache *bg;
>>>>>>>> +    u64 start;
>>>>>>>> +    u64 nbytes;
>>>>>>>> +    u64 alloc_profile;
>>>>>>>> +    u64 flags;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>>>>>> +             fs_info->metadata_alloc_profile);
>>>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>>>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>>>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>>>>>> +
>>>>>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>>>>>> &nbytes);
>>>>>>>
>>>>>>> Why bother allocating a new chunk by yourself?
>>>>>>>
>>>>>>> Just mark all block groups full and that's all.
>>>>>>>
>>>>>>> Any later tree block allocation like btrfs_search_slot() with
>>>>>>> @cow = 1
>>>>>>> will trigger chunk allocation automatically.
>>>>>>>
>>>>>>
>>>>>> Tried to let it happen but BUG_ON with -ENOSPC.
>>>>>
>>>>> Then fix it.
>>>>>
>>>>> It's not a normal behavior in this case.
>>>>>
>>>>> Thanks,
>>>>> Qu
>>>>
>>>> And I think the fix to allow btrfs_reserve_extent() to allocate new
>>>> chunk will solve a lot of strange BUG_ON().
>>>>
>>>> it just occurred to me that, a lot of use cases relies on the
>>>> assumption
>>>> that btrfs_reserve_extent() will try to allocate new chunks.
>>>>
>>>> Especially for case like convert, certain btrfs check --repair, some
>>>> rescue tools.
>>>>
>>> Sorry for the previous wrong format mail.
>>>
>>> Yes, it has many dependency so I considered to do chunk allocation
>>> manually in the patchset. If fix is not good enough, many funtions
>>> of btrfs-progs will behave abnormal.
>>> Since you ask it, I will go to fix it.
>>
>> Manually allocation in advance has its advantage, like we can determine
>> if there is enough space for new chunk instead of checking every return
>> value with ENOSPC.
>>
>>
>> However in current case, your metadata usage is limited to the new chunk
>> only.
>> If there extent tree has quite a lot of problem, and the chunk allocated
>> is small (if using single profile and small fs), it can easily hit
>> ENOSPC again, since btrfs doesn't allocate new chunk for later metadata
>> write.
>>
> SAD. After I tried to implement above nice idea, infinite recursive
> brings me back to the reality.
> 
> Here is the reason why btrfs_reserve_extent can not allocate chunk
> by itself if ENOSPC hints:
> 
> btrfs_cow_block
> ...
>   btrfs_reserve_extent
>     btrfs_alloc_chunk
>       btrfs_alloc_dev_extent
>         btrfs_insert_empty_item
>           ...
>            btrfs_cow_block

The order is just wrong.

We should first check for the new chunk space, not doing the insert
right now.

I'll take over the work if you're OK with it.

Thanks,
Qu

> 
> Thanks,
> Su
> 
>> So here, we still need to allow btrfs allocate new meta chunk, even we
>> pre-allocate one meta chunk in advance.
>>
>> Thanks,
>> Qu
>>>
>>> Thanks,
>>> Su
>>>
>>>> So this would be a quite nice start point for such fix.
>>>>
>>>> Thanks,
>>>> Qu
>>>>
>>>>>
>>>>>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is
>>>>>> called
>>>>>> while doing CoW?> In progs, allocation of new chunk during CoW
>>>>>> depends @root->ref_cows.
>>>>>> However, @root->ref_cows should be set only if @root is root of a fs
>>>>>> trees.
>>>>>>
>>>>>> Thanks,
>>>>>> Su
>>>>>>
>>>>>>> Thanks,
>>>>>>> Qu
>>>>>>>
>>>>>>>> +    if (!ret)
>>>>>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>>>>>> +    else
>>>>>>>> +        goto err;
>>>>>>>> +
>>>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>>>>>> +    /* Mark all metadata block groups cached and full in free
>>>>>>>> space*/
>>>>>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto clear_bg_cache;
>>>>>>>> +
>>>>>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>>>>>> +    if (!bg) {
>>>>>>>> +        ret = -ENOENT;
>>>>>>>> +        error("fail to look up block group %llu %llu", start,
>>>>>>>> nbytes);
>>>>>>>> +        goto clear_bg_cache;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    /* Clear block group cache just allocated */
>>>>>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>>>>>> +    if (ret)
>>>>>>>> +        goto clear_bg_cache;
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +
>>>>>>>> +clear_bg_cache:
>>>>>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>>>>>> +err:
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>     static int check_extent_refs(struct btrfs_root *root,
>>>>>>>>                      struct cache_tree *extent_cache)
>>>>>>>>     {
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>>
>>>>>> -- 
>>>>>> To unsubscribe from this list: send the line "unsubscribe
>>>>>> linux-btrfs" in
>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>>
>>>>
>>>
>>>
>>
> 
> 
> -- 
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Su Yue Dec. 27, 2017, 1:11 a.m. UTC | #10
On 12/26/2017 06:28 PM, Qu Wenruo wrote:
> 
> 
> On 2017年12月26日 16:17, Su Yue wrote:
>>
>>
>> On 12/21/2017 03:12 PM, Qu Wenruo wrote:
>>>
>>>
>>> On 2017年12月21日 15:09, Su Yue wrote:
>>>>
>>>>
>>>> On 12/21/2017 02:51 PM, Qu Wenruo wrote:
>>>>>
>>>>>
>>>>> On 2017年12月20日 16:37, Qu Wenruo wrote:
>>>>>>
>>>>>>
>>>>>> On 2017年12月20日 16:21, Su Yue wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 12/20/2017 01:41 PM, Qu Wenruo wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 2017年12月20日 12:57, Su Yue wrote:
>>>>>>>>> Introduce create_chunk_and_block_block_group() to allocate new
>>>>>>>>> chunk
>>>>>>>>> and corresponding block group.
>>>>>>>>>
>>>>>>>>> The new function force_cow_in_new_chunk() first allocates new chunk
>>>>>>>>> and records its start.
>>>>>>>>> Then it modifies all metadata block groups cached and full.
>>>>>>>>> Finally it marks the new block group uncached and unfree.
>>>>>>>>> In the next CoW, extents states will be updated automatically by
>>>>>>>>> cache_block_group().
>>>>>>>>>
>>>>>>>>> Suggested-by: Qu Wenruo <wqu@suse.com>
>>>>>>>>> Signed-off-by: Su Yue <suy.fnst@cn.fujitsu.com>
>>>>>>>>> ---
>>>>>>>>>      cmds-check.c | 80
>>>>>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>>      1 file changed, 80 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/cmds-check.c b/cmds-check.c
>>>>>>>>> index d98d4bda7357..311c8a9f45e8 100644
>>>>>>>>> --- a/cmds-check.c
>>>>>>>>> +++ b/cmds-check.c
>>>>>>>>> @@ -10911,6 +10911,86 @@ out:
>>>>>>>>>          return ret;
>>>>>>>>>      }
>>>>>>>>>      +static int create_chunk_and_block_group(struct btrfs_fs_info
>>>>>>>>> *fs_info,
>>>>>>>>> +                    u64 flags, u64 *start, u64 *nbytes)
>>>>>>>>> +{
>>>>>>>>> +    struct btrfs_trans_handle *trans;
>>>>>>>>> +    struct btrfs_root *root = fs_info->extent_root;
>>>>>>>>> +    int ret;
>>>>>>>>> +
>>>>>>>>> +    if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +    trans = btrfs_start_transaction(root, 1);
>>>>>>>>> +    if (IS_ERR(trans)) {
>>>>>>>>> +        ret = PTR_ERR(trans);
>>>>>>>>> +        error("error starting transaction %s", strerror(-ret));
>>>>>>>>> +        return ret;
>>>>>>>>> +    }
>>>>>>>>> +    ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
>>>>>>>>> +    if (ret) {
>>>>>>>>> +        error("fail to allocate new chunk %s", strerror(-ret));
>>>>>>>>> +        goto out;
>>>>>>>>> +    }
>>>>>>>>> +    ret = btrfs_make_block_group(trans, fs_info, 0, flags,
>>>>>>>>> +                 BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start,
>>>>>>>>> *nbytes);
>>>>>>>>> +    if (ret) {
>>>>>>>>> +        error("fail to make block group for chunk %llu %llu %s",
>>>>>>>>> +              *start, *nbytes, strerror(-ret));
>>>>>>>>> +        goto out;
>>>>>>>>> +    }
>>>>>>>>> +out:
>>>>>>>>> +    btrfs_commit_transaction(trans, root);
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
>>>>>>>>> +{
>>>>>>>>> +    struct btrfs_block_group_cache *bg;
>>>>>>>>> +    u64 start;
>>>>>>>>> +    u64 nbytes;
>>>>>>>>> +    u64 alloc_profile;
>>>>>>>>> +    u64 flags;
>>>>>>>>> +    int ret;
>>>>>>>>> +
>>>>>>>>> +    alloc_profile = (fs_info->avail_metadata_alloc_bits &
>>>>>>>>> +             fs_info->metadata_alloc_profile);
>>>>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
>>>>>>>>> +    if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
>>>>>>>>> +        flags |= BTRFS_BLOCK_GROUP_DATA;
>>>>>>>>> +
>>>>>>>>> +    ret = create_chunk_and_block_group(fs_info, flags, &start,
>>>>>>>>> &nbytes);
>>>>>>>>
>>>>>>>> Why bother allocating a new chunk by yourself?
>>>>>>>>
>>>>>>>> Just mark all block groups full and that's all.
>>>>>>>>
>>>>>>>> Any later tree block allocation like btrfs_search_slot() with
>>>>>>>> @cow = 1
>>>>>>>> will trigger chunk allocation automatically.
>>>>>>>>
>>>>>>>
>>>>>>> Tried to let it happen but BUG_ON with -ENOSPC.
>>>>>>
>>>>>> Then fix it.
>>>>>>
>>>>>> It's not a normal behavior in this case.
>>>>>>
>>>>>> Thanks,
>>>>>> Qu
>>>>>
>>>>> And I think the fix to allow btrfs_reserve_extent() to allocate new
>>>>> chunk will solve a lot of strange BUG_ON().
>>>>>
>>>>> it just occurred to me that, a lot of use cases relies on the
>>>>> assumption
>>>>> that btrfs_reserve_extent() will try to allocate new chunks.
>>>>>
>>>>> Especially for case like convert, certain btrfs check --repair, some
>>>>> rescue tools.
>>>>>
>>>> Sorry for the previous wrong format mail.
>>>>
>>>> Yes, it has many dependency so I considered to do chunk allocation
>>>> manually in the patchset. If fix is not good enough, many funtions
>>>> of btrfs-progs will behave abnormal.
>>>> Since you ask it, I will go to fix it.
>>>
>>> Manually allocation in advance has its advantage, like we can determine
>>> if there is enough space for new chunk instead of checking every return
>>> value with ENOSPC.
>>>
>>>
>>> However in current case, your metadata usage is limited to the new chunk
>>> only.
>>> If there extent tree has quite a lot of problem, and the chunk allocated
>>> is small (if using single profile and small fs), it can easily hit
>>> ENOSPC again, since btrfs doesn't allocate new chunk for later metadata
>>> write.
>>>
>> SAD. After I tried to implement above nice idea, infinite recursive
>> brings me back to the reality.
>>
>> Here is the reason why btrfs_reserve_extent can not allocate chunk
>> by itself if ENOSPC hints:
>>
>> btrfs_cow_block
>> ...
>>    btrfs_reserve_extent
>>      btrfs_alloc_chunk
>>        btrfs_alloc_dev_extent
>>          btrfs_insert_empty_item
>>            ...
>>             btrfs_cow_block
> 
> The order is just wrong.
> 
> We should first check for the new chunk space, not doing the insert
> right now.
> 
> I'll take over the work if you're OK with it.
> 
OK. Thanks a lot.

Thanks,
Su

> Thanks,
> Qu
> 
>>
>> Thanks,
>> Su
>>
>>> So here, we still need to allow btrfs allocate new meta chunk, even we
>>> pre-allocate one meta chunk in advance.
>>>
>>> Thanks,
>>> Qu
>>>>
>>>> Thanks,
>>>> Su
>>>>
>>>>> So this would be a quite nice start point for such fix.
>>>>>
>>>>> Thanks,
>>>>> Qu
>>>>>
>>>>>>
>>>>>>> Do you mean do_chunk_alloc() in btrfs_reserve_extent() which is
>>>>>>> called
>>>>>>> while doing CoW?> In progs, allocation of new chunk during CoW
>>>>>>> depends @root->ref_cows.
>>>>>>> However, @root->ref_cows should be set only if @root is root of a fs
>>>>>>> trees.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Su
>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Qu
>>>>>>>>
>>>>>>>>> +    if (!ret)
>>>>>>>>> +        printf("Create new chunk %llu %llu\n", start, nbytes);
>>>>>>>>> +    else
>>>>>>>>> +        goto err;
>>>>>>>>> +
>>>>>>>>> +    flags = BTRFS_BLOCK_GROUP_METADATA;
>>>>>>>>> +    /* Mark all metadata block groups cached and full in free
>>>>>>>>> space*/
>>>>>>>>> +    ret = modify_block_groups_cache(fs_info, flags, 1);
>>>>>>>>> +    if (ret)
>>>>>>>>> +        goto clear_bg_cache;
>>>>>>>>> +
>>>>>>>>> +    bg = btrfs_lookup_block_group(fs_info, start);
>>>>>>>>> +    if (!bg) {
>>>>>>>>> +        ret = -ENOENT;
>>>>>>>>> +        error("fail to look up block group %llu %llu", start,
>>>>>>>>> nbytes);
>>>>>>>>> +        goto clear_bg_cache;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    /* Clear block group cache just allocated */
>>>>>>>>> +    ret = modify_block_group_cache(fs_info, bg, 0);
>>>>>>>>> +    if (ret)
>>>>>>>>> +        goto clear_bg_cache;
>>>>>>>>> +
>>>>>>>>> +    return 0;
>>>>>>>>> +
>>>>>>>>> +clear_bg_cache:
>>>>>>>>> +    modify_block_groups_cache(fs_info, flags, 0);
>>>>>>>>> +err:
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>      static int check_extent_refs(struct btrfs_root *root,
>>>>>>>>>                       struct cache_tree *extent_cache)
>>>>>>>>>      {
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> -- 
>>>>>>> To unsubscribe from this list: send the line "unsubscribe
>>>>>>> linux-btrfs" in
>>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>>>
>>>>>
>>>>
>>>>
>>>
>>
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Jan. 9, 2018, 7:44 a.m. UTC | #11
On 2017年12月27日 09:11, Su Yue wrote:
> 
> 
[snip]
>>>>
>>>> Manually allocation in advance has its advantage, like we can determine
>>>> if there is enough space for new chunk instead of checking every return
>>>> value with ENOSPC.
>>>>
>>>>
>>>> However in current case, your metadata usage is limited to the new
>>>> chunk
>>>> only.
>>>> If there extent tree has quite a lot of problem, and the chunk
>>>> allocated
>>>> is small (if using single profile and small fs), it can easily hit
>>>> ENOSPC again, since btrfs doesn't allocate new chunk for later metadata
>>>> write.
>>>>
>>> SAD. After I tried to implement above nice idea, infinite recursive
>>> brings me back to the reality.
>>>
>>> Here is the reason why btrfs_reserve_extent can not allocate chunk
>>> by itself if ENOSPC hints:
>>>
>>> btrfs_cow_block
>>> ...
>>>    btrfs_reserve_extent
>>>      btrfs_alloc_chunk
>>>        btrfs_alloc_dev_extent
>>>          btrfs_insert_empty_item
>>>            ...
>>>             btrfs_cow_block
>>
>> The order is just wrong.
>>
>> We should first check for the new chunk space, not doing the insert
>> right now.
>>
>> I'll take over the work if you're OK with it.
>>
> OK. Thanks a lot.
> 
> Thanks,
> Su

Well, even after my preparation patches, the situation is still much
complex than my expectation.

Yes, We could delay chunk/extent tree modification so newer chunk
allocation can be used for extent tree CoW.


But a lot of other infrastructure can't handle it well
The main problem is, we break a lot of ctree.c assumption.

For case like a normal tree CoW:

btrfs_search_slot(root=extent_root)
|- btrfs_cow_block()
   |- btrfs_alloc_tree_block()
      |- btrfs_reserve_extent()
         |- btrfs_alloc_chunk()
            |- insert block group item
               *CoW* extent tree

In above case, extent_root changed even before we cow the extent root.
And will cause later (b == root->node) check fail and trigger a SEGV.

To really address it, we may need a large code rework with dynamic chunk
allocation in mind, which seems to expensive for a rarely used case.

So in short, I'm totally wrong and too optimistic about the feature.
Please use the original way, which is to alloc a new meta chunk manually.

Thanks,
Qu

Patch
diff mbox

diff --git a/cmds-check.c b/cmds-check.c
index d98d4bda7357..311c8a9f45e8 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -10911,6 +10911,86 @@  out:
 	return ret;
 }
 
+static int create_chunk_and_block_group(struct btrfs_fs_info *fs_info,
+					u64 flags, u64 *start, u64 *nbytes)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = fs_info->extent_root;
+	int ret;
+
+	if ((flags & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)
+		return -EINVAL;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		error("error starting transaction %s", strerror(-ret));
+		return ret;
+	}
+	ret = btrfs_alloc_chunk(trans, fs_info, start, nbytes, flags);
+	if (ret) {
+		error("fail to allocate new chunk %s", strerror(-ret));
+		goto out;
+	}
+	ret = btrfs_make_block_group(trans, fs_info, 0, flags,
+			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, *start, *nbytes);
+	if (ret) {
+		error("fail to make block group for chunk %llu %llu %s",
+		      *start, *nbytes, strerror(-ret));
+		goto out;
+	}
+out:
+	btrfs_commit_transaction(trans, root);
+	return ret;
+}
+
+static int force_cow_in_new_chunk(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group_cache *bg;
+	u64 start;
+	u64 nbytes;
+	u64 alloc_profile;
+	u64 flags;
+	int ret;
+
+	alloc_profile = (fs_info->avail_metadata_alloc_bits &
+			 fs_info->metadata_alloc_profile);
+	flags = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS))
+		flags |= BTRFS_BLOCK_GROUP_DATA;
+
+	ret = create_chunk_and_block_group(fs_info, flags, &start, &nbytes);
+	if (!ret)
+		printf("Create new chunk %llu %llu\n", start, nbytes);
+	else
+		goto err;
+
+	flags = BTRFS_BLOCK_GROUP_METADATA;
+	/* Mark all metadata block groups cached and full in free space*/
+	ret = modify_block_groups_cache(fs_info, flags, 1);
+	if (ret)
+		goto clear_bg_cache;
+
+	bg = btrfs_lookup_block_group(fs_info, start);
+	if (!bg) {
+		ret = -ENOENT;
+		error("fail to look up block group %llu %llu", start, nbytes);
+		goto clear_bg_cache;
+	}
+
+	/* Clear block group cache just allocated */
+	ret = modify_block_group_cache(fs_info, bg, 0);
+	if (ret)
+		goto clear_bg_cache;
+
+	return 0;
+
+clear_bg_cache:
+	modify_block_groups_cache(fs_info, flags, 0);
+err:
+	return ret;
+}
+
 static int check_extent_refs(struct btrfs_root *root,
 			     struct cache_tree *extent_cache)
 {