diff mbox series

ocfs2: fixing global bitmap allocating failure for discontig type

Message ID 20250327062209.19201-1-heming.zhao@suse.com (mailing list archive)
State New
Headers show
Series ocfs2: fixing global bitmap allocating failure for discontig type | expand

Commit Message

Heming Zhao March 27, 2025, 6:22 a.m. UTC
The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
fragmentation is high") introduced a regression. In the discontiguous
extent allocation case, ocfs2_cluster_group_search() is comparing with
the wrong target length, which causes allocation failure.

Call stack:
ocfs2_mkdir()
 ocfs2_reserve_new_inode()
  ocfs2_reserve_suballoc_bits()
   ocfs2_block_group_alloc()
    ocfs2_block_group_alloc_discontig()
     __ocfs2_claim_clusters()
      ocfs2_claim_suballoc_bits()
       ocfs2_search_chain()
        ocfs2_cluster_group_search()

Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
---
 fs/ocfs2/suballoc.c | 14 +++++++++++---
 fs/ocfs2/suballoc.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

Comments

Heming Zhao March 27, 2025, 6:43 a.m. UTC | #1
Hello list,

I wrote this patch based on Gautham's patch code logic. Because I lack a
test script to verify this patch, this patch only passes compilation.

btw, another topic (unrelated to this bug) is that ocfs2-test fails to run
on the latest Linux distributions (e.g., openSUSE Tumbleweed). I am focusing
on fixing this problem, but it requires some time. I have created a personal
repository [1]. Once I finish the verification process for ocfs2-test, I will
submit a PR to the upstream GitHub repository [2] (the suse-py3 branch).

[1]: https://build.opensuse.org/package/show/home:hmzhao:branches:network:ha-clustering:Factory/ocfs2-test
[2]: https://github.com/markfasheh/ocfs2-test

- Heming

On 3/27/25 14:22, Heming Zhao wrote:
> The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
> fragmentation is high") introduced a regression. In the discontiguous
> extent allocation case, ocfs2_cluster_group_search() is comparing with
> the wrong target length, which causes allocation failure.
> 
> Call stack:
> ocfs2_mkdir()
>   ocfs2_reserve_new_inode()
>    ocfs2_reserve_suballoc_bits()
>     ocfs2_block_group_alloc()
>      ocfs2_block_group_alloc_discontig()
>       __ocfs2_claim_clusters()
>        ocfs2_claim_suballoc_bits()
>         ocfs2_search_chain()
>          ocfs2_cluster_group_search()
> 
> Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
> Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
>   fs/ocfs2/suballoc.c | 14 +++++++++++---
>   fs/ocfs2/suballoc.h |  1 +
>   2 files changed, 12 insertions(+), 3 deletions(-)
>
Joseph Qi April 1, 2025, 11:38 a.m. UTC | #2
On 2025/3/27 14:22, Heming Zhao wrote:
> The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
> fragmentation is high") introduced a regression. In the discontiguous
> extent allocation case, ocfs2_cluster_group_search() is comparing with
> the wrong target length, which causes allocation failure.
> 
> Call stack:
> ocfs2_mkdir()
>  ocfs2_reserve_new_inode()
>   ocfs2_reserve_suballoc_bits()
>    ocfs2_block_group_alloc()
>     ocfs2_block_group_alloc_discontig()
>      __ocfs2_claim_clusters()
>       ocfs2_claim_suballoc_bits()
>        ocfs2_search_chain()
>         ocfs2_cluster_group_search()
> 
> Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
> Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
>  fs/ocfs2/suballoc.c | 14 +++++++++++---
>  fs/ocfs2/suballoc.h |  1 +
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index f7b483f0de2a..3dea082f6e91 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
>  
>  	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
>  					       ac, cl);
> -	if (PTR_ERR(bg_bh) == -ENOSPC)
> +	if (PTR_ERR(bg_bh) == -ENOSPC) {
> +		ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
>  		bg_bh = ocfs2_block_group_alloc_discontig(handle,
>  							  alloc_inode,
>  							  ac, cl);
> +	}
>  	if (IS_ERR(bg_bh)) {
>  		status = PTR_ERR(bg_bh);
>  		bg_bh = NULL;
> @@ -2365,7 +2367,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
>  	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
>  
>  	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
> -	       && ac->ac_which != OCFS2_AC_USE_MAIN);
> +	       && ac->ac_which != OCFS2_AC_USE_MAIN
> +	       && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
>  
>  	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
>  		WARN_ON(min_clusters > 1);
> @@ -2427,7 +2430,12 @@ int ocfs2_claim_clusters(handle_t *handle,
>  			 u32 *cluster_start,
>  			 u32 *num_clusters)
>  {
> -	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
> +	unsigned int bits_wanted;
> +
> +	if (ac->ac_which == OCFS2_AC_USE_MAIN)
> +		bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
> +	else /* ac_which == OCFS2_AC_USE_MAIN_DISCONTIG */
> +		bits_wanted = min_clusters;

This looks wried. Why can not be other alloc type?

Or it seems you intend to:

unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;

if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG)
	bits_wanted = min_clusters;

Thanks,
Joseph

>  
>  	return __ocfs2_claim_clusters(handle, ac, min_clusters,
>  				      bits_wanted, cluster_start, num_clusters);
> diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
> index b481b834857d..bcf2ed4a8631 100644
> --- a/fs/ocfs2/suballoc.h
> +++ b/fs/ocfs2/suballoc.h
> @@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
>  #define OCFS2_AC_USE_MAIN  2
>  #define OCFS2_AC_USE_INODE 3
>  #define OCFS2_AC_USE_META  4
> +#define OCFS2_AC_USE_MAIN_DISCONTIG  5
>  	u32    ac_which;
>  
>  	/* these are used by the chain search */
Heming Zhao April 1, 2025, 1:50 p.m. UTC | #3
On 4/1/25 19:38, Joseph Qi wrote:
> 
> 
> On 2025/3/27 14:22, Heming Zhao wrote:
>> The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
>> fragmentation is high") introduced a regression. In the discontiguous
>> extent allocation case, ocfs2_cluster_group_search() is comparing with
>> the wrong target length, which causes allocation failure.
>>
>> Call stack:
>> ocfs2_mkdir()
>>   ocfs2_reserve_new_inode()
>>    ocfs2_reserve_suballoc_bits()
>>     ocfs2_block_group_alloc()
>>      ocfs2_block_group_alloc_discontig()
>>       __ocfs2_claim_clusters()
>>        ocfs2_claim_suballoc_bits()
>>         ocfs2_search_chain()
>>          ocfs2_cluster_group_search()
>>
>> Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
>> Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>> ---
>>   fs/ocfs2/suballoc.c | 14 +++++++++++---
>>   fs/ocfs2/suballoc.h |  1 +
>>   2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
>> index f7b483f0de2a..3dea082f6e91 100644
>> --- a/fs/ocfs2/suballoc.c
>> +++ b/fs/ocfs2/suballoc.c
>> @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
>>   
>>   	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
>>   					       ac, cl);
>> -	if (PTR_ERR(bg_bh) == -ENOSPC)
>> +	if (PTR_ERR(bg_bh) == -ENOSPC) {
>> +		ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
>>   		bg_bh = ocfs2_block_group_alloc_discontig(handle,
>>   							  alloc_inode,
>>   							  ac, cl);
>> +	}
>>   	if (IS_ERR(bg_bh)) {
>>   		status = PTR_ERR(bg_bh);
>>   		bg_bh = NULL;
>> @@ -2365,7 +2367,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
>>   	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
>>   
>>   	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
>> -	       && ac->ac_which != OCFS2_AC_USE_MAIN);
>> +	       && ac->ac_which != OCFS2_AC_USE_MAIN
>> +	       && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
>>   
>>   	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
>>   		WARN_ON(min_clusters > 1);
>> @@ -2427,7 +2430,12 @@ int ocfs2_claim_clusters(handle_t *handle,
>>   			 u32 *cluster_start,
>>   			 u32 *num_clusters)
>>   {
>> -	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
>> +	unsigned int bits_wanted;
>> +
>> +	if (ac->ac_which == OCFS2_AC_USE_MAIN)
>> +		bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
>> +	else /* ac_which == OCFS2_AC_USE_MAIN_DISCONTIG */
>> +		bits_wanted = min_clusters;
> 
> This looks wried. Why can not be other alloc type?
> 
> Or it seems you intend to:
> 
> unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
> 
> if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG)
> 	bits_wanted = min_clusters;
> 
> Thanks,
> Joseph

Yes, you are right.
We should use OCFS2_AC_USE_MAIN_DISCONTIG to handle the special discontig case.
Will send v2 patch for this mistake.

- Heming
> 
>>   
>>   	return __ocfs2_claim_clusters(handle, ac, min_clusters,
>>   				      bits_wanted, cluster_start, num_clusters);
>> diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
>> index b481b834857d..bcf2ed4a8631 100644
>> --- a/fs/ocfs2/suballoc.h
>> +++ b/fs/ocfs2/suballoc.h
>> @@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
>>   #define OCFS2_AC_USE_MAIN  2
>>   #define OCFS2_AC_USE_INODE 3
>>   #define OCFS2_AC_USE_META  4
>> +#define OCFS2_AC_USE_MAIN_DISCONTIG  5
>>   	u32    ac_which;
>>   
>>   	/* these are used by the chain search */
>
diff mbox series

Patch

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f7b483f0de2a..3dea082f6e91 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -698,10 +698,12 @@  static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
 					       ac, cl);
-	if (PTR_ERR(bg_bh) == -ENOSPC)
+	if (PTR_ERR(bg_bh) == -ENOSPC) {
+		ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
 		bg_bh = ocfs2_block_group_alloc_discontig(handle,
 							  alloc_inode,
 							  ac, cl);
+	}
 	if (IS_ERR(bg_bh)) {
 		status = PTR_ERR(bg_bh);
 		bg_bh = NULL;
@@ -2365,7 +2367,8 @@  int __ocfs2_claim_clusters(handle_t *handle,
 	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
 
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
-	       && ac->ac_which != OCFS2_AC_USE_MAIN);
+	       && ac->ac_which != OCFS2_AC_USE_MAIN
+	       && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
 
 	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
 		WARN_ON(min_clusters > 1);
@@ -2427,7 +2430,12 @@  int ocfs2_claim_clusters(handle_t *handle,
 			 u32 *cluster_start,
 			 u32 *num_clusters)
 {
-	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+	unsigned int bits_wanted;
+
+	if (ac->ac_which == OCFS2_AC_USE_MAIN)
+		bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+	else /* ac_which == OCFS2_AC_USE_MAIN_DISCONTIG */
+		bits_wanted = min_clusters;
 
 	return __ocfs2_claim_clusters(handle, ac, min_clusters,
 				      bits_wanted, cluster_start, num_clusters);
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index b481b834857d..bcf2ed4a8631 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -29,6 +29,7 @@  struct ocfs2_alloc_context {
 #define OCFS2_AC_USE_MAIN  2
 #define OCFS2_AC_USE_INODE 3
 #define OCFS2_AC_USE_META  4
+#define OCFS2_AC_USE_MAIN_DISCONTIG  5
 	u32    ac_which;
 
 	/* these are used by the chain search */