diff mbox series

[v4,2/3] ocfs2: adjust enabling place for la window

Message ID 20240327082146.6258-3-heming.zhao@suse.com (mailing list archive)
State New
Headers show
Series improve write IO performance when fragmentation is high | expand

Commit Message

Heming Zhao March 27, 2024, 8:21 a.m. UTC
After introducing gd->bg_contig_free_bits, the code path
'ocfs2_cluster_group_search() => ocfs2_local_alloc_seen_free_bits()'
becomes death when all the gd->bg_contig_free_bits are set to the
correct value. This patch relocates ocfs2_local_alloc_seen_free_bits()
to a more appropriate location. (The new place being
ocfs2_block_group_set_bits().)

In ocfs2_local_alloc_seen_free_bits(), the scope of the spin-lock has
been adjusted to reduce meaningless lock races. e.g: when userspace
creates & deletes 1 cluster_size files in parallel, acquiring the
spin-lock in ocfs2_local_alloc_seen_free_bits() is totally pointless and
impedes IO performance.

Signed-off-by: Heming Zhao <heming.zhao@suse.com>
---
 fs/ocfs2/localalloc.c | 15 ++++++++-------
 fs/ocfs2/suballoc.c   |  9 ++-------
 2 files changed, 10 insertions(+), 14 deletions(-)

Comments

Joseph Qi March 27, 2024, 11:05 a.m. UTC | #1
On 3/27/24 4:21 PM, Heming Zhao wrote:
> After introducing gd->bg_contig_free_bits, the code path
> 'ocfs2_cluster_group_search() => ocfs2_local_alloc_seen_free_bits()'
> becomes death when all the gd->bg_contig_free_bits are set to the
> correct value. This patch relocates ocfs2_local_alloc_seen_free_bits()
> to a more appropriate location. (The new place being
> ocfs2_block_group_set_bits().)
> 
> In ocfs2_local_alloc_seen_free_bits(), the scope of the spin-lock has
> been adjusted to reduce meaningless lock races. e.g: when userspace
> creates & deletes 1 cluster_size files in parallel, acquiring the
> spin-lock in ocfs2_local_alloc_seen_free_bits() is totally pointless and
> impedes IO performance.
> 
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
>  fs/ocfs2/localalloc.c | 15 ++++++++-------
>  fs/ocfs2/suballoc.c   |  9 ++-------
>  2 files changed, 10 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
> index c803c10dd97e..2391b96b8a3b 100644
> --- a/fs/ocfs2/localalloc.c
> +++ b/fs/ocfs2/localalloc.c
> @@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
>  void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
>  				      unsigned int num_clusters)
>  {
> -	spin_lock(&osb->osb_lock);
> -	if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
> -	    osb->local_alloc_state == OCFS2_LA_THROTTLED)
> -		if (num_clusters >= osb->local_alloc_default_bits) {
> +	if (num_clusters >= osb->local_alloc_default_bits) {
> +		spin_lock(&osb->osb_lock);
> +		if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
> +		    osb->local_alloc_state == OCFS2_LA_THROTTLED)
>  			cancel_delayed_work(&osb->la_enable_wq);
> -			osb->local_alloc_state = OCFS2_LA_ENABLED;
> -		}
> -	spin_unlock(&osb->osb_lock);
> +
> +		osb->local_alloc_state = OCFS2_LA_ENABLED;
> +		spin_unlock(&osb->osb_lock);
> +	}

This makes checking osb->local_alloc_default_bits outside osb_lock.

Joseph

>  }
>  
>  void ocfs2_la_enable_worker(struct work_struct *work)
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index 6fd67c8da9fe..4163554b0383 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -1374,6 +1374,7 @@ int ocfs2_block_group_set_bits(handle_t *handle,
>  	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
>  	unsigned int start = bit_off + num_bits;
>  	u16 contig_bits;
> +	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
>  
>  	/* All callers get the descriptor via
>  	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
> @@ -1423,6 +1424,7 @@ int ocfs2_block_group_set_bits(handle_t *handle,
>  		if (contig_bits > max_contig_bits)
>  			max_contig_bits = contig_bits;
>  		bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
> +		ocfs2_local_alloc_seen_free_bits(osb, max_contig_bits);
>  	} else {
>  		bg->bg_contig_free_bits = 0;
>  	}
> @@ -1589,13 +1591,6 @@ static int ocfs2_cluster_group_search(struct inode *inode,
>  		 * of bits. */
>  		if (min_bits <= res->sr_bits)
>  			search = 0; /* success */
> -		else if (res->sr_bits) {
> -			/*
> -			 * Don't show bits which we'll be returning
> -			 * for allocation to the local alloc bitmap.
> -			 */
> -			ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
> -		}
>  	}
>  
>  	return search;
Heming Zhao March 27, 2024, 12:59 p.m. UTC | #2
On 3/27/24 19:05, Joseph Qi wrote:
> 
> 
> On 3/27/24 4:21 PM, Heming Zhao wrote:
>> After introducing gd->bg_contig_free_bits, the code path
>> 'ocfs2_cluster_group_search() => ocfs2_local_alloc_seen_free_bits()'
>> becomes death when all the gd->bg_contig_free_bits are set to the
>> correct value. This patch relocates ocfs2_local_alloc_seen_free_bits()
>> to a more appropriate location. (The new place being
>> ocfs2_block_group_set_bits().)
>>
>> In ocfs2_local_alloc_seen_free_bits(), the scope of the spin-lock has
>> been adjusted to reduce meaningless lock races. e.g: when userspace
>> creates & deletes 1 cluster_size files in parallel, acquiring the
>> spin-lock in ocfs2_local_alloc_seen_free_bits() is totally pointless and
>> impedes IO performance.
>>
>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>> ---
>>   fs/ocfs2/localalloc.c | 15 ++++++++-------
>>   fs/ocfs2/suballoc.c   |  9 ++-------
>>   2 files changed, 10 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
>> index c803c10dd97e..2391b96b8a3b 100644
>> --- a/fs/ocfs2/localalloc.c
>> +++ b/fs/ocfs2/localalloc.c
>> @@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
>>   void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
>>   				      unsigned int num_clusters)
>>   {
>> -	spin_lock(&osb->osb_lock);
>> -	if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
>> -	    osb->local_alloc_state == OCFS2_LA_THROTTLED)
>> -		if (num_clusters >= osb->local_alloc_default_bits) {
>> +	if (num_clusters >= osb->local_alloc_default_bits) {
>> +		spin_lock(&osb->osb_lock);
>> +		if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
>> +		    osb->local_alloc_state == OCFS2_LA_THROTTLED)
>>   			cancel_delayed_work(&osb->la_enable_wq);
>> -			osb->local_alloc_state = OCFS2_LA_ENABLED;
>> -		}
>> -	spin_unlock(&osb->osb_lock);
>> +
>> +		osb->local_alloc_state = OCFS2_LA_ENABLED;
>> +		spin_unlock(&osb->osb_lock);
>> +	}
> 
> This makes checking osb->local_alloc_default_bits outside osb_lock.
> 
> Joseph

I known, in my view, osb->local_alloc_default_bits doesn't change
after mounting the volume. So, using osb_lock to protect this
variable is pointless.

-Heming
Joseph Qi March 28, 2024, 1:58 a.m. UTC | #3
On 3/27/24 8:59 PM, Heming Zhao wrote:
> On 3/27/24 19:05, Joseph Qi wrote:
>>
>>
>> On 3/27/24 4:21 PM, Heming Zhao wrote:
>>> After introducing gd->bg_contig_free_bits, the code path
>>> 'ocfs2_cluster_group_search() => ocfs2_local_alloc_seen_free_bits()'
>>> becomes death when all the gd->bg_contig_free_bits are set to the
>>> correct value. This patch relocates ocfs2_local_alloc_seen_free_bits()
>>> to a more appropriate location. (The new place being
>>> ocfs2_block_group_set_bits().)
>>>
>>> In ocfs2_local_alloc_seen_free_bits(), the scope of the spin-lock has
>>> been adjusted to reduce meaningless lock races. e.g: when userspace
>>> creates & deletes 1 cluster_size files in parallel, acquiring the
>>> spin-lock in ocfs2_local_alloc_seen_free_bits() is totally pointless and
>>> impedes IO performance.
>>>
>>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>>> ---
>>>   fs/ocfs2/localalloc.c | 15 ++++++++-------
>>>   fs/ocfs2/suballoc.c   |  9 ++-------
>>>   2 files changed, 10 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
>>> index c803c10dd97e..2391b96b8a3b 100644
>>> --- a/fs/ocfs2/localalloc.c
>>> +++ b/fs/ocfs2/localalloc.c
>>> @@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
>>>   void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
>>>                         unsigned int num_clusters)
>>>   {
>>> -    spin_lock(&osb->osb_lock);
>>> -    if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
>>> -        osb->local_alloc_state == OCFS2_LA_THROTTLED)
>>> -        if (num_clusters >= osb->local_alloc_default_bits) {
>>> +    if (num_clusters >= osb->local_alloc_default_bits) {
>>> +        spin_lock(&osb->osb_lock);
>>> +        if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
>>> +            osb->local_alloc_state == OCFS2_LA_THROTTLED)
>>>               cancel_delayed_work(&osb->la_enable_wq);
>>> -            osb->local_alloc_state = OCFS2_LA_ENABLED;
>>> -        }
>>> -    spin_unlock(&osb->osb_lock);
>>> +
>>> +        osb->local_alloc_state = OCFS2_LA_ENABLED;

Seems the above should be along with cancel_delayed_work()?

>>> +        spin_unlock(&osb->osb_lock);
>>> +    }
>>
>> This makes checking osb->local_alloc_default_bits outside osb_lock.
>>
>> Joseph
> 
> I known, in my view, osb->local_alloc_default_bits doesn't change
> after mounting the volume. So, using osb_lock to protect this
> variable is pointless.
> 

Okay, it will be only set during fill super, so I think it's fine.
Heming Zhao March 28, 2024, 2:36 a.m. UTC | #4
On 3/28/24 09:58, Joseph Qi wrote:
> 
> 
> On 3/27/24 8:59 PM, Heming Zhao wrote:
>> On 3/27/24 19:05, Joseph Qi wrote:
>>>
>>>
>>> On 3/27/24 4:21 PM, Heming Zhao wrote:
>>>> After introducing gd->bg_contig_free_bits, the code path
>>>> 'ocfs2_cluster_group_search() => ocfs2_local_alloc_seen_free_bits()'
>>>> becomes death when all the gd->bg_contig_free_bits are set to the
>>>> correct value. This patch relocates ocfs2_local_alloc_seen_free_bits()
>>>> to a more appropriate location. (The new place being
>>>> ocfs2_block_group_set_bits().)
>>>>
>>>> In ocfs2_local_alloc_seen_free_bits(), the scope of the spin-lock has
>>>> been adjusted to reduce meaningless lock races. e.g: when userspace
>>>> creates & deletes 1 cluster_size files in parallel, acquiring the
>>>> spin-lock in ocfs2_local_alloc_seen_free_bits() is totally pointless and
>>>> impedes IO performance.
>>>>
>>>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>>>> ---
>>>>    fs/ocfs2/localalloc.c | 15 ++++++++-------
>>>>    fs/ocfs2/suballoc.c   |  9 ++-------
>>>>    2 files changed, 10 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
>>>> index c803c10dd97e..2391b96b8a3b 100644
>>>> --- a/fs/ocfs2/localalloc.c
>>>> +++ b/fs/ocfs2/localalloc.c
>>>> @@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
>>>>    void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
>>>>                          unsigned int num_clusters)
>>>>    {
>>>> -    spin_lock(&osb->osb_lock);
>>>> -    if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
>>>> -        osb->local_alloc_state == OCFS2_LA_THROTTLED)
>>>> -        if (num_clusters >= osb->local_alloc_default_bits) {
>>>> +    if (num_clusters >= osb->local_alloc_default_bits) {
>>>> +        spin_lock(&osb->osb_lock);
>>>> +        if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
>>>> +            osb->local_alloc_state == OCFS2_LA_THROTTLED)
>>>>                cancel_delayed_work(&osb->la_enable_wq);
>>>> -            osb->local_alloc_state = OCFS2_LA_ENABLED;
>>>> -        }
>>>> -    spin_unlock(&osb->osb_lock);
>>>> +
>>>> +        osb->local_alloc_state = OCFS2_LA_ENABLED;
> 
> Seems the above should be along with cancel_delayed_work()?

Good catch! My stupid mistake.

-Heming

> 
>>>> +        spin_unlock(&osb->osb_lock);
>>>> +    }
>>>
>>> This makes checking osb->local_alloc_default_bits outside osb_lock.
>>>
>>> Joseph
>>
>> I known, in my view, osb->local_alloc_default_bits doesn't change
>> after mounting the volume. So, using osb_lock to protect this
>> variable is pointless.
>>
> 
> Okay, it will be only set during fill super, so I think it's fine.
>
diff mbox series

Patch

diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c803c10dd97e..2391b96b8a3b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -212,14 +212,15 @@  static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
 				      unsigned int num_clusters)
 {
-	spin_lock(&osb->osb_lock);
-	if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
-	    osb->local_alloc_state == OCFS2_LA_THROTTLED)
-		if (num_clusters >= osb->local_alloc_default_bits) {
+	if (num_clusters >= osb->local_alloc_default_bits) {
+		spin_lock(&osb->osb_lock);
+		if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
+		    osb->local_alloc_state == OCFS2_LA_THROTTLED)
 			cancel_delayed_work(&osb->la_enable_wq);
-			osb->local_alloc_state = OCFS2_LA_ENABLED;
-		}
-	spin_unlock(&osb->osb_lock);
+
+		osb->local_alloc_state = OCFS2_LA_ENABLED;
+		spin_unlock(&osb->osb_lock);
+	}
 }
 
 void ocfs2_la_enable_worker(struct work_struct *work)
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 6fd67c8da9fe..4163554b0383 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1374,6 +1374,7 @@  int ocfs2_block_group_set_bits(handle_t *handle,
 	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
 	unsigned int start = bit_off + num_bits;
 	u16 contig_bits;
+	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
 
 	/* All callers get the descriptor via
 	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
@@ -1423,6 +1424,7 @@  int ocfs2_block_group_set_bits(handle_t *handle,
 		if (contig_bits > max_contig_bits)
 			max_contig_bits = contig_bits;
 		bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+		ocfs2_local_alloc_seen_free_bits(osb, max_contig_bits);
 	} else {
 		bg->bg_contig_free_bits = 0;
 	}
@@ -1589,13 +1591,6 @@  static int ocfs2_cluster_group_search(struct inode *inode,
 		 * of bits. */
 		if (min_bits <= res->sr_bits)
 			search = 0; /* success */
-		else if (res->sr_bits) {
-			/*
-			 * Don't show bits which we'll be returning
-			 * for allocation to the local alloc bitmap.
-			 */
-			ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
-		}
 	}
 
 	return search;