diff mbox series

[v2,3/6] iomap: advance the ifs allocation if we have more than one blocks per folio

Message ID 20240812121159.3775074-4-yi.zhang@huaweicloud.com (mailing list archive)
State Accepted, archived
Headers show
Series iomap: some minor non-critical fixes and improvements when block size < folio size | expand

Commit Message

Zhang Yi Aug. 12, 2024, 12:11 p.m. UTC
From: Zhang Yi <yi.zhang@huawei.com>

Currently, when a folio contains more than one block
(i_blocks_per_folio > 1), the ifs is only allocated when dirty folios
are written back in iomap_writepage_map(). As a result, a buffered write
that covers an entire folio does not attach an ifs until writeback
starts. If that folio is partially truncated in the meantime,
iomap_invalidate_folio() can't clear the dirty bits of the corresponding
blocks as expected. Fix this by moving the ifs allocation earlier, into
__iomap_write_begin().

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/iomap/buffered-io.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

Comments

yangerkun Aug. 12, 2024, 12:47 p.m. UTC | #1
在 2024/8/12 20:11, Zhang Yi 写道:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> Now we allocate ifs if i_blocks_per_folio is larger than one when
> writing back dirty folios in iomap_writepage_map(), so we don't attach
> an ifs after buffer write to an entire folio until it starts writing
> back, if we partial truncate that folio, iomap_invalidate_folio() can't
> clear counterpart block's dirty bit as expected. Fix this by advance the
> ifs allocation to __iomap_write_begin().
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>   fs/iomap/buffered-io.c | 17 ++++++++++++-----
>   1 file changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 763deabe8331..79031b7517e5 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -699,6 +699,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
>   	size_t from = offset_in_folio(folio, pos), to = from + len;
>   	size_t poff, plen;
>   
> +	if (nr_blocks > 1) {
> +		ifs = ifs_alloc(iter->inode, folio, iter->flags);
> +		if ((iter->flags & IOMAP_NOWAIT) && !ifs)
> +			return -EAGAIN;
> +	}
> +
>   	/*
>   	 * If the write or zeroing completely overlaps the current folio, then
>   	 * entire folio will be dirtied so there is no need for

The comment above needs to be changed too.


> @@ -710,10 +716,6 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
>   	    pos + len >= folio_pos(folio) + folio_size(folio))
>   		return 0;
>   
> -	ifs = ifs_alloc(iter->inode, folio, iter->flags);
> -	if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
> -		return -EAGAIN;
> -
>   	if (folio_test_uptodate(folio))
>   		return 0;
>   
> @@ -1928,7 +1930,12 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>   	WARN_ON_ONCE(end_pos <= pos);
>   
>   	if (i_blocks_per_folio(inode, folio) > 1) {
> -		if (!ifs) {
> +		/*
> +		 * This should not happen since we always allocate ifs in
> +		 * iomap_folio_mkwrite_iter() and there is more than one
> +		 * blocks per folio in __iomap_write_begin().
> +		 */
> +		if (WARN_ON_ONCE(!ifs)) {
>   			ifs = ifs_alloc(inode, folio, 0);
>   			iomap_set_range_dirty(folio, 0, end_pos - pos);
>   		}
Zhang Yi Aug. 13, 2024, 2:21 a.m. UTC | #2
On 2024/8/12 20:47, yangerkun wrote:
> 
> 
> 在 2024/8/12 20:11, Zhang Yi 写道:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> Now we allocate ifs if i_blocks_per_folio is larger than one when
>> writing back dirty folios in iomap_writepage_map(), so we don't attach
>> an ifs after buffer write to an entire folio until it starts writing
>> back, if we partial truncate that folio, iomap_invalidate_folio() can't
>> clear counterpart block's dirty bit as expected. Fix this by advance the
>> ifs allocation to __iomap_write_begin().
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>>   fs/iomap/buffered-io.c | 17 ++++++++++++-----
>>   1 file changed, 12 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
>> index 763deabe8331..79031b7517e5 100644
>> --- a/fs/iomap/buffered-io.c
>> +++ b/fs/iomap/buffered-io.c
>> @@ -699,6 +699,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
>>       size_t from = offset_in_folio(folio, pos), to = from + len;
>>       size_t poff, plen;
>>   +    if (nr_blocks > 1) {
>> +        ifs = ifs_alloc(iter->inode, folio, iter->flags);
>> +        if ((iter->flags & IOMAP_NOWAIT) && !ifs)
>> +            return -EAGAIN;
>> +    }
>> +
>>       /*
>>        * If the write or zeroing completely overlaps the current folio, then
>>        * entire folio will be dirtied so there is no need for
> 
> The comments upper need change too.

Will update as well, thanks for pointing this out.

Thanks,
Yi.

> 
> 
>> @@ -710,10 +716,6 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
>>           pos + len >= folio_pos(folio) + folio_size(folio))
>>           return 0;
>>   -    ifs = ifs_alloc(iter->inode, folio, iter->flags);
>> -    if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
>> -        return -EAGAIN;
>> -
>>       if (folio_test_uptodate(folio))
>>           return 0;
>>   @@ -1928,7 +1930,12 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>>       WARN_ON_ONCE(end_pos <= pos);
>>         if (i_blocks_per_folio(inode, folio) > 1) {
>> -        if (!ifs) {
>> +        /*
>> +         * This should not happen since we always allocate ifs in
>> +         * iomap_folio_mkwrite_iter() and there is more than one
>> +         * blocks per folio in __iomap_write_begin().
>> +         */
>> +        if (WARN_ON_ONCE(!ifs)) {
>>               ifs = ifs_alloc(inode, folio, 0);
>>               iomap_set_range_dirty(folio, 0, end_pos - pos);
>>           }
Christoph Hellwig Aug. 14, 2024, 5:32 a.m. UTC | #3
On Mon, Aug 12, 2024 at 08:11:56PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> Now we allocate ifs if i_blocks_per_folio is larger than one when
> writing back dirty folios in iomap_writepage_map(), so we don't attach
> an ifs after buffer write to an entire folio until it starts writing
> back, if we partial truncate that folio, iomap_invalidate_folio() can't
> clear counterpart block's dirty bit as expected. Fix this by advance the
> ifs allocation to __iomap_write_begin().

Wouldn't it make more sense to only allocate the ifs in
iomap_invalidate_folio when it actually is needed?

Also do you have a reproducer for this?
Zhang Yi Aug. 14, 2024, 7:08 a.m. UTC | #4
On 2024/8/14 13:32, Christoph Hellwig wrote:
> On Mon, Aug 12, 2024 at 08:11:56PM +0800, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> Now we allocate ifs if i_blocks_per_folio is larger than one when
>> writing back dirty folios in iomap_writepage_map(), so we don't attach
>> an ifs after buffer write to an entire folio until it starts writing
>> back, if we partial truncate that folio, iomap_invalidate_folio() can't
>> clear counterpart block's dirty bit as expected. Fix this by advance the
>> ifs allocation to __iomap_write_begin().
> 
> Wouldn't it make more sense to only allocate the ifs in
> iomap_invalidate_folio when it actually is needed?
> 

So you mean the current strategy for allocating the ifs is to delay the
allocation as long as possible? The advantage is that it could avoid
some unnecessary allocations if the whole folio is invalidated before
writeback, right?

> Also do you have a reproducer for this?
> 

This mistake doesn't cause any real problem now, because once the folio
has been partially truncated, the corresponding range becomes a hole.
Although the ifs dirty bit is not cleared, iomap_writepage_map_blocks()
can deal with it and it won't cause any problem. Hence I don't have a
reproducer for this.

Thanks,
Yi.
Christoph Hellwig Aug. 15, 2024, 6 a.m. UTC | #5
On Wed, Aug 14, 2024 at 03:08:25PM +0800, Zhang Yi wrote:
> > iomap_invalidate_folio when it actually is needed?
> > 
> 
> Therefore, you mean current strategy of allocating ifs is to try to delay
> the allocation time as much as possible?

Yes.

> The advantage is that it could
> avoid some unnecessary allocation operations if the whole folio are
> invalidated before write back. right?

Yes.  And hopefully we can also get to the point where we don't need
to actually allocate it for writeback.  I've been wanting to do that
for a while but never got it.
Zhang Yi Aug. 16, 2024, 1:44 a.m. UTC | #6
On 2024/8/15 14:00, Christoph Hellwig wrote:
> On Wed, Aug 14, 2024 at 03:08:25PM +0800, Zhang Yi wrote:
>>> iomap_invalidate_folio when it actually is needed?
>>>
>>
>> Therefore, you mean current strategy of allocating ifs is to try to delay
>> the allocation time as much as possible?
> 
> Yes.
> 
>> The advantage is that it could
>> avoid some unnecessary allocation operations if the whole folio are
>> invalidated before write back. right?
> 
> Yes.  And hopefully we can also get to the point where we don't need
> to actually allocate it for writeback.  I've been wanting to do that
> for a while but never got it.
> 

Yeah, this sounds like a good idea.

Thanks,
Yi.
Zhang Yi Aug. 17, 2024, 4:27 a.m. UTC | #7
On 2024/8/14 13:32, Christoph Hellwig wrote:
> On Mon, Aug 12, 2024 at 08:11:56PM +0800, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> Now we allocate ifs if i_blocks_per_folio is larger than one when
>> writing back dirty folios in iomap_writepage_map(), so we don't attach
>> an ifs after buffer write to an entire folio until it starts writing
>> back, if we partial truncate that folio, iomap_invalidate_folio() can't
>> clear counterpart block's dirty bit as expected. Fix this by advance the
>> ifs allocation to __iomap_write_begin().
> 
> Wouldn't it make more sense to only allocate the ifs in
> iomap_invalidate_folio when it actually is needed?
> 

I forgot to mention that truncate_inode_partial_folio() calls
folio_invalidate()->iomap_invalidate_folio() only when the folio has
private data; if the folio doesn't have an ifs, iomap_invalidate_folio()
would never be called, hence allocating the ifs in
iomap_invalidate_folio() is useless.

In my opinion, one solution is to change truncate_inode_partial_folio()
to always call folio_invalidate(); all callbacks should then handle the
no-private case. Another solution is to add a magic value (a fake ifs)
to folio->private and then convert it to a real one in
iomap_invalidate_folio(). Any thoughts?

Thanks,
Yi.
Matthew Wilcox Aug. 17, 2024, 4:42 a.m. UTC | #8
On Sat, Aug 17, 2024 at 12:27:49PM +0800, Zhang Yi wrote:
> On 2024/8/14 13:32, Christoph Hellwig wrote:
> > On Mon, Aug 12, 2024 at 08:11:56PM +0800, Zhang Yi wrote:
> >> From: Zhang Yi <yi.zhang@huawei.com>
> >>
> >> Now we allocate ifs if i_blocks_per_folio is larger than one when
> >> writing back dirty folios in iomap_writepage_map(), so we don't attach
> >> an ifs after buffer write to an entire folio until it starts writing
> >> back, if we partial truncate that folio, iomap_invalidate_folio() can't
> >> clear counterpart block's dirty bit as expected. Fix this by advance the
> >> ifs allocation to __iomap_write_begin().
> > 
> > Wouldn't it make more sense to only allocate the ifs in
> > iomap_invalidate_folio when it actually is needed?
> > 
> 
> I forget to mention that truncate_inode_partial_folio() call
> folio_invalidate()->iomap_invalidate_folio() only when the folio has
> private, if the folio doesn't has ifs, the iomap_invalidate_folio()
> would nerver be called, hence allocate the ifs in
> iomap_invalidate_folio() is useless.
> 
> In my opinion, one solution is change to always call folio_invalidate()
> in truncate_inode_partial_folio(), all callbacks should handle the no
> private case. Another solution is add a magic (a fake ifs) to
> folio->private and then convert it to a real one in
> iomap_invalidate_folio(), any thoughts?

Why do we need iomap_invalidate_folio() to be called if there is no ifs?
Even today, all it does is call ifs_free() if we're freeing the entire
folio (which is done by truncate_cleanup_folio() and not by
truncate_inode_partial_folio()).
Zhang Yi Aug. 17, 2024, 6:16 a.m. UTC | #9
On 2024/8/17 12:42, Matthew Wilcox wrote:
> On Sat, Aug 17, 2024 at 12:27:49PM +0800, Zhang Yi wrote:
>> On 2024/8/14 13:32, Christoph Hellwig wrote:
>>> On Mon, Aug 12, 2024 at 08:11:56PM +0800, Zhang Yi wrote:
>>>> From: Zhang Yi <yi.zhang@huawei.com>
>>>>
>>>> Now we allocate ifs if i_blocks_per_folio is larger than one when
>>>> writing back dirty folios in iomap_writepage_map(), so we don't attach
>>>> an ifs after buffer write to an entire folio until it starts writing
>>>> back, if we partial truncate that folio, iomap_invalidate_folio() can't
>>>> clear counterpart block's dirty bit as expected. Fix this by advance the
>>>> ifs allocation to __iomap_write_begin().
>>>
>>> Wouldn't it make more sense to only allocate the ifs in
>>> iomap_invalidate_folio when it actually is needed?
>>>
>>
>> I forget to mention that truncate_inode_partial_folio() call
>> folio_invalidate()->iomap_invalidate_folio() only when the folio has
>> private, if the folio doesn't has ifs, the iomap_invalidate_folio()
>> would nerver be called, hence allocate the ifs in
>> iomap_invalidate_folio() is useless.
>>
>> In my opinion, one solution is change to always call folio_invalidate()
>> in truncate_inode_partial_folio(), all callbacks should handle the no
>> private case. Another solution is add a magic (a fake ifs) to
>> folio->private and then convert it to a real one in
>> iomap_invalidate_folio(), any thoughts?
> 
> Why do we need iomap_invalidate_folio() to be called if there is no ifs?
> Even today, all it does is call ifs_free() if we're freeing the entire
> folio (which is done by truncate_cleanup_folio() and not by
> truncate_inode_partial_folio().
> 
Please see patch 2: if we truncate a partial folio (through punch hole or
truncate) on a filesystem with block size < folio size, it will leave
behind the dirty bits of the truncated/punched blocks, and this will lead
to a hole with dirty bits set but without any block allocated/reserved,
which is not correct.

Hence we also need truncate_inode_partial_folio() to call
iomap_invalidate_folio() and clear the corresponding dirty bits of the
partial folio. But currently we don't allocate an ifs in
__iomap_write_begin() when writing an entire folio, so the folio doesn't
have an ifs, yet we can still partially truncate it; we should allocate
an ifs for it and update the dirty bits in it.

Thanks,
Yi.
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 763deabe8331..79031b7517e5 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -699,6 +699,12 @@  static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 	size_t from = offset_in_folio(folio, pos), to = from + len;
 	size_t poff, plen;
 
+	if (nr_blocks > 1) {
+		ifs = ifs_alloc(iter->inode, folio, iter->flags);
+		if ((iter->flags & IOMAP_NOWAIT) && !ifs)
+			return -EAGAIN;
+	}
+
 	/*
 	 * If the write or zeroing completely overlaps the current folio, then
 	 * entire folio will be dirtied so there is no need for
@@ -710,10 +716,6 @@  static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 	    pos + len >= folio_pos(folio) + folio_size(folio))
 		return 0;
 
-	ifs = ifs_alloc(iter->inode, folio, iter->flags);
-	if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
-		return -EAGAIN;
-
 	if (folio_test_uptodate(folio))
 		return 0;
 
@@ -1928,7 +1930,12 @@  static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	WARN_ON_ONCE(end_pos <= pos);
 
 	if (i_blocks_per_folio(inode, folio) > 1) {
-		if (!ifs) {
+		/*
+		 * This should not happen since we always allocate ifs in
+		 * iomap_folio_mkwrite_iter() and there is more than one
+		 * blocks per folio in __iomap_write_begin().
+		 */
+		if (WARN_ON_ONCE(!ifs)) {
 			ifs = ifs_alloc(inode, folio, 0);
 			iomap_set_range_dirty(folio, 0, end_pos - pos);
 		}