diff mbox series

[04/10] iomap: zeroing already holds invalidate_lock in iomap_file_buffered_write_punch_delalloc

Message ID 20240827051028.1751933-5-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/10] iomap: handle a post-direct I/O invalidate race in iomap_write_delalloc_release | expand

Commit Message

Christoph Hellwig Aug. 27, 2024, 5:09 a.m. UTC
All callers of iomap_zero_range already hold invalidate_lock, so we can't
take it again in iomap_file_buffered_write_punch_delalloc.

Use the passed in flags argument to detect if we're called from a zeroing
operation and don't take the lock again in this case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

Comments

Darrick J. Wong Aug. 27, 2024, 4:28 p.m. UTC | #1
On Tue, Aug 27, 2024 at 07:09:51AM +0200, Christoph Hellwig wrote:
> All callers of iomap_zero_range already hold invalidate_lock, so we can't
> take it again in iomap_file_buffered_write_punch_delalloc.

What about the xfs_zero_range call in xfs_file_write_checks?  AFAICT we
don't hold the invalidate lock there.  Did I misread that?

Also, would nested takings of the invalidate lock cause a livelock?  Or
is this actually quite broken now?

--D

> Use the passed in flags argument to detect if we're called from a zeroing
> operation and don't take the lock again in this case.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/iomap/buffered-io.c | 16 +++++++++++-----
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 34de9f58794ad5..574ca413516443 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -1198,8 +1198,8 @@ static int iomap_write_delalloc_scan(struct inode *inode,
>   * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
>   * the code to subtle off-by-one bugs....
>   */
> -static int iomap_write_delalloc_release(struct inode *inode,
> -		loff_t start_byte, loff_t end_byte, iomap_punch_t punch)
> +static int iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
> +		loff_t end_byte, unsigned flags, iomap_punch_t punch)
>  {
>  	loff_t punch_start_byte = start_byte;
>  	loff_t scan_end_byte = min(i_size_read(inode), end_byte);
> @@ -1210,8 +1210,13 @@ static int iomap_write_delalloc_release(struct inode *inode,
>  	 * folios and dirtying them via ->page_mkwrite whilst we walk the
>  	 * cache and perform delalloc extent removal. Failing to do this can
>  	 * leave dirty pages with no space reservation in the cache.
> +	 *
> +	 * For zeroing operations the callers already hold invalidate_lock.
>  	 */
> -	filemap_invalidate_lock(inode->i_mapping);
> +	if (flags & IOMAP_ZERO)
> +		rwsem_assert_held_write(&inode->i_mapping->invalidate_lock);
> +	else
> +		filemap_invalidate_lock(inode->i_mapping);
>  	while (start_byte < scan_end_byte) {
>  		loff_t		data_end;
>  
> @@ -1264,7 +1269,8 @@ static int iomap_write_delalloc_release(struct inode *inode,
>  		error = punch(inode, punch_start_byte,
>  				end_byte - punch_start_byte);
>  out_unlock:
> -	filemap_invalidate_unlock(inode->i_mapping);
> +	if (!(flags & IOMAP_ZERO))
> +		filemap_invalidate_unlock(inode->i_mapping);
>  	return error;
>  }
>  
> @@ -1328,7 +1334,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
>  	if (start_byte >= end_byte)
>  		return 0;
>  
> -	return iomap_write_delalloc_release(inode, start_byte, end_byte,
> +	return iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
>  					punch);
>  }
>  EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
> -- 
> 2.43.0
> 
>
Christoph Hellwig Aug. 28, 2024, 4:51 a.m. UTC | #2
On Tue, Aug 27, 2024 at 09:28:04AM -0700, Darrick J. Wong wrote:
> On Tue, Aug 27, 2024 at 07:09:51AM +0200, Christoph Hellwig wrote:
> > All callers of iomap_zero_range already hold invalidate_lock, so we can't
> > take it again in iomap_file_buffered_write_punch_delalloc.
> 
> What about the xfs_zero_range call in xfs_file_write_checks?  AFAICT we
> don't hold the invalidate lock there.  Did I misread that?

No, I think you're right.  My testing just never managed to hit a short
zero while doing the write prep.

I guess I'll need to do something more complicated than the zero flag
then. I initially added a new flag just for that, and then (wrongly, as you
pointed out) concluded that I don't need it after all.

> Also, would nested takings of the invalidate lock cause a livelock?  Or
> is this actually quite broken now?

It is a cold, hard deadlock.
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 34de9f58794ad5..574ca413516443 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1198,8 +1198,8 @@  static int iomap_write_delalloc_scan(struct inode *inode,
  * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
  * the code to subtle off-by-one bugs....
  */
-static int iomap_write_delalloc_release(struct inode *inode,
-		loff_t start_byte, loff_t end_byte, iomap_punch_t punch)
+static int iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
+		loff_t end_byte, unsigned flags, iomap_punch_t punch)
 {
 	loff_t punch_start_byte = start_byte;
 	loff_t scan_end_byte = min(i_size_read(inode), end_byte);
@@ -1210,8 +1210,13 @@  static int iomap_write_delalloc_release(struct inode *inode,
 	 * folios and dirtying them via ->page_mkwrite whilst we walk the
 	 * cache and perform delalloc extent removal. Failing to do this can
 	 * leave dirty pages with no space reservation in the cache.
+	 *
+	 * For zeroing operations the callers already hold invalidate_lock.
 	 */
-	filemap_invalidate_lock(inode->i_mapping);
+	if (flags & IOMAP_ZERO)
+		rwsem_assert_held_write(&inode->i_mapping->invalidate_lock);
+	else
+		filemap_invalidate_lock(inode->i_mapping);
 	while (start_byte < scan_end_byte) {
 		loff_t		data_end;
 
@@ -1264,7 +1269,8 @@  static int iomap_write_delalloc_release(struct inode *inode,
 		error = punch(inode, punch_start_byte,
 				end_byte - punch_start_byte);
 out_unlock:
-	filemap_invalidate_unlock(inode->i_mapping);
+	if (!(flags & IOMAP_ZERO))
+		filemap_invalidate_unlock(inode->i_mapping);
 	return error;
 }
 
@@ -1328,7 +1334,7 @@  int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 	if (start_byte >= end_byte)
 		return 0;
 
-	return iomap_write_delalloc_release(inode, start_byte, end_byte,
+	return iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
 					punch);
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);