diff mbox series

[03/10] iomap: add a IOMAP_F_ANON_WRITE flag

Message ID 20241219173954.22546-4-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/10] iomap: allow the file system to submit the writeback bios | expand

Commit Message

Christoph Hellwig Dec. 19, 2024, 5:39 p.m. UTC
Add a IOMAP_F_ANON_WRITE flag that indicates that the write I/O does not
have a target block assigned to it yet at iomap time and the file system
will do that in the bio submission handler, splitting the I/O as needed.

This is used to implement Zone Append based I/O for zoned XFS, where
splitting writes to the hardware limits and assigning a zone to them
happens just before sending the I/O off to the block layer, but could
also be useful for other things like compressed I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/filesystems/iomap/design.rst |  4 ++++
 fs/iomap/buffered-io.c                     | 13 +++++++++----
 fs/iomap/direct-io.c                       |  6 ++++--
 include/linux/iomap.h                      |  7 +++++++
 4 files changed, 24 insertions(+), 6 deletions(-)

Comments

Darrick J. Wong Dec. 19, 2024, 6:02 p.m. UTC | #1
On Thu, Dec 19, 2024 at 05:39:08PM +0000, Christoph Hellwig wrote:
> Add a IOMAP_F_ANON_WRITE flag that indicates that the write I/O does not
> have a target block assigned to it yet at iomap time and the file system
> will do that in the bio submission handler, splitting the I/O as needed.
> 
> This is used to implement Zone Append based I/O for zoned XFS, where
> splitting writes to the hardware limits and assigning a zone to them
> happens just before sending the I/O off to the block layer, but could
> also be useful for other things like compressed I/O.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  Documentation/filesystems/iomap/design.rst |  4 ++++
>  fs/iomap/buffered-io.c                     | 13 +++++++++----
>  fs/iomap/direct-io.c                       |  6 ++++--
>  include/linux/iomap.h                      |  7 +++++++
>  4 files changed, 24 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst
> index b0d0188a095e..28ab3758c474 100644
> --- a/Documentation/filesystems/iomap/design.rst
> +++ b/Documentation/filesystems/iomap/design.rst
> @@ -246,6 +246,10 @@ The fields are as follows:
>     * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
>       be set by the filesystem for its own purposes.
>  
> +   * **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
> +     block assigned to it yet and the file system will do that in the bio
> +     submission handler, splitting the I/O as needed.
> +
>     These flags can be set by iomap itself during file operations.
>     The filesystem should supply an ``->iomap_end`` function if it needs
>     to observe these flags:
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 3176dc996fb7..8c18fb2a82e0 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -1691,10 +1691,14 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
>  	 * failure happened so that the file system end I/O handler gets called
>  	 * to clean up.
>  	 */
> -	if (wpc->ops->submit_ioend)
> +	if (wpc->ops->submit_ioend) {
>  		error = wpc->ops->submit_ioend(wpc, error);
> -	else if (!error)
> -		submit_bio(&wpc->ioend->io_bio);
> +	} else {
> +		if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE))
> +			error = -EIO;
> +		if (!error)
> +			submit_bio(&wpc->ioend->io_bio);
> +	}
>  
>  	if (error) {
>  		wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
> @@ -1744,7 +1748,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
>  		return false;
>  	if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
>  		return false;
> -	if (iomap_sector(&wpc->iomap, pos) !=
> +	if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) &&
> +	    iomap_sector(&wpc->iomap, pos) !=
>  	    bio_end_sector(&wpc->ioend->io_bio))
>  		return false;
>  	/*
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index b521eb15759e..641649a04614 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -81,10 +81,12 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
>  		WRITE_ONCE(iocb->private, bio);
>  	}
>  
> -	if (dio->dops && dio->dops->submit_io)
> +	if (dio->dops && dio->dops->submit_io) {
>  		dio->dops->submit_io(iter, bio, pos);
> -	else
> +	} else {
> +		WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE);
>  		submit_bio(bio);

Do we need to error the bio instead of submitting it if
IOMAP_F_ANON_WRITE is set here?  Or are we relying on the block
layer/device will reject an IO to U64_MAX and produce the EIO for us?

If yes, then that's acceptagble to me
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> +	}
>  }
>  
>  ssize_t iomap_dio_complete(struct iomap_dio *dio)
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 31857d4750a9..36a7298b6cea 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -56,6 +56,10 @@ struct vm_fault;
>   *
>   * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
>   * never be merged with the mapping before it.
> + *
> + * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block
> + * assigned to it yet and the file system will do that in the bio submission
> + * handler, splitting the I/O as needed.
>   */
>  #define IOMAP_F_NEW		(1U << 0)
>  #define IOMAP_F_DIRTY		(1U << 1)
> @@ -68,6 +72,7 @@ struct vm_fault;
>  #endif /* CONFIG_BUFFER_HEAD */
>  #define IOMAP_F_XATTR		(1U << 5)
>  #define IOMAP_F_BOUNDARY	(1U << 6)
> +#define IOMAP_F_ANON_WRITE	(1U << 7)
>  
>  /*
>   * Flags set by the core iomap code during operations:
> @@ -111,6 +116,8 @@ struct iomap {
>  
>  static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
>  {
> +	if (iomap->flags & IOMAP_F_ANON_WRITE)
> +		return U64_MAX; /* invalid */
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> -- 
> 2.45.2
> 
>
Christoph Hellwig Dec. 19, 2024, 6:24 p.m. UTC | #2
On Thu, Dec 19, 2024 at 10:02:08AM -0800, Darrick J. Wong wrote:
> Do we need to error the bio instead of submitting it if
> IOMAP_F_ANON_WRITE is set here?  Or are we relying on the block
> layer/device will reject an IO to U64_MAX and produce the EIO for us?

I'd rather not add an error return where we previously didn't for
catching a grave programmer error.  I wonder if I should also drop
the error handling in the writeback submission to be consistent?
diff mbox series

Patch

diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst
index b0d0188a095e..28ab3758c474 100644
--- a/Documentation/filesystems/iomap/design.rst
+++ b/Documentation/filesystems/iomap/design.rst
@@ -246,6 +246,10 @@  The fields are as follows:
    * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
      be set by the filesystem for its own purposes.
 
+   * **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
+     block assigned to it yet and the file system will do that in the bio
+     submission handler, splitting the I/O as needed.
+
    These flags can be set by iomap itself during file operations.
    The filesystem should supply an ``->iomap_end`` function if it needs
    to observe these flags:
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 3176dc996fb7..8c18fb2a82e0 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1691,10 +1691,14 @@  static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
 	 * failure happened so that the file system end I/O handler gets called
 	 * to clean up.
 	 */
-	if (wpc->ops->submit_ioend)
+	if (wpc->ops->submit_ioend) {
 		error = wpc->ops->submit_ioend(wpc, error);
-	else if (!error)
-		submit_bio(&wpc->ioend->io_bio);
+	} else {
+		if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE))
+			error = -EIO;
+		if (!error)
+			submit_bio(&wpc->ioend->io_bio);
+	}
 
 	if (error) {
 		wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
@@ -1744,7 +1748,8 @@  static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,
 		return false;
 	if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
 		return false;
-	if (iomap_sector(&wpc->iomap, pos) !=
+	if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) &&
+	    iomap_sector(&wpc->iomap, pos) !=
 	    bio_end_sector(&wpc->ioend->io_bio))
 		return false;
 	/*
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b521eb15759e..641649a04614 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -81,10 +81,12 @@  static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 		WRITE_ONCE(iocb->private, bio);
 	}
 
-	if (dio->dops && dio->dops->submit_io)
+	if (dio->dops && dio->dops->submit_io) {
 		dio->dops->submit_io(iter, bio, pos);
-	else
+	} else {
+		WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE);
 		submit_bio(bio);
+	}
 }
 
 ssize_t iomap_dio_complete(struct iomap_dio *dio)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 31857d4750a9..36a7298b6cea 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -56,6 +56,10 @@  struct vm_fault;
  *
  * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
  * never be merged with the mapping before it.
+ *
+ * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block
+ * assigned to it yet and the file system will do that in the bio submission
+ * handler, splitting the I/O as needed.
  */
 #define IOMAP_F_NEW		(1U << 0)
 #define IOMAP_F_DIRTY		(1U << 1)
@@ -68,6 +72,7 @@  struct vm_fault;
 #endif /* CONFIG_BUFFER_HEAD */
 #define IOMAP_F_XATTR		(1U << 5)
 #define IOMAP_F_BOUNDARY	(1U << 6)
+#define IOMAP_F_ANON_WRITE	(1U << 7)
 
 /*
  * Flags set by the core iomap code during operations:
@@ -111,6 +116,8 @@  struct iomap {
 
 static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
 {
+	if (iomap->flags & IOMAP_F_ANON_WRITE)
+		return U64_MAX; /* invalid */
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }