diff mbox series

[28/43] xfs: implement direct writes to zoned RT devices

Message ID 20241211085636.1380516-29-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/43] xfs: constify feature checks | expand

Commit Message

Christoph Hellwig Dec. 11, 2024, 8:54 a.m. UTC
Direct writes to zoned RT devices are extremely simple.  The block
reservation is taken before acquiring the iolock; the iomap direct I/O code
then calls into ->iomap_begin, which returns a "fake" iomap for the entire
requested range.  The actual block allocation is then done from the
submit_io handler using code shared with the buffered I/O path.

The iomap_dio_ops set the bio_set to the (iomap) ioend one and initialize
the embedded ioend, which allows reusing the existing ioend based buffered
I/O completion path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c  |  6 ++--
 fs/xfs/xfs_aops.h  |  3 +-
 fs/xfs/xfs_file.c  | 80 +++++++++++++++++++++++++++++++++++++++++-----
 fs/xfs/xfs_iomap.c | 54 +++++++++++++++++++++++++++++++
 fs/xfs/xfs_iomap.h |  1 +
 5 files changed, 133 insertions(+), 11 deletions(-)

Comments

Darrick J. Wong Dec. 13, 2024, 10:39 p.m. UTC | #1
On Wed, Dec 11, 2024 at 09:54:53AM +0100, Christoph Hellwig wrote:
> Direct writes to zoned RT devices are extremely simple.  After taking the
> block reservation before acquiring the iolock, the iomap direct I/O calls
> into ->iomap_begin which will return a "fake" iomap for the entire
> requested range.  The actual block allocation is then done from the
> submit_io handler using code shared with the buffered I/O path.
> 
> The iomap_dio_ops set the bio_set to the (iomap) ioend one and initialize
> the embedded ioend, which allows reusing the existing ioend based buffered
> I/O completion path.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Yeah that is a lot simpler. :)
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> ---
>  fs/xfs/xfs_aops.c  |  6 ++--
>  fs/xfs/xfs_aops.h  |  3 +-
>  fs/xfs/xfs_file.c  | 80 +++++++++++++++++++++++++++++++++++++++++-----
>  fs/xfs/xfs_iomap.c | 54 +++++++++++++++++++++++++++++++
>  fs/xfs/xfs_iomap.h |  1 +
>  5 files changed, 133 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 67392413216b..a3ca14e811fd 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -137,7 +137,9 @@ xfs_end_ioend(
>  	else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
>  		error = xfs_iomap_write_unwritten(ip, offset, size, false);
>  
> -	if (!error && xfs_ioend_is_append(ioend))
> +	if (!error &&
> +	    !(ioend->io_flags & IOMAP_IOEND_DIRECT) &&
> +	    xfs_ioend_is_append(ioend))
>  		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
>  done:
>  	iomap_finish_ioends(ioend, error);
> @@ -182,7 +184,7 @@ xfs_end_io(
>  	}
>  }
>  
> -static void
> +void
>  xfs_end_bio(
>  	struct bio		*bio)
>  {
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index e0bd68419764..5a7a0f1a0b49 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -9,6 +9,7 @@
>  extern const struct address_space_operations xfs_address_space_operations;
>  extern const struct address_space_operations xfs_dax_aops;
>  
> -int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
> +void xfs_end_bio(struct bio *bio);
>  
>  #endif /* __XFS_AOPS_H__ */
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 195cf60a81b0..1b39000b7c62 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -25,6 +25,7 @@
>  #include "xfs_iomap.h"
>  #include "xfs_reflink.h"
>  #include "xfs_file.h"
> +#include "xfs_aops.h"
>  #include "xfs_zone_alloc.h"
>  
>  #include <linux/dax.h>
> @@ -548,6 +549,9 @@ xfs_dio_write_end_io(
>  	loff_t			offset = iocb->ki_pos;
>  	unsigned int		nofs_flag;
>  
> +	ASSERT(!xfs_is_zoned_inode(ip) ||
> +	       !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
> +
>  	trace_xfs_end_io_direct_write(ip, offset, size);
>  
>  	if (xfs_is_shutdown(ip->i_mount))
> @@ -627,14 +631,51 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
>  	.end_io		= xfs_dio_write_end_io,
>  };
>  
> +static void
> +xfs_dio_zoned_submit_io(
> +	const struct iomap_iter	*iter,
> +	struct bio		*bio,
> +	loff_t			file_offset)
> +{
> +	struct xfs_mount	*mp = XFS_I(iter->inode)->i_mount;
> +	struct xfs_zone_alloc_ctx *ac = iter->private;
> +	xfs_filblks_t		count_fsb;
> +	struct iomap_ioend	*ioend;
> +
> +	count_fsb = XFS_B_TO_FSB(mp, bio->bi_iter.bi_size);
> +	if (count_fsb > ac->reserved_blocks) {
> +		xfs_err(mp,
> +"allocation (%lld) larger than reservation (%lld).",
> +			count_fsb, ac->reserved_blocks);
> +		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> +		bio_io_error(bio);
> +		return;
> +	}
> +	ac->reserved_blocks -= count_fsb;
> +
> +	bio->bi_end_io = xfs_end_bio;
> +	ioend = iomap_init_ioend(iter->inode, bio, file_offset,
> +			IOMAP_IOEND_DIRECT);
> +	xfs_zone_alloc_and_submit(ioend, &ac->open_zone);
> +}
> +
> +static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
> +	.bio_set	= &iomap_ioend_bioset,
> +	.submit_io	= xfs_dio_zoned_submit_io,
> +	.end_io		= xfs_dio_write_end_io,
> +};
> +
>  /*
> - * Handle block aligned direct I/O writes
> + * Handle block aligned direct I/O writes.
>   */
>  static noinline ssize_t
>  xfs_file_dio_write_aligned(
>  	struct xfs_inode	*ip,
>  	struct kiocb		*iocb,
> -	struct iov_iter		*from)
> +	struct iov_iter		*from,
> +	const struct iomap_ops	*ops,
> +	const struct iomap_dio_ops *dops,
> +	struct xfs_zone_alloc_ctx *ac)
>  {
>  	unsigned int		iolock = XFS_IOLOCK_SHARED;
>  	ssize_t			ret;
> @@ -642,7 +683,7 @@ xfs_file_dio_write_aligned(
>  	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
>  	if (ret)
>  		return ret;
> -	ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
> +	ret = xfs_file_write_checks(iocb, from, &iolock, ac);
>  	if (ret)
>  		goto out_unlock;
>  
> @@ -656,11 +697,31 @@ xfs_file_dio_write_aligned(
>  		iolock = XFS_IOLOCK_SHARED;
>  	}
>  	trace_xfs_file_direct_write(iocb, from);
> -	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
> -			   &xfs_dio_write_ops, 0, NULL, 0);
> +	ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
>  out_unlock:
> -	if (iolock)
> -		xfs_iunlock(ip, iolock);
> +	xfs_iunlock(ip, iolock);
> +	return ret;
> +}
> +
> +/*
> + * Handle block aligned direct I/O writes to zoned devices.
> + */
> +static noinline ssize_t
> +xfs_file_dio_write_zoned(
> +	struct xfs_inode	*ip,
> +	struct kiocb		*iocb,
> +	struct iov_iter		*from)
> +{
> +	struct xfs_zone_alloc_ctx ac = { };
> +	ssize_t			ret;
> +
> +	ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
> +	if (ret < 0)
> +		return ret;
> +	ret = xfs_file_dio_write_aligned(ip, iocb, from,
> +			&xfs_zoned_direct_write_iomap_ops,
> +			&xfs_dio_zoned_write_ops, &ac);
> +	xfs_zoned_space_unreserve(ip, &ac);
>  	return ret;
>  }
>  
> @@ -777,7 +838,10 @@ xfs_file_dio_write(
>  	    (xfs_is_always_cow_inode(ip) &&
>  	     (iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
>  		return xfs_file_dio_write_unaligned(ip, iocb, from);
> -	return xfs_file_dio_write_aligned(ip, iocb, from);
> +	if (xfs_is_zoned_inode(ip))
> +		return xfs_file_dio_write_zoned(ip, iocb, from);
> +	return xfs_file_dio_write_aligned(ip, iocb, from,
> +			&xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
>  }
>  
>  static noinline ssize_t
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 402b253ce3a2..9626632883d0 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -965,6 +965,60 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
>  	.iomap_begin		= xfs_direct_write_iomap_begin,
>  };
>  
> +#ifdef CONFIG_XFS_RT
> +/*
> + * This is really simple.  The space has already been reserved before taking the
> + * IOLOCK, the actual block allocation is done just before submitting the bio
> + * and only recorded in the extent map on I/O completion.
> + */
> +static int
> +xfs_zoned_direct_write_iomap_begin(
> +	struct inode		*inode,
> +	loff_t			offset,
> +	loff_t			length,
> +	unsigned		flags,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
> +{
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	int			error;
> +
> +	ASSERT(!(flags & IOMAP_OVERWRITE_ONLY));
> +
> +	/*
> +	 * Needs to be pushed down into the allocator so that only writes into
> +	 * a single zone can be supported.
> +	 */
> +	if (flags & IOMAP_NOWAIT)
> +		return -EAGAIN;
> +
> +	/*
> +	 * Ensure the extent list is in memory so that we don't have to
> +	 * read it from the I/O completion handler.
> +	 */
> +	if (xfs_need_iread_extents(&ip->i_df)) {
> +		xfs_ilock(ip, XFS_ILOCK_EXCL);
> +		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
> +		xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +		if (error)
> +			return error;
> +	}
> +
> +	iomap->type = IOMAP_MAPPED;
> +	iomap->flags = IOMAP_F_DIRTY;
> +	iomap->bdev = ip->i_mount->m_rtdev_targp->bt_bdev;
> +	iomap->offset = offset;
> +	iomap->length = length;
> +	iomap->flags = IOMAP_F_ZONE_APPEND;
> +	iomap->addr = 0;
> +	return 0;
> +}
> +
> +const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
> +	.iomap_begin		= xfs_zoned_direct_write_iomap_begin,
> +};
> +#endif /* CONFIG_XFS_RT */
> +
>  static int
>  xfs_dax_write_iomap_end(
>  	struct inode		*inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index bc8a00cad854..d330c4a581b1 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -51,6 +51,7 @@ xfs_aligned_fsb_count(
>  
>  extern const struct iomap_ops xfs_buffered_write_iomap_ops;
>  extern const struct iomap_ops xfs_direct_write_iomap_ops;
> +extern const struct iomap_ops xfs_zoned_direct_write_iomap_ops;
>  extern const struct iomap_ops xfs_read_iomap_ops;
>  extern const struct iomap_ops xfs_seek_iomap_ops;
>  extern const struct iomap_ops xfs_xattr_iomap_ops;
> -- 
> 2.45.2
> 
>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 67392413216b..a3ca14e811fd 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -137,7 +137,9 @@  xfs_end_ioend(
 	else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
 
-	if (!error && xfs_ioend_is_append(ioend))
+	if (!error &&
+	    !(ioend->io_flags & IOMAP_IOEND_DIRECT) &&
+	    xfs_ioend_is_append(ioend))
 		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
 done:
 	iomap_finish_ioends(ioend, error);
@@ -182,7 +184,7 @@  xfs_end_io(
 	}
 }
 
-static void
+void
 xfs_end_bio(
 	struct bio		*bio)
 {
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index e0bd68419764..5a7a0f1a0b49 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -9,6 +9,7 @@ 
 extern const struct address_space_operations xfs_address_space_operations;
 extern const struct address_space_operations xfs_dax_aops;
 
-int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
+int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
+void xfs_end_bio(struct bio *bio);
 
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 195cf60a81b0..1b39000b7c62 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -25,6 +25,7 @@ 
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
 #include "xfs_file.h"
+#include "xfs_aops.h"
 #include "xfs_zone_alloc.h"
 
 #include <linux/dax.h>
@@ -548,6 +549,9 @@  xfs_dio_write_end_io(
 	loff_t			offset = iocb->ki_pos;
 	unsigned int		nofs_flag;
 
+	ASSERT(!xfs_is_zoned_inode(ip) ||
+	       !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
+
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
 	if (xfs_is_shutdown(ip->i_mount))
@@ -627,14 +631,51 @@  static const struct iomap_dio_ops xfs_dio_write_ops = {
 	.end_io		= xfs_dio_write_end_io,
 };
 
+static void
+xfs_dio_zoned_submit_io(
+	const struct iomap_iter	*iter,
+	struct bio		*bio,
+	loff_t			file_offset)
+{
+	struct xfs_mount	*mp = XFS_I(iter->inode)->i_mount;
+	struct xfs_zone_alloc_ctx *ac = iter->private;
+	xfs_filblks_t		count_fsb;
+	struct iomap_ioend	*ioend;
+
+	count_fsb = XFS_B_TO_FSB(mp, bio->bi_iter.bi_size);
+	if (count_fsb > ac->reserved_blocks) {
+		xfs_err(mp,
+"allocation (%lld) larger than reservation (%lld).",
+			count_fsb, ac->reserved_blocks);
+		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+		bio_io_error(bio);
+		return;
+	}
+	ac->reserved_blocks -= count_fsb;
+
+	bio->bi_end_io = xfs_end_bio;
+	ioend = iomap_init_ioend(iter->inode, bio, file_offset,
+			IOMAP_IOEND_DIRECT);
+	xfs_zone_alloc_and_submit(ioend, &ac->open_zone);
+}
+
+static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
+	.bio_set	= &iomap_ioend_bioset,
+	.submit_io	= xfs_dio_zoned_submit_io,
+	.end_io		= xfs_dio_write_end_io,
+};
+
 /*
- * Handle block aligned direct I/O writes
+ * Handle block aligned direct I/O writes.
  */
 static noinline ssize_t
 xfs_file_dio_write_aligned(
 	struct xfs_inode	*ip,
 	struct kiocb		*iocb,
-	struct iov_iter		*from)
+	struct iov_iter		*from,
+	const struct iomap_ops	*ops,
+	const struct iomap_dio_ops *dops,
+	struct xfs_zone_alloc_ctx *ac)
 {
 	unsigned int		iolock = XFS_IOLOCK_SHARED;
 	ssize_t			ret;
@@ -642,7 +683,7 @@  xfs_file_dio_write_aligned(
 	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
 	if (ret)
 		return ret;
-	ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
+	ret = xfs_file_write_checks(iocb, from, &iolock, ac);
 	if (ret)
 		goto out_unlock;
 
@@ -656,11 +697,31 @@  xfs_file_dio_write_aligned(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 	trace_xfs_file_direct_write(iocb, from);
-	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-			   &xfs_dio_write_ops, 0, NULL, 0);
+	ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
 out_unlock:
-	if (iolock)
-		xfs_iunlock(ip, iolock);
+	xfs_iunlock(ip, iolock);
+	return ret;
+}
+
+/*
+ * Handle block aligned direct I/O writes to zoned devices.
+ */
+static noinline ssize_t
+xfs_file_dio_write_zoned(
+	struct xfs_inode	*ip,
+	struct kiocb		*iocb,
+	struct iov_iter		*from)
+{
+	struct xfs_zone_alloc_ctx ac = { };
+	ssize_t			ret;
+
+	ret = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
+	if (ret < 0)
+		return ret;
+	ret = xfs_file_dio_write_aligned(ip, iocb, from,
+			&xfs_zoned_direct_write_iomap_ops,
+			&xfs_dio_zoned_write_ops, &ac);
+	xfs_zoned_space_unreserve(ip, &ac);
 	return ret;
 }
 
@@ -777,7 +838,10 @@  xfs_file_dio_write(
 	    (xfs_is_always_cow_inode(ip) &&
 	     (iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
 		return xfs_file_dio_write_unaligned(ip, iocb, from);
-	return xfs_file_dio_write_aligned(ip, iocb, from);
+	if (xfs_is_zoned_inode(ip))
+		return xfs_file_dio_write_zoned(ip, iocb, from);
+	return xfs_file_dio_write_aligned(ip, iocb, from,
+			&xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
 }
 
 static noinline ssize_t
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 402b253ce3a2..9626632883d0 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -965,6 +965,60 @@  const struct iomap_ops xfs_direct_write_iomap_ops = {
 	.iomap_begin		= xfs_direct_write_iomap_begin,
 };
 
+#ifdef CONFIG_XFS_RT
+/*
+ * This is really simple.  The space has already been reserved before taking the
+ * IOLOCK, the actual block allocation is done just before submitting the bio
+ * and only recorded in the extent map on I/O completion.
+ */
+static int
+xfs_zoned_direct_write_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	int			error;
+
+	ASSERT(!(flags & IOMAP_OVERWRITE_ONLY));
+
+	/*
+	 * Needs to be pushed down into the allocator so that only writes into
+	 * a single zone can be supported.
+	 */
+	if (flags & IOMAP_NOWAIT)
+		return -EAGAIN;
+
+	/*
+	 * Ensure the extent list is in memory so that we don't have to
+	 * read it from the I/O completion handler.
+	 */
+	if (xfs_need_iread_extents(&ip->i_df)) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (error)
+			return error;
+	}
+
+	iomap->type = IOMAP_MAPPED;
+	iomap->flags = IOMAP_F_DIRTY;
+	iomap->bdev = ip->i_mount->m_rtdev_targp->bt_bdev;
+	iomap->offset = offset;
+	iomap->length = length;
+	iomap->flags |= IOMAP_F_ZONE_APPEND;
+	iomap->addr = 0;
+	return 0;
+}
+
+const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
+	.iomap_begin		= xfs_zoned_direct_write_iomap_begin,
+};
+#endif /* CONFIG_XFS_RT */
+
 static int
 xfs_dax_write_iomap_end(
 	struct inode		*inode,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index bc8a00cad854..d330c4a581b1 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -51,6 +51,7 @@  xfs_aligned_fsb_count(
 
 extern const struct iomap_ops xfs_buffered_write_iomap_ops;
 extern const struct iomap_ops xfs_direct_write_iomap_ops;
+extern const struct iomap_ops xfs_zoned_direct_write_iomap_ops;
 extern const struct iomap_ops xfs_read_iomap_ops;
 extern const struct iomap_ops xfs_seek_iomap_ops;
 extern const struct iomap_ops xfs_xattr_iomap_ops;