
[v3,4/7] xfs: Add extent zeroing support for atomic writes

Message ID 20250102140411.14617-5-john.g.garry@oracle.com (mailing list archive)
State New
Series large atomic writes for xfs

Commit Message

John Garry Jan. 2, 2025, 2:04 p.m. UTC
An atomic write that spans mixed unwritten and mapped extents would be
rejected. This is one reason why the atomic write unit min and max are
currently fixed at the block size.
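
For reference (not part of this patch), a minimal user-space sketch of how
an application can query the advertised atomic write limits mentioned
above, assuming a kernel and uapi/libc headers new enough to expose
STATX_WRITE_ATOMIC and the stx_atomic_write_unit_* fields:

#define _GNU_SOURCE
#include <fcntl.h>       /* AT_FDCWD */
#include <stdio.h>
#include <sys/stat.h>    /* statx(), struct statx, STATX_WRITE_ATOMIC */

int main(int argc, char **argv)
{
	struct statx stx;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	if (statx(AT_FDCWD, argv[1], 0, STATX_WRITE_ATOMIC, &stx) < 0) {
		perror("statx");
		return 1;
	}

	/* The fields are only valid if the kernel reported them back. */
	if (!(stx.stx_mask & STATX_WRITE_ATOMIC)) {
		fprintf(stderr, "atomic write limits not reported\n");
		return 1;
	}

	printf("atomic write unit min: %u\n", stx.stx_atomic_write_unit_min);
	printf("atomic write unit max: %u\n", stx.stx_atomic_write_unit_max);
	printf("atomic write segments max: %u\n",
	       stx.stx_atomic_write_segments_max);
	return 0;
}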

To enable large atomic writes, any unwritten extents need to be zeroed
before the atomic write is issued. So call iomap_dio_zero_unwritten() in
this scenario and then retry the atomic write.

Whether any unwritten extents are present can be detected by passing
IOMAP_DIO_OVERWRITE_ONLY to the original iomap_dio_rw() call.

After iomap_dio_zero_unwritten() has been called, iomap_dio_rw() is
retried - if the retry fails, then something is really wrong.

However, keep the same behaviour for writing a single block, i.e. there
is no need to pre-zero in that case.
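
Again for illustration only (not part of this patch), a hedged sketch of a
multi-block atomic write from user space, assuming headers that define
RWF_ATOMIC and a kernel/filesystem/device combination that supports it.
The length must be a power of two within the advertised unit min/max and
the offset naturally aligned to the length; the sizes below are arbitrary
example values:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>     /* pwritev2(), RWF_ATOMIC */
#include <unistd.h>

int main(int argc, char **argv)
{
	/* 16k write: multiple blocks with a 4k fs block size. */
	const size_t len = 16 * 1024;
	struct iovec iov;
	void *buf;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	/* Atomic writes require direct I/O. */
	fd = open(argv[1], O_RDWR | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* O_DIRECT needs an aligned buffer; align it to the write size. */
	if (posix_memalign(&buf, len, len)) {
		perror("posix_memalign");
		return 1;
	}
	memset(buf, 0xab, len);

	iov.iov_base = buf;
	iov.iov_len = len;

	/* All-or-nothing write of len bytes at offset 0. */
	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) < 0) {
		perror("pwritev2(RWF_ATOMIC)");
		return 1;
	}

	free(buf);
	close(fd);
	return 0;
}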

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_file.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9a435b1ff264..2c810f75dbbd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -578,10 +578,47 @@  xfs_dio_write_end_io(
 	return error;
 }
 
+static int
+xfs_dio_write_end_zero_unwritten(
+	struct kiocb		*iocb,
+	ssize_t			size,
+	int			error,
+	unsigned		flags)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = iocb->ki_pos;
+	unsigned int		nofs_flag;
+
+	trace_xfs_end_io_direct_write(ip, offset, size);
+
+	if (xfs_is_shutdown(ip->i_mount))
+		return -EIO;
+
+	if (error)
+		return error;
+	if (WARN_ON_ONCE(!size))
+		return 0;
+	if (!(flags & IOMAP_DIO_UNWRITTEN))
+		return 0;
+
+	/* Same as xfs_dio_write_end_io() ... */
+	nofs_flag = memalloc_nofs_save();
+
+	error = xfs_iomap_write_unwritten(ip, offset, size, true);
+
+	memalloc_nofs_restore(nofs_flag);
+	return error;
+}
+
 static const struct iomap_dio_ops xfs_dio_write_ops = {
 	.end_io		= xfs_dio_write_end_io,
 };
 
+static const struct iomap_dio_ops xfs_dio_zero_ops = {
+	.end_io		= xfs_dio_write_end_zero_unwritten,
+};
+
 /*
  * Handle block aligned direct I/O writes
  */
@@ -619,6 +656,63 @@  xfs_file_dio_write_aligned(
 	return ret;
 }
 
+static noinline ssize_t
+xfs_file_dio_write_atomic(
+	struct xfs_inode	*ip,
+	struct kiocb		*iocb,
+	struct iov_iter		*from)
+{
+	unsigned int		iolock = XFS_IOLOCK_SHARED;
+	bool			do_zero = false;
+	unsigned int		dio_flags;
+	ssize_t			ret;
+
+	/*
+	 * Zero unwritten only for writing multiple blocks. Leverage
+	 * IOMAP_DIO_OVERWRITE_ONLY detecting when zeroing is required, as
+	 * it ensures that a single written mapping is provided.
+	 */
+	if (iov_iter_count(from) > ip->i_mount->m_sb.sb_blocksize)
+		dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
+	else
+		dio_flags = 0;
+
+retry:
+	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
+	if (ret)
+		return ret;
+
+	ret = xfs_file_write_checks(iocb, from, &iolock);
+	if (ret)
+		goto out_unlock;
+
+	if (do_zero) {
+		ret = iomap_dio_zero_unwritten(iocb, from,
+				&xfs_direct_write_iomap_ops,
+				&xfs_dio_zero_ops);
+		if (ret)
+			goto out_unlock;
+	}
+
+	trace_xfs_file_direct_write(iocb, from);
+	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
+			&xfs_dio_write_ops, dio_flags, NULL, 0);
+
+	if (do_zero && ret < 0)
+		goto out_unlock;
+
+	if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
+		xfs_iunlock(ip, iolock);
+		do_zero = true;
+		goto retry;
+	}
+
+out_unlock:
+	if (iolock)
+		xfs_iunlock(ip, iolock);
+	return ret;
+}
+
 /*
  * Handle block unaligned direct I/O writes
  *
@@ -723,6 +817,8 @@  xfs_file_dio_write(
 		return -EINVAL;
 	if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
 		return xfs_file_dio_write_unaligned(ip, iocb, from);
+	if (iocb->ki_flags & IOCB_ATOMIC)
+		return xfs_file_dio_write_atomic(ip, iocb, from);
 	return xfs_file_dio_write_aligned(ip, iocb, from);
 }