diff mbox series

[v3,2/7] iomap: Add zero unwritten mappings dio support

Message ID 20250102140411.14617-3-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series large atomic writes for xfs | expand

Commit Message

John Garry Jan. 2, 2025, 2:04 p.m. UTC
For atomic writes support, it is required to only ever submit a single bio
(for an atomic write).

Furthermore, currently the atomic write unit min and max limit is fixed at
the FS block size.

For lifting the atomic write unit max limit, it may occur that an atomic
write spans mixed unwritten and mapped extents. For this case, due to the
iterative nature of iomap, multiple bios would be produced, which is
intolerable.

Add a function to zero unwritten extents in a certain range, which may be
used to ensure that unwritten extents are zeroed prior to issuing of an
atomic write.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/iomap/direct-io.c  | 76 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |  3 ++
 2 files changed, 79 insertions(+)
diff mbox series

Patch

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 23fdad16e6a8..18c888f0c11f 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -805,6 +805,82 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
 
+static loff_t
+iomap_dio_zero_unwritten_iter(struct iomap_iter *iter, struct iomap_dio *dio)
+{
+	const struct iomap *iomap = &iter->iomap;
+	loff_t length = iomap_length(iter);
+	loff_t pos = iter->pos;
+
+	if (iomap->type == IOMAP_UNWRITTEN) {
+		int ret;
+
+		dio->flags |= IOMAP_DIO_UNWRITTEN;
+		ret = iomap_dio_zero(iter, dio, pos, length);
+		if (ret)
+			return ret;
+	}
+
+	dio->size += length;
+
+	return length;
+}
+
+ssize_t
+iomap_dio_zero_unwritten(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct iomap_dio *dio;
+	ssize_t ret;
+	struct iomap_iter iomi = {
+		.inode		= inode,
+		.pos		= iocb->ki_pos,
+		.len		= iov_iter_count(iter),
+		.flags		= IOMAP_WRITE,
+	};
+
+	dio = kzalloc(sizeof(*dio), GFP_KERNEL);
+	if (!dio)
+		return -ENOMEM;
+
+	dio->iocb = iocb;
+	atomic_set(&dio->ref, 1);
+	dio->i_size = i_size_read(inode);
+	dio->dops = dops;
+	dio->submit.waiter = current;
+	dio->wait_for_completion = true;
+
+	inode_dio_begin(inode);
+
+	while ((ret = iomap_iter(&iomi, ops)) > 0)
+		iomi.processed = iomap_dio_zero_unwritten_iter(&iomi, dio);
+
+	if (ret < 0)
+		iomap_dio_set_error(dio, ret);
+
+	if (!atomic_dec_and_test(&dio->ref)) {
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (!READ_ONCE(dio->submit.waiter))
+				break;
+
+			blk_io_schedule();
+		}
+		__set_current_state(TASK_RUNNING);
+	}
+
+	if (dops && dops->end_io)
+		ret = dops->end_io(iocb, dio->size, ret, dio->flags);
+
+	kfree(dio);
+
+	inode_dio_end(file_inode(iocb->ki_filp));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dio_zero_unwritten);
+
 static int __init iomap_dio_init(void)
 {
 	zero_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5675af6b740c..c2d44b9e446d 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -440,6 +440,9 @@  ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags, void *private, size_t done_before);
+ssize_t iomap_dio_zero_unwritten(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
+
 ssize_t iomap_dio_complete(struct iomap_dio *dio);
 void iomap_dio_bio_end_io(struct bio *bio);