diff mbox series

[1/2] iomap: add iomap_writepages_unbound() to write beyond EOF

Message ID 20241229133640.1193578-2-aalbersh@kernel.org (mailing list archive)
State New
Headers show
Series Introduce iomap interface to work with regions beyond EOF | expand

Commit Message

Andrey Albershteyn Dec. 29, 2024, 1:36 p.m. UTC
From: Andrey Albershteyn <aalbersh@redhat.com>

Add iomap_writepages_unbound(), a variant of iomap_writepages()
that does not bound writeback at EOF. XFS will use this to write
back extended attributes (the fs-verity Merkle tree), which live
in a range far beyond EOF.

Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
---
 fs/iomap/buffered-io.c | 55 ++++++++++++++++++++++++++++++++----------
 include/linux/iomap.h  |  4 +++
 2 files changed, 46 insertions(+), 13 deletions(-)

Comments

kernel test robot Dec. 29, 2024, 5:54 p.m. UTC | #1
Hi Andrey,

kernel test robot noticed the following build errors:

[auto build test ERROR on brauner-vfs/vfs.all]
[also build test ERROR on linus/master v6.13-rc4 next-20241220]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrey-Albershteyn/iomap-add-iomap_writepages_unbound-to-write-beyond-EOF/20241229-213942
base:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all
patch link:    https://lore.kernel.org/r/20241229133640.1193578-2-aalbersh%40kernel.org
patch subject: [PATCH 1/2] iomap: add iomap_writepages_unbound() to write beyond EOF
config: s390-randconfig-002-20241229 (https://download.01.org/0day-ci/archive/20241230/202412300135.cvWMPZGf-lkp@intel.com/config)
compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241230/202412300135.cvWMPZGf-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412300135.cvWMPZGf-lkp@intel.com/

All errors (new ones prefixed by >>):

>> fs/iomap/buffered-io.c:982:23: error: use of undeclared identifier 'IOMAP_NOSIZE'
                   if (!(iter->flags & IOMAP_NOSIZE) && (pos + written > old_size)) {
                                       ^
   fs/iomap/buffered-io.c:988:23: error: use of undeclared identifier 'IOMAP_NOSIZE'
                   if (!(iter->flags & IOMAP_NOSIZE) && (old_size < pos))
                                       ^
   2 errors generated.


vim +/IOMAP_NOSIZE +982 fs/iomap/buffered-io.c

   909	
   910	static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
   911	{
   912		loff_t length = iomap_length(iter);
   913		loff_t pos = iter->pos;
   914		ssize_t total_written = 0;
   915		long status = 0;
   916		struct address_space *mapping = iter->inode->i_mapping;
   917		size_t chunk = mapping_max_folio_size(mapping);
   918		unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
   919	
   920		do {
   921			struct folio *folio;
   922			loff_t old_size;
   923			size_t offset;		/* Offset into folio */
   924			size_t bytes;		/* Bytes to write to folio */
   925			size_t copied;		/* Bytes copied from user */
   926			size_t written;		/* Bytes have been written */
   927	
   928			bytes = iov_iter_count(i);
   929	retry:
   930			offset = pos & (chunk - 1);
   931			bytes = min(chunk - offset, bytes);
   932			status = balance_dirty_pages_ratelimited_flags(mapping,
   933								       bdp_flags);
   934			if (unlikely(status))
   935				break;
   936	
   937			if (bytes > length)
   938				bytes = length;
   939	
   940			/*
   941			 * Bring in the user page that we'll copy from _first_.
   942			 * Otherwise there's a nasty deadlock on copying from the
   943			 * same page as we're writing to, without it being marked
   944			 * up-to-date.
   945			 *
   946			 * For async buffered writes the assumption is that the user
   947			 * page has already been faulted in. This can be optimized by
   948			 * faulting the user page.
   949			 */
   950			if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
   951				status = -EFAULT;
   952				break;
   953			}
   954	
   955			status = iomap_write_begin(iter, pos, bytes, &folio);
   956			if (unlikely(status)) {
   957				iomap_write_failed(iter->inode, pos, bytes);
   958				break;
   959			}
   960			if (iter->iomap.flags & IOMAP_F_STALE)
   961				break;
   962	
   963			offset = offset_in_folio(folio, pos);
   964			if (bytes > folio_size(folio) - offset)
   965				bytes = folio_size(folio) - offset;
   966	
   967			if (mapping_writably_mapped(mapping))
   968				flush_dcache_folio(folio);
   969	
   970			copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
   971			written = iomap_write_end(iter, pos, bytes, copied, folio) ?
   972				  copied : 0;
   973	
   974			/*
   975			 * Update the in-memory inode size after copying the data into
   976			 * the page cache.  It's up to the file system to write the
   977			 * updated size to disk, preferably after I/O completion so that
   978			 * no stale data is exposed.  Only once that's done can we
   979			 * unlock and release the folio.
   980			 */
   981			old_size = iter->inode->i_size;
 > 982			if (!(iter->flags & IOMAP_NOSIZE) && (pos + written > old_size)) {
   983				i_size_write(iter->inode, pos + written);
   984				iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
   985			}
   986			__iomap_put_folio(iter, pos, written, folio);
   987	
   988			if (!(iter->flags & IOMAP_NOSIZE) && (old_size < pos))
   989				pagecache_isize_extended(iter->inode, old_size, pos);
   990	
   991			cond_resched();
   992			if (unlikely(written == 0)) {
   993				/*
   994				 * A short copy made iomap_write_end() reject the
   995				 * thing entirely.  Might be memory poisoning
   996				 * halfway through, might be a race with munmap,
   997				 * might be severe memory pressure.
   998				 */
   999				iomap_write_failed(iter->inode, pos, bytes);
  1000				iov_iter_revert(i, copied);
  1001	
  1002				if (chunk > PAGE_SIZE)
  1003					chunk /= 2;
  1004				if (copied) {
  1005					bytes = copied;
  1006					goto retry;
  1007				}
  1008			} else {
  1009				pos += written;
  1010				total_written += written;
  1011				length -= written;
  1012			}
  1013		} while (iov_iter_count(i) && length);
  1014	
  1015		if (status == -EAGAIN) {
  1016			iov_iter_revert(i, total_written);
  1017			return -EAGAIN;
  1018		}
  1019		return total_written ? total_written : status;
  1020	}
  1021
kernel test robot Dec. 29, 2024, 9:36 p.m. UTC | #2
Hi Andrey,

kernel test robot noticed the following build errors:

[auto build test ERROR on brauner-vfs/vfs.all]
[also build test ERROR on linus/master v6.13-rc4 next-20241220]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrey-Albershteyn/iomap-add-iomap_writepages_unbound-to-write-beyond-EOF/20241229-213942
base:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git vfs.all
patch link:    https://lore.kernel.org/r/20241229133640.1193578-2-aalbersh%40kernel.org
patch subject: [PATCH 1/2] iomap: add iomap_writepages_unbound() to write beyond EOF
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20241230/202412300506.Upx51jzg-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241230/202412300506.Upx51jzg-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412300506.Upx51jzg-lkp@intel.com/

All errors (new ones prefixed by >>):

   fs/iomap/buffered-io.c: In function 'iomap_write_iter':
>> fs/iomap/buffered-io.c:982:37: error: 'IOMAP_NOSIZE' undeclared (first use in this function); did you mean 'IOMAP_HOLE'?
     982 |                 if (!(iter->flags & IOMAP_NOSIZE) && (pos + written > old_size)) {
         |                                     ^~~~~~~~~~~~
         |                                     IOMAP_HOLE
   fs/iomap/buffered-io.c:982:37: note: each undeclared identifier is reported only once for each function it appears in


vim +982 fs/iomap/buffered-io.c

   909	
   910	static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
   911	{
   912		loff_t length = iomap_length(iter);
   913		loff_t pos = iter->pos;
   914		ssize_t total_written = 0;
   915		long status = 0;
   916		struct address_space *mapping = iter->inode->i_mapping;
   917		size_t chunk = mapping_max_folio_size(mapping);
   918		unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
   919	
   920		do {
   921			struct folio *folio;
   922			loff_t old_size;
   923			size_t offset;		/* Offset into folio */
   924			size_t bytes;		/* Bytes to write to folio */
   925			size_t copied;		/* Bytes copied from user */
   926			size_t written;		/* Bytes have been written */
   927	
   928			bytes = iov_iter_count(i);
   929	retry:
   930			offset = pos & (chunk - 1);
   931			bytes = min(chunk - offset, bytes);
   932			status = balance_dirty_pages_ratelimited_flags(mapping,
   933								       bdp_flags);
   934			if (unlikely(status))
   935				break;
   936	
   937			if (bytes > length)
   938				bytes = length;
   939	
   940			/*
   941			 * Bring in the user page that we'll copy from _first_.
   942			 * Otherwise there's a nasty deadlock on copying from the
   943			 * same page as we're writing to, without it being marked
   944			 * up-to-date.
   945			 *
   946			 * For async buffered writes the assumption is that the user
   947			 * page has already been faulted in. This can be optimized by
   948			 * faulting the user page.
   949			 */
   950			if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
   951				status = -EFAULT;
   952				break;
   953			}
   954	
   955			status = iomap_write_begin(iter, pos, bytes, &folio);
   956			if (unlikely(status)) {
   957				iomap_write_failed(iter->inode, pos, bytes);
   958				break;
   959			}
   960			if (iter->iomap.flags & IOMAP_F_STALE)
   961				break;
   962	
   963			offset = offset_in_folio(folio, pos);
   964			if (bytes > folio_size(folio) - offset)
   965				bytes = folio_size(folio) - offset;
   966	
   967			if (mapping_writably_mapped(mapping))
   968				flush_dcache_folio(folio);
   969	
   970			copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
   971			written = iomap_write_end(iter, pos, bytes, copied, folio) ?
   972				  copied : 0;
   973	
   974			/*
   975			 * Update the in-memory inode size after copying the data into
   976			 * the page cache.  It's up to the file system to write the
   977			 * updated size to disk, preferably after I/O completion so that
   978			 * no stale data is exposed.  Only once that's done can we
   979			 * unlock and release the folio.
   980			 */
   981			old_size = iter->inode->i_size;
 > 982			if (!(iter->flags & IOMAP_NOSIZE) && (pos + written > old_size)) {
   983				i_size_write(iter->inode, pos + written);
   984				iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
   985			}
   986			__iomap_put_folio(iter, pos, written, folio);
   987	
   988			if (!(iter->flags & IOMAP_NOSIZE) && (old_size < pos))
   989				pagecache_isize_extended(iter->inode, old_size, pos);
   990	
   991			cond_resched();
   992			if (unlikely(written == 0)) {
   993				/*
   994				 * A short copy made iomap_write_end() reject the
   995				 * thing entirely.  Might be memory poisoning
   996				 * halfway through, might be a race with munmap,
   997				 * might be severe memory pressure.
   998				 */
   999				iomap_write_failed(iter->inode, pos, bytes);
  1000				iov_iter_revert(i, copied);
  1001	
  1002				if (chunk > PAGE_SIZE)
  1003					chunk /= 2;
  1004				if (copied) {
  1005					bytes = copied;
  1006					goto retry;
  1007				}
  1008			} else {
  1009				pos += written;
  1010				total_written += written;
  1011				length -= written;
  1012			}
  1013		} while (iov_iter_count(i) && length);
  1014	
  1015		if (status == -EAGAIN) {
  1016			iov_iter_revert(i, total_written);
  1017			return -EAGAIN;
  1018		}
  1019		return total_written ? total_written : status;
  1020	}
  1021
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 955f19e27e47..61ec924c5b80 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -979,13 +979,13 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		 * unlock and release the folio.
 		 */
 		old_size = iter->inode->i_size;
-		if (pos + written > old_size) {
+		if (!(iter->flags & IOMAP_NOSIZE) && (pos + written > old_size)) {
 			i_size_write(iter->inode, pos + written);
 			iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
 		}
 		__iomap_put_folio(iter, pos, written, folio);
 
-		if (old_size < pos)
+		if (!(iter->flags & IOMAP_NOSIZE) && (old_size < pos))
 			pagecache_isize_extended(iter->inode, old_size, pos);
 
 		cond_resched();
@@ -1918,18 +1918,10 @@  static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	int error = 0;
 	u32 rlen;
 
-	WARN_ON_ONCE(!folio_test_locked(folio));
-	WARN_ON_ONCE(folio_test_dirty(folio));
-	WARN_ON_ONCE(folio_test_writeback(folio));
-
-	trace_iomap_writepage(inode, pos, folio_size(folio));
-
-	if (!iomap_writepage_handle_eof(folio, inode, &end_pos)) {
-		folio_unlock(folio);
-		return 0;
-	}
 	WARN_ON_ONCE(end_pos <= pos);
 
+	trace_iomap_writepage(inode, pos, folio_size(folio));
+
 	if (i_blocks_per_folio(inode, folio) > 1) {
 		if (!ifs) {
 			ifs = ifs_alloc(inode, folio, 0);
@@ -1992,6 +1984,23 @@  static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	return error;
 }
 
+/* Map pages bound by EOF */
+static int iomap_writepage_map_eof(struct iomap_writepage_ctx *wpc,
+		struct writeback_control *wbc, struct folio *folio)
+{
+	int error;
+	struct inode *inode = folio->mapping->host;
+	u64 end_pos = folio_pos(folio) + folio_size(folio);
+
+	if (!iomap_writepage_handle_eof(folio, inode, &end_pos)) {
+		folio_unlock(folio);
+		return 0;
+	}
+
+	error = iomap_writepage_map(wpc, wbc, folio);
+	return error;
+}
+
 int
 iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 		struct iomap_writepage_ctx *wpc,
@@ -2008,12 +2017,32 @@  iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 			PF_MEMALLOC))
 		return -EIO;
 
+	wpc->ops = ops;
+	while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+		WARN_ON_ONCE(!folio_test_locked(folio));
+		WARN_ON_ONCE(folio_test_dirty(folio));
+		WARN_ON_ONCE(folio_test_writeback(folio));
+
+		error = iomap_writepage_map_eof(wpc, wbc, folio);
+	}
+	return iomap_submit_ioend(wpc, error);
+}
+EXPORT_SYMBOL_GPL(iomap_writepages);
+
+int
+iomap_writepages_unbound(struct address_space *mapping,
+		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
+		const struct iomap_writeback_ops *ops)
+{
+	struct folio *folio = NULL;
+	int error;
+
 	wpc->ops = ops;
 	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
 		error = iomap_writepage_map(wpc, wbc, folio);
 	return iomap_submit_ioend(wpc, error);
 }
-EXPORT_SYMBOL_GPL(iomap_writepages);
+EXPORT_SYMBOL_GPL(iomap_writepages_unbound);
 
 static int __init iomap_buffered_init(void)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5675af6b740c..3bfd3035ac28 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -181,6 +181,7 @@  struct iomap_folio_ops {
 #define IOMAP_DAX		(1 << 8) /* DAX mapping */
 #else
 #define IOMAP_DAX		0
 #endif /* CONFIG_FS_DAX */
 #define IOMAP_ATOMIC		(1 << 9)
+#define IOMAP_NOSIZE		(1 << 10) /* Don't update in-memory inode size */
 
@@ -390,6 +391,9 @@  void iomap_sort_ioends(struct list_head *ioend_list);
 int iomap_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops);
+int iomap_writepages_unbound(struct address_space *mapping,
+		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
+		const struct iomap_writeback_ops *ops);
 
 /*
  * Flags for direct I/O ->end_io: