
[3/3] iomap: Copy larger chunks from userspace

Message ID: 20230520163603.1794256-4-willy@infradead.org (mailing list archive)
State: Superseded, archived
Series: Create large folios in iomap buffered write path

Commit Message

Matthew Wilcox May 20, 2023, 4:36 p.m. UTC
If we have a large folio, we can copy in larger chunks than PAGE_SIZE.
Start at the maximum page cache size and shrink by half every time we
hit the "we are short on memory" problem.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)
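
In outline, the sizing logic the patch adds works like this: each copy is sized against a chunk that starts at the largest folio the page cache can allocate (PAGE_SIZE << MAX_PAGECACHE_ORDER) and is halved, down to a single page, whenever iomap_write_end() rejects a short copy. The standalone sketch below only illustrates that loop, it is not the kernel code; the PAGE_SIZE and MAX_PAGECACHE_ORDER values, the starting position, and the forced short-copy condition are assumptions made to keep the example runnable.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE		4096UL	/* assumed for illustration */
#define MAX_PAGECACHE_ORDER	9	/* assumed: 2MiB maximum folio */

static size_t min_size(size_t a, size_t b)
{
	return a < b ? a : b;
}

int main(void)
{
	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
	unsigned long long pos = 12345;		/* arbitrary file position */
	size_t remaining = 3 * PAGE_SIZE;	/* bytes left to write */

	while (remaining) {
		/* Offset within the current chunk, as in the patched loop. */
		size_t offset = pos & (chunk - 1);
		size_t bytes = min_size(chunk - offset, remaining);

		/*
		 * For illustration, pretend every copy larger than one page
		 * comes up short (the kernel detects this after the copy;
		 * here we just force the halving path).
		 */
		if (chunk > PAGE_SIZE) {
			chunk /= 2;	/* shrink by half and retry */
			continue;
		}

		printf("copied %zu bytes at pos %llu (chunk %zu)\n",
		       bytes, pos, chunk);
		pos += bytes;
		remaining -= bytes;
	}
	return 0;
}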

Comments

kernel test robot May 20, 2023, 7:11 p.m. UTC | #1
Hi Matthew,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on gfs2/for-next linus/master v6.4-rc2 next-20230519]
[cannot apply to xfs-linux/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Matthew-Wilcox-Oracle/filemap-Allow-__filemap_get_folio-to-allocate-large-folios/20230521-003701
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20230520163603.1794256-4-willy%40infradead.org
patch subject: [PATCH 3/3] iomap: Copy larger chunks from userspace
config: x86_64-randconfig-x064
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/a902fd182cfb0cb04d14533396d9fa6a40cecae6
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Matthew-Wilcox-Oracle/filemap-Allow-__filemap_get_folio-to-allocate-large-folios/20230521-003701
        git checkout a902fd182cfb0cb04d14533396d9fa6a40cecae6
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash fs/iomap/

If you fix the issue, kindly add the following tags where applicable:
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305210219.wI88J7W6-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> fs/iomap/buffered-io.c:825:42: warning: if statement has empty body [-Wempty-body]
                   if (bytes > folio_size(folio) - offset);
                                                          ^
   fs/iomap/buffered-io.c:825:42: note: put the semicolon on a separate line to silence this warning
   1 warning generated.
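
The warning boils down to a stray semicolon: the ';' at the end of line 825 terminates the if statement with an empty body, so the assignment on line 826 runs unconditionally instead of only when bytes exceeds the space remaining in the folio. The intended form is presumably the following (a sketch implied by the warning, not necessarily the committed fix):

		offset = offset_in_folio(folio, pos);
		if (bytes > folio_size(folio) - offset)		/* no trailing ';' */
			bytes = folio_size(folio) - offset;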


vim +825 fs/iomap/buffered-io.c

   775	
   776	static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
   777	{
   778		loff_t length = iomap_length(iter);
   779		size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
   780		loff_t pos = iter->pos;
   781		ssize_t written = 0;
   782		long status = 0;
   783		struct address_space *mapping = iter->inode->i_mapping;
   784		unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
   785	
   786		do {
   787			struct folio *folio;
   788			size_t offset;		/* Offset into folio */
   789			unsigned long bytes;	/* Bytes to write to folio */
   790			size_t copied;		/* Bytes copied from user */
   791	
   792	again:
   793			offset = pos & (chunk - 1);
   794			bytes = min(chunk - offset, iov_iter_count(i));
   795			status = balance_dirty_pages_ratelimited_flags(mapping,
   796								       bdp_flags);
   797			if (unlikely(status))
   798				break;
   799	
   800			if (bytes > length)
   801				bytes = length;
   802	
   803			/*
   804			 * Bring in the user page that we'll copy from _first_.
   805			 * Otherwise there's a nasty deadlock on copying from the
   806			 * same page as we're writing to, without it being marked
   807			 * up-to-date.
   808			 *
   809			 * For async buffered writes the assumption is that the user
   810			 * page has already been faulted in. This can be optimized by
   811			 * faulting the user page.
   812			 */
   813			if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
   814				status = -EFAULT;
   815				break;
   816			}
   817	
   818			status = iomap_write_begin(iter, pos, bytes, &folio);
   819			if (unlikely(status))
   820				break;
   821			if (iter->iomap.flags & IOMAP_F_STALE)
   822				break;
   823	
   824			offset = offset_in_folio(folio, pos);
 > 825			if (bytes > folio_size(folio) - offset);
   826				bytes = folio_size(folio) - offset;
   827	
   828			if (mapping_writably_mapped(mapping))
   829				flush_dcache_folio(folio);
   830	
   831			copied = copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
   832	
   833			status = iomap_write_end(iter, pos, bytes, copied, folio);
   834	
   835			if (unlikely(copied != status))
   836				iov_iter_revert(i, copied - status);
   837	
   838			cond_resched();
   839			if (unlikely(status == 0)) {
   840				/*
   841				 * A short copy made iomap_write_end() reject the
   842				 * thing entirely.  Might be memory poisoning
   843				 * halfway through, might be a race with munmap,
   844				 * might be severe memory pressure.
   845				 */
   846				if (copied)
   847					bytes = copied;
   848				if (chunk > PAGE_SIZE)
   849					chunk /= 2;
   850				goto again;
   851			}
   852			pos += status;
   853			written += status;
   854			length -= status;
   855		} while (iov_iter_count(i) && length);
   856	
   857		if (status == -EAGAIN) {
   858			iov_iter_revert(i, written);
   859			return -EAGAIN;
   860		}
   861		return written ? written : status;
   862	}
   863
kernel test robot May 20, 2023, 7:25 p.m. UTC | #2
Hi Matthew,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on gfs2/for-next linus/master v6.4-rc2 next-20230519]
[cannot apply to xfs-linux/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Matthew-Wilcox-Oracle/filemap-Allow-__filemap_get_folio-to-allocate-large-folios/20230521-003701
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20230520163603.1794256-4-willy%40infradead.org
patch subject: [PATCH 3/3] iomap: Copy larger chunks from userspace
config: x86_64-randconfig-x065
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/a902fd182cfb0cb04d14533396d9fa6a40cecae6
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Matthew-Wilcox-Oracle/filemap-Allow-__filemap_get_folio-to-allocate-large-folios/20230521-003701
        git checkout a902fd182cfb0cb04d14533396d9fa6a40cecae6
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 olddefconfig
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash fs/iomap/

If you fix the issue, kindly add the following tags where applicable:
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202305210254.9yfIpoH1-lkp@intel.com/

All warnings (new ones prefixed by >>):

   fs/iomap/buffered-io.c: In function 'iomap_write_iter':
>> fs/iomap/buffered-io.c:825:56: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
     825 |                 if (bytes > folio_size(folio) - offset);
         |                                                        ^
>> fs/iomap/buffered-io.c:825:17: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
     825 |                 if (bytes > folio_size(folio) - offset);
         |                 ^~
   fs/iomap/buffered-io.c:826:25: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'
     826 |                         bytes = folio_size(folio) - offset;
         |                         ^~~~~
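
Both gcc diagnostics point at the same stray semicolon flagged by the clang report above. For reference, the clamp can also be written without a separate if statement, which sidesteps the empty-body and misleading-indentation warnings entirely; this is only a sketch of an alternative, not necessarily the fix the author applied, and it reuses min_t() as the pre-patch code did to avoid mixing unsigned long and size_t:

		offset = offset_in_folio(folio, pos);
		bytes = min_t(unsigned long, bytes, folio_size(folio) - offset);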


vim +/if +825 fs/iomap/buffered-io.c


Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 651af2d424ac..aa1268e708fb 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -775,6 +775,7 @@  static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
 	loff_t length = iomap_length(iter);
+	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iter->pos;
 	ssize_t written = 0;
 	long status = 0;
@@ -783,15 +784,13 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 
 	do {
 		struct folio *folio;
-		struct page *page;
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
+		size_t offset;		/* Offset into folio */
+		unsigned long bytes;	/* Bytes to write to folio */
 		size_t copied;		/* Bytes copied from user */
 
-		offset = offset_in_page(pos);
-		bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_count(i));
 again:
+		offset = pos & (chunk - 1);
+		bytes = min(chunk - offset, iov_iter_count(i));
 		status = balance_dirty_pages_ratelimited_flags(mapping,
 							       bdp_flags);
 		if (unlikely(status))
@@ -821,11 +820,14 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		if (iter->iomap.flags & IOMAP_F_STALE)
 			break;
 
-		page = folio_file_page(folio, pos >> PAGE_SHIFT);
+		offset = offset_in_folio(folio, pos);
+		if (bytes > folio_size(folio) - offset);
+			bytes = folio_size(folio) - offset;
+
 		if (mapping_writably_mapped(mapping))
-			flush_dcache_page(page);
+			flush_dcache_folio(folio);
 
-		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
+		copied = copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
 
 		status = iomap_write_end(iter, pos, bytes, copied, folio);
 
@@ -842,6 +844,8 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 			 */
 			if (copied)
 				bytes = copied;
+			if (chunk > PAGE_SIZE)
+				chunk /= 2;
 			goto again;
 		}
 		pos += status;