diff mbox series

fs: warn if stale pagecache is left after direct write

Message ID 157225698620.5453.17655271871684298255.stgit@buzz (mailing list archive)
State New, archived
Headers show
Series fs: warn if stale pagecache is left after direct write | expand

Commit Message

Konstantin Khlebnikov Oct. 28, 2019, 10:03 a.m. UTC
The function generic_file_direct_write() tries to invalidate the pagecache
after an O_DIRECT write. Unlike the similar code in dio_complete(), it silently
ignores the error returned from invalidate_inode_pages2_range().

According to the comment, this code is here because not all filesystems call
dio_complete() or do proper invalidation after an O_DIRECT write.
A notable case is blkdev_direct_IO().

This patch calls dio_warn_stale_pagecache() if invalidation fails.

This patch also skips invalidation for async writes (written == -EIOCBQUEUED).
An async write should call dio_complete() later, when the write completes.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 mm/filemap.c |   12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

Comments

Konstantin Khlebnikov Oct. 30, 2019, 8:09 a.m. UTC | #1
On 30/10/2019 03.20, kbuild test robot wrote:
> Hi Konstantin,
> 
> Thank you for the patch! Yet something to improve:
> 
> [auto build test ERROR on linus/master]
> [also build test ERROR on v5.4-rc5 next-20191029]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
> base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
> 
> url:    https://github.com/0day-ci/linux/commits/Konstantin-Khlebnikov/fs-warn-if-stale-pagecache-is-left-after-direct-write/20191030-073543
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 23fdb198ae81f47a574296dab5167c5e136a02ba
> config: i386-tinyconfig (attached as .config)
> compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
> reproduce:
>          # save the attached .config to linux build tree
>          make ARCH=i386
> 
> If you fix the issue, kindly add following tag
> Reported-by: kbuild test robot <lkp@intel.com>
> 
> All errors (new ones prefixed by >>):
> 
>     mm/filemap.c: In function 'generic_file_direct_write':
>>> mm/filemap.c:3229:3: error: implicit declaration of function 'dio_warn_stale_pagecache'; did you mean 'truncate_pagecache'? [-Werror=implicit-function-declaration]
>        dio_warn_stale_pagecache(file);
>        ^~~~~~~~~~~~~~~~~~~~~~~~
>        truncate_pagecache
>     cc1: some warnings being treated as errors

This config has CONFIG_BLOCK=n, but O_DIRECT is still available without it.
For example, NFS supports it too.

I'll move dio_warn_stale_pagecache() into a different file.

> 
> vim +3229 mm/filemap.c
> 
>    3163	
>    3164	ssize_t
>    3165	generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
>    3166	{
>    3167		struct file	*file = iocb->ki_filp;
>    3168		struct address_space *mapping = file->f_mapping;
>    3169		struct inode	*inode = mapping->host;
>    3170		loff_t		pos = iocb->ki_pos;
>    3171		ssize_t		written;
>    3172		size_t		write_len;
>    3173		pgoff_t		end;
>    3174	
>    3175		write_len = iov_iter_count(from);
>    3176		end = (pos + write_len - 1) >> PAGE_SHIFT;
>    3177	
>    3178		if (iocb->ki_flags & IOCB_NOWAIT) {
>    3179			/* If there are pages to writeback, return */
>    3180			if (filemap_range_has_page(inode->i_mapping, pos,
>    3181						   pos + write_len - 1))
>    3182				return -EAGAIN;
>    3183		} else {
>    3184			written = filemap_write_and_wait_range(mapping, pos,
>    3185								pos + write_len - 1);
>    3186			if (written)
>    3187				goto out;
>    3188		}
>    3189	
>    3190		/*
>    3191		 * After a write we want buffered reads to be sure to go to disk to get
>    3192		 * the new data.  We invalidate clean cached page from the region we're
>    3193		 * about to write.  We do this *before* the write so that we can return
>    3194		 * without clobbering -EIOCBQUEUED from ->direct_IO().
>    3195		 */
>    3196		written = invalidate_inode_pages2_range(mapping,
>    3197						pos >> PAGE_SHIFT, end);
>    3198		/*
>    3199		 * If a page can not be invalidated, return 0 to fall back
>    3200		 * to buffered write.
>    3201		 */
>    3202		if (written) {
>    3203			if (written == -EBUSY)
>    3204				return 0;
>    3205			goto out;
>    3206		}
>    3207	
>    3208		written = mapping->a_ops->direct_IO(iocb, from);
>    3209	
>    3210		/*
>    3211		 * Finally, try again to invalidate clean pages which might have been
>    3212		 * cached by non-direct readahead, or faulted in by get_user_pages()
>    3213		 * if the source of the write was an mmap'ed region of the file
>    3214		 * we're writing.  Either one is a pretty crazy thing to do,
>    3215		 * so we don't support it 100%.  If this invalidation
>    3216		 * fails, tough, the write still worked...
>    3217		 *
>    3218		 * Most of the time we do not need this since dio_complete() will do
>    3219		 * the invalidation for us. However there are some file systems that
>    3220		 * do not end up with dio_complete() being called, so let's not break
>    3221		 * them by removing it completely.
>    3222		 *
>    3223		 * Noticeable case is a blkdev_direct_IO().
>    3224		 *
>    3225		 * Skip invalidation for async writes or if mapping has no pages.
>    3226		 */
>    3227		if (written > 0 && mapping->nrpages &&
>    3228		    invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
>> 3229			dio_warn_stale_pagecache(file);
>    3230	
>    3231		if (written > 0) {
>    3232			pos += written;
>    3233			write_len -= written;
>    3234			if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
>    3235				i_size_write(inode, pos);
>    3236				mark_inode_dirty(inode);
>    3237			}
>    3238			iocb->ki_pos = pos;
>    3239		}
>    3240		iov_iter_revert(from, write_len - iov_iter_count(from));
>    3241	out:
>    3242		return written;
>    3243	}
>    3244	EXPORT_SYMBOL(generic_file_direct_write);
>    3245	
> 
> ---
> 0-DAY kernel test infrastructure                Open Source Technology Center
> https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
>
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 92abf5f348a9..1fa8d587ef78 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3222,11 +3222,15 @@  generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	 * Most of the time we do not need this since dio_complete() will do
 	 * the invalidation for us. However there are some file systems that
 	 * do not end up with dio_complete() being called, so let's not break
-	 * them by removing it completely
+	 * them by removing it completely.
+	 *
+	 * Noticeable case is a blkdev_direct_IO().
+	 *
+	 * Skip invalidation for async writes or if mapping has no pages.
 	 */
-	if (mapping->nrpages)
-		invalidate_inode_pages2_range(mapping,
-					pos >> PAGE_SHIFT, end);
+	if (written > 0 && mapping->nrpages &&
+	    invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
+		dio_warn_stale_pagecache(file);
 
 	if (written > 0) {
 		pos += written;