diff mbox series

[2/2] fs: generic_file_buffered_read() now uses find_get_pages_contig

Message ID 20180815232632.32548-3-kent.overstreet@gmail.com (mailing list archive)
State New, archived
Headers show
Series generic_file_buffered_read improvements | expand

Commit Message

Kent Overstreet Aug. 15, 2018, 11:26 p.m. UTC
Convert generic_file_buffered_read() to get pages to read from in
batches, and then copy data to userspace from many pages at once - in
particular, we now don't touch any cachelines that might be contended
while we're in the loop to copy data to userspace.

This is is a performance improvement on workloads that do buffered reads
with large blocksizes, and a very large performance improvement if that
file is also being accessed concurrently by different threads.

On smaller reads (512 bytes), there's a very small performance
improvement (1%, within the margin of error).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
---
 mm/filemap.c | 266 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 144 insertions(+), 122 deletions(-)

Comments

kernel test robot Aug. 16, 2018, 2:56 p.m. UTC | #1
Hi Kent,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18 next-20180816]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Kent-Overstreet/generic_file_buffered_read-improvements/20180816-200515
config: sparc64-allmodconfig (attached as .config)
compiler: sparc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   mm/filemap.c: In function 'generic_file_buffered_read':
>> mm/filemap.c:2340:23: error: 'page' undeclared (first use in this function)
        flush_dcache_page(page);
                          ^~~~
   mm/filemap.c:2340:23: note: each undeclared identifier is reported only once for each function it appears in

vim +/page +2340 mm/filemap.c

  2253	
  2254	/**
  2255	 * generic_file_buffered_read - generic file read routine
  2256	 * @iocb:	the iocb to read
  2257	 * @iter:	data destination
  2258	 * @written:	already copied
  2259	 *
  2260	 * This is a generic file read routine, and uses the
  2261	 * mapping->a_ops->readpage() function for the actual low-level stuff.
  2262	 *
  2263	 * This is really ugly. But the goto's actually try to clarify some
  2264	 * of the logic when it comes to error handling etc.
  2265	 */
  2266	static ssize_t generic_file_buffered_read(struct kiocb *iocb,
  2267			struct iov_iter *iter, ssize_t written)
  2268	{
  2269		struct file *filp = iocb->ki_filp;
  2270		struct file_ra_state *ra = &filp->f_ra;
  2271		struct address_space *mapping = filp->f_mapping;
  2272		struct inode *inode = mapping->host;
  2273		size_t orig_count = iov_iter_count(iter);
  2274		struct page *pages[64];
  2275		int i, pg_nr, error = 0;
  2276		bool writably_mapped;
  2277		loff_t isize, end_offset;
  2278	
  2279		if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
  2280			return 0;
  2281		iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
  2282	
  2283		do {
  2284			cond_resched();
  2285	
  2286			i = 0;
  2287			pg_nr = generic_file_buffered_read_get_pages(iocb, iter, pages,
  2288								     ARRAY_SIZE(pages));
  2289			if (pg_nr < 0) {
  2290				error = pg_nr;
  2291				break;
  2292			}
  2293	
  2294			/*
  2295			 * i_size must be checked after we know the pages are Uptodate.
  2296			 *
  2297			 * Checking i_size after the check allows us to calculate
  2298			 * the correct value for "nr", which means the zero-filled
  2299			 * part of the page is not copied back to userspace (unless
  2300			 * another truncate extends the file - this is desired though).
  2301			 */
  2302			isize = i_size_read(inode);
  2303			if (unlikely(iocb->ki_pos >= isize))
  2304				goto put_pages;
  2305	
  2306			end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
  2307	
  2308			while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
  2309			       (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
  2310				put_page(pages[--pg_nr]);
  2311	
  2312			/*
  2313			 * Once we start copying data, we don't want to be touching any
  2314			 * cachelines that might be contended:
  2315			 */
  2316			writably_mapped = mapping_writably_mapped(mapping);
  2317	
  2318			/*
  2319			 * When a sequential read accesses a page several times, only
  2320			 * mark it as accessed the first time.
  2321			 */
  2322			if (iocb->ki_pos >> PAGE_SHIFT !=
  2323			    ra->prev_pos >> PAGE_SHIFT)
  2324				mark_page_accessed(pages[0]);
  2325			for (i = 1; i < pg_nr; i++)
  2326				mark_page_accessed(pages[i]);
  2327	
  2328			for (i = 0; i < pg_nr; i++) {
  2329				unsigned offset = iocb->ki_pos & ~PAGE_MASK;
  2330				unsigned bytes = min_t(loff_t, end_offset - iocb->ki_pos,
  2331						       PAGE_SIZE - offset);
  2332				unsigned copied;
  2333	
  2334				/*
  2335				 * If users can be writing to this page using arbitrary
  2336				 * virtual addresses, take care about potential aliasing
  2337				 * before reading the page on the kernel side.
  2338				 */
  2339				if (writably_mapped)
> 2340					flush_dcache_page(page);
  2341	
  2342				copied = copy_page_to_iter(pages[i], offset, bytes, iter);
  2343	
  2344				iocb->ki_pos += copied;
  2345				ra->prev_pos = iocb->ki_pos;
  2346	
  2347				if (copied < bytes) {
  2348					error = -EFAULT;
  2349					break;
  2350				}
  2351			}
  2352	put_pages:
  2353			for (i = 0; i < pg_nr; i++)
  2354				put_page(pages[i]);
  2355		} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
  2356	
  2357		file_accessed(filp);
  2358		written += orig_count - iov_iter_count(iter);
  2359	
  2360		return written ? written : error;
  2361	}
  2362	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 308bdd466f..0de9fe4c61 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2110,67 +2110,6 @@  static void shrink_readahead_size_eio(struct file *filp,
 	ra->ra_pages /= 4;
 }
 
-static int generic_file_buffered_read_page_ok(struct kiocb *iocb,
-			struct iov_iter *iter,
-			struct page *page)
-{
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
-	struct inode *inode = mapping->host;
-	struct file_ra_state *ra = &iocb->ki_filp->f_ra;
-	unsigned offset = iocb->ki_pos & ~PAGE_MASK;
-	unsigned bytes, copied;
-	loff_t isize, end_offset;
-
-	BUG_ON(iocb->ki_pos >> PAGE_SHIFT != page->index);
-
-	/*
-	 * i_size must be checked after we know the page is Uptodate.
-	 *
-	 * Checking i_size after the check allows us to calculate
-	 * the correct value for "bytes", which means the zero-filled
-	 * part of the page is not copied back to userspace (unless
-	 * another truncate extends the file - this is desired though).
-	 */
-
-	isize = i_size_read(inode);
-	if (unlikely(iocb->ki_pos >= isize))
-		return 1;
-
-	end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
-
-	bytes = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);
-
-	/* If users can be writing to this page using arbitrary
-	 * virtual addresses, take care about potential aliasing
-	 * before reading the page on the kernel side.
-	 */
-	if (mapping_writably_mapped(mapping))
-		flush_dcache_page(page);
-
-	/*
-	 * Ok, we have the page, and it's up-to-date, so
-	 * now we can copy it to user space...
-	 */
-
-	copied = copy_page_to_iter(page, offset, bytes, iter);
-
-	iocb->ki_pos += copied;
-
-	/*
-	 * When a sequential read accesses a page several times,
-	 * only mark it as accessed the first time.
-	 */
-	if (iocb->ki_pos >> PAGE_SHIFT != ra->prev_pos >> PAGE_SHIFT)
-		mark_page_accessed(page);
-
-	ra->prev_pos = iocb->ki_pos;
-
-	if (copied < bytes)
-		return -EFAULT;
-
-	return !iov_iter_count(iter) || iocb->ki_pos == isize;
-}
-
 static struct page *
 generic_file_buffered_read_readpage(struct file *filp,
 				    struct address_space *mapping,
@@ -2314,6 +2253,79 @@  generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
 	return generic_file_buffered_read_readpage(filp, mapping, page);
 }
 
+static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
+						struct iov_iter *iter,
+						struct page **pages,
+						unsigned nr)
+{
+	struct file *filp = iocb->ki_filp;
+	struct address_space *mapping = filp->f_mapping;
+	struct file_ra_state *ra = &filp->f_ra;
+	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+	pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+	int i, j, ret, err = 0;
+
+	nr = min_t(unsigned long, last_index - index, nr);
+find_page:
+	if (fatal_signal_pending(current))
+		return -EINTR;
+
+	ret = find_get_pages_contig(mapping, index, nr, pages);
+	if (ret)
+		goto got_pages;
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EAGAIN;
+
+	page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
+
+	ret = find_get_pages_contig(mapping, index, nr, pages);
+	if (ret)
+		goto got_pages;
+
+	pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
+	err = PTR_ERR_OR_ZERO(pages[0]);
+	ret = !IS_ERR_OR_NULL(pages[0]);
+got_pages:
+	for (i = 0; i < ret; i++) {
+		struct page *page = pages[i];
+		pgoff_t pg_index = index +i;
+		loff_t pg_pos = max(iocb->ki_pos,
+				    (loff_t) pg_index << PAGE_SHIFT);
+		loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
+
+		if (PageReadahead(page))
+			page_cache_async_readahead(mapping, ra, filp, page,
+					pg_index, last_index - pg_index);
+
+		if (!PageUptodate(page)) {
+			if (iocb->ki_flags & IOCB_NOWAIT) {
+				for (j = i; j < ret; j++)
+					put_page(pages[j]);
+				ret = i;
+				err = -EAGAIN;
+				break;
+			}
+
+			page = generic_file_buffered_read_pagenotuptodate(filp,
+						iter, page, pg_pos, pg_count);
+			if (IS_ERR_OR_NULL(page)) {
+				for (j = i + 1; j < ret; j++)
+					put_page(pages[j]);
+				ret = i;
+				err = PTR_ERR_OR_ZERO(page);
+				break;
+			}
+		}
+	}
+
+	if (likely(ret))
+		return ret;
+	if (err)
+		return err;
+	goto find_page;
+}
+
 /**
  * generic_file_buffered_read - generic file read routine
  * @iocb:	the iocb to read
@@ -2330,83 +2342,93 @@  static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		struct iov_iter *iter, ssize_t written)
 {
 	struct file *filp = iocb->ki_filp;
+	struct file_ra_state *ra = &filp->f_ra;
 	struct address_space *mapping = filp->f_mapping;
 	struct inode *inode = mapping->host;
-	struct file_ra_state *ra = &filp->f_ra;
 	size_t orig_count = iov_iter_count(iter);
-	pgoff_t last_index;
-	int error = 0;
+	struct page *pages[64];
+	int i, pg_nr, error = 0;
+	bool writably_mapped;
+	loff_t isize, end_offset;
 
 	if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
 		return 0;
 	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
 
-	last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
-
-	for (;;) {
-		pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
-		struct page *page;
-
+	do {
 		cond_resched();
-find_page:
-		if (fatal_signal_pending(current)) {
-			error = -EINTR;
-			goto out;
-		}
 
-		page = find_get_page(mapping, index);
-		if (!page) {
-			if (iocb->ki_flags & IOCB_NOWAIT)
-				goto would_block;
-			page_cache_sync_readahead(mapping,
-					ra, filp,
-					index, last_index - index);
-			page = find_get_page(mapping, index);
-			if (unlikely(page == NULL)) {
-				page = generic_file_buffered_read_no_cached_page(iocb, iter);
-				if (!page)
-					goto find_page;
-				if (IS_ERR(page)) {
-					error = PTR_ERR(page);
-					goto out;
-				}
-			}
-		}
-		if (PageReadahead(page)) {
-			page_cache_async_readahead(mapping,
-					ra, filp, page,
-					index, last_index - index);
+		i = 0;
+		pg_nr = generic_file_buffered_read_get_pages(iocb, iter, pages,
+							     ARRAY_SIZE(pages));
+		if (pg_nr < 0) {
+			error = pg_nr;
+			break;
 		}
-		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_NOWAIT) {
-				put_page(page);
-				error = -EAGAIN;
-				goto out;
-			}
 
-			page = generic_file_buffered_read_pagenotuptodate(filp,
-					iter, page, iocb->ki_pos, iter->count);
-			if (!page)
-				goto find_page;
-			if (IS_ERR(page)) {
-				error = PTR_ERR(page);
-				goto out;
-			}
-		}
+		/*
+		 * i_size must be checked after we know the pages are Uptodate.
+		 *
+		 * Checking i_size after the check allows us to calculate
+		 * the correct value for "nr", which means the zero-filled
+		 * part of the page is not copied back to userspace (unless
+		 * another truncate extends the file - this is desired though).
+		 */
+		isize = i_size_read(inode);
+		if (unlikely(iocb->ki_pos >= isize))
+			goto put_pages;
 
-		error = generic_file_buffered_read_page_ok(iocb, iter, page);
-		put_page(page);
+		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 
-		if (error) {
-			if (error > 0)
-				error = 0;
-			goto out;
+		while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
+		       (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
+			put_page(pages[--pg_nr]);
+
+		/*
+		 * Once we start copying data, we don't want to be touching any
+		 * cachelines that might be contended:
+		 */
+		writably_mapped = mapping_writably_mapped(mapping);
+
+		/*
+		 * When a sequential read accesses a page several times, only
+		 * mark it as accessed the first time.
+		 */
+		if (iocb->ki_pos >> PAGE_SHIFT !=
+		    ra->prev_pos >> PAGE_SHIFT)
+			mark_page_accessed(pages[0]);
+		for (i = 1; i < pg_nr; i++)
+			mark_page_accessed(pages[i]);
+
+		for (i = 0; i < pg_nr; i++) {
+			unsigned offset = iocb->ki_pos & ~PAGE_MASK;
+			unsigned bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+					       PAGE_SIZE - offset);
+			unsigned copied;
+
+			/*
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care about potential aliasing
+			 * before reading the page on the kernel side.
+			 */
+			if (writably_mapped)
+				flush_dcache_page(page);
+
+			copied = copy_page_to_iter(pages[i], offset, bytes, iter);
+
+			iocb->ki_pos += copied;
+			ra->prev_pos = iocb->ki_pos;
+
+			if (copied < bytes) {
+				error = -EFAULT;
+				break;
+			}
 		}
-	}
+put_pages:
+		for (i = 0; i < pg_nr; i++)
+			put_page(pages[i]);
+	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 
-would_block:
-	error = -EAGAIN;
-out:
 	file_accessed(filp);
 	written += orig_count - iov_iter_count(iter);