diff mbox series

[v2,6/9] iomap,xfs: Convert from readpages to readahead

Message ID 20200115023843.31325-7-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series Replacing the readpages a_op | expand

Commit Message

Matthew Wilcox Jan. 15, 2020, 2:38 a.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Use the new readahead operation in XFS and iomap.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 72 +++++++++---------------------------------
 fs/iomap/trace.h       |  2 +-
 fs/xfs/xfs_aops.c      | 10 +++---
 include/linux/iomap.h  |  2 +-
 4 files changed, 22 insertions(+), 64 deletions(-)

Comments

Christoph Hellwig Jan. 15, 2020, 7:16 a.m. UTC | #1
On Tue, Jan 14, 2020 at 06:38:40PM -0800, Matthew Wilcox wrote:
>  static loff_t
> +iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
>  		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_readpage_ctx *ctx = data;
> @@ -410,10 +381,8 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
>  			ctx->cur_page = NULL;
>  		}
>  		if (!ctx->cur_page) {
> -			ctx->cur_page = iomap_next_page(inode, ctx->pages,
> -					pos, length, &done);
> -			if (!ctx->cur_page)
> -				break;
> +			ctx->cur_page = readahead_page(inode->i_mapping,
> +					pos / PAGE_SIZE);

Don't we at least need a sanity check for a NULL cur_page here?
Also the readahead_page version in your previous patch seems to expect
a byte offset, so the division above would not be required. (and should
probably be replaced with a right shift anyway no matter where it ends
up)

> +unsigned
> +iomap_readahead(struct address_space *mapping, pgoff_t start,
>  		unsigned nr_pages, const struct iomap_ops *ops)
>  {
>  	struct iomap_readpage_ctx ctx = {
> -		.pages		= pages,
>  		.is_readahead	= true,
>  	};
> -	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> -	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> -	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> +	loff_t pos = start * PAGE_SIZE;
> +	loff_t length = nr_pages * PAGE_SIZE;

Any good reason not to pass byte offsets for start and length?

> +	return length / PAGE_SIZE;

Same for the return value?

For the file systems that would usually be a more natural interface than
a page index and number of pages.
Matthew Wilcox Jan. 15, 2020, 7:42 a.m. UTC | #2
On Tue, Jan 14, 2020 at 11:16:28PM -0800, Christoph Hellwig wrote:
> On Tue, Jan 14, 2020 at 06:38:40PM -0800, Matthew Wilcox wrote:
> >  static loff_t
> > +iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
> >  		void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct iomap_readpage_ctx *ctx = data;
> > @@ -410,10 +381,8 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
> >  			ctx->cur_page = NULL;
> >  		}
> >  		if (!ctx->cur_page) {
> > -			ctx->cur_page = iomap_next_page(inode, ctx->pages,
> > -					pos, length, &done);
> > -			if (!ctx->cur_page)
> > -				break;
> > +			ctx->cur_page = readahead_page(inode->i_mapping,
> > +					pos / PAGE_SIZE);
> 
> Don't we at least need a sanity check for a NULL cur_page here?

I don't think so.  The caller has already put the locked page into the
page cache at that index.  If the page has gone away, that's a bug, and
I don't think BUG_ON is all that much better than a NULL pointer derefence.
Indeed, readahead_page() checks PageLocked, so it can't return NULL.

> Also the readahead_page version in your previous patch seems to expect
> a byte offset, so the division above would not be required.

Oops.  I had intended to make readahead_pages() look like this:

struct page *readahead_page(struct address_space *mapping, pgoff_t index)
{
        struct page *page = xa_load(&mapping->i_pages, index);
        VM_BUG_ON_PAGE(!PageLocked(page), page);

        return page;
}

If only our tools could warn about these kinds of mistakes.

> (and should
> probably be replaced with a right shift anyway no matter where it ends
> up)

If the compiler can't tell that x / 4096 and x >> 12 are precisely the same
and choose the more efficient of the two, we have big problems.

> > +unsigned
> > +iomap_readahead(struct address_space *mapping, pgoff_t start,
> >  		unsigned nr_pages, const struct iomap_ops *ops)
> >  {
> >  	struct iomap_readpage_ctx ctx = {
> > -		.pages		= pages,
> >  		.is_readahead	= true,
> >  	};
> > -	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> > -	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> > -	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> > +	loff_t pos = start * PAGE_SIZE;
> > +	loff_t length = nr_pages * PAGE_SIZE;
> 
> Any good reason not to pass byte offsets for start and length?
> 
> > +	return length / PAGE_SIZE;
> 
> Same for the return value?
> 
> For the file systems that would usually be a more natural interface than
> a page index and number of pages.

That seems to depend on the filesystem.  iomap definitely would be happier
with loff_t, but cifs prefers pgoff_t.  I should probably survey a few
more filesystems and see if there's a strong lean in one direction or
the other.
Matthew Wilcox Jan. 24, 2020, 10:53 p.m. UTC | #3
On Tue, Jan 14, 2020 at 11:42:43PM -0800, Matthew Wilcox wrote:
> On Tue, Jan 14, 2020 at 11:16:28PM -0800, Christoph Hellwig wrote:
> > On Tue, Jan 14, 2020 at 06:38:40PM -0800, Matthew Wilcox wrote:
> > > +iomap_readahead(struct address_space *mapping, pgoff_t start,
> > >  		unsigned nr_pages, const struct iomap_ops *ops)
> > >  {
> > >  	struct iomap_readpage_ctx ctx = {
> > > -		.pages		= pages,
> > >  		.is_readahead	= true,
> > >  	};
> > > -	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> > > -	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> > > -	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> > > +	loff_t pos = start * PAGE_SIZE;
> > > +	loff_t length = nr_pages * PAGE_SIZE;
> > 
> > Any good reason not to pass byte offsets for start and length?
> > 
> > > +	return length / PAGE_SIZE;
> > 
> > Same for the return value?
> > 
> > For the file systems that would usually be a more natural interface than
> > a page index and number of pages.
> 
> That seems to depend on the filesystem.  iomap definitely would be happier
> with loff_t, but cifs prefers pgoff_t.  I should probably survey a few
> more filesystems and see if there's a strong lean in one direction or
> the other.

I've converted all the filesystems now except for those that use fscache.
http://git.infradead.org/users/willy/linux-dax.git/shortlog/refs/heads/readahead

iomap is the only one for which an loff_t makes sense as an argument.
fscache will also prefer page index & count once Dave's conversion series
lands:
https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache-iter&id=ae317744dfb9732123e554467a9f6d93733e8a5b

I'll prep a serious conversion series for 5.6 soon (skipping cifs, but
converting all the non-fscache filesystems).
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 828444e14d09..a835b99f281f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -216,7 +216,6 @@  struct iomap_readpage_ctx {
 	bool			cur_page_in_bio;
 	bool			is_readahead;
 	struct bio		*bio;
-	struct list_head	*pages;
 };
 
 static void
@@ -367,36 +366,8 @@  iomap_readpage(struct page *page, const struct iomap_ops *ops)
 }
 EXPORT_SYMBOL_GPL(iomap_readpage);
 
-static struct page *
-iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
-		loff_t length, loff_t *done)
-{
-	while (!list_empty(pages)) {
-		struct page *page = lru_to_page(pages);
-
-		if (page_offset(page) >= (u64)pos + length)
-			break;
-
-		list_del(&page->lru);
-		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
-				GFP_NOFS))
-			return page;
-
-		/*
-		 * If we already have a page in the page cache at index we are
-		 * done.  Upper layers don't care if it is uptodate after the
-		 * readpages call itself as every page gets checked again once
-		 * actually needed.
-		 */
-		*done += PAGE_SIZE;
-		put_page(page);
-	}
-
-	return NULL;
-}
-
 static loff_t
-iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
 		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_readpage_ctx *ctx = data;
@@ -410,10 +381,8 @@  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 			ctx->cur_page = NULL;
 		}
 		if (!ctx->cur_page) {
-			ctx->cur_page = iomap_next_page(inode, ctx->pages,
-					pos, length, &done);
-			if (!ctx->cur_page)
-				break;
+			ctx->cur_page = readahead_page(inode->i_mapping,
+					pos / PAGE_SIZE);
 			ctx->cur_page_in_bio = false;
 		}
 		ret = iomap_readpage_actor(inode, pos + done, length - done,
@@ -423,48 +392,37 @@  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 	return done;
 }
 
-int
-iomap_readpages(struct address_space *mapping, struct list_head *pages,
+unsigned
+iomap_readahead(struct address_space *mapping, pgoff_t start,
 		unsigned nr_pages, const struct iomap_ops *ops)
 {
 	struct iomap_readpage_ctx ctx = {
-		.pages		= pages,
 		.is_readahead	= true,
 	};
-	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
-	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
-	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+	loff_t pos = start * PAGE_SIZE;
+	loff_t length = nr_pages * PAGE_SIZE;
 
-	trace_iomap_readpages(mapping->host, nr_pages);
+	trace_iomap_readahead(mapping->host, nr_pages);
 
 	while (length > 0) {
-		ret = iomap_apply(mapping->host, pos, length, 0, ops,
-				&ctx, iomap_readpages_actor);
+		loff_t ret = iomap_apply(mapping->host, pos, length, 0, ops,
+				&ctx, iomap_readahead_actor);
 		if (ret <= 0) {
 			WARN_ON_ONCE(ret == 0);
-			goto done;
+			break;
 		}
 		pos += ret;
 		length -= ret;
 	}
-	ret = 0;
-done:
+
 	if (ctx.bio)
 		submit_bio(ctx.bio);
-	if (ctx.cur_page) {
-		if (!ctx.cur_page_in_bio)
-			unlock_page(ctx.cur_page);
+	if (ctx.cur_page && ctx.cur_page_in_bio)
 		put_page(ctx.cur_page);
-	}
 
-	/*
-	 * Check that we didn't lose a page due to the arcance calling
-	 * conventions..
-	 */
-	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
-	return ret;
+	return length / PAGE_SIZE;
 }
-EXPORT_SYMBOL_GPL(iomap_readpages);
+EXPORT_SYMBOL_GPL(iomap_readahead);
 
 /*
  * iomap_is_partially_uptodate checks whether blocks within a page are
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 6dc227b8c47e..d6ba705f938a 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -39,7 +39,7 @@  DEFINE_EVENT(iomap_readpage_class, name,	\
 	TP_PROTO(struct inode *inode, int nr_pages), \
 	TP_ARGS(inode, nr_pages))
 DEFINE_READPAGE_EVENT(iomap_readpage);
-DEFINE_READPAGE_EVENT(iomap_readpages);
+DEFINE_READPAGE_EVENT(iomap_readahead);
 
 DECLARE_EVENT_CLASS(iomap_page_class,
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a688eb5c5ae..4d9da34e759b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -621,14 +621,14 @@  xfs_vm_readpage(
 	return iomap_readpage(page, &xfs_read_iomap_ops);
 }
 
-STATIC int
-xfs_vm_readpages(
+STATIC unsigned
+xfs_vm_readahead(
 	struct file		*unused,
 	struct address_space	*mapping,
-	struct list_head	*pages,
+	pgoff_t			start,
 	unsigned		nr_pages)
 {
-	return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
+	return iomap_readahead(mapping, start, nr_pages, &xfs_read_iomap_ops);
 }
 
 static int
@@ -644,7 +644,7 @@  xfs_iomap_swapfile_activate(
 
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
-	.readpages		= xfs_vm_readpages,
+	.readahead		= xfs_vm_readahead,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
 	.set_page_dirty		= iomap_set_page_dirty,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 8b09463dae0d..81c6067e9b61 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -155,7 +155,7 @@  loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
-int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+unsigned iomap_readahead(struct address_space *, pgoff_t start,
 		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_set_page_dirty(struct page *page);
 int iomap_is_partially_uptodate(struct page *page, unsigned long from,