
[11/13] iomap: add an iomap-based readpage and readpages implementation

Message ID 20180530095813.31245-12-hch@lst.de (mailing list archive)
State New, archived

Commit Message

Christoph Hellwig May 30, 2018, 9:58 a.m. UTC
Simply use iomap_apply to iterate over the file and submit a bio for
each non-uptodate but mapped region and zero everything else.  Note that
as-is this cannot be used for file systems with a blocksize smaller than
the page size, but that support will be added later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap.c            | 203 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/iomap.h |   4 +
 2 files changed, 206 insertions(+), 1 deletion(-)
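
For context, a filesystem adopts these helpers by pointing its
address_space operations at them.  A minimal sketch of that wiring,
modelled on the XFS conversion later in this series (the xfs_vm_*
wrapper names and the xfs_iomap_ops instance are illustrative here,
not part of this patch):

static int
xfs_vm_readpage(struct file *unused, struct page *page)
{
	/* hand the locked page to the generic iomap read path */
	return iomap_readpage(page, &xfs_iomap_ops);
}

static int
xfs_vm_readpages(struct file *unused, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	/* readahead: pages are handed over on a list, not yet in the cache */
	return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
}

The filesystem's .readpage and .readpages members of its
address_space_operations would then point at these wrappers.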

Comments

Darrick J. Wong May 30, 2018, 4:22 p.m. UTC | #1
On Wed, May 30, 2018 at 11:58:11AM +0200, Christoph Hellwig wrote:
> Simply use iomap_apply to iterate over the file and submit a bio for
> each non-uptodate but mapped region and zero everything else.  Note that
> as-is this cannot be used for file systems with a blocksize smaller than
> the page size, but that support will be added later.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/iomap.c            | 203 +++++++++++++++++++++++++++++++++++++++++-
>  include/linux/iomap.h |   4 +
>  2 files changed, 206 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index b0bc928672af..5e5a266e3325 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -1,6 +1,6 @@
>  /*
>   * Copyright (C) 2010 Red Hat, Inc.
> - * Copyright (c) 2016 Christoph Hellwig.
> + * Copyright (c) 2016-2018 Christoph Hellwig.
>   *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms and conditions of the GNU General Public License,
> @@ -18,6 +18,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/gfp.h>
>  #include <linux/mm.h>
> +#include <linux/mm_inline.h>
>  #include <linux/swap.h>
>  #include <linux/pagemap.h>
>  #include <linux/file.h>
> @@ -102,6 +103,206 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> +static void
> +iomap_read_end_io(struct bio *bio)
> +{
> +	int error = blk_status_to_errno(bio->bi_status);
> +	struct bio_vec *bvec;
> +	int i;
> +
> +	bio_for_each_segment_all(bvec, bio, i)
> +		page_endio(bvec->bv_page, false, error);
> +	bio_put(bio);
> +}
> +
> +struct iomap_readpage_ctx {
> +	struct page		*cur_page;
> +	bool			cur_page_in_bio;
> +	bool			is_readahead;
> +	struct bio		*bio;
> +	struct list_head	*pages;
> +};
> +
> +static loff_t
> +iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> +		struct iomap *iomap)
> +{
> +	struct iomap_readpage_ctx *ctx = data;
> +	struct page *page = ctx->cur_page;
> +	unsigned poff = pos & (PAGE_SIZE - 1);
> +	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
> +	bool is_contig = false;
> +	sector_t sector;
> +
> +	/* we don't support blocksize < PAGE_SIZE quite yet: */
> +	WARN_ON_ONCE(pos != page_offset(page));
> +	WARN_ON_ONCE(plen != PAGE_SIZE);
> +
> +	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
> +		zero_user(page, poff, plen);
> +		SetPageUptodate(page);
> +		goto done;
> +	}
> +
> +	ctx->cur_page_in_bio = true;
> +
> +	/*
> +	 * Try to merge into a previous segment if we can.
> +	 */
> +	sector = iomap_sector(iomap, pos);
> +	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
> +		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
> +			goto done;
> +		is_contig = true;
> +	}
> +
> +	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
> +		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
> +		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +
> +		if (ctx->bio)
> +			submit_bio(ctx->bio);
> +
> +		if (ctx->is_readahead) /* same as readahead_gfp_mask */
> +			gfp |= __GFP_NORETRY | __GFP_NOWARN;
> +		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
> +		ctx->bio->bi_opf = REQ_OP_READ;
> +		if (ctx->is_readahead)
> +			ctx->bio->bi_opf |= REQ_RAHEAD;
> +		ctx->bio->bi_iter.bi_sector = sector;
> +		bio_set_dev(ctx->bio, iomap->bdev);
> +		ctx->bio->bi_end_io = iomap_read_end_io;
> +	}
> +
> +	__bio_add_page(ctx->bio, page, plen, poff);
> +done:
> +	return plen;
> +}
> +
> +int
> +iomap_readpage(struct page *page, const struct iomap_ops *ops)
> +{
> +	struct iomap_readpage_ctx ctx = { .cur_page = page };
> +	struct inode *inode = page->mapping->host;
> +	unsigned poff;
> +	loff_t ret;
> +
> +	WARN_ON_ONCE(page_has_buffers(page));
> +
> +	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
> +		ret = iomap_apply(inode, page_offset(page) + poff,
> +				PAGE_SIZE - poff, 0, ops, &ctx,
> +				iomap_readpage_actor);
> +		if (ret <= 0) {
> +			WARN_ON_ONCE(ret == 0);
> +			SetPageError(page);
> +			break;
> +		}
> +	}
> +
> +	if (ctx.bio) {
> +		submit_bio(ctx.bio);
> +		WARN_ON_ONCE(!ctx.cur_page_in_bio);
> +	} else {
> +		WARN_ON_ONCE(ctx.cur_page_in_bio);
> +		unlock_page(page);
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(iomap_readpage);
> +
> +static struct page *
> +iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
> +		loff_t length, loff_t *done)
> +{
> +	while (!list_empty(pages)) {
> +		struct page *page = lru_to_page(pages);
> +
> +		if (page_offset(page) >= (u64)pos + length)
> +			break;
> +
> +		list_del(&page->lru);
> +		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
> +				GFP_NOFS))
> +			return page;
> +
> +		/*
> +		 * If we already have a page in the page cache at index we are
> +		 * done.  Upper layers don't care if it is uptodate after the
> +		 * readpages call itself as every page gets checked again once
> +		 * actually needed.
> +		 */
> +		*done += PAGE_SIZE;
> +		put_page(page);
> +	}
> +
> +	return NULL;
> +}
> +
> +static loff_t
> +iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
> +		void *data, struct iomap *iomap)
> +{
> +	struct iomap_readpage_ctx *ctx = data;
> +	loff_t done, ret;
> +
> +	for (done = 0; done < length; done += ret) {
> +		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +			ctx->cur_page = NULL;
> +		}
> +		if (!ctx->cur_page) {
> +			ctx->cur_page = iomap_next_page(inode, ctx->pages,
> +					pos, length, &done);
> +			if (!ctx->cur_page)
> +				break;
> +			ctx->cur_page_in_bio = false;
> +		}
> +		ret = iomap_readpage_actor(inode, pos + done, length - done,
> +				ctx, iomap);
> +	}
> +
> +	return done;
> +}
> +
> +int
> +iomap_readpages(struct address_space *mapping, struct list_head *pages,
> +		unsigned nr_pages, const struct iomap_ops *ops)
> +{
> +	struct iomap_readpage_ctx ctx = {
> +		.pages		= pages,
> +		.is_readahead	= true,
> +	};
> +	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> +	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> +	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> +
> +	while (length > 0) {
> +		ret = iomap_apply(mapping->host, pos, length, 0, ops,
> +				&ctx, iomap_readpages_actor);
> +		if (ret <= 0) {
> +			WARN_ON_ONCE(ret == 0);
> +			goto done;
> +		}
> +		pos += ret;
> +		length -= ret;
> +	}
> +	ret = 0;
> +done:
> +	if (ctx.bio)
> +		submit_bio(ctx.bio);
> +	if (ctx.cur_page) {
> +		if (!ctx.cur_page_in_bio)
> +			unlock_page(ctx.cur_page);
> +		put_page(ctx.cur_page);
> +	}
> +	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(iomap_readpages);
> +
>  static void
>  iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
>  {
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index a044a824da85..7300d30ca495 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -9,6 +9,7 @@ struct fiemap_extent_info;
>  struct inode;
>  struct iov_iter;
>  struct kiocb;
> +struct page;
>  struct vm_area_struct;
>  struct vm_fault;
>  
> @@ -88,6 +89,9 @@ struct iomap_ops {
>  
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		const struct iomap_ops *ops);
> +int iomap_readpage(struct page *page, const struct iomap_ops *ops);
> +int iomap_readpages(struct address_space *mapping, struct list_head *pages,
> +		unsigned nr_pages, const struct iomap_ops *ops);
>  int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
>  		const struct iomap_ops *ops);
>  int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
> -- 
> 2.17.0
Dave Chinner May 30, 2018, 11:45 p.m. UTC | #2
On Wed, May 30, 2018 at 11:58:11AM +0200, Christoph Hellwig wrote:
> Simply use iomap_apply to iterate over the file and submit a bio for
> each non-uptodate but mapped region and zero everything else.  Note that
> as-is this cannot be used for file systems with a blocksize smaller than
> the page size, but that support will be added later.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/iomap.c            | 203 +++++++++++++++++++++++++++++++++++++++++-
>  include/linux/iomap.h |   4 +
>  2 files changed, 206 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index b0bc928672af..5e5a266e3325 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -1,6 +1,6 @@
>  /*
>   * Copyright (C) 2010 Red Hat, Inc.
> - * Copyright (c) 2016 Christoph Hellwig.
> + * Copyright (c) 2016-2018 Christoph Hellwig.
>   *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms and conditions of the GNU General Public License,
> @@ -18,6 +18,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/gfp.h>
>  #include <linux/mm.h>
> +#include <linux/mm_inline.h>
>  #include <linux/swap.h>
>  #include <linux/pagemap.h>
>  #include <linux/file.h>
> @@ -102,6 +103,206 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> +static void
> +iomap_read_end_io(struct bio *bio)
> +{
> +	int error = blk_status_to_errno(bio->bi_status);
> +	struct bio_vec *bvec;
> +	int i;
> +
> +	bio_for_each_segment_all(bvec, bio, i)
> +		page_endio(bvec->bv_page, false, error);
> +	bio_put(bio);
> +}
> +
> +struct iomap_readpage_ctx {
> +	struct page		*cur_page;
> +	bool			cur_page_in_bio;
> +	bool			is_readahead;
> +	struct bio		*bio;
> +	struct list_head	*pages;
> +};
> +
> +static loff_t
> +iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> +		struct iomap *iomap)
> +{
> +	struct iomap_readpage_ctx *ctx = data;
> +	struct page *page = ctx->cur_page;
> +	unsigned poff = pos & (PAGE_SIZE - 1);
> +	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
> +	bool is_contig = false;
> +	sector_t sector;
> +
> +	/* we don't support blocksize < PAGE_SIZE quite yet: */

sentence ends with a ".". :)

> +	WARN_ON_ONCE(pos != page_offset(page));
> +	WARN_ON_ONCE(plen != PAGE_SIZE);
> +
> +	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {

In what situation do we get a read request completely beyond EOF?
(comment, please!)

> +		zero_user(page, poff, plen);
> +		SetPageUptodate(page);
> +		goto done;
> +	}

[...]

> +int
> +iomap_readpage(struct page *page, const struct iomap_ops *ops)
> +{
> +	struct iomap_readpage_ctx ctx = { .cur_page = page };
> +	struct inode *inode = page->mapping->host;
> +	unsigned poff;
> +	loff_t ret;
> +
> +	WARN_ON_ONCE(page_has_buffers(page));
> +
> +	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
> +		ret = iomap_apply(inode, page_offset(page) + poff,
> +				PAGE_SIZE - poff, 0, ops, &ctx,
> +				iomap_readpage_actor);
> +		if (ret <= 0) {
> +			WARN_ON_ONCE(ret == 0);
> +			SetPageError(page);
> +			break;
> +		}
> +	}
> +
> +	if (ctx.bio) {
> +		submit_bio(ctx.bio);
> +		WARN_ON_ONCE(!ctx.cur_page_in_bio);
> +	} else {
> +		WARN_ON_ONCE(ctx.cur_page_in_bio);
> +		unlock_page(page);
> +	}
> +	return 0;

Hmmm. If we had an error from iomap_apply, shouldn't we be returning
it here instead of just throwing it away? Some ->readpage callers
appear to ignore the PageError() state on return but do expect
errors to be returned.

[...]

> +iomap_readpages(struct address_space *mapping, struct list_head *pages,
> +		unsigned nr_pages, const struct iomap_ops *ops)
> +{
> +	struct iomap_readpage_ctx ctx = {
> +		.pages		= pages,
> +		.is_readahead	= true,
> +	};
> +	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> +	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> +	loff_t length = last - pos + PAGE_SIZE, ret = 0;

Two lines, please.

> +	while (length > 0) {
> +		ret = iomap_apply(mapping->host, pos, length, 0, ops,
> +				&ctx, iomap_readpages_actor);
> +		if (ret <= 0) {
> +			WARN_ON_ONCE(ret == 0);
> +			goto done;
> +		}
> +		pos += ret;
> +		length -= ret;
> +	}
> +	ret = 0;
> +done:
> +	if (ctx.bio)
> +		submit_bio(ctx.bio);
> +	if (ctx.cur_page) {
> +		if (!ctx.cur_page_in_bio)
> +			unlock_page(ctx.cur_page);
> +		put_page(ctx.cur_page);
> +	}
> +	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));

What error condition is this warning about?

Cheers,

Dave.
Christoph Hellwig May 31, 2018, 6:13 a.m. UTC | #3
On Thu, May 31, 2018 at 09:45:57AM +1000, Dave Chinner wrote:
> sentence ends with a ".". :)

Ok.  This was intended to point to the WARN_ON calls below, but a "."
is fine with me, too.

> 
> > +	WARN_ON_ONCE(pos != page_offset(page));
> > +	WARN_ON_ONCE(plen != PAGE_SIZE);
> > +
> > +	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
> 
> In what situation do we get a read request completely beyond EOF?
> (comment, please!)

This is generally to cover a racing read beyond EOF.  That being said,
I'd have to look up if it can really happen for blocksize == pagesize.

All this becomes moot once small block size support is added, so I think
I'd rather skip the comment and research here for now.

> > +	if (ctx.bio) {
> > +		submit_bio(ctx.bio);
> > +		WARN_ON_ONCE(!ctx.cur_page_in_bio);
> > +	} else {
> > +		WARN_ON_ONCE(ctx.cur_page_in_bio);
> > +		unlock_page(page);
> > +	}
> > +	return 0;
> 
> Hmmm. If we had an error from iomap_apply, shouldn't we be returning
> it here instead of just throwing it away? Some ->readpage callers
> appear to ignore the PageError() state on return but do expect
> errors to be returned.

Both mpage_readpage and block_read_full_page always return 0, so for
now I'd like to stay compatible with them.  Might be worth a full audit
later.
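
(For reference, the generic read path compensates for that convention:
after calling ->readpage, mm/filemap.c re-locks the page and treats a
page that never became uptodate as an I/O error on its own.  Roughly,
as a paraphrase of generic_file_buffered_read() rather than verbatim
code:

	error = mapping->a_ops->readpage(filp, page);
	...
	lock_page(page);
	if (!PageUptodate(page))
		error = -EIO;	/* read failed despite the 0 return */

so a failed read still reaches the reader via the page state.)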

> > +	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> > +	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> > +	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> 
> Two lines, please.

I really like it that way, though..

> > +done:
> > +	if (ctx.bio)
> > +		submit_bio(ctx.bio);
> > +	if (ctx.cur_page) {
> > +		if (!ctx.cur_page_in_bio)
> > +			unlock_page(ctx.cur_page);
> > +		put_page(ctx.cur_page);
> > +	}
> > +	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
> 
> What error condition is this warning about?

Not finishing all pages without an error.  Which wasn't too hard to get
wrong given the arcane readpages calling convention.
Dave Chinner May 31, 2018, 11:59 a.m. UTC | #4
On Thu, May 31, 2018 at 08:13:15AM +0200, Christoph Hellwig wrote:
> On Thu, May 31, 2018 at 09:45:57AM +1000, Dave Chinner wrote:
> > sentence ends with a ".". :)
> 
> Ok.  This was intended to point to the WARN_ON calls below, but a "."
> is fine with me, too.
> 
> > 
> > > +	WARN_ON_ONCE(pos != page_offset(page));
> > > +	WARN_ON_ONCE(plen != PAGE_SIZE);
> > > +
> > > +	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
> > 
> > In what situation do we get a read request completely beyond EOF?
> > (comment, please!)
> 
> This is generally to cover a racing read beyond EOF.  That being said,
> I'd have to look up if it can really happen for blocksize == pagesize.
> 
> All this becomes moot once small block size support is added, so I think
> I'd rather skip the comment and research here for now.

OK.

> > > +	if (ctx.bio) {
> > > +		submit_bio(ctx.bio);
> > > +		WARN_ON_ONCE(!ctx.cur_page_in_bio);
> > > +	} else {
> > > +		WARN_ON_ONCE(ctx.cur_page_in_bio);
> > > +		unlock_page(page);
> > > +	}
> > > +	return 0;
> > 
> > Hmmm. If we had an error from iomap_apply, shouldn't we be returning
> > it here instead of just throwing it away? Some ->readpage callers
> > appear to ignore the PageError() state on return but do expect
> > errors to be returned.
> 
> Both mpage_readpage and block_read_full_page always return 0, so for
> now I'd like to stay compatible with them.  Might be worth a full audit
> later.
> 
> > > +	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
> > > +	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
> > > +	loff_t length = last - pos + PAGE_SIZE, ret = 0;
> > 
> > Two lines, please.
> 
> I really like it that way, though..

Except for the fact that most people's eyes are trained for one line per
declaration and one variable assignment per line. I don't care about
an extra line of code or two, but it's so easy to lose a declaration
of a short variable in all those long declarations and initialisers.
I found myself asking several times through these patchsets "now
where was /that/ variable declared/initialised?".  That's why I'm
asking for it to be changed.
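
I.e. something like:

	loff_t length = last - pos + PAGE_SIZE;
	loff_t ret = 0;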

> > > +done:
> > > +	if (ctx.bio)
> > > +		submit_bio(ctx.bio);
> > > +	if (ctx.cur_page) {
> > > +		if (!ctx.cur_page_in_bio)
> > > +			unlock_page(ctx.cur_page);
> > > +		put_page(ctx.cur_page);
> > > +	}
> > > +	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
> > 
> > What error condition is this warning about?
> 
> Not finishing all pages without an error.  Which wasn't too hard to get
> wrong given the arcane readpages calling convention.

It's crusty old code like this that makes me realise why we have so
many problems with IO error reporting - instead of fixing error
propagation problems when we come across them, we just layer more
crap on top with some undocumented warnings for good measure.

Not really happy about it. Please add comments explaining the crap
you're adding to work around the crappy error propagation issues.
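
Even an explicit one-liner above the check would do, e.g. (illustrative
wording based on your explanation above, not from the patch):

	/*
	 * A clean (ret == 0) return must have consumed every page on
	 * the list; leftovers here mean we stopped early without
	 * reporting an error.
	 */
	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));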

Cheers,

Dave.

Patch

diff --git a/fs/iomap.c b/fs/iomap.c
index b0bc928672af..5e5a266e3325 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,6 +1,6 @@ 
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
@@ -102,6 +103,206 @@  iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static void
+iomap_read_end_io(struct bio *bio)
+{
+	int error = blk_status_to_errno(bio->bi_status);
+	struct bio_vec *bvec;
+	int i;
+
+	bio_for_each_segment_all(bvec, bio, i)
+		page_endio(bvec->bv_page, false, error);
+	bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	bool			is_readahead;
+	struct bio		*bio;
+	struct list_head	*pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	struct page *page = ctx->cur_page;
+	unsigned poff = pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	bool is_contig = false;
+	sector_t sector;
+
+	/* we don't support blocksize < PAGE_SIZE quite yet: */
+	WARN_ON_ONCE(pos != page_offset(page));
+	WARN_ON_ONCE(plen != PAGE_SIZE);
+
+	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+		zero_user(page, poff, plen);
+		SetPageUptodate(page);
+		goto done;
+	}
+
+	ctx->cur_page_in_bio = true;
+
+	/*
+	 * Try to merge into a previous segment if we can.
+	 */
+	sector = iomap_sector(iomap, pos);
+	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+			goto done;
+		is_contig = true;
+	}
+
+	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+
+		if (ctx->is_readahead) /* same as readahead_gfp_mask */
+			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+		ctx->bio->bi_opf = REQ_OP_READ;
+		if (ctx->is_readahead)
+			ctx->bio->bi_opf |= REQ_RAHEAD;
+		ctx->bio->bi_iter.bi_sector = sector;
+		bio_set_dev(ctx->bio, iomap->bdev);
+		ctx->bio->bi_end_io = iomap_read_end_io;
+	}
+
+	__bio_add_page(ctx->bio, page, plen, poff);
+done:
+	return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = { .cur_page = page };
+	struct inode *inode = page->mapping->host;
+	unsigned poff;
+	loff_t ret;
+
+	WARN_ON_ONCE(page_has_buffers(page));
+
+	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+		ret = iomap_apply(inode, page_offset(page) + poff,
+				PAGE_SIZE - poff, 0, ops, &ctx,
+				iomap_readpage_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			SetPageError(page);
+			break;
+		}
+	}
+
+	if (ctx.bio) {
+		submit_bio(ctx.bio);
+		WARN_ON_ONCE(!ctx.cur_page_in_bio);
+	} else {
+		WARN_ON_ONCE(ctx.cur_page_in_bio);
+		unlock_page(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+		loff_t length, loff_t *done)
+{
+	while (!list_empty(pages)) {
+		struct page *page = lru_to_page(pages);
+
+		if (page_offset(page) >= (u64)pos + length)
+			break;
+
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+				GFP_NOFS))
+			return page;
+
+		/*
+		 * If we already have a page in the page cache at index we are
+		 * done.  Upper layers don't care if it is uptodate after the
+		 * readpages call itself as every page gets checked again once
+		 * actually needed.
+		 */
+		*done += PAGE_SIZE;
+		put_page(page);
+	}
+
+	return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	loff_t done, ret;
+
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
+		}
+		if (!ctx->cur_page) {
+			ctx->cur_page = iomap_next_page(inode, ctx->pages,
+					pos, length, &done);
+			if (!ctx->cur_page)
+				break;
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(inode, pos + done, length - done,
+				ctx, iomap);
+	}
+
+	return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = {
+		.pages		= pages,
+		.is_readahead	= true,
+	};
+	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+	while (length > 0) {
+		ret = iomap_apply(mapping->host, pos, length, 0, ops,
+				&ctx, iomap_readpages_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			goto done;
+		}
+		pos += ret;
+		length -= ret;
+	}
+	ret = 0;
+done:
+	if (ctx.bio)
+		submit_bio(ctx.bio);
+	if (ctx.cur_page) {
+		if (!ctx.cur_page_in_bio)
+			unlock_page(ctx.cur_page);
+		put_page(ctx.cur_page);
+	}
+	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a044a824da85..7300d30ca495 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@  struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
 struct kiocb;
+struct page;
 struct vm_area_struct;
 struct vm_fault;
 
@@ -88,6 +89,9 @@  struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,