[2/2] iomap: Convert readahead to iomap_iter
diff mbox series

Message ID 20200728173216.7184-3-willy@infradead.org
State New
Headers show
Series
  • Avoid indirect function calls in iomap
Related show

Commit Message

Matthew Wilcox (Oracle) July 28, 2020, 5:32 p.m. UTC
This approach removes at least two indirect function calls from the
readahead path.  Previous call chain (indirect function calls marked *):

xfs_vm_readahead
  iomap_readahead
    iomap_apply
      xfs_read_iomap_begin [*]
      iomap_readahead_actor [*]
        iomap_readpage_actor

New call chain:

xfs_vm_readahead
  xfs_iomap_next_read
  iomi_advance
  iomap_readahead
    iomap_readpage_actor

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 82 ++++++++++++++----------------------------
 fs/xfs/xfs_aops.c      |  9 ++++-
 fs/xfs/xfs_iomap.c     | 15 ++++++++
 fs/xfs/xfs_iomap.h     |  2 ++
 fs/zonefs/super.c      | 20 ++++++++++-
 include/linux/iomap.h  | 10 +++++-
 6 files changed, 79 insertions(+), 59 deletions(-)

Comments

Darrick J. Wong Aug. 11, 2020, 8:56 p.m. UTC | #1
On Tue, Jul 28, 2020 at 06:32:15PM +0100, Matthew Wilcox (Oracle) wrote:
> This approach removes at least two indirect function calls from the
> readahead path.  Previous call chain (indirect function calls marked *):
> 
> xfs_vm_readahead
>   iomap_readahead
>     iomap_apply
>       xfs_read_iomap_begin [*]
>       iomap_readahead_actor [*]
>         iomap_readpage_actor
> 
> New call chain:
> 
> xfs_vm_readahead
>   xfs_iomap_next_read
>   iomi_advance
>   iomap_readahead
>     iomap_readpage_actor

I mostly like this, with a few comments...

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 82 ++++++++++++++----------------------------
>  fs/xfs/xfs_aops.c      |  9 ++++-
>  fs/xfs/xfs_iomap.c     | 15 ++++++++
>  fs/xfs/xfs_iomap.h     |  2 ++
>  fs/zonefs/super.c      | 20 ++++++++++-
>  include/linux/iomap.h  | 10 +++++-
>  6 files changed, 79 insertions(+), 59 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index bcfc288dba3f..fff23ed6a682 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -206,13 +206,6 @@ iomap_read_end_io(struct bio *bio)
>  	bio_put(bio);
>  }
>  
> -struct iomap_readpage_ctx {
> -	struct page		*cur_page;
> -	bool			cur_page_in_bio;
> -	struct bio		*bio;
> -	struct readahead_control *rac;
> -};
> -
>  static void
>  iomap_read_inline_data(struct inode *inode, struct page *page,
>  		struct iomap *iomap)
> @@ -369,35 +362,10 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
>  }
>  EXPORT_SYMBOL_GPL(iomap_readpage);
>  
> -static loff_t
> -iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap, struct iomap *srcmap)
> -{
> -	struct iomap_readpage_ctx *ctx = data;
> -	loff_t done, ret;
> -
> -	for (done = 0; done < length; done += ret) {
> -		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
> -			if (!ctx->cur_page_in_bio)
> -				unlock_page(ctx->cur_page);
> -			put_page(ctx->cur_page);
> -			ctx->cur_page = NULL;
> -		}
> -		if (!ctx->cur_page) {
> -			ctx->cur_page = readahead_page(ctx->rac);
> -			ctx->cur_page_in_bio = false;
> -		}
> -		ret = iomap_readpage_actor(inode, pos + done, length - done,
> -				ctx, iomap, srcmap);
> -	}
> -
> -	return done;
> -}
> -
>  /**
>   * iomap_readahead - Attempt to read pages from a file.
> + * @iomi: The iomap iterator for this operation.
>   * @rac: Describes the pages to be read.
> - * @ops: The operations vector for the filesystem.
>   *
>   * This function is for filesystems to call to implement their readahead
>   * address_space operation.
> @@ -409,35 +377,37 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
>   * function is called with memalloc_nofs set, so allocations will not cause
>   * the filesystem to be reentered.
>   */
> -void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
> +loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
>  {
> -	struct inode *inode = rac->mapping->host;
> -	loff_t pos = readahead_pos(rac);
> -	loff_t length = readahead_length(rac);
> -	struct iomap_readpage_ctx ctx = {
> -		.rac	= rac,
> -	};
> -
> -	trace_iomap_readahead(inode, readahead_count(rac));
> +	loff_t done, ret, length = iomap_length(iomi);
>  
> -	while (length > 0) {
> -		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
> -				&ctx, iomap_readahead_actor);
> -		if (ret <= 0) {
> -			WARN_ON_ONCE(ret == 0);
> -			break;
> +	for (done = 0; done < length; done += ret) {
> +		if (ctx->cur_page && offset_in_page(iomi->pos + done) == 0) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +			ctx->cur_page = NULL;
>  		}
> -		pos += ret;
> -		length -= ret;
> +		if (!ctx->cur_page) {
> +			ctx->cur_page = readahead_page(ctx->rac);
> +			ctx->cur_page_in_bio = false;
> +		}
> +		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
> +				length - done, ctx,
> +				&iomi->iomap, &iomi->srcmap);
>  	}
>  
> -	if (ctx.bio)
> -		submit_bio(ctx.bio);
> -	if (ctx.cur_page) {
> -		if (!ctx.cur_page_in_bio)
> -			unlock_page(ctx.cur_page);
> -		put_page(ctx.cur_page);
> +	if (iomi->len == done) {
> +		if (ctx->bio)
> +			submit_bio(ctx->bio);
> +		if (ctx->cur_page) {
> +			if (!ctx->cur_page_in_bio)
> +				unlock_page(ctx->cur_page);
> +			put_page(ctx->cur_page);
> +		}
>  	}
> +
> +	return done;
>  }
>  EXPORT_SYMBOL_GPL(iomap_readahead);
>  
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index b35611882ff9..2884752e40e8 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -625,7 +625,14 @@ STATIC void
>  xfs_vm_readahead(
>  	struct readahead_control	*rac)
>  {
> -	iomap_readahead(rac, &xfs_read_iomap_ops);
> +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> +			readahead_length(rac), 0);
> +	struct iomap_readpage_ctx ctx = {
> +		.rac = rac,
> +	};
> +
> +	while (iomap_iter(&iomi, xfs_iomap_next_read))
> +		iomi.copied = iomap_readahead(&iomi, &ctx);

Why not have iomap_readahead set iomi.copied on its way out?  The actor
function is supposed to set iomi.ret if an error happens, right?

Oh wait no, the actor function returns a positive copied value, or a
negative error code, and then it's up to the _next_read function to
notice if copied is negative, stuff it in ret, and then return false to
stop the iteration?

>  }
>  
>  static int
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 0e3f62cde375..66f2fcaf136e 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -1150,6 +1150,21 @@ const struct iomap_ops xfs_read_iomap_ops = {
>  	.iomap_begin		= xfs_read_iomap_begin,
>  };
>  
> +int
> +xfs_iomap_next_read(
> +	const struct iomap_iter *iomi,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)

Aren't these last two parameters already in the iomap iter?
Are they passed separately to work around the pointer being const?

> +{
> +	if (iomi->copied < 0)
> +		return iomi->copied;

Is this boilerplate going to end up in every single iomap_next_t
function?  If so, it should probably just go in iomap_iter prior to the
next() call, right?

I also wonder if these functions (and the typedef) ought to be called
iomap_iter_advance_t since that's what they do -- pick up the status
from the last round, and advance the iterator to the next mapping that
we want to process.

> +	if (iomi->copied >= iomi->len)
> +		return 0;

Er... if we copied more than we asked for, doesn't that imply something
bad just happened?

> +
> +	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);

Would be kinda nice if you could just pass the whole iomap_iter, but I
get that we're probably stuck with this until the entirety gets
converted.

--D

> +}
> +
>  static int
>  xfs_seek_iomap_begin(
>  	struct inode		*inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 7d3703556d0e..1b1fa225e938 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -46,4 +46,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
>  extern const struct iomap_ops xfs_seek_iomap_ops;
>  extern const struct iomap_ops xfs_xattr_iomap_ops;
>  
> +int xfs_iomap_next_read(const struct iomap_iter *iomi, struct iomap *iomap,
> +		struct iomap *srcmap);
>  #endif /* __XFS_IOMAP_H__*/
> diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
> index 07bc42d62673..4842b85ce36d 100644
> --- a/fs/zonefs/super.c
> +++ b/fs/zonefs/super.c
> @@ -70,6 +70,17 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  	return 0;
>  }
>  
> +static int zonefs_iomap_next(const struct iomap_iter *iomi,
> +		struct iomap *iomap, struct iomap *srcmap)
> +{
> +	if (iomi->copied < 0)
> +		return iomi->copied;
> +	if (iomi->copied >= iomi->len)
> +		return 0;
> +	return zonefs_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
> +}
> +
>  static const struct iomap_ops zonefs_iomap_ops = {
>  	.iomap_begin	= zonefs_iomap_begin,
>  };
> @@ -81,7 +92,14 @@ static int zonefs_readpage(struct file *unused, struct page *page)
>  
>  static void zonefs_readahead(struct readahead_control *rac)
>  {
> -	iomap_readahead(rac, &zonefs_iomap_ops);
> +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> +			readahead_length(rac), 0);
> +	struct iomap_readpage_ctx ctx = {
> +		.rac = rac,
> +	};
> +
> +	while (iomap_iter(&iomi, zonefs_iomap_next))
> +		iomi.copied = iomap_readahead(&iomi, &ctx);
>  }
>  
>  /*
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index fe58e68ec0c1..dd9bfed85c4f 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -212,7 +212,6 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
>  ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
>  		const struct iomap_ops *ops);
>  int iomap_readpage(struct page *page, const struct iomap_ops *ops);
> -void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
>  int iomap_set_page_dirty(struct page *page);
>  int iomap_is_partially_uptodate(struct page *page, unsigned long from,
>  		unsigned long count);
> @@ -299,6 +298,15 @@ int iomap_writepages(struct address_space *mapping,
>  		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
>  		const struct iomap_writeback_ops *ops);
>  
> +struct iomap_readpage_ctx {
> +	struct page		*cur_page;
> +	bool			cur_page_in_bio;
> +	struct bio		*bio;
> +	struct readahead_control *rac;
> +};
> +
> +loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
> +
>  /*
>   * Flags for direct I/O ->end_io:
>   */
> -- 
> 2.27.0
>
Matthew Wilcox (Oracle) Aug. 11, 2020, 10:31 p.m. UTC | #2
On Tue, Aug 11, 2020 at 01:56:13PM -0700, Darrick J. Wong wrote:
> > @@ -625,7 +625,14 @@ STATIC void
> >  xfs_vm_readahead(
> >  	struct readahead_control	*rac)
> >  {
> > -	iomap_readahead(rac, &xfs_read_iomap_ops);
> > +	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
> > +			readahead_length(rac), 0);
> > +	struct iomap_readpage_ctx ctx = {
> > +		.rac = rac,
> > +	};
> > +
> > +	while (iomap_iter(&iomi, xfs_iomap_next_read))
> > +		iomi.copied = iomap_readahead(&iomi, &ctx);
> 
> Why not have iomap_readahead set iomi.copied on its way out?  The actor
> function is supposed to set iomi.ret if an error happens, right?

I actually wanted to make iomap_readahead take a const pointer.
This should do the trick.

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index fff23ed6a682..3ca128a3b044 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -377,7 +377,8 @@ EXPORT_SYMBOL_GPL(iomap_readpage);
  * function is called with memalloc_nofs set, so allocations will not cause
  * the filesystem to be reentered.
  */
-loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
+loff_t iomap_readahead(const struct iomap_iter *iomi,
+		struct iomap *iomap, struct iomap_readpage_ctx *ctx)
 {
 	loff_t done, ret, length = iomap_length(iomi);
 
@@ -393,8 +394,7 @@ loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
 			ctx->cur_page_in_bio = false;
 		}
 		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
-				length - done, ctx,
-				&iomi->iomap, &iomi->srcmap);
+				length - done, ctx, iomap, NULL);
 	}
 
 	if (iomi->len == done) {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2884752e40e8..62777daefe94 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -632,7 +632,7 @@ xfs_vm_readahead(
 	};
 
 	while (iomap_iter(&iomi, xfs_iomap_next_read))
-		iomi.copied = iomap_readahead(&iomi, &ctx);
+		iomi.copied = iomap_readahead(&iomi, &iomi.iomap, &ctx);
 }
 
 static int
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 4842b85ce36d..6ae51bf1d77c 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -99,7 +99,7 @@ static void zonefs_readahead(struct readahead_control *rac)
 	};
 
 	while (iomap_iter(&iomi, zonefs_iomap_next))
-		iomi.copied = iomap_readahead(&iomi, &ctx);
+		iomi.copied = iomap_readahead(&iomi, &iomi.iomap, &ctx);
 }
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index dd9bfed85c4f..11a104129a04 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -305,7 +305,8 @@ struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
+loff_t iomap_readahead(const struct iomap_iter *, struct iomap *,
+		struct iomap_readpage_ctx *);
 
 /*
  * Flags for direct I/O ->end_io:

> Oh wait no, the actor function returns a positive copied value, or a
> negative error code, and then it's up to the _next_read function to
> notice if copied is negative, stuff it in ret, and then return false to
> stop the iteration?

I want to handle all the changes to iomap_iter in iomap_iter() and
iomi_advance() so people writing new things that use iomap_iter don't
need to think about what they should modify.  Just return the error;
done.

One of the more convoluted bits of this is making sure that both the
filesystem and the body of the loop get the chance to clean up their state
if the other encounters an error.  So if 'copied' is set to an errno by
the body, then we call next() anyway (and stop the iteration).  And if
next() returns an error, we iterate the body once more.  We'll still
call next() again even if it did return an error, because it might not
have realised that returning a completely bogus iomap was an error.

> > +int
> > +xfs_iomap_next_read(
> > +	const struct iomap_iter *iomi,
> > +	struct iomap		*iomap,
> > +	struct iomap		*srcmap)
> 
> Aren't these last two parameters already in the iomap iter?
> Are they passed separately to work around the pointer being const?

Exactly.

> > +{
> > +	if (iomi->copied < 0)
> > +		return iomi->copied;
> 
> Is this boilerplate going to end up in every single iomap_next_t
> function?  If so, it should probably just go in iomap_iter prior to the
> next() call, right?

This is to give the next_t the opportunity to clean up after itself.
ie it's for the things currently done in ->iomap_end().  So when we
replace xfs_buffered_write_iomap_ops, you'll see it used then.

> > +	if (iomi->copied >= iomi->len)
> > +		return 0;
> 
> Er... if we copied more than we asked for, doesn't that imply something
> bad just happened?

erm ... maybe?  We don't currently sanity-check the return value from
actor() in iomap_apply().  Perhaps we should?

> > +
> > +	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
> > +			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
> 
> Would be kinda nice if you could just pass the whole iomap_iter, but I
> get that we're probably stuck with this until the entirety gets
> converted.

Yeah.  I could probably do it the other way round where
xfs_read_iomap_begin() constructs an iomap_iter on the stack and passes
it to xfs_iomap_next_read().  I don't think it makes much difference.

Patch
diff mbox series

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index bcfc288dba3f..fff23ed6a682 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -206,13 +206,6 @@  iomap_read_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
-struct iomap_readpage_ctx {
-	struct page		*cur_page;
-	bool			cur_page_in_bio;
-	struct bio		*bio;
-	struct readahead_control *rac;
-};
-
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
@@ -369,35 +362,10 @@  iomap_readpage(struct page *page, const struct iomap_ops *ops)
 }
 EXPORT_SYMBOL_GPL(iomap_readpage);
 
-static loff_t
-iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap, struct iomap *srcmap)
-{
-	struct iomap_readpage_ctx *ctx = data;
-	loff_t done, ret;
-
-	for (done = 0; done < length; done += ret) {
-		if (ctx->cur_page && offset_in_page(pos + done) == 0) {
-			if (!ctx->cur_page_in_bio)
-				unlock_page(ctx->cur_page);
-			put_page(ctx->cur_page);
-			ctx->cur_page = NULL;
-		}
-		if (!ctx->cur_page) {
-			ctx->cur_page = readahead_page(ctx->rac);
-			ctx->cur_page_in_bio = false;
-		}
-		ret = iomap_readpage_actor(inode, pos + done, length - done,
-				ctx, iomap, srcmap);
-	}
-
-	return done;
-}
-
 /**
  * iomap_readahead - Attempt to read pages from a file.
+ * @iomi: The iomap iterator for this operation.
  * @rac: Describes the pages to be read.
- * @ops: The operations vector for the filesystem.
  *
  * This function is for filesystems to call to implement their readahead
  * address_space operation.
@@ -409,35 +377,37 @@  iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length,
  * function is called with memalloc_nofs set, so allocations will not cause
  * the filesystem to be reentered.
  */
-void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
+loff_t iomap_readahead(struct iomap_iter *iomi, struct iomap_readpage_ctx *ctx)
 {
-	struct inode *inode = rac->mapping->host;
-	loff_t pos = readahead_pos(rac);
-	loff_t length = readahead_length(rac);
-	struct iomap_readpage_ctx ctx = {
-		.rac	= rac,
-	};
-
-	trace_iomap_readahead(inode, readahead_count(rac));
+	loff_t done, ret, length = iomap_length(iomi);
 
-	while (length > 0) {
-		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
-				&ctx, iomap_readahead_actor);
-		if (ret <= 0) {
-			WARN_ON_ONCE(ret == 0);
-			break;
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && offset_in_page(iomi->pos + done) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
 		}
-		pos += ret;
-		length -= ret;
+		if (!ctx->cur_page) {
+			ctx->cur_page = readahead_page(ctx->rac);
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(iomi->inode, iomi->pos + done,
+				length - done, ctx,
+				&iomi->iomap, &iomi->srcmap);
 	}
 
-	if (ctx.bio)
-		submit_bio(ctx.bio);
-	if (ctx.cur_page) {
-		if (!ctx.cur_page_in_bio)
-			unlock_page(ctx.cur_page);
-		put_page(ctx.cur_page);
+	if (iomi->len == done) {
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+		if (ctx->cur_page) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+		}
 	}
+
+	return done;
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index b35611882ff9..2884752e40e8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -625,7 +625,14 @@  STATIC void
 xfs_vm_readahead(
 	struct readahead_control	*rac)
 {
-	iomap_readahead(rac, &xfs_read_iomap_ops);
+	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
+			readahead_length(rac), 0);
+	struct iomap_readpage_ctx ctx = {
+		.rac = rac,
+	};
+
+	while (iomap_iter(&iomi, xfs_iomap_next_read))
+		iomi.copied = iomap_readahead(&iomi, &ctx);
 }
 
 static int
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0e3f62cde375..66f2fcaf136e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1150,6 +1150,21 @@  const struct iomap_ops xfs_read_iomap_ops = {
 	.iomap_begin		= xfs_read_iomap_begin,
 };
 
+int
+xfs_iomap_next_read(
+	const struct iomap_iter *iomi,
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
+{
+	if (iomi->copied < 0)
+		return iomi->copied;
+	if (iomi->copied >= iomi->len)
+		return 0;
+
+	return xfs_read_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
+			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
+}
+
 static int
 xfs_seek_iomap_begin(
 	struct inode		*inode,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 7d3703556d0e..1b1fa225e938 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -46,4 +46,6 @@  extern const struct iomap_ops xfs_read_iomap_ops;
 extern const struct iomap_ops xfs_seek_iomap_ops;
 extern const struct iomap_ops xfs_xattr_iomap_ops;
 
+int xfs_iomap_next_read(const struct iomap_iter *iomi, struct iomap *iomap,
+		struct iomap *srcmap);
 #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 07bc42d62673..4842b85ce36d 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -70,6 +70,17 @@  static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	return 0;
 }
 
+static int zonefs_iomap_next(const struct iomap_iter *iomi,
+		struct iomap *iomap, struct iomap *srcmap)
+{
+	if (iomi->copied < 0)
+		return iomi->copied;
+	if (iomi->copied >= iomi->len)
+		return 0;
+	return zonefs_iomap_begin(iomi->inode, iomi->pos + iomi->copied,
+			iomi->len - iomi->copied, iomi->flags, iomap, srcmap);
+}
+
 static const struct iomap_ops zonefs_iomap_ops = {
 	.iomap_begin	= zonefs_iomap_begin,
 };
@@ -81,7 +92,14 @@  static int zonefs_readpage(struct file *unused, struct page *page)
 
 static void zonefs_readahead(struct readahead_control *rac)
 {
-	iomap_readahead(rac, &zonefs_iomap_ops);
+	IOMAP_ITER(iomi, rac->mapping->host, readahead_pos(rac),
+			readahead_length(rac), 0);
+	struct iomap_readpage_ctx ctx = {
+		.rac = rac,
+	};
+
+	while (iomap_iter(&iomi, zonefs_iomap_next))
+		iomi.copied = iomap_readahead(&iomi, &ctx);
 }
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index fe58e68ec0c1..dd9bfed85c4f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -212,7 +212,6 @@  loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
-void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 int iomap_set_page_dirty(struct page *page);
 int iomap_is_partially_uptodate(struct page *page, unsigned long from,
 		unsigned long count);
@@ -299,6 +298,15 @@  int iomap_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops);
 
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	struct bio		*bio;
+	struct readahead_control *rac;
+};
+
+loff_t iomap_readahead(struct iomap_iter *, struct iomap_readpage_ctx *);
+
 /*
  * Flags for direct I/O ->end_io:
  */