
[16/13] iomap: Make readpage synchronous

Message ID 20200917225647.26481-3-willy@infradead.org (mailing list archive)
State New, archived
Series Allow readpage to return a locked page

Commit Message

Matthew Wilcox Sept. 17, 2020, 10:56 p.m. UTC
A synchronous readpage lets us report the actual errno instead of
ineffectively setting PageError.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 64 +++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 26 deletions(-)

Comments

Christoph Hellwig Sept. 19, 2020, 6:39 a.m. UTC | #1
I think just adding the completion and status to struct
iomap_readpage_ctx would be a lot easier to follow, at the cost
of bloating the structure a bit for the readahead case.  If we
are really concerned about that, the completion could be directly
on the iomap_readpage stack and we'd pass a pointer.
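[Editorial note: a minimal sketch of the variant described above, with the
synchronous-completion state folded into the per-call context.  The field
layout is illustrative only and not taken from the patch.]

/*
 * Illustrative only: instead of a separate struct iomap_sync_end, the
 * status and completion live in the context iomap_readpage already builds
 * on its stack; the readahead path simply never waits on ->done.
 */
struct iomap_readpage_ctx {
	struct page		*cur_page;
	bool			cur_page_in_bio;
	struct bio		*bio;
	struct readahead_control *rac;
	blk_status_t		status;		/* new, illustrative */
	struct completion	done;		/* new, illustrative */
};

The read end_io handler would then reach this through bio->bi_private, either
as the whole ctx or, as suggested, as a pointer to just the completion kept on
the iomap_readpage stack.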
Christoph Hellwig Sept. 19, 2020, 6:43 a.m. UTC | #2
On Sat, Sep 19, 2020 at 07:39:08AM +0100, Christoph Hellwig wrote:
> I think just adding the completion and status to struct
> iomap_readpage_ctx would be a lot easier to follow, at the cost
> of bloating the structure a bit for the readahead case.  If we
> are really concerned about that, the completion could be directly
> on the iomap_readpage stack and we'd pass a pointer.

Another option would be to chain the bios and use submit_bio_wait.
That would take care of the completion and the status propagation
without extra fields and extra code in iomap.
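[Editorial note: a rough sketch of that shape.  The helper name is invented
here for illustration, and the readahead path would still have to stay
asynchronous.]

/*
 * Sketch: fold each earlier bio's completion into the last one with
 * bio_chain(), then wait on the last bio.  submit_bio_wait() returns an
 * errno if any of the chained bios failed.
 */
static int iomap_read_bios_sync(struct bio *prev, struct bio *last)
{
	if (prev) {
		bio_chain(prev, last);
		submit_bio(prev);
	}
	return submit_bio_wait(last);
}

This is roughly the pattern helpers like blkdev_issue_zeroout() use to wait on
a chain of bios.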
Matthew Wilcox Sept. 19, 2020, 5:03 p.m. UTC | #3
On Sat, Sep 19, 2020 at 07:43:08AM +0100, Christoph Hellwig wrote:
> On Sat, Sep 19, 2020 at 07:39:08AM +0100, Christoph Hellwig wrote:
> > I think just adding the completion and status to struct
> > iomap_readpage_ctx would be a lot easier to follow, at the cost
> > of bloating the structure a bit for the readahead case.  If we
> > are really concerned about that, the completion could be directly
> > on the iomap_readpage stack and we'd pass a pointer.
> 
> Another option would be to chain the bios and use submit_bio_wait.
> That would take care of the completion and the status propagation
> without extra fields and extra code in iomap.

But it wouldn't let us mark some blocks on the page as Uptodate, would it?
As I read the code, chaining two BIOs together will call the parent's
bi_end_io only once both the child and the parent BIOs have completed,
but by the time the parent's bi_end_io is called, the child bio has
already been put and we can't iterate over its bio_vec.

Maybe I misread the code; bio chaining does not seem to be well
documented.
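[Editorial note: for reference, the chained-completion path in block/bio.c
looks roughly like the following.  This is paraphrased from memory, so treat
it as a reading aid rather than a verbatim quote; it does support the reading
above, since the child bio is dropped before the parent's bi_end_io runs.]

static struct bio *__bio_chain_endio(struct bio *bio)
{
	struct bio *parent = bio->bi_private;

	/* propagate the child's error, then drop the child bio */
	if (bio->bi_status && !parent->bi_status)
		parent->bi_status = bio->bi_status;
	bio_put(bio);
	return parent;
}

static void bio_chain_endio(struct bio *bio)
{
	/*
	 * By the time the parent's bi_end_io runs, the child's bio_vecs are
	 * no longer reachable through the child bio.
	 */
	bio_endio(__bio_chain_endio(bio));
}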
Matthew Wilcox Sept. 19, 2020, 5:10 p.m. UTC | #4
On Sat, Sep 19, 2020 at 07:39:08AM +0100, Christoph Hellwig wrote:
> I think just adding the completion and status to struct
> iomap_readpage_ctx would be a lot easier to follow, at the cost
> of bloating the structure a bit for the readahead case.  If we
> are really concerned about that, the completion could be directly
> on the iomap_readpage stack and we'd pass a pointer.

We could do that.  I was intending to reuse the code for the write_begin
path so that a pathological case where a page straddles multiple extents
can be handled by sending multiple BIOs and waiting on all of them at
the same time, instead of the current way of sending a BIO, waiting for
it to complete, sending a second BIO, waiting for it to complete, ...

I haven't fully got my head around how to do that effectively yet.
The iomap buffered write path assumes that extents are larger than page
size and that you're going to get multiple pages per extent, whereas the
situation could be reversed and we might need to stitch together multiple extents
in order to bring a page Uptodate.  I also don't yet understand why
we read the current contents of a block when we're going to completely
overwrite it with data.
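[Editorial note: one hypothetical shape for waiting on several bios at once.
All names below are invented for illustration and are not part of the patch:
a single wait structure is shared by every bio needed to bring the page
Uptodate and is completed only when the last bio finishes, much like
iop->read_count already works in the patch below.]

/*
 * Hypothetical sketch: one waiter for N bios covering a single page.
 * "pending" plays the role iop->read_count plays in the patch below.
 */
struct iomap_page_read {
	blk_status_t		status;
	struct completion	done;
	atomic_t		pending;
};

static void iomap_page_read_end_io(struct bio *bio)
{
	struct iomap_page_read *pr = bio->bi_private;

	/* keep the first error, as the patch does for iomap_sync_end */
	if (bio->bi_status && pr->status == BLK_STS_OK)
		pr->status = bio->bi_status;
	if (atomic_dec_and_test(&pr->pending))
		complete(&pr->done);
	bio_put(bio);
}

The submitter would bump "pending" once per bio before submission and then do
a single wait_for_completion(&pr->done) for the whole page.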

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 13b56d656337..aec95996bd4b 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -146,9 +146,6 @@  void iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
 	unsigned long flags;
 	unsigned int i;
 
-	if (PageError(page))
-		return;
-
 	if (!iop) {
 		SetPageUptodate(page);
 		return;
@@ -167,32 +164,41 @@  void iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
 	spin_unlock_irqrestore(&iop->uptodate_lock, flags);
 }
 
-static void
-iomap_read_page_end_io(struct bio_vec *bvec, int error)
+struct iomap_sync_end {
+	blk_status_t status;
+	struct completion done;
+};
+
+static void iomap_read_page_end_io(struct bio_vec *bvec,
+		struct iomap_sync_end *end, bool error)
 {
 	struct page *page = bvec->bv_page;
 	struct iomap_page *iop = to_iomap_page(page);
 
-	if (unlikely(error)) {
-		ClearPageUptodate(page);
-		SetPageError(page);
-	} else {
+	if (!error)
 		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
-	}
 
-	if (!iop || atomic_dec_and_test(&iop->read_count))
-		unlock_page(page);
+	if (!iop || atomic_dec_and_test(&iop->read_count)) {
+		if (end)
+			complete(&end->done);
+		else
+			unlock_page(page);
+	}
 }
 
 static void
 iomap_read_end_io(struct bio *bio)
 {
-	int error = blk_status_to_errno(bio->bi_status);
+	struct iomap_sync_end *end = bio->bi_private;
 	struct bio_vec *bvec;
 	struct bvec_iter_all iter_all;
 
+	/* Capture the first error */
+	if (end && end->status == BLK_STS_OK)
+		end->status = bio->bi_status;
+
 	bio_for_each_segment_all(bvec, bio, iter_all)
-		iomap_read_page_end_io(bvec, error);
+		iomap_read_page_end_io(bvec, end, bio->bi_status != BLK_STS_OK);
 	bio_put(bio);
 }
 
@@ -201,6 +207,7 @@  struct iomap_readpage_ctx {
 	bool			cur_page_in_bio;
 	struct bio		*bio;
 	struct readahead_control *rac;
+	struct iomap_sync_end	*end;
 };
 
 static void
@@ -307,6 +314,7 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 			ctx->bio->bi_opf |= REQ_RAHEAD;
 		ctx->bio->bi_iter.bi_sector = sector;
 		bio_set_dev(ctx->bio, iomap->bdev);
+		ctx->bio->bi_private = ctx->end;
 		ctx->bio->bi_end_io = iomap_read_end_io;
 	}
 
@@ -324,22 +332,25 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 int
 iomap_readpage(struct page *page, const struct iomap_ops *ops)
 {
-	struct iomap_readpage_ctx ctx = { .cur_page = page };
+	struct iomap_sync_end end;
+	struct iomap_readpage_ctx ctx = { .cur_page = page, .end = &end, };
 	struct inode *inode = page->mapping->host;
 	unsigned poff;
 	loff_t ret;
 
 	trace_iomap_readpage(page->mapping->host, 1);
 
+	end.status = BLK_STS_OK;
+	init_completion(&end.done);
+
 	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
 		ret = iomap_apply(inode, page_offset(page) + poff,
 				PAGE_SIZE - poff, 0, ops, &ctx,
 				iomap_readpage_actor);
-		if (ret <= 0) {
-			WARN_ON_ONCE(ret == 0);
-			SetPageError(page);
+		if (WARN_ON_ONCE(ret == 0))
+			ret = -EIO;
+		if (ret < 0)
 			break;
-		}
 	}
 
 	if (ctx.bio) {
@@ -347,15 +358,16 @@  iomap_readpage(struct page *page, const struct iomap_ops *ops)
 		WARN_ON_ONCE(!ctx.cur_page_in_bio);
 	} else {
 		WARN_ON_ONCE(ctx.cur_page_in_bio);
-		unlock_page(page);
+		complete(&end.done);
 	}
 
-	/*
-	 * Just like mpage_readahead and block_read_full_page we always
-	 * return 0 and just mark the page as PageError on errors.  This
-	 * should be cleaned up all through the stack eventually.
-	 */
-	return 0;
+	wait_for_completion(&end.done);
+	if (ret >= 0)
+		ret = blk_status_to_errno(end.status);
+	if (ret == 0)
+		return AOP_UPDATED_PAGE;
+	unlock_page(page);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_readpage);