diff mbox series

[RFC,v6,05/10] iomap/gfs2: Get page in page_prepare handler

Message ID 20230108194034.1444764-6-agruenba@redhat.com (mailing list archive)
State New, archived
Headers show
Series Turn iomap_page_ops into iomap_folio_ops | expand

Commit Message

Andreas Gruenbacher Jan. 8, 2023, 7:40 p.m. UTC
Change the iomap ->page_prepare() handler to get and return a locked
folio instead of doing that in iomap_write_begin().  This allows to
recover from out-of-memory situations in ->page_prepare(), which
eliminates the corresponding error handling code in iomap_write_begin().
The ->put_folio() handler now also isn't called with NULL as the folio
value anymore.

Filesystems are expected to use the iomap_get_folio() helper for getting
locked folios in their ->page_prepare() handlers.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/gfs2/bmap.c         | 21 +++++++++++++--------
 fs/iomap/buffered-io.c | 17 ++++++-----------
 include/linux/iomap.h  |  9 +++++----
 3 files changed, 24 insertions(+), 23 deletions(-)

Comments

Matthew Wilcox Jan. 31, 2023, 7:37 p.m. UTC | #1
On Sun, Jan 08, 2023 at 08:40:29PM +0100, Andreas Gruenbacher wrote:
> +static struct folio *
> +gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
>  {
> +	struct inode *inode = iter->inode;
>  	unsigned int blockmask = i_blocksize(inode) - 1;
>  	struct gfs2_sbd *sdp = GFS2_SB(inode);
>  	unsigned int blocks;
> +	struct folio *folio;
> +	int status;
>  
>  	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
> -	return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> +	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> +	if (status)
> +		return ERR_PTR(status);
> +
> +	folio = iomap_get_folio(iter, pos);
> +	if (IS_ERR(folio))
> +		gfs2_trans_end(sdp);
> +	return folio;
>  }

Hi Andreas,

I didn't think to mention this at the time, but I was reading through
buffered-io.c and this jumped out at me.  For filesystems which support
folios, we pass the entire length of the write (or at least the length
of the remaining iomap length).  That's intended to allow us to decide
how large a folio to allocate at some point in the future.

For GFS2, we do this:

        if (!mapping_large_folio_support(iter->inode->i_mapping))
                len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

I'd like to drop that and pass the full length of the write to
->get_folio().  It looks like you'll have to clamp it yourself at this
point.  I am kind of curious why you do one transaction per page --
I would have thought you'd rather do one transaction for the entire write.
Andreas Gruenbacher Jan. 31, 2023, 9:33 p.m. UTC | #2
On Tue, Jan 31, 2023 at 8:37 PM Matthew Wilcox <willy@infradead.org> wrote:
> On Sun, Jan 08, 2023 at 08:40:29PM +0100, Andreas Gruenbacher wrote:
> > +static struct folio *
> > +gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
> >  {
> > +     struct inode *inode = iter->inode;
> >       unsigned int blockmask = i_blocksize(inode) - 1;
> >       struct gfs2_sbd *sdp = GFS2_SB(inode);
> >       unsigned int blocks;
> > +     struct folio *folio;
> > +     int status;
> >
> >       blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
> > -     return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> > +     status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> > +     if (status)
> > +             return ERR_PTR(status);
> > +
> > +     folio = iomap_get_folio(iter, pos);
> > +     if (IS_ERR(folio))
> > +             gfs2_trans_end(sdp);
> > +     return folio;
> >  }
>
> Hi Andreas,

Hello,

> I didn't think to mention this at the time, but I was reading through
> buffered-io.c and this jumped out at me.  For filesystems which support
> folios, we pass the entire length of the write (or at least the length
> of the remaining iomap length).  That's intended to allow us to decide
> how large a folio to allocate at some point in the future.
>
> For GFS2, we do this:
>
>         if (!mapping_large_folio_support(iter->inode->i_mapping))
>                 len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
>
> I'd like to drop that and pass the full length of the write to
> ->get_folio().  It looks like you'll have to clamp it yourself at this
> point.

sounds reasonable to me.

I see that gfs2_page_add_databufs() hasn't been folio-ized yet, but it
looks like it might just work anway. So gfs2_iomap_get_folio() ...
gfs2_iomap_put_folio() should, in principle, work for requests bigger
than PAGE_SIZE.

Is there a reasonable way of trying it out?

We still want to keep the transaction size somewhat reasonable, but
the maximum size gfs2_iomap_begin() will return for a write is 509
blocks on a 4k-block filesystem, or slightly less than 2 MiB, which
should be fine.

>  I am kind of curious why you do one transaction per page --
> I would have thought you'd rather do one transaction for the entire write.

Only for journaled data writes. We could probably do bigger
transactions even in that case, but we'd rather get rid of data
journaling than encourage it, so we're also not spending a lot of time
on optimizing this case.

Thanks,
Andreas
diff mbox series

Patch

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0c041459677b..41349e09558b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -956,15 +956,25 @@  static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 	goto out;
 }
 
-static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
-				   unsigned len)
+static struct folio *
+gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
 {
+	struct inode *inode = iter->inode;
 	unsigned int blockmask = i_blocksize(inode) - 1;
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	unsigned int blocks;
+	struct folio *folio;
+	int status;
 
 	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
-	return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
+	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
+	if (status)
+		return ERR_PTR(status);
+
+	folio = iomap_get_folio(iter, pos);
+	if (IS_ERR(folio))
+		gfs2_trans_end(sdp);
+	return folio;
 }
 
 static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
@@ -974,11 +984,6 @@  static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
-	if (!folio) {
-		gfs2_trans_end(sdp);
-		return;
-	}
-
 	if (!gfs2_is_stuffed(ip))
 		gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
 				       copied);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index de4a8e5f721a..418519dea2ce 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -609,7 +609,7 @@  static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
 
 	if (page_ops && page_ops->put_folio) {
 		page_ops->put_folio(iter->inode, pos, ret, folio);
-	} else if (folio) {
+	} else {
 		folio_unlock(folio);
 		folio_put(folio);
 	}
@@ -642,17 +642,12 @@  static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 	if (!mapping_large_folio_support(iter->inode->i_mapping))
 		len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
 
-	if (page_ops && page_ops->page_prepare) {
-		status = page_ops->page_prepare(iter->inode, pos, len);
-		if (status)
-			return status;
-	}
-
-	folio = iomap_get_folio(iter, pos);
-	if (IS_ERR(folio)) {
-		__iomap_put_folio(iter, pos, 0, NULL);
+	if (page_ops && page_ops->page_prepare)
+		folio = page_ops->page_prepare(iter, pos, len);
+	else
+		folio = iomap_get_folio(iter, pos);
+	if (IS_ERR(folio))
 		return PTR_ERR(folio);
-	}
 
 	/*
 	 * Now we have a locked folio, before we do anything with it we need to
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 188d14e786a4..d50501781856 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -13,6 +13,7 @@ 
 struct address_space;
 struct fiemap_extent_info;
 struct inode;
+struct iomap_iter;
 struct iomap_dio;
 struct iomap_writepage_ctx;
 struct iov_iter;
@@ -131,12 +132,12 @@  static inline bool iomap_inline_data_valid(const struct iomap *iomap)
  * associated with them.
  *
  * When page_prepare succeeds, put_folio will always be called to do any
- * cleanup work necessary.  In that put_folio call, @folio will be NULL if the
- * associated folio could not be obtained.  When folio is not NULL, put_folio
- * is responsible for unlocking and putting the folio.
+ * cleanup work necessary.  put_folio is responsible for unlocking and putting
+ * @folio.
  */
 struct iomap_page_ops {
-	int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
+	struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos,
+			unsigned len);
 	void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
 			struct folio *folio);