
[v2] iomap: Move page_done callback under the folio lock

Message ID 20221214102409.1857526-1-agruenba@redhat.com
State New, archived
Series [v2] iomap: Move page_done callback under the folio lock

Commit Message

Andreas Gruenbacher Dec. 14, 2022, 10:24 a.m. UTC
Move the ->page_done() call in iomap_write_end() under the folio lock.
This closes a race between journaled data writes and the shrinker in
gfs2.  What's happening is that gfs2_iomap_page_done() is called after
the page has been unlocked, so try_to_free_buffers() can come in and
free the buffers while gfs2_iomap_page_done() is trying to add them to
the current transaction.  The folio lock prevents that from happening.

The only current user of ->page_done() is gfs2, so other filesystems are
not affected.  Still, to catch out any new users, switch from page to
folio in ->page_done().

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c         |  7 ++++---
 fs/iomap/buffered-io.c |  4 ++--
 include/linux/iomap.h  | 10 +++++-----
 3 files changed, 11 insertions(+), 10 deletions(-)
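
[Editorial note: for illustration, the pre-patch ordering in iomap_write_end() that opens the race window, simplified from the lines this patch moves; error handling and unrelated details omitted:]

	folio_unlock(folio);
	/* <-- race window: with the folio unlocked, the shrinker can
	 *     call try_to_free_buffers() and free the buffer heads
	 *     that gfs2_iomap_page_done() is about to add to the
	 *     current transaction */
	if (page_ops && page_ops->page_done)
		page_ops->page_done(iter->inode, pos, ret, &folio->page);
	folio_put(folio);
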

Comments

Andreas Gruenbacher Dec. 15, 2022, 8:13 p.m. UTC | #1
On Wed, Dec 14, 2022 at 11:24 AM Andreas Gruenbacher
<agruenba@redhat.com> wrote:
>
> Move the ->page_done() call in iomap_write_end() under the folio lock.
> This closes a race between journaled data writes and the shrinker in
> gfs2.  What's happening is that gfs2_iomap_page_done() is called after
> the page has been unlocked, so try_to_free_buffers() can come in and
> free the buffers while gfs2_iomap_page_done() is trying to add them to
> the current transaction.  The folio lock prevents that from happening.
>
> The only current user of ->page_done() is gfs2, so other filesystems are
> not affected.  Still, to catch out any new users, switch from page to
> folio in ->page_done().
>
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> ---
>  fs/gfs2/bmap.c         |  7 ++++---
>  fs/iomap/buffered-io.c |  4 ++--
>  include/linux/iomap.h  | 10 +++++-----
>  3 files changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index e7537fd305dd..c4ee47f8e499 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -968,14 +968,15 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
>  }
>
>  static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
> -                                unsigned copied, struct page *page)
> +                                unsigned copied, struct folio *folio)
>  {
>         struct gfs2_trans *tr = current->journal_info;
>         struct gfs2_inode *ip = GFS2_I(inode);
>         struct gfs2_sbd *sdp = GFS2_SB(inode);
>
> -       if (page && !gfs2_is_stuffed(ip))
> -               gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
> +       if (folio && !gfs2_is_stuffed(ip))
> +               gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
> +                                      copied);
>
>         if (tr->tr_num_buf_new)
>                 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);

This is still screwed up. We really need to unlock the page before
calling into __mark_inode_dirty() and ending the transaction. The
current page_done() hook would force us to then re-lock the page just
so that the caller can unlock it again. This just doesn't make sense,
particularly since the page_prepare and page_done hooks only exist to
allow gfs2 to do data journaling via iomap. I'll follow up with a more
useful approach ...
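
Roughly, what we want is a hook that is called with the folio still
locked and that takes over unlocking it, so gfs2 can add the buffers
under the lock and drop it before dirtying the inode and ending the
transaction. A sketch only (illustrative shape, not the final
interface):

	if (folio) {
		if (!gfs2_is_stuffed(ip))
			gfs2_page_add_databufs(ip, &folio->page,
					       offset_in_page(pos), copied);
		folio_unlock(folio);	/* drop the lock first ... */
	}
	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);	/* ... then dirty */
	gfs2_trans_end(sdp);	/* ... and end the transaction */
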

> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 91ee0b308e13..d988c1bedf70 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -714,12 +714,12 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
>                 i_size_write(iter->inode, pos + ret);
>                 iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
>         }
> +       if (page_ops && page_ops->page_done)
> +               page_ops->page_done(iter->inode, pos, ret, folio);
>         folio_unlock(folio);
>
>         if (old_size < pos)
>                 pagecache_isize_extended(iter->inode, old_size, pos);
> -       if (page_ops && page_ops->page_done)
> -               page_ops->page_done(iter->inode, pos, ret, &folio->page);
>         folio_put(folio);
>
>         if (ret < len)
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 238a03087e17..bd6d80453726 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -116,18 +116,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
>
>  /*
>   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
> - * and page_done will be called for each page written to.  This only applies to
> - * buffered writes as unbuffered writes will not typically have pages
> + * and page_done will be called for each folio written to.  This only applies
> + * to buffered writes as unbuffered writes will not typically have folios
>   * associated with them.
>   *
>   * When page_prepare succeeds, page_done will always be called to do any
> - * cleanup work necessary.  In that page_done call, @page will be NULL if the
> - * associated page could not be obtained.
> + * cleanup work necessary.  In that page_done call, @folio will be NULL if the
> + * associated folio could not be obtained.
>   */
>  struct iomap_page_ops {
>         int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
>         void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
> -                       struct page *page);
> +                       struct folio *folio);
>  };
>
>  /*
> --
> 2.38.1
>
Christoph Hellwig Dec. 16, 2022, 8:22 a.m. UTC | #2
On Thu, Dec 15, 2022 at 09:13:50PM +0100, Andreas Gruenbacher wrote:
> This is still screwed up. We really need to unlock the page before
> calling into __mark_inode_dirty() and ending the transaction. The
> current page_done() hook would force us to then re-lock the page just
> so that the caller can unlock it again. This just doesn't make sense,
> particularly since the page_prepare and page_done hooks only exist to
> allow gfs2 to do data journaling via iomap. I'll follow up with a more
> useful approach ...

Yes.  And it would make sense to include the gfs2 patches.

Patch

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e7537fd305dd..c4ee47f8e499 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -968,14 +968,15 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
 }
 
 static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
-				 unsigned copied, struct page *page)
+				 unsigned copied, struct folio *folio)
 {
 	struct gfs2_trans *tr = current->journal_info;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
-	if (page && !gfs2_is_stuffed(ip))
-		gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+	if (folio && !gfs2_is_stuffed(ip))
+		gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
+				       copied);
 
 	if (tr->tr_num_buf_new)
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 91ee0b308e13..d988c1bedf70 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -714,12 +714,12 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 		i_size_write(iter->inode, pos + ret);
 		iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
 	}
+	if (page_ops && page_ops->page_done)
+		page_ops->page_done(iter->inode, pos, ret, folio);
 	folio_unlock(folio);
 
 	if (old_size < pos)
 		pagecache_isize_extended(iter->inode, old_size, pos);
-	if (page_ops && page_ops->page_done)
-		page_ops->page_done(iter->inode, pos, ret, &folio->page);
 	folio_put(folio);
 
 	if (ret < len)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 238a03087e17..bd6d80453726 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -116,18 +116,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
 
 /*
  * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
- * and page_done will be called for each page written to.  This only applies to
- * buffered writes as unbuffered writes will not typically have pages
+ * and page_done will be called for each folio written to.  This only applies
+ * to buffered writes as unbuffered writes will not typically have folios
  * associated with them.
  *
  * When page_prepare succeeds, page_done will always be called to do any
- * cleanup work necessary.  In that page_done call, @page will be NULL if the
- * associated page could not be obtained.
+ * cleanup work necessary.  In that page_done call, @folio will be NULL if the
+ * associated folio could not be obtained.
  */
 struct iomap_page_ops {
 	int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
 	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
-			struct page *page);
+			struct folio *folio);
 };
 
 /*
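
[Editorial note: as the updated comment in iomap.h spells out, ->page_done() must tolerate a NULL @folio once page_prepare has succeeded. A minimal sketch of a conforming implementation; hypothetical filesystem, illustrative names only:]

	static void examplefs_page_done(struct inode *inode, loff_t pos,
					unsigned copied, struct folio *folio)
	{
		/* @folio is NULL if page_prepare succeeded but the folio
		 * could not be obtained; skip the per-folio work then. */
		if (folio) {
			/* per-folio work; with this patch applied, the
			 * folio is still locked at this point */
		}
		/* unconditional cleanup paired with page_prepare */
	}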