diff mbox series

[1/3] filemap: add a per-mapping stable writes flag

Message ID 20231024064416.897956-2-hch@lst.de (mailing list archive)
State New
Headers show
Series [1/3] filemap: add a per-mapping stable writes flag | expand

Commit Message

Christoph Hellwig Oct. 24, 2023, 6:44 a.m. UTC
folio_wait_stable waits for writeback to finish before modifying the
contents of a folio again, e.g. to support check summing of the data
in the block integrity code.

Currently this behavior is controlled by the SB_I_STABLE_WRITES flag
on the super_block, which means it is uniform for the entire file system.
This is wrong for the block device pseudofs which is shared by all
block devices, or file systems that can use multiple devices like XFS
witht the RT subvolume or btrfs (although btrfs currently reimplements
folio_wait_stable anyway).

Add a per-address_space AS_STABLE_WRITES flag to control the behavior
in a more fine grained way.  The existing SB_I_STABLE_WRITES is kept
to initialize AS_STABLE_WRITES to the existing default which covers
most cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/inode.c              |  2 ++
 include/linux/pagemap.h | 17 +++++++++++++++++
 mm/page-writeback.c     |  2 +-
 3 files changed, 20 insertions(+), 1 deletion(-)

Comments

Matthew Wilcox Oct. 24, 2023, 11:43 a.m. UTC | #1
On Tue, Oct 24, 2023 at 08:44:14AM +0200, Christoph Hellwig wrote:
> folio_wait_stable waits for writeback to finish before modifying the
> contents of a folio again, e.g. to support check summing of the data
> in the block integrity code.
> 
> Currently this behavior is controlled by the SB_I_STABLE_WRITES flag
> on the super_block, which means it is uniform for the entire file system.
> This is wrong for the block device pseudofs which is shared by all
> block devices, or file systems that can use multiple devices like XFS
> witht the RT subvolume or btrfs (although btrfs currently reimplements
> folio_wait_stable anyway).
> 
> Add a per-address_space AS_STABLE_WRITES flag to control the behavior
> in a more fine grained way.  The existing SB_I_STABLE_WRITES is kept
> to initialize AS_STABLE_WRITES to the existing default which covers
> most cases.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>

> +++ b/mm/page-writeback.c
> @@ -3110,7 +3110,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
>   */
>  void folio_wait_stable(struct folio *folio)
>  {
> -	if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
> +	if (mapping_stable_writes(folio_mapping(folio)))
>  		folio_wait_writeback(folio);

What I really like about this is that we've gone from

	folio->mapping->host->i_sb->s_iflags
to
	folio->mapping->flags

which saves us two pointer dereferences.  Sure, probably cached, but
maybe not, and cache misses are expensive.
Ilya Dryomov Oct. 24, 2023, 12:03 p.m. UTC | #2
On Tue, Oct 24, 2023 at 8:44 AM Christoph Hellwig <hch@lst.de> wrote:
>
> folio_wait_stable waits for writeback to finish before modifying the
> contents of a folio again, e.g. to support check summing of the data
> in the block integrity code.
>
> Currently this behavior is controlled by the SB_I_STABLE_WRITES flag
> on the super_block, which means it is uniform for the entire file system.
> This is wrong for the block device pseudofs which is shared by all
> block devices, or file systems that can use multiple devices like XFS
> witht the RT subvolume or btrfs (although btrfs currently reimplements
> folio_wait_stable anyway).
>
> Add a per-address_space AS_STABLE_WRITES flag to control the behavior
> in a more fine grained way.  The existing SB_I_STABLE_WRITES is kept
> to initialize AS_STABLE_WRITES to the existing default which covers
> most cases.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/inode.c              |  2 ++
>  include/linux/pagemap.h | 17 +++++++++++++++++
>  mm/page-writeback.c     |  2 +-
>  3 files changed, 20 insertions(+), 1 deletion(-)
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 84bc3c76e5ccb5..ae1a6410b53d7e 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
>         lockdep_set_class_and_name(&mapping->invalidate_lock,
>                                    &sb->s_type->invalidate_lock_key,
>                                    "mapping.invalidate_lock");
> +       if (sb->s_iflags & SB_I_STABLE_WRITES)
> +               mapping_set_stable_writes(mapping);
>         inode->i_private = NULL;
>         inode->i_mapping = mapping;
>         INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 351c3b7f93a14e..8c9608b217b000 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -204,6 +204,8 @@ enum mapping_flags {
>         AS_NO_WRITEBACK_TAGS = 5,
>         AS_LARGE_FOLIO_SUPPORT = 6,
>         AS_RELEASE_ALWAYS,      /* Call ->release_folio(), even if no private data */
> +       AS_STABLE_WRITES,       /* must wait for writeback before modifying
> +                                  folio contents */
>  };
>
>  /**
> @@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping)
>         clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
>  }
>
> +static inline bool mapping_stable_writes(const struct address_space *mapping)
> +{
> +       return test_bit(AS_STABLE_WRITES, &mapping->flags);
> +}
> +
> +static inline void mapping_set_stable_writes(struct address_space *mapping)
> +{
> +       set_bit(AS_STABLE_WRITES, &mapping->flags);
> +}
> +
> +static inline void mapping_clear_stable_writes(struct address_space *mapping)

Hi Christoph,

Nit: mapping_clear_stable_writes() is unused.

> +{
> +       clear_bit(AS_STABLE_WRITES, &mapping->flags);
> +}
> +
>  static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
>  {
>         return mapping->gfp_mask;
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index b8d3d7040a506a..4656534b8f5cc6 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -3110,7 +3110,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
>   */
>  void folio_wait_stable(struct folio *folio)
>  {
> -       if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
> +       if (mapping_stable_writes(folio_mapping(folio)))
>                 folio_wait_writeback(folio);
>  }
>  EXPORT_SYMBOL_GPL(folio_wait_stable);
> --
> 2.39.2
>

Tested with RBD which behaves like a DIF/DIX device (i.e. requires
stable pages):

Tested-by: Ilya Dryomov <idryomov@gmail.com>

Thanks,

                Ilya
Matthew Wilcox Oct. 24, 2023, 12:09 p.m. UTC | #3
On Tue, Oct 24, 2023 at 02:03:36PM +0200, Ilya Dryomov wrote:
> > +static inline void mapping_clear_stable_writes(struct address_space *mapping)
> 
> Hi Christoph,
> 
> Nit: mapping_clear_stable_writes() is unused.

It's used in patch 3
Ilya Dryomov Oct. 24, 2023, 12:45 p.m. UTC | #4
On Tue, Oct 24, 2023 at 2:09 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Tue, Oct 24, 2023 at 02:03:36PM +0200, Ilya Dryomov wrote:
> > > +static inline void mapping_clear_stable_writes(struct address_space *mapping)
> >
> > Hi Christoph,
> >
> > Nit: mapping_clear_stable_writes() is unused.
>
> It's used in patch 3

My apologies, I was too quick to archive it as specific to XFS.

Thanks,

                Ilya
Darrick J. Wong Oct. 24, 2023, 3 p.m. UTC | #5
On Tue, Oct 24, 2023 at 08:44:14AM +0200, Christoph Hellwig wrote:
> folio_wait_stable waits for writeback to finish before modifying the
> contents of a folio again, e.g. to support check summing of the data
> in the block integrity code.
> 
> Currently this behavior is controlled by the SB_I_STABLE_WRITES flag
> on the super_block, which means it is uniform for the entire file system.
> This is wrong for the block device pseudofs which is shared by all
> block devices, or file systems that can use multiple devices like XFS
> witht the RT subvolume or btrfs (although btrfs currently reimplements
> folio_wait_stable anyway).
> 
> Add a per-address_space AS_STABLE_WRITES flag to control the behavior
> in a more fine grained way.  The existing SB_I_STABLE_WRITES is kept
> to initialize AS_STABLE_WRITES to the existing default which covers
> most cases.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/inode.c              |  2 ++
>  include/linux/pagemap.h | 17 +++++++++++++++++
>  mm/page-writeback.c     |  2 +-

For a hot second I wondered if we could get rid of SB_I_STABLE_WRITES
too, but then had an AHA moment when I saw that NFS also sets it.

This looks reasonable,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

>  3 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 84bc3c76e5ccb5..ae1a6410b53d7e 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
>  	lockdep_set_class_and_name(&mapping->invalidate_lock,
>  				   &sb->s_type->invalidate_lock_key,
>  				   "mapping.invalidate_lock");
> +	if (sb->s_iflags & SB_I_STABLE_WRITES)
> +		mapping_set_stable_writes(mapping);
>  	inode->i_private = NULL;
>  	inode->i_mapping = mapping;
>  	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 351c3b7f93a14e..8c9608b217b000 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -204,6 +204,8 @@ enum mapping_flags {
>  	AS_NO_WRITEBACK_TAGS = 5,
>  	AS_LARGE_FOLIO_SUPPORT = 6,
>  	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
> +	AS_STABLE_WRITES,	/* must wait for writeback before modifying
> +				   folio contents */
>  };
>  
>  /**
> @@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping)
>  	clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
>  }
>  
> +static inline bool mapping_stable_writes(const struct address_space *mapping)
> +{
> +	return test_bit(AS_STABLE_WRITES, &mapping->flags);
> +}
> +
> +static inline void mapping_set_stable_writes(struct address_space *mapping)
> +{
> +	set_bit(AS_STABLE_WRITES, &mapping->flags);
> +}
> +
> +static inline void mapping_clear_stable_writes(struct address_space *mapping)
> +{
> +	clear_bit(AS_STABLE_WRITES, &mapping->flags);
> +}
> +
>  static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
>  {
>  	return mapping->gfp_mask;
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index b8d3d7040a506a..4656534b8f5cc6 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -3110,7 +3110,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
>   */
>  void folio_wait_stable(struct folio *folio)
>  {
> -	if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
> +	if (mapping_stable_writes(folio_mapping(folio)))
>  		folio_wait_writeback(folio);
>  }
>  EXPORT_SYMBOL_GPL(folio_wait_stable);
> -- 
> 2.39.2
>
Matthew Wilcox Oct. 24, 2023, 3:10 p.m. UTC | #6
On Tue, Oct 24, 2023 at 08:00:53AM -0700, Darrick J. Wong wrote:
> On Tue, Oct 24, 2023 at 08:44:14AM +0200, Christoph Hellwig wrote:
> > Add a per-address_space AS_STABLE_WRITES flag to control the behavior
> > in a more fine grained way.  The existing SB_I_STABLE_WRITES is kept
> > to initialize AS_STABLE_WRITES to the existing default which covers
> > most cases.
> 
> For a hot second I wondered if we could get rid of SB_I_STABLE_WRITES
> too, but then had an AHA moment when I saw that NFS also sets it.

I mean, we could if we're short on flags, or it just offends our
aesthetics to have it.  NFS (and others) would just have to
call mapping_set_stable_writes() in their inode initialisation
routines.  I don't personally find it a big deal either way.
Christoph Hellwig Oct. 24, 2023, 4:14 p.m. UTC | #7
On Tue, Oct 24, 2023 at 08:00:53AM -0700, Darrick J. Wong wrote:
> For a hot second I wondered if we could get rid of SB_I_STABLE_WRITES
> too, but then had an AHA moment when I saw that NFS also sets it.

It's not just NFS, but still the general way of propagating the flag.
I don't really want to add bdev-specific code to new_inode.
diff mbox series

Patch

diff --git a/fs/inode.c b/fs/inode.c
index 84bc3c76e5ccb5..ae1a6410b53d7e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -215,6 +215,8 @@  int inode_init_always(struct super_block *sb, struct inode *inode)
 	lockdep_set_class_and_name(&mapping->invalidate_lock,
 				   &sb->s_type->invalidate_lock_key,
 				   "mapping.invalidate_lock");
+	if (sb->s_iflags & SB_I_STABLE_WRITES)
+		mapping_set_stable_writes(mapping);
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
 	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 351c3b7f93a14e..8c9608b217b000 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -204,6 +204,8 @@  enum mapping_flags {
 	AS_NO_WRITEBACK_TAGS = 5,
 	AS_LARGE_FOLIO_SUPPORT = 6,
 	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
+	AS_STABLE_WRITES,	/* must wait for writeback before modifying
+				   folio contents */
 };
 
 /**
@@ -289,6 +291,21 @@  static inline void mapping_clear_release_always(struct address_space *mapping)
 	clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 }
 
+static inline bool mapping_stable_writes(const struct address_space *mapping)
+{
+	return test_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_stable_writes(struct address_space *mapping)
+{
+	set_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_clear_stable_writes(struct address_space *mapping)
+{
+	clear_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
 	return mapping->gfp_mask;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b8d3d7040a506a..4656534b8f5cc6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -3110,7 +3110,7 @@  EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
  */
 void folio_wait_stable(struct folio *folio)
 {
-	if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
+	if (mapping_stable_writes(folio_mapping(folio)))
 		folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);