diff mbox series

[08/29] fsverity: add per-sb workqueue for post read processing

Message ID 171035223488.2613863.7583467519759571221.stgit@frogsfrogsfrogs (mailing list archive)
State New
Headers show
Series [01/29] fsverity: remove hash page spin lock | expand

Commit Message

Darrick J. Wong March 13, 2024, 5:54 p.m. UTC
From: Andrey Albershteyn <aalbersh@redhat.com>

For XFS, fsverity's global workqueue is not really suitable due to:

1. High priority workqueues are used within XFS to ensure that data
   IO completion cannot stall processing of journal IO completions.
   Hence using a WQ_HIGHPRI workqueue directly in the user data IO
   path is a potential filesystem livelock/deadlock vector.

2. The fsverity workqueue is global - it creates a cross-filesystem
   contention point.

This patch adds per-filesystem, per-cpu workqueue for fsverity
work. This allows iomap to add verification work in the read path on
BIO completion.

Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/super.c               |    7 +++++++
 include/linux/fs.h       |    2 ++
 include/linux/fsverity.h |   22 ++++++++++++++++++++++
 3 files changed, 31 insertions(+)

Comments

Darrick J. Wong March 19, 2024, 11:30 p.m. UTC | #1
On Wed, Mar 13, 2024 at 10:54:39AM -0700, Darrick J. Wong wrote:
> From: Andrey Albershteyn <aalbersh@redhat.com>
> 
> For XFS, fsverity's global workqueue is not really suitable due to:
> 
> 1. High priority workqueues are used within XFS to ensure that data
>    IO completion cannot stall processing of journal IO completions.
>    Hence using a WQ_HIGHPRI workqueue directly in the user data IO
>    path is a potential filesystem livelock/deadlock vector.
> 
> 2. The fsverity workqueue is global - it creates a cross-filesystem
>    contention point.
> 
> This patch adds per-filesystem, per-cpu workqueue for fsverity
> work. This allows iomap to add verification work in the read path on
> BIO completion.
> 
> Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/super.c               |    7 +++++++
>  include/linux/fs.h       |    2 ++
>  include/linux/fsverity.h |   22 ++++++++++++++++++++++
>  3 files changed, 31 insertions(+)
> 
> 
> diff --git a/fs/super.c b/fs/super.c
> index d35e85295489..338d86864200 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -642,6 +642,13 @@ void generic_shutdown_super(struct super_block *sb)
>  			sb->s_dio_done_wq = NULL;
>  		}
>  
> +#ifdef CONFIG_FS_VERITY
> +		if (sb->s_read_done_wq) {
> +			destroy_workqueue(sb->s_read_done_wq);
> +			sb->s_read_done_wq = NULL;
> +		}
> +#endif
> +
>  		if (sop->put_super)
>  			sop->put_super(sb);
>  
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ed5966a70495..9db24a825d94 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1221,6 +1221,8 @@ struct super_block {
>  #endif
>  #ifdef CONFIG_FS_VERITY
>  	const struct fsverity_operations *s_vop;
> +	/* Completion queue for post read verification */
> +	struct workqueue_struct *s_read_done_wq;
>  #endif
>  #if IS_ENABLED(CONFIG_UNICODE)
>  	struct unicode_map *s_encoding;
> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> index 0973b521ac5a..45b7c613148a 100644
> --- a/include/linux/fsverity.h
> +++ b/include/linux/fsverity.h
> @@ -241,6 +241,22 @@ void fsverity_enqueue_verify_work(struct work_struct *work);
>  void fsverity_invalidate_block(struct inode *inode,
>  		struct fsverity_blockbuf *block);
>  
> +static inline int fsverity_set_ops(struct super_block *sb,
> +				   const struct fsverity_operations *ops)
> +{
> +	sb->s_vop = ops;
> +
> +	/* Create per-sb workqueue for post read bio verification */
> +	struct workqueue_struct *wq = alloc_workqueue(
> +		"pread/%s", (WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, sb->s_id);

Looking at this more closely, why is it that the fsverity_read_queue
is unbound and tagged WQ_HIGHPRI, whereas this one is instead FREEZEABLE
and MEM_RECLAIM and bound?

If it's really feasible to use /one/ workqueue for all the read
post-processing then this ought to be a fs/super.c helper ala
sb_init_dio_done_wq.  That said, from Eric's comments on the v5 thread
about fsverity and fscrypt locking horns over workqueue stalls I'm not
convinced that's true.

--D

> +	if (!wq)
> +		return -ENOMEM;
> +
> +	sb->s_read_done_wq = wq;
> +
> +	return 0;
> +}
> +
>  #else /* !CONFIG_FS_VERITY */
>  
>  static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
> @@ -318,6 +334,12 @@ static inline void fsverity_enqueue_verify_work(struct work_struct *work)
>  	WARN_ON_ONCE(1);
>  }
>  
> +static inline int fsverity_set_ops(struct super_block *sb,
> +				   const struct fsverity_operations *ops)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
>  #endif	/* !CONFIG_FS_VERITY */
>  
>  static inline bool fsverity_verify_folio(struct folio *folio)
> 
>
Andrey Albershteyn March 20, 2024, 10:37 a.m. UTC | #2
On 2024-03-19 16:30:10, Darrick J. Wong wrote:
> On Wed, Mar 13, 2024 at 10:54:39AM -0700, Darrick J. Wong wrote:
> > From: Andrey Albershteyn <aalbersh@redhat.com>
> > 
> > For XFS, fsverity's global workqueue is not really suitable due to:
> > 
> > 1. High priority workqueues are used within XFS to ensure that data
> >    IO completion cannot stall processing of journal IO completions.
> >    Hence using a WQ_HIGHPRI workqueue directly in the user data IO
> >    path is a potential filesystem livelock/deadlock vector.
> > 
> > 2. The fsverity workqueue is global - it creates a cross-filesystem
> >    contention point.
> > 
> > This patch adds per-filesystem, per-cpu workqueue for fsverity
> > work. This allows iomap to add verification work in the read path on
> > BIO completion.
> > 
> > Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/super.c               |    7 +++++++
> >  include/linux/fs.h       |    2 ++
> >  include/linux/fsverity.h |   22 ++++++++++++++++++++++
> >  3 files changed, 31 insertions(+)
> > 
> > 
> > diff --git a/fs/super.c b/fs/super.c
> > index d35e85295489..338d86864200 100644
> > --- a/fs/super.c
> > +++ b/fs/super.c
> > @@ -642,6 +642,13 @@ void generic_shutdown_super(struct super_block *sb)
> >  			sb->s_dio_done_wq = NULL;
> >  		}
> >  
> > +#ifdef CONFIG_FS_VERITY
> > +		if (sb->s_read_done_wq) {
> > +			destroy_workqueue(sb->s_read_done_wq);
> > +			sb->s_read_done_wq = NULL;
> > +		}
> > +#endif
> > +
> >  		if (sop->put_super)
> >  			sop->put_super(sb);
> >  
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index ed5966a70495..9db24a825d94 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1221,6 +1221,8 @@ struct super_block {
> >  #endif
> >  #ifdef CONFIG_FS_VERITY
> >  	const struct fsverity_operations *s_vop;
> > +	/* Completion queue for post read verification */
> > +	struct workqueue_struct *s_read_done_wq;
> >  #endif
> >  #if IS_ENABLED(CONFIG_UNICODE)
> >  	struct unicode_map *s_encoding;
> > diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> > index 0973b521ac5a..45b7c613148a 100644
> > --- a/include/linux/fsverity.h
> > +++ b/include/linux/fsverity.h
> > @@ -241,6 +241,22 @@ void fsverity_enqueue_verify_work(struct work_struct *work);
> >  void fsverity_invalidate_block(struct inode *inode,
> >  		struct fsverity_blockbuf *block);
> >  
> > +static inline int fsverity_set_ops(struct super_block *sb,
> > +				   const struct fsverity_operations *ops)
> > +{
> > +	sb->s_vop = ops;
> > +
> > +	/* Create per-sb workqueue for post read bio verification */
> > +	struct workqueue_struct *wq = alloc_workqueue(
> > +		"pread/%s", (WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, sb->s_id);
> 
> Looking at this more closely, why is it that the fsverity_read_queue
> is unbound and tagged WQ_HIGHPRI, whereas this one is instead FREEZEABLE
> and MEM_RECLAIM and bound?
> 
> If it's really feasible to use /one/ workqueue for all the read
> post-processing then this ought to be a fs/super.c helper ala
> sb_init_dio_done_wq.  That said, from Eric's comments on the v5 thread
> about fsverity and fscrypt locking horns over workqueue stalls I'm not
> convinced that's true.

There's good explanation by Dave why WQ_HIGHPRI is not a good fit
for XFS (potential livelock/deadlock):

https://lore.kernel.org/linux-xfs/20221214054357.GI3600936@dread.disaster.area/

Based on his feedback I changed it to per-filesystem.
Darrick J. Wong March 20, 2024, 2:55 p.m. UTC | #3
On Wed, Mar 20, 2024 at 11:37:28AM +0100, Andrey Albershteyn wrote:
> On 2024-03-19 16:30:10, Darrick J. Wong wrote:
> > On Wed, Mar 13, 2024 at 10:54:39AM -0700, Darrick J. Wong wrote:
> > > From: Andrey Albershteyn <aalbersh@redhat.com>
> > > 
> > > For XFS, fsverity's global workqueue is not really suitable due to:
> > > 
> > > 1. High priority workqueues are used within XFS to ensure that data
> > >    IO completion cannot stall processing of journal IO completions.
> > >    Hence using a WQ_HIGHPRI workqueue directly in the user data IO
> > >    path is a potential filesystem livelock/deadlock vector.
> > > 
> > > 2. The fsverity workqueue is global - it creates a cross-filesystem
> > >    contention point.
> > > 
> > > This patch adds per-filesystem, per-cpu workqueue for fsverity
> > > work. This allows iomap to add verification work in the read path on
> > > BIO completion.
> > > 
> > > Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  fs/super.c               |    7 +++++++
> > >  include/linux/fs.h       |    2 ++
> > >  include/linux/fsverity.h |   22 ++++++++++++++++++++++
> > >  3 files changed, 31 insertions(+)
> > > 
> > > 
> > > diff --git a/fs/super.c b/fs/super.c
> > > index d35e85295489..338d86864200 100644
> > > --- a/fs/super.c
> > > +++ b/fs/super.c
> > > @@ -642,6 +642,13 @@ void generic_shutdown_super(struct super_block *sb)
> > >  			sb->s_dio_done_wq = NULL;
> > >  		}
> > >  
> > > +#ifdef CONFIG_FS_VERITY
> > > +		if (sb->s_read_done_wq) {
> > > +			destroy_workqueue(sb->s_read_done_wq);
> > > +			sb->s_read_done_wq = NULL;
> > > +		}
> > > +#endif
> > > +
> > >  		if (sop->put_super)
> > >  			sop->put_super(sb);
> > >  
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index ed5966a70495..9db24a825d94 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -1221,6 +1221,8 @@ struct super_block {
> > >  #endif
> > >  #ifdef CONFIG_FS_VERITY
> > >  	const struct fsverity_operations *s_vop;
> > > +	/* Completion queue for post read verification */
> > > +	struct workqueue_struct *s_read_done_wq;
> > >  #endif
> > >  #if IS_ENABLED(CONFIG_UNICODE)
> > >  	struct unicode_map *s_encoding;
> > > diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> > > index 0973b521ac5a..45b7c613148a 100644
> > > --- a/include/linux/fsverity.h
> > > +++ b/include/linux/fsverity.h
> > > @@ -241,6 +241,22 @@ void fsverity_enqueue_verify_work(struct work_struct *work);
> > >  void fsverity_invalidate_block(struct inode *inode,
> > >  		struct fsverity_blockbuf *block);
> > >  
> > > +static inline int fsverity_set_ops(struct super_block *sb,
> > > +				   const struct fsverity_operations *ops)
> > > +{
> > > +	sb->s_vop = ops;
> > > +
> > > +	/* Create per-sb workqueue for post read bio verification */
> > > +	struct workqueue_struct *wq = alloc_workqueue(
> > > +		"pread/%s", (WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, sb->s_id);
> > 
> > Looking at this more closely, why is it that the fsverity_read_queue
> > is unbound and tagged WQ_HIGHPRI, whereas this one is instead FREEZEABLE
> > and MEM_RECLAIM and bound?
> > 
> > If it's really feasible to use /one/ workqueue for all the read
> > post-processing then this ought to be a fs/super.c helper ala
> > sb_init_dio_done_wq.  That said, from Eric's comments on the v5 thread
> > about fsverity and fscrypt locking horns over workqueue stalls I'm not
> > convinced that's true.
> 
> There's good explanation by Dave why WQ_HIGHPRI is not a good fit
> for XFS (potential livelock/deadlock):
> 
> https://lore.kernel.org/linux-xfs/20221214054357.GI3600936@dread.disaster.area/
> 
> Based on his feedback I changed it to per-filesystem.

Ah, ok.  Why is the workqueue tagged with MEM_RECLAIM though?  Does
letting it run actually help out with reclaim?  I guess it does by
allowing pages involved in readahead to get to unlocked state where they
can be ripped out. :)

--D

> -- 
> - Andrey
> 
>
Andrey Albershteyn March 20, 2024, 4:22 p.m. UTC | #4
On 2024-03-20 07:55:04, Darrick J. Wong wrote:
> On Wed, Mar 20, 2024 at 11:37:28AM +0100, Andrey Albershteyn wrote:
> > On 2024-03-19 16:30:10, Darrick J. Wong wrote:
> > > On Wed, Mar 13, 2024 at 10:54:39AM -0700, Darrick J. Wong wrote:
> > > > From: Andrey Albershteyn <aalbersh@redhat.com>
> > > > 
> > > > For XFS, fsverity's global workqueue is not really suitable due to:
> > > > 
> > > > 1. High priority workqueues are used within XFS to ensure that data
> > > >    IO completion cannot stall processing of journal IO completions.
> > > >    Hence using a WQ_HIGHPRI workqueue directly in the user data IO
> > > >    path is a potential filesystem livelock/deadlock vector.
> > > > 
> > > > 2. The fsverity workqueue is global - it creates a cross-filesystem
> > > >    contention point.
> > > > 
> > > > This patch adds per-filesystem, per-cpu workqueue for fsverity
> > > > work. This allows iomap to add verification work in the read path on
> > > > BIO completion.
> > > > 
> > > > Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> > > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  fs/super.c               |    7 +++++++
> > > >  include/linux/fs.h       |    2 ++
> > > >  include/linux/fsverity.h |   22 ++++++++++++++++++++++
> > > >  3 files changed, 31 insertions(+)
> > > > 
> > > > 
> > > > diff --git a/fs/super.c b/fs/super.c
> > > > index d35e85295489..338d86864200 100644
> > > > --- a/fs/super.c
> > > > +++ b/fs/super.c
> > > > @@ -642,6 +642,13 @@ void generic_shutdown_super(struct super_block *sb)
> > > >  			sb->s_dio_done_wq = NULL;
> > > >  		}
> > > >  
> > > > +#ifdef CONFIG_FS_VERITY
> > > > +		if (sb->s_read_done_wq) {
> > > > +			destroy_workqueue(sb->s_read_done_wq);
> > > > +			sb->s_read_done_wq = NULL;
> > > > +		}
> > > > +#endif
> > > > +
> > > >  		if (sop->put_super)
> > > >  			sop->put_super(sb);
> > > >  
> > > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > > index ed5966a70495..9db24a825d94 100644
> > > > --- a/include/linux/fs.h
> > > > +++ b/include/linux/fs.h
> > > > @@ -1221,6 +1221,8 @@ struct super_block {
> > > >  #endif
> > > >  #ifdef CONFIG_FS_VERITY
> > > >  	const struct fsverity_operations *s_vop;
> > > > +	/* Completion queue for post read verification */
> > > > +	struct workqueue_struct *s_read_done_wq;
> > > >  #endif
> > > >  #if IS_ENABLED(CONFIG_UNICODE)
> > > >  	struct unicode_map *s_encoding;
> > > > diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> > > > index 0973b521ac5a..45b7c613148a 100644
> > > > --- a/include/linux/fsverity.h
> > > > +++ b/include/linux/fsverity.h
> > > > @@ -241,6 +241,22 @@ void fsverity_enqueue_verify_work(struct work_struct *work);
> > > >  void fsverity_invalidate_block(struct inode *inode,
> > > >  		struct fsverity_blockbuf *block);
> > > >  
> > > > +static inline int fsverity_set_ops(struct super_block *sb,
> > > > +				   const struct fsverity_operations *ops)
> > > > +{
> > > > +	sb->s_vop = ops;
> > > > +
> > > > +	/* Create per-sb workqueue for post read bio verification */
> > > > +	struct workqueue_struct *wq = alloc_workqueue(
> > > > +		"pread/%s", (WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, sb->s_id);
> > > 
> > > Looking at this more closely, why is it that the fsverity_read_queue
> > > is unbound and tagged WQ_HIGHPRI, whereas this one is instead FREEZEABLE
> > > and MEM_RECLAIM and bound?
> > > 
> > > If it's really feasible to use /one/ workqueue for all the read
> > > post-processing then this ought to be a fs/super.c helper ala
> > > sb_init_dio_done_wq.  That said, from Eric's comments on the v5 thread
> > > about fsverity and fscrypt locking horns over workqueue stalls I'm not
> > > convinced that's true.
> > 
> > There's good explanation by Dave why WQ_HIGHPRI is not a good fit
> > for XFS (potential livelock/deadlock):
> > 
> > https://lore.kernel.org/linux-xfs/20221214054357.GI3600936@dread.disaster.area/
> > 
> > Based on his feedback I changed it to per-filesystem.
> 
> Ah, ok.  Why is the workqueue tagged with MEM_RECLAIM though?  Does
> letting it run actually help out with reclaim?  I guess it does by
> allowing pages involved in readahead to get to unlocked state where they
> can be ripped out. :)

Not sure how much it actually helps with reclaims, leaving it out
would probably have the same effect in most cases. But I suppose at
least one reserved execution context is good thing to not block BIO
finalization.
diff mbox series

Patch

diff --git a/fs/super.c b/fs/super.c
index d35e85295489..338d86864200 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -642,6 +642,13 @@  void generic_shutdown_super(struct super_block *sb)
 			sb->s_dio_done_wq = NULL;
 		}
 
+#ifdef CONFIG_FS_VERITY
+		if (sb->s_read_done_wq) {
+			destroy_workqueue(sb->s_read_done_wq);
+			sb->s_read_done_wq = NULL;
+		}
+#endif
+
 		if (sop->put_super)
 			sop->put_super(sb);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..9db24a825d94 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1221,6 +1221,8 @@  struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
 	const struct fsverity_operations *s_vop;
+	/* Completion queue for post read verification */
+	struct workqueue_struct *s_read_done_wq;
 #endif
 #if IS_ENABLED(CONFIG_UNICODE)
 	struct unicode_map *s_encoding;
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 0973b521ac5a..45b7c613148a 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -241,6 +241,22 @@  void fsverity_enqueue_verify_work(struct work_struct *work);
 void fsverity_invalidate_block(struct inode *inode,
 		struct fsverity_blockbuf *block);
 
+static inline int fsverity_set_ops(struct super_block *sb,
+				   const struct fsverity_operations *ops)
+{
+	sb->s_vop = ops;
+
+	/* Create per-sb workqueue for post read bio verification */
+	struct workqueue_struct *wq = alloc_workqueue(
+		"pread/%s", (WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, sb->s_id);
+	if (!wq)
+		return -ENOMEM;
+
+	sb->s_read_done_wq = wq;
+
+	return 0;
+}
+
 #else /* !CONFIG_FS_VERITY */
 
 static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
@@ -318,6 +334,12 @@  static inline void fsverity_enqueue_verify_work(struct work_struct *work)
 	WARN_ON_ONCE(1);
 }
 
+static inline int fsverity_set_ops(struct super_block *sb,
+				   const struct fsverity_operations *ops)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif	/* !CONFIG_FS_VERITY */
 
 static inline bool fsverity_verify_folio(struct folio *folio)