
[v4,13/25] xfs: introduce workqueue for post read IO work

Message ID 20240212165821.1901300-14-aalbersh@redhat.com
State Superseded
Series fs-verity support for XFS

Commit Message

Andrey Albershteyn Feb. 12, 2024, 4:58 p.m. UTC
As noted by Dave there are two problems with using fs-verity's
workqueue in XFS:

1. High priority workqueues are used within XFS to ensure that data
   IO completion cannot stall processing of journal IO completions.
   Hence using a WQ_HIGHPRI workqueue directly in the user data IO
   path is a potential filesystem livelock/deadlock vector.

2. The fsverity workqueue is global - it creates a cross-filesystem
   contention point.

This patch adds a per-filesystem, per-cpu workqueue for fsverity
work.

Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
---
 fs/xfs/xfs_aops.c  | 15 +++++++++++++--
 fs/xfs/xfs_linux.h |  1 +
 fs/xfs/xfs_mount.h |  1 +
 fs/xfs/xfs_super.c |  9 +++++++++
 4 files changed, 24 insertions(+), 2 deletions(-)

Comments

Dave Chinner Feb. 15, 2024, 10:11 p.m. UTC | #1
On Mon, Feb 12, 2024 at 05:58:10PM +0100, Andrey Albershteyn wrote:
> As noted by Dave there are two problems with using fs-verity's
> workqueue in XFS:
> 
> 1. High priority workqueues are used within XFS to ensure that data
>    IO completion cannot stall processing of journal IO completions.
>    Hence using a WQ_HIGHPRI workqueue directly in the user data IO
>    path is a potential filesystem livelock/deadlock vector.
> 
> 2. The fsverity workqueue is global - it creates a cross-filesystem
>    contention point.
> 
> This patch adds a per-filesystem, per-cpu workqueue for fsverity
> work.
> 
> Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> ---
>  fs/xfs/xfs_aops.c  | 15 +++++++++++++--
>  fs/xfs/xfs_linux.h |  1 +
>  fs/xfs/xfs_mount.h |  1 +
>  fs/xfs/xfs_super.c |  9 +++++++++
>  4 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 7a6627404160..70e444c151b2 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -548,19 +548,30 @@ xfs_vm_bmap(
>  	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
>  }
>  
> +static inline struct workqueue_struct *
> +xfs_fsverity_wq(
> +	struct address_space	*mapping)
> +{
> +	if (fsverity_active(mapping->host))
> +		return XFS_I(mapping->host)->i_mount->m_postread_workqueue;
> +	return NULL;
> +}
> +
>  STATIC int
>  xfs_vm_read_folio(
>  	struct file		*unused,
>  	struct folio		*folio)
>  {
> -	return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL);
> +	return iomap_read_folio(folio, &xfs_read_iomap_ops,
> +				xfs_fsverity_wq(folio->mapping));
>  }
>  
>  STATIC void
>  xfs_vm_readahead(
>  	struct readahead_control	*rac)
>  {
> -	iomap_readahead(rac, &xfs_read_iomap_ops, NULL);
> +	iomap_readahead(rac, &xfs_read_iomap_ops,
> +			xfs_fsverity_wq(rac->mapping));
>  }

Ok, now I see how this workqueue is specified. I just don't see
anything XFS-specific about this, and it adds complexity to the
whole system by making XFS special.

Either the fsverity code provides a per-sb workqueue instance, or
we use the global fsverity workqueue. i.e. the filesystem itself
should not have to supply this, nor should it be plumbed into
the generic iomap IO path.

We already do this for direct IO completion, using a per-superblock
workqueue for deferring write completions (sb->s_dio_done_wq), so I
think that is what we should be doing here, too, i.e. a generic
per-sb post-read workqueue.

That way iomap_read_bio_alloc() becomes:

+#ifdef CONFIG_FS_VERITY
+	if (fsverity_active(inode)) {
+		bio = bio_alloc_bioset(bdev, nr_vecs, REQ_OP_READ, gfp,
+					&iomap_fsverity_bioset);
+		if (bio) {
+			bio->bi_private = inode->i_sb->s_postread_wq;
+			bio->bi_end_io = iomap_read_fsverity_end_io;
+		}
+		return bio;
+	}
+#endif

And we no longer need to pass a work queue through the IO stack.
This workqueue can be initialised when we first initialise fsverity
support for the superblock at mount time, and it would be relatively
trivial to convert all the fsverity filesystems to use this
mechanism, getting rid of the global workqueue altogether.
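
A minimal sketch of the per-sb initialisation being suggested here,
modelled on sb_init_dio_done_wq(); the s_postread_wq field and the
fsverity_init_postread_wq() helper are illustrative names only, not
existing kernel API:

#include <linux/fs.h>
#include <linux/workqueue.h>

static int fsverity_init_postread_wq(struct super_block *sb)
{
	struct workqueue_struct *wq;

	/* Already set up for this superblock? */
	if (sb->s_postread_wq)
		return 0;

	wq = alloc_workqueue("fsverity-read/%s",
			WQ_FREEZABLE | WQ_MEM_RECLAIM, 0, sb->s_id);
	if (!wq)
		return -ENOMEM;

	/*
	 * Mirror sb_init_dio_done_wq(): if another thread installed a
	 * workqueue first, keep theirs and destroy our spare one.
	 */
	if (cmpxchg(&sb->s_postread_wq, NULL, wq))
		destroy_workqueue(wq);
	return 0;
}

As with sb->s_dio_done_wq, the cmpxchg() means racing first readers
can both call this safely; the loser just frees its spare workqueue.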

-Dave.
Andrey Albershteyn Feb. 16, 2024, 4:29 p.m. UTC | #2
On 2024-02-16 09:11:43, Dave Chinner wrote:
> On Mon, Feb 12, 2024 at 05:58:10PM +0100, Andrey Albershteyn wrote:
> > As noted by Dave there are two problems with using fs-verity's
> > workqueue in XFS:
> > 
> > 1. High priority workqueues are used within XFS to ensure that data
> >    IO completion cannot stall processing of journal IO completions.
> >    Hence using a WQ_HIGHPRI workqueue directly in the user data IO
> >    path is a potential filesystem livelock/deadlock vector.
> > 
> > 2. The fsverity workqueue is global - it creates a cross-filesystem
> >    contention point.
> > 
> > This patch adds a per-filesystem, per-cpu workqueue for fsverity
> > work.
> > 
> > Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> > ---
> >  fs/xfs/xfs_aops.c  | 15 +++++++++++++--
> >  fs/xfs/xfs_linux.h |  1 +
> >  fs/xfs/xfs_mount.h |  1 +
> >  fs/xfs/xfs_super.c |  9 +++++++++
> >  4 files changed, 24 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> > index 7a6627404160..70e444c151b2 100644
> > --- a/fs/xfs/xfs_aops.c
> > +++ b/fs/xfs/xfs_aops.c
> > @@ -548,19 +548,30 @@ xfs_vm_bmap(
> >  	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
> >  }
> >  
> > +static inline struct workqueue_struct *
> > +xfs_fsverity_wq(
> > +	struct address_space	*mapping)
> > +{
> > +	if (fsverity_active(mapping->host))
> > +		return XFS_I(mapping->host)->i_mount->m_postread_workqueue;
> > +	return NULL;
> > +}
> > +
> >  STATIC int
> >  xfs_vm_read_folio(
> >  	struct file		*unused,
> >  	struct folio		*folio)
> >  {
> > -	return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL);
> > +	return iomap_read_folio(folio, &xfs_read_iomap_ops,
> > +				xfs_fsverity_wq(folio->mapping));
> >  }
> >  
> >  STATIC void
> >  xfs_vm_readahead(
> >  	struct readahead_control	*rac)
> >  {
> > -	iomap_readahead(rac, &xfs_read_iomap_ops, NULL);
> > +	iomap_readahead(rac, &xfs_read_iomap_ops,
> > +			xfs_fsverity_wq(rac->mapping));
> >  }
> 
> Ok, now I see how this workqueue is specified. I just don't see
> anything XFS-specific about this, and it adds complexity to the
> whole system by making XFS special.
> 
> Either the fsverity code provides a per-sb workqueue instance, or
> we use the global fsverity workqueue. i.e. the filesystem itself
> should not have to supply this, nor should it be plumbed into
> the generic iomap IO path.
> 
> We already do this for direct IO completion, using a per-superblock
> workqueue for deferring write completions (sb->s_dio_done_wq), so I
> think that is what we should be doing here, too, i.e. a generic
> per-sb post-read workqueue.
> 
> That way iomap_read_bio_alloc() becomes:
> 
> +#ifdef CONFIG_FS_VERITY
> +	if (fsverity_active(inode)) {
> +		bio = bio_alloc_bioset(bdev, nr_vecs, REQ_OP_READ, gfp,
> +					&iomap_fsverity_bioset);
> +		if (bio) {
> +			bio->bi_private = inode->i_sb->s_postread_wq;
> +			bio->bi_end_io = iomap_read_fsverity_end_io;
> +		}
> +		return bio;
> +	}
> +#endif
> 
> And we no longer need to pass a work queue through the IO stack.
> This workqueue can be initialised when we first initialise fsverity
> support for the superblock at mount time, and it would be relatively
> trivial to convert all the fsverity filesystems to use this
> mechanism, getting rid of the global workqueue altogether.

Thanks, I hadn't thought about that. I will change it.

Patch

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7a6627404160..70e444c151b2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -548,19 +548,30 @@  xfs_vm_bmap(
 	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
 }
 
+static inline struct workqueue_struct *
+xfs_fsverity_wq(
+	struct address_space	*mapping)
+{
+	if (fsverity_active(mapping->host))
+		return XFS_I(mapping->host)->i_mount->m_postread_workqueue;
+	return NULL;
+}
+
 STATIC int
 xfs_vm_read_folio(
 	struct file		*unused,
 	struct folio		*folio)
 {
-	return iomap_read_folio(folio, &xfs_read_iomap_ops, NULL);
+	return iomap_read_folio(folio, &xfs_read_iomap_ops,
+				xfs_fsverity_wq(folio->mapping));
 }
 
 STATIC void
 xfs_vm_readahead(
 	struct readahead_control	*rac)
 {
-	iomap_readahead(rac, &xfs_read_iomap_ops, NULL);
+	iomap_readahead(rac, &xfs_read_iomap_ops,
+			xfs_fsverity_wq(rac->mapping));
 }
 
 static int
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index d7873e0360f0..9c76e025b5d8 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -64,6 +64,7 @@  typedef __u32			xfs_nlink_t;
 #include <linux/xattr.h>
 #include <linux/mnt_idmapping.h>
 #include <linux/debugfs.h>
+#include <linux/fsverity.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 503fe3c7edbf..f64bf75f50d6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -109,6 +109,7 @@  typedef struct xfs_mount {
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct workqueue_struct *m_buf_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
+	struct workqueue_struct	*m_postread_workqueue;
 	struct workqueue_struct	*m_reclaim_workqueue;
 	struct workqueue_struct	*m_sync_workqueue;
 	struct workqueue_struct *m_blockgc_wq;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5a2512d20bd0..b2b6c1f24c42 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -553,6 +553,12 @@  xfs_init_mount_workqueues(
 	if (!mp->m_unwritten_workqueue)
 		goto out_destroy_buf;
 
+	mp->m_postread_workqueue = alloc_workqueue("xfs-pread/%s",
+			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+			0, mp->m_super->s_id);
+	if (!mp->m_postread_workqueue)
+		goto out_destroy_unwritten;
+
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
 			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
 			0, mp->m_super->s_id);
@@ -586,6 +592,8 @@  xfs_init_mount_workqueues(
 	destroy_workqueue(mp->m_reclaim_workqueue);
+out_destroy_postread:
+	destroy_workqueue(mp->m_postread_workqueue);
 out_destroy_unwritten:
 	destroy_workqueue(mp->m_unwritten_workqueue);
 out_destroy_buf:
 	destroy_workqueue(mp->m_buf_workqueue);
 out:
@@ -601,6 +609,7 @@  xfs_destroy_mount_workqueues(
 	destroy_workqueue(mp->m_inodegc_wq);
 	destroy_workqueue(mp->m_reclaim_workqueue);
 	destroy_workqueue(mp->m_unwritten_workqueue);
+	destroy_workqueue(mp->m_postread_workqueue);
 	destroy_workqueue(mp->m_buf_workqueue);
 }
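
For context, this patch only creates the per-mount workqueue; the
iomap side of this series is what actually queues verification work
on it. A rough sketch of that deferral pattern, with hypothetical
names (postread_ctx, postread_work_fn and xfs_postread_end_io are
illustrative only, not the series' actual code):

#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Illustrative per-bio context carrying the deferred work item. */
struct postread_ctx {
	struct work_struct	work;
	struct workqueue_struct	*wq;	/* mp->m_postread_workqueue */
	struct bio		*bio;
};

static void
postread_work_fn(
	struct work_struct	*work)
{
	struct postread_ctx	*ctx = container_of(work,
					struct postread_ctx, work);
	struct folio_iter	fi;

	/*
	 * Runs in process context on the "xfs-pread" workqueue: verify
	 * the fs-verity hashes for the bio's folios here, then finish
	 * the read (error handling omitted in this sketch).
	 */
	bio_for_each_folio_all(fi, ctx->bio) {
		folio_mark_uptodate(fi.folio);
		folio_unlock(fi.folio);
	}
	bio_put(ctx->bio);
	kfree(ctx);
}

static void
xfs_postread_end_io(
	struct bio		*bio)
{
	struct postread_ctx	*ctx = bio->bi_private;

	/* Bounce verification out of bio completion context to the wq. */
	INIT_WORK(&ctx->work, postread_work_fn);
	queue_work(ctx->wq, &ctx->work);
}

In this series the workqueue pointer would be supplied by
xfs_fsverity_wq() through the read path changed in the xfs_aops.c
hunk above.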