diff mbox series

[5/5] dax: always use _copy_mc_to_iter in dax_copy_to_iter

Message ID 20211209063828.18944-6-hch@lst.de (mailing list archive)
State Superseded
Headers show
Series [1/5] uio: remove copy_from_iter_flushcache() and copy_mc_to_iter() | expand

Commit Message

Christoph Hellwig Dec. 9, 2021, 6:38 a.m. UTC
While using the MC-safe copy routines is rather pointless on a virtual device
like virtiofs, it also isn't harmful at all.  So just use _copy_mc_to_iter
unconditionally to simplify the code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/dax/super.c | 10 ----------
 fs/fuse/virtio_fs.c |  1 -
 include/linux/dax.h |  1 -
 3 files changed, 12 deletions(-)

Comments

Vivek Goyal Dec. 10, 2021, 2:05 p.m. UTC | #1
On Thu, Dec 09, 2021 at 07:38:28AM +0100, Christoph Hellwig wrote:
> While using the MC-safe copy routines is rather pointless on a virtual device
> like virtiofs,

I was wondering about that. Is it completely pointless.

Typically we are just mapping host page cache into qemu address space.
That shows as virtiofs device pfn in guest and that pfn is mapped into
guest application address space in mmap() call.

Given on host its DRAM, so I would not expect machine check on load side
so there was no need to use machine check safe variant. But what if host
filesystem is on persistent memory and using DAX. In that case load in
guest can trigger a machine check. Not sure if that machine check will
actually travel into the guest and unblock read() operation or not.

But this sounds like a good change from virtiofs point of view, anyway.

Thanks
Vivek


> it also isn't harmful at all.  So just use _copy_mc_to_iter
> unconditionally to simplify the code.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/dax/super.c | 10 ----------
>  fs/fuse/virtio_fs.c |  1 -
>  include/linux/dax.h |  1 -
>  3 files changed, 12 deletions(-)
> 
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index ff676a07480c8..fe783234ca669 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -107,8 +107,6 @@ enum dax_device_flags {
>  	DAXDEV_SYNC,
>  	/* do not use uncached operations to write data */
>  	DAXDEV_CACHED,
> -	/* do not use mcsafe operations to read data */
> -	DAXDEV_NOMCSAFE,
>  };
>  
>  /**
> @@ -171,8 +169,6 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
>  	 * via access_ok() in vfs_red, so use the 'no check' version to bypass
>  	 * the HARDENED_USERCOPY overhead.
>  	 */
> -	if (test_bit(DAXDEV_NOMCSAFE, &dax_dev->flags))
> -		return _copy_to_iter(addr, bytes, i);
>  	return _copy_mc_to_iter(addr, bytes, i);
>  }
>  
> @@ -242,12 +238,6 @@ void set_dax_cached(struct dax_device *dax_dev)
>  }
>  EXPORT_SYMBOL_GPL(set_dax_cached);
>  
> -void set_dax_nomcsafe(struct dax_device *dax_dev)
> -{
> -	set_bit(DAXDEV_NOMCSAFE, &dax_dev->flags);
> -}
> -EXPORT_SYMBOL_GPL(set_dax_nomcsafe);
> -
>  bool dax_alive(struct dax_device *dax_dev)
>  {
>  	lockdep_assert_held(&dax_srcu);
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 754319ce2a29b..d9c20b148ac19 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -838,7 +838,6 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
>  	if (IS_ERR(fs->dax_dev))
>  		return PTR_ERR(fs->dax_dev);
>  	set_dax_cached(fs->dax_dev);
> -	set_dax_nomcsafe(fs->dax_dev);
>  	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
>  					fs->dax_dev);
>  }
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index d22cbf03d37d2..d267331bc37e7 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -90,7 +90,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
>  #endif
>  
>  void set_dax_cached(struct dax_device *dax_dev);
> -void set_dax_nomcsafe(struct dax_device *dax_dev);
>  
>  struct writeback_control;
>  #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
> -- 
> 2.30.2
>
Dan Williams Dec. 12, 2021, 2:48 p.m. UTC | #2
On Fri, Dec 10, 2021 at 6:05 AM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Thu, Dec 09, 2021 at 07:38:28AM +0100, Christoph Hellwig wrote:
> > While using the MC-safe copy routines is rather pointless on a virtual device
> > like virtiofs,
>
> I was wondering about that. Is it completely pointless.
>
> Typically we are just mapping host page cache into qemu address space.
> That shows as virtiofs device pfn in guest and that pfn is mapped into
> guest application address space in mmap() call.
>
> Given on host its DRAM, so I would not expect machine check on load side
> so there was no need to use machine check safe variant.

That's a broken assumption, DRAM experiences multi-bit ECC errors.
Machine checks, data aborts, etc existed before PMEM.

>  But what if host
> filesystem is on persistent memory and using DAX. In that case load in
> guest can trigger a machine check. Not sure if that machine check will
> actually travel into the guest and unblock read() operation or not.
>
> But this sounds like a good change from virtiofs point of view, anyway.
>
> Thanks
> Vivek
>
>
> > it also isn't harmful at all.  So just use _copy_mc_to_iter
> > unconditionally to simplify the code.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  drivers/dax/super.c | 10 ----------
> >  fs/fuse/virtio_fs.c |  1 -
> >  include/linux/dax.h |  1 -
> >  3 files changed, 12 deletions(-)
> >
> > diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> > index ff676a07480c8..fe783234ca669 100644
> > --- a/drivers/dax/super.c
> > +++ b/drivers/dax/super.c
> > @@ -107,8 +107,6 @@ enum dax_device_flags {
> >       DAXDEV_SYNC,
> >       /* do not use uncached operations to write data */
> >       DAXDEV_CACHED,
> > -     /* do not use mcsafe operations to read data */
> > -     DAXDEV_NOMCSAFE,
> >  };
> >
> >  /**
> > @@ -171,8 +169,6 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
> >        * via access_ok() in vfs_red, so use the 'no check' version to bypass
> >        * the HARDENED_USERCOPY overhead.
> >        */
> > -     if (test_bit(DAXDEV_NOMCSAFE, &dax_dev->flags))
> > -             return _copy_to_iter(addr, bytes, i);
> >       return _copy_mc_to_iter(addr, bytes, i);
> >  }
> >
> > @@ -242,12 +238,6 @@ void set_dax_cached(struct dax_device *dax_dev)
> >  }
> >  EXPORT_SYMBOL_GPL(set_dax_cached);
> >
> > -void set_dax_nomcsafe(struct dax_device *dax_dev)
> > -{
> > -     set_bit(DAXDEV_NOMCSAFE, &dax_dev->flags);
> > -}
> > -EXPORT_SYMBOL_GPL(set_dax_nomcsafe);
> > -
> >  bool dax_alive(struct dax_device *dax_dev)
> >  {
> >       lockdep_assert_held(&dax_srcu);
> > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> > index 754319ce2a29b..d9c20b148ac19 100644
> > --- a/fs/fuse/virtio_fs.c
> > +++ b/fs/fuse/virtio_fs.c
> > @@ -838,7 +838,6 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
> >       if (IS_ERR(fs->dax_dev))
> >               return PTR_ERR(fs->dax_dev);
> >       set_dax_cached(fs->dax_dev);
> > -     set_dax_nomcsafe(fs->dax_dev);
> >       return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
> >                                       fs->dax_dev);
> >  }
> > diff --git a/include/linux/dax.h b/include/linux/dax.h
> > index d22cbf03d37d2..d267331bc37e7 100644
> > --- a/include/linux/dax.h
> > +++ b/include/linux/dax.h
> > @@ -90,7 +90,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
> >  #endif
> >
> >  void set_dax_cached(struct dax_device *dax_dev);
> > -void set_dax_nomcsafe(struct dax_device *dax_dev);
> >
> >  struct writeback_control;
> >  #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
> > --
> > 2.30.2
> >
>
Dan Williams Dec. 12, 2021, 3:03 p.m. UTC | #3
On Wed, Dec 8, 2021 at 10:38 PM Christoph Hellwig <hch@lst.de> wrote:
>
> While using the MC-safe copy routines is rather pointless on a virtual device
> like virtiofs, it also isn't harmful at all.  So just use _copy_mc_to_iter
> unconditionally to simplify the code.

From a correctness perspective, yes, but from a performance perspective, see:

enable_copy_mc_fragile()

...on those platforms fast-string copy implementation is replaced with
a manual unrolled copy. So this will cause a performance regression on
those platforms.

How about let's keep this as is / still only use it for PMEM where end
users are already dealing with the performance difference across
platforms? I considered exporting an indicator of which backend
routine has been selected from arch/x86/lib/copy_mc.c, but it got
messy quickly so I fell back to just keeping the status quo.
Christoph Hellwig Dec. 13, 2021, 8:20 a.m. UTC | #4
On Sun, Dec 12, 2021 at 06:48:05AM -0800, Dan Williams wrote:
> On Fri, Dec 10, 2021 at 6:05 AM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Thu, Dec 09, 2021 at 07:38:28AM +0100, Christoph Hellwig wrote:
> > > While using the MC-safe copy routines is rather pointless on a virtual device
> > > like virtiofs,
> >
> > I was wondering about that. Is it completely pointless.
> >
> > Typically we are just mapping host page cache into qemu address space.
> > That shows as virtiofs device pfn in guest and that pfn is mapped into
> > guest application address space in mmap() call.
> >
> > Given on host its DRAM, so I would not expect machine check on load side
> > so there was no need to use machine check safe variant.
> 
> That's a broken assumption, DRAM experiences multi-bit ECC errors.
> Machine checks, data aborts, etc existed before PMEM.

So the conclusion here is that we should always use the mc safe variant?
Dan Williams Dec. 13, 2021, 4:43 p.m. UTC | #5
On Mon, Dec 13, 2021 at 12:20 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Sun, Dec 12, 2021 at 06:48:05AM -0800, Dan Williams wrote:
> > On Fri, Dec 10, 2021 at 6:05 AM Vivek Goyal <vgoyal@redhat.com> wrote:
> > >
> > > On Thu, Dec 09, 2021 at 07:38:28AM +0100, Christoph Hellwig wrote:
> > > > While using the MC-safe copy routines is rather pointless on a virtual device
> > > > like virtiofs,
> > >
> > > I was wondering about that. Is it completely pointless.
> > >
> > > Typically we are just mapping host page cache into qemu address space.
> > > That shows as virtiofs device pfn in guest and that pfn is mapped into
> > > guest application address space in mmap() call.
> > >
> > > Given on host its DRAM, so I would not expect machine check on load side
> > > so there was no need to use machine check safe variant.
> >
> > That's a broken assumption, DRAM experiences multi-bit ECC errors.
> > Machine checks, data aborts, etc existed before PMEM.
>
> So the conclusion here is that we should always use the mc safe variant?

The proposal is one of the following:

1/ any paths not currently using the mc safe variant should continue
not using it to avoid the performance regression on older platforms,
i.e. drop this patch.

2/ add plumbing to switch to mcsafe variant, but only on newer
platforms, incremental new patch

3/ always use the mc safe variant, keep this patch

We could go with 3/ and see who screams, because 3/ is smallest
ongoing maintenance burden. However, I feel like 1/ is the path of
least resistance until the platforms with the need to do 'careful'
copying age out of use.
Vivek Goyal Dec. 14, 2021, 1:59 p.m. UTC | #6
On Sun, Dec 12, 2021 at 06:48:05AM -0800, Dan Williams wrote:
> On Fri, Dec 10, 2021 at 6:05 AM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Thu, Dec 09, 2021 at 07:38:28AM +0100, Christoph Hellwig wrote:
> > > While using the MC-safe copy routines is rather pointless on a virtual device
> > > like virtiofs,
> >
> > I was wondering about that. Is it completely pointless.
> >
> > Typically we are just mapping host page cache into qemu address space.
> > That shows as virtiofs device pfn in guest and that pfn is mapped into
> > guest application address space in mmap() call.
> >
> > Given on host its DRAM, so I would not expect machine check on load side
> > so there was no need to use machine check safe variant.
> 
> That's a broken assumption, DRAM experiences multi-bit ECC errors.
> Machine checks, data aborts, etc existed before PMEM.

So we should use MC safe variant when loading from DRAM as well?
(If needed platoform support is there).

Vivek
diff mbox series

Patch

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ff676a07480c8..fe783234ca669 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -107,8 +107,6 @@  enum dax_device_flags {
 	DAXDEV_SYNC,
 	/* do not use uncached operations to write data */
 	DAXDEV_CACHED,
-	/* do not use mcsafe operations to read data */
-	DAXDEV_NOMCSAFE,
 };
 
 /**
@@ -171,8 +169,6 @@  size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 	 * via access_ok() in vfs_red, so use the 'no check' version to bypass
 	 * the HARDENED_USERCOPY overhead.
 	 */
-	if (test_bit(DAXDEV_NOMCSAFE, &dax_dev->flags))
-		return _copy_to_iter(addr, bytes, i);
 	return _copy_mc_to_iter(addr, bytes, i);
 }
 
@@ -242,12 +238,6 @@  void set_dax_cached(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(set_dax_cached);
 
-void set_dax_nomcsafe(struct dax_device *dax_dev)
-{
-	set_bit(DAXDEV_NOMCSAFE, &dax_dev->flags);
-}
-EXPORT_SYMBOL_GPL(set_dax_nomcsafe);
-
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 754319ce2a29b..d9c20b148ac19 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -838,7 +838,6 @@  static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 	if (IS_ERR(fs->dax_dev))
 		return PTR_ERR(fs->dax_dev);
 	set_dax_cached(fs->dax_dev);
-	set_dax_nomcsafe(fs->dax_dev);
 	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
 					fs->dax_dev);
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d22cbf03d37d2..d267331bc37e7 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -90,7 +90,6 @@  static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 #endif
 
 void set_dax_cached(struct dax_device *dax_dev);
-void set_dax_nomcsafe(struct dax_device *dax_dev);
 
 struct writeback_control;
 #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)