Message ID | 20210928062311.4012070-8-ruansy.fnst@fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fsdax,xfs: Add reflink&dedupe support for fsdax | expand |
On Tue, Sep 28, 2021 at 02:23:10PM +0800, Shiyang Ruan wrote: > In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. > After that, new allocated extents needs to be remapped to the file. > So, add a CoW identification in ->iomap_begin(), and implement > ->iomap_end() to do the remapping work. > > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> I think this patch looks good, so: Reviewed-by: Darrick J. Wong <djwong@kernel.org> A big thank you to Shiyang for persisting in getting this series finished! :) Judging from the conversation Christoph and I had the last time this patchset was submitted, I gather the last big remaining issue is the use of page->mapping for hw poison. So I'll go take a look at "fsdax: introduce FS query interface to support reflink" now. --D > --- > fs/xfs/xfs_bmap_util.c | 3 +-- > fs/xfs/xfs_file.c | 7 ++----- > fs/xfs/xfs_iomap.c | 30 +++++++++++++++++++++++++++- > fs/xfs/xfs_iomap.h | 44 ++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_iops.c | 7 +++---- > fs/xfs/xfs_reflink.c | 3 +-- > 6 files changed, 80 insertions(+), 14 deletions(-) > > diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c > index 73a36b7be3bd..0681250e0a5d 100644 > --- a/fs/xfs/xfs_bmap_util.c > +++ b/fs/xfs/xfs_bmap_util.c > @@ -1009,8 +1009,7 @@ xfs_free_file_space( > return 0; > if (offset + len > XFS_ISIZE(ip)) > len = XFS_ISIZE(ip) - offset; > - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, > - &xfs_buffered_write_iomap_ops); > + error = xfs_iomap_zero_range(ip, offset, len, NULL); > if (error) > return error; > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c > index 7aa943edfc02..afde4fbefb6f 100644 > --- a/fs/xfs/xfs_file.c > +++ b/fs/xfs/xfs_file.c > @@ -704,7 +704,7 @@ xfs_file_dax_write( > pos = iocb->ki_pos; > > trace_xfs_file_dax_write(iocb, from); > - ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); > + ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops); > if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { > i_size_write(inode, iocb->ki_pos); > error = xfs_setfilesize(ip, pos, ret); > @@ -1327,10 +1327,7 @@ __xfs_filemap_fault( > pfn_t pfn; > > xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); > - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, > - (write_fault && !vmf->cow_page) ? > - &xfs_direct_write_iomap_ops : > - &xfs_read_iomap_ops); > + ret = xfs_dax_iomap_fault(vmf, pe_size, write_fault, &pfn); > if (ret & VM_FAULT_NEEDDSYNC) > ret = dax_finish_sync_fault(vmf, pe_size, pfn); > xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index 093758440ad5..51cb5b713521 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -761,7 +761,8 @@ xfs_direct_write_iomap_begin( > > /* may drop and re-acquire the ilock */ > error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, > - &lockmode, flags & IOMAP_DIRECT); > + &lockmode, > + (flags & IOMAP_DIRECT) || IS_DAX(inode)); > if (error) > goto out_unlock; > if (shared) > @@ -854,6 +855,33 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { > .iomap_begin = xfs_direct_write_iomap_begin, > }; > > +static int > +xfs_dax_write_iomap_end( > + struct inode *inode, > + loff_t pos, > + loff_t length, > + ssize_t written, > + unsigned flags, > + struct iomap *iomap) > +{ > + struct xfs_inode *ip = XFS_I(inode); > + > + if (!xfs_is_cow_inode(ip)) > + return 0; > + > + if (!written) { > + xfs_reflink_cancel_cow_range(ip, pos, length, true); > + return 0; > + } > + > + return xfs_reflink_end_cow(ip, pos, written); > +} > + > +const struct iomap_ops xfs_dax_write_iomap_ops = { > + .iomap_begin = xfs_direct_write_iomap_begin, > + .iomap_end = xfs_dax_write_iomap_end, > +}; > + > static int > xfs_buffered_write_iomap_begin( > struct inode *inode, > diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h > index 7d3703556d0e..81726bfbf890 100644 > --- a/fs/xfs/xfs_iomap.h > +++ b/fs/xfs/xfs_iomap.h > @@ -7,6 +7,7 @@ > #define __XFS_IOMAP_H__ > > #include <linux/iomap.h> > +#include <linux/dax.h> > > struct xfs_inode; > struct xfs_bmbt_irec; > @@ -45,5 +46,48 @@ extern const struct iomap_ops xfs_direct_write_iomap_ops; > extern const struct iomap_ops xfs_read_iomap_ops; > extern const struct iomap_ops xfs_seek_iomap_ops; > extern const struct iomap_ops xfs_xattr_iomap_ops; > +extern const struct iomap_ops xfs_dax_write_iomap_ops; > + > +static inline int > +xfs_iomap_zero_range( > + struct xfs_inode *ip, > + loff_t pos, > + loff_t len, > + bool *did_zero) > +{ > + struct inode *inode = VFS_I(ip); > + > + return iomap_zero_range(inode, pos, len, did_zero, > + IS_DAX(inode) ? > + &xfs_dax_write_iomap_ops : > + &xfs_buffered_write_iomap_ops); > +} > + > +static inline int > +xfs_iomap_truncate_page( > + struct xfs_inode *ip, > + loff_t pos, > + bool *did_zero) > +{ > + struct inode *inode = VFS_I(ip); > + > + return iomap_truncate_page(inode, pos, did_zero, > + IS_DAX(inode) ? > + &xfs_dax_write_iomap_ops : > + &xfs_buffered_write_iomap_ops); > +} > + > +static inline int > +xfs_dax_iomap_fault( > + struct vm_fault *vmf, > + enum page_entry_size pe_size, > + bool write_fault, > + pfn_t *pfn) > +{ > + return dax_iomap_fault(vmf, pe_size, pfn, NULL, > + (write_fault && !vmf->cow_page) ? > + &xfs_dax_write_iomap_ops : > + &xfs_read_iomap_ops); > +} > > #endif /* __XFS_IOMAP_H__*/ > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > index a607d6aca5c4..332e6208dffd 100644 > --- a/fs/xfs/xfs_iops.c > +++ b/fs/xfs/xfs_iops.c > @@ -911,8 +911,8 @@ xfs_setattr_size( > */ > if (newsize > oldsize) { > trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); > - error = iomap_zero_range(inode, oldsize, newsize - oldsize, > - &did_zeroing, &xfs_buffered_write_iomap_ops); > + error = xfs_iomap_zero_range(ip, oldsize, newsize - oldsize, > + &did_zeroing); > } else { > /* > * iomap won't detect a dirty page over an unwritten block (or a > @@ -924,8 +924,7 @@ xfs_setattr_size( > newsize); > if (error) > return error; > - error = iomap_truncate_page(inode, newsize, &did_zeroing, > - &xfs_buffered_write_iomap_ops); > + error = xfs_iomap_truncate_page(ip, newsize, &did_zeroing); > } > > if (error) > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index 7ecea0311e88..9d876e268734 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -1269,8 +1269,7 @@ xfs_reflink_zero_posteof( > return 0; > > trace_xfs_zero_eof(ip, isize, pos - isize); > - return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, > - &xfs_buffered_write_iomap_ops); > + return xfs_iomap_zero_range(ip, isize, pos - isize, NULL); > } > > /* > -- > 2.33.0 > > >
On Thu, Oct 14, 2021 at 10:38 AM Darrick J. Wong <djwong@kernel.org> wrote: > > On Tue, Sep 28, 2021 at 02:23:10PM +0800, Shiyang Ruan wrote: > > In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. > > After that, new allocated extents needs to be remapped to the file. > > So, add a CoW identification in ->iomap_begin(), and implement > > ->iomap_end() to do the remapping work. > > > > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> > > I think this patch looks good, so: > Reviewed-by: Darrick J. Wong <djwong@kernel.org> > > A big thank you to Shiyang for persisting in getting this series > finished! :) > > Judging from the conversation Christoph and I had the last time this > patchset was submitted, I gather the last big remaining issue is the use > of page->mapping for hw poison. So I'll go take a look at "fsdax: > introduce FS query interface to support reflink" now. The other blocker was enabling mounting dax filesystems on a dax-device rather than a block device. I'm actively refactoring the nvdimm subsystem side of that equation, but could use help with the conversion of the xfs mount path. Christoph, might you have that in your queue?
On Thu, Oct 14, 2021 at 10:50:00AM -0700, Dan Williams wrote: > The other blocker was enabling mounting dax filesystems on a > dax-device rather than a block device. I'm actively refactoring the > nvdimm subsystem side of that equation, but could use help with the > conversion of the xfs mount path. Christoph, might you have that in > your queue? It's in my queue. I'm about to send your a series of prep patches and plan to tackle the actual mounting next merge window.
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 73a36b7be3bd..0681250e0a5d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1009,8 +1009,7 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, - &xfs_buffered_write_iomap_ops); + error = xfs_iomap_zero_range(ip, offset, len, NULL); if (error) return error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7aa943edfc02..afde4fbefb6f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -704,7 +704,7 @@ xfs_file_dax_write( pos = iocb->ki_pos; trace_xfs_file_dax_write(iocb, from); - ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1327,10 +1327,7 @@ __xfs_filemap_fault( pfn_t pfn; xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, - (write_fault && !vmf->cow_page) ? - &xfs_direct_write_iomap_ops : - &xfs_read_iomap_ops); + ret = xfs_dax_iomap_fault(vmf, pe_size, write_fault, &pfn); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 093758440ad5..51cb5b713521 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -761,7 +761,8 @@ xfs_direct_write_iomap_begin( /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, - &lockmode, flags & IOMAP_DIRECT); + &lockmode, + (flags & IOMAP_DIRECT) || IS_DAX(inode)); if (error) goto out_unlock; if (shared) @@ -854,6 +855,33 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { .iomap_begin = xfs_direct_write_iomap_begin, }; +static int +xfs_dax_write_iomap_end( + struct inode *inode, + loff_t pos, + loff_t length, + ssize_t written, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + + if (!xfs_is_cow_inode(ip)) + return 0; + + if (!written) { + xfs_reflink_cancel_cow_range(ip, pos, length, true); + return 0; + } + + return xfs_reflink_end_cow(ip, pos, written); +} + +const struct iomap_ops xfs_dax_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, + .iomap_end = xfs_dax_write_iomap_end, +}; + static int xfs_buffered_write_iomap_begin( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7d3703556d0e..81726bfbf890 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -7,6 +7,7 @@ #define __XFS_IOMAP_H__ #include <linux/iomap.h> +#include <linux/dax.h> struct xfs_inode; struct xfs_bmbt_irec; @@ -45,5 +46,48 @@ extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; +extern const struct iomap_ops xfs_dax_write_iomap_ops; + +static inline int +xfs_iomap_zero_range( + struct xfs_inode *ip, + loff_t pos, + loff_t len, + bool *did_zero) +{ + struct inode *inode = VFS_I(ip); + + return iomap_zero_range(inode, pos, len, did_zero, + IS_DAX(inode) ? + &xfs_dax_write_iomap_ops : + &xfs_buffered_write_iomap_ops); +} + +static inline int +xfs_iomap_truncate_page( + struct xfs_inode *ip, + loff_t pos, + bool *did_zero) +{ + struct inode *inode = VFS_I(ip); + + return iomap_truncate_page(inode, pos, did_zero, + IS_DAX(inode) ? + &xfs_dax_write_iomap_ops : + &xfs_buffered_write_iomap_ops); +} + +static inline int +xfs_dax_iomap_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size, + bool write_fault, + pfn_t *pfn) +{ + return dax_iomap_fault(vmf, pe_size, pfn, NULL, + (write_fault && !vmf->cow_page) ? + &xfs_dax_write_iomap_ops : + &xfs_read_iomap_ops); +} #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index a607d6aca5c4..332e6208dffd 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -911,8 +911,8 @@ xfs_setattr_size( */ if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); - error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_buffered_write_iomap_ops); + error = xfs_iomap_zero_range(ip, oldsize, newsize - oldsize, + &did_zeroing); } else { /* * iomap won't detect a dirty page over an unwritten block (or a @@ -924,8 +924,7 @@ xfs_setattr_size( newsize); if (error) return error; - error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_buffered_write_iomap_ops); + error = xfs_iomap_truncate_page(ip, newsize, &did_zeroing); } if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 7ecea0311e88..9d876e268734 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1269,8 +1269,7 @@ xfs_reflink_zero_posteof( return 0; trace_xfs_zero_eof(ip, isize, pos - isize); - return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, - &xfs_buffered_write_iomap_ops); + return xfs_iomap_zero_range(ip, isize, pos - isize, NULL); } /*
In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. After that, new allocated extents needs to be remapped to the file. So, add a CoW identification in ->iomap_begin(), and implement ->iomap_end() to do the remapping work. Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> --- fs/xfs/xfs_bmap_util.c | 3 +-- fs/xfs/xfs_file.c | 7 ++----- fs/xfs/xfs_iomap.c | 30 +++++++++++++++++++++++++++- fs/xfs/xfs_iomap.h | 44 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_iops.c | 7 +++---- fs/xfs/xfs_reflink.c | 3 +-- 6 files changed, 80 insertions(+), 14 deletions(-)