Message ID | 20210829122517.1648171-7-ruansy.fnst@fujitsu.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | fsdax,xfs: Add reflink&dedupe support for fsdax | expand |
On Sun, Aug 29, 2021 at 08:25:16PM +0800, Shiyang Ruan wrote: > In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. > After that, new allocated extents needs to be remapped to the file. Add > an implementation of ->iomap_end() for dax write ops to do the remapping > work. Please split the new dax infrastructure from the XFS changes. > static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > - int *iomap_errp, const struct iomap_ops *ops) > + int *iomap_errp, const struct iomap_ops *ops) > { > struct address_space *mapping = vmf->vma->vm_file->f_mapping; > XA_STATE(xas, &mapping->i_pages, vmf->pgoff); > @@ -1631,7 +1664,7 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, > } > > static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, > - const struct iomap_ops *ops) > + const struct iomap_ops *ops) These looks like unrelated whitespace changes. > -static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) > +loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) > { > const struct iomap *iomap = &iter->iomap; > const struct iomap *srcmap = iomap_iter_srcmap(iter); > @@ -918,6 +918,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) > > return written; > } > +EXPORT_SYMBOL_GPL(iomap_zero_iter); I don't see why this would have to be exported. > + unsigned flags, > + struct iomap *iomap) > +{ > + int error = 0; > + struct xfs_inode *ip = XFS_I(inode); > + bool cow = xfs_is_cow_inode(ip); The cow variable is only used once, so I think we can drop it. > + const struct iomap_iter *iter = > + container_of(iomap, typeof(*iter), iomap); Please comment this as it is a little unusual. 
> + > + if (cow) { > + if (iter->processed <= 0) > + xfs_reflink_cancel_cow_range(ip, pos, length, true); > + else > + error = xfs_reflink_end_cow(ip, pos, iter->processed); > + } > + return error ?: iter->processed; The ->iomap_end convention is to return 0 or a negative error code. Also i'd much prefer to just spell this out in a normal sequential way: if (!xfs_is_cow_inode(ip)) return 0; if (iter->processed <= 0) { xfs_reflink_cancel_cow_range(ip, pos, length, true); return 0; } return xfs_reflink_end_cow(ip, pos, iter->processed); > +static inline int > +xfs_iomap_zero_range( > + struct xfs_inode *ip, > + loff_t pos, > + loff_t len, > + bool *did_zero) > +{ > + struct inode *inode = VFS_I(ip); > + > + return IS_DAX(inode) > + ? dax_iomap_zero_range(inode, pos, len, did_zero, > + &xfs_dax_write_iomap_ops) > + : iomap_zero_range(inode, pos, len, did_zero, > + &xfs_buffered_write_iomap_ops); > +} if (IS_DAX(inode)) return dax_iomap_zero_range(inode, pos, len, did_zero, &xfs_dax_write_iomap_ops); return iomap_zero_range(inode, pos, len, did_zero, &xfs_buffered_write_iomap_ops); > +static inline int > +xfs_iomap_truncate_page( > + struct xfs_inode *ip, > + loff_t pos, > + bool *did_zero) > +{ > + struct inode *inode = VFS_I(ip); > + > + return IS_DAX(inode) > + ? dax_iomap_truncate_page(inode, pos, did_zero, > + &xfs_dax_write_iomap_ops) > + : iomap_truncate_page(inode, pos, did_zero, > + &xfs_buffered_write_iomap_ops); > +} Same here.
On Thu, Sep 02, 2021 at 09:43:08AM +0200, Christoph Hellwig wrote: > On Sun, Aug 29, 2021 at 08:25:16PM +0800, Shiyang Ruan wrote: > > In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. > > After that, new allocated extents needs to be remapped to the file. Add > > an implementation of ->iomap_end() for dax write ops to do the remapping > > work. > > Please split the new dax infrastructure from the XFS changes. > > > static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, > > - int *iomap_errp, const struct iomap_ops *ops) > > + int *iomap_errp, const struct iomap_ops *ops) > > { > > struct address_space *mapping = vmf->vma->vm_file->f_mapping; > > XA_STATE(xas, &mapping->i_pages, vmf->pgoff); > > @@ -1631,7 +1664,7 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, > > } > > > > static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, > > - const struct iomap_ops *ops) > > + const struct iomap_ops *ops) > > These looks like unrelated whitespace changes. > > > -static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) > > +loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) > > { > > const struct iomap *iomap = &iter->iomap; > > const struct iomap *srcmap = iomap_iter_srcmap(iter); > > @@ -918,6 +918,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) > > > > return written; > > } > > +EXPORT_SYMBOL_GPL(iomap_zero_iter); > > I don't see why this would have to be exported. > > > + unsigned flags, > > + struct iomap *iomap) > > +{ > > + int error = 0; > > + struct xfs_inode *ip = XFS_I(inode); > > + bool cow = xfs_is_cow_inode(ip); > > The cow variable is only used once, so I think we can drop it. > > > + const struct iomap_iter *iter = > > + container_of(iomap, typeof(*iter), iomap); > > Please comment this as it is a little unusual. 
> > > + > > + if (cow) { > > + if (iter->processed <= 0) > > + xfs_reflink_cancel_cow_range(ip, pos, length, true); > > + else > > + error = xfs_reflink_end_cow(ip, pos, iter->processed); > > + } > > + return error ?: iter->processed; > > The ->iomap_end convention is to return 0 or a negative error code. > Also i'd much prefer to just spell this out in a normal sequential way: > > if (!xfs_is_cow_inode(ip)) > return 0; > > if (iter->processed <= 0) { > xfs_reflink_cancel_cow_range(ip, pos, length, true); > return 0; > } > > return xfs_reflink_end_cow(ip, pos, iter->processed); Seeing as written either contains iter->processed if it's positive, or zero if nothing got written or there were errors, I wonder why this isn't just: if (!xfs_is_cow_inode(ip)) return 0; if (!written) { xfs_reflink_cancel_cow_range(ip, pos, length, true); return 0; } return xfs_reflink_end_cow(ip, pos, written); ? (He says while cleaning up trying to leave for vacation, pardon me if this comment is totally boneheaded...) --D > > +static inline int > > +xfs_iomap_zero_range( > > + struct xfs_inode *ip, > > + loff_t pos, > > + loff_t len, > > + bool *did_zero) > > +{ > > + struct inode *inode = VFS_I(ip); > > + > > + return IS_DAX(inode) > > + ? dax_iomap_zero_range(inode, pos, len, did_zero, > > + &xfs_dax_write_iomap_ops) > > + : iomap_zero_range(inode, pos, len, did_zero, > > + &xfs_buffered_write_iomap_ops); > > +} > > if (IS_DAX(inode)) > return dax_iomap_zero_range(inode, pos, len, did_zero, > &xfs_dax_write_iomap_ops); > return iomap_zero_range(inode, pos, len, did_zero, > &xfs_buffered_write_iomap_ops); > > > +static inline int > > +xfs_iomap_truncate_page( > > + struct xfs_inode *ip, > > + loff_t pos, > > + bool *did_zero) > > +{ > > + struct inode *inode = VFS_I(ip); > > + > > + return IS_DAX(inode) > > + ? 
dax_iomap_truncate_page(inode, pos, did_zero, > > + &xfs_dax_write_iomap_ops) > > + : iomap_truncate_page(inode, pos, did_zero, > > + &xfs_buffered_write_iomap_ops); > > +} > > Same here.
diff --git a/fs/dax.c b/fs/dax.c index a1232d6b7e37..88541b734f97 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1401,6 +1401,39 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, } EXPORT_SYMBOL_GPL(dax_iomap_rw); +int +dax_iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, + bool *did_zero, const struct iomap_ops *ops) +{ + struct iomap_iter iomi = { + .inode = inode, + .pos = pos, + .len = len, + .flags = IOMAP_ZERO, + }; + int ret; + + while ((ret = iomap_iter(&iomi, ops)) > 0) + iomi.processed = iomap_zero_iter(&iomi, did_zero); + + return ret; +} +EXPORT_SYMBOL_GPL(dax_iomap_zero_range); + +int +dax_iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, + const struct iomap_ops *ops) +{ + unsigned int blocksize = i_blocksize(inode); + unsigned int off = pos & (blocksize - 1); + + /* Block boundary? Nothing to do */ + if (!off) + return 0; + return dax_iomap_zero_range(inode, pos, blocksize - off, did_zero, ops); +} +EXPORT_SYMBOL_GPL(dax_iomap_truncate_page); + static vm_fault_t dax_fault_return(int error) { if (error == 0) @@ -1521,7 +1554,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, } static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, - int *iomap_errp, const struct iomap_ops *ops) + int *iomap_errp, const struct iomap_ops *ops) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; XA_STATE(xas, &mapping->i_pages, vmf->pgoff); @@ -1631,7 +1664,7 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas, } static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, - const struct iomap_ops *ops) + const struct iomap_ops *ops) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER); @@ -1732,7 +1765,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * successfully. 
*/ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) + pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops) { switch (pe_size) { case PE_SIZE_PTE: diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 6e8d40877d01..6341a1328def 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -887,7 +887,7 @@ static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) return iomap_write_end(iter, pos, bytes, bytes, page); } -static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) +loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) { const struct iomap *iomap = &iter->iomap; const struct iomap *srcmap = iomap_iter_srcmap(iter); @@ -918,6 +918,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) return written; } +EXPORT_SYMBOL_GPL(iomap_zero_iter); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 213a97a921bb..f1b7a2637a1d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1009,8 +1009,7 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, - &xfs_buffered_write_iomap_ops); + error = xfs_iomap_zero_range(ip, offset, len, NULL); if (error) return error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index cc3cfb12df53..d57f94c523c7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -704,7 +704,7 @@ xfs_file_dax_write( pos = iocb->ki_pos; trace_xfs_file_dax_write(iocb, from); - ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1329,7 +1329,7 @@ __xfs_filemap_fault( ret 
= dax_iomap_fault(vmf, pe_size, &pfn, NULL, (write_fault && !vmf->cow_page) ? - &xfs_direct_write_iomap_ops : + &xfs_dax_write_iomap_ops : &xfs_read_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d8cd2583dedb..c037a47004f9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -761,7 +761,8 @@ xfs_direct_write_iomap_begin( /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, - &lockmode, flags & IOMAP_DIRECT); + &lockmode, + (flags & IOMAP_DIRECT) || IS_DAX(inode)); if (error) goto out_unlock; if (shared) @@ -854,6 +855,35 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { .iomap_begin = xfs_direct_write_iomap_begin, }; +static int +xfs_dax_write_iomap_end( + struct inode *inode, + loff_t pos, + loff_t length, + ssize_t written, + unsigned flags, + struct iomap *iomap) +{ + int error = 0; + struct xfs_inode *ip = XFS_I(inode); + bool cow = xfs_is_cow_inode(ip); + const struct iomap_iter *iter = + container_of(iomap, typeof(*iter), iomap); + + if (cow) { + if (iter->processed <= 0) + xfs_reflink_cancel_cow_range(ip, pos, length, true); + else + error = xfs_reflink_end_cow(ip, pos, iter->processed); + } + return error ?: iter->processed; +} + +const struct iomap_ops xfs_dax_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, + .iomap_end = xfs_dax_write_iomap_end, +}; + static int xfs_buffered_write_iomap_begin( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7d3703556d0e..1ca9f9102523 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -45,5 +45,37 @@ extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; +extern const struct iomap_ops xfs_dax_write_iomap_ops; + +static inline int +xfs_iomap_zero_range( 
+ struct xfs_inode *ip, + loff_t pos, + loff_t len, + bool *did_zero) +{ + struct inode *inode = VFS_I(ip); + + return IS_DAX(inode) + ? dax_iomap_zero_range(inode, pos, len, did_zero, + &xfs_dax_write_iomap_ops) + : iomap_zero_range(inode, pos, len, did_zero, + &xfs_buffered_write_iomap_ops); +} + +static inline int +xfs_iomap_truncate_page( + struct xfs_inode *ip, + loff_t pos, + bool *did_zero) +{ + struct inode *inode = VFS_I(ip); + + return IS_DAX(inode) + ? dax_iomap_truncate_page(inode, pos, did_zero, + &xfs_dax_write_iomap_ops) + : iomap_truncate_page(inode, pos, did_zero, + &xfs_buffered_write_iomap_ops); +} #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 93c082db04b7..0380f6942bc0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -911,8 +911,8 @@ xfs_setattr_size( */ if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); - error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_buffered_write_iomap_ops); + error = xfs_iomap_zero_range(ip, oldsize, newsize - oldsize, + &did_zeroing); } else { /* * iomap won't detect a dirty page over an unwritten block (or a @@ -924,8 +924,7 @@ xfs_setattr_size( newsize); if (error) return error; - error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_buffered_write_iomap_ops); + error = xfs_iomap_truncate_page(ip, newsize, &did_zeroing); } if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 28effe537d07..13e461cf2055 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1269,8 +1269,7 @@ xfs_reflink_zero_posteof( return 0; trace_xfs_zero_eof(ip, isize, pos - isize); - return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, - &xfs_buffered_write_iomap_ops); + return xfs_iomap_zero_range(ip, isize, pos - isize, NULL); } /* diff --git a/include/linux/dax.h b/include/linux/dax.h index 8ecd125434ef..96b1f0bb4ab8 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -237,6 +237,10 
@@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int dax_iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, + bool *did_zero, const struct iomap_ops *ops); +int dax_iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, + const struct iomap_ops *ops); s64 dax_iomap_zero(loff_t pos, u64 length, const struct iomap *iomap, const struct iomap *srcmap); int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 24f8489583ca..b32cbdb74c49 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -235,6 +235,7 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage, #endif int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); +loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. After that, newly allocated extents need to be remapped to the file. Add an implementation of ->iomap_end() for dax write ops to do the remapping work. Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com> --- fs/dax.c | 39 ++++++++++++++++++++++++++++++++++++--- fs/iomap/buffered-io.c | 3 ++- fs/xfs/xfs_bmap_util.c | 3 +-- fs/xfs/xfs_file.c | 4 ++-- fs/xfs/xfs_iomap.c | 32 +++++++++++++++++++++++++++++++- fs/xfs/xfs_iomap.h | 32 ++++++++++++++++++++++++++++++++ fs/xfs/xfs_iops.c | 7 +++---- fs/xfs/xfs_reflink.c | 3 +-- include/linux/dax.h | 4 ++++ include/linux/iomap.h | 1 + 10 files changed, 113 insertions(+), 15 deletions(-)