Message ID | 20181203222503.30649-9-hch@lst.de (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [01/11] xfs: remove xfs_trim_extent_eof | expand |
On Mon, Dec 03, 2018 at 05:25:00PM -0500, Christoph Hellwig wrote: > Besides simplifying the code a bit this allows to actually implement > the behavior of using COW preallocation for non-COW data mentioned > in the current comments. > > Note that this breaks the current version of xfs/420, but that is > because the test is broken. A separate fix will be sent for it. (Still waiting for this...) > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/xfs/xfs_iomap.c | 132 ++++++++++++++++++++++++++++++------------- > fs/xfs/xfs_reflink.c | 67 ---------------------- > fs/xfs/xfs_trace.h | 1 - > 3 files changed, 93 insertions(+), 107 deletions(-) > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index d851abac16a9..d19f99e5476a 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay( > { > struct xfs_inode *ip = XFS_I(inode); > struct xfs_mount *mp = ip->i_mount; > - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > xfs_fileoff_t maxbytes_fsb = > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); > xfs_fileoff_t end_fsb; > - int error = 0, eof = 0; > - struct xfs_bmbt_irec got; > - struct xfs_iext_cursor icur; > + struct xfs_bmbt_irec imap, cmap; > + struct xfs_iext_cursor icur, ccur; > xfs_fsblock_t prealloc_blocks = 0; > + bool eof = false, cow_eof = false, shared; > + int whichfork = XFS_DATA_FORK; > + int error = 0; > > ASSERT(!XFS_IS_REALTIME_INODE(ip)); > ASSERT(!xfs_get_extsz_hint(ip)); > @@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay( > > XFS_STATS_INC(mp, xs_blk_mapw); > > - if (!(ifp->if_flags & XFS_IFEXTENTS)) { > + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { > error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); > if (error) > goto out_unlock; > @@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay( > > end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > - eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); > + /* > + * Search the data fork fork first to look up our source mapping. We > + * always need the data fork map, as we have to return it to the > + * iomap code so that the higher level write code can read data in to > + * perform read-modify-write cycles for unaligned writes. > + */ > + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); > if (eof) > - got.br_startoff = end_fsb; /* fake hole until the end */ > + imap.br_startoff = end_fsb; /* fake hole until the end */ > > - if (got.br_startoff <= offset_fsb) { > + /* We never need to allocate blocks for zeroing a hole. */ > + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { > + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); > + goto out_unlock; > + } > + > + /* > + * Search the COW fork extent list even if we did not find a data fork > + * extent. This serves two purposes: first this implements the > + * speculative preallocation using cowextisze, so that we also unshare "cowextsize" > + * block adjacent to shared blocks instead of just the shared blocks > + * themselves. Second the lookup in the extent list is generally faster > + * than going out to the shared extent tree. > + */ > + if (xfs_is_reflink_inode(ip)) { > + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, > + &ccur, &cmap); > + if (!cow_eof && cmap.br_startoff <= offset_fsb) { > + trace_xfs_reflink_cow_found(ip, &cmap); > + whichfork = XFS_COW_FORK; > + goto done; > + } > + } > + > + if (imap.br_startoff <= offset_fsb) { > /* > * For reflink files we may need a delalloc reservation when > * overwriting shared extents. This includes zeroing of > * existing extents that contain data. > */ > - if (xfs_is_reflink_inode(ip) && > - ((flags & IOMAP_WRITE) || > - got.br_state != XFS_EXT_UNWRITTEN)) { > - xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); > - error = xfs_reflink_reserve_cow(ip, &got); > - if (error) > - goto out_unlock; > + if (!xfs_is_reflink_inode(ip) || > + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > + &imap); > + goto done; > } > > - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); > - goto done; > - } > + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); > > - if (flags & IOMAP_ZERO) { > - xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); > - goto out_unlock; > + /* Trim the mapping to the nearest shared extent boundary. */ > + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); > + if (error) > + goto out_unlock; > + > + /* Not shared? Just report the (potentially capped) extent. */ > + if (!shared) { > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > + &imap); > + goto done; > + } > + > + /* > + * Fork all the shared blocks from our write offset until the > + * end of the extent. > + */ > + whichfork = XFS_COW_FORK; > + end_fsb = imap.br_startoff + imap.br_blockcount; > + } else { > + /* > + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES > + * pages to keep the chunks of work done where somewhat > + * symmetric with the work writeback does. This is a completely > + * arbitrary number pulled out of thin air. > + * > + * Note that the values needs to be less than 32-bits wide until > + * the lower level functions are updated. > + */ > + count = min_t(loff_t, count, 1024 * PAGE_SIZE); > + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > } > > error = xfs_qm_dqattach_locked(ip, false); > if (error) > goto out_unlock; > > - /* > - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages > - * to keep the chunks of work done where somewhat symmetric with the > - * work writeback does. This is a completely arbitrary number pulled > - * out of thin air as a best guess for initial testing. > - * > - * Note that the values needs to be less than 32-bits wide until > - * the lower level functions are updated. > - */ > - count = min_t(loff_t, count, 1024 * PAGE_SIZE); > - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > - > - if (eof) { > + if (eof && whichfork == XFS_DATA_FORK) { > prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, > &icur); > if (prealloc_blocks) { > @@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay( > } > > retry: > - error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, > - end_fsb - offset_fsb, prealloc_blocks, &got, &icur, > - eof); > + error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, > + end_fsb - offset_fsb, prealloc_blocks, > + whichfork == XFS_DATA_FORK ? &imap : &cmap, > + whichfork == XFS_DATA_FORK ? &icur : &ccur, > + whichfork == XFS_DATA_FORK ? eof : cow_eof); > switch (error) { > case 0: > break; > @@ -659,9 +703,19 @@ xfs_file_iomap_begin_delay( > * them out if the write happens to fail. > */ > iomap->flags |= IOMAP_F_NEW; > - trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); > + trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap); I'm confused by this, if whichfork == COW then won't ftrace report results from the wrong fork? --D > done: > - error = xfs_bmbt_to_iomap(ip, iomap, &got, false); > + if (whichfork == XFS_COW_FORK) { > + if (imap.br_startoff > offset_fsb) { > + xfs_trim_extent(&cmap, offset_fsb, > + imap.br_startoff - offset_fsb); > + error = xfs_bmbt_to_iomap(ip, iomap, &cmap, false); > + goto out_unlock; > + } > + /* ensure we only report blocks we have a reservation for */ > + xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); > + } > + error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); > out_unlock: > xfs_iunlock(ip, XFS_ILOCK_EXCL); > return error; > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c > index bdbaff1b3fb7..d59b556d42cb 100644 > --- a/fs/xfs/xfs_reflink.c > +++ b/fs/xfs/xfs_reflink.c > @@ -234,73 +234,6 @@ xfs_reflink_trim_around_shared( > } > } > > -/* > - * Trim the passed in imap to the next shared/unshared extent boundary, and > - * if imap->br_startoff points to a shared extent reserve space for it in the > - * COW fork. > - * > - * Note that imap will always contain the block numbers for the existing blocks > - * in the data fork, as the upper layers need them for read-modify-write > - * operations. > - */ > -int > -xfs_reflink_reserve_cow( > - struct xfs_inode *ip, > - struct xfs_bmbt_irec *imap) > -{ > - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); > - struct xfs_bmbt_irec got; > - int error = 0; > - bool eof = false; > - struct xfs_iext_cursor icur; > - bool shared; > - > - /* > - * Search the COW fork extent list first. This serves two purposes: > - * first this implement the speculative preallocation using cowextisze, > - * so that we also unshared block adjacent to shared blocks instead > - * of just the shared blocks themselves. Second the lookup in the > - * extent list is generally faster than going out to the shared extent > - * tree. > - */ > - > - if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got)) > - eof = true; > - if (!eof && got.br_startoff <= imap->br_startoff) { > - trace_xfs_reflink_cow_found(ip, imap); > - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); > - return 0; > - } > - > - /* Trim the mapping to the nearest shared extent boundary. */ > - error = xfs_reflink_trim_around_shared(ip, imap, &shared); > - if (error) > - return error; > - > - /* Not shared? Just report the (potentially capped) extent. */ > - if (!shared) > - return 0; > - > - /* > - * Fork all the shared blocks from our write offset until the end of > - * the extent. > - */ > - error = xfs_qm_dqattach_locked(ip, false); > - if (error) > - return error; > - > - error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, > - imap->br_blockcount, 0, &got, &icur, eof); > - if (error == -ENOSPC || error == -EDQUOT) > - trace_xfs_reflink_cow_enospc(ip, imap); > - if (error) > - return error; > - > - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); > - trace_xfs_reflink_cow_alloc(ip, &got); > - return 0; > -} > - > /* Convert part of an unwritten CoW extent to a real one. */ > STATIC int > xfs_reflink_convert_cow_extent( > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > index 870865913bd8..36e74fc90700 100644 > --- a/fs/xfs/xfs_trace.h > +++ b/fs/xfs/xfs_trace.h > @@ -3179,7 +3179,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); > > /* copy on write */ > DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); > -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); > DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); > DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); > DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); > -- > 2.19.1 >
On Tue, Dec 18, 2018 at 03:36:41PM -0800, Darrick J. Wong wrote: > On Mon, Dec 03, 2018 at 05:25:00PM -0500, Christoph Hellwig wrote: > > Besides simplifying the code a bit this allows to actually implement > > the behavior of using COW preallocation for non-COW data mentioned > > in the current comments. > > > > Note that this breaks the current version of xfs/420, but that is > > because the test is broken. A separate fix will be sent for it. > > (Still waiting for this...) It was sent quite a while ago, but I need to resend it.. > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > --- > > fs/xfs/xfs_iomap.c | 132 ++++++++++++++++++++++++++++++------------- > > fs/xfs/xfs_reflink.c | 67 ---------------------- > > fs/xfs/xfs_trace.h | 1 - > > 3 files changed, 93 insertions(+), 107 deletions(-) > > > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > > index d851abac16a9..d19f99e5476a 100644 > > --- a/fs/xfs/xfs_iomap.c > > +++ b/fs/xfs/xfs_iomap.c > > @@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay( > > { > > struct xfs_inode *ip = XFS_I(inode); > > struct xfs_mount *mp = ip->i_mount; > > - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); > > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > > xfs_fileoff_t maxbytes_fsb = > > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); > > xfs_fileoff_t end_fsb; > > - int error = 0, eof = 0; > > - struct xfs_bmbt_irec got; > > - struct xfs_iext_cursor icur; > > + struct xfs_bmbt_irec imap, cmap; > > + struct xfs_iext_cursor icur, ccur; > > xfs_fsblock_t prealloc_blocks = 0; > > + bool eof = false, cow_eof = false, shared; > > + int whichfork = XFS_DATA_FORK; > > + int error = 0; > > > > ASSERT(!XFS_IS_REALTIME_INODE(ip)); > > ASSERT(!xfs_get_extsz_hint(ip)); > > @@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay( > > > > XFS_STATS_INC(mp, xs_blk_mapw); > > > > - if (!(ifp->if_flags & XFS_IFEXTENTS)) { > > + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { > > error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); > > if (error) > > goto out_unlock; > > @@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay( > > > > end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > > > - eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); > > + /* > > + * Search the data fork fork first to look up our source mapping. We > > + * always need the data fork map, as we have to return it to the > > + * iomap code so that the higher level write code can read data in to > > + * perform read-modify-write cycles for unaligned writes. > > + */ > > + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); > > if (eof) > > - got.br_startoff = end_fsb; /* fake hole until the end */ > > + imap.br_startoff = end_fsb; /* fake hole until the end */ > > > > - if (got.br_startoff <= offset_fsb) { > > + /* We never need to allocate blocks for zeroing a hole. */ > > + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { > > + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); > > + goto out_unlock; > > + } > > + > > + /* > > + * Search the COW fork extent list even if we did not find a data fork > > + * extent. This serves two purposes: first this implements the > > + * speculative preallocation using cowextisze, so that we also unshare > > "cowextsize" > > > + * block adjacent to shared blocks instead of just the shared blocks > > + * themselves. Second the lookup in the extent list is generally faster > > + * than going out to the shared extent tree. > > + */ > > + if (xfs_is_reflink_inode(ip)) { > > + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, > > + &ccur, &cmap); > > + if (!cow_eof && cmap.br_startoff <= offset_fsb) { > > + trace_xfs_reflink_cow_found(ip, &cmap); > > + whichfork = XFS_COW_FORK; > > + goto done; > > + } > > + } > > + > > + if (imap.br_startoff <= offset_fsb) { > > /* > > * For reflink files we may need a delalloc reservation when > > * overwriting shared extents. This includes zeroing of > > * existing extents that contain data. > > */ > > - if (xfs_is_reflink_inode(ip) && > > - ((flags & IOMAP_WRITE) || > > - got.br_state != XFS_EXT_UNWRITTEN)) { > > - xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); > > - error = xfs_reflink_reserve_cow(ip, &got); > > - if (error) > > - goto out_unlock; > > + if (!xfs_is_reflink_inode(ip) || > > + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { > > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > > + &imap); > > + goto done; > > } > > > > - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); > > - goto done; > > - } > > + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); > > > > - if (flags & IOMAP_ZERO) { > > - xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); > > - goto out_unlock; > > + /* Trim the mapping to the nearest shared extent boundary. */ > > + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); > > + if (error) > > + goto out_unlock; > > + > > + /* Not shared? Just report the (potentially capped) extent. */ > > + if (!shared) { > > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > > + &imap); > > + goto done; > > + } > > + > > + /* > > + * Fork all the shared blocks from our write offset until the > > + * end of the extent. > > + */ > > + whichfork = XFS_COW_FORK; > > + end_fsb = imap.br_startoff + imap.br_blockcount; > > + } else { > > + /* > > + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES > > + * pages to keep the chunks of work done where somewhat > > + * symmetric with the work writeback does. This is a completely > > + * arbitrary number pulled out of thin air. > > + * > > + * Note that the values needs to be less than 32-bits wide until > > + * the lower level functions are updated. > > + */ > > + count = min_t(loff_t, count, 1024 * PAGE_SIZE); > > + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > } > > > > error = xfs_qm_dqattach_locked(ip, false); > > if (error) > > goto out_unlock; > > > > - /* > > - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages > > - * to keep the chunks of work done where somewhat symmetric with the > > - * work writeback does. This is a completely arbitrary number pulled > > - * out of thin air as a best guess for initial testing. > > - * > > - * Note that the values needs to be less than 32-bits wide until > > - * the lower level functions are updated. > > - */ > > - count = min_t(loff_t, count, 1024 * PAGE_SIZE); > > - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > - > > - if (eof) { > > + if (eof && whichfork == XFS_DATA_FORK) { > > prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, > > &icur); > > if (prealloc_blocks) { > > @@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay( > > } > > > > retry: > > - error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, > > - end_fsb - offset_fsb, prealloc_blocks, &got, &icur, > > - eof); > > + error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, > > + end_fsb - offset_fsb, prealloc_blocks, > > + whichfork == XFS_DATA_FORK ? &imap : &cmap, > > + whichfork == XFS_DATA_FORK ? &icur : &ccur, > > + whichfork == XFS_DATA_FORK ? eof : cow_eof); > > switch (error) { > > case 0: > > break; > > @@ -659,9 +703,19 @@ xfs_file_iomap_begin_delay( > > * them out if the write happens to fail. > > */ > > iomap->flags |= IOMAP_F_NEW; > > - trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); > > + trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap); > > I'm confused by this, if whichfork == COW then won't ftrace report > results from the wrong fork? The question is what the "right" fork to trace is as both matter here. At least we now clearly tell you which fork the trace belongs too.
On Wed, Dec 19, 2018 at 08:38:43PM +0100, Christoph Hellwig wrote: > On Tue, Dec 18, 2018 at 03:36:41PM -0800, Darrick J. Wong wrote: > > On Mon, Dec 03, 2018 at 05:25:00PM -0500, Christoph Hellwig wrote: > > > Besides simplifying the code a bit this allows to actually implement > > > the behavior of using COW preallocation for non-COW data mentioned > > > in the current comments. > > > > > > Note that this breaks the current version of xfs/420, but that is > > > because the test is broken. A separate fix will be sent for it. > > > > (Still waiting for this...) > > It was sent quite a while ago, but I need to resend it.. > > > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > > --- > > > fs/xfs/xfs_iomap.c | 132 ++++++++++++++++++++++++++++++------------- > > > fs/xfs/xfs_reflink.c | 67 ---------------------- > > > fs/xfs/xfs_trace.h | 1 - > > > 3 files changed, 93 insertions(+), 107 deletions(-) > > > > > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > > > index d851abac16a9..d19f99e5476a 100644 > > > --- a/fs/xfs/xfs_iomap.c > > > +++ b/fs/xfs/xfs_iomap.c > > > @@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay( > > > { > > > struct xfs_inode *ip = XFS_I(inode); > > > struct xfs_mount *mp = ip->i_mount; > > > - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); > > > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > > > xfs_fileoff_t maxbytes_fsb = > > > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); > > > xfs_fileoff_t end_fsb; > > > - int error = 0, eof = 0; > > > - struct xfs_bmbt_irec got; > > > - struct xfs_iext_cursor icur; > > > + struct xfs_bmbt_irec imap, cmap; > > > + struct xfs_iext_cursor icur, ccur; > > > xfs_fsblock_t prealloc_blocks = 0; > > > + bool eof = false, cow_eof = false, shared; > > > + int whichfork = XFS_DATA_FORK; > > > + int error = 0; > > > > > > ASSERT(!XFS_IS_REALTIME_INODE(ip)); > > > ASSERT(!xfs_get_extsz_hint(ip)); > > > @@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay( > > > > > > XFS_STATS_INC(mp, xs_blk_mapw); > > > > > > - if (!(ifp->if_flags & XFS_IFEXTENTS)) { > > > + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { > > > error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); > > > if (error) > > > goto out_unlock; > > > @@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay( > > > > > > end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > > > > > - eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); > > > + /* > > > + * Search the data fork fork first to look up our source mapping. We > > > + * always need the data fork map, as we have to return it to the > > > + * iomap code so that the higher level write code can read data in to > > > + * perform read-modify-write cycles for unaligned writes. > > > + */ > > > + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); > > > if (eof) > > > - got.br_startoff = end_fsb; /* fake hole until the end */ > > > + imap.br_startoff = end_fsb; /* fake hole until the end */ > > > > > > - if (got.br_startoff <= offset_fsb) { > > > + /* We never need to allocate blocks for zeroing a hole. */ > > > + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { > > > + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); > > > + goto out_unlock; > > > + } > > > + > > > + /* > > > + * Search the COW fork extent list even if we did not find a data fork > > > + * extent. This serves two purposes: first this implements the > > > + * speculative preallocation using cowextisze, so that we also unshare > > > > "cowextsize" > > > > > + * block adjacent to shared blocks instead of just the shared blocks > > > + * themselves. Second the lookup in the extent list is generally faster > > > + * than going out to the shared extent tree. > > > + */ > > > + if (xfs_is_reflink_inode(ip)) { > > > + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, > > > + &ccur, &cmap); > > > + if (!cow_eof && cmap.br_startoff <= offset_fsb) { > > > + trace_xfs_reflink_cow_found(ip, &cmap); > > > + whichfork = XFS_COW_FORK; > > > + goto done; > > > + } > > > + } > > > + > > > + if (imap.br_startoff <= offset_fsb) { > > > /* > > > * For reflink files we may need a delalloc reservation when > > > * overwriting shared extents. This includes zeroing of > > > * existing extents that contain data. > > > */ > > > - if (xfs_is_reflink_inode(ip) && > > > - ((flags & IOMAP_WRITE) || > > > - got.br_state != XFS_EXT_UNWRITTEN)) { > > > - xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); > > > - error = xfs_reflink_reserve_cow(ip, &got); > > > - if (error) > > > - goto out_unlock; > > > + if (!xfs_is_reflink_inode(ip) || > > > + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { > > > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > > > + &imap); > > > + goto done; > > > } > > > > > > - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); > > > - goto done; > > > - } > > > + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); > > > > > > - if (flags & IOMAP_ZERO) { > > > - xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); > > > - goto out_unlock; > > > + /* Trim the mapping to the nearest shared extent boundary. */ > > > + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); > > > + if (error) > > > + goto out_unlock; > > > + > > > + /* Not shared? Just report the (potentially capped) extent. */ > > > + if (!shared) { > > > + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, > > > + &imap); > > > + goto done; > > > + } > > > + > > > + /* > > > + * Fork all the shared blocks from our write offset until the > > > + * end of the extent. > > > + */ > > > + whichfork = XFS_COW_FORK; > > > + end_fsb = imap.br_startoff + imap.br_blockcount; > > > + } else { > > > + /* > > > + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES > > > + * pages to keep the chunks of work done where somewhat > > > + * symmetric with the work writeback does. This is a completely > > > + * arbitrary number pulled out of thin air. > > > + * > > > + * Note that the values needs to be less than 32-bits wide until > > > + * the lower level functions are updated. > > > + */ > > > + count = min_t(loff_t, count, 1024 * PAGE_SIZE); > > > + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > > } > > > > > > error = xfs_qm_dqattach_locked(ip, false); > > > if (error) > > > goto out_unlock; > > > > > > - /* > > > - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages > > > - * to keep the chunks of work done where somewhat symmetric with the > > > - * work writeback does. This is a completely arbitrary number pulled > > > - * out of thin air as a best guess for initial testing. > > > - * > > > - * Note that the values needs to be less than 32-bits wide until > > > - * the lower level functions are updated. > > > - */ > > > - count = min_t(loff_t, count, 1024 * PAGE_SIZE); > > > - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); > > > - > > > - if (eof) { > > > + if (eof && whichfork == XFS_DATA_FORK) { > > > prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, > > > &icur); > > > if (prealloc_blocks) { > > > @@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay( > > > } > > > > > > retry: > > > - error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, > > > - end_fsb - offset_fsb, prealloc_blocks, &got, &icur, > > > - eof); > > > + error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, > > > + end_fsb - offset_fsb, prealloc_blocks, > > > + whichfork == XFS_DATA_FORK ? &imap : &cmap, > > > + whichfork == XFS_DATA_FORK ? &icur : &ccur, > > > + whichfork == XFS_DATA_FORK ? eof : cow_eof); > > > switch (error) { > > > case 0: > > > break; > > > @@ -659,9 +703,19 @@ xfs_file_iomap_begin_delay( > > > * them out if the write happens to fail. > > > */ > > > iomap->flags |= IOMAP_F_NEW; > > > - trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); > > > + trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap); > > > > I'm confused by this, if whichfork == COW then won't ftrace report > > results from the wrong fork? > > The question is what the "right" fork to trace is as both matter here. > At least we now clearly tell you which fork the trace belongs too. True. But I think it's misleading to have a tracepoint report say "cow" when the extent data it records is actually from the data fork, and particularly because we sometimes pass cmap back to the caller when whichfork == COW. If I were tracing through here I'd probably want to know what we found from both forks at that particular point in time so that I could continue following the logic. --D
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d851abac16a9..d19f99e5476a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t maxbytes_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); xfs_fileoff_t end_fsb; - int error = 0, eof = 0; - struct xfs_bmbt_irec got; - struct xfs_iext_cursor icur; + struct xfs_bmbt_irec imap, cmap; + struct xfs_iext_cursor icur, ccur; xfs_fsblock_t prealloc_blocks = 0; + bool eof = false, cow_eof = false, shared; + int whichfork = XFS_DATA_FORK; + int error = 0; ASSERT(!XFS_IS_REALTIME_INODE(ip)); ASSERT(!xfs_get_extsz_hint(ip)); @@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay( XFS_STATS_INC(mp, xs_blk_mapw); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); if (error) goto out_unlock; @@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay( end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); + /* + * Search the data fork fork first to look up our source mapping. We + * always need the data fork map, as we have to return it to the + * iomap code so that the higher level write code can read data in to + * perform read-modify-write cycles for unaligned writes. + */ + eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); if (eof) - got.br_startoff = end_fsb; /* fake hole until the end */ + imap.br_startoff = end_fsb; /* fake hole until the end */ - if (got.br_startoff <= offset_fsb) { + /* We never need to allocate blocks for zeroing a hole. */ + if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { + xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); + goto out_unlock; + } + + /* + * Search the COW fork extent list even if we did not find a data fork + * extent. This serves two purposes: first this implements the + * speculative preallocation using cowextisze, so that we also unshare + * block adjacent to shared blocks instead of just the shared blocks + * themselves. Second the lookup in the extent list is generally faster + * than going out to the shared extent tree. + */ + if (xfs_is_reflink_inode(ip)) { + cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, + &ccur, &cmap); + if (!cow_eof && cmap.br_startoff <= offset_fsb) { + trace_xfs_reflink_cow_found(ip, &cmap); + whichfork = XFS_COW_FORK; + goto done; + } + } + + if (imap.br_startoff <= offset_fsb) { /* * For reflink files we may need a delalloc reservation when * overwriting shared extents. This includes zeroing of * existing extents that contain data. */ - if (xfs_is_reflink_inode(ip) && - ((flags & IOMAP_WRITE) || - got.br_state != XFS_EXT_UNWRITTEN)) { - xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); - error = xfs_reflink_reserve_cow(ip, &got); - if (error) - goto out_unlock; + if (!xfs_is_reflink_inode(ip) || + ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, + &imap); + goto done; } - trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); - goto done; - } + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); - if (flags & IOMAP_ZERO) { - xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); - goto out_unlock; + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + if (error) + goto out_unlock; + + /* Not shared? Just report the (potentially capped) extent. */ + if (!shared) { + trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, + &imap); + goto done; + } + + /* + * Fork all the shared blocks from our write offset until the + * end of the extent. + */ + whichfork = XFS_COW_FORK; + end_fsb = imap.br_startoff + imap.br_blockcount; + } else { + /* + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES + * pages to keep the chunks of work done where somewhat + * symmetric with the work writeback does. This is a completely + * arbitrary number pulled out of thin air. + * + * Note that the values needs to be less than 32-bits wide until + * the lower level functions are updated. + */ + count = min_t(loff_t, count, 1024 * PAGE_SIZE); + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); } error = xfs_qm_dqattach_locked(ip, false); if (error) goto out_unlock; - /* - * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages - * to keep the chunks of work done where somewhat symmetric with the - * work writeback does. This is a completely arbitrary number pulled - * out of thin air as a best guess for initial testing. - * - * Note that the values needs to be less than 32-bits wide until - * the lower level functions are updated. - */ - count = min_t(loff_t, count, 1024 * PAGE_SIZE); - end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - - if (eof) { + if (eof && whichfork == XFS_DATA_FORK) { prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, &icur); if (prealloc_blocks) { @@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay( } retry: - error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, - end_fsb - offset_fsb, prealloc_blocks, &got, &icur, - eof); + error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, + end_fsb - offset_fsb, prealloc_blocks, + whichfork == XFS_DATA_FORK ? &imap : &cmap, + whichfork == XFS_DATA_FORK ? &icur : &ccur, + whichfork == XFS_DATA_FORK ? eof : cow_eof); switch (error) { case 0: break; @@ -659,9 +703,19 @@ xfs_file_iomap_begin_delay( * them out if the write happens to fail. */ iomap->flags |= IOMAP_F_NEW; - trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); + trace_xfs_iomap_alloc(ip, offset, count, whichfork, &imap); done: - error = xfs_bmbt_to_iomap(ip, iomap, &got, false); + if (whichfork == XFS_COW_FORK) { + if (imap.br_startoff > offset_fsb) { + xfs_trim_extent(&cmap, offset_fsb, + imap.br_startoff - offset_fsb); + error = xfs_bmbt_to_iomap(ip, iomap, &cmap, false); + goto out_unlock; + } + /* ensure we only report blocks we have a reservation for */ + xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); + } + error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index bdbaff1b3fb7..d59b556d42cb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -234,73 +234,6 @@ xfs_reflink_trim_around_shared( } } -/* - * Trim the passed in imap to the next shared/unshared extent boundary, and - * if imap->br_startoff points to a shared extent reserve space for it in the - * COW fork. - * - * Note that imap will always contain the block numbers for the existing blocks - * in the data fork, as the upper layers need them for read-modify-write - * operations. - */ -int -xfs_reflink_reserve_cow( - struct xfs_inode *ip, - struct xfs_bmbt_irec *imap) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - struct xfs_bmbt_irec got; - int error = 0; - bool eof = false; - struct xfs_iext_cursor icur; - bool shared; - - /* - * Search the COW fork extent list first. This serves two purposes: - * first this implement the speculative preallocation using cowextisze, - * so that we also unshared block adjacent to shared blocks instead - * of just the shared blocks themselves. Second the lookup in the - * extent list is generally faster than going out to the shared extent - * tree. - */ - - if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got)) - eof = true; - if (!eof && got.br_startoff <= imap->br_startoff) { - trace_xfs_reflink_cow_found(ip, imap); - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - return 0; - } - - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, imap, &shared); - if (error) - return error; - - /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) - return 0; - - /* - * Fork all the shared blocks from our write offset until the end of - * the extent. - */ - error = xfs_qm_dqattach_locked(ip, false); - if (error) - return error; - - error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, - imap->br_blockcount, 0, &got, &icur, eof); - if (error == -ENOSPC || error == -EDQUOT) - trace_xfs_reflink_cow_enospc(ip, imap); - if (error) - return error; - - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - trace_xfs_reflink_cow_alloc(ip, &got); - return 0; -} - /* Convert part of an unwritten CoW extent to a real one. */ STATIC int xfs_reflink_convert_cow_extent( diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 870865913bd8..36e74fc90700 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3179,7 +3179,6 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); /* copy on write */ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
Besides simplifying the code a bit this allows to actually implement the behavior of using COW preallocation for non-COW data mentioned in the current comments. Note that this breaks the current version of xfs/420, but that is because the test is broken. A separate fix will be sent for it. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/xfs/xfs_iomap.c | 132 ++++++++++++++++++++++++++++++------------- fs/xfs/xfs_reflink.c | 67 ---------------------- fs/xfs/xfs_trace.h | 1 - 3 files changed, 93 insertions(+), 107 deletions(-)