diff mbox

[2/3] xfs: go straight to real allocations for direct I/O COW writes

Message ID 1480971924-4864-3-git-send-email-hch@lst.de (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Christoph Hellwig Dec. 5, 2016, 9:05 p.m. UTC
When we allocate COW fork blocks for direct I/O writes we currently first
create a delayed allocation, and then convert it to a real allocation
once we've got the delayed one.

As there is no good reason for that this patch instead makes use call
xfs_bmapi_write from the COW allocation path.  The only interesting bits
are a few tweaks the low-level allocator to allow for this, most notably
the need to remove the call to xfs_bmap_extsize_align for the cowextsize
in xfs_bmap_btalloc - for the existing convert case it's a no-op, but
for the direct allocation case it would blow up our block reservation
way beyond what we reserved for the transaction.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_bmap.c |   9 ++---
 fs/xfs/xfs_reflink.c     | 101 ++++++++++++++++++++++++++++++++++-------------
 2 files changed, 76 insertions(+), 34 deletions(-)

Comments

Brian Foster Dec. 7, 2016, 7 p.m. UTC | #1
On Mon, Dec 05, 2016 at 10:05:23PM +0100, Christoph Hellwig wrote:
> When we allocate COW fork blocks for direct I/O writes we currently first
> create a delayed allocation, and then convert it to a real allocation
> once we've got the delayed one.
> 
> As there is no good reason for that this patch instead makes use call
> xfs_bmapi_write from the COW allocation path.  The only interesting bits
> are a few tweaks the low-level allocator to allow for this, most notably
> the need to remove the call to xfs_bmap_extsize_align for the cowextsize
> in xfs_bmap_btalloc - for the existing convert case it's a no-op, but
> for the direct allocation case it would blow up our block reservation
> way beyond what we reserved for the transaction.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/libxfs/xfs_bmap.c |   9 ++---
>  fs/xfs/xfs_reflink.c     | 101 ++++++++++++++++++++++++++++++++++-------------
>  2 files changed, 76 insertions(+), 34 deletions(-)
> 
> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index 6f28814..a4a4327 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -2893,13 +2893,14 @@ xfs_bmap_add_extent_hole_real(
>  	ASSERT(!isnullstartblock(new->br_startblock));
>  	ASSERT(!bma->cur ||
>  	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
> -	ASSERT(whichfork != XFS_COW_FORK);
>  
>  	XFS_STATS_INC(mp, xs_add_exlist);
>  
>  	state = 0;
>  	if (whichfork == XFS_ATTR_FORK)
>  		state |= BMAP_ATTRFORK;
> +	if (whichfork == XFS_COW_FORK)
> +		state |= BMAP_COWFORK;
>  
>  	/*
>  	 * Check and set flags if this segment has a left neighbor.
> @@ -3619,9 +3620,7 @@ xfs_bmap_btalloc(
>  	else if (mp->m_dalign)
>  		stripe_align = mp->m_dalign;
>  
> -	if (ap->flags & XFS_BMAPI_COWFORK)
> -		align = xfs_get_cowextsz_hint(ap->ip);
> -	else if (xfs_alloc_is_userdata(ap->datatype))
> +	if (xfs_alloc_is_userdata(ap->datatype))

Doesn't this defeat the purpose (i.e., fragmentation avoidance via wider
allocations) of the cowextszhint for direct I/O? I think it would be
better to be consistent with the approach for traditional I/O + extsz
and incorporate the alignment into the reservation. Perhaps the hunk of
code that already does just that in xfs_iomap_write_direct() could be
converted to a small helper and reused..?

>  		align = xfs_get_extsz_hint(ap->ip);
>  	if (unlikely(align)) {
>  		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
> @@ -4608,8 +4607,6 @@ xfs_bmapi_write(
>  		 */
>  		if (flags & XFS_BMAPI_REMAP)
>  			ASSERT(inhole);
> -		if (flags & XFS_BMAPI_COWFORK)
> -			ASSERT(!inhole);
>  
>  		/*
>  		 * First, deal with the hole before the allocated space
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 88fd03c..0a077e80 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -304,59 +304,104 @@ __xfs_reflink_allocate_cow(
>  	xfs_fileoff_t		end_fsb)
>  {
>  	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_bmbt_irec	imap;
> +	struct xfs_bmbt_irec	imap, got;
>  	struct xfs_defer_ops	dfops;
>  	struct xfs_trans	*tp;
>  	xfs_fsblock_t		first_block;
> -	int			nimaps = 1, error;
> -	bool			shared;
> +	int			nimaps, error, lockmode;
> +	bool			shared, trimmed;
> +	xfs_extlen_t		resblks, align;
> +	xfs_extnum_t		idx;
>  
> -	xfs_defer_init(&dfops, &first_block);
> +	align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
> +	if (align)
> +		end_fsb = roundup_64(end_fsb, align);
>  
> -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
> -			XFS_TRANS_RESERVE, &tp);
> +	resblks = XFS_DIOSTRAT_SPACE_RES(mp, end_fsb - *offset_fsb);
> +
> +	error = xfs_qm_dqattach(ip, 0);

This is already in the (only) caller.

Brian

>  	if (error)
>  		return error;
>  
> -	xfs_ilock(ip, XFS_ILOCK_EXCL);
> -
> -	/* Read extent from the source file. */
> -	nimaps = 1;
> -	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> -			&imap, &nimaps, 0);
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
>  	if (error)
> -		goto out_unlock;
> -	ASSERT(nimaps == 1);
> +		return error;
>  
> -	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
> -	if (error)
> -		goto out_trans_cancel;
> +	lockmode = XFS_ILOCK_EXCL;
> +	xfs_ilock(ip, lockmode);
>  
> -	if (!shared) {
> -		*offset_fsb = imap.br_startoff + imap.br_blockcount;
> -		goto out_trans_cancel;
> +	/*
> +	 * Even if the extent is not shared we might have a preallocation for
> +	 * it in the COW fork.  If so use it.
> +	 */
> +	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
> +	    got.br_startoff <= *offset_fsb) {
> +		/* If we have a real allocation in the COW fork we're done. */
> +		if (!isnullstartblock(got.br_startblock)) {
> +			xfs_trim_extent(&got, *offset_fsb,
> +					end_fsb - *offset_fsb);
> +			*offset_fsb = got.br_startoff + got.br_blockcount;
> +			goto out_trans_cancel;
> +		}
> +	} else {
> +		nimaps = 1;
> +		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
> +				&imap, &nimaps, 0);
> +		if (error)
> +			goto out_trans_cancel;
> +		ASSERT(nimaps == 1);
> +
> +		/* Trim the mapping to the nearest shared extent boundary. */
> +		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
> +				&trimmed);
> +		if (error)
> +			goto out_trans_cancel;
> +
> +		if (!shared) {
> +			*offset_fsb = imap.br_startoff + imap.br_blockcount;
> +			goto out_trans_cancel;
> +		}
> +
> +		*offset_fsb = imap.br_startoff;
> +		end_fsb = imap.br_startoff + imap.br_blockcount;
> +		if (align)
> +			end_fsb = roundup_64(end_fsb, align);
>  	}
>  
> -	xfs_trans_ijoin(tp, ip, 0);
> -	error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
> -			XFS_BMAPI_COWFORK, &first_block,
> -			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
> -			&imap, &nimaps, &dfops);
> +	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
> +			XFS_QMOPT_RES_REGBLKS);
>  	if (error)
>  		goto out_trans_cancel;
>  
> +	xfs_trans_ijoin(tp, ip, 0);
> +
> +	xfs_defer_init(&dfops, &first_block);
> +	nimaps = 1;
> +	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
> +			XFS_BMAPI_COWFORK, &first_block, resblks, &imap,
> +			&nimaps, &dfops);
> +	if (error)
> +		goto out_bmap_cancel;
> +
>  	error = xfs_defer_finish(&tp, &dfops, NULL);
>  	if (error)
> -		goto out_trans_cancel;
> +		goto out_bmap_cancel;
>  
>  	error = xfs_trans_commit(tp);
> +	if (error)
> +		goto out_unlock;
>  
>  	*offset_fsb = imap.br_startoff + imap.br_blockcount;
> +
>  out_unlock:
> -	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	xfs_iunlock(ip, lockmode);
>  	return error;
> -out_trans_cancel:
> +
> +out_bmap_cancel:
>  	xfs_defer_cancel(&dfops);
> +	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
> +			XFS_QMOPT_RES_REGBLKS);
> +out_trans_cancel:
>  	xfs_trans_cancel(tp);
>  	goto out_unlock;
>  }
> -- 
> 2.1.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Dec. 7, 2016, 7:37 p.m. UTC | #2
On Wed, Dec 07, 2016 at 02:00:09PM -0500, Brian Foster wrote:
> > -	if (ap->flags & XFS_BMAPI_COWFORK)
> > -		align = xfs_get_cowextsz_hint(ap->ip);
> > -	else if (xfs_alloc_is_userdata(ap->datatype))
> > +	if (xfs_alloc_is_userdata(ap->datatype))
> 
> Doesn't this defeat the purpose (i.e., fragmentation avoidance via wider
> allocations) of the cowextszhint for direct I/O? I think it would be
> better to be consistent with the approach for traditional I/O + extsz
> and incorporate the alignment into the reservation. Perhaps the hunk of
> code that already does just that in xfs_iomap_write_direct() could be
> converted to a small helper and reused..?

We're already doing the alignment to the cowextsize hint in
__xfs_reflink_allocate_cow so that we can take the cowextsize into
account.

> > +	resblks = XFS_DIOSTRAT_SPACE_RES(mp, end_fsb - *offset_fsb);
> > +
> > +	error = xfs_qm_dqattach(ip, 0);
> 
> This is already in the (only) caller.

Yes, it can be dropped, although a superflous xfs_qm_dqattach is
totally harmless anyway.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster Dec. 7, 2016, 7:46 p.m. UTC | #3
On Wed, Dec 07, 2016 at 08:37:09PM +0100, Christoph Hellwig wrote:
> On Wed, Dec 07, 2016 at 02:00:09PM -0500, Brian Foster wrote:
> > > -	if (ap->flags & XFS_BMAPI_COWFORK)
> > > -		align = xfs_get_cowextsz_hint(ap->ip);
> > > -	else if (xfs_alloc_is_userdata(ap->datatype))
> > > +	if (xfs_alloc_is_userdata(ap->datatype))
> > 
> > Doesn't this defeat the purpose (i.e., fragmentation avoidance via wider
> > allocations) of the cowextszhint for direct I/O? I think it would be
> > better to be consistent with the approach for traditional I/O + extsz
> > and incorporate the alignment into the reservation. Perhaps the hunk of
> > code that already does just that in xfs_iomap_write_direct() could be
> > converted to a small helper and reused..?
> 
> We're already doing the alignment to the cowextsize hint in
> __xfs_reflink_allocate_cow so that we can take the cowextsize into
> account.
> 

Only for end_fsb... xfs_bmap_btalloc() calls xfs_bmap_extsize_align()
with the alignment, which rounds out the start and end offsets.

This same end_fsb code has already been removed from
xfs_reflink_reserve_cow() for similar reasons. It should probably be
removed from xfs_reflink_allocate_cow() as well.

Brian

> > > +	resblks = XFS_DIOSTRAT_SPACE_RES(mp, end_fsb - *offset_fsb);
> > > +
> > > +	error = xfs_qm_dqattach(ip, 0);
> > 
> > This is already in the (only) caller.
> 
> Yes, it can be dropped, although a superflous xfs_qm_dqattach is
> totally harmless anyway.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Dec. 8, 2016, 4:23 a.m. UTC | #4
On Wed, Dec 07, 2016 at 02:46:34PM -0500, Brian Foster wrote:
> On Wed, Dec 07, 2016 at 08:37:09PM +0100, Christoph Hellwig wrote:
> > On Wed, Dec 07, 2016 at 02:00:09PM -0500, Brian Foster wrote:
> > > > -	if (ap->flags & XFS_BMAPI_COWFORK)
> > > > -		align = xfs_get_cowextsz_hint(ap->ip);
> > > > -	else if (xfs_alloc_is_userdata(ap->datatype))
> > > > +	if (xfs_alloc_is_userdata(ap->datatype))
> > > 
> > > Doesn't this defeat the purpose (i.e., fragmentation avoidance via wider
> > > allocations) of the cowextszhint for direct I/O? I think it would be
> > > better to be consistent with the approach for traditional I/O + extsz
> > > and incorporate the alignment into the reservation. Perhaps the hunk of
> > > code that already does just that in xfs_iomap_write_direct() could be
> > > converted to a small helper and reused..?
> > 
> > We're already doing the alignment to the cowextsize hint in
> > __xfs_reflink_allocate_cow so that we can take the cowextsize into
> > account.
> > 
> 
> Only for end_fsb... xfs_bmap_btalloc() calls xfs_bmap_extsize_align()
> with the alignment, which rounds out the start and end offsets.

Seconded; I finally got a chance to build a kernel with these three
patches and ran xfs/214 with a tracepointed kernel; it doesn't round the
start down to the previous cowextsize alignment like 4.9 does... and
thus the quota accounting is off.

--D

> 
> This same end_fsb code has already been removed from
> xfs_reflink_reserve_cow() for similar reasons. It should probably be
> removed from xfs_reflink_allocate_cow() as well.
> 
> Brian
> 
> > > > +	resblks = XFS_DIOSTRAT_SPACE_RES(mp, end_fsb - *offset_fsb);
> > > > +
> > > > +	error = xfs_qm_dqattach(ip, 0);
> > > 
> > > This is already in the (only) caller.
> > 
> > Yes, it can be dropped, although a superflous xfs_qm_dqattach is
> > totally harmless anyway.
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 24, 2017, 8:37 a.m. UTC | #5
On Wed, Dec 07, 2016 at 02:46:34PM -0500, Brian Foster wrote:
> Only for end_fsb... xfs_bmap_btalloc() calls xfs_bmap_extsize_align()
> with the alignment, which rounds out the start and end offsets.

... and corrupts data in the direct I/O case.

The problem is that the down-alignment in xfs_bmap_extsize_align will
now create a real extent that spans before the extent that we have
to COW in this write_begin call.  But the area before might have been
a hole before the dio write that had just before been filled with
an allocation in the data fork.  And due to the direct I/O end_io
interface that only covers the range of the whole write we don't
know at that point where exactly the COW operation started and will
happily splice back our front pad into the data fork, replacing
the just written data with garbage.  xfs/228 and sometimes generic/199
reproduce this nicely.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster Jan. 24, 2017, 1:50 p.m. UTC | #6
On Tue, Jan 24, 2017 at 09:37:32AM +0100, Christoph Hellwig wrote:
> On Wed, Dec 07, 2016 at 02:46:34PM -0500, Brian Foster wrote:
> > Only for end_fsb... xfs_bmap_btalloc() calls xfs_bmap_extsize_align()
> > with the alignment, which rounds out the start and end offsets.
> 
> ... and corrupts data in the direct I/O case.
> 
> The problem is that the down-alignment in xfs_bmap_extsize_align will
> now create a real extent that spans before the extent that we have
> to COW in this write_begin call.  But the area before might have been
> a hole before the dio write that had just before been filled with
> an allocation in the data fork.  And due to the direct I/O end_io
> interface that only covers the range of the whole write we don't
> know at that point where exactly the COW operation started and will
> happily splice back our front pad into the data fork, replacing
> the just written data with garbage.  xfs/228 and sometimes generic/199
> reproduce this nicely.

Is this reproducible on the current tree or only with this patch series?

Also, shouldn't the end_io handler only remap the range of the write,
regardless of whether the initial allocation ended up preallocating over
holes or purely a shared range?

Perhaps what you are saying here is that we have a single dio write that
spans wider than a shared data fork extent..? In that case, we iterate
the range in xfs_reflink_reserve_cow(). We'd skip the start of the range
that is a hole in the data fork, but as you say, the
xfs_bmapi_reserve_delalloc() call for the part of the I/O on the shared
extent can widen the COW fork allocation to before the extent in the
data fork, possibly to before the start of the I/O. (Thus we end up
allocating COW blocks over the hole anyways...).

From there we are going to drop into iomap_dio_rw(), which looks like
it's going to check the COW fork for blocks and if found, write to those
blocks (as opposed to doing a data fork allocation). AFAICT, that means
the extent size hint shouldn't be a problem. What am I missing?

Brian

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 24, 2017, 1:59 p.m. UTC | #7
On Tue, Jan 24, 2017 at 08:50:45AM -0500, Brian Foster wrote:
> Is this reproducible on the current tree or only with this patch series?

It's only reproducible with the series modified to your review comments.

> Also, shouldn't the end_io handler only remap the range of the write,
> regardless of whether the initial allocation ended up preallocating over
> holes or purely a shared range?

The end_io handler is caller for the whole size of the write.  That's
mostly because we don't have an object corresponding to a write_begin
call.

> Perhaps what you are saying here is that we have a single dio write that
> spans wider than a shared data fork extent..?

Yes.

> In that case, we iterate
> the range in xfs_reflink_reserve_cow(). We'd skip the start of the range
> that is a hole in the data fork, but as you say, the
> xfs_bmapi_reserve_delalloc() call for the part of the I/O on the shared
> extent can widen the COW fork allocation to before the extent in the
> data fork, possibly to before the start of the I/O. (Thus we end up
> allocating COW blocks over the hole anyways...).

The problem is the following.

We have a file with the following layout


HHHHHHHHHHHHDDDDDDDDDDDDDD

where H is hole and D is data.  The H to D boundary is not aligned
to the cowextsize.

The direct I/O code now does a first pass allocating an extent for
H and copies data to it.  Then in the next step it goes on to D
and unshares it.  It then enlarges the extent into the end of the
previously H range. It does however not copy data into H again,
as the iomap iterator is past it.  The ->end_io routine however
is called for the hole range, and will move the just allocated
rounding before H back into the data fork, replacing the valid data
writtent just before.

--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster Jan. 24, 2017, 3:02 p.m. UTC | #8
On Tue, Jan 24, 2017 at 02:59:37PM +0100, Christoph Hellwig wrote:
> On Tue, Jan 24, 2017 at 08:50:45AM -0500, Brian Foster wrote:
> > Is this reproducible on the current tree or only with this patch series?
> 
> It's only reproducible with the series modified to your review comments.
> 

Ok, well then I'm probably not going to be able to follow the details
well enough to try and provide constructive feedback without seeing the
code. Looking back, my comments were generally about the tradeoff of
bypassing the extent size hint mechanism that has been built into
reflink to avoid cow fragmention.

> > Also, shouldn't the end_io handler only remap the range of the write,
> > regardless of whether the initial allocation ended up preallocating over
> > holes or purely a shared range?
> 
> The end_io handler is caller for the whole size of the write.  That's
> mostly because we don't have an object corresponding to a write_begin
> call.
> 
> > Perhaps what you are saying here is that we have a single dio write that
> > spans wider than a shared data fork extent..?
> 
> Yes.
> 
> > In that case, we iterate
> > the range in xfs_reflink_reserve_cow(). We'd skip the start of the range
> > that is a hole in the data fork, but as you say, the
> > xfs_bmapi_reserve_delalloc() call for the part of the I/O on the shared
> > extent can widen the COW fork allocation to before the extent in the
> > data fork, possibly to before the start of the I/O. (Thus we end up
> > allocating COW blocks over the hole anyways...).
> 
> The problem is the following.
> 
> We have a file with the following layout
> 
> 
> HHHHHHHHHHHHDDDDDDDDDDDDDD
> 
> where H is hole and D is data.  The H to D boundary is not aligned
> to the cowextsize.
> 
> The direct I/O code now does a first pass allocating an extent for
> H and copies data to it.  Then in the next step it goes on to D
> and unshares it.  It then enlarges the extent into the end of the
> previously H range. It does however not copy data into H again,
> as the iomap iterator is past it.  The ->end_io routine however
> is called for the hole range, and will move the just allocated
> rounding before H back into the data fork, replacing the valid data
> writtent just before.
> 

Without seeing the code, perhaps we need to pull up the cow extent size
hint mechanism from the bmapi layer to something similar to how
xfs_iomap_direct_write() handles the traditional extent size hints..?
That may allow us to more intelligently consider the current state
across the data and cow forks in such cases (to not preallocate over
existing blocks, for example, without having to kill off the extent size
hint entirely).

Brian

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 24, 2017, 3:09 p.m. UTC | #9
On Tue, Jan 24, 2017 at 10:02:22AM -0500, Brian Foster wrote:
> Ok, well then I'm probably not going to be able to follow the details
> well enough to try and provide constructive feedback without seeing the
> code. Looking back, my comments were generally about the tradeoff of
> bypassing the extent size hint mechanism that has been built into
> reflink to avoid cow fragmention.

Ok - really very little change here - only the call to the extsize
alignment in xfs_bmap_btalloc reinstated and the according reservation
changes in the caller.

Note that with the previously posted series there is no change in
handling the cowextsize hint for real on-disk allocations.  While
the current code rounds down based on the cowextsize for creating
the delayed extent we will never convert the alignment before the
write start to a real extent - it will just get cleaned up later
at inode eviction time or using the timer.

> Without seeing the code, perhaps we need to pull up the cow extent size
> hint mechanism from the bmapi layer to something similar to how
> xfs_iomap_direct_write() handles the traditional extent size hints..?
> That may allow us to more intelligently consider the current state
> across the data and cow forks in such cases (to not preallocate over
> existing blocks, for example, without having to kill off the extent size
> hint entirely).

We could.  On the other hand I'd love to get the current series in
first as the only thing it change in behavior is not allocating
additional delayed extent space that never gets used, and not writing
data twice if it's sub-blocksize, all of which seem like a clear
improvement.  And it's also the base for my pending DAX reflink support.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster Jan. 24, 2017, 4:17 p.m. UTC | #10
On Tue, Jan 24, 2017 at 04:09:59PM +0100, Christoph Hellwig wrote:
> On Tue, Jan 24, 2017 at 10:02:22AM -0500, Brian Foster wrote:
> > Ok, well then I'm probably not going to be able to follow the details
> > well enough to try and provide constructive feedback without seeing the
> > code. Looking back, my comments were generally about the tradeoff of
> > bypassing the extent size hint mechanism that has been built into
> > reflink to avoid cow fragmention.
> 
> Ok - really very little change here - only the call to the extsize
> alignment in xfs_bmap_btalloc reinstated and the according reservation
> changes in the caller.
> 
> Note that with the previously posted series there is no change in
> handling the cowextsize hint for real on-disk allocations.  While
> the current code rounds down based on the cowextsize for creating
> the delayed extent we will never convert the alignment before the
> write start to a real extent - it will just get cleaned up later
> at inode eviction time or using the timer.
> 

I thought that while not necessarily guaranteed, generally the entire
extent gets converted from delalloc to real blocks. IIRC, that's what
I've seen in the past when looking into the cow fork with bmap. After
all, isn't that the point of the extent size hint? Allocate wider than
the write to accommodate potential subsequent writes into a more
contiguous range.

> > Without seeing the code, perhaps we need to pull up the cow extent size
> > hint mechanism from the bmapi layer to something similar to how
> > xfs_iomap_direct_write() handles the traditional extent size hints..?
> > That may allow us to more intelligently consider the current state
> > across the data and cow forks in such cases (to not preallocate over
> > existing blocks, for example, without having to kill off the extent size
> > hint entirely).
> 
> We could.  On the other hand I'd love to get the current series in
> first as the only thing it change in behavior is not allocating
> additional delayed extent space that never gets used, and not writing
> data twice if it's sub-blocksize, all of which seem like a clear
> improvement.  And it's also the base for my pending DAX reflink support.

Fair enough, that's a different discussion. Personally, I'd prefer to
fix up the stuff we know that needs fixing before piling more stuff on
top. Particularly since something like cow I/O to a reflinked vm image
file (taking full advantage of cowextszhint) might be a more common use
case than DAX reflink at the moment.

All of this stuff is "experimental," however, so I don't feel strongly
about the order so long as we agree the regression can/should be fixed
up. So I'll defer to the maintainer on that one. ;)

Brian

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 24, 2017, 4:21 p.m. UTC | #11
On Tue, Jan 24, 2017 at 11:17:20AM -0500, Brian Foster wrote:
> I thought that while not necessarily guaranteed, generally the entire
> extent gets converted from delalloc to real blocks.

For buffered I/O that's the case.  See the discussion on my recent
xfs_bmapi_write patch.

> IIRC, that's what
> I've seen in the past when looking into the cow fork with bmap. After
> all, isn't that the point of the extent size hint? Allocate wider than
> the write to accommodate potential subsequent writes into a more
> contiguous range.

Well, for direct I/O that's not what the current code does.  Implementing
it might be useful, but I'm not sure how much the front alignment is
going to help in usual worksloads - you'd need a backwards write or
random writes that happen to look almost backwards for it to make
a difference.  I suspect most of them time we'd just allocate blocks to
reclaim them again a little later.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Brian Foster Jan. 24, 2017, 5:43 p.m. UTC | #12
On Tue, Jan 24, 2017 at 05:21:56PM +0100, Christoph Hellwig wrote:
> On Tue, Jan 24, 2017 at 11:17:20AM -0500, Brian Foster wrote:
> > I thought that while not necessarily guaranteed, generally the entire
> > extent gets converted from delalloc to real blocks.
> 
> For buffered I/O that's the case.  See the discussion on my recent
> xfs_bmapi_write patch.
> 

Yeah, that's the behavior I would have expected...

> > IIRC, that's what
> > I've seen in the past when looking into the cow fork with bmap. After
> > all, isn't that the point of the extent size hint? Allocate wider than
> > the write to accommodate potential subsequent writes into a more
> > contiguous range.
> 
> Well, for direct I/O that's not what the current code does.  Implementing
> it might be useful, but I'm not sure how much the front alignment is
> going to help in usual worksloads - you'd need a backwards write or
> random writes that happen to look almost backwards for it to make
> a difference.  I suspect most of them time we'd just allocate blocks to
> reclaim them again a little later.

Hmm, that's not what I'm seeing (not that it really matters, but I'm
curious if I'm missing something):

# xfs_io -d -c "bmap -v" -c "pwrite 8k 4k" -c "bmap -v" -c "bmap -cv" ./file
./file:
 EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET        TOTAL
   0: [0..15]:         80..95            0 (80..95)            16 100000
   1: [16..2047]:      160..2191         0 (160..2191)       2032 100000
wrote 4096/4096 bytes at offset 8192
4 KiB, 1 ops; 0.0000 sec (12.440 MiB/sec and 3184.7134 ops/sec)
./file:
 EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET        TOTAL
   0: [0..15]:         80..95            0 (80..95)            16 100000
   1: [16..23]:        2208..2215        0 (2208..2215)         8
   2: [24..2047]:      168..2191         0 (168..2191)       2024 100000
./file:
 EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET        TOTAL
   0: [0..15]:         2192..2207        0 (2192..2207)        16
   1: [16..23]:        hole                                     8
   2: [24..255]:       2216..2447        0 (2216..2447)       232
   3: [256..2047]:     hole                                  1792

That's on a fairly recent for-next on a fully reflinked file. It looks
to me that the cow fork extent is fully allocated immediately
(otherwise, I'm not sure what else would have converted it).

Brian

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 24, 2017, 8:08 p.m. UTC | #13
On Tue, Jan 24, 2017 at 12:43:18PM -0500, Brian Foster wrote:
> Hmm, that's not what I'm seeing (not that it really matters, but I'm
> curious if I'm missing something):

Yeah, I can reproduce this on mainline.  Turns out the it was done
by the align call in xfs_bmap_btalloc that even my before run had
removed.

Took me some time to spin my head around this.

Btw, I think we have a nasty race in the current DIO code that might
expose stale data, but this is just the same kind of head spinning
exercise for now:

Thread 1 writes a range from B to c

                    B --------- C

a little later thread 2 writes from A to B

        A --------- B

but the code preallocates beyond B into the range where thread
1 has just written, but ->end_io hasn't been called yet.
But once ->end_io is called thread 2 has already allocated
up to the extent size hint into the write range of thread 1,
so the end_io handler will splice the unintialized blocks from
that preallocation back into the file right after B.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 24, 2017, 8:10 p.m. UTC | #14
On Tue, Jan 24, 2017 at 09:08:55PM +0100, Christoph Hellwig wrote:
> On Tue, Jan 24, 2017 at 12:43:18PM -0500, Brian Foster wrote:
> > Hmm, that's not what I'm seeing (not that it really matters, but I'm
> > curious if I'm missing something):
> 
> Yeah, I can reproduce this on mainline.  Turns out the it was done

s/can/can't/, sorry.  It's getting late for the day.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Jan. 25, 2017, 12:09 a.m. UTC | #15
On Tue, Jan 24, 2017 at 09:08:55PM +0100, Christoph Hellwig wrote:
> On Tue, Jan 24, 2017 at 12:43:18PM -0500, Brian Foster wrote:
> > Hmm, that's not what I'm seeing (not that it really matters, but I'm
> > curious if I'm missing something):
> 
> Yeah, I can reproduce this on mainline.  Turns out the it was done
> by the align call in xfs_bmap_btalloc that even my before run had
> removed.
> 
> Took me some time to spin my head around this.
> 
> Btw, I think we have a nasty race in the current DIO code that might
> expose stale data, but this is just the same kind of head spinning
> exercise for now:
> 
> Thread 1 writes a range from B to c
> 
>                     B --------- C

          A --------- B --------- C
               ^           ^
               d           e

I'm assuming B-C has no shared blocks, d-B has shared blocks, and that
both d & e are cowextsize boundaries.

> a little later thread 2 writes from A to B
> 
>         A --------- B
> but the code preallocates beyond B into the range where thread
> 1 has just written, but ->end_io hasn't been called yet.
> But once ->end_io is called thread 2 has already allocated
> up to the extent size hint into the write range of thread 1,
> so the end_io handler will splice the unintialized blocks from
> that preallocation back into the file right after B.

I think you're right about there being a dio race here.  I'm not sure
what the solution here is -- certainly we could disregard the cowextsize
hint, though that has a fragmentation cost that we already know about.

We could also change the dio write setup to extend the range that it
checks for shared blocks up and down to the nearest cowextsize boundary
and set up the delalloc reservations in the cow fork as necessary.  If
our thread2 comes along then it'll find the reservations already set
up for a cow so that we avoid the situation where B-C changes between
iomap_begin and dio_write_end_io does the wrong thing.  That's more in
the spirit of cowextsize since we'd promote future writes to CoW.
However it's more wasteful of blocks since we have no idea if those
future writes are ever actually going to happen, and it also adds more
bmap records if we don't use all of the reservation.

Ugh, my head hurts, I'm going to go for a walk to ponder this some more,
and to try to work out whether the buffered path is all right.

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Darrick J. Wong Jan. 27, 2017, 5:44 p.m. UTC | #16
On Tue, Jan 24, 2017 at 04:09:34PM -0800, Darrick J. Wong wrote:
> On Tue, Jan 24, 2017 at 09:08:55PM +0100, Christoph Hellwig wrote:
> > On Tue, Jan 24, 2017 at 12:43:18PM -0500, Brian Foster wrote:
> > > Hmm, that's not what I'm seeing (not that it really matters, but I'm
> > > curious if I'm missing something):
> > 
> > Yeah, I can reproduce this on mainline.  Turns out the it was done
> > by the align call in xfs_bmap_btalloc that even my before run had
> > removed.
> > 
> > Took me some time to spin my head around this.
> > 
> > Btw, I think we have a nasty race in the current DIO code that might
> > expose stale data, but this is just the same kind of head spinning
> > exercise for now:
> > 
> > Thread 1 writes a range from B to c
> > 
> >                     B --------- C
> 
>           A --------- B --------- C
>                ^           ^
>                d           e
> 
> I'm assuming B-C has no shared blocks, d-B has shared blocks, and that
> both d & e are cowextsize boundaries.
> 
> > a little later thread 2 writes from A to B
> > 
> >         A --------- B
> > but the code preallocates beyond B into the range where thread
> > 1 has just written, but ->end_io hasn't been called yet.
> > But once ->end_io is called thread 2 has already allocated
> > up to the extent size hint into the write range of thread 1,
> > so the end_io handler will splice the unintialized blocks from
> > that preallocation back into the file right after B.
> 
> I think you're right about there being a dio race here.  I'm not sure
> what the solution here is -- certainly we could disregard the cowextsize
> hint, though that has a fragmentation cost that we already know about.
> 
> We could also change the dio write setup to extend the range that it
> checks for shared blocks up and down to the nearest cowextsize boundary
> and set up the delalloc reservations in the cow fork as necessary.  If
> our thread2 comes along then it'll find the reservations already set
> up for a cow so that we avoid the situation where B-C changes between
> iomap_begin and dio_write_end_io does the wrong thing.  That's more in
> the spirit of cowextsize since we'd promote future writes to CoW.
> However it's more wasteful of blocks since we have no idea if those
> future writes are ever actually going to happen, and it also adds more
> bmap records if we don't use all of the reservation.
> 
> Ugh, my head hurts, I'm going to go for a walk to ponder this some more,
> and to try to work out whether the buffered path is all right.

I think I came up with a better idea overnight: take advantage of the
unwritten bit.  Currently, all extents in the cow fork are either
delalloc or normal extents.  When we allocate the blocks to fill a cow
extent, we'll flag the extent as unwritten.  When we go to write the
data to disk, we convert the unwritten extent to a real extent, and the
post-write remap (since it only takes a file offset and length) will
be changed to remap only real, written extents into the data fork.

--D

> 
> --D
> 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Jan. 27, 2017, 5:48 p.m. UTC | #17
On Fri, Jan 27, 2017 at 09:44:47AM -0800, Darrick J. Wong wrote:
> I think I came up with a better idea overnight: take advantage of the
> unwritten bit.  Currently, all extents in the cow fork are either
> delalloc or normal extents.  When we allocate the blocks to fill a cow
> extent, we'll flag the extent as unwritten.  When we go to write the
> data to disk, we convert the unwritten extent to a real extent, and the
> post-write remap (since it only takes a file offset and length) will
> be changed to remap only real, written extents into the data fork.

That sounds like a possibility.  My other idea was to force COW all
direct I/O writes, that way we can't have a mismatch between extents
in the data and the COW fork.  But so far this is just an idea..
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 6f28814..a4a4327 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2893,13 +2893,14 @@  xfs_bmap_add_extent_hole_real(
 	ASSERT(!isnullstartblock(new->br_startblock));
 	ASSERT(!bma->cur ||
 	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
-	ASSERT(whichfork != XFS_COW_FORK);
 
 	XFS_STATS_INC(mp, xs_add_exlist);
 
 	state = 0;
 	if (whichfork == XFS_ATTR_FORK)
 		state |= BMAP_ATTRFORK;
+	if (whichfork == XFS_COW_FORK)
+		state |= BMAP_COWFORK;
 
 	/*
 	 * Check and set flags if this segment has a left neighbor.
@@ -3619,9 +3620,7 @@  xfs_bmap_btalloc(
 	else if (mp->m_dalign)
 		stripe_align = mp->m_dalign;
 
-	if (ap->flags & XFS_BMAPI_COWFORK)
-		align = xfs_get_cowextsz_hint(ap->ip);
-	else if (xfs_alloc_is_userdata(ap->datatype))
+	if (xfs_alloc_is_userdata(ap->datatype))
 		align = xfs_get_extsz_hint(ap->ip);
 	if (unlikely(align)) {
 		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -4608,8 +4607,6 @@  xfs_bmapi_write(
 		 */
 		if (flags & XFS_BMAPI_REMAP)
 			ASSERT(inhole);
-		if (flags & XFS_BMAPI_COWFORK)
-			ASSERT(!inhole);
 
 		/*
 		 * First, deal with the hole before the allocated space
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 88fd03c..0a077e80 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -304,59 +304,104 @@  __xfs_reflink_allocate_cow(
 	xfs_fileoff_t		end_fsb)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	imap;
+	struct xfs_bmbt_irec	imap, got;
 	struct xfs_defer_ops	dfops;
 	struct xfs_trans	*tp;
 	xfs_fsblock_t		first_block;
-	int			nimaps = 1, error;
-	bool			shared;
+	int			nimaps, error, lockmode;
+	bool			shared, trimmed;
+	xfs_extlen_t		resblks, align;
+	xfs_extnum_t		idx;
 
-	xfs_defer_init(&dfops, &first_block);
+	align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
+	if (align)
+		end_fsb = roundup_64(end_fsb, align);
 
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
-			XFS_TRANS_RESERVE, &tp);
+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, end_fsb - *offset_fsb);
+
+	error = xfs_qm_dqattach(ip, 0);
 	if (error)
 		return error;
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/* Read extent from the source file. */
-	nimaps = 1;
-	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-			&imap, &nimaps, 0);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
-		goto out_unlock;
-	ASSERT(nimaps == 1);
+		return error;
 
-	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
-	if (error)
-		goto out_trans_cancel;
+	lockmode = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lockmode);
 
-	if (!shared) {
-		*offset_fsb = imap.br_startoff + imap.br_blockcount;
-		goto out_trans_cancel;
+	/*
+	 * Even if the extent is not shared we might have a preallocation for
+	 * it in the COW fork.  If so use it.
+	 */
+	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, *offset_fsb, &idx, &got) &&
+	    got.br_startoff <= *offset_fsb) {
+		/* If we have a real allocation in the COW fork we're done. */
+		if (!isnullstartblock(got.br_startblock)) {
+			xfs_trim_extent(&got, *offset_fsb,
+					end_fsb - *offset_fsb);
+			*offset_fsb = got.br_startoff + got.br_blockcount;
+			goto out_trans_cancel;
+		}
+	} else {
+		nimaps = 1;
+		error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+				&imap, &nimaps, 0);
+		if (error)
+			goto out_trans_cancel;
+		ASSERT(nimaps == 1);
+
+		/* Trim the mapping to the nearest shared extent boundary. */
+		error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+				&trimmed);
+		if (error)
+			goto out_trans_cancel;
+
+		if (!shared) {
+			*offset_fsb = imap.br_startoff + imap.br_blockcount;
+			goto out_trans_cancel;
+		}
+
+		*offset_fsb = imap.br_startoff;
+		end_fsb = imap.br_startoff + imap.br_blockcount;
+		if (align)
+			end_fsb = roundup_64(end_fsb, align);
 	}
 
-	xfs_trans_ijoin(tp, ip, 0);
-	error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
-			XFS_BMAPI_COWFORK, &first_block,
-			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
-			&imap, &nimaps, &dfops);
+	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
+			XFS_QMOPT_RES_REGBLKS);
 	if (error)
 		goto out_trans_cancel;
 
+	xfs_trans_ijoin(tp, ip, 0);
+
+	xfs_defer_init(&dfops, &first_block);
+	nimaps = 1;
+	error = xfs_bmapi_write(tp, ip, *offset_fsb, end_fsb - *offset_fsb,
+			XFS_BMAPI_COWFORK, &first_block, resblks, &imap,
+			&nimaps, &dfops);
+	if (error)
+		goto out_bmap_cancel;
+
 	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)
-		goto out_trans_cancel;
+		goto out_bmap_cancel;
 
 	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_unlock;
 
 	*offset_fsb = imap.br_startoff + imap.br_blockcount;
+
 out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	return error;
-out_trans_cancel:
+
+out_bmap_cancel:
 	xfs_defer_cancel(&dfops);
+	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
+			XFS_QMOPT_RES_REGBLKS);
+out_trans_cancel:
 	xfs_trans_cancel(tp);
 	goto out_unlock;
 }