[01/21] Btrfs: rework outstanding_extents
diff mbox

Message ID 1506714245-23072-2-git-send-email-jbacik@fb.com
State New
Headers show

Commit Message

Josef Bacik Sept. 29, 2017, 7:43 p.m. UTC
Right now we do a lot of weird hoops around outstanding_extents in order
to keep the extent count consistent.  This is because we logically
transfer the outstanding_extent count from the initial reservation
through the set_delalloc_bits.  This makes it pretty difficult to get a
handle on how and when we need to mess with outstanding_extents.

Fix this by revamping the rules of how we deal with outstanding_extents.
Now instead everybody that is holding on to a delalloc extent is
required to increase the outstanding extents count for itself.  This
means we'll have something like this

btrfs_delalloc_reserve_metadata	- outstanding_extents = 1
 btrfs_set_extent_delalloc	- outstanding_extents = 2
btrfs_delalloc_release_extents	- outstanding_extents = 1

for an initial file write.  Now take the append write where we extend an
existing delalloc range but still under the maximum extent size

btrfs_delalloc_reserve_metadata - outstanding_extents = 2
  btrfs_set_extent_delalloc
    btrfs_set_bit_hook		- outstanding_extents = 3
    btrfs_merge_extent_hook	- outstanding_extents = 2
btrfs_delalloc_release_extents	- outstanding_extents = 1

In order to make the ordered extent transition we of course must now
make ordered extents carry their own outstanding_extent reservation, so
for cow_file_range we end up with

btrfs_add_ordered_extent	- outstanding_extents = 2
clear_extent_bit		- outstanding_extents = 1
btrfs_remove_ordered_extent	- outstanding_extents = 0

This makes all manipulations of outstanding_extents much more explicit.
Every successful call to btrfs_delalloc_reserve_metadata _must_ now be
combined with btrfs_delalloc_release_extents, even in the error case, as
that is the only function that actually modifies the
outstanding_extents counter.

The drawback to this is now we are much more likely to have transient
cases where outstanding_extents is much larger than it actually should
be.  This could happen before as we manipulated the delalloc bits, but
now it happens basically at every write.  This may put more pressure on
the ENOSPC flushing code, but I think making this code simpler is worth
the cost.  I have another change coming to mitigate this side-effect
somewhat.

I also added trace points for the counter manipulation.  These were used
by a bpf script I wrote to help track down leak issues.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/btrfs_inode.h       |  18 ++++++
 fs/btrfs/ctree.h             |   2 +
 fs/btrfs/extent-tree.c       | 139 ++++++++++++++++++++++++++++---------------
 fs/btrfs/file.c              |  22 +++----
 fs/btrfs/inode-map.c         |   3 +-
 fs/btrfs/inode.c             | 114 +++++++++++------------------------
 fs/btrfs/ioctl.c             |   2 +
 fs/btrfs/ordered-data.c      |  21 ++++++-
 fs/btrfs/relocation.c        |   3 +
 fs/btrfs/tests/inode-tests.c |  18 ++----
 10 files changed, 186 insertions(+), 156 deletions(-)

Comments

Nikolay Borisov Oct. 13, 2017, 8:39 a.m. UTC | #1
On 29.09.2017 22:43, Josef Bacik wrote:
>  
> +static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
> +						 int mod)
> +{
> +	ASSERT(spin_is_locked(&inode->lock));
> +	inode->outstanding_extents += mod;
> +	if (btrfs_is_free_space_inode(inode))
> +		return;
> +}
> +
> +static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode,
> +					      int mod)
> +{
> +	ASSERT(spin_is_locked(&inode->lock));

lockdep_assert_held(&inode->lock); for both functions. I've spoken with
Peterz and he said any other way of checking whether a lock is held, I
quote, "must die"

> +	inode->reserved_extents += mod;
> +	if (btrfs_is_free_space_inode(inode))
> +		return;
> +}
> +
>  static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
>  {
>  	int ret = 0;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index a7f68c304b4c..1262612fbf78 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -2742,6 +2742,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
>  				     u64 *qgroup_reserved, bool use_global_rsv);
>  void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
>  				      struct btrfs_block_rsv *rsv);
> +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
> +
>  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
>  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
>  int btrfs_delalloc_reserve_space(struct inode *inode,
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 1a6aced00a19..aa0f5c8953b0 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -5971,42 +5971,31 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
>  }
>  
>  /**
> - * drop_outstanding_extent - drop an outstanding extent
> + * drop_over_reserved_extents - drop our extra extent reservations
>   * @inode: the inode we're dropping the extent for
> - * @num_bytes: the number of bytes we're releasing.
>   *
> - * This is called when we are freeing up an outstanding extent, either called
> - * after an error or after an extent is written.  This will return the number of
> - * reserved extents that need to be freed.  This must be called with
> - * BTRFS_I(inode)->lock held.
> + * We reserve extents we may use, but they may have been merged with other
> + * extents and we may not need the extra reservation.
> + *
> + * We also call this when we've completed io to an extent or had an error and
> + * cleared the outstanding extent, in either case we no longer need our
> + * reservation and can drop the excess.
>   */
> -static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
> -		u64 num_bytes)
> +static unsigned drop_over_reserved_extents(struct btrfs_inode *inode)
>  {
> -	unsigned drop_inode_space = 0;
> -	unsigned dropped_extents = 0;
> -	unsigned num_extents;
> +	unsigned num_extents = 0;
>  
> -	num_extents = count_max_extents(num_bytes);
> -	ASSERT(num_extents);
> -	ASSERT(inode->outstanding_extents >= num_extents);
> -	inode->outstanding_extents -= num_extents;
> +	if (inode->reserved_extents > inode->outstanding_extents) {
> +		num_extents = inode->reserved_extents -
> +			inode->outstanding_extents;
> +		btrfs_mod_reserved_extents(inode, -num_extents);
> +	}
>  
>  	if (inode->outstanding_extents == 0 &&
>  	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
>  			       &inode->runtime_flags))
> -		drop_inode_space = 1;
> -
> -	/*
> -	 * If we have more or the same amount of outstanding extents than we have
> -	 * reserved then we need to leave the reserved extents count alone.
> -	 */
> -	if (inode->outstanding_extents >= inode->reserved_extents)
> -		return drop_inode_space;
> -
> -	dropped_extents = inode->reserved_extents - inode->outstanding_extents;
> -	inode->reserved_extents -= dropped_extents;
> -	return dropped_extents + drop_inode_space;
> +		num_extents++;
> +	return num_extents;

Something bugs me around the handling of this. In
btrfs_delalloc_reserve_metadata we do add the additional bytes necessary
for updating the inode. However outstanding_extents is modified with the
number of extent items necessary to cover the required byte range but we
don't account the extra inode item as being an extent. Then why do we
have to actually increment the num_extents here? Doesn't this lead to
underflow?

To illustrate: A write comes for 1 mb, we count this as 1 outstanding
extent in delalloc_reserve_metadata:

nr_extents = count_max_extents(num_bytes);
inode->outstanding_extents += nr_extents;

We do account the extra inode item but only when calculating the bytes
to reserve:

to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
And we of course set : test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,

Now, when the time comes to free the outstanding extent and call :
drop_over_reserved_extents we do account the extra inode item as an
extent being freed. Is this correct? In any case it's not something
which is introduced by your patch but something which has been there
since time immemorial, just wondering?

>  }
>  
>  /**
> @@ -6061,13 +6050,15 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  	struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
>  	u64 to_reserve = 0;
>  	u64 csum_bytes;
> -	unsigned nr_extents;
> +	unsigned nr_extents, reserve_extents;
>  	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
>  	int ret = 0;
>  	bool delalloc_lock = true;
>  	u64 to_free = 0;
>  	unsigned dropped;
>  	bool release_extra = false;
> +	bool underflow = false;
> +	bool did_retry = false;
>  
>  	/* If we are a free space inode we need to not flush since we will be in
>  	 * the middle of a transaction commit.  We also don't need the delalloc
> @@ -6092,18 +6083,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  		mutex_lock(&inode->delalloc_mutex);
>  
>  	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
> -
> +retry:
>  	spin_lock(&inode->lock);
> -	nr_extents = count_max_extents(num_bytes);
> -	inode->outstanding_extents += nr_extents;
> +	reserve_extents = nr_extents = count_max_extents(num_bytes);
> +	btrfs_mod_outstanding_extents(inode, nr_extents);
>  
> -	nr_extents = 0;
> -	if (inode->outstanding_extents > inode->reserved_extents)
> -		nr_extents += inode->outstanding_extents -
> +	/*
> +	 * Because we add an outstanding extent for ordered before we clear
> +	 * delalloc we will double count our outstanding extents slightly.  This
> +	 * could mean that we transiently over-reserve, which could result in an
> +	 * early ENOSPC if our timing is unlucky.  Keep track of the case that
> +	 * we had a reservation underflow so we can retry if we fail.
> +	 *
> +	 * Keep in mind we can legitimately have more outstanding extents than
> +	 * reserved because of fragmentation, so only allow a retry once.
> +	 */
> +	if (inode->outstanding_extents >
> +	    inode->reserved_extents + nr_extents) {
> +		reserve_extents = inode->outstanding_extents -
>  			inode->reserved_extents;
> +		underflow = true;
> +	}
>  
>  	/* We always want to reserve a slot for updating the inode. */
> -	to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
> +	to_reserve = btrfs_calc_trans_metadata_size(fs_info,
> +						    reserve_extents + 1);
>  	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
>  	csum_bytes = inode->csum_bytes;
>  	spin_unlock(&inode->lock);
> @@ -6128,7 +6132,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  		to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
>  		release_extra = true;
>  	}
> -	inode->reserved_extents += nr_extents;
> +	btrfs_mod_reserved_extents(inode, reserve_extents);
>  	spin_unlock(&inode->lock);
>  
>  	if (delalloc_lock)
> @@ -6144,7 +6148,10 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  
>  out_fail:
>  	spin_lock(&inode->lock);
> -	dropped = drop_outstanding_extent(inode, num_bytes);
> +	nr_extents = count_max_extents(num_bytes);

nr_extent isn't changed, so re-calculating it is redundant.

> +	btrfs_mod_outstanding_extents(inode, -nr_extents);
> +
> +	dropped = drop_over_reserved_extents(inode);
>  	/*
>  	 * If the inodes csum_bytes is the same as the original
>  	 * csum_bytes then we know we haven't raced with any free()ers
> @@ -6201,6 +6208,11 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  		trace_btrfs_space_reservation(fs_info, "delalloc",
>  					      btrfs_ino(inode), to_free, 0);
>  	}
> +	if (underflow && !did_retry) {
> +		did_retry = true;
> +		underflow = false;
> +		goto retry;
> +	}
>  	if (delalloc_lock)
>  		mutex_unlock(&inode->delalloc_mutex);
>  	return ret;
> @@ -6208,12 +6220,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  
>  /**
>   * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
> - * @inode: the inode to release the reservation for
> - * @num_bytes: the number of bytes we're releasing
> + * @inode: the inode to release the reservation for.
> + * @num_bytes: the number of bytes we are releasing.
>   *
>   * This will release the metadata reservation for an inode.  This can be called
>   * once we complete IO for a given set of bytes to release their metadata
> - * reservations.
> + * reservations, or on error for the same reason.
>   */
>  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  {
> @@ -6223,8 +6235,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  
>  	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
>  	spin_lock(&inode->lock);
> -	dropped = drop_outstanding_extent(inode, num_bytes);
> -
> +	dropped = drop_over_reserved_extents(inode);
>  	if (num_bytes)
>  		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
>  	spin_unlock(&inode->lock);
> @@ -6241,6 +6252,42 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  }
>  
>  /**
> + * btrfs_delalloc_release_extents - release our outstanding_extents
> + * @inode: the inode to balance the reservation for.
> + * @num_bytes: the number of bytes we originally reserved with
> + *
> + * When we reserve space we increase outstanding_extents for the extents we may
> + * add.  Once we've set the range as delalloc or created our ordered extents we
> + * have outstanding_extents to track the real usage, so we use this to free our
> + * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
> + * with btrfs_delalloc_reserve_metadata.
> + */
> +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
> +{
> +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
> +	unsigned num_extents;
> +	u64 to_free;
> +	unsigned dropped;
> +
> +	spin_lock(&inode->lock);
> +	num_extents = count_max_extents(num_bytes);
> +	btrfs_mod_outstanding_extents(inode, -num_extents);
> +	dropped = drop_over_reserved_extents(inode);
> +	spin_unlock(&inode->lock);
> +
> +	if (!dropped)
> +		return;
> +
> +	if (btrfs_is_testing(fs_info))
> +		return;
> +
> +	to_free = btrfs_calc_trans_metadata_size(fs_info, dropped);

So what's really happening here is that drop_over_reserved_extents
really returns the number of items (which can consist of extent items +
1 inode item). So perhaps the function should be renamed to
drop_over_reserved_items?

> +	trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
> +				      to_free, 0);
> +	btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
> +}
> +
> +/**
>   * btrfs_delalloc_reserve_space - reserve data and metadata space for
>   * delalloc
>   * @inode: inode we're writing to
> @@ -6284,10 +6331,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
>   * @inode: inode we're releasing space for
>   * @start: start position of the space already reserved
>   * @len: the len of the space already reserved
> - *
> - * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
> - * called in the case that we don't need the metadata AND data reservations
> - * anymore.  So if there is an error or we insert an inline extent.
> + * @release_bytes: the len of the space we consumed or didn't use
>   *
>   * This function will release the metadata space that was not used and will
>   * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
> @@ -6295,7 +6339,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
>   * Also it will handle the qgroup reserved space.
>   */
>  void btrfs_delalloc_release_space(struct inode *inode,
> -			struct extent_changeset *reserved, u64 start, u64 len)
> +				  struct extent_changeset *reserved,
> +				  u64 start, u64 len)
>  {
>  	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
>  	btrfs_free_reserved_data_space(inode, reserved, start, len);


<omitted for brevity>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josef Bacik Oct. 13, 2017, 1:10 p.m. UTC | #2
On Fri, Oct 13, 2017 at 11:39:15AM +0300, Nikolay Borisov wrote:
> 
> 
> On 29.09.2017 22:43, Josef Bacik wrote:
> >  
> > +static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
> > +						 int mod)
> > +{
> > +	ASSERT(spin_is_locked(&inode->lock));
> > +	inode->outstanding_extents += mod;
> > +	if (btrfs_is_free_space_inode(inode))
> > +		return;
> > +}
> > +
> > +static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode,
> > +					      int mod)
> > +{
> > +	ASSERT(spin_is_locked(&inode->lock));
> 
> lockdep_assert_held(&inode->lock); for both functions. I've spoken with
> Peterz and he said any other way of checking whether a lock is held, I
> quote, "must die"
> 

Blah I continually forget that's what we're supposed to use now.

> > +	inode->reserved_extents += mod;
> > +	if (btrfs_is_free_space_inode(inode))
> > +		return;
> > +}
> > +
> >  static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
> >  {
> >  	int ret = 0;
> > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > index a7f68c304b4c..1262612fbf78 100644
> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -2742,6 +2742,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
> >  				     u64 *qgroup_reserved, bool use_global_rsv);
> >  void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
> >  				      struct btrfs_block_rsv *rsv);
> > +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
> > +
> >  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
> >  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
> >  int btrfs_delalloc_reserve_space(struct inode *inode,
> > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> > index 1a6aced00a19..aa0f5c8953b0 100644
> > --- a/fs/btrfs/extent-tree.c
> > +++ b/fs/btrfs/extent-tree.c
> > @@ -5971,42 +5971,31 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
> >  }
> >  
> >  /**
> > - * drop_outstanding_extent - drop an outstanding extent
> > + * drop_over_reserved_extents - drop our extra extent reservations
> >   * @inode: the inode we're dropping the extent for
> > - * @num_bytes: the number of bytes we're releasing.
> >   *
> > - * This is called when we are freeing up an outstanding extent, either called
> > - * after an error or after an extent is written.  This will return the number of
> > - * reserved extents that need to be freed.  This must be called with
> > - * BTRFS_I(inode)->lock held.
> > + * We reserve extents we may use, but they may have been merged with other
> > + * extents and we may not need the extra reservation.
> > + *
> > + * We also call this when we've completed io to an extent or had an error and
> > + * cleared the outstanding extent, in either case we no longer need our
> > + * reservation and can drop the excess.
> >   */
> > -static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
> > -		u64 num_bytes)
> > +static unsigned drop_over_reserved_extents(struct btrfs_inode *inode)
> >  {
> > -	unsigned drop_inode_space = 0;
> > -	unsigned dropped_extents = 0;
> > -	unsigned num_extents;
> > +	unsigned num_extents = 0;
> >  
> > -	num_extents = count_max_extents(num_bytes);
> > -	ASSERT(num_extents);
> > -	ASSERT(inode->outstanding_extents >= num_extents);
> > -	inode->outstanding_extents -= num_extents;
> > +	if (inode->reserved_extents > inode->outstanding_extents) {
> > +		num_extents = inode->reserved_extents -
> > +			inode->outstanding_extents;
> > +		btrfs_mod_reserved_extents(inode, -num_extents);
> > +	}
> >  
> >  	if (inode->outstanding_extents == 0 &&
> >  	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
> >  			       &inode->runtime_flags))
> > -		drop_inode_space = 1;
> > -
> > -	/*
> > -	 * If we have more or the same amount of outstanding extents than we have
> > -	 * reserved then we need to leave the reserved extents count alone.
> > -	 */
> > -	if (inode->outstanding_extents >= inode->reserved_extents)
> > -		return drop_inode_space;
> > -
> > -	dropped_extents = inode->reserved_extents - inode->outstanding_extents;
> > -	inode->reserved_extents -= dropped_extents;
> > -	return dropped_extents + drop_inode_space;
> > +		num_extents++;
> > +	return num_extents;
> 
> Something bugs me around the handling of this. In
> btrfs_delalloc_reserve_metadata we do add the additional bytes necessary
> for updating the inode. However outstanding_extents is modified with the
> number of extent items necessary to cover the requires byte range but we
> don't account the extra inode item as being an extent. Then why do we
> have to actually increment the num_extents here? Doesn't this lead to
> underflow?
> 
> To illustrate: A write comes for 1 mb, we count this as 1 outstanding
> extent in delalloc_reserve_metadata:
> 
> nr_extents = count_max_extents(num_bytes);
> inode->outstanding_extents += nr_extents;
> 
> We do account the extra inode item but only when calculating the bytes
> to reserve:
> 
> to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
> And we of course set : test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
> 
> Now, when the time comes to free the outstanding extent and call :
> drop_over_reserved_extents we do account the extra inode item as an
> extent being freed. Is this correct? In any case it's not something
> which is introduced by your patch but something which has been there
> since time immemorial, just wondering?
> 

The outstanding_extents accounting is consistent with only the items needed to
handle the outstanding extent items.  However since changing the inode requires
updating the inode item as well we have to keep this floating reservation for
the inode item until we have 0 outstanding extents.  The way we do this is with
the BTRFS_INODE_DELALLOC_META_RESERVED flag.  So if it isn't set we will
allocate nr_extents + 1 in btrfs_delalloc_reserve_metadata() and then set our
bit.  If we ever steal this reservation we make sure to clear the flag so we
know we don't have to clean it up when outstanding_extents goes to 0.  It's not
super intuitive but needs to be done under the BTRFS_I(inode)->lock so this was
the best place to put it.  I suppose we could move the logic out of here and put
it somewhere else to make it more clear.

> >  }
> >  
> >  /**
> > @@ -6061,13 +6050,15 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  	struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
> >  	u64 to_reserve = 0;
> >  	u64 csum_bytes;
> > -	unsigned nr_extents;
> > +	unsigned nr_extents, reserve_extents;
> >  	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
> >  	int ret = 0;
> >  	bool delalloc_lock = true;
> >  	u64 to_free = 0;
> >  	unsigned dropped;
> >  	bool release_extra = false;
> > +	bool underflow = false;
> > +	bool did_retry = false;
> >  
> >  	/* If we are a free space inode we need to not flush since we will be in
> >  	 * the middle of a transaction commit.  We also don't need the delalloc
> > @@ -6092,18 +6083,31 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  		mutex_lock(&inode->delalloc_mutex);
> >  
> >  	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
> > -
> > +retry:
> >  	spin_lock(&inode->lock);
> > -	nr_extents = count_max_extents(num_bytes);
> > -	inode->outstanding_extents += nr_extents;
> > +	reserve_extents = nr_extents = count_max_extents(num_bytes);
> > +	btrfs_mod_outstanding_extents(inode, nr_extents);
> >  
> > -	nr_extents = 0;
> > -	if (inode->outstanding_extents > inode->reserved_extents)
> > -		nr_extents += inode->outstanding_extents -
> > +	/*
> > +	 * Because we add an outstanding extent for ordered before we clear
> > +	 * delalloc we will double count our outstanding extents slightly.  This
> > +	 * could mean that we transiently over-reserve, which could result in an
> > +	 * early ENOSPC if our timing is unlucky.  Keep track of the case that
> > +	 * we had a reservation underflow so we can retry if we fail.
> > +	 *
> > +	 * Keep in mind we can legitimately have more outstanding extents than
> > +	 * reserved because of fragmentation, so only allow a retry once.
> > +	 */
> > +	if (inode->outstanding_extents >
> > +	    inode->reserved_extents + nr_extents) {
> > +		reserve_extents = inode->outstanding_extents -
> >  			inode->reserved_extents;
> > +		underflow = true;
> > +	}
> >  
> >  	/* We always want to reserve a slot for updating the inode. */
> > -	to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
> > +	to_reserve = btrfs_calc_trans_metadata_size(fs_info,
> > +						    reserve_extents + 1);
> >  	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
> >  	csum_bytes = inode->csum_bytes;
> >  	spin_unlock(&inode->lock);
> > @@ -6128,7 +6132,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  		to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
> >  		release_extra = true;
> >  	}
> > -	inode->reserved_extents += nr_extents;
> > +	btrfs_mod_reserved_extents(inode, reserve_extents);
> >  	spin_unlock(&inode->lock);
> >  
> >  	if (delalloc_lock)
> > @@ -6144,7 +6148,10 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  
> >  out_fail:
> >  	spin_lock(&inode->lock);
> > -	dropped = drop_outstanding_extent(inode, num_bytes);
> > +	nr_extents = count_max_extents(num_bytes);
> 
> nr_extent isn't changed, so re-calculating it is redundant.
> 
> > +	btrfs_mod_outstanding_extents(inode, -nr_extents);
> > +
> > +	dropped = drop_over_reserved_extents(inode);
> >  	/*
> >  	 * If the inodes csum_bytes is the same as the original
> >  	 * csum_bytes then we know we haven't raced with any free()ers
> > @@ -6201,6 +6208,11 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  		trace_btrfs_space_reservation(fs_info, "delalloc",
> >  					      btrfs_ino(inode), to_free, 0);
> >  	}
> > +	if (underflow && !did_retry) {
> > +		did_retry = true;
> > +		underflow = false;
> > +		goto retry;
> > +	}
> >  	if (delalloc_lock)
> >  		mutex_unlock(&inode->delalloc_mutex);
> >  	return ret;
> > @@ -6208,12 +6220,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  
> >  /**
> >   * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
> > - * @inode: the inode to release the reservation for
> > - * @num_bytes: the number of bytes we're releasing
> > + * @inode: the inode to release the reservation for.
> > + * @num_bytes: the number of bytes we are releasing.
> >   *
> >   * This will release the metadata reservation for an inode.  This can be called
> >   * once we complete IO for a given set of bytes to release their metadata
> > - * reservations.
> > + * reservations, or on error for the same reason.
> >   */
> >  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  {
> > @@ -6223,8 +6235,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  
> >  	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
> >  	spin_lock(&inode->lock);
> > -	dropped = drop_outstanding_extent(inode, num_bytes);
> > -
> > +	dropped = drop_over_reserved_extents(inode);
> >  	if (num_bytes)
> >  		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
> >  	spin_unlock(&inode->lock);
> > @@ -6241,6 +6252,42 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
> >  }
> >  
> >  /**
> > + * btrfs_delalloc_release_extents - release our outstanding_extents
> > + * @inode: the inode to balance the reservation for.
> > + * @num_bytes: the number of bytes we originally reserved with
> > + *
> > + * When we reserve space we increase outstanding_extents for the extents we may
> > + * add.  Once we've set the range as delalloc or created our ordered extents we
> > + * have outstanding_extents to track the real usage, so we use this to free our
> > + * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
> > + * with btrfs_delalloc_reserve_metadata.
> > + */
> > +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
> > +{
> > +	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
> > +	unsigned num_extents;
> > +	u64 to_free;
> > +	unsigned dropped;
> > +
> > +	spin_lock(&inode->lock);
> > +	num_extents = count_max_extents(num_bytes);
> > +	btrfs_mod_outstanding_extents(inode, -num_extents);
> > +	dropped = drop_over_reserved_extents(inode);
> > +	spin_unlock(&inode->lock);
> > +
> > +	if (!dropped)
> > +		return;
> > +
> > +	if (btrfs_is_testing(fs_info))
> > +		return;
> > +
> > +	to_free = btrfs_calc_trans_metadata_size(fs_info, dropped);
> 
> So what's really happening here is that drop_over_reserved_extents
> reallu returns the number of items (which can consists of extent items +
> 1 inode item). So perhaps the function should be renamed to
> drop_over_reserved_items?
> 

I'll just move the cleaning up of the inode item reservation out of the helper.
Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Oct. 13, 2017, 1:33 p.m. UTC | #3
On Fri, Oct 13, 2017 at 09:10:52AM -0400, Josef Bacik wrote:
> On Fri, Oct 13, 2017 at 11:39:15AM +0300, Nikolay Borisov wrote:
> > 
> > 
> > On 29.09.2017 22:43, Josef Bacik wrote:
> > >  
> > > +static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
> > > +						 int mod)
> > > +{
> > > +	ASSERT(spin_is_locked(&inode->lock));
> > > +	inode->outstanding_extents += mod;
> > > +	if (btrfs_is_free_space_inode(inode))
> > > +		return;
> > > +}
> > > +
> > > +static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode,
> > > +					      int mod)
> > > +{
> > > +	ASSERT(spin_is_locked(&inode->lock));
> > 
> > lockdep_assert_held(&inode->lock); for both functions. I've spoken with
> > Peterz and he said any other way of checking whether a lock is held, I
> > quote, "must die"
> > 
> 
> Blah I continually forget that's what we're supposed to use now.

I try to keep such information on
https://btrfs.wiki.kernel.org/index.php/Development_notes#BCP

Just started the section with best practices, feel free to add more.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nikolay Borisov Oct. 13, 2017, 1:55 p.m. UTC | #4
> 
> The outstanding_extents accounting is consistent with only the items needed to
> handle the outstanding extent items.  However since changing the inode requires
> updating the inode item as well we have to keep this floating reservation for
> the inode item until we have 0 outstanding extents.  The way we do this is with
> the BTRFS_INODE_DELALLOC_META_RESERVED flag.  So if it isn't set we will
> allocate nr_exntents + 1 in btrfs_delalloc_reserve_metadata() and then set our
> bit.  If we ever steal this reservation we make sure to clear the flag so we
> know we don't have to clean it up when outstanding_extents goes to 0.  It's not
> super intuitive but needs to be done under the BTRFS_I(inode)->lock so this was
> the best place to put it.  I suppose we could move the logic out of here and put
> it somewhere else to make it more clear.

I think defining this logic in its own, discrete block of code would be
best w.r.t. readability. It's not super obvious.


I'm slowly going through your patchkit so expect more question but
otherwise the delalloc stuff after this and patch 03 really start
looking a lot more obvious !


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Edmund Nadolski Oct. 19, 2017, 3:14 a.m. UTC | #5
just a few quick things for the changelog:

On 09/29/2017 01:43 PM, Josef Bacik wrote:
> Right now we do a lot of weird hoops around outstanding_extents in order
> to keep the extent count consistent.  This is because we logically
> transfer the outstanding_extent count from the initial reservation
> through the set_delalloc_bits.  This makes it pretty difficult to get a
> handle on how and when we need to mess with outstanding_extents.
> 
> Fix this by revamping the rules of how we deal with outstanding_extents.
> Now instead everybody that is holding on to a delalloc extent is
> required to increase the outstanding extents count for itself.  This
> means we'll have something like this
> 
> btrfs_dealloc_reserve_metadata	- outstanding_extents = 1

s/dealloc/delalloc/


>  btrfs_set_delalloc		- outstanding_extents = 2

should be btrfs_set_extent_delalloc?


> btrfs_release_delalloc_extents	- outstanding_extents = 1
> 
> for an initial file write.  Now take the append write where we extend an
> existing delalloc range but still under the maximum extent size
> 
> btrfs_delalloc_reserve_metadata - outstanding_extents = 2
>   btrfs_set_delalloc

btrfs_set_extent_delalloc?


>     btrfs_set_bit_hook		- outstanding_extents = 3
>     btrfs_merge_bit_hook	- outstanding_extents = 2

should be btrfs_clear_bit_hook? (or btrfs_merge_extent_hook?)


> btrfs_release_delalloc_extents	- outstanding_extnets = 1

btrfs_delalloc_release_metadata?


> 
> In order to make the ordered extent transition we of course must now
> make ordered extents carry their own outstanding_extent reservation, so
> for cow_file_range we end up with
> 
> btrfs_add_ordered_extent	- outstanding_extents = 2
> clear_extent_bit		- outstanding_extents = 1
> btrfs_remove_ordered_extent	- outstanding_extents = 0
> 
> This makes all manipulations of outstanding_extents much more explicit.
> Every successful call to btrfs_reserve_delalloc_metadata _must_ now be
                           ^
btrfs_delalloc_reserve_metadata?


Thanks,
Ed

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josef Bacik Oct. 19, 2017, 6:10 p.m. UTC | #6
On Fri, Oct 13, 2017 at 04:55:58PM +0300, Nikolay Borisov wrote:
> 
> > 
> > The outstanding_extents accounting is consistent with only the items needed to
> > handle the outstanding extent items.  However since changing the inode requires
> > updating the inode item as well we have to keep this floating reservation for
> > the inode item until we have 0 outstanding extents.  The way we do this is with
> > the BTRFS_INODE_DELALLOC_META_RESERVED flag.  So if it isn't set we will
> > allocate nr_exntents + 1 in btrfs_delalloc_reserve_metadata() and then set our
> > bit.  If we ever steal this reservation we make sure to clear the flag so we
> > know we don't have to clean it up when outstanding_extents goes to 0.  It's not
> > super intuitive but needs to be done under the BTRFS_I(inode)->lock so this was
> > the best place to put it.  I suppose we could move the logic out of here and put
> > it somewhere else to make it more clear.
> 
> I think defining this logic in its own, discrete block of code would be
> best w.r.t readibility. It's not super obvious.
> 

I went to do this and realized that I rip all of this out when we switch to
per-inode block rsvs, so I'm just going to leave this patch as is.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index eccadb5f62a5..a6a22ef41f91 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -267,6 +267,24 @@  static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
 	return false;
 }
 
+static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
+						 int mod)
+{
+	lockdep_assert_held(&inode->lock);
+	inode->outstanding_extents += mod;
+	if (btrfs_is_free_space_inode(inode))
+		return; /* XXX: no-op — nothing follows; drop this or hook in tracing */
+}
+
+static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode,
+					      int mod)
+{
+	lockdep_assert_held(&inode->lock);
+	inode->reserved_extents += mod;
+	if (btrfs_is_free_space_inode(inode))
+		return; /* XXX: no-op — nothing follows; drop this or hook in tracing */
+}
+
 static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
 	int ret = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a7f68c304b4c..1262612fbf78 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2742,6 +2742,8 @@  int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 				     u64 *qgroup_reserved, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 				      struct btrfs_block_rsv *rsv);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
+
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
 int btrfs_delalloc_reserve_space(struct inode *inode,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1a6aced00a19..aa0f5c8953b0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5971,42 +5971,31 @@  void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 }
 
 /**
- * drop_outstanding_extent - drop an outstanding extent
+ * drop_over_reserved_extents - drop our extra extent reservations
  * @inode: the inode we're dropping the extent for
- * @num_bytes: the number of bytes we're releasing.
  *
- * This is called when we are freeing up an outstanding extent, either called
- * after an error or after an extent is written.  This will return the number of
- * reserved extents that need to be freed.  This must be called with
- * BTRFS_I(inode)->lock held.
+ * We reserve extents we may use, but they may have been merged with other
+ * extents and we may not need the extra reservation.
+ *
+ * We also call this when we've completed io to an extent or had an error and
+ * cleared the outstanding extent, in either case we no longer need our
+ * reservation and can drop the excess.
  */
-static unsigned drop_outstanding_extent(struct btrfs_inode *inode,
-		u64 num_bytes)
+static unsigned drop_over_reserved_extents(struct btrfs_inode *inode)
 {
-	unsigned drop_inode_space = 0;
-	unsigned dropped_extents = 0;
-	unsigned num_extents;
+	unsigned num_extents = 0;
 
-	num_extents = count_max_extents(num_bytes);
-	ASSERT(num_extents);
-	ASSERT(inode->outstanding_extents >= num_extents);
-	inode->outstanding_extents -= num_extents;
+	if (inode->reserved_extents > inode->outstanding_extents) {
+		num_extents = inode->reserved_extents -
+			inode->outstanding_extents;
+		btrfs_mod_reserved_extents(inode, -num_extents);
+	}
 
 	if (inode->outstanding_extents == 0 &&
 	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
 			       &inode->runtime_flags))
-		drop_inode_space = 1;
-
-	/*
-	 * If we have more or the same amount of outstanding extents than we have
-	 * reserved then we need to leave the reserved extents count alone.
-	 */
-	if (inode->outstanding_extents >= inode->reserved_extents)
-		return drop_inode_space;
-
-	dropped_extents = inode->reserved_extents - inode->outstanding_extents;
-	inode->reserved_extents -= dropped_extents;
-	return dropped_extents + drop_inode_space;
+		num_extents++;
+	return num_extents;
 }
 
 /**
@@ -6061,13 +6050,15 @@  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
 	u64 to_reserve = 0;
 	u64 csum_bytes;
-	unsigned nr_extents;
+	unsigned nr_extents, reserve_extents;
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret = 0;
 	bool delalloc_lock = true;
 	u64 to_free = 0;
 	unsigned dropped;
 	bool release_extra = false;
+	bool underflow = false;
+	bool did_retry = false;
 
 	/* If we are a free space inode we need to not flush since we will be in
 	 * the middle of a transaction commit.  We also don't need the delalloc
@@ -6092,18 +6083,31 @@  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 		mutex_lock(&inode->delalloc_mutex);
 
 	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
-
+retry:
 	spin_lock(&inode->lock);
-	nr_extents = count_max_extents(num_bytes);
-	inode->outstanding_extents += nr_extents;
+	reserve_extents = nr_extents = count_max_extents(num_bytes);
+	btrfs_mod_outstanding_extents(inode, nr_extents);
 
-	nr_extents = 0;
-	if (inode->outstanding_extents > inode->reserved_extents)
-		nr_extents += inode->outstanding_extents -
+	/*
+	 * Because we add an outstanding extent for ordered before we clear
+	 * delalloc we will double count our outstanding extents slightly.  This
+	 * could mean that we transiently over-reserve, which could result in an
+	 * early ENOSPC if our timing is unlucky.  Keep track of the case that
+	 * we had a reservation underflow so we can retry if we fail.
+	 *
+	 * Keep in mind we can legitimately have more outstanding extents than
+	 * reserved because of fragmentation, so only allow a retry once.
+	 */
+	if (inode->outstanding_extents >
+	    inode->reserved_extents + nr_extents) {
+		reserve_extents = inode->outstanding_extents -
 			inode->reserved_extents;
+		underflow = true;
+	}
 
 	/* We always want to reserve a slot for updating the inode. */
-	to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1);
+	to_reserve = btrfs_calc_trans_metadata_size(fs_info,
+						    reserve_extents + 1);
 	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
 	csum_bytes = inode->csum_bytes;
 	spin_unlock(&inode->lock);
@@ -6128,7 +6132,7 @@  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 		to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
 		release_extra = true;
 	}
-	inode->reserved_extents += nr_extents;
+	btrfs_mod_reserved_extents(inode, reserve_extents);
 	spin_unlock(&inode->lock);
 
 	if (delalloc_lock)
@@ -6144,7 +6148,10 @@  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
 out_fail:
 	spin_lock(&inode->lock);
-	dropped = drop_outstanding_extent(inode, num_bytes);
+	nr_extents = count_max_extents(num_bytes);
+	btrfs_mod_outstanding_extents(inode, -nr_extents);
+
+	dropped = drop_over_reserved_extents(inode);
 	/*
 	 * If the inodes csum_bytes is the same as the original
 	 * csum_bytes then we know we haven't raced with any free()ers
@@ -6201,6 +6208,11 @@  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 		trace_btrfs_space_reservation(fs_info, "delalloc",
 					      btrfs_ino(inode), to_free, 0);
 	}
+	if (underflow && !did_retry) {
+		did_retry = true;
+		underflow = false;
+		goto retry;
+	}
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
 	return ret;
@@ -6208,12 +6220,12 @@  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
 /**
  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
- * @inode: the inode to release the reservation for
- * @num_bytes: the number of bytes we're releasing
+ * @inode: the inode to release the reservation for.
+ * @num_bytes: the number of bytes we are releasing.
  *
  * This will release the metadata reservation for an inode.  This can be called
  * once we complete IO for a given set of bytes to release their metadata
- * reservations.
+ * reservations, or on error for the same reason.
  */
 void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
@@ -6223,8 +6235,7 @@  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 
 	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
 	spin_lock(&inode->lock);
-	dropped = drop_outstanding_extent(inode, num_bytes);
-
+	dropped = drop_over_reserved_extents(inode);
 	if (num_bytes)
 		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
 	spin_unlock(&inode->lock);
@@ -6241,6 +6252,42 @@  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 }
 
 /**
+ * btrfs_delalloc_release_extents - release our outstanding_extents
+ * @inode: the inode to balance the reservation for.
+ * @num_bytes: the number of bytes we originally reserved with
+ *
+ * When we reserve space we increase outstanding_extents for the extents we may
+ * add.  Once we've set the range as delalloc or created our ordered extents we
+ * have outstanding_extents to track the real usage, so we use this to free our
+ * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
+ * with btrfs_delalloc_reserve_metadata.
+ */
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	unsigned num_extents;
+	u64 to_free;
+	unsigned dropped;
+
+	spin_lock(&inode->lock);
+	num_extents = count_max_extents(num_bytes);	/* worst-case extent count for this range */
+	btrfs_mod_outstanding_extents(inode, -num_extents);
+	dropped = drop_over_reserved_extents(inode);	/* reservations we no longer need */
+	spin_unlock(&inode->lock);
+
+	if (!dropped)
+		return;	/* everything reserved is still in use */
+
+	if (btrfs_is_testing(fs_info))
+		return;	/* sanity tests: skip the real block-rsv accounting */
+
+	to_free = btrfs_calc_trans_metadata_size(fs_info, dropped);
+	trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
+				      to_free, 0);
+	btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
+}
+
+/**
  * btrfs_delalloc_reserve_space - reserve data and metadata space for
  * delalloc
  * @inode: inode we're writing to
@@ -6284,10 +6331,7 @@  int btrfs_delalloc_reserve_space(struct inode *inode,
  * @inode: inode we're releasing space for
  * @start: start position of the space already reserved
  * @len: the len of the space already reserved
- *
- * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
- * called in the case that we don't need the metadata AND data reservations
- * anymore.  So if there is an error or we insert an inline extent.
+ * @release_bytes: the len of the space we consumed or didn't use
  *
  * This function will release the metadata space that was not used and will
  * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
@@ -6295,7 +6339,8 @@  int btrfs_delalloc_reserve_space(struct inode *inode,
  * Also it will handle the qgroup reserved space.
  */
 void btrfs_delalloc_release_space(struct inode *inode,
-			struct extent_changeset *reserved, u64 start, u64 len)
+				  struct extent_changeset *reserved,
+				  u64 start, u64 len)
 {
 	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
 	btrfs_free_reserved_data_space(inode, reserved, start, len);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index aafcc785f840..2a7f1b27149b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1656,6 +1656,7 @@  static noinline ssize_t __btrfs_buffered_write(struct file *file,
 			}
 		}
 
+		WARN_ON(reserve_bytes == 0);
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
 				reserve_bytes);
 		if (ret) {
@@ -1679,8 +1680,11 @@  static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		ret = prepare_pages(inode, pages, num_pages,
 				    pos, write_bytes,
 				    force_page_uptodate);
-		if (ret)
+		if (ret) {
+			btrfs_delalloc_release_extents(BTRFS_I(inode),
+						       reserve_bytes);
 			break;
+		}
 
 		ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
 				num_pages, pos, write_bytes, &lockstart,
@@ -1688,6 +1692,8 @@  static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		if (ret < 0) {
 			if (ret == -EAGAIN)
 				goto again;
+			btrfs_delalloc_release_extents(BTRFS_I(inode),
+						       reserve_bytes);
 			break;
 		} else if (ret > 0) {
 			need_unlock = true;
@@ -1718,23 +1724,10 @@  static noinline ssize_t __btrfs_buffered_write(struct file *file,
 						   PAGE_SIZE);
 		}
 
-		/*
-		 * If we had a short copy we need to release the excess delaloc
-		 * bytes we reserved.  We need to increment outstanding_extents
-		 * because btrfs_delalloc_release_space and
-		 * btrfs_delalloc_release_metadata will decrement it, but
-		 * we still have an outstanding extent for the chunk we actually
-		 * managed to copy.
-		 */
 		if (num_sectors > dirty_sectors) {
 			/* release everything except the sectors we dirtied */
 			release_bytes -= dirty_sectors <<
 						fs_info->sb->s_blocksize_bits;
-			if (copied > 0) {
-				spin_lock(&BTRFS_I(inode)->lock);
-				BTRFS_I(inode)->outstanding_extents++;
-				spin_unlock(&BTRFS_I(inode)->lock);
-			}
 			if (only_release_metadata) {
 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
 								release_bytes);
@@ -1760,6 +1753,7 @@  static noinline ssize_t __btrfs_buffered_write(struct file *file,
 			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
 					     lockstart, lockend, &cached_state,
 					     GFP_NOFS);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
 		if (ret) {
 			btrfs_drop_pages(pages, num_pages);
 			break;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index d02019747d00..022b19336fee 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -500,11 +500,12 @@  int btrfs_save_ino_cache(struct btrfs_root *root,
 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
 					      prealloc, prealloc, &alloc_hint);
 	if (ret) {
-		btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
 		goto out_put;
 	}
 
 	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
 out_put:
 	iput(inode);
 out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b728397ba6e1..33ba258815b2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -67,7 +67,6 @@  struct btrfs_iget_args {
 };
 
 struct btrfs_dio_data {
-	u64 outstanding_extents;
 	u64 reserve;
 	u64 unsubmitted_oe_range_start;
 	u64 unsubmitted_oe_range_end;
@@ -348,7 +347,6 @@  static noinline int cow_file_range_inline(struct btrfs_root *root,
 	}
 
 	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
-	btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start);
 	btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
 out:
 	/*
@@ -587,16 +585,21 @@  static noinline void compress_file_range(struct inode *inode,
 		}
 		if (ret <= 0) {
 			unsigned long clear_flags = EXTENT_DELALLOC |
-				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
+				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+				EXTENT_DO_ACCOUNTING;
 			unsigned long page_error_op;
 
-			clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
 			page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
 
 			/*
 			 * inline extent creation worked or returned error,
 			 * we don't need to create any more async work items.
 			 * Unlock and free up our temp pages.
+			 *
+			 * We use DO_ACCOUNTING here because we need the
+			 * delalloc_release_metadata to be done _after_ we drop
+			 * our outstanding extent for clearing delalloc for this
+			 * range.
 			 */
 			extent_clear_unlock_delalloc(inode, start, end, end,
 						     NULL, clear_flags,
@@ -605,10 +608,6 @@  static noinline void compress_file_range(struct inode *inode,
 						     PAGE_SET_WRITEBACK |
 						     page_error_op |
 						     PAGE_END_WRITEBACK);
-			if (ret == 0)
-				btrfs_free_reserved_data_space_noquota(inode,
-							       start,
-							       end - start + 1);
 			goto free_pages_out;
 		}
 	}
@@ -985,15 +984,19 @@  static noinline int cow_file_range(struct inode *inode,
 		ret = cow_file_range_inline(root, inode, start, end, 0,
 					BTRFS_COMPRESS_NONE, NULL);
 		if (ret == 0) {
+			/*
+			 * We use DO_ACCOUNTING here because we need the
+			 * delalloc_release_metadata to be run _after_ we drop
+			 * our outstanding extent for clearing delalloc for this
+			 * range.
+			 */
 			extent_clear_unlock_delalloc(inode, start, end,
 				     delalloc_end, NULL,
 				     EXTENT_LOCKED | EXTENT_DELALLOC |
-				     EXTENT_DELALLOC_NEW |
-				     EXTENT_DEFRAG, PAGE_UNLOCK |
+				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
+				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
 				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
 				     PAGE_END_WRITEBACK);
-			btrfs_free_reserved_data_space_noquota(inode, start,
-						end - start + 1);
 			*nr_written = *nr_written +
 			     (end - start + PAGE_SIZE) / PAGE_SIZE;
 			*page_started = 1;
@@ -1631,7 +1634,7 @@  static void btrfs_split_extent_hook(void *private_data,
 	}
 
 	spin_lock(&BTRFS_I(inode)->lock);
-	BTRFS_I(inode)->outstanding_extents++;
+	btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
 	spin_unlock(&BTRFS_I(inode)->lock);
 }
 
@@ -1661,7 +1664,7 @@  static void btrfs_merge_extent_hook(void *private_data,
 	/* we're not bigger than the max, unreserve the space and go */
 	if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
 		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents--;
+		btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
 		spin_unlock(&BTRFS_I(inode)->lock);
 		return;
 	}
@@ -1692,7 +1695,7 @@  static void btrfs_merge_extent_hook(void *private_data,
 		return;
 
 	spin_lock(&BTRFS_I(inode)->lock);
-	BTRFS_I(inode)->outstanding_extents--;
+	btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
 	spin_unlock(&BTRFS_I(inode)->lock);
 }
 
@@ -1762,15 +1765,12 @@  static void btrfs_set_bit_hook(void *private_data,
 	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
 		u64 len = state->end + 1 - state->start;
+		u32 num_extents = count_max_extents(len);
 		bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
 
-		if (*bits & EXTENT_FIRST_DELALLOC) {
-			*bits &= ~EXTENT_FIRST_DELALLOC;
-		} else {
-			spin_lock(&BTRFS_I(inode)->lock);
-			BTRFS_I(inode)->outstanding_extents++;
-			spin_unlock(&BTRFS_I(inode)->lock);
-		}
+		spin_lock(&BTRFS_I(inode)->lock);
+		btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
+		spin_unlock(&BTRFS_I(inode)->lock);
 
 		/* For sanity tests */
 		if (btrfs_is_testing(fs_info))
@@ -1824,13 +1824,9 @@  static void btrfs_clear_bit_hook(void *private_data,
 		struct btrfs_root *root = inode->root;
 		bool do_list = !btrfs_is_free_space_inode(inode);
 
-		if (*bits & EXTENT_FIRST_DELALLOC) {
-			*bits &= ~EXTENT_FIRST_DELALLOC;
-		} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
-			spin_lock(&inode->lock);
-			inode->outstanding_extents -= num_extents;
-			spin_unlock(&inode->lock);
-		}
+		spin_lock(&inode->lock);
+		btrfs_mod_outstanding_extents(inode, -num_extents);
+		spin_unlock(&inode->lock);
 
 		/*
 		 * We don't reserve metadata space for space cache inodes so we
@@ -2101,6 +2097,7 @@  static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 				  0);
 	ClearPageChecked(page);
 	set_page_dirty(page);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			     &cached_state, GFP_NOFS);
@@ -3054,9 +3051,6 @@  static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 				 0, &cached_state, GFP_NOFS);
 	}
 
-	if (root != fs_info->tree_root)
-		btrfs_delalloc_release_metadata(BTRFS_I(inode),
-				ordered_extent->len);
 	if (trans)
 		btrfs_end_transaction(trans);
 
@@ -4797,8 +4791,11 @@  int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 	    (!len || ((len & (blocksize - 1)) == 0)))
 		goto out;
 
+	block_start = round_down(from, blocksize);
+	block_end = block_start + blocksize - 1;
+
 	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
-			round_down(from, blocksize), blocksize);
+					   block_start, blocksize);
 	if (ret)
 		goto out;
 
@@ -4806,15 +4803,12 @@  int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 	page = find_or_create_page(mapping, index, mask);
 	if (!page) {
 		btrfs_delalloc_release_space(inode, data_reserved,
-				round_down(from, blocksize),
-				blocksize);
+					     block_start, blocksize);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	block_start = round_down(from, blocksize);
-	block_end = block_start + blocksize - 1;
-
 	if (!PageUptodate(page)) {
 		ret = btrfs_readpage(NULL, page);
 		lock_page(page);
@@ -4879,6 +4873,7 @@  int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 	if (ret)
 		btrfs_delalloc_release_space(inode, data_reserved, block_start,
 					     blocksize);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
 	unlock_page(page);
 	put_page(page);
 out:
@@ -7793,33 +7788,6 @@  static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
 	return em;
 }
 
-static void adjust_dio_outstanding_extents(struct inode *inode,
-					   struct btrfs_dio_data *dio_data,
-					   const u64 len)
-{
-	unsigned num_extents = count_max_extents(len);
-
-	/*
-	 * If we have an outstanding_extents count still set then we're
-	 * within our reservation, otherwise we need to adjust our inode
-	 * counter appropriately.
-	 */
-	if (dio_data->outstanding_extents >= num_extents) {
-		dio_data->outstanding_extents -= num_extents;
-	} else {
-		/*
-		 * If dio write length has been split due to no large enough
-		 * contiguous space, we need to compensate our inode counter
-		 * appropriately.
-		 */
-		u64 num_needed = num_extents - dio_data->outstanding_extents;
-
-		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents += num_needed;
-		spin_unlock(&BTRFS_I(inode)->lock);
-	}
-}
-
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
@@ -7981,7 +7949,6 @@  static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		if (!dio_data->overwrite && start + len > i_size_read(inode))
 			i_size_write(inode, start + len);
 
-		adjust_dio_outstanding_extents(inode, dio_data, len);
 		WARN_ON(dio_data->reserve < len);
 		dio_data->reserve -= len;
 		dio_data->unsubmitted_oe_range_end = start + len;
@@ -8011,14 +7978,6 @@  static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 err:
 	if (dio_data)
 		current->journal_info = dio_data;
-	/*
-	 * Compensate the delalloc release we do in btrfs_direct_IO() when we
-	 * write less data then expected, so that we don't underflow our inode's
-	 * outstanding extents counter.
-	 */
-	if (create && dio_data)
-		adjust_dio_outstanding_extents(inode, dio_data, len);
-
 	return ret;
 }
 
@@ -8863,7 +8822,6 @@  static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 						   offset, count);
 		if (ret)
 			goto out;
-		dio_data.outstanding_extents = count_max_extents(count);
 
 		/*
 		 * We need to know how many extents we reserved so that we can
@@ -8890,6 +8848,7 @@  static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (iov_iter_rw(iter) == WRITE) {
 		up_read(&BTRFS_I(inode)->dio_sem);
 		current->journal_info = NULL;
+		btrfs_delalloc_release_extents(BTRFS_I(inode), count);
 		if (ret < 0 && ret != -EIOCBQUEUED) {
 			if (dio_data.reserve)
 				btrfs_delalloc_release_space(inode, data_reserved,
@@ -9227,9 +9186,6 @@  int btrfs_page_mkwrite(struct vm_fault *vmf)
 					  fs_info->sectorsize);
 		if (reserved_space < PAGE_SIZE) {
 			end = page_start + reserved_space - 1;
-			spin_lock(&BTRFS_I(inode)->lock);
-			BTRFS_I(inode)->outstanding_extents++;
-			spin_unlock(&BTRFS_I(inode)->lock);
 			btrfs_delalloc_release_space(inode, data_reserved,
 					page_start, PAGE_SIZE - reserved_space);
 		}
@@ -9281,12 +9237,14 @@  int btrfs_page_mkwrite(struct vm_fault *vmf)
 
 out_unlock:
 	if (!ret) {
+		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 		sb_end_pagefault(inode->i_sb);
 		extent_changeset_free(data_reserved);
 		return VM_FAULT_LOCKED;
 	}
 	unlock_page(page);
 out:
+	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 	btrfs_delalloc_release_space(inode, data_reserved, page_start,
 				     reserved_space);
 out_noreserve:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 31407c62da63..17059aa5564f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1204,6 +1204,7 @@  static int cluster_pages_for_defrag(struct inode *inode,
 		unlock_page(pages[i]);
 		put_page(pages[i]);
 	}
+	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
 	extent_changeset_free(data_reserved);
 	return i_done;
 out:
@@ -1214,6 +1215,7 @@  static int cluster_pages_for_defrag(struct inode *inode,
 	btrfs_delalloc_release_space(inode, data_reserved,
 			start_index << PAGE_SHIFT,
 			page_cnt << PAGE_SHIFT);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
 	extent_changeset_free(data_reserved);
 	return ret;
 
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a3aca495e33e..5b311aeddcc8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -242,6 +242,15 @@  static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 	}
 	spin_unlock(&root->ordered_extent_lock);
 
+	/*
+	 * We don't need the count_max_extents here, we can assume that all of
+	 * that work has been done at higher layers, so this is truly the
+	 * smallest the extent is going to get.
+	 */
+	spin_lock(&BTRFS_I(inode)->lock);
+	btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
+	spin_unlock(&BTRFS_I(inode)->lock);
+
 	return 0;
 }
 
@@ -591,11 +600,19 @@  void btrfs_remove_ordered_extent(struct inode *inode,
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ordered_inode_tree *tree;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+	struct btrfs_root *root = btrfs_inode->root;
 	struct rb_node *node;
 	bool dec_pending_ordered = false;
 
-	tree = &BTRFS_I(inode)->ordered_tree;
+	/* This is paired with btrfs_add_ordered_extent. */
+	spin_lock(&btrfs_inode->lock);
+	btrfs_mod_outstanding_extents(btrfs_inode, -1);
+	spin_unlock(&btrfs_inode->lock);
+	if (root != fs_info->tree_root)
+		btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
+
+	tree = &btrfs_inode->ordered_tree;
 	spin_lock_irq(&tree->lock);
 	node = &entry->rb_node;
 	rb_erase(node, &tree->tree);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 9841faef08ea..53e192647339 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3246,6 +3246,8 @@  static int relocate_file_extent_cluster(struct inode *inode,
 				put_page(page);
 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
 							PAGE_SIZE);
+				btrfs_delalloc_release_extents(BTRFS_I(inode),
+							       PAGE_SIZE);
 				ret = -EIO;
 				goto out;
 			}
@@ -3275,6 +3277,7 @@  static int relocate_file_extent_cluster(struct inode *inode,
 		put_page(page);
 
 		index++;
+		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 		btrfs_throttle(fs_info);
 	}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 8c91d03cc82d..11c77eafde00 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -968,7 +968,6 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	btrfs_test_inode_set_ops(inode);
 
 	/* [BTRFS_MAX_EXTENT_SIZE] */
-	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
 					NULL, 0);
 	if (ret) {
@@ -983,7 +982,6 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	}
 
 	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
-	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
 					BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
 					NULL, 0);
@@ -1003,7 +1001,7 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 			       BTRFS_MAX_EXTENT_SIZE >> 1,
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
-			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
+			       EXTENT_UPTODATE, 0, 0,
 			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
@@ -1017,7 +1015,6 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	}
 
 	/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
-	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
 					(BTRFS_MAX_EXTENT_SIZE >> 1)
 					+ sectorsize - 1,
@@ -1035,12 +1032,7 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
 	/*
 	 * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
-	 *
-	 * I'm artificially adding 2 to outstanding_extents because in the
-	 * buffered IO case we'd add things up as we go, but I don't feel like
-	 * doing that here, this isn't the interesting case we want to test.
 	 */
-	BTRFS_I(inode)->outstanding_extents += 2;
 	ret = btrfs_set_extent_delalloc(inode,
 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
 			(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
@@ -1059,7 +1051,6 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	/*
 	* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
 	*/
-	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode,
 			BTRFS_MAX_EXTENT_SIZE + sectorsize,
 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
@@ -1079,7 +1070,7 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 			       BTRFS_MAX_EXTENT_SIZE + sectorsize,
 			       BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
-			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+			       EXTENT_UPTODATE, 0, 0,
 			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
@@ -1096,7 +1087,6 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	 * Refill the hole again just for good measure, because I thought it
 	 * might fail and I'd rather satisfy my paranoia at this point.
 	 */
-	BTRFS_I(inode)->outstanding_extents++;
 	ret = btrfs_set_extent_delalloc(inode,
 			BTRFS_MAX_EXTENT_SIZE + sectorsize,
 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
@@ -1114,7 +1104,7 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	/* Empty */
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
-			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+			       EXTENT_UPTODATE, 0, 0,
 			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
@@ -1131,7 +1121,7 @@  static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 	if (ret)
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 				 EXTENT_DIRTY | EXTENT_DELALLOC |
-				 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+				 EXTENT_UPTODATE, 0, 0,
 				 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);