[03/19] btrfs: keep track of which extents have been discarded
diff mbox series

Message ID 5875088b5f4ada0ef73f097b238935dd583d5b3e.1570479299.git.dennis@kernel.org
State New
Headers show
Series
  • btrfs: async discard support
Related show

Commit Message

Dennis Zhou Oct. 7, 2019, 8:17 p.m. UTC
Async discard will use the free space cache as backing knowledge for
which extents to discard. This patch plumbs knowledge about which
extents need to be discarded into the free space cache from
unpin_extent_range().

An untrimmed extent can merge with everything as this is a new region.
Absorbing trimmed extents is a tradeoff for greater coalescing which
makes life better for find_free_extent(). Additionally, it seems the
size of a trim isn't as problematic as the trim io itself.

When reading in the free space cache from disk, if sync is set, mark all
extents as trimmed. The current code ensures at transaction commit that
all free space is trimmed when sync is set, so this reflects that.

Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 fs/btrfs/extent-tree.c      | 15 ++++++++++-----
 fs/btrfs/free-space-cache.c | 38 ++++++++++++++++++++++++++++++-------
 fs/btrfs/free-space-cache.h | 10 +++++++++-
 fs/btrfs/inode-map.c        | 13 +++++++------
 4 files changed, 57 insertions(+), 19 deletions(-)

Comments

Josef Bacik Oct. 7, 2019, 8:37 p.m. UTC | #1
On Mon, Oct 07, 2019 at 04:17:34PM -0400, Dennis Zhou wrote:
> Async discard will use the free space cache as backing knowledge for
> which extents to discard. This patch plumbs knowledge about which
> extents need to be discarded into the free space cache from
> unpin_extent_range().
> 
> An untrimmed extent can merge with everything as this is a new region.
> Absorbing trimmed extents is a tradeoff to for greater coalescing which
> makes life better for find_free_extent(). Additionally, it seems the
> size of a trim isn't as problematic as the trim io itself.
> 
> When reading in the free space cache from disk, if sync is set, mark all
> extents as trimmed. The current code ensures at transaction commit that
> all free space is trimmed when sync is set, so this reflects that.
> 
> Signed-off-by: Dennis Zhou <dennis@kernel.org>
> ---
>  fs/btrfs/extent-tree.c      | 15 ++++++++++-----
>  fs/btrfs/free-space-cache.c | 38 ++++++++++++++++++++++++++++++-------
>  fs/btrfs/free-space-cache.h | 10 +++++++++-
>  fs/btrfs/inode-map.c        | 13 +++++++------
>  4 files changed, 57 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 77a5904756c5..b9e3bedad878 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -2782,7 +2782,7 @@ fetch_cluster_info(struct btrfs_fs_info *fs_info,
>  }
>  
>  static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> -			      u64 start, u64 end,
> +			      u64 start, u64 end, u32 fsc_flags,
>  			      const bool return_free_space)
>  {
>  	struct btrfs_block_group_cache *cache = NULL;
> @@ -2816,7 +2816,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
>  		if (start < cache->last_byte_to_unpin) {
>  			len = min(len, cache->last_byte_to_unpin - start);
>  			if (return_free_space)
> -				btrfs_add_free_space(cache, start, len);
> +				__btrfs_add_free_space(fs_info,
> +						       cache->free_space_ctl,
> +						       start, len, fsc_flags);
>  		}
>  
>  		start += len;
> @@ -2894,6 +2896,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
>  
>  	while (!trans->aborted) {
>  		struct extent_state *cached_state = NULL;
> +		u32 fsc_flags = 0;
>  
>  		mutex_lock(&fs_info->unused_bg_unpin_mutex);
>  		ret = find_first_extent_bit(unpin, 0, &start, &end,
> @@ -2903,12 +2906,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
>  			break;
>  		}
>  
> -		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> +		if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
>  			ret = btrfs_discard_extent(fs_info, start,
>  						   end + 1 - start, NULL);
> +			fsc_flags |= BTRFS_FSC_TRIMMED;
> +		}
>  
>  		clear_extent_dirty(unpin, start, end, &cached_state);
> -		unpin_extent_range(fs_info, start, end, true);
> +		unpin_extent_range(fs_info, start, end, fsc_flags, true);
>  		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
>  		free_extent_state(cached_state);
>  		cond_resched();
> @@ -5512,7 +5517,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
>  int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
>  				   u64 start, u64 end)
>  {
> -	return unpin_extent_range(fs_info, start, end, false);
> +	return unpin_extent_range(fs_info, start, end, 0, false);
>  }
>  
>  /*
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index d54dcd0ab230..f119895292b8 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -747,6 +747,14 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
>  			goto free_cache;
>  		}
>  
> +		/*
> +		 * Sync discard ensures that the free space cache is always
> +		 * trimmed.  So when reading this in, the state should reflect
> +		 * that.
> +		 */
> +		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> +			e->flags |= BTRFS_FSC_TRIMMED;
> +
>  		if (!e->bytes) {
>  			kmem_cache_free(btrfs_free_space_cachep, e);
>  			goto free_cache;
> @@ -2165,6 +2173,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
>  	bool merged = false;
>  	u64 offset = info->offset;
>  	u64 bytes = info->bytes;
> +	bool is_trimmed = btrfs_free_space_trimmed(info);
>  
>  	/*
>  	 * first we want to see if there is free space adjacent to the range we
> @@ -2178,7 +2187,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
>  	else
>  		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
>  
> -	if (right_info && !right_info->bitmap) {
> +	if (right_info && !right_info->bitmap &&
> +	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
>  		if (update_stat)
>  			unlink_free_space(ctl, right_info);
>  		else
> @@ -2189,7 +2199,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
>  	}
>  
>  	if (left_info && !left_info->bitmap &&
> -	    left_info->offset + left_info->bytes == offset) {
> +	    left_info->offset + left_info->bytes == offset &&
> +	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {

So we allow merging if we haven't trimmed this entry, or if the adjacent entry
is already trimmed?  This means we'll merge if we trimmed the new entry
regardless of the adjacent entries' status, or if the new entry is dirty.  Why is
that?  Thanks,

Josef
Dennis Zhou Oct. 7, 2019, 10:38 p.m. UTC | #2
On Mon, Oct 07, 2019 at 04:37:28PM -0400, Josef Bacik wrote:
> On Mon, Oct 07, 2019 at 04:17:34PM -0400, Dennis Zhou wrote:
> > Async discard will use the free space cache as backing knowledge for
> > which extents to discard. This patch plumbs knowledge about which
> > extents need to be discarded into the free space cache from
> > unpin_extent_range().
> > 
> > An untrimmed extent can merge with everything as this is a new region.
> > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > makes life better for find_free_extent(). Additionally, it seems the
> > size of a trim isn't as problematic as the trim io itself.
> > 
> > When reading in the free space cache from disk, if sync is set, mark all
> > extents as trimmed. The current code ensures at transaction commit that
> > all free space is trimmed when sync is set, so this reflects that.
> > 
> > Signed-off-by: Dennis Zhou <dennis@kernel.org>
> > ---
> >  fs/btrfs/extent-tree.c      | 15 ++++++++++-----
> >  fs/btrfs/free-space-cache.c | 38 ++++++++++++++++++++++++++++++-------
> >  fs/btrfs/free-space-cache.h | 10 +++++++++-
> >  fs/btrfs/inode-map.c        | 13 +++++++------
> >  4 files changed, 57 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> > index 77a5904756c5..b9e3bedad878 100644
> > --- a/fs/btrfs/extent-tree.c
> > +++ b/fs/btrfs/extent-tree.c
> > @@ -2782,7 +2782,7 @@ fetch_cluster_info(struct btrfs_fs_info *fs_info,
> >  }
> >  
> >  static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> > -			      u64 start, u64 end,
> > +			      u64 start, u64 end, u32 fsc_flags,
> >  			      const bool return_free_space)
> >  {
> >  	struct btrfs_block_group_cache *cache = NULL;
> > @@ -2816,7 +2816,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> >  		if (start < cache->last_byte_to_unpin) {
> >  			len = min(len, cache->last_byte_to_unpin - start);
> >  			if (return_free_space)
> > -				btrfs_add_free_space(cache, start, len);
> > +				__btrfs_add_free_space(fs_info,
> > +						       cache->free_space_ctl,
> > +						       start, len, fsc_flags);
> >  		}
> >  
> >  		start += len;
> > @@ -2894,6 +2896,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
> >  
> >  	while (!trans->aborted) {
> >  		struct extent_state *cached_state = NULL;
> > +		u32 fsc_flags = 0;
> >  
> >  		mutex_lock(&fs_info->unused_bg_unpin_mutex);
> >  		ret = find_first_extent_bit(unpin, 0, &start, &end,
> > @@ -2903,12 +2906,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
> >  			break;
> >  		}
> >  
> > -		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> > +		if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
> >  			ret = btrfs_discard_extent(fs_info, start,
> >  						   end + 1 - start, NULL);
> > +			fsc_flags |= BTRFS_FSC_TRIMMED;
> > +		}
> >  
> >  		clear_extent_dirty(unpin, start, end, &cached_state);
> > -		unpin_extent_range(fs_info, start, end, true);
> > +		unpin_extent_range(fs_info, start, end, fsc_flags, true);
> >  		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
> >  		free_extent_state(cached_state);
> >  		cond_resched();
> > @@ -5512,7 +5517,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
> >  int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
> >  				   u64 start, u64 end)
> >  {
> > -	return unpin_extent_range(fs_info, start, end, false);
> > +	return unpin_extent_range(fs_info, start, end, 0, false);
> >  }
> >  
> >  /*
> > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > index d54dcd0ab230..f119895292b8 100644
> > --- a/fs/btrfs/free-space-cache.c
> > +++ b/fs/btrfs/free-space-cache.c
> > @@ -747,6 +747,14 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
> >  			goto free_cache;
> >  		}
> >  
> > +		/*
> > +		 * Sync discard ensures that the free space cache is always
> > +		 * trimmed.  So when reading this in, the state should reflect
> > +		 * that.
> > +		 */
> > +		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> > +			e->flags |= BTRFS_FSC_TRIMMED;
> > +
> >  		if (!e->bytes) {
> >  			kmem_cache_free(btrfs_free_space_cachep, e);
> >  			goto free_cache;
> > @@ -2165,6 +2173,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> >  	bool merged = false;
> >  	u64 offset = info->offset;
> >  	u64 bytes = info->bytes;
> > +	bool is_trimmed = btrfs_free_space_trimmed(info);
> >  
> >  	/*
> >  	 * first we want to see if there is free space adjacent to the range we
> > @@ -2178,7 +2187,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> >  	else
> >  		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
> >  
> > -	if (right_info && !right_info->bitmap) {
> > +	if (right_info && !right_info->bitmap &&
> > +	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
> >  		if (update_stat)
> >  			unlink_free_space(ctl, right_info);
> >  		else
> > @@ -2189,7 +2199,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> >  	}
> >  
> >  	if (left_info && !left_info->bitmap &&
> > -	    left_info->offset + left_info->bytes == offset) {
> > +	    left_info->offset + left_info->bytes == offset &&
> > +	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
> 
> So we allow merging if we haven't trimmed this entry, or if the adjacent entry
> is already trimmed?  This means we'll merge if we trimmed the new entry
> regardless of the adjacent entries status, or if the new entry is drity.  Why is
> that?  Thanks,
> 

This is the tradeoff I called out above here:

> > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > makes life better for find_free_extent(). Additionally, it seems the
> > size of a trim isn't as problematic as the trim io itself.

A problematic example case:

|----trimmed----|/////X/////|-----trimmed-----|

If region X gets freed and returned to the free space cache, we end up
with the following:

|----trimmed----|-untrimmed-|-----trimmed-----|

This isn't great because now we need to teach find_free_extent() to span
multiple btrfs_free_space entries, something I didn't want to do. So the
other option is to overtrim trading for a simpler find_free_extent().
Then the above becomes:

|-------------------trimmed-------------------|

It makes the assumption that if we're inserting, it generally is free
space being returned rather than we needed to slice out from the middle
of a block. It does still have degenerative cases, but it's better than
the above. The merging also allows for stuff to come out of bitmaps more
proactively too.

Also from what it seems, the cost of a discard operation is quite costly
relative to the amount you're discarding (1 larger discard is better than
several smaller discards) as it will clog up the device too.


Thanks,
Dennis
Nikolay Borisov Oct. 8, 2019, 12:46 p.m. UTC | #3
On 7.10.19 г. 23:17 ч., Dennis Zhou wrote:
> Async discard will use the free space cache as backing knowledge for
> which extents to discard. This patch plumbs knowledge about which
> extents need to be discarded into the free space cache from
> unpin_extent_range().
> 
> An untrimmed extent can merge with everything as this is a new region.
> Absorbing trimmed extents is a tradeoff to for greater coalescing which
> makes life better for find_free_extent(). Additionally, it seems the
> size of a trim isn't as problematic as the trim io itself.
> 
> When reading in the free space cache from disk, if sync is set, mark all
> extents as trimmed. The current code ensures at transaction commit that
> all free space is trimmed when sync is set, so this reflects that.
> 
> Signed-off-by: Dennis Zhou <dennis@kernel.org>

I haven't looked closely into this commit but I already implemented
something similar in order to speed up trimming by not discarding an
already discarded region twice. The code was introduced by the following
series:
https://lore.kernel.org/linux-btrfs/20190327122418.24027-1-nborisov@suse.com/
in particular patches 13 to 15 .

Can you leverage it ? If not then your code should, at some point,
subsume the old one.
Josef Bacik Oct. 10, 2019, 1:40 p.m. UTC | #4
On Mon, Oct 07, 2019 at 06:38:10PM -0400, Dennis Zhou wrote:
> On Mon, Oct 07, 2019 at 04:37:28PM -0400, Josef Bacik wrote:
> > On Mon, Oct 07, 2019 at 04:17:34PM -0400, Dennis Zhou wrote:
> > > Async discard will use the free space cache as backing knowledge for
> > > which extents to discard. This patch plumbs knowledge about which
> > > extents need to be discarded into the free space cache from
> > > unpin_extent_range().
> > > 
> > > An untrimmed extent can merge with everything as this is a new region.
> > > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > > makes life better for find_free_extent(). Additionally, it seems the
> > > size of a trim isn't as problematic as the trim io itself.
> > > 
> > > When reading in the free space cache from disk, if sync is set, mark all
> > > extents as trimmed. The current code ensures at transaction commit that
> > > all free space is trimmed when sync is set, so this reflects that.
> > > 
> > > Signed-off-by: Dennis Zhou <dennis@kernel.org>
> > > ---
> > >  fs/btrfs/extent-tree.c      | 15 ++++++++++-----
> > >  fs/btrfs/free-space-cache.c | 38 ++++++++++++++++++++++++++++++-------
> > >  fs/btrfs/free-space-cache.h | 10 +++++++++-
> > >  fs/btrfs/inode-map.c        | 13 +++++++------
> > >  4 files changed, 57 insertions(+), 19 deletions(-)
> > > 
> > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> > > index 77a5904756c5..b9e3bedad878 100644
> > > --- a/fs/btrfs/extent-tree.c
> > > +++ b/fs/btrfs/extent-tree.c
> > > @@ -2782,7 +2782,7 @@ fetch_cluster_info(struct btrfs_fs_info *fs_info,
> > >  }
> > >  
> > >  static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> > > -			      u64 start, u64 end,
> > > +			      u64 start, u64 end, u32 fsc_flags,
> > >  			      const bool return_free_space)
> > >  {
> > >  	struct btrfs_block_group_cache *cache = NULL;
> > > @@ -2816,7 +2816,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> > >  		if (start < cache->last_byte_to_unpin) {
> > >  			len = min(len, cache->last_byte_to_unpin - start);
> > >  			if (return_free_space)
> > > -				btrfs_add_free_space(cache, start, len);
> > > +				__btrfs_add_free_space(fs_info,
> > > +						       cache->free_space_ctl,
> > > +						       start, len, fsc_flags);
> > >  		}
> > >  
> > >  		start += len;
> > > @@ -2894,6 +2896,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
> > >  
> > >  	while (!trans->aborted) {
> > >  		struct extent_state *cached_state = NULL;
> > > +		u32 fsc_flags = 0;
> > >  
> > >  		mutex_lock(&fs_info->unused_bg_unpin_mutex);
> > >  		ret = find_first_extent_bit(unpin, 0, &start, &end,
> > > @@ -2903,12 +2906,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
> > >  			break;
> > >  		}
> > >  
> > > -		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> > > +		if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
> > >  			ret = btrfs_discard_extent(fs_info, start,
> > >  						   end + 1 - start, NULL);
> > > +			fsc_flags |= BTRFS_FSC_TRIMMED;
> > > +		}
> > >  
> > >  		clear_extent_dirty(unpin, start, end, &cached_state);
> > > -		unpin_extent_range(fs_info, start, end, true);
> > > +		unpin_extent_range(fs_info, start, end, fsc_flags, true);
> > >  		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
> > >  		free_extent_state(cached_state);
> > >  		cond_resched();
> > > @@ -5512,7 +5517,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
> > >  int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
> > >  				   u64 start, u64 end)
> > >  {
> > > -	return unpin_extent_range(fs_info, start, end, false);
> > > +	return unpin_extent_range(fs_info, start, end, 0, false);
> > >  }
> > >  
> > >  /*
> > > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > > index d54dcd0ab230..f119895292b8 100644
> > > --- a/fs/btrfs/free-space-cache.c
> > > +++ b/fs/btrfs/free-space-cache.c
> > > @@ -747,6 +747,14 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
> > >  			goto free_cache;
> > >  		}
> > >  
> > > +		/*
> > > +		 * Sync discard ensures that the free space cache is always
> > > +		 * trimmed.  So when reading this in, the state should reflect
> > > +		 * that.
> > > +		 */
> > > +		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> > > +			e->flags |= BTRFS_FSC_TRIMMED;
> > > +
> > >  		if (!e->bytes) {
> > >  			kmem_cache_free(btrfs_free_space_cachep, e);
> > >  			goto free_cache;
> > > @@ -2165,6 +2173,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> > >  	bool merged = false;
> > >  	u64 offset = info->offset;
> > >  	u64 bytes = info->bytes;
> > > +	bool is_trimmed = btrfs_free_space_trimmed(info);
> > >  
> > >  	/*
> > >  	 * first we want to see if there is free space adjacent to the range we
> > > @@ -2178,7 +2187,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> > >  	else
> > >  		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
> > >  
> > > -	if (right_info && !right_info->bitmap) {
> > > +	if (right_info && !right_info->bitmap &&
> > > +	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
> > >  		if (update_stat)
> > >  			unlink_free_space(ctl, right_info);
> > >  		else
> > > @@ -2189,7 +2199,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> > >  	}
> > >  
> > >  	if (left_info && !left_info->bitmap &&
> > > -	    left_info->offset + left_info->bytes == offset) {
> > > +	    left_info->offset + left_info->bytes == offset &&
> > > +	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
> > 
> > So we allow merging if we haven't trimmed this entry, or if the adjacent entry
> > is already trimmed?  This means we'll merge if we trimmed the new entry
> > regardless of the adjacent entries status, or if the new entry is drity.  Why is
> > that?  Thanks,
> > 
> 
> This is the tradeoff I called out above here:
> 
> > > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > > makes life better for find_free_extent(). Additionally, it seems the
> > > size of a trim isn't as problematic as the trim io itself.
> 
> A problematic example case:
> 
> |----trimmed----|/////X/////|-----trimmed-----|
> 
> If region X gets freed and returned to the free space cache, we end up
> with the following:
> 
> |----trimmed----|-untrimmed-|-----trimmed-----|
> 
> This isn't great because now we need to teach find_free_extent() to span
> multiple btrfs_free_space entries, something I didn't want to do. So the
> other option is to overtrim trading for a simpler find_free_extent().
> Then the above becomes:
> 
> |-------------------trimmed-------------------|
> 
> It makes the assumption that if we're inserting, it's generally is free
> space being returned rather than we needed to slice out from the middle
> of a block. It does still have degenerative cases, but it's better than
> the above. The merging also allows for stuff to come out of bitmaps more
> proactively too.
> 
> Also from what it seems, the cost of a discard operation is quite costly
> relative to the amount your discarding (1 larger discard is better than
> several smaller discards) as it will clog up the device too.


OOOOOh I fucking get it now.  That's going to need a comment, because it's not
obvious at all.

However I still wonder if this is right.  Your above examples are legitimate,
but say you have

| 512mib adding back that isn't trimmed |------- 512mib trimmed ------|

we'll merge these two, but really we should probably trim that 512mib chunk
we're adding right?  Thanks,

Josef
Dennis Zhou Oct. 11, 2019, 4:08 p.m. UTC | #5
On Tue, Oct 08, 2019 at 03:46:18PM +0300, Nikolay Borisov wrote:
> 
> 
> On 7.10.19 г. 23:17 ч., Dennis Zhou wrote:
> > Async discard will use the free space cache as backing knowledge for
> > which extents to discard. This patch plumbs knowledge about which
> > extents need to be discarded into the free space cache from
> > unpin_extent_range().
> > 
> > An untrimmed extent can merge with everything as this is a new region.
> > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > makes life better for find_free_extent(). Additionally, it seems the
> > size of a trim isn't as problematic as the trim io itself.
> > 
> > When reading in the free space cache from disk, if sync is set, mark all
> > extents as trimmed. The current code ensures at transaction commit that
> > all free space is trimmed when sync is set, so this reflects that.
> > 
> > Signed-off-by: Dennis Zhou <dennis@kernel.org>
> 
> I haven't looked closely into this commit but I already implemented
> something similar in order to speed up trimming by not discarding an
> already discarded region twice. The code was introduced by the following
> series:
> https://lore.kernel.org/linux-btrfs/20190327122418.24027-1-nborisov@suse.com/
> in particular patches 13 to 15 .
> 
> Can you leverage it ? If not then your code should, at some point,
> subsume the old one.
> 

I spent some time reading through that. I believe we're tackling two
separate problems. Correct me if I'm wrong, but your patches are making
subsequent fitrims faster because it's skipping over free regions that
were never allocated by the chunk allocator.

This series is aiming to solve intra-block group trim latency as trim is
handled during transaction commit and consequently also help prevent
retrimming of the free space that is already trimmed.

Thanks,
Dennis
Dennis Zhou Oct. 11, 2019, 4:15 p.m. UTC | #6
On Thu, Oct 10, 2019 at 09:40:37AM -0400, Josef Bacik wrote:
> On Mon, Oct 07, 2019 at 06:38:10PM -0400, Dennis Zhou wrote:
> > On Mon, Oct 07, 2019 at 04:37:28PM -0400, Josef Bacik wrote:
> > > On Mon, Oct 07, 2019 at 04:17:34PM -0400, Dennis Zhou wrote:
> > > > Async discard will use the free space cache as backing knowledge for
> > > > which extents to discard. This patch plumbs knowledge about which
> > > > extents need to be discarded into the free space cache from
> > > > unpin_extent_range().
> > > > 
> > > > An untrimmed extent can merge with everything as this is a new region.
> > > > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > > > makes life better for find_free_extent(). Additionally, it seems the
> > > > size of a trim isn't as problematic as the trim io itself.
> > > > 
> > > > When reading in the free space cache from disk, if sync is set, mark all
> > > > extents as trimmed. The current code ensures at transaction commit that
> > > > all free space is trimmed when sync is set, so this reflects that.
> > > > 
> > > > Signed-off-by: Dennis Zhou <dennis@kernel.org>
> > > > ---
> > > >  fs/btrfs/extent-tree.c      | 15 ++++++++++-----
> > > >  fs/btrfs/free-space-cache.c | 38 ++++++++++++++++++++++++++++++-------
> > > >  fs/btrfs/free-space-cache.h | 10 +++++++++-
> > > >  fs/btrfs/inode-map.c        | 13 +++++++------
> > > >  4 files changed, 57 insertions(+), 19 deletions(-)
> > > > 
> > > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> > > > index 77a5904756c5..b9e3bedad878 100644
> > > > --- a/fs/btrfs/extent-tree.c
> > > > +++ b/fs/btrfs/extent-tree.c
> > > > @@ -2782,7 +2782,7 @@ fetch_cluster_info(struct btrfs_fs_info *fs_info,
> > > >  }
> > > >  
> > > >  static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> > > > -			      u64 start, u64 end,
> > > > +			      u64 start, u64 end, u32 fsc_flags,
> > > >  			      const bool return_free_space)
> > > >  {
> > > >  	struct btrfs_block_group_cache *cache = NULL;
> > > > @@ -2816,7 +2816,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
> > > >  		if (start < cache->last_byte_to_unpin) {
> > > >  			len = min(len, cache->last_byte_to_unpin - start);
> > > >  			if (return_free_space)
> > > > -				btrfs_add_free_space(cache, start, len);
> > > > +				__btrfs_add_free_space(fs_info,
> > > > +						       cache->free_space_ctl,
> > > > +						       start, len, fsc_flags);
> > > >  		}
> > > >  
> > > >  		start += len;
> > > > @@ -2894,6 +2896,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
> > > >  
> > > >  	while (!trans->aborted) {
> > > >  		struct extent_state *cached_state = NULL;
> > > > +		u32 fsc_flags = 0;
> > > >  
> > > >  		mutex_lock(&fs_info->unused_bg_unpin_mutex);
> > > >  		ret = find_first_extent_bit(unpin, 0, &start, &end,
> > > > @@ -2903,12 +2906,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
> > > >  			break;
> > > >  		}
> > > >  
> > > > -		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> > > > +		if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
> > > >  			ret = btrfs_discard_extent(fs_info, start,
> > > >  						   end + 1 - start, NULL);
> > > > +			fsc_flags |= BTRFS_FSC_TRIMMED;
> > > > +		}
> > > >  
> > > >  		clear_extent_dirty(unpin, start, end, &cached_state);
> > > > -		unpin_extent_range(fs_info, start, end, true);
> > > > +		unpin_extent_range(fs_info, start, end, fsc_flags, true);
> > > >  		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
> > > >  		free_extent_state(cached_state);
> > > >  		cond_resched();
> > > > @@ -5512,7 +5517,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
> > > >  int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
> > > >  				   u64 start, u64 end)
> > > >  {
> > > > -	return unpin_extent_range(fs_info, start, end, false);
> > > > +	return unpin_extent_range(fs_info, start, end, 0, false);
> > > >  }
> > > >  
> > > >  /*
> > > > diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> > > > index d54dcd0ab230..f119895292b8 100644
> > > > --- a/fs/btrfs/free-space-cache.c
> > > > +++ b/fs/btrfs/free-space-cache.c
> > > > @@ -747,6 +747,14 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
> > > >  			goto free_cache;
> > > >  		}
> > > >  
> > > > +		/*
> > > > +		 * Sync discard ensures that the free space cache is always
> > > > +		 * trimmed.  So when reading this in, the state should reflect
> > > > +		 * that.
> > > > +		 */
> > > > +		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
> > > > +			e->flags |= BTRFS_FSC_TRIMMED;
> > > > +
> > > >  		if (!e->bytes) {
> > > >  			kmem_cache_free(btrfs_free_space_cachep, e);
> > > >  			goto free_cache;
> > > > @@ -2165,6 +2173,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> > > >  	bool merged = false;
> > > >  	u64 offset = info->offset;
> > > >  	u64 bytes = info->bytes;
> > > > +	bool is_trimmed = btrfs_free_space_trimmed(info);
> > > >  
> > > >  	/*
> > > >  	 * first we want to see if there is free space adjacent to the range we
> > > > @@ -2178,7 +2187,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> > > >  	else
> > > >  		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
> > > >  
> > > > -	if (right_info && !right_info->bitmap) {
> > > > +	if (right_info && !right_info->bitmap &&
> > > > +	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
> > > >  		if (update_stat)
> > > >  			unlink_free_space(ctl, right_info);
> > > >  		else
> > > > @@ -2189,7 +2199,8 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> > > >  	}
> > > >  
> > > >  	if (left_info && !left_info->bitmap &&
> > > > -	    left_info->offset + left_info->bytes == offset) {
> > > > +	    left_info->offset + left_info->bytes == offset &&
> > > > +	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
> > > 
> > > So we allow merging if we haven't trimmed this entry, or if the adjacent entry
> > > is already trimmed?  This means we'll merge if we trimmed the new entry
> > > regardless of the adjacent entries status, or if the new entry is drity.  Why is
> > > that?  Thanks,
> > > 
> > 
> > This is the tradeoff I called out above here:
> > 
> > > > Absorbing trimmed extents is a tradeoff to for greater coalescing which
> > > > makes life better for find_free_extent(). Additionally, it seems the
> > > > size of a trim isn't as problematic as the trim io itself.
> > 
> > A problematic example case:
> > 
> > |----trimmed----|/////X/////|-----trimmed-----|
> > 
> > If region X gets freed and returned to the free space cache, we end up
> > with the following:
> > 
> > |----trimmed----|-untrimmed-|-----trimmed-----|
> > 
> > This isn't great because now we need to teach find_free_extent() to span
> > multiple btrfs_free_space entries, something I didn't want to do. So the
> > other option is to overtrim trading for a simpler find_free_extent().
> > Then the above becomes:
> > 
> > |-------------------trimmed-------------------|
> > 
> > It makes the assumption that if we're inserting, it's generally is free
> > space being returned rather than we needed to slice out from the middle
> > of a block. It does still have degenerative cases, but it's better than
> > the above. The merging also allows for stuff to come out of bitmaps more
> > proactively too.
> > 
> > Also from what it seems, the cost of a discard operation is quite costly
> > relative to the amount your discarding (1 larger discard is better than
> > several smaller discards) as it will clog up the device too.
> 
> 
> OOOOOh I fucking get it now.  That's going to need a comment, because it's not
> obvious at all.
> 
> However I still wonder if this is right.  Your above examples are legitimate,
> but say you have
> 
> | 512mib adding back that isn't trimmed |------- 512mib trimmed ------|
> 
> we'll merge these two, but really we should probably trim that 512mib chunk
> we're adding right?  Thanks,
> 

So that's the crux of the problem. I'm not sure if it's right to make
heuristics around this and have merging thresholds because it makes the
code tricker + not necessarily correct. A contrived case would be
something where we go through a few iterations of merging because we
pulled stuff out of the bitmaps and that then was able to merge more
free space. How do you what the right balance is for merging extents?

I kind of favor the overeager approach for now because it is always
correct to rediscard regions, but forgetting about regions means it may
go undiscarded for some unbounded time in the future.  This also
makes life the easiest for find_free_extent().

As I said, I'm not sure what the right thing to do is, so I favored
being accurate.  This is something I'm happy to change depending on
discussion and on further data I collect.

I added a comment, I might need to make it more indepth, but it's a
start (I'll revisit before v2).

Thanks,
Dennis
David Sterba Oct. 15, 2019, 12:17 p.m. UTC | #7
On Mon, Oct 07, 2019 at 04:17:34PM -0400, Dennis Zhou wrote:
> @@ -2165,6 +2173,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
>  	bool merged = false;
>  	u64 offset = info->offset;
>  	u64 bytes = info->bytes;
> +	bool is_trimmed = btrfs_free_space_trimmed(info);

Please add a const in such cases. I've been doing that in other patches
but as more iterations are expected, let's have it there from the
beginning.

> --- a/fs/btrfs/free-space-cache.h
> +++ b/fs/btrfs/free-space-cache.h
> @@ -6,6 +6,8 @@
>  #ifndef BTRFS_FREE_SPACE_CACHE_H
>  #define BTRFS_FREE_SPACE_CACHE_H
>  
> +#define BTRFS_FSC_TRIMMED		(1UL << 0)

Please add a comment
Dennis Zhou Oct. 15, 2019, 7:58 p.m. UTC | #8
On Tue, Oct 15, 2019 at 02:17:55PM +0200, David Sterba wrote:
> On Mon, Oct 07, 2019 at 04:17:34PM -0400, Dennis Zhou wrote:
> > @@ -2165,6 +2173,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> >  	bool merged = false;
> >  	u64 offset = info->offset;
> >  	u64 bytes = info->bytes;
> > +	bool is_trimmed = btrfs_free_space_trimmed(info);
> 
> Please add a const in such cases. I've been doing that in other patches
> but as more iterations are expected, let's have it there from the
> beginning.
> 

Done.

> > --- a/fs/btrfs/free-space-cache.h
> > +++ b/fs/btrfs/free-space-cache.h
> > @@ -6,6 +6,8 @@
> >  #ifndef BTRFS_FREE_SPACE_CACHE_H
> >  #define BTRFS_FREE_SPACE_CACHE_H
> >  
> > +#define BTRFS_FSC_TRIMMED		(1UL << 0)
> 
> Please add a comment

I've switched this to an enum and added a comment above it.

Thanks,
Dennis

Patch
diff mbox series

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 77a5904756c5..b9e3bedad878 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2782,7 +2782,7 @@  fetch_cluster_info(struct btrfs_fs_info *fs_info,
 }
 
 static int unpin_extent_range(struct btrfs_fs_info *fs_info,
-			      u64 start, u64 end,
+			      u64 start, u64 end, u32 fsc_flags,
 			      const bool return_free_space)
 {
 	struct btrfs_block_group_cache *cache = NULL;
@@ -2816,7 +2816,9 @@  static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 		if (start < cache->last_byte_to_unpin) {
 			len = min(len, cache->last_byte_to_unpin - start);
 			if (return_free_space)
-				btrfs_add_free_space(cache, start, len);
+				__btrfs_add_free_space(fs_info,
+						       cache->free_space_ctl,
+						       start, len, fsc_flags);
 		}
 
 		start += len;
@@ -2894,6 +2896,7 @@  int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 
 	while (!trans->aborted) {
 		struct extent_state *cached_state = NULL;
+		u32 fsc_flags = 0;
 
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
@@ -2903,12 +2906,14 @@  int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 			break;
 		}
 
-		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
+		if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
 			ret = btrfs_discard_extent(fs_info, start,
 						   end + 1 - start, NULL);
+			fsc_flags |= BTRFS_FSC_TRIMMED;
+		}
 
 		clear_extent_dirty(unpin, start, end, &cached_state);
-		unpin_extent_range(fs_info, start, end, true);
+		unpin_extent_range(fs_info, start, end, fsc_flags, true);
 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		free_extent_state(cached_state);
 		cond_resched();
@@ -5512,7 +5517,7 @@  u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
 				   u64 start, u64 end)
 {
-	return unpin_extent_range(fs_info, start, end, false);
+	return unpin_extent_range(fs_info, start, end, 0, false);
 }
 
 /*
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d54dcd0ab230..f119895292b8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -747,6 +747,14 @@  static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			goto free_cache;
 		}
 
+		/*
+		 * Sync discard ensures that the free space cache is always
+		 * trimmed.  So when reading this in, the state should reflect
+		 * that.
+		 */
+		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
+			e->flags |= BTRFS_FSC_TRIMMED;
+
 		if (!e->bytes) {
 			kmem_cache_free(btrfs_free_space_cachep, e);
 			goto free_cache;
@@ -2165,6 +2173,7 @@  static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 	bool merged = false;
 	u64 offset = info->offset;
 	u64 bytes = info->bytes;
+	bool is_trimmed = btrfs_free_space_trimmed(info);
 
 	/*
 	 * first we want to see if there is free space adjacent to the range we
@@ -2178,7 +2187,8 @@  static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 	else
 		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
 
-	if (right_info && !right_info->bitmap) {
+	if (right_info && !right_info->bitmap &&
+	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
 		if (update_stat)
 			unlink_free_space(ctl, right_info);
 		else
@@ -2189,7 +2199,8 @@  static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 	}
 
 	if (left_info && !left_info->bitmap &&
-	    left_info->offset + left_info->bytes == offset) {
+	    left_info->offset + left_info->bytes == offset &&
+	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
 		if (update_stat)
 			unlink_free_space(ctl, left_info);
 		else
@@ -2225,6 +2236,9 @@  static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
 	bytes = (j - i) * ctl->unit;
 	info->bytes += bytes;
 
+	if (!btrfs_free_space_trimmed(bitmap))
+		info->flags &= ~BTRFS_FSC_TRIMMED;
+
 	if (update_stat)
 		bitmap_clear_bits(ctl, bitmap, end, bytes);
 	else
@@ -2278,6 +2292,9 @@  static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
 	info->offset -= bytes;
 	info->bytes += bytes;
 
+	if (!btrfs_free_space_trimmed(bitmap))
+		info->flags &= ~BTRFS_FSC_TRIMMED;
+
 	if (update_stat)
 		bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
 	else
@@ -2327,7 +2344,7 @@  static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
 
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 			   struct btrfs_free_space_ctl *ctl,
-			   u64 offset, u64 bytes)
+			   u64 offset, u64 bytes, u32 flags)
 {
 	struct btrfs_free_space *info;
 	int ret = 0;
@@ -2338,6 +2355,7 @@  int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 
 	info->offset = offset;
 	info->bytes = bytes;
+	info->flags = flags;
 	RB_CLEAR_NODE(&info->offset_index);
 
 	spin_lock(&ctl->tree_lock);
@@ -2385,7 +2403,7 @@  int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 {
 	return __btrfs_add_free_space(block_group->fs_info,
 				      block_group->free_space_ctl,
-				      bytenr, size);
+				      bytenr, size, 0);
 }
 
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
@@ -2460,8 +2478,11 @@  int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 			}
 			spin_unlock(&ctl->tree_lock);
 
-			ret = btrfs_add_free_space(block_group, offset + bytes,
-						   old_end - (offset + bytes));
+			ret = __btrfs_add_free_space(block_group->fs_info,
+						     ctl,
+						     offset + bytes,
+						     old_end - (offset + bytes),
+						     info->flags);
 			WARN_ON(ret);
 			goto out;
 		}
@@ -2630,6 +2651,7 @@  u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 	u64 ret = 0;
 	u64 align_gap = 0;
 	u64 align_gap_len = 0;
+	u64 align_gap_flags = 0;
 
 	spin_lock(&ctl->tree_lock);
 	entry = find_free_space(ctl, &offset, &bytes_search,
@@ -2646,6 +2668,7 @@  u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 		unlink_free_space(ctl, entry);
 		align_gap_len = offset - entry->offset;
 		align_gap = entry->offset;
+		align_gap_flags = entry->flags;
 
 		entry->offset = offset + bytes;
 		WARN_ON(entry->bytes < bytes + align_gap_len);
@@ -2661,7 +2684,8 @@  u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 
 	if (align_gap_len)
 		__btrfs_add_free_space(block_group->fs_info, ctl,
-				       align_gap, align_gap_len);
+				       align_gap, align_gap_len,
+				       align_gap_flags);
 	return ret;
 }
 
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 39c32c8fc24f..ab3dfc00abb5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -6,6 +6,8 @@ 
 #ifndef BTRFS_FREE_SPACE_CACHE_H
 #define BTRFS_FREE_SPACE_CACHE_H
 
+#define BTRFS_FSC_TRIMMED		(1UL << 0)
+
 struct btrfs_free_space {
 	struct rb_node offset_index;
 	u64 offset;
@@ -13,8 +15,14 @@  struct btrfs_free_space {
 	u64 max_extent_size;
 	unsigned long *bitmap;
 	struct list_head list;
+	u32 flags;
 };
 
+static inline bool btrfs_free_space_trimmed(struct btrfs_free_space *info)
+{
+	return (info->flags & BTRFS_FSC_TRIMMED);
+}
+
 struct btrfs_free_space_ctl {
 	spinlock_t tree_lock;
 	struct rb_root free_space_offset;
@@ -84,7 +92,7 @@  int btrfs_write_out_ino_cache(struct btrfs_root *root,
 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 			   struct btrfs_free_space_ctl *ctl,
-			   u64 bytenr, u64 size);
+			   u64 bytenr, u64 size, u32 flags);
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 			 u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 63cad7865d75..00e225de4fe6 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -107,7 +107,7 @@  static int caching_kthread(void *data)
 
 		if (last != (u64)-1 && last + 1 != key.objectid) {
 			__btrfs_add_free_space(fs_info, ctl, last + 1,
-					       key.objectid - last - 1);
+					       key.objectid - last - 1, 0);
 			wake_up(&root->ino_cache_wait);
 		}
 
@@ -118,7 +118,7 @@  static int caching_kthread(void *data)
 
 	if (last < root->highest_objectid - 1) {
 		__btrfs_add_free_space(fs_info, ctl, last + 1,
-				       root->highest_objectid - last - 1);
+				       root->highest_objectid - last - 1, 0);
 	}
 
 	spin_lock(&root->ino_cache_lock);
@@ -175,7 +175,8 @@  static void start_caching(struct btrfs_root *root)
 	ret = btrfs_find_free_objectid(root, &objectid);
 	if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
 		__btrfs_add_free_space(fs_info, ctl, objectid,
-				       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
+				       BTRFS_LAST_FREE_OBJECTID - objectid + 1,
+				       0);
 		wake_up(&root->ino_cache_wait);
 	}
 
@@ -221,7 +222,7 @@  void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 		return;
 again:
 	if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
+		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
 	} else {
 		down_write(&fs_info->commit_root_sem);
 		spin_lock(&root->ino_cache_lock);
@@ -234,7 +235,7 @@  void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 
 		start_caching(root);
 
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
+		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
 
 		up_write(&fs_info->commit_root_sem);
 	}
@@ -281,7 +282,7 @@  void btrfs_unpin_free_ino(struct btrfs_root *root)
 		spin_unlock(rbroot_lock);
 		if (count)
 			__btrfs_add_free_space(root->fs_info, ctl,
-					       info->offset, count);
+					       info->offset, count, 0);
 		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 }