diff mbox series

[v2,01/39] btrfs: backref: Introduce the skeleton of btrfs_backref_iter

Message ID 20200326083316.48847-2-wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: qgroup: Use backref cache based backref walk for commit roots | expand

Commit Message

Qu Wenruo March 26, 2020, 8:32 a.m. UTC
Due to the complex nature of btrfs extent tree, when we want to iterate
all backrefs of one extent, it involves quite a lot of work, like
searching the EXTENT_ITEM/METADATA_ITEM, iteration through inline and keyed
backrefs.

Normally this would result pretty complex code, something like:
  btrfs_search_slot()
  /* Ensure we are at EXTENT_ITEM/METADATA_ITEM */
  while (1) {	/* Loop for extent tree items */
	while (ptr < end) { /* Loop for inlined items */
		/* REAL WORK HERE */
	}
  next:
  	ret = btrfs_next_item()
	/* Ensure we're still at keyed item for specified bytenr */
  }

The idea of btrfs_backref_iter is to avoid such complex and hard to
read code structure, but something like the following:

  iter = btrfs_backref_iter_alloc();
  ret = btrfs_backref_iter_start(iter, bytenr);
  if (ret < 0)
	goto out;
  for (; ; ret = btrfs_backref_iter_next(iter)) {
	/* REAL WORK HERE */
  }
  out:
  btrfs_backref_iter_free(iter);

This patch is just the skeleton + btrfs_backref_iter_start() code.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/backref.c | 110 +++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/backref.h |  39 ++++++++++++++++
 2 files changed, 149 insertions(+)

Comments

David Sterba April 1, 2020, 3:37 p.m. UTC | #1
On Thu, Mar 26, 2020 at 04:32:38PM +0800, Qu Wenruo wrote:
> --- a/fs/btrfs/backref.h
> +++ b/fs/btrfs/backref.h
> @@ -78,4 +78,43 @@ struct prelim_ref {
>  	u64 wanted_disk_byte;
>  };
>  
> +/*
> + * Helper structure to help iterate backrefs of one extent.
> + *
> + * Now it only supports iteration for tree block in commit root.
> + */
> +struct btrfs_backref_iter {
> +	u64 bytenr;
> +	struct btrfs_path *path;
> +	struct btrfs_fs_info *fs_info;
> +	struct btrfs_key cur_key;
> +	u32 item_ptr;
> +	u32 cur_ptr;
> +	u32 end_ptr;
> +};
> +
> +struct btrfs_backref_iter *btrfs_backref_iter_alloc(
> +		struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
> +
> +static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
> +{
> +	if (!iter)
> +		return;
> +	btrfs_free_path(iter->path);
> +	kfree(iter);
> +}

Why do you make so many functions static inline? It makes sense for some
of them but in the following patches there are functions that are either
too big (so when they're inlined it bloats the asm) or called
infrequently so the inlining does not bring much. Code in header files
should be kept to minimum.

There are also functions not used anywhere else than in backref.c so
they don't need to be exported for now. For example
btrfs_backref_iter_is_inline_ref.

> +
> +int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
> +
> +static inline void
> +btrfs_backref_iter_release(struct btrfs_backref_iter *iter)

Please keep the function type and name on the same line, arguments can
go to the next line.

> +{
> +	iter->bytenr = 0;
> +	iter->item_ptr = 0;
> +	iter->cur_ptr = 0;
> +	iter->end_ptr = 0;
> +	btrfs_release_path(iter->path);
> +	memset(&iter->cur_key, 0, sizeof(iter->cur_key));
> +}
> +
>  #endif
> -- 
> 2.26.0
Qu Wenruo April 1, 2020, 11:31 p.m. UTC | #2
On 2020/4/1 下午11:37, David Sterba wrote:
> On Thu, Mar 26, 2020 at 04:32:38PM +0800, Qu Wenruo wrote:
>> --- a/fs/btrfs/backref.h
>> +++ b/fs/btrfs/backref.h
>> @@ -78,4 +78,43 @@ struct prelim_ref {
>>  	u64 wanted_disk_byte;
>>  };
>>  
>> +/*
>> + * Helper structure to help iterate backrefs of one extent.
>> + *
>> + * Now it only supports iteration for tree block in commit root.
>> + */
>> +struct btrfs_backref_iter {
>> +	u64 bytenr;
>> +	struct btrfs_path *path;
>> +	struct btrfs_fs_info *fs_info;
>> +	struct btrfs_key cur_key;
>> +	u32 item_ptr;
>> +	u32 cur_ptr;
>> +	u32 end_ptr;
>> +};
>> +
>> +struct btrfs_backref_iter *btrfs_backref_iter_alloc(
>> +		struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
>> +
>> +static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
>> +{
>> +	if (!iter)
>> +		return;
>> +	btrfs_free_path(iter->path);
>> +	kfree(iter);
>> +}
> 
> Why do you make so many functions static inline? It makes sense for some
> of them but in the following patches there are functions that are either
> too big (so when they're inlined it bloats the asm) or called
> infrequently so the inlining does not bring much. Code in header files
> should be kept to minimum.

As most of them meet the requirement for either too small, or too
infrequently called.

> 
> There are also functions not used anywhere else than in backref.c so
> they don't need to be exported for now. For example
> btrfs_backref_iter_is_inline_ref.

But it's used in later patches, thus I exported them to avoid re-export
them later.

> 
>> +
>> +int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
>> +
>> +static inline void
>> +btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
> 
> Please keep the function type and name on the same line, arguments can
> go to the next line.

Forgot this one...

Do I need to resend?

Thanks,
Qu

> 
>> +{
>> +	iter->bytenr = 0;
>> +	iter->item_ptr = 0;
>> +	iter->cur_ptr = 0;
>> +	iter->end_ptr = 0;
>> +	btrfs_release_path(iter->path);
>> +	memset(&iter->cur_key, 0, sizeof(iter->cur_key));
>> +}
>> +
>>  #endif
>> -- 
>> 2.26.0
David Sterba April 2, 2020, 1:01 a.m. UTC | #3
On Thu, Apr 02, 2020 at 07:31:28AM +0800, Qu Wenruo wrote:
> On 2020/4/1 下午11:37, David Sterba wrote:
> > On Thu, Mar 26, 2020 at 04:32:38PM +0800, Qu Wenruo wrote:
> >> --- a/fs/btrfs/backref.h
> >> +++ b/fs/btrfs/backref.h
> >> @@ -78,4 +78,43 @@ struct prelim_ref {
> >>  	u64 wanted_disk_byte;
> >>  };
> >>  
> >> +/*
> >> + * Helper structure to help iterate backrefs of one extent.
> >> + *
> >> + * Now it only supports iteration for tree block in commit root.
> >> + */
> >> +struct btrfs_backref_iter {
> >> +	u64 bytenr;
> >> +	struct btrfs_path *path;
> >> +	struct btrfs_fs_info *fs_info;
> >> +	struct btrfs_key cur_key;
> >> +	u32 item_ptr;
> >> +	u32 cur_ptr;
> >> +	u32 end_ptr;
> >> +};
> >> +
> >> +struct btrfs_backref_iter *btrfs_backref_iter_alloc(
> >> +		struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
> >> +
> >> +static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
> >> +{
> >> +	if (!iter)
> >> +		return;
> >> +	btrfs_free_path(iter->path);
> >> +	kfree(iter);
> >> +}
> > 
> > Why do you make so many functions static inline? It makes sense for some
> > of them but in the following patches there are functions that are either
> > too big (so when they're inlined it bloats the asm) or called
> > infrequently so the inlining does not bring much. Code in header files
> > should be kept to minimum.
> 
> As most of them meet the requirement for either too small, or too
> infrequently called.

So the rules or recommendations I use to decide if a function should be
static inline:

* it's like macro with type checking
* results into a few instructions (where few is like 3-6)
* the function is good for code readability, like for a helper that does
  some checks and returns a result, or derefernces a few pointers, and
  the function name is self explaining
* there should be some performance reason where the function call would
  be too costly, eg. if the function is on a hot path or in a loop

And all of that can be irrelevant if the compiler does some fancy
optimization, like function cloning where it keeps one copy intact
that's for the public interface and then inline other copies, possibly
applying more optimizations eg. based on parameters or some analysis
that splits function to hot and cold parts.

Unless we find some suboptimal result of compilation that could be fixed
by static inlines, I tend to not use them besides the trivial cases that
help code readability.

> > There are also functions not used anywhere else than in backref.c so
> > they don't need to be exported for now. For example
> > btrfs_backref_iter_is_inline_ref.
> 
> But it's used in later patches, thus I exported them to avoid re-export
> them later.

I grepped the whole branch with the backref cache and assumed that if
you introduce a function in the cleanup part, it would be used in the
other one. But btrfs_backref_iter_is_inline_ref wasn't.

> >> +int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
> >> +
> >> +static inline void
> >> +btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
> > 
> > Please keep the function type and name on the same line, arguments can
> > go to the next line.
> 
> Forgot this one...
> 
> Do I need to resend?

I fix such things when applying the patches so for that only reason it's
not necessary to resend.
Qu Wenruo April 2, 2020, 1:27 a.m. UTC | #4
On 2020/4/2 上午9:01, David Sterba wrote:
> On Thu, Apr 02, 2020 at 07:31:28AM +0800, Qu Wenruo wrote:
>> On 2020/4/1 下午11:37, David Sterba wrote:
>>> On Thu, Mar 26, 2020 at 04:32:38PM +0800, Qu Wenruo wrote:
>>>> --- a/fs/btrfs/backref.h
>>>> +++ b/fs/btrfs/backref.h
>>>> @@ -78,4 +78,43 @@ struct prelim_ref {
>>>>  	u64 wanted_disk_byte;
>>>>  };
>>>>  
>>>> +/*
>>>> + * Helper structure to help iterate backrefs of one extent.
>>>> + *
>>>> + * Now it only supports iteration for tree block in commit root.
>>>> + */
>>>> +struct btrfs_backref_iter {
>>>> +	u64 bytenr;
>>>> +	struct btrfs_path *path;
>>>> +	struct btrfs_fs_info *fs_info;
>>>> +	struct btrfs_key cur_key;
>>>> +	u32 item_ptr;
>>>> +	u32 cur_ptr;
>>>> +	u32 end_ptr;
>>>> +};
>>>> +
>>>> +struct btrfs_backref_iter *btrfs_backref_iter_alloc(
>>>> +		struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
>>>> +
>>>> +static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
>>>> +{
>>>> +	if (!iter)
>>>> +		return;
>>>> +	btrfs_free_path(iter->path);
>>>> +	kfree(iter);
>>>> +}
>>>
>>> Why do you make so many functions static inline? It makes sense for some
>>> of them but in the following patches there are functions that are either
>>> too big (so when they're inlined it bloats the asm) or called
>>> infrequently so the inlining does not bring much. Code in header files
>>> should be kept to minimum.
>>
>> As most of them meet the requirement for either too small, or too
>> infrequently called.
> 
> So the rules or recommendations I use to decide if a function should be
> static inline:
> 
> * it's like macro with type checking
> * results into a few instructions (where few is like 3-6)
> * the function is good for code readability, like for a helper that does
>   some checks and returns a result, or derefernces a few pointers, and
>   the function name is self explaining

After re-checking the backref.h, I still find most (if not all) of these
inlined functions meet these conditions.

They are short, mostly just doing basic check then free up some pointers.

> * there should be some performance reason where the function call would
>   be too costly, eg. if the function is on a hot path or in a loop

This is my main concern.

Compiler can easily "uninline" functions if they are only static inline
inside the same C file.
But if we define some function in headers, then it will always be a
function call, and can't be "uninlined"

That's my major concern, any exported functions is a barrier where
compiler can't do their best to optimize.

Thus to me, I agree condition 1~3, but no the 4th if we're handling
exported functions, as it's a single directional optimization.

If there is some proof (I don't believe though) that, compiler can
uninline/inline such exported functions, then I'm completely happy to
make them regular functions.

> 
> And all of that can be irrelevant if the compiler does some fancy
> optimization, like function cloning where it keeps one copy intact
> that's for the public interface and then inline other copies, possibly
> applying more optimizations eg. based on parameters or some analysis
> that splits function to hot and cold parts.
> 
> Unless we find some suboptimal result of compilation that could be fixed
> by static inlines, I tend to not use them besides the trivial cases that
> help code readability.

But these functions are small, self explaining, thus rarely get read
that much.

> 
>>> There are also functions not used anywhere else than in backref.c so
>>> they don't need to be exported for now. For example
>>> btrfs_backref_iter_is_inline_ref.
>>
>> But it's used in later patches, thus I exported them to avoid re-export
>> them later.
> 
> I grepped the whole branch with the backref cache and assumed that if
> you introduce a function in the cleanup part, it would be used in the
> other one. But btrfs_backref_iter_is_inline_ref wasn't.

Oh, you're right. That function is only temporary used.
It's introduced in patch 02, then utilized in patch 03, where the major
backref cache handling is still in relocation.c.

Then in patch 29, the backref cache code got moved to backref.c, then no
one utilize that function outside of backref.c, thus it no longer needs
to be exported.

Would you mind to unexport it in patch 29 too?

Thanks,
Qu

> 
>>>> +int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
>>>> +
>>>> +static inline void
>>>> +btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
>>>
>>> Please keep the function type and name on the same line, arguments can
>>> go to the next line.
>>
>> Forgot this one...
>>
>> Do I need to resend?
> 
> I fix such things when applying the patches so for that only reason it's
> not necessary to resend.
>
diff mbox series

Patch

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 9c380e7edf62..b27e90e362d6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2295,3 +2295,113 @@  void free_ipath(struct inode_fs_paths *ipath)
 	kvfree(ipath->fspath);
 	kfree(ipath);
 }
+
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
+		struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
+{
+	struct btrfs_backref_iter *ret;
+
+	ret = kzalloc(sizeof(*ret), gfp_flag);
+	if (!ret)
+		return NULL;
+
+	ret->path = btrfs_alloc_path();
+	if (!ret) {
+		kfree(ret);
+		return NULL;
+	}
+
+	/* Current backref iterator only supports iteration in commit root */
+	ret->path->search_commit_root = 1;
+	ret->path->skip_locking = 1;
+	ret->fs_info = fs_info;
+
+	return ret;
+}
+
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
+{
+	struct btrfs_fs_info *fs_info = iter->fs_info;
+	struct btrfs_path *path = iter->path;
+	struct btrfs_extent_item *ei;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_METADATA_ITEM_KEY;
+	key.offset = (u64)-1;
+	iter->bytenr = bytenr;
+
+	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+	if (ret == 0) {
+		ret = -EUCLEAN;
+		goto release;
+	}
+	if (path->slots[0] == 0) {
+		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+		ret = -EUCLEAN;
+		goto release;
+	}
+	path->slots[0]--;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
+	     key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
+		ret = -ENOENT;
+		goto release;
+	}
+	memcpy(&iter->cur_key, &key, sizeof(key));
+	iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+						    path->slots[0]);
+	iter->end_ptr = (u32)(iter->item_ptr +
+			btrfs_item_size_nr(path->nodes[0], path->slots[0]));
+	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_extent_item);
+
+	/*
+	 * Only support iteration on tree backref yet.
+	 *
+	 * This is extra precaution for non skinny-metadata, where
+	 * EXTENT_ITEM is also used for tree blocks, that we can only use
+	 * extent flags to determine if it's a tree block.
+	 */
+	if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
+		ret = -ENOTSUPP;
+		goto release;
+	}
+	iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
+
+	/* If there is no inline backref, go search for keyed backref */
+	if (iter->cur_ptr >= iter->end_ptr) {
+		ret = btrfs_next_item(fs_info->extent_root, path);
+
+		/* No inline nor keyed ref */
+		if (ret > 0) {
+			ret = -ENOENT;
+			goto release;
+		}
+		if (ret < 0)
+			goto release;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
+				path->slots[0]);
+		if (iter->cur_key.objectid != bytenr ||
+		    (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
+		     iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
+			ret = -ENOENT;
+			goto release;
+		}
+		iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
+							   path->slots[0]);
+		iter->item_ptr = iter->cur_ptr;
+		iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr(
+				      path->nodes[0], path->slots[0]));
+	}
+
+	return 0;
+release:
+	btrfs_backref_iter_release(iter);
+	return ret;
+}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 723d6da99114..4217e9019f4a 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -78,4 +78,43 @@  struct prelim_ref {
 	u64 wanted_disk_byte;
 };
 
+/*
+ * Helper structure to help iterate backrefs of one extent.
+ *
+ * Now it only supports iteration for tree block in commit root.
+ */
+struct btrfs_backref_iter {
+	u64 bytenr;
+	struct btrfs_path *path;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_key cur_key;
+	u32 item_ptr;
+	u32 cur_ptr;
+	u32 end_ptr;
+};
+
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
+		struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
+
+static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
+{
+	if (!iter)
+		return;
+	btrfs_free_path(iter->path);
+	kfree(iter);
+}
+
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
+
+static inline void
+btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
+{
+	iter->bytenr = 0;
+	iter->item_ptr = 0;
+	iter->cur_ptr = 0;
+	iter->end_ptr = 0;
+	btrfs_release_path(iter->path);
+	memset(&iter->cur_key, 0, sizeof(iter->cur_key));
+}
+
 #endif