
[v2,04/19] btrfs: remove the open-coded disk-key read

Message ID 20200915053532.63279-5-wqu@suse.com (mailing list archive)
State New, archived
Series btrfs: add read-only support for subpage sector size

Commit Message

Qu Wenruo Sept. 15, 2020, 5:35 a.m. UTC
generic_bin_search() distinguishes between reading a key which doesn't
cross a page and one which does. However this distinction is not
necessary since read_extent_buffer handles both cases transparently.

Just use read_extent_buffer to streamline the code.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/ctree.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

Comments

Nikolay Borisov Sept. 15, 2020, 8:36 a.m. UTC | #1
On 15.09.20 at 8:35, Qu Wenruo wrote:
> generic_bin_search() distinguishes between reading a key which doesn't
> cross a page and one which does. However this distinction is not
> necessary since read_extent_buffer handles both cases transparently.
> 
> Just use read_extent_buffer to streamline the code.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Johannes Thumshirn Sept. 15, 2020, 8:40 a.m. UTC | #2
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
David Sterba Sept. 16, 2020, 4:01 p.m. UTC | #3
On Tue, Sep 15, 2020 at 01:35:17PM +0800, Qu Wenruo wrote:
> generic_bin_search() distinguishes between reading a key which doesn't
> cross a page and one which does. However this distinction is not
> necessary since read_extent_buffer handles both cases transparently.
> 
> Just use read_extent_buffer to streamline the code.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/ctree.c | 13 ++-----------
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index cd1cd673bc0b..e204e1320745 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -1697,7 +1697,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
>  	}
>  
>  	while (low < high) {
> -		unsigned long oip;
>  		unsigned long offset;
>  		struct btrfs_disk_key *tmp;
>  		struct btrfs_disk_key unaligned;
> @@ -1705,17 +1704,9 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
>  
>  		mid = (low + high) / 2;
>  		offset = p + mid * item_size;
> -		oip = offset_in_page(offset);
>  
> -		if (oip + key_size <= PAGE_SIZE) {
> -			const unsigned long idx = offset >> PAGE_SHIFT;
> -			char *kaddr = page_address(eb->pages[idx]);
> -
> -			tmp = (struct btrfs_disk_key *)(kaddr + oip);
> -		} else {
> -			read_extent_buffer(eb, &unaligned, offset, key_size);
> -			tmp = &unaligned;
> -		}
> +		read_extent_buffer(eb, &unaligned, offset, key_size);
> +		tmp = &unaligned;

Reading from the first page is a performance optimization on systems
with 4K pages, i.e. the majority. I'm not in favor of removing it just
to make the code look nicer.
Qu Wenruo Sept. 17, 2020, 8:02 a.m. UTC | #4
On 2020/9/17 at 12:01 AM, David Sterba wrote:
> On Tue, Sep 15, 2020 at 01:35:17PM +0800, Qu Wenruo wrote:
>> generic_bin_search() distinguishes between reading a key which doesn't
>> cross a page and one which does. However this distinction is not
>> necessary since read_extent_buffer handles both cases transparently.
>>
>> Just use read_extent_buffer to streamline the code.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>  fs/btrfs/ctree.c | 13 ++-----------
>>  1 file changed, 2 insertions(+), 11 deletions(-)
>>
>> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
>> index cd1cd673bc0b..e204e1320745 100644
>> --- a/fs/btrfs/ctree.c
>> +++ b/fs/btrfs/ctree.c
>> @@ -1697,7 +1697,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
>>  	}
>>  
>>  	while (low < high) {
>> -		unsigned long oip;
>>  		unsigned long offset;
>>  		struct btrfs_disk_key *tmp;
>>  		struct btrfs_disk_key unaligned;
>> @@ -1705,17 +1704,9 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
>>  
>>  		mid = (low + high) / 2;
>>  		offset = p + mid * item_size;
>> -		oip = offset_in_page(offset);
>>  
>> -		if (oip + key_size <= PAGE_SIZE) {
>> -			const unsigned long idx = offset >> PAGE_SHIFT;
>> -			char *kaddr = page_address(eb->pages[idx]);
>> -
>> -			tmp = (struct btrfs_disk_key *)(kaddr + oip);
>> -		} else {
>> -			read_extent_buffer(eb, &unaligned, offset, key_size);
>> -			tmp = &unaligned;
>> -		}
>> +		read_extent_buffer(eb, &unaligned, offset, key_size);
>> +		tmp = &unaligned;
> 
> Reading from the first page is a performance optimization on systems
> with 4K pages, i.e. the majority. I'm not in favor of removing it just
> to make the code look nicer.
> 

For a 4K page system, the optimization only saves the cost of one
read_extent_buffer() call.

Inside read_extent_buffer() we still end up doing the same thing.

Otherwise we will need to manually call get_eb_page_offset() here to
make it work for subpage.

I wonder if it's really worth it.
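
For reference, a rough sketch of what get_eb_page_offset() and
get_eb_page_index() could look like (an illustration only, assuming
subpage metadata keeps a whole tree block inside one page):

	/* In-page offset; for subpage the eb start inside the page matters. */
	static inline size_t get_eb_page_offset(const struct extent_buffer *eb,
						unsigned long offset)
	{
		return offset_in_page(eb->start + offset);
	}

	static inline unsigned long get_eb_page_index(unsigned long offset)
	{
		/* A subpage tree block never crosses a page boundary. */
		return offset >> PAGE_SHIFT;
	}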

Thanks,
Qu
David Sterba Sept. 17, 2020, 12:37 p.m. UTC | #5
On Thu, Sep 17, 2020 at 04:02:37PM +0800, Qu Wenruo wrote:
> On 2020/9/17 at 12:01 AM, David Sterba wrote:
> > 
> > Reading from the first page is a performance optimization on systems
> > with 4K pages, ie. the majority. I'm not in favor removing it just to
> > make the code look nicer.
> 
> For a 4K page system, the optimization only saves the cost of one
> read_extent_buffer() call.

This evaluation is wrong; you missed several things that
generic_bin_search and read_extent_buffer do.

generic_bin_search is called very often, on every search_slot, so the
optimization is worth it here.

read_extent_buffer is used _only_ for keys that cross a page boundary,
so we need to read the bytes in two steps, and this is wrapped into a
function that we call in a limited number of cases.

In all other cases, when the whole key is contained in the page, the
read is inlined in generic_bin_search, i.e. no function call overhead.

> Otherwise we will need to manually call get_eb_page_offset() here to
> make it work for subpage.

For a nodesize smaller than PAGE_SIZE there's no page crossing at all,
so using read_extent_buffer would make things worse.
Qu Wenruo Sept. 17, 2020, 1:15 p.m. UTC | #6
On 2020/9/17 at 8:37 PM, David Sterba wrote:
> On Thu, Sep 17, 2020 at 04:02:37PM +0800, Qu Wenruo wrote:
>> On 2020/9/17 at 12:01 AM, David Sterba wrote:
>>>
>>> Reading from the first page is a performance optimization on systems
>>> with 4K pages, i.e. the majority. I'm not in favor of removing it just
>>> to make the code look nicer.
>>
>> For a 4K page system, the optimization only saves the cost of one
>> read_extent_buffer() call.
> 
> This evaluation is wrong; you missed several things that
> generic_bin_search and read_extent_buffer do.
> 
> generic_bin_search is called very often, on every search_slot, so the
> optimization is worth it here.
> 
> read_extent_buffer is used _only_ for keys that cross a page boundary,
> so we need to read the bytes in two steps, and this is wrapped into a
> function that we call in a limited number of cases.

Then to me, the better solution is to split read_extent_buffer() into
two parts.

Part 1 handles the same-page read and should be made inline. That part
should be small enough, as it only involves the in-page offset
calculation, which is already done in the current generic_bin_search.

Part 2 then handles the cross-page case, and that part can stay a
function call.

Personally speaking, even though generic_bin_search() is a hot path, I
still don't believe it's worth it, as read_extent_buffer() itself is
also frequently called from other locations, and I have never seen
special handling for it anywhere else.

Anyway, I will use the get_eb_page_offset()/get_eb_page_index() macros
here first, or subpage will be completely screwed.

And then I will try that two-part solution for read_extent_buffer().

Thanks,
Qu

> 
> In all other cases, when the whole key is contained in the page, the
> read is inlined in generic_bin_search, i.e. no function call overhead.
> 
>> Otherwise we will need to manually call get_eb_page_offset() here to
>> make it work for subpage.
> 
> For a nodesize smaller than PAGE_SIZE there's no page crossing at all,
> so using read_extent_buffer would make things worse.
>
David Sterba Sept. 17, 2020, 10:41 p.m. UTC | #7
On Thu, Sep 17, 2020 at 09:15:31PM +0800, Qu Wenruo wrote:
> Then to me, the better solution is to split read_extent_buffer() into
> two parts.
> 
> Part 1 handles the same-page read and should be made inline. That part
> should be small enough, as it only involves the in-page offset
> calculation, which is already done in the current generic_bin_search.

Sounds easy, but the result is awful. The inlined part 1 is not that
small and gets duplicated for each call of read_extent_buffer: it
explodes the code size and increases the stack consumption of all
callers.

> Part 2 then handles the cross-page case, and that part can stay a
> function call.
> 
> Personally speaking, even though generic_bin_search() is a hot path, I
> still don't believe it's worth it, as read_extent_buffer() itself is
> also frequently called from other locations, and I have never seen
> special handling for it anywhere else.

The usage pattern is different: many other locations call
read_extent_buffer just once to read some data and process it. There
are very few functions that call it in a loop.

OTOH, bin_search jumps over the sorted array of node keys and does not
even have to read the actual key for the comparison, because it
understands the on-disk key format and just sets the pointer. Calling
read_extent_buffer for each of them would just waste cycles copying the
key into the tmp variable.
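
For the record, the on-disk key is a tiny packed structure:

	struct btrfs_disk_key {
		__le64 objectid;
		__u8 type;
		__le64 offset;
	} __attribute__ ((__packed__));

i.e. key_size is just 17 bytes, so each copy is cheap on its own; it is
the per-iteration call overhead that adds up.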

> Anyway, I will use the get_eb_page_offset()/get_eb_page_index() macros
> here first, or subpage will be completely screwed.
> 
> And then I will try that two-part solution for read_extent_buffer().

Some numbers from a release build, patch below:

object size:

   text    data     bss     dec     hex filename
1099317   17972   14912 1132201  1146a9 pre/btrfs.ko
1165930   17972   14912 1198814  124ade post/btrfs.ko

DELTA: +66613

Stack usage meter:

send_clone                                                        +16 (128 -> 144)
btree_readpage_end_io_hook                                        +40 (168 -> 208)
btrfs_lookup_csum                                                  +8 (104 -> 112)
find_free_dev_extent_start                                         +8 (144 -> 152)
__btrfs_commit_inode_delayed_items                                 +8 (168 -> 176)
btrfs_exclude_logged_extents                                       +8 (72 -> 80)
btrfs_set_inode_index                                             +16 (88 -> 104)
btrfs_shrink_device                                                +8 (160 -> 168)
find_parent_nodes                                                  -8 (312 -> 304)
__add_to_free_space_tree                                          +16 (112 -> 128)
btrfs_truncate_inode_items                                         -8 (360 -> 352)
ref_get_fields                                                    +16 (48 -> 64)
btrfs_qgroup_trace_leaf_items                                      +8 (80 -> 88)
did_create_dir                                                     +8 (112 -> 120)
free_space_next_bitmap                                            +32 (56 -> 88)
btrfs_lookup_bio_sums                                             +24 (216 -> 240)
btrfs_read_qgroup_config                                           +8 (120 -> 128)
btrfs_check_ref_name_override                                     +16 (152 -> 168)
btrfs_uuid_tree_iterate                                            +8 (128 -> 136)
log_dir_items                                                     +16 (160 -> 176)
btrfs_ioctl_send                                                  +16 (216 -> 232)
btrfs_get_parent                                                  +16 (80 -> 96)
__btrfs_inc_extent_ref                                             +8 (128 -> 136)
btrfs_unlink_subvol                                               +16 (144 -> 160)
btrfs_del_csums                                                    +8 (184 -> 192)
btrfs_mount                                                       -16 (184 -> 168)
generic_bin_search                                                 +8 (104 -> 112)
btrfs_uuid_tree_add                                               +16 (128 -> 144)
free_space_test_bit                                                +8 (72 -> 80)
btrfs_init_dev_stats                                              +16 (160 -> 176)
btrfs_read_chunk_tree                                             +48 (208 -> 256)
process_all_refs                                                  +16 (104 -> 120)
... and this goes on

LOST (80):
        btrfs_ioctl_setflags                                       80

NEW (208):
        __read_extent_buffer                                       24
        get_order                                                   8
        btrfs_search_path_in_tree_user                            176
LOST/NEW DELTA:     +128
PRE/POST DELTA:    +1944

---

Here's the patch. I'm now quite sure we don't want to split
read_extent_buffer, and we should keep the bin_search optimization as
is.

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index afac70ef0cc5..77c1df5771bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5584,7 +5584,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 	return ret;
 }
 
-static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
+bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
 			    unsigned long len)
 {
 	btrfs_warn(eb->fs_info,
@@ -5595,45 +5595,17 @@ static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
 	return true;
 }
 
-/*
- * Check if the [start, start + len) range is valid before reading/writing
- * the eb.
- * NOTE: @start and @len are offset inside the eb, not logical address.
- *
- * Caller should not touch the dst/src memory if this function returns error.
- */
-static inline int check_eb_range(const struct extent_buffer *eb,
-				 unsigned long start, unsigned long len)
-{
-	unsigned long offset;
-
-	/* start, start + len should not go beyond eb->len nor overflow */
-	if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len))
-		return report_eb_range(eb, start, len);
-
-	return false;
-}
-
-void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
+void __read_extent_buffer(const struct extent_buffer *eb, void *dstv,
 			unsigned long start, unsigned long len)
 {
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
+	unsigned long offset = offset_in_page(start);
 	char *dst = (char *)dstv;
 	unsigned long i = start >> PAGE_SHIFT;
 
-	if (check_eb_range(eb, start, len))
-		return;
-
-	offset = offset_in_page(start);
-
 	while (len > 0) {
-		page = eb->pages[i];
+		const char *kaddr = page_address(eb->pages[i]);
+		size_t cur = min(len, (PAGE_SIZE - offset));
 
-		cur = min(len, (PAGE_SIZE - offset));
-		kaddr = page_address(page);
 		memcpy(dst, kaddr + offset, cur);
 
 		dst += cur;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3bbc25b816ea..7ea53794f927 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -241,9 +241,57 @@ static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
 
 int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
 			 unsigned long start, unsigned long len);
+/* NEW */
+
+bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
+			    unsigned long len);
+void __read_extent_buffer(const struct extent_buffer *eb, void *dst,
+			unsigned long start,
+			unsigned long len);
+/*
+ * Check if the [start, start + len) range is valid before reading/writing
+ * the eb.
+ * NOTE: @start and @len are offset inside the eb, not logical address.
+ *
+ * Caller should not touch the dst/src memory if this function returns error.
+ */
+static inline int check_eb_range(const struct extent_buffer *eb,
+				 unsigned long start, unsigned long len)
+{
+	unsigned long offset;
+
+	/* start, start + len should not go beyond eb->len nor overflow */
+	if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len))
+		return report_eb_range(eb, start, len);
+
+	return false;
+}
+
+static inline void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
+				      unsigned long start, unsigned long len)
+{
+	const unsigned long oip = offset_in_page(start);
+
+	if (check_eb_range(eb, start, len))
+		return;
+
+	if (likely(oip + len <= PAGE_SIZE)) {
+		const unsigned long idx = start >> PAGE_SHIFT;
+		const char *kaddr = page_address(eb->pages[idx]);
+
+		memcpy(dstv, kaddr + oip, len);
+		return;
+	}
+
+	__read_extent_buffer(eb, dstv, start, len);
+}
+
+/* END */
+/*
 void read_extent_buffer(const struct extent_buffer *eb, void *dst,
 			unsigned long start,
 			unsigned long len);
+*/
 int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
 				       void __user *dst, unsigned long start,
 				       unsigned long len);
Qu Wenruo Sept. 17, 2020, 11:26 p.m. UTC | #8
On 2020/9/18 at 6:41 AM, David Sterba wrote:
> On Thu, Sep 17, 2020 at 09:15:31PM +0800, Qu Wenruo wrote:
>> Then to me, the better solution is to split read_extent_buffer() into
>> two parts.
>>
>> Part 1 handles the same-page read and should be made inline. That part
>> should be small enough, as it only involves the in-page offset
>> calculation, which is already done in the current generic_bin_search.
> 
> Sounds easy, but the result is awful. The inlined part 1 is not that
> small and gets duplicated for each call of read_extent_buffer: it
> explodes the code size and increases the stack consumption of all
> callers.
> 
>> Part 2 then handles the cross-page case, and that part can stay a
>> function call.
>>
>> Personally speaking, even though generic_bin_search() is a hot path, I
>> still don't believe it's worth it, as read_extent_buffer() itself is
>> also frequently called from other locations, and I have never seen
>> special handling for it anywhere else.
> 
> The usage pattern is different: many other locations call
> read_extent_buffer just once to read some data and process it. There
> are very few functions that call it in a loop.
> 
> OTOH, bin_search jumps over the sorted array of node keys and does not
> even have to read the actual key for the comparison, because it
> understands the on-disk key format and just sets the pointer. Calling
> read_extent_buffer for each of them would just waste cycles copying the
> key into the tmp variable.
> 
>> Anyway, I will use the get_eb_page_offset()/get_eb_page_index() macros
>> here first, or subpage will be completely screwed.
>>
>> And then I will try that two-part solution for read_extent_buffer().
> 
> Here's the patch. I'm now quite sure we don't want to split
> read_extent_buffer, and we should keep the bin_search optimization as
> is.

Fine, I'll change the patch to use get_eb_page_offset/index() in
generic_bin_search().
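
Roughly, the plan is to keep the fast path but compute the page index
and in-page offset through those helpers; a sketch of the intended
change (not the final patch):

		mid = (low + high) / 2;
		offset = p + mid * item_size;
		oip = get_eb_page_offset(eb, offset);

		if (oip + key_size <= PAGE_SIZE) {
			const unsigned long idx = get_eb_page_index(offset);
			char *kaddr = page_address(eb->pages[idx]);

			/* The key is fully inside one page, point at it. */
			tmp = (struct btrfs_disk_key *)(kaddr + oip);
		} else {
			/* The key crosses a page, copy it out in two steps. */
			read_extent_buffer(eb, &unaligned, offset, key_size);
			tmp = &unaligned;
		}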


Patch

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cd1cd673bc0b..e204e1320745 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1697,7 +1697,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 	}
 
 	while (low < high) {
-		unsigned long oip;
 		unsigned long offset;
 		struct btrfs_disk_key *tmp;
 		struct btrfs_disk_key unaligned;
@@ -1705,17 +1704,9 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
-		oip = offset_in_page(offset);
 
-		if (oip + key_size <= PAGE_SIZE) {
-			const unsigned long idx = offset >> PAGE_SHIFT;
-			char *kaddr = page_address(eb->pages[idx]);
-
-			tmp = (struct btrfs_disk_key *)(kaddr + oip);
-		} else {
-			read_extent_buffer(eb, &unaligned, offset, key_size);
-			tmp = &unaligned;
-		}
+		read_extent_buffer(eb, &unaligned, offset, key_size);
+		tmp = &unaligned;
 
 		ret = comp_keys(tmp, key);