diff mbox series

mm/vmalloc: use rb_tree instead of list for vread() lookups

Message ID 20210208155303.10523-1-serapheim@delphix.com (mailing list archive)
State New, archived
Headers show
Series mm/vmalloc: use rb_tree instead of list for vread() lookups | expand

Commit Message

Serapheim Dimitropoulos Feb. 8, 2021, 3:53 p.m. UTC
vread() has been linearly searching vmap_area_list for looking up
vmalloc areas to read from. These same areas are also tracked by
a rb_tree (vmap_area_root) which offers logarithmic lookup.

This patch modifies vread() to use the rb_tree structure instead
of the list and the speedup for heavy /proc/kcore readers can
be pretty significant. Below are the wall clock measurements of
a Python application that leverages the drgn debugging library
to read and interpret data read from /proc/kcore.

Before the patch:
-----
$ time sudo sdb -e 'dbuf | head 2500 | wc'
(unsigned long)2500

real	0m21.128s
user	0m2.321s
sys	0m19.227s
-----

With the patch:
-----
$ time sudo sdb -e 'dbuf | head 2500 | wc'
(unsigned long)2500

real	0m1.870s
user	0m1.628s
sys	0m0.660s
-----

Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
---
 mm/vmalloc.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

Comments

Uladzislau Rezki Feb. 8, 2021, 7:01 p.m. UTC | #1
On Mon, Feb 08, 2021 at 03:53:03PM +0000, Serapheim Dimitropoulos wrote:
> vread() has been linearly searching vmap_area_list for looking up
> vmalloc areas to read from. These same areas are also tracked by
> a rb_tree (vmap_area_root) which offers logarithmic lookup.
> 
> This patch modifies vread() to use the rb_tree structure instead
> of the list and the speedup for heavy /proc/kcore readers can
> be pretty significant. Below are the wall clock measurements of
> a Python application that leverages the drgn debugging library
> to read and interpret data read from /proc/kcore.
> 
> Before the patch:
> -----
> $ time sudo sdb -e 'dbuf | head 2500 | wc'
> (unsigned long)2500
> 
> real	0m21.128s
> user	0m2.321s
> sys	0m19.227s
> -----
> 
> With the patch:
> -----
> $ time sudo sdb -e 'dbuf | head 2500 | wc'
> (unsigned long)2500
> 
> real	0m1.870s
> user	0m1.628s
> sys	0m0.660s
> -----
> 
> Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
> ---
>  mm/vmalloc.c | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 49ab9b6c001d..86343b879938 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -2851,6 +2851,7 @@ long vread(char *buf, char *addr, unsigned long count)
>  {
>  	struct vmap_area *va;
>  	struct vm_struct *vm;
> +	struct rb_node *node;
>  	char *vaddr, *buf_start = buf;
>  	unsigned long buflen = count;
>  	unsigned long n;
> @@ -2860,17 +2861,15 @@ long vread(char *buf, char *addr, unsigned long count)
>  		count = -(unsigned long) addr;
>  
>  	spin_lock(&vmap_area_lock);
> -	list_for_each_entry(va, &vmap_area_list, list) {
> -		if (!count)
> -			break;
> -
> +	va = __find_vmap_area((unsigned long)addr);
> +	if (!va)
> +		goto finished;
> +	while (count) {
>  		if (!va->vm)
> -			continue;
> +			goto next_node;
>  
>  		vm = va->vm;
>  		vaddr = (char *) vm->addr;
> -		if (addr >= vaddr + get_vm_area_size(vm))
> -			continue;
>  		while (addr < vaddr) {
>  			if (count == 0)
>  				goto finished;
> @@ -2889,6 +2888,12 @@ long vread(char *buf, char *addr, unsigned long count)
>  		buf += n;
>  		addr += n;
>  		count -= n;
> +
> +next_node:
> +		node = rb_next(&va->rb_node);
> +		if (!node)
> +			break;
> +		va = rb_entry(node, struct vmap_area, rb_node);
>
You can also improve it. Instead of rb_next() you can directly access
to a "next" element via "va->list" making it O(1) complexity.

--
Vlad Rezki
diff mbox series

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 49ab9b6c001d..86343b879938 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2851,6 +2851,7 @@  long vread(char *buf, char *addr, unsigned long count)
 {
 	struct vmap_area *va;
 	struct vm_struct *vm;
+	struct rb_node *node;
 	char *vaddr, *buf_start = buf;
 	unsigned long buflen = count;
 	unsigned long n;
@@ -2860,17 +2861,15 @@  long vread(char *buf, char *addr, unsigned long count)
 		count = -(unsigned long) addr;
 
 	spin_lock(&vmap_area_lock);
-	list_for_each_entry(va, &vmap_area_list, list) {
-		if (!count)
-			break;
-
+	va = __find_vmap_area((unsigned long)addr);
+	if (!va)
+		goto finished;
+	while (count) {
 		if (!va->vm)
-			continue;
+			goto next_node;
 
 		vm = va->vm;
 		vaddr = (char *) vm->addr;
-		if (addr >= vaddr + get_vm_area_size(vm))
-			continue;
 		while (addr < vaddr) {
 			if (count == 0)
 				goto finished;
@@ -2889,6 +2888,12 @@  long vread(char *buf, char *addr, unsigned long count)
 		buf += n;
 		addr += n;
 		count -= n;
+
+next_node:
+		node = rb_next(&va->rb_node);
+		if (!node)
+			break;
+		va = rb_entry(node, struct vmap_area, rb_node);
 	}
 finished:
 	spin_unlock(&vmap_area_lock);