diff mbox

[1/2] kvm tools: Add interval red-black tree helper

Message ID 1305615515-13913-1-git-send-email-levinsasha928@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sasha Levin May 17, 2011, 6:58 a.m. UTC
Interval rb-tree allows to directly store interval ranges
and quickly lookup an overlap with a single point or a range.

The helper is based on the kernel rb-tree implementation
(located in <linux/rbtree.h>) which alows for the augmention
of the classical rb-tree to be used as an interval tree.

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 tools/kvm/Makefile                      |    1 +
 tools/kvm/include/kvm/interval-rbtree.h |   27 +++++++++
 tools/kvm/util/interval-rbtree.c        |   97 +++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 0 deletions(-)
 create mode 100644 tools/kvm/include/kvm/interval-rbtree.h
 create mode 100644 tools/kvm/util/interval-rbtree.c

Comments

Ingo Molnar May 17, 2011, 7:41 a.m. UTC | #1
* Sasha Levin <levinsasha928@gmail.com> wrote:

> Interval rb-tree allows to directly store interval ranges
> and quickly lookup an overlap with a single point or a range.
> 
> The helper is based on the kernel rb-tree implementation
> (located in <linux/rbtree.h>) which alows for the augmention
> of the classical rb-tree to be used as an interval tree.
> 
> Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> ---
>  tools/kvm/Makefile                      |    1 +
>  tools/kvm/include/kvm/interval-rbtree.h |   27 +++++++++
>  tools/kvm/util/interval-rbtree.c        |   97 +++++++++++++++++++++++++++++++

Small nit, please make it rbtree-interval.c - we try to name by using the 
higher-order (more generic) concept first.

So we name in a galaxy_planet_country_city_street hierarchical fashion, not in 
a street_city_country_planet_galaxy Polish fashion. This applies to function 
names as well.

> +struct rb_int_node {
> +	struct rb_node node;
> +	u64 low;
> +	u64 high;
> +
> +	/* max_high will store the highest high of it's 2 children. */
> +	u64 max_high;
> +};

Another small nit: please align the fields vertically, like we do it for all 
other structs.

> +#include <kvm/interval-rbtree.h>
> +#include <stdio.h>
> +#include <stdlib.h>

At first sight i dont think you really need the stdio.h and stlib.h includes - 
you added these while having debugging printfs in the code?

> +#define RB_INT(n) container_of(n, struct rb_int_node, node)

please use a name that matches the kernel's rb entry definition:

#define rb_entry(ptr, type, member) container_of(ptr, type, member)

i.e. lower-case and something like:

#define rb_int_entry(ptr) container_of(ptr, struct rb_int_node, node)

But it could also be shortened to rb_int() like you did - as long as it does 
not shout at us in all capitals :-)

> +struct rb_int_node *rb_int_search_single(struct rb_root *root, u64 p)

What does 'p' mean here? Please use more descriptive (but still short) names.

> +{
> +	struct rb_node *node = root->rb_node;
> +	struct rb_node *lowest = NULL;
> +
> +	while (node) {
> +		struct rb_int_node *cur = RB_INT(node);
> +		struct rb_int_node *left;
> +		if (node->rb_left)

newline after local variable definitions please, so that it stands out better 
visually.

> +	return container_of(lowest, struct rb_int_node, node);

Using the new definition above this could be written as rb_int(lowest) i guess?

> +static void update_node_max_high(struct rb_node *node, void *arg)
> +{
> +	u64 high_left = 0, high_right = 0;
> +	struct rb_int_node *data = RB_INT(node);
> +
> +	if (node->rb_left)
> +		high_left	= RB_INT(node->rb_left)->high;
> +	if (node->rb_right)
> +		high_right	= RB_INT(node->rb_right)->high;
> +
> +	data->max_high = (high_left > high_right) ? high_left : high_right;

that's

	data->max_high = max(high_left, high_right);

correct?

> +	if (data->max_high < RB_INT(node)->high)
> +		data->max_high = RB_INT(node)->high;

and that is:

	data->max_high = max(data->max_high, RB_INT(node)->high);

right?

so writing:

	data->max_high = max(high_left, high_right);
	data->max_high = max(data->max_high, rb_int(node)->high);

makes it more obvious that we are really just tracking a maximum of 3 possibilities here.

In fact it could be all written as just a simple:

static void update_node_max_high(struct rb_node *node, void *arg)
{
	struct rb_int_node *i_node = rb_int(node);

	i_node->max_high = i_node->high;

	if (node->rb_left)
		i_node->max_high = max(rb_int(node->rb_left)->high, i_node->max_high);
	if (node->rb_right)
		i_node->max_high = max(rb_int(node->rb_right)->high, i_node->max_high);

Which is even more obvious.

Note that i renamed 'data' to 'i_node' - it's much better to use a descriptive 
name for local variables not some opaque 'data' which could be anything.

> +int rb_int_insert(struct rb_root *root, struct rb_int_node *data)

i'd suggest to rename 'data' to i_node in other places as well. Here we'd want 
to use the name i_node_root i suspect.

(Note, naming it 'inode' would suck for us kernel developers :-)

> +	struct rb_node **new = &(root->rb_node), *parent = NULL;
> +
> +	while (*new) {
> +		struct rb_int_node *this	= RB_INT(*new);

So the rb-node iterator is named 'new', while the rb-int-node iterator is 
called 'this'? That does not make sense.

Please name them in a matching way: 'node' for the rb iterator, 'i_node' for 
the rb-int iterator, or so.

> +		int result			= data->low - this->low;

and then this would look like:

> +		int result			= i_node_root->low - i_node->low;

which suddenly gains semantic meaning even if all you see is this single line! 
That is what proper naming allows.

> +
> +		parent = *new;
> +		if (result < 0)
> +			new = &((*new)->rb_left);
> +		else if (result > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			return 0;
> +	}
> +
> +	rb_link_node(&data->node, parent, new);
> +	rb_insert_color(&data->node, root);
> +
> +	rb_augment_insert(*new, update_node_max_high, NULL);
> +	return 1;
> +}
> +
> +void rb_int_erase(struct rb_root *root, struct rb_int_node *node)
> +{
> +	struct rb_node *deepest;
> +
> +	deepest = rb_augment_erase_begin(&node->node);
> +	rb_erase(&node->node, root);
> +	rb_augment_erase_end(deepest, update_node_max_high, NULL);
> +
> +}

Nit: superfluous newline at the end of the function.

Thanks,

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin May 17, 2011, 7:55 a.m. UTC | #2
On Tue, 2011-05-17 at 09:41 +0200, Ingo Molnar wrote:
> * Sasha Levin <levinsasha928@gmail.com> wrote:
> > +#include <kvm/interval-rbtree.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> 
> At first sight i dont think you really need the stdio.h and stlib.h includes - 
> you added these while having debugging printfs in the code?

We can drop either of them, but not both. Added it for size_t
definition.

> > +int rb_int_insert(struct rb_root *root, struct rb_int_node *data)
> 
> i'd suggest to rename 'data' to i_node in other places as well. Here we'd want 
> to use the name i_node_root i suspect.
> 
> (Note, naming it 'inode' would suck for us kernel developers :-)
> 
> > +	struct rb_node **new = &(root->rb_node), *parent = NULL;
> > +
> > +	while (*new) {
> > +		struct rb_int_node *this	= RB_INT(*new);
> 
> So the rb-node iterator is named 'new', while the rb-int-node iterator is 
> called 'this'? That does not make sense.

I actually took that bit from an example in Documentation/rbtree.txt of
how to write an insertion function :)

Maybe it's worth doing another rbtree.txt patch and cleaning up the
samples there?
Ingo Molnar May 17, 2011, 8:05 a.m. UTC | #3
* Sasha Levin <levinsasha928@gmail.com> wrote:

> On Tue, 2011-05-17 at 09:41 +0200, Ingo Molnar wrote:
> > * Sasha Levin <levinsasha928@gmail.com> wrote:
> > > +#include <kvm/interval-rbtree.h>
> > > +#include <stdio.h>
> > > +#include <stdlib.h>
> > 
> > At first sight i dont think you really need the stdio.h and stlib.h includes - 
> > you added these while having debugging printfs in the code?
> 
> We can drop either of them, but not both. Added it for size_t
> definition.

Oh, ok - sure!

> > So the rb-node iterator is named 'new', while the rb-int-node iterator is 
> > called 'this'? That does not make sense.
> 
> I actually took that bit from an example in Documentation/rbtree.txt of how 
> to write an insertion function :)

The kernel is far from perfect :-)

> Maybe it's worth doing another rbtree.txt patch and cleaning up the samples 
> there?

Yeah :-)

Thanks,

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pekka Enberg May 17, 2011, 8:18 a.m. UTC | #4
On Tue, May 17, 2011 at 11:05 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Sasha Levin <levinsasha928@gmail.com> wrote:
>
>> On Tue, 2011-05-17 at 09:41 +0200, Ingo Molnar wrote:
>> > * Sasha Levin <levinsasha928@gmail.com> wrote:
>> > > +#include <kvm/interval-rbtree.h>
>> > > +#include <stdio.h>
>> > > +#include <stdlib.h>
>> >
>> > At first sight i dont think you really need the stdio.h and stlib.h includes -
>> > you added these while having debugging printfs in the code?
>>
>> We can drop either of them, but not both. Added it for size_t
>> definition.
>
> Oh, ok - sure!

I think

#include <stddef.h>

is the right thing to do here, no?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin May 17, 2011, 8:21 a.m. UTC | #5
On Tue, 2011-05-17 at 11:18 +0300, Pekka Enberg wrote:
> On Tue, May 17, 2011 at 11:05 AM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > * Sasha Levin <levinsasha928@gmail.com> wrote:
> >
> >> On Tue, 2011-05-17 at 09:41 +0200, Ingo Molnar wrote:
> >> > * Sasha Levin <levinsasha928@gmail.com> wrote:
> >> > > +#include <kvm/interval-rbtree.h>
> >> > > +#include <stdio.h>
> >> > > +#include <stdlib.h>
> >> >
> >> > At first sight i dont think you really need the stdio.h and stlib.h includes -
> >> > you added these while having debugging printfs in the code?
> >>
> >> We can drop either of them, but not both. Added it for size_t
> >> definition.
> >
> > Oh, ok - sure!
> 
> I think
> 
> #include <stddef.h>
> 
> is the right thing to do here, no?

Yup, That'll be better.
diff mbox

Patch

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 64fdcbe..b175e18 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -42,6 +42,7 @@  OBJS    += mptable.o
 OBJS    += threadpool.o
 OBJS    += irq.o
 OBJS    += ../../lib/rbtree.o
+OBJS    += util/interval-rbtree.o
 
 DEPS	:= $(patsubst %.o,%.d,$(OBJS))
 
diff --git a/tools/kvm/include/kvm/interval-rbtree.h b/tools/kvm/include/kvm/interval-rbtree.h
new file mode 100644
index 0000000..13862b3
--- /dev/null
+++ b/tools/kvm/include/kvm/interval-rbtree.h
@@ -0,0 +1,27 @@ 
+#ifndef KVM__INTERVAL_RBTREE_H
+#define KVM__INTERVAL_RBTREE_H
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+#define RB_INT_INIT(l, h) (struct rb_int_node){.low = l, .high = h}
+
+struct rb_int_node {
+	struct rb_node node;
+	u64 low;
+	u64 high;
+
+	/* max_high will store the highest high of it's 2 children. */
+	u64 max_high;
+};
+
+/* Return the rb_int_node in which p is located. */
+struct rb_int_node *rb_int_search_single(struct rb_root *root, u64 p);
+
+/* Return the rb_int_node in which start:len is located. */
+struct rb_int_node *rb_int_search_range(struct rb_root *root, u64 low, u64 high);
+
+int rb_int_insert(struct rb_root *root, struct rb_int_node *data);
+void rb_int_erase(struct rb_root *root, struct rb_int_node *node);
+
+#endif
diff --git a/tools/kvm/util/interval-rbtree.c b/tools/kvm/util/interval-rbtree.c
new file mode 100644
index 0000000..8957c62
--- /dev/null
+++ b/tools/kvm/util/interval-rbtree.c
@@ -0,0 +1,97 @@ 
+#include <kvm/interval-rbtree.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define RB_INT(n) container_of(n, struct rb_int_node, node)
+
+struct rb_int_node *rb_int_search_single(struct rb_root *root, u64 p)
+{
+	struct rb_node *node = root->rb_node;
+	struct rb_node *lowest = NULL;
+
+	while (node) {
+		struct rb_int_node *cur = RB_INT(node);
+		struct rb_int_node *left;
+		if (node->rb_left)
+			left = RB_INT(node->rb_left);
+		if (node->rb_left && (RB_INT(node->rb_left)->max_high > p)) {
+			node = node->rb_left;
+		} else if (cur->low <= p && cur->high > p) {
+			lowest = node;
+			break;
+		} else if (p > cur->low) {
+			node = node->rb_right;
+		} else {
+			break;
+		}
+	}
+
+	if (lowest == NULL)
+		return NULL;
+
+	return container_of(lowest, struct rb_int_node, node);
+}
+
+struct rb_int_node *rb_int_search_range(struct rb_root *root, u64 low, u64 high)
+{
+	struct rb_int_node *range;
+
+	range = rb_int_search_single(root, low);
+	if (range == NULL)
+		return NULL;
+
+	/* We simply verify that 'high' is smaller than the end of the range where 'low' is located */
+	if (range->high < high)
+		return NULL;
+
+	return range;
+}
+
+static void update_node_max_high(struct rb_node *node, void *arg)
+{
+	u64 high_left = 0, high_right = 0;
+	struct rb_int_node *data = RB_INT(node);
+
+	if (node->rb_left)
+		high_left	= RB_INT(node->rb_left)->high;
+	if (node->rb_right)
+		high_right	= RB_INT(node->rb_right)->high;
+
+	data->max_high = (high_left > high_right) ? high_left : high_right;
+	if (data->max_high < RB_INT(node)->high)
+		data->max_high = RB_INT(node)->high;
+}
+
+int rb_int_insert(struct rb_root *root, struct rb_int_node *data)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	while (*new) {
+		struct rb_int_node *this	= RB_INT(*new);
+		int result			= data->low - this->low;
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			return 0;
+	}
+
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+
+	rb_augment_insert(*new, update_node_max_high, NULL);
+	return 1;
+}
+
+void rb_int_erase(struct rb_root *root, struct rb_int_node *node)
+{
+	struct rb_node *deepest;
+
+	deepest = rb_augment_erase_begin(&node->node);
+	rb_erase(&node->node, root);
+	rb_augment_erase_end(deepest, update_node_max_high, NULL);
+
+}