[1/3] drm: Track drm_mm nodes with an interval tree
diff mbox

Message ID 1444128791-6327-1-git-send-email-chris@chris-wilson.co.uk
State New
Headers show

Commit Message

Chris Wilson Oct. 6, 2015, 10:53 a.m. UTC
In addition to the last-in/first-out stack for accessing drm_mm nodes,
we occasionally and in the future often want to find a drm_mm_node by an
address. To do so efficiently we need to track the nodes in an interval
tree - lookups for a particular address will then be O(lg(N)), where N
is the number of nodes in the range manager as opposed to O(N).
Insertion however gains an extra O(lg(N)) step for all nodes
irrespective of whether the interval tree is in use. For future i915
patches, eliminating the linear walk is a significant improvement.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/Kconfig  |  1 +
 drivers/gpu/drm/drm_mm.c | 71 ++++++++++++++++++++++++++++++++----------------
 include/drm/drm_mm.h     |  5 ++++
 3 files changed, 54 insertions(+), 23 deletions(-)

Comments

Daniel Vetter Oct. 6, 2015, 11:11 a.m. UTC | #1
On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> In addition to the last-in/first-out stack for accessing drm_mm nodes,
> we occasionally and in the future often want to find a drm_mm_node by an
> address. To do so efficiently we need to track the nodes in an interval
> tree - lookups for a particular address will then be O(lg(N)), where N
> is the number of nodes in the range manager as opposed to O(N).
> Insertion however gains an extra O(lg(N)) step for all nodes
> irrespective of whether the interval tree is in use. For future i915
> patches, eliminating the linear walk is a significant improvement.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: dri-devel@lists.freedesktop.org

For the vma manager David Herrman put the interval tree outside of drm_mm.
Whichever way we pick, but I think we should be consistent about this.
-Daniel

> ---
>  drivers/gpu/drm/Kconfig  |  1 +
>  drivers/gpu/drm/drm_mm.c | 71 ++++++++++++++++++++++++++++++++----------------
>  include/drm/drm_mm.h     |  5 ++++
>  3 files changed, 54 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 06ae5008c5ed..e25050a5a843 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -12,6 +12,7 @@ menuconfig DRM
>  	select I2C
>  	select I2C_ALGOBIT
>  	select DMA_SHARED_BUFFER
> +	select INTERVAL_TREE
>  	help
>  	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
>  	  introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 04de6fd88f8c..e3acd860f738 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>  	INIT_LIST_HEAD(&node->hole_stack);
>  	list_add(&node->node_list, &hole_node->node_list);
>  
> +	node->it.start = node->start;
> +	node->it.last = node->start + size - 1;
> +	interval_tree_insert(&node->it, &mm->interval_tree);
> +
>  	BUG_ON(node->start + node->size > adj_end);
>  
>  	node->hole_follows = 0;
> @@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>   */
>  int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
>  {
> -	struct drm_mm_node *hole;
>  	u64 end = node->start + node->size;
> -	u64 hole_start;
> -	u64 hole_end;
> -
> -	BUG_ON(node == NULL);
> +	struct interval_tree_node *it;
> +	struct drm_mm_node *hole;
> +	u64 hole_start, hole_end;
>  
>  	/* Find the relevant hole to add our node to */
> -	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
> -		if (hole_start > node->start || hole_end < end)
> -			continue;
> +	it = interval_tree_iter_first(&mm->interval_tree,
> +				      node->start, (u64)-1);
> +	if (it == NULL) {
> +		hole = list_last_entry(&mm->head_node.node_list,
> +				       struct drm_mm_node, node_list);
> +	} else {
> +		hole = container_of(it, typeof(*hole), it);
> +		if (hole->start <= node->start)
> +			return -ENOSPC;
> +
> +		hole = list_last_entry(&hole->node_list,
> +				       struct drm_mm_node, node_list);
> +	}
>  
> -		node->mm = mm;
> -		node->allocated = 1;
> +	hole_start = drm_mm_hole_node_start(hole);
> +	hole_end = drm_mm_hole_node_end(hole);
> +	if (hole_start > node->start || hole_end < end)
> +		return -ENOSPC;
>  
> -		INIT_LIST_HEAD(&node->hole_stack);
> -		list_add(&node->node_list, &hole->node_list);
> +	node->mm = mm;
> +	node->allocated = 1;
>  
> -		if (node->start == hole_start) {
> -			hole->hole_follows = 0;
> -			list_del_init(&hole->hole_stack);
> -		}
> +	INIT_LIST_HEAD(&node->hole_stack);
> +	list_add(&node->node_list, &hole->node_list);
>  
> -		node->hole_follows = 0;
> -		if (end != hole_end) {
> -			list_add(&node->hole_stack, &mm->hole_stack);
> -			node->hole_follows = 1;
> -		}
> +	node->it.start = node->start;
> +	node->it.last = node->start + node->size - 1;
> +	interval_tree_insert(&node->it, &mm->interval_tree);
>  
> -		return 0;
> +	if (node->start == hole_start) {
> +		hole->hole_follows = 0;
> +		list_del_init(&hole->hole_stack);
>  	}
>  
> -	return -ENOSPC;
> +	node->hole_follows = 0;
> +	if (end != hole_end) {
> +		list_add(&node->hole_stack, &mm->hole_stack);
> +		node->hole_follows = 1;
> +	}
> +
> +	return 0;
>  }
>  EXPORT_SYMBOL(drm_mm_reserve_node);
>  
> @@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
>  	INIT_LIST_HEAD(&node->hole_stack);
>  	list_add(&node->node_list, &hole_node->node_list);
>  
> +	node->it.start = node->start;
> +	node->it.last = node->start + node->size - 1;
> +	interval_tree_insert(&node->it, &mm->interval_tree);
> +
>  	BUG_ON(node->start < start);
>  	BUG_ON(node->start < adj_start);
>  	BUG_ON(node->start + node->size > adj_end);
> @@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
>  	} else
>  		list_move(&prev_node->hole_stack, &mm->hole_stack);
>  
> +	interval_tree_remove(&node->it, &mm->interval_tree);
>  	list_del(&node->node_list);
>  	node->allocated = 0;
>  }
> @@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
>  	mm->head_node.size = start - mm->head_node.start;
>  	list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
>  
> +	mm->interval_tree = RB_ROOT;
> +
>  	mm->color_adjust = NULL;
>  }
>  EXPORT_SYMBOL(drm_mm_init);
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 0de6290df4da..6d531fe529bf 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -37,6 +37,7 @@
>   * Generic range manager structs
>   */
>  #include <linux/bug.h>
> +#include <linux/interval_tree.h>
>  #include <linux/kernel.h>
>  #include <linux/list.h>
>  #include <linux/spinlock.h>
> @@ -61,6 +62,7 @@ enum drm_mm_allocator_flags {
>  struct drm_mm_node {
>  	struct list_head node_list;
>  	struct list_head hole_stack;
> +	struct interval_tree_node it;
>  	unsigned hole_follows : 1;
>  	unsigned scanned_block : 1;
>  	unsigned scanned_prev_free : 1;
> @@ -79,6 +81,9 @@ struct drm_mm {
>  	/* head_node.node_list is the list of all memory nodes, ordered
>  	 * according to the (increasing) start address of the memory node. */
>  	struct drm_mm_node head_node;
> +	/* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
> +	struct rb_root interval_tree;
> +
>  	unsigned int scan_check_range : 1;
>  	unsigned scan_alignment;
>  	unsigned long scan_color;
> -- 
> 2.6.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 6, 2015, 11:19 a.m. UTC | #2
On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> > we occasionally and in the future often want to find a drm_mm_node by an
> > address. To do so efficiently we need to track the nodes in an interval
> > tree - lookups for a particular address will then be O(lg(N)), where N
> > is the number of nodes in the range manager as opposed to O(N).
> > Insertion however gains an extra O(lg(N)) step for all nodes
> > irrespective of whether the interval tree is in use. For future i915
> > patches, eliminating the linear walk is a significant improvement.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: dri-devel@lists.freedesktop.org
> 
> For the vma manager David Herrman put the interval tree outside of drm_mm.
> Whichever way we pick, but I think we should be consistent about this.

Given that the basis of this patch is that functionality exposed by
drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
there is a measurable perf degradation if we switch over from the mru
stack to using fixed addresses) it makes sense to improve that
functionality. The question is then why the drm_vma_manager didn't use
and improve the existing functionality...
-Chris
Daniel Vetter Oct. 6, 2015, 12:01 p.m. UTC | #3
On Tue, Oct 06, 2015 at 12:19:43PM +0100, Chris Wilson wrote:
> On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> > On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> > > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> > > we occasionally and in the future often want to find a drm_mm_node by an
> > > address. To do so efficiently we need to track the nodes in an interval
> > > tree - lookups for a particular address will then be O(lg(N)), where N
> > > is the number of nodes in the range manager as opposed to O(N).
> > > Insertion however gains an extra O(lg(N)) step for all nodes
> > > irrespective of whether the interval tree is in use. For future i915
> > > patches, eliminating the linear walk is a significant improvement.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: dri-devel@lists.freedesktop.org
> > 
> > For the vma manager David Herrman put the interval tree outside of drm_mm.
> > Whichever way we pick, but I think we should be consistent about this.
> 
> Given that the basis of this patch is that functionality exposed by
> drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> there is a measurable perf degradation if we switch over from the mru
> stack to using fixed addresses) it makes sense to improve that
> functionality. The question is then why the drm_vma_manager didn't use
> and improve the existing functionality...

Yeah I'm trying to volunteer you to add a lookup-function and rework the
vma-manager ;-)
-Daniel
David Herrmann Oct. 7, 2015, 10:22 a.m. UTC | #4
Hi

On Tue, Oct 6, 2015 at 1:19 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
>> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
>> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
>> > we occasionally and in the future often want to find a drm_mm_node by an
>> > address. To do so efficiently we need to track the nodes in an interval
>> > tree - lookups for a particular address will then be O(lg(N)), where N
>> > is the number of nodes in the range manager as opposed to O(N).
>> > Insertion however gains an extra O(lg(N)) step for all nodes
>> > irrespective of whether the interval tree is in use. For future i915
>> > patches, eliminating the linear walk is a significant improvement.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: dri-devel@lists.freedesktop.org
>>
>> For the vma manager David Herrman put the interval tree outside of drm_mm.
>> Whichever way we pick, but I think we should be consistent about this.
>
> Given that the basis of this patch is that functionality exposed by
> drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> there is a measurable perf degradation if we switch over from the mru
> stack to using fixed addresses) it makes sense to improve that
> functionality. The question is then why the drm_vma_manager didn't use
> and improve the existing functionality...

I didn't want to slow down drm_mm operations, so I kept it separate. I
don't mind if it is merged into drm_mm. It'd be trivial to make the
vma-manager use it (only on the top-level, though).

Thanks
David
Thomas Daniel Oct. 16, 2015, 8:54 a.m. UTC | #5
Hi,

> -----Original Message-----

> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of

> David Herrmann

> Sent: Wednesday, October 7, 2015 11:23 AM

> To: Chris Wilson; Daniel Vetter; Intel Graphics Development; dri-

> devel@lists.freedesktop.org

> Subject: Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval

> tree

> 

> Hi

> 

> On Tue, Oct 6, 2015 at 1:19 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:

> > On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:

> >> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:

> >> > In addition to the last-in/first-out stack for accessing drm_mm nodes,

> >> > we occasionally and in the future often want to find a drm_mm_node by an

> >> > address. To do so efficiently we need to track the nodes in an interval

> >> > tree - lookups for a particular address will then be O(lg(N)), where N

> >> > is the number of nodes in the range manager as opposed to O(N).

> >> > Insertion however gains an extra O(lg(N)) step for all nodes

> >> > irrespective of whether the interval tree is in use. For future i915

> >> > patches, eliminating the linear walk is a significant improvement.

> >> >

> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

> >> > Cc: dri-devel@lists.freedesktop.org

> >>

> >> For the vma manager David Herrman put the interval tree outside of

> drm_mm.

> >> Whichever way we pick, but I think we should be consistent about this.

> >

> > Given that the basis of this patch is that functionality exposed by

> > drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.

> > there is a measurable perf degradation if we switch over from the mru

> > stack to using fixed addresses) it makes sense to improve that

> > functionality. The question is then why the drm_vma_manager didn't use

> > and improve the existing functionality...

> 

> I didn't want to slow down drm_mm operations, so I kept it separate. I

> don't mind if it is merged into drm_mm. It'd be trivial to make the

> vma-manager use it (only on the top-level, though).

> 

> Thanks

> David


Is there a conclusion to this discussion?  I'm under increasing pressure to get the i915 soft-pinning merged and Chris's latest version depends on this interval tree.

I've been told to post a new rebase of the version which doesn't use the interval tree if not.

Cheers,
Thomas.
Daniel Vetter Oct. 16, 2015, 2:26 p.m. UTC | #6
On Fri, Oct 16, 2015 at 08:54:20AM +0000, Daniel, Thomas wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> > David Herrmann
> > Sent: Wednesday, October 7, 2015 11:23 AM
> > To: Chris Wilson; Daniel Vetter; Intel Graphics Development; dri-
> > devel@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval
> > tree
> > 
> > Hi
> > 
> > On Tue, Oct 6, 2015 at 1:19 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> > >> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> > >> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> > >> > we occasionally and in the future often want to find a drm_mm_node by an
> > >> > address. To do so efficiently we need to track the nodes in an interval
> > >> > tree - lookups for a particular address will then be O(lg(N)), where N
> > >> > is the number of nodes in the range manager as opposed to O(N).
> > >> > Insertion however gains an extra O(lg(N)) step for all nodes
> > >> > irrespective of whether the interval tree is in use. For future i915
> > >> > patches, eliminating the linear walk is a significant improvement.
> > >> >
> > >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >> > Cc: dri-devel@lists.freedesktop.org
> > >>
> > >> For the vma manager David Herrman put the interval tree outside of
> > drm_mm.
> > >> Whichever way we pick, but I think we should be consistent about this.
> > >
> > > Given that the basis of this patch is that functionality exposed by
> > > drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> > > there is a measurable perf degradation if we switch over from the mru
> > > stack to using fixed addresses) it makes sense to improve that
> > > functionality. The question is then why the drm_vma_manager didn't use
> > > and improve the existing functionality...
> > 
> > I didn't want to slow down drm_mm operations, so I kept it separate. I
> > don't mind if it is merged into drm_mm. It'd be trivial to make the
> > vma-manager use it (only on the top-level, though).
> > 
> > Thanks
> > David
> 
> Is there a conclusion to this discussion?  I'm under increasing pressure to get the i915 soft-pinning merged and Chris's latest version depends on this interval tree.
> 
> I've been told to post a new rebase of the version which doesn't use the interval tree if not.

In my opiniong pushing the interval tree into drm_mm.c makes tons of
sense, so that we don't have to duplicate this between i915 and the vma
manager.
-Daniel
Daniel Vetter Oct. 21, 2015, 3:11 p.m. UTC | #7
On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> In addition to the last-in/first-out stack for accessing drm_mm nodes,
> we occasionally and in the future often want to find a drm_mm_node by an
> address. To do so efficiently we need to track the nodes in an interval
> tree - lookups for a particular address will then be O(lg(N)), where N
> is the number of nodes in the range manager as opposed to O(N).
> Insertion however gains an extra O(lg(N)) step for all nodes
> irrespective of whether the interval tree is in use. For future i915
> patches, eliminating the linear walk is a significant improvement.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: dri-devel@lists.freedesktop.org

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

I guess for simpler merge ordering we can just pull this into drm-intel
and patch up the vma manager (just need to drop a lot of code and adjust
the search to use the drm_mm internal_tree nodes) later on.
-Daniel

> ---
>  drivers/gpu/drm/Kconfig  |  1 +
>  drivers/gpu/drm/drm_mm.c | 71 ++++++++++++++++++++++++++++++++----------------
>  include/drm/drm_mm.h     |  5 ++++
>  3 files changed, 54 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 06ae5008c5ed..e25050a5a843 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -12,6 +12,7 @@ menuconfig DRM
>  	select I2C
>  	select I2C_ALGOBIT
>  	select DMA_SHARED_BUFFER
> +	select INTERVAL_TREE
>  	help
>  	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
>  	  introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 04de6fd88f8c..e3acd860f738 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>  	INIT_LIST_HEAD(&node->hole_stack);
>  	list_add(&node->node_list, &hole_node->node_list);
>  
> +	node->it.start = node->start;
> +	node->it.last = node->start + size - 1;
> +	interval_tree_insert(&node->it, &mm->interval_tree);
> +
>  	BUG_ON(node->start + node->size > adj_end);
>  
>  	node->hole_follows = 0;
> @@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
>   */
>  int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
>  {
> -	struct drm_mm_node *hole;
>  	u64 end = node->start + node->size;
> -	u64 hole_start;
> -	u64 hole_end;
> -
> -	BUG_ON(node == NULL);
> +	struct interval_tree_node *it;
> +	struct drm_mm_node *hole;
> +	u64 hole_start, hole_end;
>  
>  	/* Find the relevant hole to add our node to */
> -	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
> -		if (hole_start > node->start || hole_end < end)
> -			continue;
> +	it = interval_tree_iter_first(&mm->interval_tree,
> +				      node->start, (u64)-1);
> +	if (it == NULL) {
> +		hole = list_last_entry(&mm->head_node.node_list,
> +				       struct drm_mm_node, node_list);
> +	} else {
> +		hole = container_of(it, typeof(*hole), it);
> +		if (hole->start <= node->start)
> +			return -ENOSPC;
> +
> +		hole = list_last_entry(&hole->node_list,
> +				       struct drm_mm_node, node_list);
> +	}
>  
> -		node->mm = mm;
> -		node->allocated = 1;
> +	hole_start = drm_mm_hole_node_start(hole);
> +	hole_end = drm_mm_hole_node_end(hole);
> +	if (hole_start > node->start || hole_end < end)
> +		return -ENOSPC;
>  
> -		INIT_LIST_HEAD(&node->hole_stack);
> -		list_add(&node->node_list, &hole->node_list);
> +	node->mm = mm;
> +	node->allocated = 1;
>  
> -		if (node->start == hole_start) {
> -			hole->hole_follows = 0;
> -			list_del_init(&hole->hole_stack);
> -		}
> +	INIT_LIST_HEAD(&node->hole_stack);
> +	list_add(&node->node_list, &hole->node_list);
>  
> -		node->hole_follows = 0;
> -		if (end != hole_end) {
> -			list_add(&node->hole_stack, &mm->hole_stack);
> -			node->hole_follows = 1;
> -		}
> +	node->it.start = node->start;
> +	node->it.last = node->start + node->size - 1;
> +	interval_tree_insert(&node->it, &mm->interval_tree);
>  
> -		return 0;
> +	if (node->start == hole_start) {
> +		hole->hole_follows = 0;
> +		list_del_init(&hole->hole_stack);
>  	}
>  
> -	return -ENOSPC;
> +	node->hole_follows = 0;
> +	if (end != hole_end) {
> +		list_add(&node->hole_stack, &mm->hole_stack);
> +		node->hole_follows = 1;
> +	}
> +
> +	return 0;
>  }
>  EXPORT_SYMBOL(drm_mm_reserve_node);
>  
> @@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
>  	INIT_LIST_HEAD(&node->hole_stack);
>  	list_add(&node->node_list, &hole_node->node_list);
>  
> +	node->it.start = node->start;
> +	node->it.last = node->start + node->size - 1;
> +	interval_tree_insert(&node->it, &mm->interval_tree);
> +
>  	BUG_ON(node->start < start);
>  	BUG_ON(node->start < adj_start);
>  	BUG_ON(node->start + node->size > adj_end);
> @@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
>  	} else
>  		list_move(&prev_node->hole_stack, &mm->hole_stack);
>  
> +	interval_tree_remove(&node->it, &mm->interval_tree);
>  	list_del(&node->node_list);
>  	node->allocated = 0;
>  }
> @@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
>  	mm->head_node.size = start - mm->head_node.start;
>  	list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
>  
> +	mm->interval_tree = RB_ROOT;
> +
>  	mm->color_adjust = NULL;
>  }
>  EXPORT_SYMBOL(drm_mm_init);
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 0de6290df4da..6d531fe529bf 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -37,6 +37,7 @@
>   * Generic range manager structs
>   */
>  #include <linux/bug.h>
> +#include <linux/interval_tree.h>
>  #include <linux/kernel.h>
>  #include <linux/list.h>
>  #include <linux/spinlock.h>
> @@ -61,6 +62,7 @@ enum drm_mm_allocator_flags {
>  struct drm_mm_node {
>  	struct list_head node_list;
>  	struct list_head hole_stack;
> +	struct interval_tree_node it;
>  	unsigned hole_follows : 1;
>  	unsigned scanned_block : 1;
>  	unsigned scanned_prev_free : 1;
> @@ -79,6 +81,9 @@ struct drm_mm {
>  	/* head_node.node_list is the list of all memory nodes, ordered
>  	 * according to the (increasing) start address of the memory node. */
>  	struct drm_mm_node head_node;
> +	/* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
> +	struct rb_root interval_tree;
> +
>  	unsigned int scan_check_range : 1;
>  	unsigned scan_alignment;
>  	unsigned long scan_color;
> -- 
> 2.6.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
David Herrmann Oct. 21, 2015, 3:14 p.m. UTC | #8
Hi

On Wed, Oct 21, 2015 at 5:11 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
>> In addition to the last-in/first-out stack for accessing drm_mm nodes,
>> we occasionally and in the future often want to find a drm_mm_node by an
>> address. To do so efficiently we need to track the nodes in an interval
>> tree - lookups for a particular address will then be O(lg(N)), where N
>> is the number of nodes in the range manager as opposed to O(N).
>> Insertion however gains an extra O(lg(N)) step for all nodes
>> irrespective of whether the interval tree is in use. For future i915
>> patches, eliminating the linear walk is a significant improvement.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: dri-devel@lists.freedesktop.org
>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> I guess for simpler merge ordering we can just pull this into drm-intel
> and patch up the vma manager (just need to drop a lot of code and adjust
> the search to use the drm_mm internal_tree nodes) later on.

Agreed.

Acked-by: David Herrmann <dh.herrmann@gmail.com>

Thanks
David
Daniel Vetter Oct. 22, 2015, 8:07 a.m. UTC | #9
On Wed, Oct 21, 2015 at 5:14 PM, David Herrmann <dh.herrmann@gmail.com> wrote:
> On Wed, Oct 21, 2015 at 5:11 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
>>> In addition to the last-in/first-out stack for accessing drm_mm nodes,
>>> we occasionally and in the future often want to find a drm_mm_node by an
>>> address. To do so efficiently we need to track the nodes in an interval
>>> tree - lookups for a particular address will then be O(lg(N)), where N
>>> is the number of nodes in the range manager as opposed to O(N).
>>> Insertion however gains an extra O(lg(N)) step for all nodes
>>> irrespective of whether the interval tree is in use. For future i915
>>> patches, eliminating the linear walk is a significant improvement.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: dri-devel@lists.freedesktop.org
>>
>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>
>> I guess for simpler merge ordering we can just pull this into drm-intel
>> and patch up the vma manager (just need to drop a lot of code and adjust
>> the search to use the drm_mm internal_tree nodes) later on.
>
> Agreed.
>
> Acked-by: David Herrmann <dh.herrmann@gmail.com>

Also has Dave's irc-ack for pushing through drm-intel.
-Daniel

Patch
diff mbox

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 06ae5008c5ed..e25050a5a843 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -12,6 +12,7 @@  menuconfig DRM
 	select I2C
 	select I2C_ALGOBIT
 	select DMA_SHARED_BUFFER
+	select INTERVAL_TREE
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 04de6fd88f8c..e3acd860f738 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -153,6 +153,10 @@  static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	INIT_LIST_HEAD(&node->hole_stack);
 	list_add(&node->node_list, &hole_node->node_list);
 
+	node->it.start = node->start;
+	node->it.last = node->start + size - 1;
+	interval_tree_insert(&node->it, &mm->interval_tree);
+
 	BUG_ON(node->start + node->size > adj_end);
 
 	node->hole_follows = 0;
@@ -178,39 +182,53 @@  static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
  */
 int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 {
-	struct drm_mm_node *hole;
 	u64 end = node->start + node->size;
-	u64 hole_start;
-	u64 hole_end;
-
-	BUG_ON(node == NULL);
+	struct interval_tree_node *it;
+	struct drm_mm_node *hole;
+	u64 hole_start, hole_end;
 
 	/* Find the relevant hole to add our node to */
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		if (hole_start > node->start || hole_end < end)
-			continue;
+	it = interval_tree_iter_first(&mm->interval_tree,
+				      node->start, (u64)-1);
+	if (it == NULL) {
+		hole = list_last_entry(&mm->head_node.node_list,
+				       struct drm_mm_node, node_list);
+	} else {
+		hole = container_of(it, typeof(*hole), it);
+		if (hole->start <= node->start)
+			return -ENOSPC;
+
+		hole = list_last_entry(&hole->node_list,
+				       struct drm_mm_node, node_list);
+	}
 
-		node->mm = mm;
-		node->allocated = 1;
+	hole_start = drm_mm_hole_node_start(hole);
+	hole_end = drm_mm_hole_node_end(hole);
+	if (hole_start > node->start || hole_end < end)
+		return -ENOSPC;
 
-		INIT_LIST_HEAD(&node->hole_stack);
-		list_add(&node->node_list, &hole->node_list);
+	node->mm = mm;
+	node->allocated = 1;
 
-		if (node->start == hole_start) {
-			hole->hole_follows = 0;
-			list_del_init(&hole->hole_stack);
-		}
+	INIT_LIST_HEAD(&node->hole_stack);
+	list_add(&node->node_list, &hole->node_list);
 
-		node->hole_follows = 0;
-		if (end != hole_end) {
-			list_add(&node->hole_stack, &mm->hole_stack);
-			node->hole_follows = 1;
-		}
+	node->it.start = node->start;
+	node->it.last = node->start + node->size - 1;
+	interval_tree_insert(&node->it, &mm->interval_tree);
 
-		return 0;
+	if (node->start == hole_start) {
+		hole->hole_follows = 0;
+		list_del_init(&hole->hole_stack);
 	}
 
-	return -ENOSPC;
+	node->hole_follows = 0;
+	if (end != hole_end) {
+		list_add(&node->hole_stack, &mm->hole_stack);
+		node->hole_follows = 1;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(drm_mm_reserve_node);
 
@@ -300,6 +318,10 @@  static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 	INIT_LIST_HEAD(&node->hole_stack);
 	list_add(&node->node_list, &hole_node->node_list);
 
+	node->it.start = node->start;
+	node->it.last = node->start + node->size - 1;
+	interval_tree_insert(&node->it, &mm->interval_tree);
+
 	BUG_ON(node->start < start);
 	BUG_ON(node->start < adj_start);
 	BUG_ON(node->start + node->size > adj_end);
@@ -388,6 +410,7 @@  void drm_mm_remove_node(struct drm_mm_node *node)
 	} else
 		list_move(&prev_node->hole_stack, &mm->hole_stack);
 
+	interval_tree_remove(&node->it, &mm->interval_tree);
 	list_del(&node->node_list);
 	node->allocated = 0;
 }
@@ -756,6 +779,8 @@  void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
 	mm->head_node.size = start - mm->head_node.start;
 	list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
 
+	mm->interval_tree = RB_ROOT;
+
 	mm->color_adjust = NULL;
 }
 EXPORT_SYMBOL(drm_mm_init);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 0de6290df4da..6d531fe529bf 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -37,6 +37,7 @@ 
  * Generic range manager structs
  */
 #include <linux/bug.h>
+#include <linux/interval_tree.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
@@ -61,6 +62,7 @@  enum drm_mm_allocator_flags {
 struct drm_mm_node {
 	struct list_head node_list;
 	struct list_head hole_stack;
+	struct interval_tree_node it;
 	unsigned hole_follows : 1;
 	unsigned scanned_block : 1;
 	unsigned scanned_prev_free : 1;
@@ -79,6 +81,9 @@  struct drm_mm {
 	/* head_node.node_list is the list of all memory nodes, ordered
 	 * according to the (increasing) start address of the memory node. */
 	struct drm_mm_node head_node;
+	/* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
+	struct rb_root interval_tree;
+
 	unsigned int scan_check_range : 1;
 	unsigned scan_alignment;
 	unsigned long scan_color;