
[2/2] drm/mm: improve rb_hole_addr rbtree search

Message ID 20200519084436.91718-2-nirmoy.das@amd.com (mailing list archive)
State New, archived
Series [1/2] drm/mm: expand rb_hole_addr augmented callbacks

Commit Message

Nirmoy Das May 19, 2020, 8:44 a.m. UTC
Userspace can still abuse alignment while allocating buffer objects
to slow down the rb_hole_addr rbtree search. This patch improves the
search in a fragmented rb_hole_addr rbtree by storing the maximum
subtree hole alignment and using it to skip a complete subtree if
that subtree cannot fit a (size + alignment) request.

With this patch applied, 50k bo allocs of size 4k and alignment 9k
took ~0.24 sec on amdgpu, compared to 27 sec without it.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/drm_mm.c | 66 ++++++++++++++++++++++++++++++++++------
 include/drm/drm_mm.h     |  1 +
 2 files changed, 58 insertions(+), 9 deletions(-)
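
For orientation, a minimal userspace sketch of the packed key this
patch introduces: the hole size occupies the upper 58 bits, and the
alignment capability of the hole's start address (its lowest set bit)
occupies the bottom 6 bits. This is hypothetical standalone code;
__builtin_ffsll() stands in for the kernel's ffs() used by the patch.

#include <stdint.h>

#define DRM_MM_ALIGN_SHIFT 6

/*
 * Pack a hole's byte size together with how strongly its start address
 * is aligned: __builtin_ffsll() returns the 1-based index of the lowest
 * set bit, which fits in the 6 bits freed by the shift.
 */
static uint64_t hole_size_align(uint64_t hole_size, uint64_t hole_start)
{
	return (hole_size << DRM_MM_ALIGN_SHIFT) |
	       (uint64_t)__builtin_ffsll((long long)hole_start);
}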

Comments

Nirmoy May 20, 2020, 4:28 p.m. UTC | #1
Hi Chris,

On 5/20/20 12:56 AM, Chris Wilson wrote:
> Quoting Nirmoy Das (2020-05-19 09:44:36)
>> +#define DRM_MM_ALIGN_SHIFT 6
>>   #define HOLE_SIZE(NODE) ((NODE)->hole_size)
>>   #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
>> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
>> +                              ffs(HOLE_ADDR(NODE)))
> Fwiw, max hole size of 58b, we would need to stop storing byte
> extents...


Can you please explain the 2nd part of this statement?


>>   static struct drm_mm_node *
>> -next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>> +next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
>>   {
>>          struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
>>          struct drm_mm_node *right_node;
>> +       u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
>>   
>>          if (!entry)
>>                  return NULL;
>> @@ -513,6 +561,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
>>                  right_node = rb_entry(right_rb_node,
>>                                        struct drm_mm_node, rb_hole_addr);
>>                  if ((right_node->subtree_max_hole < size ||
>> +                    right_node->subtree_max_hole_align < req_align ||
> What was the point in storing the packed alignment if we are just
> searching for a hole big enough for (size + alignment)?

Yes, I realized this is not correct :/

Still thinking about a better way to incorporate alignment into the
subtree elimination.


Regards,

Nirmoy

> -Chris
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
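
For reference, a small standalone sketch of Chris's point above: since
the ffs() bits live entirely below bit DRM_MM_ALIGN_SHIFT, the packed
comparison reduces to a plain hole-size check, so the stored alignment
bits never influence the result. The values are illustrative.

#include <assert.h>
#include <stdint.h>

#define DRM_MM_ALIGN_SHIFT 6

int main(void)
{
	uint64_t hole_size = 100, ffs_bits = 13;  /* any value in [0, 63] */
	uint64_t size = 40, alignment = 60;       /* size + alignment == 100 */
	uint64_t packed = (hole_size << DRM_MM_ALIGN_SHIFT) | ffs_bits;
	uint64_t req = (size + alignment) << DRM_MM_ALIGN_SHIFT;

	/*
	 * packed < req  <=>  hole_size < size + alignment: the low bits
	 * can never bridge a full step of the shifted size.
	 */
	assert((packed < req) == (hole_size < size + alignment));
	return 0;
}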
Chris Wilson May 20, 2020, 4:35 p.m. UTC | #2
Quoting Nirmoy (2020-05-20 17:28:04)
> Hi Chris,
> 
> On 5/20/20 12:56 AM, Chris Wilson wrote:
> > Quoting Nirmoy Das (2020-05-19 09:44:36)
> >> +#define DRM_MM_ALIGN_SHIFT 6
> >>   #define HOLE_SIZE(NODE) ((NODE)->hole_size)
> >>   #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
> >> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
> >> +                              ffs(HOLE_ADDR(NODE)))
> > Fwiw, max hole size of 58b, we would need to stop storing byte
> > extents...
> 
> 
> Can you please explain the 2nd part of this statement?

Currently we [i915] use drm_mm with byte-addressing, so 58b is a tad too
close to the amount we actually need to track. If we used page tracking
instead of bytes, we regain 12b to play around with. It makes no
difference to the code at the moment (e.g. we still could not fit within
u32) so there has been no pressure to rewrite the extents handling. But
given sufficient reason, we could.
-Chris
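
A quick sketch of the bit budget behind the "58b" remark, assuming the
patch's shift of 6 (the constants below are illustrative, not from the
patch):

#include <stdint.h>

#define DRM_MM_ALIGN_SHIFT 6
#define EXTENT_BITS (64 - DRM_MM_ALIGN_SHIFT)	/* 58 bits for the extent */

/*
 * Byte addressing: the largest packable hole is 2^58 bytes (256 PiB).
 * Tracking 4 KiB pages instead shifts extents right by 12, regaining
 * those 12 bits: 2^58 pages cover 2^70 bytes of address space.
 */
static const uint64_t max_hole_bytes = 1ull << EXTENT_BITS;
static const unsigned int page_shift = 12;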
Nirmoy May 20, 2020, 4:46 p.m. UTC | #3
On 5/20/20 6:35 PM, Chris Wilson wrote:
> Quoting Nirmoy (2020-05-20 17:28:04)
>> Hi Chris,
>>
>> On 5/20/20 12:56 AM, Chris Wilson wrote:
>>> Quoting Nirmoy Das (2020-05-19 09:44:36)
>>>> +#define DRM_MM_ALIGN_SHIFT 6
>>>>    #define HOLE_SIZE(NODE) ((NODE)->hole_size)
>>>>    #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
>>>> +#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
>>>> +                              ffs(HOLE_ADDR(NODE)))
>>> Fwiw, max hole size of 58b, we would need to stop storing byte
>>> extents...
>>
>> Can you please explain the 2nd part of this statement?
> Currently we [i915] use drm_mm with byte-addressing, so 58b is a tad too
> close to the amount we actually need to track. If we used page tracking
> instead of bytes, we regain 12b to play around with. It makes no
> difference to the code at the moment (e.g. we still could not fit within
> u32) so there has been no pressure to rewrite the extents handling. But
> given sufficient reason, we could.
> -Chris


Thanks for the detailed explanation.


Nirmoy

Patch

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 91e90c635e05..1af0a211b660 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -212,8 +212,11 @@  static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 				   &drm_mm_interval_tree_augment);
 }
 
+#define DRM_MM_ALIGN_SHIFT 6
 #define HOLE_SIZE(NODE) ((NODE)->hole_size)
 #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
+#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
+			       ffs(HOLE_ADDR(NODE)))
 
 static u64 rb_to_hole_size(struct rb_node *rb)
 {
@@ -241,6 +244,33 @@  static void insert_hole_size(struct rb_root_cached *root,
 	rb_insert_color_cached(&node->rb_hole_size, root, first);
 }
 
+static inline bool
+augment_callbacks_compute_max_hole_align(struct drm_mm_node *node, bool exit)
+{
+	struct drm_mm_node *child;
+	u64 max = HOLE_SIZE_ALIGN(node);
+
+	if (node->rb_hole_addr.rb_left) {
+		child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
+				 rb_hole_addr);
+		if (child->subtree_max_hole_align > max)
+			max = child->subtree_max_hole_align;
+	}
+
+	if (node->rb_hole_addr.rb_right) {
+		child = rb_entry(node->rb_hole_addr.rb_right,
+				 struct drm_mm_node, rb_hole_addr);
+		if (child->subtree_max_hole_align > max)
+			max = child->subtree_max_hole_align;
+	}
+
+	if (exit && node->subtree_max_hole_align == max)
+		return true;
+
+	node->subtree_max_hole_align = max;
+	return false;
+}
+
 static inline bool
 augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
 {
@@ -271,10 +301,14 @@  augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
 static inline void
 augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
 {
+	bool compute_max_hole, compute_max_hole_align;
+
 	while (rb != stop) {
 		struct drm_mm_node *node = rb_entry(rb,  struct drm_mm_node,
 						    rb_hole_addr);
-		if (augment_callbacks_compute_max_hole(node, true))
+		compute_max_hole = augment_callbacks_compute_max_hole(node, true);
+		compute_max_hole_align = augment_callbacks_compute_max_hole_align(node, true);
+		if (compute_max_hole && compute_max_hole_align)
 			break;
 
 		rb = rb_parent(&node->rb_hole_addr);
@@ -290,6 +324,7 @@  augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
 					   rb_hole_addr);
 
 	new->subtree_max_hole = old->subtree_max_hole;
+	new->subtree_max_hole_align = old->subtree_max_hole_align;
 }
 
 static void
@@ -301,7 +336,9 @@  augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
 					   rb_hole_addr);
 
 	new->subtree_max_hole = old->subtree_max_hole;
+	new->subtree_max_hole_align = old->subtree_max_hole_align;
 	augment_callbacks_compute_max_hole(old, false);
+	augment_callbacks_compute_max_hole_align(old, false);
 }
 
 static const struct rb_augment_callbacks augment_callbacks = {
@@ -313,7 +350,9 @@  static const struct rb_augment_callbacks augment_callbacks = {
 static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 {
 	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
-	u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole;
+	u64 start = HOLE_ADDR(node);
+	u64 subtree_max_hole = node->subtree_max_hole;
+	u64 subtree_max_hole_align = node->subtree_max_hole_align;
 	struct drm_mm_node *parent;
 
 	while (*link) {
@@ -322,6 +361,9 @@  static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 		if (parent->subtree_max_hole < subtree_max_hole)
 			parent->subtree_max_hole = subtree_max_hole;
 
+		if (parent->subtree_max_hole_align < subtree_max_hole_align)
+			parent->subtree_max_hole_align = subtree_max_hole_align;
+
 		if (start < HOLE_ADDR(parent))
 			link = &parent->rb_hole_addr.rb_left;
 		else
@@ -339,6 +381,7 @@  static void add_hole(struct drm_mm_node *node)
 	node->hole_size =
 		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
 	node->subtree_max_hole = node->hole_size;
+	node->subtree_max_hole_align = HOLE_SIZE_ALIGN(node);
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
 	insert_hole_size(&mm->holes_size, node);
@@ -355,8 +398,10 @@  static void rm_hole(struct drm_mm_node *node)
 	rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
 	rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr,
 			   &augment_callbacks);
+
 	node->hole_size = 0;
 	node->subtree_max_hole = 0;
+	node->subtree_max_hole_align = 0;
 
 	DRM_MM_BUG_ON(drm_mm_hole_follows(node));
 }
@@ -458,10 +503,11 @@  first_hole(struct drm_mm *mm,
  * else return parent of @entry
  */
 static struct drm_mm_node *
-next_hole_high_addr(struct drm_mm_node *entry, u64 size)
+next_hole_high_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
 {
 	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
 	struct drm_mm_node *left_node;
+	u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
 
 	if (!entry)
 		return NULL;
@@ -473,6 +519,7 @@  next_hole_high_addr(struct drm_mm_node *entry, u64 size)
 		left_node = rb_entry(left_rb_node,
 				     struct drm_mm_node, rb_hole_addr);
 		if ((left_node->subtree_max_hole < size ||
+		     left_node->subtree_max_hole_align < req_align ||
 		     entry->size == entry->subtree_max_hole) &&
 		    parent_rb_node && parent_rb_node->rb_left != rb_node)
 			return rb_hole_addr_to_node(parent_rb_node);
@@ -498,10 +545,11 @@  next_hole_high_addr(struct drm_mm_node *entry, u64 size)
  * else return parent of @entry
  */
 static struct drm_mm_node *
-next_hole_low_addr(struct drm_mm_node *entry, u64 size)
+next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
 {
 	struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
 	struct drm_mm_node *right_node;
+	u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
 
 	if (!entry)
 		return NULL;
@@ -513,6 +561,7 @@  next_hole_low_addr(struct drm_mm_node *entry, u64 size)
 		right_node = rb_entry(right_rb_node,
 				      struct drm_mm_node, rb_hole_addr);
 		if ((right_node->subtree_max_hole < size ||
+		     right_node->subtree_max_hole_align < req_align ||
 		     entry->size == entry->subtree_max_hole) &&
 		    parent_rb_node && parent_rb_node->rb_right != rb_node)
 			return rb_hole_addr_to_node(parent_rb_node);
@@ -524,7 +573,7 @@  next_hole_low_addr(struct drm_mm_node *entry, u64 size)
 static struct drm_mm_node *
 next_hole(struct drm_mm *mm,
 	  struct drm_mm_node *node,
-	  u64 size,
+	  u64 size, u64 alignment,
 	  enum drm_mm_insert_mode mode)
 {
 	switch (mode) {
@@ -533,10 +582,10 @@  next_hole(struct drm_mm *mm,
 		return rb_hole_size_to_node(rb_prev(&node->rb_hole_size));
 
 	case DRM_MM_INSERT_LOW:
-		return next_hole_low_addr(node, size);
+		return next_hole_low_addr(node, size, alignment);
 
 	case DRM_MM_INSERT_HIGH:
-		return next_hole_high_addr(node, size);
+		return next_hole_high_addr(node, size, alignment);
 
 	case DRM_MM_INSERT_EVICT:
 		node = list_next_entry(node, hole_stack);
@@ -650,7 +699,7 @@  int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
 	for (hole = first_hole(mm, range_start, range_end, size, mode);
 	     hole;
-	     hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+	     hole = once ? NULL : next_hole(mm, hole, size, alignment, mode)) {
 		u64 hole_start = __drm_mm_hole_node_start(hole);
 		u64 hole_end = hole_start + hole->hole_size;
 		u64 adj_start, adj_end;
@@ -713,7 +762,6 @@  int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 			add_hole(hole);
 		if (adj_start + size < hole_end)
 			add_hole(node);
-
 		save_stack(node);
 		return 0;
 	}
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index a01bc6fac83c..c280b535d9a9 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -169,6 +169,7 @@  struct drm_mm_node {
 	u64 __subtree_last;
 	u64 hole_size;
 	u64 subtree_max_hole;
+	u64 subtree_max_hole_align;
 	unsigned long flags;
 #define DRM_MM_NODE_ALLOCATED_BIT	0
 #define DRM_MM_NODE_SCANNED_BIT		1
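
For context, a minimal sketch of the allocation pattern the commit
message benchmarks: 50k 4 KiB objects with a non-power-of-two 9 KiB
alignment. The helper itself is hypothetical, but
drm_mm_insert_node_in_range() and DRM_MM_INSERT_LOW are the real entry
points exercised by this patch.

#include <drm/drm_mm.h>
#include <linux/limits.h>
#include <linux/sizes.h>

/* Hypothetical stress helper mirroring the commit-message test case. */
static int stress_low_addr(struct drm_mm *mm, struct drm_mm_node *nodes,
			   unsigned int count)
{
	unsigned int i;
	int ret;

	for (i = 0; i < count; i++) {
		ret = drm_mm_insert_node_in_range(mm, &nodes[i],
						  SZ_4K, 9 * SZ_1K, 0,
						  0, U64_MAX,
						  DRM_MM_INSERT_LOW);
		if (ret)
			return ret;
	}
	return 0;
}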