[079/190] drm/i915: Reduce the pointer dance of i915_is_ggtt()
diff mbox

Message ID 1452503961-14837-79-git-send-email-chris@chris-wilson.co.uk
State New
Headers show

Commit Message

Chris Wilson Jan. 11, 2016, 9:17 a.m. UTC
The multiple levels of indirect do nothing but hinder the compiler and
the pointer chasing turns to be quite painful but painless to fix.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 13 ++++++-------
 drivers/gpu/drm/i915/i915_drv.h            |  7 -------
 drivers/gpu/drm/i915/i915_gem.c            | 18 +++++++-----------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 ++---
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 12 +++++-------
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  5 +++++
 drivers/gpu/drm/i915/i915_trace.h          | 27 ++++++++-------------------
 7 files changed, 33 insertions(+), 54 deletions(-)

Comments

Dave Gordon Jan. 15, 2016, 12:12 p.m. UTC | #1
On 11/01/16 09:17, Chris Wilson wrote:
> The multiple levels of indirect do nothing but hinder the compiler and
> the pointer chasing turns to be quite painful but painless to fix.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 13 ++++++-------
>   drivers/gpu/drm/i915/i915_drv.h            |  7 -------
>   drivers/gpu/drm/i915/i915_gem.c            | 18 +++++++-----------
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 ++---
>   drivers/gpu/drm/i915/i915_gem_gtt.c        | 12 +++++-------
>   drivers/gpu/drm/i915/i915_gem_gtt.h        |  5 +++++
>   drivers/gpu/drm/i915/i915_trace.h          | 27 ++++++++-------------------
>   7 files changed, 33 insertions(+), 54 deletions(-)

[snip]

> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c9c1a5cdc1e5..f840cc55f1ab 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2905,18 +2905,11 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
>   /* Some GGTT VM helpers */
>   #define i915_obj_to_ggtt(obj) \
>   	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
> -static inline bool i915_is_ggtt(struct i915_address_space *vm)
> -{
> -	struct i915_address_space *ggtt =
> -		&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
> -	return vm == ggtt;
> -}

> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b5c3bbe6dc2a..06117bd0fc00 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3150,6 +3150,7 @@ int i915_gem_gtt_init(struct drm_device *dev)
>   	}
>
>   	gtt->base.dev = dev;
> +	gtt->base.is_ggtt = true;

So, it looks like the plan here is that when we need to determine 
whether something is the special distinguished instance of a type, then 
instead of comparing its address against the global pointer to the 
distinguished instance, we'll just look for a flag /inside/ the object 
itself, which is set /only/ on the distinguished instance.

Now why didn't I think of that? That looks like such a good idea, we 
should apply it in other CONTEXTs!

Reviewed-by: Dave Gordon <david.s.gordon@intel.com>
Chris Wilson Jan. 15, 2016, 12:24 p.m. UTC | #2
On Fri, Jan 15, 2016 at 12:12:15PM +0000, Dave Gordon wrote:
> On 11/01/16 09:17, Chris Wilson wrote:
> >The multiple levels of indirect do nothing but hinder the compiler and
> >the pointer chasing turns to be quite painful but painless to fix.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c        | 13 ++++++-------
> >  drivers/gpu/drm/i915/i915_drv.h            |  7 -------
> >  drivers/gpu/drm/i915/i915_gem.c            | 18 +++++++-----------
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 ++---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c        | 12 +++++-------
> >  drivers/gpu/drm/i915/i915_gem_gtt.h        |  5 +++++
> >  drivers/gpu/drm/i915/i915_trace.h          | 27 ++++++++-------------------
> >  7 files changed, 33 insertions(+), 54 deletions(-)
> 
> [snip]
> 
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index c9c1a5cdc1e5..f840cc55f1ab 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2905,18 +2905,11 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
> >  /* Some GGTT VM helpers */
> >  #define i915_obj_to_ggtt(obj) \
> >  	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
> >-static inline bool i915_is_ggtt(struct i915_address_space *vm)
> >-{
> >-	struct i915_address_space *ggtt =
> >-		&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
> >-	return vm == ggtt;
> >-}
> 
> >diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >index b5c3bbe6dc2a..06117bd0fc00 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >@@ -3150,6 +3150,7 @@ int i915_gem_gtt_init(struct drm_device *dev)
> >  	}
> >
> >  	gtt->base.dev = dev;
> >+	gtt->base.is_ggtt = true;
> 
> So, it looks like the plan here is that when we need to determine
> whether something is the special distinguished instance of a type,
> then instead of comparing its address against the global pointer to
> the distinguished instance, we'll just look for a flag /inside/ the
> object itself, which is set /only/ on the distinguished instance.
> 
> Now why didn't I think of that? That looks like such a good idea, we
> should apply it in other CONTEXTs!

But we already have that flag in contexts! It also happens to be useful
for other tracking as well. And we demonstrated that we didn't even need
the checks for the kernel context anyway.

You will also note this is a small stepping patch after which we
transition away from i915_address_space.is_ggtt to using the owner.
-Chris

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index dd1788c81b90..99a6181b012e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -118,7 +118,7 @@  static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (i915_is_ggtt(vma->vm) && drm_mm_node_allocated(&vma->node))
+		if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
 			size += vma->node.size;
 	}
 
@@ -165,12 +165,11 @@  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
-			   i915_is_ggtt(vma->vm) ? "g" : "pp",
+			   vma->is_ggtt ? "g" : "pp",
 			   vma->node.start, vma->node.size);
-		if (i915_is_ggtt(vma->vm))
-			seq_printf(m, ", type: %u)", vma->ggtt_view.type);
-		else
-			seq_puts(m, ")");
+		if (vma->is_ggtt)
+			seq_printf(m, ", type: %u", vma->ggtt_view.type);
+		seq_puts(m, ")");
 	}
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
@@ -346,7 +345,7 @@  static int per_file_stats(int id, void *ptr, void *data)
 
 		bound++;
 
-		if (i915_is_ggtt(vma->vm)) {
+		if (vma->is_ggtt) {
 			stats->global += vma->node.size;
 		} else {
 			struct i915_hw_ppgtt *ppgtt
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9c1a5cdc1e5..f840cc55f1ab 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2905,18 +2905,11 @@  bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
 /* Some GGTT VM helpers */
 #define i915_obj_to_ggtt(obj) \
 	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
-static inline bool i915_is_ggtt(struct i915_address_space *vm)
-{
-	struct i915_address_space *ggtt =
-		&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
-	return vm == ggtt;
-}
 
 static inline struct i915_hw_ppgtt *
 i915_vm_to_ppgtt(struct i915_address_space *vm)
 {
 	WARN_ON(i915_is_ggtt(vm));
-
 	return container_of(vm, struct i915_hw_ppgtt, base);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 44bd514a6c2e..9a22fdd8a9f5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2595,8 +2595,7 @@  static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 			return ret;
 	}
 
-	if (i915_is_ggtt(vma->vm) &&
-	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
+	if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
@@ -2611,7 +2610,7 @@  static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 	vma->bound = 0;
 
 	list_del_init(&vma->vm_link);
-	if (i915_is_ggtt(vma->vm)) {
+	if (vma->is_ggtt) {
 		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 			obj->map_and_fenceable = false;
 		} else if (vma->ggtt_view.pages) {
@@ -3880,17 +3879,14 @@  struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
-	struct i915_address_space *vm = NULL;
 	WARN_ON(vma->node.allocated);
 
 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
 	if (!list_empty(&vma->exec_list))
 		return;
 
-	vm = vma->vm;
-
-	if (!i915_is_ggtt(vm))
-		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
+	if (!vma->is_ggtt)
+		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
 	list_del(&vma->obj_link);
 
@@ -4446,7 +4442,7 @@  u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
 	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 
 	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (i915_is_ggtt(vma->vm) &&
+		if (vma->is_ggtt &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm)
@@ -4479,7 +4475,7 @@  bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (i915_is_ggtt(vma->vm) &&
+		if (vma->is_ggtt &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
@@ -4526,7 +4522,7 @@  unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 	BUG_ON(list_empty(&o->vma_list));
 
 	list_for_each_entry(vma, &o->vma_list, obj_link) {
-		if (i915_is_ggtt(vma->vm) &&
+		if (vma->is_ggtt &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 56d6b5dbb121..c10795f58bfc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -683,7 +683,7 @@  need_reloc_mappable(struct i915_vma *vma)
 	if (entry->relocation_count == 0)
 		return false;
 
-	if (!i915_is_ggtt(vma->vm))
+	if (!vma->is_ggtt)
 		return false;
 
 	/* See also use_cpu_reloc() */
@@ -702,8 +702,7 @@  eb_vma_misplaced(struct i915_vma *vma)
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
-	       !i915_is_ggtt(vma->vm));
+	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b5c3bbe6dc2a..06117bd0fc00 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3150,6 +3150,7 @@  int i915_gem_gtt_init(struct drm_device *dev)
 	}
 
 	gtt->base.dev = dev;
+	gtt->base.is_ggtt = true;
 
 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
 			     &gtt->mappable_base, &gtt->mappable_end);
@@ -3258,13 +3259,14 @@  __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->is_ggtt = i915_is_ggtt(vm);
 
 	if (i915_is_ggtt(vm))
 		vma->ggtt_view = *ggtt_view;
+	else
+		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
 	list_add_tail(&vma->obj_link, &obj->vma_list);
-	if (!i915_is_ggtt(vm))
-		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
 	return vma;
 }
@@ -3536,13 +3538,9 @@  int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return 0;
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma->vm,
-				    vma->node.start,
-				    vma->node.size,
-				    VM_TO_TRACE_NAME(vma->vm));
-
 		/* XXX: i915_vma_pin() will fix this +- hack */
 		vma->pin_count++;
+		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index cb796c1ff6a5..633b9b2e1acb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -184,6 +184,7 @@  struct i915_vma {
 #define GLOBAL_BIND	(1<<0)
 #define LOCAL_BIND	(1<<1)
 	unsigned int bound : 4;
+	bool is_ggtt : 1;
 
 	/**
 	 * Support different GGTT views into the same object.
@@ -276,6 +277,8 @@  struct i915_address_space {
 	u64 start;		/* Start offset always 0 for dri2 */
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 
+	bool is_ggtt;
+
 	struct i915_page_scratch *scratch_page;
 	struct i915_page_table *scratch_pt;
 	struct i915_page_directory *scratch_pd;
@@ -331,6 +334,8 @@  struct i915_address_space {
 			u32 flags);
 };
 
+#define i915_is_ggtt(V) ((V)->is_ggtt)
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 85469e3c740a..e486dcef508d 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -175,35 +175,24 @@  TRACE_EVENT(i915_vma_unbind,
 		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
-#define VM_TO_TRACE_NAME(vm) \
-	(i915_is_ggtt(vm) ? "G" : \
-		      "P")
-
-DECLARE_EVENT_CLASS(i915_va,
-	TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
-	TP_ARGS(vm, start, length, name),
+TRACE_EVENT(i915_va_alloc,
+	TP_PROTO(struct i915_vma *vma),
+	TP_ARGS(vma),
 
 	TP_STRUCT__entry(
 		__field(struct i915_address_space *, vm)
 		__field(u64, start)
 		__field(u64, end)
-		__string(name, name)
 	),
 
 	TP_fast_assign(
-		__entry->vm = vm;
-		__entry->start = start;
-		__entry->end = start + length - 1;
-		__assign_str(name, name);
+		__entry->vm = vma->vm;
+		__entry->start = vma->node.start;
+		__entry->end = vma->node.start + vma->node.size - 1;
 	),
 
-	TP_printk("vm=%p (%s), 0x%llx-0x%llx",
-		  __entry->vm, __get_str(name),  __entry->start, __entry->end)
-);
-
-DEFINE_EVENT(i915_va, i915_va_alloc,
-	     TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
-	     TP_ARGS(vm, start, length, name)
+	TP_printk("vm=%p (%c), 0x%llx-0x%llx",
+		  __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P',  __entry->start, __entry->end)
 );
 
 DECLARE_EVENT_CLASS(i915_px_entry,