diff mbox

[v3] drm/i915: Simplify and fix object to display tracking

Message ID 1427806541-18485-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin March 31, 2015, 12:55 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Purpose of this tracking is to know when to flush the cache between
the CPU and the non-coherent display engine. Prior to:

   commit 121920faf2ccce9aa66a7e2588415c9647b66104
   Author: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
   Date:   Mon Mar 23 11:10:37 2015 +0000

       drm/i915/skl: Query display address through a wrapper

This worked by a mix of direct flag manipulation and checking for
existence of a pinned GGTT VMA.

With the introduction of rotated display mappings this approach is
no longer correct.

New simpler approach is to just keep this count over calls which pin
and unpin objects to and from display, at the slight cost of extra
space in every bo.

(Inspired and extracted code from a larger rework by Chris Wilson.)

v2: Remove the limit since it is not well defined. (Chris Wilson, Ville Syrjälä)
v3: Commit message corrections. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c | 30 ++++++------------------------
 2 files changed, 8 insertions(+), 25 deletions(-)

Comments

Joonas Lahtinen March 31, 2015, 1:10 p.m. UTC | #1
On ti, 2015-03-31 at 13:55 +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Purpose of this tracking is to know when to flush the cache between
> the CPU and the non-coherent display engine. Prior to:
> 
>    commit 121920faf2ccce9aa66a7e2588415c9647b66104
>    Author: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>    Date:   Mon Mar 23 11:10:37 2015 +0000
> 
>        drm/i915/skl: Query display address through a wrapper
> 
> This worked by a mix of direct flag manipulation and checking for
> existence of a pinned GGTT VMA.
> 
> With the introduction of rotated display mappings this approach is
> no longer correct.
> 
> New simpler approach is to just keep this count over calls which pin
> and unpin objects to and from display, at the slight cost of extra
> space in every bo.
> 
> (Inspired and extracted code from a larger rework by Chris Wilson.)
> 
> v2: Remove the limit since it is not well defined. (Chris Wilson, Ville Syrjälä)
> v3: Commit message corrections. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  3 ++-
>  drivers/gpu/drm/i915/i915_gem.c | 30 ++++++------------------------
>  2 files changed, 8 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4ef320c..37abd58 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1969,7 +1969,6 @@ struct drm_i915_gem_object {
>  	 */
>  	unsigned int fault_mappable:1;
>  	unsigned int pin_mappable:1;
> -	unsigned int pin_display:1;
>  
>  	/*
>  	 * Is the object to be mapped as read-only to the GPU
> @@ -1983,6 +1982,8 @@ struct drm_i915_gem_object {
>  
>  	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
>  
> +	unsigned int pin_display;
> +

Here. Comment below.

>  	struct sg_table *pages;
>  	int pages_pin_count;
>  

How about naming it like the variable just below it (pages_pin_count).
So let it be "display_pin_count", and make it just "int" too, I don't
think the 1 bit reduction in pin count matters really, but having sign
makes it useful for detecting negative pin count errors? At least I'd
make it signed integer and make some of the spots WARN_ON(xxx--- < 0).

Regards, Joonas

> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 3a91365..8b75dab 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3849,24 +3849,6 @@ unlock:
>  	return ret;
>  }
>  
> -static bool is_pin_display(struct drm_i915_gem_object *obj)
> -{
> -	struct i915_vma *vma;
> -
> -	vma = i915_gem_obj_to_ggtt(obj);
> -	if (!vma)
> -		return false;
> -
> -	/* There are 2 sources that pin objects:
> -	 *   1. The display engine (scanouts, sprites, cursors);
> -	 *   2. Reservations for execbuffer;
> -	 *
> -	 * We can ignore reservations as we hold the struct_mutex and
> -	 * are only called outside of the reservation path.
> -	 */
> -	return vma->pin_count;
> -}
> -
>  /*
>   * Prepare buffer for display plane (scanout, cursors, etc).
>   * Can be called from an uninterruptible phase (modesetting) and allows
> @@ -3879,7 +3861,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     const struct i915_ggtt_view *view)
>  {
>  	u32 old_read_domains, old_write_domain;
> -	bool was_pin_display;
>  	int ret;
>  
>  	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
> @@ -3891,8 +3872,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	/* Mark the pin_display early so that we account for the
>  	 * display coherency whilst setting up the cache domains.
>  	 */
> -	was_pin_display = obj->pin_display;
> -	obj->pin_display = true;
> +	obj->pin_display++;
>  
>  	/* The display engine is not coherent with the LLC cache on gen6.  As
>  	 * a result, we make sure that the pinning that is about to occur is
> @@ -3936,8 +3916,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	return 0;
>  
>  err_unpin_display:
> -	WARN_ON(was_pin_display != is_pin_display(obj));
> -	obj->pin_display = was_pin_display;
> +	obj->pin_display--;
>  	return ret;
>  }
>  
> @@ -3945,9 +3924,12 @@ void
>  i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
>  					 const struct i915_ggtt_view *view)
>  {
> +	if (WARN_ON(obj->pin_display == 0))
> +		return;
> +
>  	i915_gem_object_ggtt_unpin_view(obj, view);
>  
> -	obj->pin_display = is_pin_display(obj);
> +	obj->pin_display--;
>  }
>  
>  int
Tvrtko Ursulin March 31, 2015, 1:15 p.m. UTC | #2
Hi,

On 03/31/2015 02:10 PM, Joonas Lahtinen wrote:
> On ti, 2015-03-31 at 13:55 +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Purpose of this tracking is to know when to flush the cache between
>> the CPU and the non-coherent display engine. Prior to:
>>
>>     commit 121920faf2ccce9aa66a7e2588415c9647b66104
>>     Author: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>     Date:   Mon Mar 23 11:10:37 2015 +0000
>>
>>         drm/i915/skl: Query display address through a wrapper
>>
>> This worked by a mix of direct flag manipulation and checking for
>> existence of a pinned GGTT VMA.
>>
>> With the introduction of rotated display mappings this approach is
>> no longer correct.
>>
>> New simpler approach is to just keep this count over calls which pin
>> and unpin objects to and from display, at the slight cost of extra
>> space in every bo.
>>
>> (Inspired and extracted code from a larger rework by Chris Wilson.)
>>
>> v2: Remove the limit since it is not well defined. (Chris Wilson, Ville Syrjälä)
>> v3: Commit message corrections. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h |  3 ++-
>>   drivers/gpu/drm/i915/i915_gem.c | 30 ++++++------------------------
>>   2 files changed, 8 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 4ef320c..37abd58 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1969,7 +1969,6 @@ struct drm_i915_gem_object {
>>   	 */
>>   	unsigned int fault_mappable:1;
>>   	unsigned int pin_mappable:1;
>> -	unsigned int pin_display:1;
>>
>>   	/*
>>   	 * Is the object to be mapped as read-only to the GPU
>> @@ -1983,6 +1982,8 @@ struct drm_i915_gem_object {
>>
>>   	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
>>
>> +	unsigned int pin_display;
>> +
>
> Here. Comment below.
>
>>   	struct sg_table *pages;
>>   	int pages_pin_count;
>>
>
> How about naming it like the variable just below it (pages_pin_count).
> So let it be "display_pin_count", and make it just "int" too, I don't
> think the 1 bit reduction in pin count matters really, but having sign
> makes it useful for detecting negative pin count errors? At least I'd
> make it signed integer and make some of the spots WARN_ON(xxx--- < 0).

It can't go negative due check in i915_gem_object_unpin_from_display_plane.

Regards,

Tvrtko
Chris Wilson March 31, 2015, 1:18 p.m. UTC | #3
On Tue, Mar 31, 2015 at 04:10:05PM +0300, Joonas Lahtinen wrote:
> On ti, 2015-03-31 at 13:55 +0100, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > Purpose of this tracking is to know when to flush the cache between
> > the CPU and the non-coherent display engine. Prior to:
> > 
> >    commit 121920faf2ccce9aa66a7e2588415c9647b66104
> >    Author: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >    Date:   Mon Mar 23 11:10:37 2015 +0000
> > 
> >        drm/i915/skl: Query display address through a wrapper
> > 
> > This worked by a mix of direct flag manipulation and checking for
> > existence of a pinned GGTT VMA.
> > 
> > With the introduction of rotated display mappings this approach is
> > no longer correct.
> > 
> > New simpler approach is to just keep this count over calls which pin
> > and unpin objects to and from display, at the slight cost of extra
> > space in every bo.
> > 
> > (Inspired and extracted code from a larger rework by Chris Wilson.)
> > 
> > v2: Remove the limit since it is not well defined. (Chris Wilson, Ville Syrjälä)
> > v3: Commit message corrections. (Chris Wilson)
> > 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h |  3 ++-
> >  drivers/gpu/drm/i915/i915_gem.c | 30 ++++++------------------------
> >  2 files changed, 8 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 4ef320c..37abd58 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1969,7 +1969,6 @@ struct drm_i915_gem_object {
> >  	 */
> >  	unsigned int fault_mappable:1;
> >  	unsigned int pin_mappable:1;
> > -	unsigned int pin_display:1;
> >  
> >  	/*
> >  	 * Is the object to be mapped as read-only to the GPU
> > @@ -1983,6 +1982,8 @@ struct drm_i915_gem_object {
> >  
> >  	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> >  
> > +	unsigned int pin_display;
> > +
> 
> Here. Comment below.
> 
> >  	struct sg_table *pages;
> >  	int pages_pin_count;
> >  
> 
> How about naming it like the variable just below it (pages_pin_count).
> So let it be "display_pin_count", and make it just "int" too, I don't
> think the 1 bit reduction in pin count matters really, but having sign
> makes it useful for detecting negative pin count errors? At least I'd
> make it signed integer and make some of the spots WARN_ON(xxx--- < 0).

We already have the counter underflow detection. We could just as well
make pages_pin_count unsigned. I blame my own laziness.
-Chris
Shuang He April 1, 2015, 3:41 a.m. UTC | #4
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6104
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -4              272/272              268/272
ILK                                  302/302              302/302
SNB                                  303/303              303/303
IVB                                  338/338              338/338
BYT                                  287/287              287/287
HSW                                  361/361              361/361
BDW                                  308/308              308/308
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
 PNV  igt@gem_userptr_blits@coherency-sync      CRASH(2)PASS(3)      CRASH(2)
*PNV  igt@gem_fence_thrash@bo-write-verify-threaded-none      PASS(3)      CRASH(1)PASS(1)
 PNV  igt@gem_tiled_pread_pwrite      FAIL(3)PASS(2)      FAIL(2)
 PNV  igt@gen3_render_tiledx_blits      FAIL(3)PASS(1)      FAIL(2)
Note: You need to pay more attention to line start with '*'
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4ef320c..37abd58 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1969,7 +1969,6 @@  struct drm_i915_gem_object {
 	 */
 	unsigned int fault_mappable:1;
 	unsigned int pin_mappable:1;
-	unsigned int pin_display:1;
 
 	/*
 	 * Is the object to be mapped as read-only to the GPU
@@ -1983,6 +1982,8 @@  struct drm_i915_gem_object {
 
 	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
 
+	unsigned int pin_display;
+
 	struct sg_table *pages;
 	int pages_pin_count;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3a91365..8b75dab 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3849,24 +3849,6 @@  unlock:
 	return ret;
 }
 
-static bool is_pin_display(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	vma = i915_gem_obj_to_ggtt(obj);
-	if (!vma)
-		return false;
-
-	/* There are 2 sources that pin objects:
-	 *   1. The display engine (scanouts, sprites, cursors);
-	 *   2. Reservations for execbuffer;
-	 *
-	 * We can ignore reservations as we hold the struct_mutex and
-	 * are only called outside of the reservation path.
-	 */
-	return vma->pin_count;
-}
-
 /*
  * Prepare buffer for display plane (scanout, cursors, etc).
  * Can be called from an uninterruptible phase (modesetting) and allows
@@ -3879,7 +3861,6 @@  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view)
 {
 	u32 old_read_domains, old_write_domain;
-	bool was_pin_display;
 	int ret;
 
 	if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
@@ -3891,8 +3872,7 @@  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
-	was_pin_display = obj->pin_display;
-	obj->pin_display = true;
+	obj->pin_display++;
 
 	/* The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
@@ -3936,8 +3916,7 @@  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	return 0;
 
 err_unpin_display:
-	WARN_ON(was_pin_display != is_pin_display(obj));
-	obj->pin_display = was_pin_display;
+	obj->pin_display--;
 	return ret;
 }
 
@@ -3945,9 +3924,12 @@  void
 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 					 const struct i915_ggtt_view *view)
 {
+	if (WARN_ON(obj->pin_display == 0))
+		return;
+
 	i915_gem_object_ggtt_unpin_view(obj, view);
 
-	obj->pin_display = is_pin_display(obj);
+	obj->pin_display--;
 }
 
 int