diff mbox

drm/i915: Revoke partial fences when installing on the scanout

Message ID 20161222135224.11886-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Dec. 22, 2016, 1:52 p.m. UTC
In commit 50349247ea80 ("drm/i915: Drop ORIGIN_GTT for untracked GTT
writes") partial mmaps were updated to indicate that writes through them
were not tracked automatically by the hardware and that the expected
subsequent manual invalidations by the application (on calling dirtyfb at
the end of the frame) take over from the hardware tracking. However, not
all applications actually call dirtyfb on the scanout after they dirty it
and so those writes through partial GTT mmaps are not being tracked and
triggering FBC updates.

Fixes: a61007a83a46 ("drm/i915: Fix partial GGTT faulting")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: "Zanoni, Paulo R" <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c        | 25 ++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
 drivers/gpu/drm/i915/intel_display.c   | 10 +++++++++-
 3 files changed, 31 insertions(+), 6 deletions(-)

Comments

Chris Wilson Dec. 22, 2016, 3:26 p.m. UTC | #1
On Thu, Dec 22, 2016 at 01:52:24PM +0000, Chris Wilson wrote:
> In commit 50349247ea80 ("drm/i915: Drop ORIGIN_GTT for untracked GTT
> writes") partial mmaps were updated to indicate that writes through them
> were not tracked automatically by the hardware and that the expected
> subsequent manual invalidations by the application (on calling dirtyfb at
> the end of the frame) take over from the hardware tracking. However, not
> all applications actually call dirtyfb on the scanout after they dirty it
> and so those writes through partial GTT mmaps are not being tracked and
> triggering FBC updates.
> 
> Fixes: a61007a83a46 ("drm/i915: Fix partial GGTT faulting")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@intel.com>
> Cc: "Zanoni, Paulo R" <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c        | 25 ++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
>  drivers/gpu/drm/i915/intel_display.c   | 10 +++++++++-
>  3 files changed, 31 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f379c5484a84..d51c9b209837 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1882,11 +1882,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  			compute_partial_view(obj, area,
>  					     page_offset, MIN_CHUNK_PAGES);
>  
> -		/* Userspace is now writing through an untracked VMA, abandon
> -		 * all hope that the hardware is able to track future writes.
> -		 */
> -		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
> -
>  		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
>  	}
>  	if (IS_ERR(vma)) {
> @@ -2015,6 +2010,26 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	intel_runtime_pm_put(i915);
>  }
>  
> +bool i915_gem_object_has_partial_fences(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma;
> +
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> +	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +		if (!i915_vma_is_ggtt(vma))
> +			break;
> +
> +		if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)

if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)

> +			continue;

Ofc.
-Chris
Zanoni, Paulo R Dec. 22, 2016, 8:39 p.m. UTC | #2
Em Qui, 2016-12-22 às 13:52 +0000, Chris Wilson escreveu:
> In commit 50349247ea80 ("drm/i915: Drop ORIGIN_GTT for untracked GTT
> writes") partial mmaps were updated to indicate that writes through
> them
> were not tracked automatically by the hardware and that the expected
> subsequent manual invalidations by the application (on calling
> dirtyfb at
> the end of the frame) take over from the hardware tracking. However,
> not
> all applications actually call dirtyfb on the scanout after they
> dirty it
> and so those writes through partial GTT mmaps are not being tracked
> and
> triggering FBC updates.

Since the application in question here is IGT, and IGT is generally not
considered a real API/ABI user to enforce backwards compatibility
forever, I can make the required changes to IGT in case we conclude
that's the appropriate way to go, just tell me. But if that's the case,
I really think we should try to sit down and write what are the
expectations for frontbuffer rendering in user space code, because it
seems to me that these expectations are changing over time...

> 
> Fixes: a61007a83a46 ("drm/i915: Fix partial GGTT faulting")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@intel.com>
> Cc: "Zanoni, Paulo R" <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c        | 25 ++++++++++++++++++++--
> ---
>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
>  drivers/gpu/drm/i915/intel_display.c   | 10 +++++++++-
>  3 files changed, 31 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> index f379c5484a84..d51c9b209837 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1882,11 +1882,6 @@ int i915_gem_fault(struct vm_area_struct
> *area, struct vm_fault *vmf)
>  			compute_partial_view(obj, area,
>  					     page_offset,
> MIN_CHUNK_PAGES);
>  
> -		/* Userspace is now writing through an untracked
> VMA, abandon
> -		 * all hope that the hardware is able to track
> future writes.
> -		 */
> -		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
> -
>  		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0,
> PIN_MAPPABLE);
>  	}
>  	if (IS_ERR(vma)) {
> @@ -2015,6 +2010,26 @@ i915_gem_release_mmap(struct
> drm_i915_gem_object *obj)
>  	intel_runtime_pm_put(i915);
>  }
>  
> +bool i915_gem_object_has_partial_fences(struct drm_i915_gem_object
> *obj)
> +{
> +	struct i915_vma *vma;
> +
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> +	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +		if (!i915_vma_is_ggtt(vma))
> +			break;
> +
> +		if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
> +			continue;

I also see your other email, and excuse my weak gem-fu, but both
options will run the code below for rotated views. Shouldn't we
explicitly be checking here for I915_GGTT_VIEW_PARTIAL?

Anyway, I tested your two versions of this patch and both (!) seemed to
have got rid of the problem I was seeing (why?). The problem was
previously reproducible between 20-50% of the time [0], and I ran ~10
test runs for both patches without seeing the errors, so the
probability that I just got lucky in these runs is small (although
existing).

I do still think that there could be some other sort of hidden problem
lying around here (some leak? corruption?) since the problem was not
happening 100% of the time, but whatever it is, it seems to get solved
by both versions of your patch. Can you theorize why?

Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

Thanks for the help!

[0]: How to reproduce the problem:

Run this on Skylake with 2 monitors attached:
sudo ./kms_frontbuffer_tracking --run-subtest '*fbc-[^2]*' 2>&1 | tee
~/fbc.txt

Eventually one of the mmap-gtt tests will fail, claiming that FBC is
disabled.

> +
> +		if (vma->fence)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
>  {
>  	struct drm_i915_gem_object *obj, *on;
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h
> b/drivers/gpu/drm/i915/i915_gem_object.h
> index 36e0c50e2099..03c066cbdbfd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -333,5 +333,7 @@ i915_gem_object_last_write_engine(struct
> drm_i915_gem_object *obj)
>  	return engine;
>  }
>  
> +bool i915_gem_object_has_partial_fences(struct drm_i915_gem_object
> *obj);
> +
>  #endif
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index aaec753e510e..89d8dffed546 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2236,8 +2236,16 @@ intel_pin_and_fence_fb_obj(struct
> drm_framebuffer *fb, unsigned int rotation)
>  		 * something and try to run the system in a "less
> than optimal"
>  		 * mode that matches the user configuration.
>  		 */
> -		if (i915_vma_get_fence(vma) == 0)
> +		if (i915_vma_get_fence(vma) == 0) {
>  			i915_vma_pin_fence(vma);
> +
> +			/* Revoke any partial fences and force users
> of
> +			 * those mmapings to transfer over to this
> fence
> +			 * under the watchful gaze of FBC.
> +			 */
> +			if (i915_gem_object_has_partial_fences(obj))
> +				i915_gem_release_mmap(obj);
> +		}
>  	}
>  
>  	i915_vma_get(vma);
Chris Wilson Dec. 22, 2016, 9:07 p.m. UTC | #3
On Thu, Dec 22, 2016 at 06:39:39PM -0200, Paulo Zanoni wrote:
> Em Qui, 2016-12-22 às 13:52 +0000, Chris Wilson escreveu:
> > In commit 50349247ea80 ("drm/i915: Drop ORIGIN_GTT for untracked GTT
> > writes") partial mmaps were updated to indicate that writes through
> > them
> > were not tracked automatically by the hardware and that the expected
> > subsequent manual invalidations by the application (on calling
> > dirtyfb at
> > the end of the frame) take over from the hardware tracking. However,
> > not
> > all applications actually call dirtyfb on the scanout after they
> > dirty it
> > and so those writes through partial GTT mmaps are not being tracked
> > and
> > triggering FBC updates.
> 
> Since the application in question here is IGT, and IGT is generally not
> considered a real API/ABI user to enforce backwards compatibility
> forever, I can make the required changes to IGT in case we conclude
> that's the appropriate way to go, just tell me. But if that's the case,
> I really think we should try to sit down and write what are the
> expectations for frontbuffer rendering in user space code, because it
> seems to me that these expectations are changing over time...
> 
> > 
> > Fixes: a61007a83a46 ("drm/i915: Fix partial GGTT faulting")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter@intel.com>
> > Cc: "Zanoni, Paulo R" <paulo.r.zanoni@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c        | 25 ++++++++++++++++++++--
> > ---
> >  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
> >  drivers/gpu/drm/i915/intel_display.c   | 10 +++++++++-
> >  3 files changed, 31 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index f379c5484a84..d51c9b209837 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1882,11 +1882,6 @@ int i915_gem_fault(struct vm_area_struct
> > *area, struct vm_fault *vmf)
> >  			compute_partial_view(obj, area,
> >  					     page_offset,
> > MIN_CHUNK_PAGES);
> >  
> > -		/* Userspace is now writing through an untracked
> > VMA, abandon
> > -		 * all hope that the hardware is able to track
> > future writes.
> > -		 */
> > -		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
> > -
> >  		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0,
> > PIN_MAPPABLE);
> >  	}
> >  	if (IS_ERR(vma)) {
> > @@ -2015,6 +2010,26 @@ i915_gem_release_mmap(struct
> > drm_i915_gem_object *obj)
> >  	intel_runtime_pm_put(i915);
> >  }
> >  
> > +bool i915_gem_object_has_partial_fences(struct drm_i915_gem_object
> > *obj)
> > +{
> > +	struct i915_vma *vma;
> > +
> > +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> > +
> > +	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> > +		if (!i915_vma_is_ggtt(vma))
> > +			break;
> > +
> > +		if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
> > +			continue;
> 
> I also see your other email, and excuse my weak gem-fu, but both
> options will run the code below for rotated views. Shouldn't we
> explicitly be checking here for I915_GGTT_VIEW_PARTIAL?

Rotated views are not fenced or exposed via mmaps, but if they were they 
also would not share the FBC fenced and so need to be remapped to use
the right fence.
 
> Anyway, I tested your two versions of this patch and both (!) seemed to
> have got rid of the problem I was seeing (why?).

In your case we have both a normal fence and a partial fence, so
triggering the remapping of the partial fences happened in either case.

> The problem was
> previously reproducible between 20-50% of the time [0], and I ran ~10
> test runs for both patches without seeing the errors, so the
> probability that I just got lucky in these runs is small (although
> existing).
> 
> I do still think that there could be some other sort of hidden problem
> lying around here (some leak? corruption?) since the problem was not
> happening 100% of the time, but whatever it is, it seems to get solved
> by both versions of your patch. Can you theorize why?

It is a global state that is easily fragmented and by default we try not
to trigger evictions as stalling is not acceptable. We need to balance
this by having an asynchronous install of a new normal view if the we
can not move the existing normal view.

And yes, there is quite often a framebuffer leak, e.g. I currently have
2774 cursors, each with refcounts in the several hundred.
-Chris
Daniel Vetter Dec. 27, 2016, 2:52 p.m. UTC | #4
On Thu, Dec 22, 2016 at 06:39:39PM -0200, Paulo Zanoni wrote:
> Em Qui, 2016-12-22 às 13:52 +0000, Chris Wilson escreveu:
> > In commit 50349247ea80 ("drm/i915: Drop ORIGIN_GTT for untracked GTT
> > writes") partial mmaps were updated to indicate that writes through
> > them
> > were not tracked automatically by the hardware and that the expected
> > subsequent manual invalidations by the application (on calling
> > dirtyfb at
> > the end of the frame) take over from the hardware tracking. However,
> > not
> > all applications actually call dirtyfb on the scanout after they
> > dirty it
> > and so those writes through partial GTT mmaps are not being tracked
> > and
> > triggering FBC updates.
> 
> Since the application in question here is IGT, and IGT is generally not
> considered a real API/ABI user to enforce backwards compatibility
> forever, I can make the required changes to IGT in case we conclude
> that's the appropriate way to go, just tell me. But if that's the case,
> I really think we should try to sit down and write what are the
> expectations for frontbuffer rendering in user space code, because it
> seems to me that these expectations are changing over time...

Yeah, same here. Afaiui it's not resulting in any functional issues (like
outdated screen contents), just fbc not gettting re-enabled. I think just
expecting all userspace that cares to call dirtyfb, and adjusting IGT is
the more reasonable option. We're still making a best effort at keeping
fbc working for userspace that doesn't, but trying to make it perfect is
probably a long game of whack-a-mole. I don't think that's worth it.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f379c5484a84..d51c9b209837 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1882,11 +1882,6 @@  int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 			compute_partial_view(obj, area,
 					     page_offset, MIN_CHUNK_PAGES);
 
-		/* Userspace is now writing through an untracked VMA, abandon
-		 * all hope that the hardware is able to track future writes.
-		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
-
 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 	}
 	if (IS_ERR(vma)) {
@@ -2015,6 +2010,26 @@  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	intel_runtime_pm_put(i915);
 }
 
+bool i915_gem_object_has_partial_fences(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!i915_vma_is_ggtt(vma))
+			break;
+
+		if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
+			continue;
+
+		if (vma->fence)
+			return true;
+	}
+
+	return false;
+}
+
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 36e0c50e2099..03c066cbdbfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -333,5 +333,7 @@  i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 	return engine;
 }
 
+bool i915_gem_object_has_partial_fences(struct drm_i915_gem_object *obj);
+
 #endif
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index aaec753e510e..89d8dffed546 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2236,8 +2236,16 @@  intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 		 * something and try to run the system in a "less than optimal"
 		 * mode that matches the user configuration.
 		 */
-		if (i915_vma_get_fence(vma) == 0)
+		if (i915_vma_get_fence(vma) == 0) {
 			i915_vma_pin_fence(vma);
+
+			/* Revoke any partial fences and force users of
+			 * those mmapings to transfer over to this fence
+			 * under the watchful gaze of FBC.
+			 */
+			if (i915_gem_object_has_partial_fences(obj))
+				i915_gem_release_mmap(obj);
+		}
 	}
 
 	i915_vma_get(vma);