diff mbox

[2/2] drm/i915: Move the policy for placement of the GGTT vma into the caller

Message ID 20180220105011.11651-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Feb. 20, 2018, 10:50 a.m. UTC
Currently we make the unilateral decision inside
i915_gem_object_pin_to_display() where the VMA should resided (inside
the fence and mappable region or above?). This is not our decision to
make as it impacts on how the display engine can use the resulting
scanout object, and it would rather instruct us where to place the VMA so
that it can enable the features it wants. As such, make the pin flags an
argument to i915_gem_object_pin_to_display() and control them from
intel_pin_and_fence_fb_obj()

Whilst taking control of the mapping for ourselves, start tracking how
we use it to avoid trying to free a fence we never claimed:

<3>[  227.151869] GEM_BUG_ON(vma->fence->pin_count <= 0)
<4>[  227.152064] ------------[ cut here ]------------
<2>[  227.152068] kernel BUG at drivers/gpu/drm/i915/i915_vma.h:391!
<4>[  227.152084] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
<0>[  227.152092] Dumping ftrace buffer:
<0>[  227.152099]    (ftrace buffer empty)
<4>[  227.152102] Modules linked in: i915 snd_hda_codec_analog snd_hda_codec_generic coretemp snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm lpc_ich e1000e mei_me mei prime_numbers
<4>[  227.152131] CPU: 1 PID: 1587 Comm: kworker/u16:49 Tainted: G     U           4.16.0-rc1-gbab67b2f6177-kasan_7+ #1
<4>[  227.152134] Hardware name: Dell Inc. OptiPlex 755                 /0PU052, BIOS A08 02/19/2008
<4>[  227.152236] Workqueue: events_unbound intel_atomic_commit_work [i915]
<4>[  227.152292] RIP: 0010:intel_unpin_fb_vma+0x23a/0x2a0 [i915]
<4>[  227.152295] RSP: 0018:ffff88005aad7b68 EFLAGS: 00010286
<4>[  227.152300] RAX: 0000000000000026 RBX: ffff88005c359580 RCX: 0000000000000000
<4>[  227.152304] RDX: 0000000000000026 RSI: ffffffff8707d840 RDI: ffffed000b55af63
<4>[  227.152307] RBP: ffff880056817e58 R08: 0000000000000001 R09: 0000000000000000
<4>[  227.152311] R10: ffff88005aad7b88 R11: 0000000000000000 R12: ffff8800568184d0
<4>[  227.152314] R13: ffff880065b5ab08 R14: 0000000000000000 R15: dffffc0000000000
<4>[  227.152318] FS:  0000000000000000(0000) GS:ffff88006ac40000(0000) knlGS:0000000000000000
<4>[  227.152322] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  227.152325] CR2: 00007f5fb25550a8 CR3: 0000000068c78000 CR4: 00000000000006e0
<4>[  227.152328] Call Trace:
<4>[  227.152385]  intel_cleanup_plane_fb+0x6b/0xd0 [i915]
<4>[  227.152395]  drm_atomic_helper_cleanup_planes+0x166/0x280
<4>[  227.152452]  intel_atomic_commit_tail+0x159d/0x3380 [i915]
<4>[  227.152463]  ? process_one_work+0x66e/0x1460
<4>[  227.152516]  ? skl_update_crtcs+0x9c0/0x9c0 [i915]
<4>[  227.152523]  ? lock_acquire+0x13d/0x390
<4>[  227.152527]  ? lock_acquire+0x13d/0x390
<4>[  227.152534]  process_one_work+0x71a/0x1460
<4>[  227.152540]  ? __schedule+0x815/0x1e20
<4>[  227.152547]  ? pwq_dec_nr_in_flight+0x2b0/0x2b0
<4>[  227.152553]  ? _raw_spin_lock_irq+0xa/0x40
<4>[  227.152559]  worker_thread+0xdf/0xf60
<4>[  227.152569]  ? process_one_work+0x1460/0x1460
<4>[  227.152573]  kthread+0x2cf/0x3c0
<4>[  227.152578]  ? _kthread_create_on_node+0xa0/0xa0
<4>[  227.152583]  ret_from_fork+0x3a/0x50
<4>[  227.152591] Code: c6 00 11 86 c0 48 c7 c7 e0 bd 85 c0 e8 60 e7 a9 c4 0f ff e9 1f fe ff ff 48 c7 c6 40 10 86 c0 48 c7 c7 e0 ca 85 c0 e8 2b 95 bd c4 <0f> 0b 48 89 ef e8 4c 44 e8 c4 e9 ef fd ff ff e8 42 44 e8 c4 e9
<1>[  227.152720] RIP: intel_unpin_fb_vma+0x23a/0x2a0 [i915] RSP: ffff88005aad7b68

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h           |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c           | 26 ++++++------------
 drivers/gpu/drm/i915/intel_atomic_plane.c |  1 +
 drivers/gpu/drm/i915/intel_display.c      | 45 ++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_drv.h          |  9 +++++--
 drivers/gpu/drm/i915/intel_fbdev.c        | 10 ++++---
 drivers/gpu/drm/i915/intel_overlay.c      |  3 ++-
 7 files changed, 62 insertions(+), 35 deletions(-)

Comments

Ville Syrjala Feb. 20, 2018, 11:39 a.m. UTC | #1
On Tue, Feb 20, 2018 at 10:50:11AM +0000, Chris Wilson wrote:
> Currently we make the unilateral decision inside
> i915_gem_object_pin_to_display() where the VMA should resided (inside
> the fence and mappable region or above?). This is not our decision to
> make as it impacts on how the display engine can use the resulting
> scanout object, and it would rather instruct us where to place the VMA so
> that it can enable the features it wants. As such, make the pin flags an
> argument to i915_gem_object_pin_to_display() and control them from
> intel_pin_and_fence_fb_obj()
> 
> Whilst taking control of the mapping for ourselves, start tracking how
> we use it to avoid trying to free a fence we never claimed:
> 
> <3>[  227.151869] GEM_BUG_ON(vma->fence->pin_count <= 0)
> <4>[  227.152064] ------------[ cut here ]------------
> <2>[  227.152068] kernel BUG at drivers/gpu/drm/i915/i915_vma.h:391!
> <4>[  227.152084] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
> <0>[  227.152092] Dumping ftrace buffer:
> <0>[  227.152099]    (ftrace buffer empty)
> <4>[  227.152102] Modules linked in: i915 snd_hda_codec_analog snd_hda_codec_generic coretemp snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm lpc_ich e1000e mei_me mei prime_numbers
> <4>[  227.152131] CPU: 1 PID: 1587 Comm: kworker/u16:49 Tainted: G     U           4.16.0-rc1-gbab67b2f6177-kasan_7+ #1
> <4>[  227.152134] Hardware name: Dell Inc. OptiPlex 755                 /0PU052, BIOS A08 02/19/2008
> <4>[  227.152236] Workqueue: events_unbound intel_atomic_commit_work [i915]
> <4>[  227.152292] RIP: 0010:intel_unpin_fb_vma+0x23a/0x2a0 [i915]
> <4>[  227.152295] RSP: 0018:ffff88005aad7b68 EFLAGS: 00010286
> <4>[  227.152300] RAX: 0000000000000026 RBX: ffff88005c359580 RCX: 0000000000000000
> <4>[  227.152304] RDX: 0000000000000026 RSI: ffffffff8707d840 RDI: ffffed000b55af63
> <4>[  227.152307] RBP: ffff880056817e58 R08: 0000000000000001 R09: 0000000000000000
> <4>[  227.152311] R10: ffff88005aad7b88 R11: 0000000000000000 R12: ffff8800568184d0
> <4>[  227.152314] R13: ffff880065b5ab08 R14: 0000000000000000 R15: dffffc0000000000
> <4>[  227.152318] FS:  0000000000000000(0000) GS:ffff88006ac40000(0000) knlGS:0000000000000000
> <4>[  227.152322] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[  227.152325] CR2: 00007f5fb25550a8 CR3: 0000000068c78000 CR4: 00000000000006e0
> <4>[  227.152328] Call Trace:
> <4>[  227.152385]  intel_cleanup_plane_fb+0x6b/0xd0 [i915]
> <4>[  227.152395]  drm_atomic_helper_cleanup_planes+0x166/0x280
> <4>[  227.152452]  intel_atomic_commit_tail+0x159d/0x3380 [i915]
> <4>[  227.152463]  ? process_one_work+0x66e/0x1460
> <4>[  227.152516]  ? skl_update_crtcs+0x9c0/0x9c0 [i915]
> <4>[  227.152523]  ? lock_acquire+0x13d/0x390
> <4>[  227.152527]  ? lock_acquire+0x13d/0x390
> <4>[  227.152534]  process_one_work+0x71a/0x1460
> <4>[  227.152540]  ? __schedule+0x815/0x1e20
> <4>[  227.152547]  ? pwq_dec_nr_in_flight+0x2b0/0x2b0
> <4>[  227.152553]  ? _raw_spin_lock_irq+0xa/0x40
> <4>[  227.152559]  worker_thread+0xdf/0xf60
> <4>[  227.152569]  ? process_one_work+0x1460/0x1460
> <4>[  227.152573]  kthread+0x2cf/0x3c0
> <4>[  227.152578]  ? _kthread_create_on_node+0xa0/0xa0
> <4>[  227.152583]  ret_from_fork+0x3a/0x50
> <4>[  227.152591] Code: c6 00 11 86 c0 48 c7 c7 e0 bd 85 c0 e8 60 e7 a9 c4 0f ff e9 1f fe ff ff 48 c7 c6 40 10 86 c0 48 c7 c7 e0 ca 85 c0 e8 2b 95 bd c4 <0f> 0b 48 89 ef e8 4c 44 e8 c4 e9 ef fd ff ff e8 42 44 e8 c4 e9
> <1>[  227.152720] RIP: intel_unpin_fb_vma+0x23a/0x2a0 [i915] RSP: ffff88005aad7b68
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com
> ---
>  drivers/gpu/drm/i915/i915_drv.h           |  3 ++-
>  drivers/gpu/drm/i915/i915_gem.c           | 26 ++++++------------
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  1 +
>  drivers/gpu/drm/i915/intel_display.c      | 45 ++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_drv.h          |  9 +++++--
>  drivers/gpu/drm/i915/intel_fbdev.c        | 10 ++++---
>  drivers/gpu/drm/i915/intel_overlay.c      |  3 ++-
>  7 files changed, 62 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 76bfe909168c..20575b3ee406 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3413,7 +3413,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
>  struct i915_vma * __must_check
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
> -				     const struct i915_ggtt_view *view);
> +				     const struct i915_ggtt_view *view,
> +				     unsigned int flags);
>  void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
>  int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  				int align);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1854a69bc354..bdc2069d5433 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4078,7 +4078,8 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>  struct i915_vma *
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
> -				     const struct i915_ggtt_view *view)
> +				     const struct i915_ggtt_view *view,
> +				     unsigned int flags)
>  {
>  	struct i915_vma *vma;
>  	int ret;
> @@ -4115,25 +4116,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * try to preserve the existing ABI).
>  	 */
>  	vma = ERR_PTR(-ENOSPC);
> -	if (!view || view->type == I915_GGTT_VIEW_NORMAL)
> +	if ((flags & PIN_MAPPABLE) == 0 &&
> +	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
>  		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> -					       PIN_MAPPABLE | PIN_NONBLOCK);
> -	if (IS_ERR(vma)) {
> -		struct drm_i915_private *i915 = to_i915(obj->base.dev);
> -		unsigned int flags;
> -
> -		/* Valleyview is definitely limited to scanning out the first
> -		 * 512MiB. Lets presume this behaviour was inherited from the
> -		 * g4x display engine and that all earlier gen are similarly
> -		 * limited. Testing suggests that it is a little more
> -		 * complicated than this. For example, Cherryview appears quite
> -		 * happy to scanout from anywhere within its global aperture.
> -		 */
> -		flags = 0;
> -		if (HAS_GMCH_DISPLAY(i915))
> -			flags = PIN_MAPPABLE;
> +					       flags |
> +					       PIN_MAPPABLE |
> +					       PIN_NONBLOCK);
> +	if (IS_ERR(vma))
>  		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
> -	}
>  	if (IS_ERR(vma))
>  		goto err_unpin_global;
>  
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index 0ee32275994a..7481ce85746b 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -85,6 +85,7 @@ intel_plane_duplicate_state(struct drm_plane *plane)
>  	__drm_atomic_helper_plane_duplicate_state(plane, state);
>  
>  	intel_state->vma = NULL;
> +	intel_state->flags = 0;
>  
>  	return state;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 280c04326f64..8075e1990157 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2068,7 +2068,9 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
>  }
>  
>  struct i915_vma *
> -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
> +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
> +			   unsigned int rotation,
> +			   unsigned long *out_flags)
>  {
>  	struct drm_device *dev = fb->dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> @@ -2076,6 +2078,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  	struct i915_ggtt_view view;
>  	struct i915_vma *vma;
>  	u32 alignment;
> +	unsigned int pinctl;
>  
>  	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>  
> @@ -2102,7 +2105,20 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  
>  	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
>  
> -	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
> +	pinctl = 0;
> +
> +	/* Valleyview is definitely limited to scanning out the first
> +	 * 512MiB. Lets presume this behaviour was inherited from the
> +	 * g4x display engine and that all earlier gen are similarly
> +	 * limited. Testing suggests that it is a little more
> +	 * complicated than this. For example, Cherryview appears quite
> +	 * happy to scanout from anywhere within its global aperture.
> +	 */
> +	if (HAS_GMCH_DISPLAY(dev_priv))
> +		pinctl |= PIN_MAPPABLE;
> +
> +	vma = i915_gem_object_pin_to_display_plane(obj,
> +						   alignment, &view, pinctl);
>  	if (IS_ERR(vma))
>  		goto err;
>  
> @@ -2123,7 +2139,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  		 * something and try to run the system in a "less than optimal"
>  		 * mode that matches the user configuration.
>  		 */
> -		i915_vma_pin_fence(vma);
> +		if (i915_vma_pin_fence(vma) == 0)
> +			*out_flags |= PLANE_HAS_FENCE;
>  	}
>  
>  	i915_vma_get(vma);
> @@ -2134,11 +2151,12 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
>  	return vma;
>  }
>  
> -void intel_unpin_fb_vma(struct i915_vma *vma)
> +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
>  {
>  	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
>  
> -	i915_vma_unpin_fence(vma);
> +	if (flags & PLANE_HAS_FENCE)
> +		i915_vma_unpin_fence(vma);
>  	i915_gem_object_unpin_from_display_plane(vma);
>  	i915_vma_put(vma);
>  }
> @@ -2808,7 +2826,9 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
>  valid_fb:
>  	mutex_lock(&dev->struct_mutex);
>  	intel_state->vma =
> -		intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
> +		intel_pin_and_fence_fb_obj(fb,
> +					   primary->state->rotation,
> +					   &intel_state->flags);
>  	mutex_unlock(&dev->struct_mutex);
>  	if (IS_ERR(intel_state->vma)) {
>  		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
> @@ -12713,7 +12733,9 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>  	} else {
>  		struct i915_vma *vma;
>  
> -		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
> +		vma = intel_pin_and_fence_fb_obj(fb,
> +						 new_state->rotation,
> +						 &to_intel_plane_state(new_state)->flags);
>  		if (!IS_ERR(vma))
>  			to_intel_plane_state(new_state)->vma = vma;
>  		else
> @@ -12768,7 +12790,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
>  	vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma);
>  	if (vma) {
>  		mutex_lock(&plane->dev->struct_mutex);
> -		intel_unpin_fb_vma(vma);
> +		intel_unpin_fb_vma(vma, to_intel_plane_state(old_state)->flags);
>  		mutex_unlock(&plane->dev->struct_mutex);
>  	}
>  }
> @@ -13129,7 +13151,9 @@ intel_legacy_cursor_update(struct drm_plane *plane,
>  			goto out_unlock;
>  		}
>  	} else {
> -		vma = intel_pin_and_fence_fb_obj(fb, new_plane_state->rotation);
> +		vma = intel_pin_and_fence_fb_obj(fb,
> +						 new_plane_state->rotation,
> +						 &to_intel_plane_state(new_plane_state)->flags);
>  		if (IS_ERR(vma)) {
>  			DRM_DEBUG_KMS("failed to pin object\n");
>  
> @@ -13160,7 +13184,8 @@ intel_legacy_cursor_update(struct drm_plane *plane,
>  
>  	old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma);
>  	if (old_vma)
> -		intel_unpin_fb_vma(old_vma);
> +		intel_unpin_fb_vma(old_vma,
> +				   to_intel_plane_state(old_plane_state)->flags);
>  
>  out_unlock:
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 898064e8bea7..50874f4035cf 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -204,6 +204,7 @@ struct intel_fbdev {
>  	struct drm_fb_helper helper;
>  	struct intel_framebuffer *fb;
>  	struct i915_vma *vma;
> +	unsigned long vma_flags;
>  	async_cookie_t cookie;
>  	int preferred_bpp;
>  };
> @@ -490,6 +491,8 @@ struct intel_atomic_state {
>  struct intel_plane_state {
>  	struct drm_plane_state base;
>  	struct i915_vma *vma;
> +	unsigned long flags;

long seems potentially wasteful when we have just the one flag.
Although maybe the next thing gets aligned to 64bits anyway.

Anyways, patch looks reasonable.
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

fbc should probably start looking at the new flag instead of
using the racy vma->fence checks it has now.

> +#define PLANE_HAS_FENCE BIT(0)
>  
>  	struct {
>  		u32 offset;
> @@ -1503,8 +1506,10 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
>  				    struct intel_load_detect_pipe *old,
>  				    struct drm_modeset_acquire_ctx *ctx);
>  struct i915_vma *
> -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
> -void intel_unpin_fb_vma(struct i915_vma *vma);
> +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
> +			   unsigned int rotation,
> +			   unsigned long *out_flags);
> +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags);
>  struct drm_framebuffer *
>  intel_framebuffer_create(struct drm_i915_gem_object *obj,
>  			 struct drm_mode_fb_cmd2 *mode_cmd);
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index da48af11eb6b..3d8ce3a62743 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -177,6 +177,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	struct fb_info *info;
>  	struct drm_framebuffer *fb;
>  	struct i915_vma *vma;
> +	unsigned long flags = 0;
>  	bool prealloc = false;
>  	void __iomem *vaddr;
>  	int ret;
> @@ -211,7 +212,9 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	 * This also validates that any existing fb inherited from the
>  	 * BIOS is suitable for own access.
>  	 */
> -	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0);
> +	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base,
> +					 DRM_MODE_ROTATE_0,
> +					 &flags);
>  	if (IS_ERR(vma)) {
>  		ret = PTR_ERR(vma);
>  		goto out_unlock;
> @@ -268,6 +271,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
>  		      fb->width, fb->height, i915_ggtt_offset(vma));
>  	ifbdev->vma = vma;
> +	ifbdev->vma_flags = flags;
>  
>  	intel_runtime_pm_put(dev_priv);
>  	mutex_unlock(&dev->struct_mutex);
> @@ -275,7 +279,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	return 0;
>  
>  out_unpin:
> -	intel_unpin_fb_vma(vma);
> +	intel_unpin_fb_vma(vma, flags);
>  out_unlock:
>  	intel_runtime_pm_put(dev_priv);
>  	mutex_unlock(&dev->struct_mutex);
> @@ -513,7 +517,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
>  
>  	if (ifbdev->vma) {
>  		mutex_lock(&ifbdev->helper.dev->struct_mutex);
> -		intel_unpin_fb_vma(ifbdev->vma);
> +		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
>  		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 41e9465d44a8..89f568e739ee 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -801,7 +801,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  
>  	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
>  
> -	vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
> +	vma = i915_gem_object_pin_to_display_plane(new_bo,
> +						   0, NULL, PIN_MAPPABLE);
>  	if (IS_ERR(vma)) {
>  		ret = PTR_ERR(vma);
>  		goto out_pin_section;
> -- 
> 2.16.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Feb. 20, 2018, 11:45 a.m. UTC | #2
Quoting Ville Syrjälä (2018-02-20 11:39:13)
> On Tue, Feb 20, 2018 at 10:50:11AM +0000, Chris Wilson wrote:
> > @@ -490,6 +491,8 @@ struct intel_atomic_state {
> >  struct intel_plane_state {
> >       struct drm_plane_state base;
> >       struct i915_vma *vma;
> > +     unsigned long flags;
> 
> long seems potentially wasteful when we have just the one flag.
> Although maybe the next thing gets aligned to 64bits anyway.

Yeah, wasteful until you look at the holes. :) I tend to find I run out
of flag space all too quick so erred on the side of plenty of bits. Plus
it stops all the arguments about BIT() being UL but the type just U.

> Anyways, patch looks reasonable.
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> fbc should probably start looking at the new flag instead of
> using the racy vma->fence checks it has now.

Right, I was looking at how we could pass requirements like we want fbc
so try harder to give us a fence (with the usual cop-out of if we are
triple buffering 4k screens, we run out of mappable very quickly).
Hooking up to the plane->flags & HAS_FENCE is a good idea.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 76bfe909168c..20575b3ee406 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3413,7 +3413,8 @@  i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 struct i915_vma * __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     const struct i915_ggtt_view *view);
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags);
 void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 				int align);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1854a69bc354..bdc2069d5433 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4078,7 +4078,8 @@  int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 struct i915_vma *
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     const struct i915_ggtt_view *view)
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags)
 {
 	struct i915_vma *vma;
 	int ret;
@@ -4115,25 +4116,14 @@  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * try to preserve the existing ABI).
 	 */
 	vma = ERR_PTR(-ENOSPC);
-	if (!view || view->type == I915_GGTT_VIEW_NORMAL)
+	if ((flags & PIN_MAPPABLE) == 0 &&
+	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-					       PIN_MAPPABLE | PIN_NONBLOCK);
-	if (IS_ERR(vma)) {
-		struct drm_i915_private *i915 = to_i915(obj->base.dev);
-		unsigned int flags;
-
-		/* Valleyview is definitely limited to scanning out the first
-		 * 512MiB. Lets presume this behaviour was inherited from the
-		 * g4x display engine and that all earlier gen are similarly
-		 * limited. Testing suggests that it is a little more
-		 * complicated than this. For example, Cherryview appears quite
-		 * happy to scanout from anywhere within its global aperture.
-		 */
-		flags = 0;
-		if (HAS_GMCH_DISPLAY(i915))
-			flags = PIN_MAPPABLE;
+					       flags |
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK);
+	if (IS_ERR(vma))
 		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-	}
 	if (IS_ERR(vma))
 		goto err_unpin_global;
 
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 0ee32275994a..7481ce85746b 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -85,6 +85,7 @@  intel_plane_duplicate_state(struct drm_plane *plane)
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
 
 	intel_state->vma = NULL;
+	intel_state->flags = 0;
 
 	return state;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 280c04326f64..8075e1990157 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2068,7 +2068,9 @@  static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 }
 
 struct i915_vma *
-intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
+			   unsigned int rotation,
+			   unsigned long *out_flags)
 {
 	struct drm_device *dev = fb->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -2076,6 +2078,7 @@  intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	struct i915_ggtt_view view;
 	struct i915_vma *vma;
 	u32 alignment;
+	unsigned int pinctl;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -2102,7 +2105,20 @@  intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
+	pinctl = 0;
+
+	/* Valleyview is definitely limited to scanning out the first
+	 * 512MiB. Lets presume this behaviour was inherited from the
+	 * g4x display engine and that all earlier gen are similarly
+	 * limited. Testing suggests that it is a little more
+	 * complicated than this. For example, Cherryview appears quite
+	 * happy to scanout from anywhere within its global aperture.
+	 */
+	if (HAS_GMCH_DISPLAY(dev_priv))
+		pinctl |= PIN_MAPPABLE;
+
+	vma = i915_gem_object_pin_to_display_plane(obj,
+						   alignment, &view, pinctl);
 	if (IS_ERR(vma))
 		goto err;
 
@@ -2123,7 +2139,8 @@  intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 		 * something and try to run the system in a "less than optimal"
 		 * mode that matches the user configuration.
 		 */
-		i915_vma_pin_fence(vma);
+		if (i915_vma_pin_fence(vma) == 0)
+			*out_flags |= PLANE_HAS_FENCE;
 	}
 
 	i915_vma_get(vma);
@@ -2134,11 +2151,12 @@  intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	return vma;
 }
 
-void intel_unpin_fb_vma(struct i915_vma *vma)
+void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
-	i915_vma_unpin_fence(vma);
+	if (flags & PLANE_HAS_FENCE)
+		i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
 	i915_vma_put(vma);
 }
@@ -2808,7 +2826,9 @@  intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 valid_fb:
 	mutex_lock(&dev->struct_mutex);
 	intel_state->vma =
-		intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+		intel_pin_and_fence_fb_obj(fb,
+					   primary->state->rotation,
+					   &intel_state->flags);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(intel_state->vma)) {
 		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
@@ -12713,7 +12733,9 @@  intel_prepare_plane_fb(struct drm_plane *plane,
 	} else {
 		struct i915_vma *vma;
 
-		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+		vma = intel_pin_and_fence_fb_obj(fb,
+						 new_state->rotation,
+						 &to_intel_plane_state(new_state)->flags);
 		if (!IS_ERR(vma))
 			to_intel_plane_state(new_state)->vma = vma;
 		else
@@ -12768,7 +12790,7 @@  intel_cleanup_plane_fb(struct drm_plane *plane,
 	vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma);
 	if (vma) {
 		mutex_lock(&plane->dev->struct_mutex);
-		intel_unpin_fb_vma(vma);
+		intel_unpin_fb_vma(vma, to_intel_plane_state(old_state)->flags);
 		mutex_unlock(&plane->dev->struct_mutex);
 	}
 }
@@ -13129,7 +13151,9 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 			goto out_unlock;
 		}
 	} else {
-		vma = intel_pin_and_fence_fb_obj(fb, new_plane_state->rotation);
+		vma = intel_pin_and_fence_fb_obj(fb,
+						 new_plane_state->rotation,
+						 &to_intel_plane_state(new_plane_state)->flags);
 		if (IS_ERR(vma)) {
 			DRM_DEBUG_KMS("failed to pin object\n");
 
@@ -13160,7 +13184,8 @@  intel_legacy_cursor_update(struct drm_plane *plane,
 
 	old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma);
 	if (old_vma)
-		intel_unpin_fb_vma(old_vma);
+		intel_unpin_fb_vma(old_vma,
+				   to_intel_plane_state(old_plane_state)->flags);
 
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 898064e8bea7..50874f4035cf 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -204,6 +204,7 @@  struct intel_fbdev {
 	struct drm_fb_helper helper;
 	struct intel_framebuffer *fb;
 	struct i915_vma *vma;
+	unsigned long vma_flags;
 	async_cookie_t cookie;
 	int preferred_bpp;
 };
@@ -490,6 +491,8 @@  struct intel_atomic_state {
 struct intel_plane_state {
 	struct drm_plane_state base;
 	struct i915_vma *vma;
+	unsigned long flags;
+#define PLANE_HAS_FENCE BIT(0)
 
 	struct {
 		u32 offset;
@@ -1503,8 +1506,10 @@  void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct intel_load_detect_pipe *old,
 				    struct drm_modeset_acquire_ctx *ctx);
 struct i915_vma *
-intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
-void intel_unpin_fb_vma(struct i915_vma *vma);
+intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
+			   unsigned int rotation,
+			   unsigned long *out_flags);
+void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags);
 struct drm_framebuffer *
 intel_framebuffer_create(struct drm_i915_gem_object *obj,
 			 struct drm_mode_fb_cmd2 *mode_cmd);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index da48af11eb6b..3d8ce3a62743 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -177,6 +177,7 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	struct fb_info *info;
 	struct drm_framebuffer *fb;
 	struct i915_vma *vma;
+	unsigned long flags = 0;
 	bool prealloc = false;
 	void __iomem *vaddr;
 	int ret;
@@ -211,7 +212,9 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	 * This also validates that any existing fb inherited from the
 	 * BIOS is suitable for own access.
 	 */
-	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0);
+	vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base,
+					 DRM_MODE_ROTATE_0,
+					 &flags);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_unlock;
@@ -268,6 +271,7 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
 		      fb->width, fb->height, i915_ggtt_offset(vma));
 	ifbdev->vma = vma;
+	ifbdev->vma_flags = flags;
 
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
@@ -275,7 +279,7 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	return 0;
 
 out_unpin:
-	intel_unpin_fb_vma(vma);
+	intel_unpin_fb_vma(vma, flags);
 out_unlock:
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
@@ -513,7 +517,7 @@  static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	if (ifbdev->vma) {
 		mutex_lock(&ifbdev->helper.dev->struct_mutex);
-		intel_unpin_fb_vma(ifbdev->vma);
+		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
 		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 41e9465d44a8..89f568e739ee 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -801,7 +801,8 @@  static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-	vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
+	vma = i915_gem_object_pin_to_display_plane(new_bo,
+						   0, NULL, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_pin_section;