drm/i915: Decouple execbuf uAPI from internal implementation
diff mbox

Message ID 1452783779-2364-1-git-send-email-tvrtko.ursulin@linux.intel.com
State New
Headers show

Commit Message

Tvrtko Ursulin Jan. 14, 2016, 3:02 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

At the moment execbuf ring selection is fully coupled to
internal ring ids which is not a good thing on its own.

This dependency is also spread between two source files and
not spelled out at either side which makes it hidden and
fragile.

This patch decouples this dependency by introducing an explicit
translation table of execbuf uAPI to ring id close to the only
call site (i915_gem_do_execbuffer).

This way we are free to change driver internal implementation
details without breaking userspace. All state relating to the
uAPI is now contained in, or next to, i915_gem_do_execbuffer.

As a side benefit, this patch decreases the compiled size
of i915_gem_do_execbuffer.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |   4 +-
 drivers/gpu/drm/i915/i915_gem.c            |   2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 103 +++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  10 +--
 4 files changed, 56 insertions(+), 63 deletions(-)

Comments

Chris Wilson Jan. 14, 2016, 10:09 p.m. UTC | #1
On Thu, Jan 14, 2016 at 03:02:59PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> At the moment execbuf ring selection is fully coupled to
> internal ring ids which is not a good thing on its own.
> 
> This dependency is also spread between two source files and
> not spelled out at either side which makes it hidden and
> fragile.
> 
> This patch decouples this dependency by introducing an explicit
> translation table of execbuf uAPI to ring id close to the only
> call site (i915_gem_do_execbuffer).
> 
> This way we are free to change driver internal implementation
> details without breaking userspace. All state relating to the
> uAPI is now contained in, or next to, i915_gem_do_execbuffer.

I was hesistant at first, since "why?" isn't fully developed, but the
code won me over.

> +#define I915_USER_RINGS (4)
> +
> +static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
> +	[I915_EXEC_DEFAULT]	= RCS,
> +	[I915_EXEC_RENDER]	= RCS,
> +	[I915_EXEC_BLT]		= BCS,
> +	[I915_EXEC_BSD]		= VCS,
> +	[I915_EXEC_VEBOX]	= VECS
> +};

I was wondering whether packing here mattered at all, but we're under a
cacheline so highly unlikely.

>  static int
>  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  		       struct drm_file *file,
> @@ -1393,6 +1393,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
>  	struct i915_execbuffer_params *params = &params_master;
>  	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
> +	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>  	u32 dispatch_flags;
>  	int ret;
>  	bool need_relocs;
> @@ -1414,49 +1415,39 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	if (args->flags & I915_EXEC_IS_PINNED)
>  		dispatch_flags |= I915_DISPATCH_PINNED;
>  
> -	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
> -		DRM_DEBUG("execbuf with unknown ring: %d\n",
> -			  (int)(args->flags & I915_EXEC_RING_MASK));
> +	if (user_ring_id > I915_USER_RINGS) {
> +		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>  		return -EINVAL;
>  	}
>  
> -	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
> +	if ((user_ring_id != I915_EXEC_BSD) &&
>  	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
>  		DRM_DEBUG("execbuf with non bsd ring but with invalid "
>  			"bsd dispatch flags: %d\n", (int)(args->flags));
>  		return -EINVAL;
> -	} 
> -
> -	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
> -		ring = &dev_priv->ring[RCS];
> -	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
> -		if (HAS_BSD2(dev)) {
> -			int ring_id;
> -
> -			switch (args->flags & I915_EXEC_BSD_MASK) {
> -			case I915_EXEC_BSD_DEFAULT:
> -				ring_id = gen8_dispatch_bsd_ring(dev, file);
> -				ring = &dev_priv->ring[ring_id];
> -				break;
> -			case I915_EXEC_BSD_RING1:
> -				ring = &dev_priv->ring[VCS];
> -				break;
> -			case I915_EXEC_BSD_RING2:
> -				ring = &dev_priv->ring[VCS2];
> -				break;
> -			default:
> -				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
> -					  (int)(args->flags & I915_EXEC_BSD_MASK));
> -				return -EINVAL;
> -			}
> -		} else
> -			ring = &dev_priv->ring[VCS];
> -	} else
> -		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
> +	}
> +
> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev)) {

HAS_BSD2(dev_priv)

> +		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
> +
> +		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> +			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
> +		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
> +			   bsd_idx <= I915_EXEC_BSD_RING2) {
> +			bsd_idx--;
> +		} else {
> +			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
> +				  bsd_idx);
> +			return -EINVAL;
> +		}
> +
> +		ring = &dev_priv->ring[_VCS(bsd_idx)];
> +	} else {
> +		ring = &dev_priv->ring[user_ring_map[user_ring_id]];
> +	}
>  
>  	if (!intel_ring_initialized(ring)) {
> -		DRM_DEBUG("execbuf with invalid ring: %d\n",
> -			  (int)(args->flags & I915_EXEC_RING_MASK));
> +		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
>  		return -EINVAL;
>  	}

One question, how does this look if we move this section to its own
function:

ring = eb_select_ring(dev_priv, file, args);
if (IS_ERR(ring))
	return PTR_ERR(ring);

Do you keep your code size reduction?

> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 7349d9258191..fdc220fc2b18 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -148,14 +148,14 @@ struct  i915_ctx_workarounds {
>  struct  intel_engine_cs {
>  	const char	*name;
>  	enum intel_ring_id {
> -		RCS = 0x0,
> -		VCS,
> +		RCS = 0,
>  		BCS,
> -		VECS,
> -		VCS2
> +		VCS,
> +		VCS2,	/* Keep instances of the same type engine together. */
> +		VECS

Technically, this breaks userspace ABI elsewhere. :| Though the only
user of the id mask is only looking for RCS==0 vs the reset.

So I think we would cope, but to be extra safe we could just avoid
reshuffling the ids. Let me sleep on the implications. We may say just
break that ABI whilst we still can and do a reverse-map to EXEC bit.
Hmm.
-Chris
Tvrtko Ursulin Jan. 15, 2016, 10:29 a.m. UTC | #2
On 14/01/16 22:09, Chris Wilson wrote:
> On Thu, Jan 14, 2016 at 03:02:59PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> At the moment execbuf ring selection is fully coupled to
>> internal ring ids which is not a good thing on its own.
>>
>> This dependency is also spread between two source files and
>> not spelled out at either side which makes it hidden and
>> fragile.
>>
>> This patch decouples this dependency by introducing an explicit
>> translation table of execbuf uAPI to ring id close to the only
>> call site (i915_gem_do_execbuffer).
>>
>> This way we are free to change driver internal implementation
>> details without breaking userspace. All state relating to the
>> uAPI is now contained in, or next to, i915_gem_do_execbuffer.
>
> I was hesistant at first, since "why?" isn't fully developed, but the
> code won me over.
>
>> +#define I915_USER_RINGS (4)
>> +
>> +static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
>> +	[I915_EXEC_DEFAULT]	= RCS,
>> +	[I915_EXEC_RENDER]	= RCS,
>> +	[I915_EXEC_BLT]		= BCS,
>> +	[I915_EXEC_BSD]		= VCS,
>> +	[I915_EXEC_VEBOX]	= VECS
>> +};
>
> I was wondering whether packing here mattered at all, but we're under a
> cacheline so highly unlikely.
>
>>   static int
>>   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   		       struct drm_file *file,
>> @@ -1393,6 +1393,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
>>   	struct i915_execbuffer_params *params = &params_master;
>>   	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
>> +	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>>   	u32 dispatch_flags;
>>   	int ret;
>>   	bool need_relocs;
>> @@ -1414,49 +1415,39 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   	if (args->flags & I915_EXEC_IS_PINNED)
>>   		dispatch_flags |= I915_DISPATCH_PINNED;
>>
>> -	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
>> -		DRM_DEBUG("execbuf with unknown ring: %d\n",
>> -			  (int)(args->flags & I915_EXEC_RING_MASK));
>> +	if (user_ring_id > I915_USER_RINGS) {
>> +		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>>   		return -EINVAL;
>>   	}
>>
>> -	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
>> +	if ((user_ring_id != I915_EXEC_BSD) &&
>>   	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
>>   		DRM_DEBUG("execbuf with non bsd ring but with invalid "
>>   			"bsd dispatch flags: %d\n", (int)(args->flags));
>>   		return -EINVAL;
>> -	}
>> -
>> -	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
>> -		ring = &dev_priv->ring[RCS];
>> -	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
>> -		if (HAS_BSD2(dev)) {
>> -			int ring_id;
>> -
>> -			switch (args->flags & I915_EXEC_BSD_MASK) {
>> -			case I915_EXEC_BSD_DEFAULT:
>> -				ring_id = gen8_dispatch_bsd_ring(dev, file);
>> -				ring = &dev_priv->ring[ring_id];
>> -				break;
>> -			case I915_EXEC_BSD_RING1:
>> -				ring = &dev_priv->ring[VCS];
>> -				break;
>> -			case I915_EXEC_BSD_RING2:
>> -				ring = &dev_priv->ring[VCS2];
>> -				break;
>> -			default:
>> -				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
>> -					  (int)(args->flags & I915_EXEC_BSD_MASK));
>> -				return -EINVAL;
>> -			}
>> -		} else
>> -			ring = &dev_priv->ring[VCS];
>> -	} else
>> -		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
>> +	}
>> +
>> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev)) {
>
> HAS_BSD2(dev_priv)
>
>> +		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>> +
>> +		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
>> +			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
>> +		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>> +			   bsd_idx <= I915_EXEC_BSD_RING2) {
>> +			bsd_idx--;
>> +		} else {
>> +			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
>> +				  bsd_idx);
>> +			return -EINVAL;
>> +		}
>> +
>> +		ring = &dev_priv->ring[_VCS(bsd_idx)];
>> +	} else {
>> +		ring = &dev_priv->ring[user_ring_map[user_ring_id]];
>> +	}
>>
>>   	if (!intel_ring_initialized(ring)) {
>> -		DRM_DEBUG("execbuf with invalid ring: %d\n",
>> -			  (int)(args->flags & I915_EXEC_RING_MASK));
>> +		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
>>   		return -EINVAL;
>>   	}
>
> One question, how does this look if we move this section to its own
> function:
>
> ring = eb_select_ring(dev_priv, file, args);
> if (IS_ERR(ring))
> 	return PTR_ERR(ring);
>
> Do you keep your code size reduction?

No, perhaps because of all the ERR_PTR business. But decoupling the ring 
and return value keeps it:

static int
eb_select_ring(struct drm_i915_private *dev_priv,
	       struct drm_file *file,
	       struct drm_i915_gem_execbuffer2 *args,
	       struct intel_engine_cs **ring)

Saves ~100 bytes out of ~4600 on my build.

>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 7349d9258191..fdc220fc2b18 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -148,14 +148,14 @@ struct  i915_ctx_workarounds {
>>   struct  intel_engine_cs {
>>   	const char	*name;
>>   	enum intel_ring_id {
>> -		RCS = 0x0,
>> -		VCS,
>> +		RCS = 0,
>>   		BCS,
>> -		VECS,
>> -		VCS2
>> +		VCS,
>> +		VCS2,	/* Keep instances of the same type engine together. */
>> +		VECS
>
> Technically, this breaks userspace ABI elsewhere. :| Though the only
> user of the id mask is only looking for RCS==0 vs the reset.
>
> So I think we would cope, but to be extra safe we could just avoid
> reshuffling the ids. Let me sleep on the implications. We may say just
> break that ABI whilst we still can and do a reverse-map to EXEC bit.
> Hmm.

What are the other places it breaks the ABI? I'd like to understand it.

Regards,

Tvrtko
Chris Wilson Jan. 15, 2016, 11:36 a.m. UTC | #3
On Fri, Jan 15, 2016 at 10:29:05AM +0000, Tvrtko Ursulin wrote:
> >One question, how does this look if we move this section to its own
> >function:
> >
> >ring = eb_select_ring(dev_priv, file, args);
> >if (IS_ERR(ring))
> >	return PTR_ERR(ring);
> >
> >Do you keep your code size reduction?
> 
> No, perhaps because of all the ERR_PTR business. But decoupling the
> ring and return value keeps it:
> 
> static int
> eb_select_ring(struct drm_i915_private *dev_priv,
> 	       struct drm_file *file,
> 	       struct drm_i915_gem_execbuffer2 *args,
> 	       struct intel_engine_cs **ring)

Hmm, I wonder why. ERR_PTR(ret) should be a no-op, so the only thing
that I would have thought changed would be the test afterwards. I guess
a foray into the assembly would be a good learning experience for me then!

> Saves ~100 bytes out of ~4600 on my build.
> 
> >>diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >>index 7349d9258191..fdc220fc2b18 100644
> >>--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >>+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >>@@ -148,14 +148,14 @@ struct  i915_ctx_workarounds {
> >>  struct  intel_engine_cs {
> >>  	const char	*name;
> >>  	enum intel_ring_id {
> >>-		RCS = 0x0,
> >>-		VCS,
> >>+		RCS = 0,
> >>  		BCS,
> >>-		VECS,
> >>-		VCS2
> >>+		VCS,
> >>+		VCS2,	/* Keep instances of the same type engine together. */
> >>+		VECS
> >
> >Technically, this breaks userspace ABI elsewhere. :| Though the only
> >user of the id mask is only looking for RCS==0 vs the reset.
> >
> >So I think we would cope, but to be extra safe we could just avoid
> >reshuffling the ids. Let me sleep on the implications. We may say just
> >break that ABI whilst we still can and do a reverse-map to EXEC bit.
> >Hmm.
> 
> What are the other places it breaks the ABI? I'd like to understand it.

The busy-ioctl is strong abi, i915_trace.h has a few weak abi uses of
ring->id, and the use in debugfs is no abi barrier at all.

ring->id also crops up in the guc_context_desc, which is intriguing.
-Cris
Tvrtko Ursulin Jan. 21, 2016, 11:05 a.m. UTC | #4
On 15/01/16 17:20, Patchwork wrote:
> == Summary ==
>
> Built on 615fbad7f4cea73b1a8eccdcc942c8ca1a708dab drm-intel-nightly: 2016y-01m-15d-09h-46m-32s UTC integration manifest
>
> Test gem_storedw_loop:
>          Subgroup basic-render:
>                  pass       -> DMESG-WARN (skl-i5k-2) UNSTABLE
>                  pass       -> DMESG-WARN (skl-i7k-2) UNSTABLE

Known unrelated: https://bugs.freedesktop.org/show_bug.cgi?id=93693

> Test kms_flip:
>          Subgroup basic-flip-vs-modeset:
>                  dmesg-warn -> PASS       (skl-i5k-2)
> Test kms_pipe_crc_basic:
>          Subgroup read-crc-pipe-b-frame-sequence:
>                  pass       -> DMESG-WARN (byt-nuc)

Known unrelated: https://bugs.freedesktop.org/show_bug.cgi?id=93121

>
> bdw-nuci7        total:138  pass:128  dwarn:1   dfail:0   fail:0   skip:9
> bdw-ultra        total:138  pass:132  dwarn:0   dfail:0   fail:0   skip:6
> bsw-nuc-2        total:141  pass:115  dwarn:2   dfail:0   fail:0   skip:24
> byt-nuc          total:141  pass:122  dwarn:4   dfail:0   fail:0   skip:15
> hsw-brixbox      total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7
> ilk-hp8440p      total:141  pass:101  dwarn:3   dfail:0   fail:0   skip:37
> ivb-t430s        total:135  pass:122  dwarn:3   dfail:4   fail:0   skip:6
> skl-i5k-2        total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
> skl-i7k-2        total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
> snb-dellxps      total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14
> snb-x220t        total:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13
>
> HANGED hsw-gt2 in igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a
>
> Results at /archive/results/CI_IGT_test/Patchwork_1201/
>
>

Merged.

Regards,

Tvrtko
Kukanova, Svetlana March 25, 2016, 1:18 p.m. UTC | #5
Hi everyone,

Yes, this breaks userspace ABI and in particular it broke VTune work. 
Ring ids are seen via i915 tracepoints, and VTune Amplifier uses them.
We were relying on the old ring ids, and assuming that the new rings would be added to the end of the enum.

I'm objecting just now because now this driver change reached our internal users and they complained that VTune is reporting DMA packets on wrong engines.

I would request this change (the enum intel_ring_id change) to be rolled back. Hope, it's still possible.

Regards,
Svetlana

-----Original Message-----
From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of Chris Wilson

Sent: Friday, January 15, 2016 1:09 AM
To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>; Intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH] drm/i915: Decouple execbuf uAPI from internal implementation

On Thu, Jan 14, 2016 at 03:02:59PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

> 

> At the moment execbuf ring selection is fully coupled to internal ring 

> ids which is not a good thing on its own.

> 

> This dependency is also spread between two source files and not 

> spelled out at either side which makes it hidden and fragile.

> 

> This patch decouples this dependency by introducing an explicit 

> translation table of execbuf uAPI to ring id close to the only call 

> site (i915_gem_do_execbuffer).

> 

> This way we are free to change driver internal implementation details 

> without breaking userspace. All state relating to the uAPI is now 

> contained in, or next to, i915_gem_do_execbuffer.


I was hesistant at first, since "why?" isn't fully developed, but the code won me over.

> +#define I915_USER_RINGS (4)

> +

> +static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {

> +	[I915_EXEC_DEFAULT]	= RCS,

> +	[I915_EXEC_RENDER]	= RCS,

> +	[I915_EXEC_BLT]		= BCS,

> +	[I915_EXEC_BSD]		= VCS,

> +	[I915_EXEC_VEBOX]	= VECS

> +};


I was wondering whether packing here mattered at all, but we're under a cacheline so highly unlikely.

>  static int

>  i915_gem_do_execbuffer(struct drm_device *dev, void *data,

>  		       struct drm_file *file,

> @@ -1393,6 +1393,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,

>  	struct i915_execbuffer_params params_master; /* XXX: will be removed later */

>  	struct i915_execbuffer_params *params = &params_master;

>  	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);

> +	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;

>  	u32 dispatch_flags;

>  	int ret;

>  	bool need_relocs;

> @@ -1414,49 +1415,39 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,

>  	if (args->flags & I915_EXEC_IS_PINNED)

>  		dispatch_flags |= I915_DISPATCH_PINNED;

>  

> -	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {

> -		DRM_DEBUG("execbuf with unknown ring: %d\n",

> -			  (int)(args->flags & I915_EXEC_RING_MASK));

> +	if (user_ring_id > I915_USER_RINGS) {

> +		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);

>  		return -EINVAL;

>  	}

>  

> -	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&

> +	if ((user_ring_id != I915_EXEC_BSD) &&

>  	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {

>  		DRM_DEBUG("execbuf with non bsd ring but with invalid "

>  			"bsd dispatch flags: %d\n", (int)(args->flags));

>  		return -EINVAL;

> -	} 

> -

> -	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)

> -		ring = &dev_priv->ring[RCS];

> -	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {

> -		if (HAS_BSD2(dev)) {

> -			int ring_id;

> -

> -			switch (args->flags & I915_EXEC_BSD_MASK) {

> -			case I915_EXEC_BSD_DEFAULT:

> -				ring_id = gen8_dispatch_bsd_ring(dev, file);

> -				ring = &dev_priv->ring[ring_id];

> -				break;

> -			case I915_EXEC_BSD_RING1:

> -				ring = &dev_priv->ring[VCS];

> -				break;

> -			case I915_EXEC_BSD_RING2:

> -				ring = &dev_priv->ring[VCS2];

> -				break;

> -			default:

> -				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",

> -					  (int)(args->flags & I915_EXEC_BSD_MASK));

> -				return -EINVAL;

> -			}

> -		} else

> -			ring = &dev_priv->ring[VCS];

> -	} else

> -		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];

> +	}

> +

> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev)) {


HAS_BSD2(dev_priv)

> +		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;

> +

> +		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {

> +			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);

> +		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&

> +			   bsd_idx <= I915_EXEC_BSD_RING2) {

> +			bsd_idx--;

> +		} else {

> +			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",

> +				  bsd_idx);

> +			return -EINVAL;

> +		}

> +

> +		ring = &dev_priv->ring[_VCS(bsd_idx)];

> +	} else {

> +		ring = &dev_priv->ring[user_ring_map[user_ring_id]];

> +	}

>  

>  	if (!intel_ring_initialized(ring)) {

> -		DRM_DEBUG("execbuf with invalid ring: %d\n",

> -			  (int)(args->flags & I915_EXEC_RING_MASK));

> +		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);

>  		return -EINVAL;

>  	}


One question, how does this look if we move this section to its own
function:

ring = eb_select_ring(dev_priv, file, args); if (IS_ERR(ring))
	return PTR_ERR(ring);

Do you keep your code size reduction?

> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 

> b/drivers/gpu/drm/i915/intel_ringbuffer.h

> index 7349d9258191..fdc220fc2b18 100644

> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h

> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h

> @@ -148,14 +148,14 @@ struct  i915_ctx_workarounds {  struct  

> intel_engine_cs {

>  	const char	*name;

>  	enum intel_ring_id {

> -		RCS = 0x0,

> -		VCS,

> +		RCS = 0,

>  		BCS,

> -		VECS,

> -		VCS2

> +		VCS,

> +		VCS2,	/* Keep instances of the same type engine together. */

> +		VECS


Technically, this breaks userspace ABI elsewhere. :| Though the only user of the id mask is only looking for RCS==0 vs the reset.

So I think we would cope, but to be extra safe we could just avoid reshuffling the ids. Let me sleep on the implications. We may say just break that ABI whilst we still can and do a reverse-map to EXEC bit.
Hmm.
-Chris

--
Chris Wilson, Intel Open Source Technology Centre _______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

--------------------------------------------------------------------
Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park,
17 Krylatskaya Str., Bldg 4, Moscow 121614,
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Tvrtko Ursulin April 5, 2016, 9:17 a.m. UTC | #6
Hi,

On 25/03/16 13:18, Kukanova, Svetlana wrote:
> Hi everyone,
>
> Yes, this breaks userspace ABI and in particular it broke VTune work.
> Ring ids are seen via i915 tracepoints, and VTune Amplifier uses them.
> We were relying on the old ring ids, and assuming that the new rings would be added to the end of the enum.
>
> I'm objecting just now because now this driver change reached our internal users and they complained that VTune is reporting DMA packets on wrong engines.
>
> I would request this change (the enum intel_ring_id change) to be rolled back. Hope, it's still possible.

This one will be the one for Daniel. Personally I didn't think 
tracepoints count as ABI, if you consider they can start and stop making 
sense just by the virtue of internal driver refactoring.

But as a quick Google search shows, there is some controversy on the 
topic, even if it is still questionable for device drivers.

Also, could you not detect the i915 version somehow from VTune and adapt?

Regards,

Tvrtko

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of Chris Wilson
> Sent: Friday, January 15, 2016 1:09 AM
> To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>; Intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915: Decouple execbuf uAPI from internal implementation
>
> On Thu, Jan 14, 2016 at 03:02:59PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> At the moment execbuf ring selection is fully coupled to internal ring
>> ids which is not a good thing on its own.
>>
>> This dependency is also spread between two source files and not
>> spelled out at either side which makes it hidden and fragile.
>>
>> This patch decouples this dependency by introducing an explicit
>> translation table of execbuf uAPI to ring id close to the only call
>> site (i915_gem_do_execbuffer).
>>
>> This way we are free to change driver internal implementation details
>> without breaking userspace. All state relating to the uAPI is now
>> contained in, or next to, i915_gem_do_execbuffer.
>
> I was hesistant at first, since "why?" isn't fully developed, but the code won me over.
>
>> +#define I915_USER_RINGS (4)
>> +
>> +static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
>> +	[I915_EXEC_DEFAULT]	= RCS,
>> +	[I915_EXEC_RENDER]	= RCS,
>> +	[I915_EXEC_BLT]		= BCS,
>> +	[I915_EXEC_BSD]		= VCS,
>> +	[I915_EXEC_VEBOX]	= VECS
>> +};
>
> I was wondering whether packing here mattered at all, but we're under a cacheline so highly unlikely.
>
>>   static int
>>   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   		       struct drm_file *file,
>> @@ -1393,6 +1393,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
>>   	struct i915_execbuffer_params *params = &params_master;
>>   	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
>> +	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>>   	u32 dispatch_flags;
>>   	int ret;
>>   	bool need_relocs;
>> @@ -1414,49 +1415,39 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   	if (args->flags & I915_EXEC_IS_PINNED)
>>   		dispatch_flags |= I915_DISPATCH_PINNED;
>>
>> -	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
>> -		DRM_DEBUG("execbuf with unknown ring: %d\n",
>> -			  (int)(args->flags & I915_EXEC_RING_MASK));
>> +	if (user_ring_id > I915_USER_RINGS) {
>> +		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>>   		return -EINVAL;
>>   	}
>>
>> -	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
>> +	if ((user_ring_id != I915_EXEC_BSD) &&
>>   	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
>>   		DRM_DEBUG("execbuf with non bsd ring but with invalid "
>>   			"bsd dispatch flags: %d\n", (int)(args->flags));
>>   		return -EINVAL;
>> -	}
>> -
>> -	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
>> -		ring = &dev_priv->ring[RCS];
>> -	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
>> -		if (HAS_BSD2(dev)) {
>> -			int ring_id;
>> -
>> -			switch (args->flags & I915_EXEC_BSD_MASK) {
>> -			case I915_EXEC_BSD_DEFAULT:
>> -				ring_id = gen8_dispatch_bsd_ring(dev, file);
>> -				ring = &dev_priv->ring[ring_id];
>> -				break;
>> -			case I915_EXEC_BSD_RING1:
>> -				ring = &dev_priv->ring[VCS];
>> -				break;
>> -			case I915_EXEC_BSD_RING2:
>> -				ring = &dev_priv->ring[VCS2];
>> -				break;
>> -			default:
>> -				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
>> -					  (int)(args->flags & I915_EXEC_BSD_MASK));
>> -				return -EINVAL;
>> -			}
>> -		} else
>> -			ring = &dev_priv->ring[VCS];
>> -	} else
>> -		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
>> +	}
>> +
>> +	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev)) {
>
> HAS_BSD2(dev_priv)
>
>> +		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>> +
>> +		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
>> +			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
>> +		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>> +			   bsd_idx <= I915_EXEC_BSD_RING2) {
>> +			bsd_idx--;
>> +		} else {
>> +			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
>> +				  bsd_idx);
>> +			return -EINVAL;
>> +		}
>> +
>> +		ring = &dev_priv->ring[_VCS(bsd_idx)];
>> +	} else {
>> +		ring = &dev_priv->ring[user_ring_map[user_ring_id]];
>> +	}
>>
>>   	if (!intel_ring_initialized(ring)) {
>> -		DRM_DEBUG("execbuf with invalid ring: %d\n",
>> -			  (int)(args->flags & I915_EXEC_RING_MASK));
>> +		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
>>   		return -EINVAL;
>>   	}
>
> One question, how does this look if we move this section to its own
> function:
>
> ring = eb_select_ring(dev_priv, file, args); if (IS_ERR(ring))
> 	return PTR_ERR(ring);
>
> Do you keep your code size reduction?
>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 7349d9258191..fdc220fc2b18 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -148,14 +148,14 @@ struct  i915_ctx_workarounds {  struct
>> intel_engine_cs {
>>   	const char	*name;
>>   	enum intel_ring_id {
>> -		RCS = 0x0,
>> -		VCS,
>> +		RCS = 0,
>>   		BCS,
>> -		VECS,
>> -		VCS2
>> +		VCS,
>> +		VCS2,	/* Keep instances of the same type engine together. */
>> +		VECS
>
> Technically, this breaks userspace ABI elsewhere. :| Though the only user of the id mask is only looking for RCS==0 vs the reset.
>
> So I think we would cope, but to be extra safe we could just avoid reshuffling the ids. Let me sleep on the implications. We may say just break that ABI whilst we still can and do a reverse-map to EXEC bit.
> Hmm.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eb7bb97f7316..35d5d6099a44 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -334,7 +334,7 @@  struct drm_i915_file_private {
 		unsigned boosts;
 	} rps;
 
-	struct intel_engine_cs *bsd_ring;
+	unsigned int bsd_ring;
 };
 
 enum intel_dpll_id {
@@ -1300,7 +1300,7 @@  struct i915_gem_mm {
 	bool busy;
 
 	/* the indicator for dispatch video commands on two BSD rings */
-	int bsd_ring_dispatch_index;
+	unsigned int bsd_ring_dispatch_index;
 
 	/** Bit 6 swizzling required for X tiling */
 	uint32_t bit_6_swizzle_x;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ddc21d4b388d..26e6842f2df3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5112,6 +5112,8 @@  int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
+	file_priv->bsd_ring = -1;
+
 	ret = i915_gem_context_open(dev, file);
 	if (ret)
 		kfree(file_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d469c4779ff5..cd8646a23780 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1328,33 +1328,23 @@  i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 
 /**
  * Find one BSD ring to dispatch the corresponding BSD command.
- * The Ring ID is returned.
+ * The ring index is returned.
  */
-static int gen8_dispatch_bsd_ring(struct drm_device *dev,
-				  struct drm_file *file)
+static unsigned int
+gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	/* Check whether the file_priv is using one ring */
-	if (file_priv->bsd_ring)
-		return file_priv->bsd_ring->id;
-	else {
-		/* If no, use the ping-pong mechanism to select one ring */
-		int ring_id;
-
-		mutex_lock(&dev->struct_mutex);
-		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
-			ring_id = VCS;
-			dev_priv->mm.bsd_ring_dispatch_index = 1;
-		} else {
-			ring_id = VCS2;
-			dev_priv->mm.bsd_ring_dispatch_index = 0;
-		}
-		file_priv->bsd_ring = &dev_priv->ring[ring_id];
-		mutex_unlock(&dev->struct_mutex);
-		return ring_id;
+	/* Check whether the file_priv has already selected one ring. */
+	if ((int)file_priv->bsd_ring < 0) {
+		/* If not, use the ping-pong mechanism to select one. */
+		mutex_lock(&dev_priv->dev->struct_mutex);
+		file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index;
+		dev_priv->mm.bsd_ring_dispatch_index ^= 1;
+		mutex_unlock(&dev_priv->dev->struct_mutex);
 	}
+
+	return file_priv->bsd_ring;
 }
 
 static struct drm_i915_gem_object *
@@ -1377,6 +1367,16 @@  eb_get_batch(struct eb_vmas *eb)
 	return vma->obj;
 }
 
+#define I915_USER_RINGS (4)
+
+static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
+	[I915_EXEC_DEFAULT]	= RCS,
+	[I915_EXEC_RENDER]	= RCS,
+	[I915_EXEC_BLT]		= BCS,
+	[I915_EXEC_BSD]		= VCS,
+	[I915_EXEC_VEBOX]	= VECS
+};
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -1393,6 +1393,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
+	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	u32 dispatch_flags;
 	int ret;
 	bool need_relocs;
@@ -1414,49 +1415,39 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		dispatch_flags |= I915_DISPATCH_PINNED;
 
-	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
-		DRM_DEBUG("execbuf with unknown ring: %d\n",
-			  (int)(args->flags & I915_EXEC_RING_MASK));
+	if (user_ring_id > I915_USER_RINGS) {
+		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return -EINVAL;
 	}
 
-	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
+	if ((user_ring_id != I915_EXEC_BSD) &&
 	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
 			"bsd dispatch flags: %d\n", (int)(args->flags));
 		return -EINVAL;
-	} 
-
-	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
-		ring = &dev_priv->ring[RCS];
-	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
-		if (HAS_BSD2(dev)) {
-			int ring_id;
-
-			switch (args->flags & I915_EXEC_BSD_MASK) {
-			case I915_EXEC_BSD_DEFAULT:
-				ring_id = gen8_dispatch_bsd_ring(dev, file);
-				ring = &dev_priv->ring[ring_id];
-				break;
-			case I915_EXEC_BSD_RING1:
-				ring = &dev_priv->ring[VCS];
-				break;
-			case I915_EXEC_BSD_RING2:
-				ring = &dev_priv->ring[VCS2];
-				break;
-			default:
-				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
-					  (int)(args->flags & I915_EXEC_BSD_MASK));
-				return -EINVAL;
-			}
-		} else
-			ring = &dev_priv->ring[VCS];
-	} else
-		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
+	}
+
+	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev)) {
+		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
+
+		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
+			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
+		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
+			   bsd_idx <= I915_EXEC_BSD_RING2) {
+			bsd_idx--;
+		} else {
+			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
+				  bsd_idx);
+			return -EINVAL;
+		}
+
+		ring = &dev_priv->ring[_VCS(bsd_idx)];
+	} else {
+		ring = &dev_priv->ring[user_ring_map[user_ring_id]];
+	}
 
 	if (!intel_ring_initialized(ring)) {
-		DRM_DEBUG("execbuf with invalid ring: %d\n",
-			  (int)(args->flags & I915_EXEC_RING_MASK));
+		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7349d9258191..fdc220fc2b18 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -148,14 +148,14 @@  struct  i915_ctx_workarounds {
 struct  intel_engine_cs {
 	const char	*name;
 	enum intel_ring_id {
-		RCS = 0x0,
-		VCS,
+		RCS = 0,
 		BCS,
-		VECS,
-		VCS2
+		VCS,
+		VCS2,	/* Keep instances of the same type engine together. */
+		VECS
 	} id;
 #define I915_NUM_RINGS 5
-#define LAST_USER_RING (VECS + 1)
+#define _VCS(n) (VCS + (n))
 	u32		mmio_base;
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;