diff mbox series

[08/27] drm/i915: Add logical engine mapping

Message ID 20210820224446.30620-9-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Parallel submission aka multi-bb execbuf | expand

Commit Message

Matthew Brost Aug. 20, 2021, 10:44 p.m. UTC
Add logical engine mapping. This is required for split-frame, as
workloads need to be placed on engines in a logically contiguous manner.

v2:
 (Daniel Vetter)
  - Add kernel doc for new fields

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
 .../drm/i915/gt/intel_execlists_submission.c  |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
 5 files changed, 60 insertions(+), 29 deletions(-)

Comments

Tvrtko Ursulin Sept. 10, 2021, 11:12 a.m. UTC | #1
On 20/08/2021 23:44, Matthew Brost wrote:
> Add logical engine mapping. This is required for split-frame, as
> workloads need to be placed on engines in a logically contiguous manner.
> 
> v2:
>   (Daniel Vetter)
>    - Add kernel doc for new fields
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
>   .../drm/i915/gt/intel_execlists_submission.c  |  1 +
>   drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
>   5 files changed, 60 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 0d9105a31d84..4d790f9a65dd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
>   	GEM_DEBUG_WARN_ON(iir);
>   }
>   
> -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
> +			      u8 logical_instance)
>   {
>   	const struct engine_info *info = &intel_engines[id];
>   	struct drm_i915_private *i915 = gt->i915;
> @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>   
>   	engine->class = info->class;
>   	engine->instance = info->instance;
> +	engine->logical_mask = BIT(logical_instance);
>   	__sprint_engine_name(engine);
>   
>   	engine->props.heartbeat_interval_ms =
> @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
>   	return info->engine_mask;
>   }
>   
> +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
> +				 u8 class, const u8 *map, u8 num_instances)
> +{
> +	int i, j;
> +	u8 current_logical_id = 0;
> +
> +	for (j = 0; j < num_instances; ++j) {
> +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> +			if (!HAS_ENGINE(gt, i) ||
> +			    intel_engines[i].class != class)
> +				continue;
> +
> +			if (intel_engines[i].instance == map[j]) {
> +				logical_ids[intel_engines[i].instance] =
> +					current_logical_id++;
> +				break;
> +			}
> +		}
> +	}
> +}
> +
> +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
> +{
> +	int i;
> +	u8 map[MAX_ENGINE_INSTANCE + 1];
> +
> +	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
> +		map[i] = i;

What's the point of the map array since it is 1:1 with instance?

> +	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
> +}
> +
>   /**
>    * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
>    * @gt: pointer to struct intel_gt
> @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
>   	struct drm_i915_private *i915 = gt->i915;
>   	const unsigned int engine_mask = init_engine_mask(gt);
>   	unsigned int mask = 0;
> -	unsigned int i;
> +	unsigned int i, class;
> +	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
>   	int err;
>   
>   	drm_WARN_ON(&i915->drm, engine_mask == 0);
> @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
>   	if (i915_inject_probe_failure(i915))
>   		return -ENODEV;
>   
> -	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> -		if (!HAS_ENGINE(gt, i))
> -			continue;
> +	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
> +		setup_logical_ids(gt, logical_ids, class);
>   
> -		err = intel_engine_setup(gt, i);
> -		if (err)
> -			goto cleanup;
> +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> +			u8 instance = intel_engines[i].instance;
> +
> +			if (intel_engines[i].class != class ||
> +			    !HAS_ENGINE(gt, i))
> +				continue;
>   
> -		mask |= BIT(i);
> +			err = intel_engine_setup(gt, i,
> +						 logical_ids[instance]);
> +			if (err)
> +				goto cleanup;
> +
> +			mask |= BIT(i);

I still think there is a less clunky way to set this up in less code and 
more readable at the same time. Like do it in two passes so you can 
iterate gt->engine_class[] array instead of having to implement a skip 
condition (both on class and HAS_ENGINE at two places) and also avoid 
walking the flat intel_engines array recursively.

> +		}
>   	}
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index ed91bcff20eb..fddf35546b58 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -266,6 +266,11 @@ struct intel_engine_cs {
>   	unsigned int guc_id;
>   
>   	intel_engine_mask_t mask;
> +	/**
> +	 * @logical_mask: logical mask of engine, reported to user space via
> +	 * query IOCTL and used to communicate with the GuC in logical space
> +	 */
> +	intel_engine_mask_t logical_mask;

You could prefix the new field with uabi_ to match the existing scheme 
and to signify to anyone who might be touching it in the future it 
should not be changed.

Also, I think the comment should explain what logical space is, i.e. how the 
numbering works.

Not sure the part about GuC needs to be in the comment since uapi is 
supposed to be backend agnostic.

Regards,

Tvrtko

>   
>   	u8 class;
>   	u8 instance;
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index cafb0608ffb4..813a6de01382 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>   
>   		ve->siblings[ve->num_siblings++] = sibling;
>   		ve->base.mask |= sibling->mask;
> +		ve->base.logical_mask |= sibling->logical_mask;
>   
>   		/*
>   		 * All physical engines must be compatible for their emission
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 6926919bcac6..9f5f43a16182 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
>   	for_each_engine(engine, gt, id) {
>   		u8 guc_class = engine_class_to_guc_class(engine->class);
>   
> -		system_info->mapping_table[guc_class][engine->instance] =
> +		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
>   			engine->instance;
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index e0eed70f9b92..ffafbac7335e 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
>   	return __guc_action_deregister_context(guc, guc_id, loop);
>   }
>   
> -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> -{
> -	switch (class) {
> -	case RENDER_CLASS:
> -		return mask >> RCS0;
> -	case VIDEO_ENHANCEMENT_CLASS:
> -		return mask >> VECS0;
> -	case VIDEO_DECODE_CLASS:
> -		return mask >> VCS0;
> -	case COPY_ENGINE_CLASS:
> -		return mask >> BCS0;
> -	default:
> -		MISSING_CASE(class);
> -		return 0;
> -	}
> -}
> -
>   static void guc_context_policy_init(struct intel_engine_cs *engine,
>   				    struct guc_lrc_desc *desc)
>   {
> @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>   
>   	desc = __get_lrc_desc(guc, desc_idx);
>   	desc->engine_class = engine_class_to_guc_class(engine->class);
> -	desc->engine_submit_mask = adjust_engine_mask(engine->class,
> -						      engine->mask);
> +	desc->engine_submit_mask = engine->logical_mask;
>   	desc->hw_context_desc = ce->lrc.lrca;
>   	desc->priority = ce->guc_state.prio;
>   	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>   		}
>   
>   		ve->base.mask |= sibling->mask;
> +		ve->base.logical_mask |= sibling->logical_mask;
>   
>   		if (n != 0 && ve->base.class != sibling->class) {
>   			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
>
Matthew Brost Sept. 10, 2021, 7:49 p.m. UTC | #2
On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
> 
> On 20/08/2021 23:44, Matthew Brost wrote:
> > Add logical engine mapping. This is required for split-frame, as
> > workloads need to be placed on engines in a logically contiguous manner.
> > 
> > v2:
> >   (Daniel Vetter)
> >    - Add kernel doc for new fields
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
> >   .../drm/i915/gt/intel_execlists_submission.c  |  1 +
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
> >   5 files changed, 60 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 0d9105a31d84..4d790f9a65dd 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
> >   	GEM_DEBUG_WARN_ON(iir);
> >   }
> > -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
> > +			      u8 logical_instance)
> >   {
> >   	const struct engine_info *info = &intel_engines[id];
> >   	struct drm_i915_private *i915 = gt->i915;
> > @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> >   	engine->class = info->class;
> >   	engine->instance = info->instance;
> > +	engine->logical_mask = BIT(logical_instance);
> >   	__sprint_engine_name(engine);
> >   	engine->props.heartbeat_interval_ms =
> > @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
> >   	return info->engine_mask;
> >   }
> > +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
> > +				 u8 class, const u8 *map, u8 num_instances)
> > +{
> > +	int i, j;
> > +	u8 current_logical_id = 0;
> > +
> > +	for (j = 0; j < num_instances; ++j) {
> > +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > +			if (!HAS_ENGINE(gt, i) ||
> > +			    intel_engines[i].class != class)
> > +				continue;
> > +
> > +			if (intel_engines[i].instance == map[j]) {
> > +				logical_ids[intel_engines[i].instance] =
> > +					current_logical_id++;
> > +				break;
> > +			}
> > +		}
> > +	}
> > +}
> > +
> > +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
> > +{
> > +	int i;
> > +	u8 map[MAX_ENGINE_INSTANCE + 1];
> > +
> > +	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
> > +		map[i] = i;
> 
> What's the point of the map array since it is 1:1 with instance?
> 

Future products do not have a 1 to 1 mapping and that mapping can change
based on fusing, e.g. XeHP SDV.

Also technically ICL / TGL / ADL physical instance 2 maps to logical
instance 1.

> > +	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
> > +}
> > +
> >   /**
> >    * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
> >    * @gt: pointer to struct intel_gt
> > @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> >   	struct drm_i915_private *i915 = gt->i915;
> >   	const unsigned int engine_mask = init_engine_mask(gt);
> >   	unsigned int mask = 0;
> > -	unsigned int i;
> > +	unsigned int i, class;
> > +	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
> >   	int err;
> >   	drm_WARN_ON(&i915->drm, engine_mask == 0);
> > @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> >   	if (i915_inject_probe_failure(i915))
> >   		return -ENODEV;
> > -	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> > -		if (!HAS_ENGINE(gt, i))
> > -			continue;
> > +	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
> > +		setup_logical_ids(gt, logical_ids, class);
> > -		err = intel_engine_setup(gt, i);
> > -		if (err)
> > -			goto cleanup;
> > +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > +			u8 instance = intel_engines[i].instance;
> > +
> > +			if (intel_engines[i].class != class ||
> > +			    !HAS_ENGINE(gt, i))
> > +				continue;
> > -		mask |= BIT(i);
> > +			err = intel_engine_setup(gt, i,
> > +						 logical_ids[instance]);
> > +			if (err)
> > +				goto cleanup;
> > +
> > +			mask |= BIT(i);
> 
> I still this there is a less clunky way to set this up in less code and more
> readable at the same time. Like do it in two passes so you can iterate
> gt->engine_class[] array instead of having to implement a skip condition
> (both on class and HAS_ENGINE at two places) and also avoid walking the flat
> intel_engines array recursively.
>

Kinda a bikeshed arguing about a pretty simple loop structure, don't you
think? I personally like the way it is laid out.

Pseudo code for your suggestion?
 
> > +		}
> >   	}
> >   	/*
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index ed91bcff20eb..fddf35546b58 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -266,6 +266,11 @@ struct intel_engine_cs {
> >   	unsigned int guc_id;
> >   	intel_engine_mask_t mask;
> > +	/**
> > +	 * @logical_mask: logical mask of engine, reported to user space via
> > +	 * query IOCTL and used to communicate with the GuC in logical space
> > +	 */
> > +	intel_engine_mask_t logical_mask;
> 
> You could prefix the new field with uabi_ to match the existing scheme and
> to signify to anyone who might be touching it in the future it should not be
> changed.

This is kinda uabi, but also kinda isn't. We do report a logical
instance via IOCTL but it also really is tied to the GuC backend as we only
can communicate with the GuC in logical space. IMO we should leave as
is.

> 
> Also, I think comment should explain what is logical space ie. how the
> numbering works.
>

Don't I already do that? I suppose I could add something like:

The logical mask within engine class must be contiguous across all
instances.

> Not sure the part about GuC needs to be in the comment since uapi is
> supposed to be backend agnostic.
> 

Communicating with the GuC in logical space is a pretty key point here.
The communication with the GuC in logical space is backend specific but
how our hardware works (e.g. split frame workloads must be placed
logically contiguous) is not. Mentioning the GuC requirement here makes
sense to me for completeness.

Matt

> Regards,
> 
> Tvrtko
> 
> >   	u8 class;
> >   	u8 instance;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > index cafb0608ffb4..813a6de01382 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> >   		ve->siblings[ve->num_siblings++] = sibling;
> >   		ve->base.mask |= sibling->mask;
> > +		ve->base.logical_mask |= sibling->logical_mask;
> >   		/*
> >   		 * All physical engines must be compatible for their emission
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > index 6926919bcac6..9f5f43a16182 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
> >   	for_each_engine(engine, gt, id) {
> >   		u8 guc_class = engine_class_to_guc_class(engine->class);
> > -		system_info->mapping_table[guc_class][engine->instance] =
> > +		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
> >   			engine->instance;
> >   	}
> >   }
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index e0eed70f9b92..ffafbac7335e 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
> >   	return __guc_action_deregister_context(guc, guc_id, loop);
> >   }
> > -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> > -{
> > -	switch (class) {
> > -	case RENDER_CLASS:
> > -		return mask >> RCS0;
> > -	case VIDEO_ENHANCEMENT_CLASS:
> > -		return mask >> VECS0;
> > -	case VIDEO_DECODE_CLASS:
> > -		return mask >> VCS0;
> > -	case COPY_ENGINE_CLASS:
> > -		return mask >> BCS0;
> > -	default:
> > -		MISSING_CASE(class);
> > -		return 0;
> > -	}
> > -}
> > -
> >   static void guc_context_policy_init(struct intel_engine_cs *engine,
> >   				    struct guc_lrc_desc *desc)
> >   {
> > @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
> >   	desc = __get_lrc_desc(guc, desc_idx);
> >   	desc->engine_class = engine_class_to_guc_class(engine->class);
> > -	desc->engine_submit_mask = adjust_engine_mask(engine->class,
> > -						      engine->mask);
> > +	desc->engine_submit_mask = engine->logical_mask;
> >   	desc->hw_context_desc = ce->lrc.lrca;
> >   	desc->priority = ce->guc_state.prio;
> >   	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> > @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> >   		}
> >   		ve->base.mask |= sibling->mask;
> > +		ve->base.logical_mask |= sibling->logical_mask;
> >   		if (n != 0 && ve->base.class != sibling->class) {
> >   			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> >
Tvrtko Ursulin Sept. 13, 2021, 9:24 a.m. UTC | #3
On 10/09/2021 20:49, Matthew Brost wrote:
> On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
>>
>> On 20/08/2021 23:44, Matthew Brost wrote:
>>> Add logical engine mapping. This is required for split-frame, as
>>> workloads need to be placed on engines in a logically contiguous manner.
>>>
>>> v2:
>>>    (Daniel Vetter)
>>>     - Add kernel doc for new fields
>>>
>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
>>>    drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
>>>    .../drm/i915/gt/intel_execlists_submission.c  |  1 +
>>>    drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
>>>    .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
>>>    5 files changed, 60 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> index 0d9105a31d84..4d790f9a65dd 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
>>>    	GEM_DEBUG_WARN_ON(iir);
>>>    }
>>> -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>>> +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
>>> +			      u8 logical_instance)
>>>    {
>>>    	const struct engine_info *info = &intel_engines[id];
>>>    	struct drm_i915_private *i915 = gt->i915;
>>> @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>>>    	engine->class = info->class;
>>>    	engine->instance = info->instance;
>>> +	engine->logical_mask = BIT(logical_instance);
>>>    	__sprint_engine_name(engine);
>>>    	engine->props.heartbeat_interval_ms =
>>> @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
>>>    	return info->engine_mask;
>>>    }
>>> +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
>>> +				 u8 class, const u8 *map, u8 num_instances)
>>> +{
>>> +	int i, j;
>>> +	u8 current_logical_id = 0;
>>> +
>>> +	for (j = 0; j < num_instances; ++j) {
>>> +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
>>> +			if (!HAS_ENGINE(gt, i) ||
>>> +			    intel_engines[i].class != class)
>>> +				continue;
>>> +
>>> +			if (intel_engines[i].instance == map[j]) {
>>> +				logical_ids[intel_engines[i].instance] =
>>> +					current_logical_id++;
>>> +				break;
>>> +			}
>>> +		}
>>> +	}
>>> +}
>>> +
>>> +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
>>> +{
>>> +	int i;
>>> +	u8 map[MAX_ENGINE_INSTANCE + 1];
>>> +
>>> +	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
>>> +		map[i] = i;
>>
>> What's the point of the map array since it is 1:1 with instance?
>>
> 
> Future products do not have a 1 to 1 mapping and that mapping can change
> based on fusing, e.g. XeHP SDV.
> 
> Also technically ICL / TGL / ADL physical instance 2 maps to logical
> instance 1.

I don't follow the argument. All I can see is that "map[i] = i" always 
holds in the proposed code, which is then used to check "instance == 
map[instance]". So I'd suggest to remove this array from the code until 
there is a need for it.

>>> +	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
>>> +}
>>> +
>>>    /**
>>>     * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
>>>     * @gt: pointer to struct intel_gt
>>> @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
>>>    	struct drm_i915_private *i915 = gt->i915;
>>>    	const unsigned int engine_mask = init_engine_mask(gt);
>>>    	unsigned int mask = 0;
>>> -	unsigned int i;
>>> +	unsigned int i, class;
>>> +	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
>>>    	int err;
>>>    	drm_WARN_ON(&i915->drm, engine_mask == 0);
>>> @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
>>>    	if (i915_inject_probe_failure(i915))
>>>    		return -ENODEV;
>>> -	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
>>> -		if (!HAS_ENGINE(gt, i))
>>> -			continue;
>>> +	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
>>> +		setup_logical_ids(gt, logical_ids, class);
>>> -		err = intel_engine_setup(gt, i);
>>> -		if (err)
>>> -			goto cleanup;
>>> +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
>>> +			u8 instance = intel_engines[i].instance;
>>> +
>>> +			if (intel_engines[i].class != class ||
>>> +			    !HAS_ENGINE(gt, i))
>>> +				continue;
>>> -		mask |= BIT(i);
>>> +			err = intel_engine_setup(gt, i,
>>> +						 logical_ids[instance]);
>>> +			if (err)
>>> +				goto cleanup;
>>> +
>>> +			mask |= BIT(i);
>>
>> I still this there is a less clunky way to set this up in less code and more
>> readable at the same time. Like do it in two passes so you can iterate
>> gt->engine_class[] array instead of having to implement a skip condition
>> (both on class and HAS_ENGINE at two places) and also avoid walking the flat
>> intel_engines array recursively.
>>
> 
> Kinda a bikeshed arguing about a pretty simple loop structure, don't you
> think? I personally like the way it laid out.
> 
> Pseudo code for your suggestion?

Leave the existing setup loop as is and add an additional "for engine 
class" walk after it. That way you can walk already setup 
gt->engine_class[] array so wouldn't need to skip wrong classes and have 
HAS_ENGINE checks when walking the flat intel_engines[] array several 
times. It also applies to the helper which counts logical instances per 
class.

>   
>>> +		}
>>>    	}
>>>    	/*
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>> index ed91bcff20eb..fddf35546b58 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>> @@ -266,6 +266,11 @@ struct intel_engine_cs {
>>>    	unsigned int guc_id;
>>>    	intel_engine_mask_t mask;
>>> +	/**
>>> +	 * @logical_mask: logical mask of engine, reported to user space via
>>> +	 * query IOCTL and used to communicate with the GuC in logical space
>>> +	 */
>>> +	intel_engine_mask_t logical_mask;
>>
>> You could prefix the new field with uabi_ to match the existing scheme and
>> to signify to anyone who might be touching it in the future it should not be
>> changed.
> 
> This is kinda uabi, but also kinda isn't. We do report a logical
> instance via IOCTL but it also really is tied the GuC backend as we only
> can communicate with the GuC in logical space. IMO we should leave as
> is.

Perhaps it would be best to call the new field uabi_logical_instance so 
it's clear it is reported in the query directly and do the BIT() 
transformation in the GuC backend?

> 
>>
>> Also, I think comment should explain what is logical space ie. how the
>> numbering works.
>>
> 
> Don't I already do that? I suppose I could add something like:

Where is it? Can't see it in the uapi kerneldoc AFAICS (for the new 
query) or here.

> 
> The logical mask within engine class must be contiguous across all
> instances.

Best not to start mentioning the mask for the first time. Just explain 
what logical numbering is in terms of how engines are enumerated in 
order of physical instances but skipping the fused off ones. The 
kerneldoc for the new query is, I think, the right place.

>> Not sure the part about GuC needs to be in the comment since uapi is
>> supposed to be backend agnostic.
>>
> 
> Communicating with the GuC in logical space is a pretty key point here.
> The communication with the GuC in logical space is backend specific but
> how our hardware works (e.g. split frame workloads must be placed
> logical contiguous) is not. Mentioning the GuC requirement here makes
> sense to me for completeness.

Yeah might be, I was thinking more about the new query. Query definitely 
is backend agnostic but yes it is fine to say in the comment here the 
new field is used both for the query and for communicating with GuC.

Regards,

Tvrtko

> 
> Matt
> 
>> Regards,
>>
>> Tvrtko
>>
>>>    	u8 class;
>>>    	u8 instance;
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>> index cafb0608ffb4..813a6de01382 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>> @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>>>    		ve->siblings[ve->num_siblings++] = sibling;
>>>    		ve->base.mask |= sibling->mask;
>>> +		ve->base.logical_mask |= sibling->logical_mask;
>>>    		/*
>>>    		 * All physical engines must be compatible for their emission
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
>>> index 6926919bcac6..9f5f43a16182 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
>>> @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
>>>    	for_each_engine(engine, gt, id) {
>>>    		u8 guc_class = engine_class_to_guc_class(engine->class);
>>> -		system_info->mapping_table[guc_class][engine->instance] =
>>> +		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
>>>    			engine->instance;
>>>    	}
>>>    }
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> index e0eed70f9b92..ffafbac7335e 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
>>>    	return __guc_action_deregister_context(guc, guc_id, loop);
>>>    }
>>> -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
>>> -{
>>> -	switch (class) {
>>> -	case RENDER_CLASS:
>>> -		return mask >> RCS0;
>>> -	case VIDEO_ENHANCEMENT_CLASS:
>>> -		return mask >> VECS0;
>>> -	case VIDEO_DECODE_CLASS:
>>> -		return mask >> VCS0;
>>> -	case COPY_ENGINE_CLASS:
>>> -		return mask >> BCS0;
>>> -	default:
>>> -		MISSING_CASE(class);
>>> -		return 0;
>>> -	}
>>> -}
>>> -
>>>    static void guc_context_policy_init(struct intel_engine_cs *engine,
>>>    				    struct guc_lrc_desc *desc)
>>>    {
>>> @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>>>    	desc = __get_lrc_desc(guc, desc_idx);
>>>    	desc->engine_class = engine_class_to_guc_class(engine->class);
>>> -	desc->engine_submit_mask = adjust_engine_mask(engine->class,
>>> -						      engine->mask);
>>> +	desc->engine_submit_mask = engine->logical_mask;
>>>    	desc->hw_context_desc = ce->lrc.lrca;
>>>    	desc->priority = ce->guc_state.prio;
>>>    	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
>>> @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>>>    		}
>>>    		ve->base.mask |= sibling->mask;
>>> +		ve->base.logical_mask |= sibling->logical_mask;
>>>    		if (n != 0 && ve->base.class != sibling->class) {
>>>    			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
>>>
Matthew Brost Sept. 13, 2021, 4:50 p.m. UTC | #4
On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote:
> 
> On 10/09/2021 20:49, Matthew Brost wrote:
> > On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
> > > 
> > > On 20/08/2021 23:44, Matthew Brost wrote:
> > > > Add logical engine mapping. This is required for split-frame, as
> > > > workloads need to be placed on engines in a logically contiguous manner.
> > > > 
> > > > v2:
> > > >    (Daniel Vetter)
> > > >     - Add kernel doc for new fields
> > > > 
> > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > > ---
> > > >    drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
> > > >    drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
> > > >    .../drm/i915/gt/intel_execlists_submission.c  |  1 +
> > > >    drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
> > > >    .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
> > > >    5 files changed, 60 insertions(+), 29 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > index 0d9105a31d84..4d790f9a65dd 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
> > > >    	GEM_DEBUG_WARN_ON(iir);
> > > >    }
> > > > -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > > > +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
> > > > +			      u8 logical_instance)
> > > >    {
> > > >    	const struct engine_info *info = &intel_engines[id];
> > > >    	struct drm_i915_private *i915 = gt->i915;
> > > > @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > > >    	engine->class = info->class;
> > > >    	engine->instance = info->instance;
> > > > +	engine->logical_mask = BIT(logical_instance);
> > > >    	__sprint_engine_name(engine);
> > > >    	engine->props.heartbeat_interval_ms =
> > > > @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
> > > >    	return info->engine_mask;
> > > >    }
> > > > +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
> > > > +				 u8 class, const u8 *map, u8 num_instances)
> > > > +{
> > > > +	int i, j;
> > > > +	u8 current_logical_id = 0;
> > > > +
> > > > +	for (j = 0; j < num_instances; ++j) {
> > > > +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > +			if (!HAS_ENGINE(gt, i) ||
> > > > +			    intel_engines[i].class != class)
> > > > +				continue;
> > > > +
> > > > +			if (intel_engines[i].instance == map[j]) {
> > > > +				logical_ids[intel_engines[i].instance] =
> > > > +					current_logical_id++;
> > > > +				break;
> > > > +			}
> > > > +		}
> > > > +	}
> > > > +}
> > > > +
> > > > +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
> > > > +{
> > > > +	int i;
> > > > +	u8 map[MAX_ENGINE_INSTANCE + 1];
> > > > +
> > > > +	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
> > > > +		map[i] = i;
> > > 
> > > What's the point of the map array since it is 1:1 with instance?
> > > 
> > 
> > Future products do not have a 1 to 1 mapping and that mapping can change
> > based on fusing, e.g. XeHP SDV.
> > 
> > Also technically ICL / TGL / ADL physical instance 2 maps to logical
> > instance 1.
> 
> I don't follow the argument. All I can see is that "map[i] = i" always in
> the proposed code, which is then used to check "instance == map[instance]".
> So I'd suggest to remove this array from the code until there is a need for
> it.
> 

Ok, this logic is slightly confusing and makes more sense once we have
non-standard mappings. Yes, the map is set up as a 1 to 1 mapping by default,
with the value in map[i] being a physical instance. populate_logical_ids()
searches the map, finding all physical instances present in the map and
assigning each found instance a new logical id, increasing by 1 each
time.

e.g. If the map is set up 0-N and only physical instances 0 / 2 are
present, they will get logical mappings 0 / 1 respectively.

This algorithm works for non-standard mappings too, with fused parts, e.g.
on XeHP SDV the map is: { 0, 2, 4, 6, 1, 3, 5, 7 } and if any of the
physical instances can't be found due to fusing the logical mapping is
still correct per the bspec.

This array is absolutely needed for multi-lrc submission to work, even
on ICL / TGL / ADL as the GuC only supports logically contiguous engine
instances.

> > > > +	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
> > > > +}
> > > > +
> > > >    /**
> > > >     * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
> > > >     * @gt: pointer to struct intel_gt
> > > > @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> > > >    	struct drm_i915_private *i915 = gt->i915;
> > > >    	const unsigned int engine_mask = init_engine_mask(gt);
> > > >    	unsigned int mask = 0;
> > > > -	unsigned int i;
> > > > +	unsigned int i, class;
> > > > +	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
> > > >    	int err;
> > > >    	drm_WARN_ON(&i915->drm, engine_mask == 0);
> > > > @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> > > >    	if (i915_inject_probe_failure(i915))
> > > >    		return -ENODEV;
> > > > -	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> > > > -		if (!HAS_ENGINE(gt, i))
> > > > -			continue;
> > > > +	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
> > > > +		setup_logical_ids(gt, logical_ids, class);
> > > > -		err = intel_engine_setup(gt, i);
> > > > -		if (err)
> > > > -			goto cleanup;
> > > > +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > +			u8 instance = intel_engines[i].instance;
> > > > +
> > > > +			if (intel_engines[i].class != class ||
> > > > +			    !HAS_ENGINE(gt, i))
> > > > +				continue;
> > > > -		mask |= BIT(i);
> > > > +			err = intel_engine_setup(gt, i,
> > > > +						 logical_ids[instance]);
> > > > +			if (err)
> > > > +				goto cleanup;
> > > > +
> > > > +			mask |= BIT(i);
> > > 
> > > I still this there is a less clunky way to set this up in less code and more
> > > readable at the same time. Like do it in two passes so you can iterate
> > > gt->engine_class[] array instead of having to implement a skip condition
> > > (both on class and HAS_ENGINE at two places) and also avoid walking the flat
> > > intel_engines array recursively.
> > > 
> > 
> > Kinda a bikeshed arguing about a pretty simple loop structure, don't you
> > think? I personally like the way it laid out.
> > 
> > Pseudo code for your suggestion?
> 
> Leave the existing setup loop as is and add an additional "for engine class"
> walk after it. That way you can walk already setup gt->engine_class[] array
> so wouldn't need to skip wrong classes and have HAS_ENGINE checks when
> walking the flat intel_engines[] array several times. It also applies to the
> helper which counts logical instances per class.
>

Ok, I think I see what you are getting at. Again IMO this is a total
bikeshed, as this is a one-time setup step where we really should only care
whether the loop works, rather than whether it is optimized / looks the way a
certain person wants. I can change this if you really insist, but again
IMO discussing this is a total waste of energy.
 
> > > > +		}
> > > >    	}
> > > >    	/*
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > index ed91bcff20eb..fddf35546b58 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > @@ -266,6 +266,11 @@ struct intel_engine_cs {
> > > >    	unsigned int guc_id;
> > > >    	intel_engine_mask_t mask;
> > > > +	/**
> > > > +	 * @logical_mask: logical mask of engine, reported to user space via
> > > > +	 * query IOCTL and used to communicate with the GuC in logical space
> > > > +	 */
> > > > +	intel_engine_mask_t logical_mask;
> > > 
> > > You could prefix the new field with uabi_ to match the existing scheme and
> > > to signify to anyone who might be touching it in the future it should not be
> > > changed.
> > 
> > This is kinda uabi, but also kinda isn't. We do report a logical
> > instance via IOCTL but it also really is tied the GuC backend as we only
> > can communicate with the GuC in logical space. IMO we should leave as
> > is.
> 
> Perhaps it would be best to call the new field uabi_logical_instance so it's
> clear it is reported in the query directly and do the BIT() transformation
> in the GuC backend?
>

Virtual engines can have multiple bits set in this mask, so this is
used for both the query on physical engines via UABI and submission in
the GuC backend.

> > 
> > > 
> > > Also, I think comment should explain what is logical space ie. how the
> > > numbering works.
> > > 
> > 
> > Don't I already do that? I suppose I could add something like:
> 
> Where is it? Can't see it in the uapi kerneldoc AFAICS (for the new query)
> or here.
>

	/**
	 * @logical_mask: logical mask of engine, reported to user space via
	 * query IOCTL and used to communicate with the GuC in logical space
	 */
 
> > 
> > The logical mask within engine class must be contiguous across all
> > instances.
> 
> Best not to start mentioning the mask for the first time. Just explain what
> logical numbering is in terms of how engines are enumerated in order of
> physical instances but skipping the fused off ones. In the kerneldoc for the
> new query is I think the right place.
>

Maybe I can add:

The logical mapping is defined on a per-part basis in the bspec and can
vary based on the part's fusing.
 
> > > Not sure the part about GuC needs to be in the comment since uapi is
> > > supposed to be backend agnostic.
> > > 
> > 
> > Communicating with the GuC in logical space is a pretty key point here.
> > The communication with the GuC in logical space is backend specific but
> > how our hardware works (e.g. split frame workloads must be placed
> > logical contiguous) is not. Mentioning the GuC requirement here makes
> > sense to me for completeness.
> 
> Yeah might be, I was thinking more about the new query. Query definitely is
> backend agnostic but yes it is fine to say in the comment here the new field
> is used both for the query and for communicating with GuC.
>

Sounds good, will make it clear it is used for the query and for
communicating with the GuC.

Matt
 
> Regards,
> 
> Tvrtko
> 
> > 
> > Matt
> > 
> > > Regards,
> > > 
> > > Tvrtko
> > > 
> > > >    	u8 class;
> > > >    	u8 instance;
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > index cafb0608ffb4..813a6de01382 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> > > >    		ve->siblings[ve->num_siblings++] = sibling;
> > > >    		ve->base.mask |= sibling->mask;
> > > > +		ve->base.logical_mask |= sibling->logical_mask;
> > > >    		/*
> > > >    		 * All physical engines must be compatible for their emission
> > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > index 6926919bcac6..9f5f43a16182 100644
> > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
> > > >    	for_each_engine(engine, gt, id) {
> > > >    		u8 guc_class = engine_class_to_guc_class(engine->class);
> > > > -		system_info->mapping_table[guc_class][engine->instance] =
> > > > +		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
> > > >    			engine->instance;
> > > >    	}
> > > >    }
> > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > index e0eed70f9b92..ffafbac7335e 100644
> > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
> > > >    	return __guc_action_deregister_context(guc, guc_id, loop);
> > > >    }
> > > > -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> > > > -{
> > > > -	switch (class) {
> > > > -	case RENDER_CLASS:
> > > > -		return mask >> RCS0;
> > > > -	case VIDEO_ENHANCEMENT_CLASS:
> > > > -		return mask >> VECS0;
> > > > -	case VIDEO_DECODE_CLASS:
> > > > -		return mask >> VCS0;
> > > > -	case COPY_ENGINE_CLASS:
> > > > -		return mask >> BCS0;
> > > > -	default:
> > > > -		MISSING_CASE(class);
> > > > -		return 0;
> > > > -	}
> > > > -}
> > > > -
> > > >    static void guc_context_policy_init(struct intel_engine_cs *engine,
> > > >    				    struct guc_lrc_desc *desc)
> > > >    {
> > > > @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
> > > >    	desc = __get_lrc_desc(guc, desc_idx);
> > > >    	desc->engine_class = engine_class_to_guc_class(engine->class);
> > > > -	desc->engine_submit_mask = adjust_engine_mask(engine->class,
> > > > -						      engine->mask);
> > > > +	desc->engine_submit_mask = engine->logical_mask;
> > > >    	desc->hw_context_desc = ce->lrc.lrca;
> > > >    	desc->priority = ce->guc_state.prio;
> > > >    	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> > > > @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> > > >    		}
> > > >    		ve->base.mask |= sibling->mask;
> > > > +		ve->base.logical_mask |= sibling->logical_mask;
> > > >    		if (n != 0 && ve->base.class != sibling->class) {
> > > >    			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> > > >
Tvrtko Ursulin Sept. 14, 2021, 8:34 a.m. UTC | #5
On 13/09/2021 17:50, Matthew Brost wrote:
> On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote:
>>
>> On 10/09/2021 20:49, Matthew Brost wrote:
>>> On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
>>>>
>>>> On 20/08/2021 23:44, Matthew Brost wrote:
>>>>> Add logical engine mapping. This is required for split-frame, as
>>>>> workloads need to be placed on engines in a logically contiguous manner.
>>>>>
>>>>> v2:
>>>>>     (Daniel Vetter)
>>>>>      - Add kernel doc for new fields
>>>>>
>>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
>>>>>     drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
>>>>>     .../drm/i915/gt/intel_execlists_submission.c  |  1 +
>>>>>     drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
>>>>>     .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
>>>>>     5 files changed, 60 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>>>> index 0d9105a31d84..4d790f9a65dd 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>>>> @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
>>>>>     	GEM_DEBUG_WARN_ON(iir);
>>>>>     }
>>>>> -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>>>>> +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
>>>>> +			      u8 logical_instance)
>>>>>     {
>>>>>     	const struct engine_info *info = &intel_engines[id];
>>>>>     	struct drm_i915_private *i915 = gt->i915;
>>>>> @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>>>>>     	engine->class = info->class;
>>>>>     	engine->instance = info->instance;
>>>>> +	engine->logical_mask = BIT(logical_instance);
>>>>>     	__sprint_engine_name(engine);
>>>>>     	engine->props.heartbeat_interval_ms =
>>>>> @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
>>>>>     	return info->engine_mask;
>>>>>     }
>>>>> +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
>>>>> +				 u8 class, const u8 *map, u8 num_instances)
>>>>> +{
>>>>> +	int i, j;
>>>>> +	u8 current_logical_id = 0;
>>>>> +
>>>>> +	for (j = 0; j < num_instances; ++j) {
>>>>> +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
>>>>> +			if (!HAS_ENGINE(gt, i) ||
>>>>> +			    intel_engines[i].class != class)
>>>>> +				continue;
>>>>> +
>>>>> +			if (intel_engines[i].instance == map[j]) {
>>>>> +				logical_ids[intel_engines[i].instance] =
>>>>> +					current_logical_id++;
>>>>> +				break;
>>>>> +			}
>>>>> +		}
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
>>>>> +{
>>>>> +	int i;
>>>>> +	u8 map[MAX_ENGINE_INSTANCE + 1];
>>>>> +
>>>>> +	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
>>>>> +		map[i] = i;
>>>>
>>>> What's the point of the map array since it is 1:1 with instance?
>>>>
>>>
>>> Future products do not have a 1 to 1 mapping and that mapping can change
>>> based on fusing, e.g. XeHP SDV.
>>>
>>> Also technically ICL / TGL / ADL physical instance 2 maps to logical
>>> instance 1.
>>
>> I don't follow the argument. All I can see is that "map[i] = i" always in
>> the proposed code, which is then used to check "instance == map[instance]".
>> So I'd suggest to remove this array from the code until there is a need for
>> it.
>>
> 
> Ok, this logic is slightly confusing and makes more sense once we have
> non-standard mappings. Yes, map is setup in a 1 to 1 mapping by default
> with the value in map[i] being a physical instance. Populate_logical_ids
> searches the map finding all physical instances present in the map
> assigning each found instance a new logical id increasing by 1 each
> time.
> 
> e.g. If the map is setup 0-N and only physical instance 0 / 2 are
> present they will get logical mapping 0 / 1 respectfully.
> 
> This algorithm works for non-standard mappings too /w fused parts. e.g.
> on XeHP SDV the map is: { 0, 2, 4, 6, 1, 3, 5, 7 } and if any of the
> physical instances can't be found due to fusing the logical mapping is
> still correct per the bspec.
> 
> This array is absolutely needed for multi-lrc submission to work, even
> on ICL / TGL / ADL as the GuC only supports logically contiguous engine
> instances.

No idea how an array fixed at "map[i] = i" can be absolutely needed when
you can just write it as "i". Sometimes it is okay to lay some ground
work for future platforms but in this case to me it's just obfuscation 
which should be added later, when it is required.

>>>>> +	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
>>>>> +}
>>>>> +
>>>>>     /**
>>>>>      * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
>>>>>      * @gt: pointer to struct intel_gt
>>>>> @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
>>>>>     	struct drm_i915_private *i915 = gt->i915;
>>>>>     	const unsigned int engine_mask = init_engine_mask(gt);
>>>>>     	unsigned int mask = 0;
>>>>> -	unsigned int i;
>>>>> +	unsigned int i, class;
>>>>> +	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
>>>>>     	int err;
>>>>>     	drm_WARN_ON(&i915->drm, engine_mask == 0);
>>>>> @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
>>>>>     	if (i915_inject_probe_failure(i915))
>>>>>     		return -ENODEV;
>>>>> -	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
>>>>> -		if (!HAS_ENGINE(gt, i))
>>>>> -			continue;
>>>>> +	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
>>>>> +		setup_logical_ids(gt, logical_ids, class);
>>>>> -		err = intel_engine_setup(gt, i);
>>>>> -		if (err)
>>>>> -			goto cleanup;
>>>>> +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
>>>>> +			u8 instance = intel_engines[i].instance;
>>>>> +
>>>>> +			if (intel_engines[i].class != class ||
>>>>> +			    !HAS_ENGINE(gt, i))
>>>>> +				continue;
>>>>> -		mask |= BIT(i);
>>>>> +			err = intel_engine_setup(gt, i,
>>>>> +						 logical_ids[instance]);
>>>>> +			if (err)
>>>>> +				goto cleanup;
>>>>> +
>>>>> +			mask |= BIT(i);
>>>>
>>>> I still this there is a less clunky way to set this up in less code and more
>>>> readable at the same time. Like do it in two passes so you can iterate
>>>> gt->engine_class[] array instead of having to implement a skip condition
>>>> (both on class and HAS_ENGINE at two places) and also avoid walking the flat
>>>> intel_engines array recursively.
>>>>
>>>
>>> Kinda a bikeshed arguing about a pretty simple loop structure, don't you
>>> think? I personally like the way it laid out.
>>>
>>> Pseudo code for your suggestion?
>>
>> Leave the existing setup loop as is and add an additional "for engine class"
>> walk after it. That way you can walk already setup gt->engine_class[] array
>> so wouldn't need to skip wrong classes and have HAS_ENGINE checks when
>> walking the flat intel_engines[] array several times. It also applies to the
>> helper which counts logical instances per class.
>>
> 
> Ok, I think I see what you are getting at. Again IMO this is a total
> bikeshed as this is 1 time setup step that we really should only care if
> the loop works or not rather than it being optimized / looks a way a
> certain person wants. I can change this if you really insist but again
> IMO disucssing this is a total waste of energy.

It should be such a no brainer to go with simpler and less invasive 
change that I honestly don't understand where is the big deal. Here is 
my pseudo code one more time and that will be the last from me on the topic.

Today we have:

for_each intel_engines: // intel_engines is a flat list of all engines
	intel_engine_setup()

You propose to change it to:

for_each engine_class:
    for 0..max_global_engine_instance:
       for_each intel_engines:
          skip engine not present
          skip class not matching

          count logical instance

    for_each intel_engines:
       skip engine not present
       skip wrong class

       intel_engine_setup()


I propose:

// Leave as is:

for_each intel_engines:
    intel_engine_setup()

// Add:

for_each engine_class:
    logical = 0
    for_each gt->engine_class[class]:
       skip engine not present

       engine->logical_instance = logical++


When code which actually needs a perturbed "map" arrives, you add that in
to this second loop.

>   
>>>>> +		}
>>>>>     	}
>>>>>     	/*
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>>> index ed91bcff20eb..fddf35546b58 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>>>>> @@ -266,6 +266,11 @@ struct intel_engine_cs {
>>>>>     	unsigned int guc_id;
>>>>>     	intel_engine_mask_t mask;
>>>>> +	/**
>>>>> +	 * @logical_mask: logical mask of engine, reported to user space via
>>>>> +	 * query IOCTL and used to communicate with the GuC in logical space
>>>>> +	 */
>>>>> +	intel_engine_mask_t logical_mask;
>>>>
>>>> You could prefix the new field with uabi_ to match the existing scheme and
>>>> to signify to anyone who might be touching it in the future it should not be
>>>> changed.
>>>
>>> This is kinda uabi, but also kinda isn't. We do report a logical
>>> instance via IOCTL but it also really is tied the GuC backend as we only
>>> can communicate with the GuC in logical space. IMO we should leave as
>>> is.
>>
>> Perhaps it would be best to call the new field uabi_logical_instance so it's
>> clear it is reported in the query directly and do the BIT() transformation
>> in the GuC backend?
>>
> 
> Virtual engines can have a multiple bits set in this mask, so this is
> used for both the query on physical engines via UABI and submission in
> the GuC backend.

You could add both fields if that would help. I just think it is
preferable to keep the existing convention of uabi_ prefix for the
fields which i915 refactors must not change willy-nilly.

> 
>>>
>>>>
>>>> Also, I think comment should explain what is logical space ie. how the
>>>> numbering works.
>>>>
>>>
>>> Don't I already do that? I suppose I could add something like:
>>
>> Where is it? Can't see it in the uapi kerneldoc AFAICS (for the new query)
>> or here.
>>
> 
> 	/**
> 	 * @logical_mask: logical mask of engine, reported to user space via
> 	 * query IOCTL and used to communicate with the GuC in logical space
> 	 */
>   
>>>
>>> The logical mask within engine class must be contiguous across all
>>> instances.
>>
>> Best not to start mentioning the mask for the first time. Just explain what
>> logical numbering is in terms of how engines are enumerated in order of
>> physical instances but skipping the fused off ones. In the kerneldoc for the
>> new query is I think the right place.
>>
> 
> Maybe I can add:
> 
> The logical mapping is defined on per part basis in the bspec and can
> very based the parts fusing.

Sounds good, I think that would be useful. But in the uapi kerneldoc.

Perhaps a cross-link to/from the kernel doc which talks about frame 
split to explain consecutive logical instances have to be used. That 
would tie the new query with that uapi in the narrative.

Regards,

Tvrtko

>>>> Not sure the part about GuC needs to be in the comment since uapi is
>>>> supposed to be backend agnostic.
>>>>
>>>
>>> Communicating with the GuC in logical space is a pretty key point here.
>>> The communication with the GuC in logical space is backend specific but
>>> how our hardware works (e.g. split frame workloads must be placed
>>> logical contiguous) is not. Mentioning the GuC requirement here makes
>>> sense to me for completeness.
>>
>> Yeah might be, I was thinking more about the new query. Query definitely is
>> backend agnostic but yes it is fine to say in the comment here the new field
>> is used both for the query and for communicating with GuC.
>>
> 
> Sounds good, will make it clear it used for the query and from
> communicating with the GuC.
> 
> Matt
>   
>> Regards,
>>
>> Tvrtko
>>
>>>
>>> Matt
>>>
>>>> Regards,
>>>>
>>>> Tvrtko
>>>>
>>>>>     	u8 class;
>>>>>     	u8 instance;
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>>>> index cafb0608ffb4..813a6de01382 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>>>> @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>>>>>     		ve->siblings[ve->num_siblings++] = sibling;
>>>>>     		ve->base.mask |= sibling->mask;
>>>>> +		ve->base.logical_mask |= sibling->logical_mask;
>>>>>     		/*
>>>>>     		 * All physical engines must be compatible for their emission
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
>>>>> index 6926919bcac6..9f5f43a16182 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
>>>>> @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
>>>>>     	for_each_engine(engine, gt, id) {
>>>>>     		u8 guc_class = engine_class_to_guc_class(engine->class);
>>>>> -		system_info->mapping_table[guc_class][engine->instance] =
>>>>> +		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
>>>>>     			engine->instance;
>>>>>     	}
>>>>>     }
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>>>> index e0eed70f9b92..ffafbac7335e 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>>>> @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
>>>>>     	return __guc_action_deregister_context(guc, guc_id, loop);
>>>>>     }
>>>>> -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
>>>>> -{
>>>>> -	switch (class) {
>>>>> -	case RENDER_CLASS:
>>>>> -		return mask >> RCS0;
>>>>> -	case VIDEO_ENHANCEMENT_CLASS:
>>>>> -		return mask >> VECS0;
>>>>> -	case VIDEO_DECODE_CLASS:
>>>>> -		return mask >> VCS0;
>>>>> -	case COPY_ENGINE_CLASS:
>>>>> -		return mask >> BCS0;
>>>>> -	default:
>>>>> -		MISSING_CASE(class);
>>>>> -		return 0;
>>>>> -	}
>>>>> -}
>>>>> -
>>>>>     static void guc_context_policy_init(struct intel_engine_cs *engine,
>>>>>     				    struct guc_lrc_desc *desc)
>>>>>     {
>>>>> @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
>>>>>     	desc = __get_lrc_desc(guc, desc_idx);
>>>>>     	desc->engine_class = engine_class_to_guc_class(engine->class);
>>>>> -	desc->engine_submit_mask = adjust_engine_mask(engine->class,
>>>>> -						      engine->mask);
>>>>> +	desc->engine_submit_mask = engine->logical_mask;
>>>>>     	desc->hw_context_desc = ce->lrc.lrca;
>>>>>     	desc->priority = ce->guc_state.prio;
>>>>>     	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
>>>>> @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
>>>>>     		}
>>>>>     		ve->base.mask |= sibling->mask;
>>>>> +		ve->base.logical_mask |= sibling->logical_mask;
>>>>>     		if (n != 0 && ve->base.class != sibling->class) {
>>>>>     			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
>>>>>
Matthew Brost Sept. 14, 2021, 6:04 p.m. UTC | #6
On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote:
> 
> On 13/09/2021 17:50, Matthew Brost wrote:
> > On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote:
> > > 
> > > On 10/09/2021 20:49, Matthew Brost wrote:
> > > > On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
> > > > > 
> > > > > On 20/08/2021 23:44, Matthew Brost wrote:
> > > > > > Add logical engine mapping. This is required for split-frame, as
> > > > > > workloads need to be placed on engines in a logically contiguous manner.
> > > > > > 
> > > > > > v2:
> > > > > >     (Daniel Vetter)
> > > > > >      - Add kernel doc for new fields
> > > > > > 
> > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > > > > ---
> > > > > >     drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 60 ++++++++++++++++---
> > > > > >     drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
> > > > > >     .../drm/i915/gt/intel_execlists_submission.c  |  1 +
> > > > > >     drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    |  2 +-
> > > > > >     .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------
> > > > > >     5 files changed, 60 insertions(+), 29 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > index 0d9105a31d84..4d790f9a65dd 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
> > > > > >     	GEM_DEBUG_WARN_ON(iir);
> > > > > >     }
> > > > > > -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > > > > > +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
> > > > > > +			      u8 logical_instance)
> > > > > >     {
> > > > > >     	const struct engine_info *info = &intel_engines[id];
> > > > > >     	struct drm_i915_private *i915 = gt->i915;
> > > > > > @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
> > > > > >     	engine->class = info->class;
> > > > > >     	engine->instance = info->instance;
> > > > > > +	engine->logical_mask = BIT(logical_instance);
> > > > > >     	__sprint_engine_name(engine);
> > > > > >     	engine->props.heartbeat_interval_ms =
> > > > > > @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
> > > > > >     	return info->engine_mask;
> > > > > >     }
> > > > > > +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
> > > > > > +				 u8 class, const u8 *map, u8 num_instances)
> > > > > > +{
> > > > > > +	int i, j;
> > > > > > +	u8 current_logical_id = 0;
> > > > > > +
> > > > > > +	for (j = 0; j < num_instances; ++j) {
> > > > > > +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > > > +			if (!HAS_ENGINE(gt, i) ||
> > > > > > +			    intel_engines[i].class != class)
> > > > > > +				continue;
> > > > > > +
> > > > > > +			if (intel_engines[i].instance == map[j]) {
> > > > > > +				logical_ids[intel_engines[i].instance] =
> > > > > > +					current_logical_id++;
> > > > > > +				break;
> > > > > > +			}
> > > > > > +		}
> > > > > > +	}
> > > > > > +}
> > > > > > +
> > > > > > +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
> > > > > > +{
> > > > > > +	int i;
> > > > > > +	u8 map[MAX_ENGINE_INSTANCE + 1];
> > > > > > +
> > > > > > +	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
> > > > > > +		map[i] = i;
> > > > > 
> > > > > What's the point of the map array since it is 1:1 with instance?
> > > > > 
> > > > 
> > > > Future products do not have a 1 to 1 mapping and that mapping can change
> > > > based on fusing, e.g. XeHP SDV.
> > > > 
> > > > Also technically ICL / TGL / ADL physical instance 2 maps to logical
> > > > instance 1.
> > > 
> > > I don't follow the argument. All I can see is that "map[i] = i" always in
> > > the proposed code, which is then used to check "instance == map[instance]".
> > > So I'd suggest to remove this array from the code until there is a need for
> > > it.
> > > 
> > 
> > Ok, this logic is slightly confusing and makes more sense once we have
> > non-standard mappings. Yes, map is setup in a 1 to 1 mapping by default
> > with the value in map[i] being a physical instance. Populate_logical_ids
> > searches the map finding all physical instances present in the map
> > assigning each found instance a new logical id increasing by 1 each
> > time.
> > 
> > e.g. If the map is set up 0-N and only physical instances 0 / 2 are
> > present, they will get logical mapping 0 / 1 respectively.
> > 
> > This algorithm works for non-standard mappings too /w fused parts. e.g.
> > on XeHP SDV the map is: { 0, 2, 4, 6, 1, 3, 5, 7 } and if any of the
> > physical instances can't be found due to fusing the logical mapping is
> > still correct per the bspec.
> > 
> > This array is absolutely needed for multi-lrc submission to work, even
> > on ICL / TGL / ADL as the GuC only supports logically contiguous engine
> > instances.
> 
> No idea how can an array fixed at "map[i] = i" be absolutely needed when you
> can just write it like "i". Sometimes it is okay to lay some ground work for

You can't write "i", that is the point. The map is a search array saying:
if the entry is present, assign + increase the logical id. That is how a map
of 0, 1, 2 with physical instances 0, 2 present results in a logical
mapping of 0, 1. This is the algorithm we use for all parts, albeit some
parts have different maps (e.g. XeHP SDV, PVC, etc...) compared to here
where we use the default map of sequential numbers.

> future platforms but in this case to me it's just obfuscation which should
> be added later, when it is required.

Same algorithm, see above. This should land as is, as it doesn't make sense
to hack a different algorithm only to replace it with the correct
algorithm later.

> 
> > > > > > +	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
> > > > > > +}
> > > > > > +
> > > > > >     /**
> > > > > >      * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
> > > > > >      * @gt: pointer to struct intel_gt
> > > > > > @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> > > > > >     	struct drm_i915_private *i915 = gt->i915;
> > > > > >     	const unsigned int engine_mask = init_engine_mask(gt);
> > > > > >     	unsigned int mask = 0;
> > > > > > -	unsigned int i;
> > > > > > +	unsigned int i, class;
> > > > > > +	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
> > > > > >     	int err;
> > > > > >     	drm_WARN_ON(&i915->drm, engine_mask == 0);
> > > > > > @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
> > > > > >     	if (i915_inject_probe_failure(i915))
> > > > > >     		return -ENODEV;
> > > > > > -	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
> > > > > > -		if (!HAS_ENGINE(gt, i))
> > > > > > -			continue;
> > > > > > +	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
> > > > > > +		setup_logical_ids(gt, logical_ids, class);
> > > > > > -		err = intel_engine_setup(gt, i);
> > > > > > -		if (err)
> > > > > > -			goto cleanup;
> > > > > > +		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > > > +			u8 instance = intel_engines[i].instance;
> > > > > > +
> > > > > > +			if (intel_engines[i].class != class ||
> > > > > > +			    !HAS_ENGINE(gt, i))
> > > > > > +				continue;
> > > > > > -		mask |= BIT(i);
> > > > > > +			err = intel_engine_setup(gt, i,
> > > > > > +						 logical_ids[instance]);
> > > > > > +			if (err)
> > > > > > +				goto cleanup;
> > > > > > +
> > > > > > +			mask |= BIT(i);
> > > > > 
> > > > > > I still think there is a less clunky way to set this up, in less code and more
> > > > > > readable at the same time. Like do it in two passes so you can iterate
> > > > > > gt->engine_class[] array instead of having to implement a skip condition
> > > > > > (both on class and HAS_ENGINE at two places) and also avoid walking the flat
> > > > > > intel_engines array recursively.
> > > > > 
> > > > 
> > > > Kinda a bikeshed arguing about a pretty simple loop structure, don't you
> > > > think? I personally like the way it laid out.
> > > > 
> > > > Pseudo code for your suggestion?
> > > 
> > > Leave the existing setup loop as is and add an additional "for engine class"
> > > walk after it. That way you can walk already setup gt->engine_class[] array
> > > so wouldn't need to skip wrong classes and have HAS_ENGINE checks when
> > > walking the flat intel_engines[] array several times. It also applies to the
> > > helper which counts logical instances per class.
> > > 
> > 
> > Ok, I think I see what you are getting at. Again IMO this is a total
> > bikeshed, as this is a one-time setup step where we really should only care
> > whether the loop works, rather than whether it is optimized / looks the way a
> > certain person wants. I can change this if you really insist but again
> > IMO discussing this is a total waste of energy.
> 
> It should be such a no brainer to go with simpler and less invasive change
> that I honestly don't understand where is the big deal. Here is my pseudo
> code one more time and that will be the last from me on the topic.
> 
> Today we have:
> 
> for_each intel_engines: // intel_engines is a flat list of all engines
> 	intel_engine_setup()
> 
> You propose to change it to:
> 
> for_each engine_class:
>    for 0..max_global_engine_instance:
>       for_each intel_engines:
>          skip engine not present
>          skip class not matching
> 
>          count logical instance
> 
>    for_each intel_engines:
>       skip engine not present
>       skip wrong class
> 
>       intel_engine_setup()
> 
> 
> I propose:
> 
> // Leave as is:
> 
> for_each intel_engines:
>    intel_engine_setup()
> 
> // Add:
> 
> for_each engine_class:
>    logical = 0
>    for_each gt->engine_class[class]:
>       skip engine not present
> 
>       engine->logical_instance = logical++
> 
> 
> When code which actually needs a perturbed "map" arrives you add that in to
> this second loop.
>

See above, why introduce an algorithm that doesn't work for future parts
+ future patches are landing imminently? It makes zero sense whatsoever.
With your proposal we would literally land code just to throw it away a
couple of months from now + break patches we intend to land soon. This
algorithm works and has no reason whatsoever to be optimal as it is a
one-time setup call. I really don't understand why we are still talking
about this paint color.

> > > > > > +		}
> > > > > >     	}
> > > > > >     	/*
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > > > index ed91bcff20eb..fddf35546b58 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > > > > @@ -266,6 +266,11 @@ struct intel_engine_cs {
> > > > > >     	unsigned int guc_id;
> > > > > >     	intel_engine_mask_t mask;
> > > > > > +	/**
> > > > > > +	 * @logical_mask: logical mask of engine, reported to user space via
> > > > > > +	 * query IOCTL and used to communicate with the GuC in logical space
> > > > > > +	 */
> > > > > > +	intel_engine_mask_t logical_mask;
> > > > > 
> > > > > You could prefix the new field with uabi_ to match the existing scheme and
> > > > > to signify to anyone who might be touching it in the future it should not be
> > > > > changed.
> > > > 
> > > > This is kinda uabi, but also kinda isn't. We do report a logical
> > > > instance via IOCTL but it also really is tied the GuC backend as we only
> > > > can communicate with the GuC in logical space. IMO we should leave as
> > > > is.
> > > 
> > > Perhaps it would be best to call the new field uabi_logical_instance so it's
> > > clear it is reported in the query directly and do the BIT() transformation
> > > in the GuC backend?
> > > 
> > 
> > Virtual engines can have a multiple bits set in this mask, so this is
> > used for both the query on physical engines via UABI and submission in
> > the GuC backend.
> 
> You could add both fields if that would help. I just think it is preferable
> to keep the existing convention of uabi_ prefix for the fields which i915
> refactors must not change willy-nilly.
>

Sure can add another field to make the separation clear.
 
> > 
> > > > 
> > > > > 
> > > > > Also, I think comment should explain what is logical space ie. how the
> > > > > numbering works.
> > > > > 
> > > > 
> > > > Don't I already do that? I suppose I could add something like:
> > > 
> > > Where is it? Can't see it in the uapi kerneldoc AFAICS (for the new query)
> > > or here.
> > > 
> > 
> > 	/**
> > 	 * @logical_mask: logical mask of engine, reported to user space via
> > 	 * query IOCTL and used to communicate with the GuC in logical space
> > 	 */
> > > > 
> > > > The logical mask within engine class must be contiguous across all
> > > > instances.
> > > 
> > > Best not to start mentioning the mask for the first time. Just explain what
> > > logical numbering is in terms of how engines are enumerated in order of
> > > physical instances but skipping the fused off ones. In the kerneldoc for the
> > > new query is I think the right place.
> > > 
> > 
> > Maybe I can add:
> > 
> > The logical mapping is defined on a per-part basis in the bspec and can
> > vary based on the part's fusing.
> 
> Sounds good, I think that would be useful. But in the uapi kerneldoc.
> 
> Perhaps a cross-link to/from the kernel doc which talks about frame split to
> explain consecutive logical instances have to be used. That would tie the
> new query with that uapi in the narrative.
>

Sure, let me see what I can do here.

Matt
 
> Regards,
> 
> Tvrtko
> 
> > > > > Not sure the part about GuC needs to be in the comment since uapi is
> > > > > supposed to be backend agnostic.
> > > > > 
> > > > 
> > > > Communicating with the GuC in logical space is a pretty key point here.
> > > > The communication with the GuC in logical space is backend specific but
> > > > how our hardware works (e.g. split frame workloads must be placed
> > > > logical contiguous) is not. Mentioning the GuC requirement here makes
> > > > sense to me for completeness.
> > > 
> > > Yeah might be, I was thinking more about the new query. Query definitely is
> > > backend agnostic but yes it is fine to say in the comment here the new field
> > > is used both for the query and for communicating with GuC.
> > > 
> > 
> > Sounds good, will make it clear it is used for the query and for
> > communicating with the GuC.
> > 
> > Matt
> > > Regards,
> > > 
> > > Tvrtko
> > > 
> > > > 
> > > > Matt
> > > > 
> > > > > Regards,
> > > > > 
> > > > > Tvrtko
> > > > > 
> > > > > >     	u8 class;
> > > > > >     	u8 instance;
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > > > index cafb0608ffb4..813a6de01382 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > > > > > @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> > > > > >     		ve->siblings[ve->num_siblings++] = sibling;
> > > > > >     		ve->base.mask |= sibling->mask;
> > > > > > +		ve->base.logical_mask |= sibling->logical_mask;
> > > > > >     		/*
> > > > > >     		 * All physical engines must be compatible for their emission
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > > > index 6926919bcac6..9f5f43a16182 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> > > > > > @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
> > > > > >     	for_each_engine(engine, gt, id) {
> > > > > >     		u8 guc_class = engine_class_to_guc_class(engine->class);
> > > > > > -		system_info->mapping_table[guc_class][engine->instance] =
> > > > > > +		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
> > > > > >     			engine->instance;
> > > > > >     	}
> > > > > >     }
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > > > index e0eed70f9b92..ffafbac7335e 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > > > > > @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
> > > > > >     	return __guc_action_deregister_context(guc, guc_id, loop);
> > > > > >     }
> > > > > > -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> > > > > > -{
> > > > > > -	switch (class) {
> > > > > > -	case RENDER_CLASS:
> > > > > > -		return mask >> RCS0;
> > > > > > -	case VIDEO_ENHANCEMENT_CLASS:
> > > > > > -		return mask >> VECS0;
> > > > > > -	case VIDEO_DECODE_CLASS:
> > > > > > -		return mask >> VCS0;
> > > > > > -	case COPY_ENGINE_CLASS:
> > > > > > -		return mask >> BCS0;
> > > > > > -	default:
> > > > > > -		MISSING_CASE(class);
> > > > > > -		return 0;
> > > > > > -	}
> > > > > > -}
> > > > > > -
> > > > > >     static void guc_context_policy_init(struct intel_engine_cs *engine,
> > > > > >     				    struct guc_lrc_desc *desc)
> > > > > >     {
> > > > > > @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
> > > > > >     	desc = __get_lrc_desc(guc, desc_idx);
> > > > > >     	desc->engine_class = engine_class_to_guc_class(engine->class);
> > > > > > -	desc->engine_submit_mask = adjust_engine_mask(engine->class,
> > > > > > -						      engine->mask);
> > > > > > +	desc->engine_submit_mask = engine->logical_mask;
> > > > > >     	desc->hw_context_desc = ce->lrc.lrca;
> > > > > >     	desc->priority = ce->guc_state.prio;
> > > > > >     	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> > > > > > @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
> > > > > >     		}
> > > > > >     		ve->base.mask |= sibling->mask;
> > > > > > +		ve->base.logical_mask |= sibling->logical_mask;
> > > > > >     		if (n != 0 && ve->base.class != sibling->class) {
> > > > > >     			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> > > > > >
Tvrtko Ursulin Sept. 15, 2021, 8:24 a.m. UTC | #7
On 14/09/2021 19:04, Matthew Brost wrote:
> On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote:
>>

8<

>> Today we have:
>>
>> for_each intel_engines: // intel_engines is a flat list of all engines
>> 	intel_engine_setup()
>>
>> You propose to change it to:
>>
>> for_each engine_class:
>>     for 0..max_global_engine_instance:
>>        for_each intel_engines:
>>           skip engine not present
>>           skip class not matching
>>
>>           count logical instance
>>
>>     for_each intel_engines:
>>        skip engine not present
>>        skip wrong class
>>
>>        intel_engine_setup()
>>
>>
>> I propose:
>>
>> // Leave as is:
>>
>> for_each intel_engines:
>>     intel_engine_setup()
>>
>> // Add:
>>
>> for_each engine_class:
>>     logical = 0
>>     for_each gt->engine_class[class]:
>>        skip engine not present
>>
>>        engine->logical_instance = logical++
>>
>>
>> When code which actually needs a preturbed "map" arrives you add that in to
>> this second loop.
>>
> 
> See above, why introduce an algorithm that doesn't work for future parts
> + future patches are land imminently? It makes zero sense whatsoever.
> With your proposal we would literally land code to just throw it away a
> couple of months from now + break patches we intend to land soon. This

It sure works, it just walks the per class list instead of walking the 
flat list skipping one class at the time.

Just add the map based transformation to the second pass later, when it 
becomes required.

> algorithm works and has no reason whatsoever to be optimal as it a one
> time setup call. I really don't understand why we are still talking
> about this paint color.

I don't think bike shedding is an appropriate term when the complaint is 
that the proposed algorithm is needlessly complicated.

Regards,

Tvrtko
Matthew Brost Sept. 15, 2021, 4:58 p.m. UTC | #8
On Wed, Sep 15, 2021 at 09:24:15AM +0100, Tvrtko Ursulin wrote:
> 
> On 14/09/2021 19:04, Matthew Brost wrote:
> > On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote:
> > > 
> 
> 8<
> 
> > > Today we have:
> > > 
> > > for_each intel_engines: // intel_engines is a flat list of all engines
> > > 	intel_engine_setup()
> > > 
> > > You propose to change it to:
> > > 
> > > for_each engine_class:
> > >     for 0..max_global_engine_instance:
> > >        for_each intel_engines:
> > >           skip engine not present
> > >           skip class not matching
> > > 
> > >           count logical instance
> > > 
> > >     for_each intel_engines:
> > >        skip engine not present
> > >        skip wrong class
> > > 
> > >        intel_engine_setup()
> > > 
> > > 
> > > I propose:
> > > 
> > > // Leave as is:
> > > 
> > > for_each intel_engines:
> > >     intel_engine_setup()
> > > 
> > > // Add:
> > > 
> > > for_each engine_class:
> > >     logical = 0
> > >     for_each gt->engine_class[class]:
> > >        skip engine not present
> > > 
> > >        engine->logical_instance = logical++
> > > 
> > > 
> > > When code which actually needs a preturbed "map" arrives you add that in to
> > > this second loop.
> > > 
> > 
> > See above, why introduce an algorithm that doesn't work for future parts
> > + future patches are land imminently? It makes zero sense whatsoever.
> > With your proposal we would literally land code to just throw it away a
> > couple of months from now + break patches we intend to land soon. This
> 
> It sure works, it just walks the per class list instead of walking the flat
> list skipping one class at the time.
> 
> Just add the map based transformation to the second pass later, when it
> becomes required.
> 

I can flatten the algorithm if that helps alleviate your concerns but
with that being said, I've played around with this locally and IMO it makes the
code way more ugly. Sure it eliminates some iterations of the loop but
who really cares about that in a one-time setup function?

> > algorithm works and has no reason whatsoever to be optimal as it a one
> > time setup call. I really don't understand why we are still talking
> > about this paint color.
> 
> I don't think bike shedding is not an appropriate term when complaint is how
> proposed algorithm is needlessly complicated.
>

Are you just ignoring the fact that the algorithm (map) is needed in
pending patches? IMO it is more complicated to write throwaway code
when the proper algorithm is already written. If the logical mapping was
as straightforward on all platforms as on the ones currently upstream I would
100% agree with your suggestion, but it isn't on unembargoed platforms
imminently going upstream. The algorithm I have works for the current
platforms + the pending platforms. IMO it is 100% acceptable to merge
something looking towards a known future.

Matt

> Regards,
> 
> Tvrtko
Tvrtko Ursulin Sept. 16, 2021, 8:31 a.m. UTC | #9
On 15/09/2021 17:58, Matthew Brost wrote:
> On Wed, Sep 15, 2021 at 09:24:15AM +0100, Tvrtko Ursulin wrote:
>>
>> On 14/09/2021 19:04, Matthew Brost wrote:
>>> On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote:
>>>>
>>
>> 8<
>>
>>>> Today we have:
>>>>
>>>> for_each intel_engines: // intel_engines is a flat list of all engines
>>>> 	intel_engine_setup()
>>>>
>>>> You propose to change it to:
>>>>
>>>> for_each engine_class:
>>>>      for 0..max_global_engine_instance:
>>>>         for_each intel_engines:
>>>>            skip engine not present
>>>>            skip class not matching
>>>>
>>>>            count logical instance
>>>>
>>>>      for_each intel_engines:
>>>>         skip engine not present
>>>>         skip wrong class
>>>>
>>>>         intel_engine_setup()
>>>>
>>>>
>>>> I propose:
>>>>
>>>> // Leave as is:
>>>>
>>>> for_each intel_engines:
>>>>      intel_engine_setup()
>>>>
>>>> // Add:
>>>>
>>>> for_each engine_class:
>>>>      logical = 0
>>>>      for_each gt->engine_class[class]:
>>>>         skip engine not present
>>>>
>>>>         engine->logical_instance = logical++
>>>>
>>>>
>>>> When code which actually needs a preturbed "map" arrives you add that in to
>>>> this second loop.
>>>>
>>>
>>> See above, why introduce an algorithm that doesn't work for future parts
>>> + future patches are land imminently? It makes zero sense whatsoever.
>>> With your proposal we would literally land code to just throw it away a
>>> couple of months from now + break patches we intend to land soon. This
>>
>> It sure works, it just walks the per class list instead of walking the flat
>> list skipping one class at the time.
>>
>> Just add the map based transformation to the second pass later, when it
>> becomes required.
>>
> 
> I can flatten the algorithm if that helps alleviate your concerns but
> with that being said, I've played around this locally and IMO makes the
> code way more ugly. Sure it eliminates some iterations of the loop but
> who really cares about that in a one time setup function?
> 
>>> algorithm works and has no reason whatsoever to be optimal as it a one
>>> time setup call. I really don't understand why we are still talking
>>> about this paint color.
>>
>> I don't think bike shedding is not an appropriate term when complaint is how
>> proposed algorithm is needlessly complicated.
>>
> 
> Are you just ignoring the fact that the algorithm (map) is needed in
> pending patches? IMO it is more complicated to write throw away code
> when the proper algorithm is already written. If the logical mapping was
> straight forward on all platforms as the ones currently upstream I would
> 100% agree with your suggestion, but it isn't on unembargoed platforms
> eminently going upstream. The algorithm I have works for the current
> platforms + the pending platforms. IMO is 100% acceptable to merge
> something looking towards a known future.

FWIW my 2c is that unused bits detract from review. And my gut feeling 
still is that code can be written in a simpler way and that the map 
transform can still plug in easily on top in a later series.

I said FWIW since even if I am right you can still view my comments as 
external/community inputs at this point and proceed however you wish.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 0d9105a31d84..4d790f9a65dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -290,7 +290,8 @@  static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
 	GEM_DEBUG_WARN_ON(iir);
 }
 
-static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
+			      u8 logical_instance)
 {
 	const struct engine_info *info = &intel_engines[id];
 	struct drm_i915_private *i915 = gt->i915;
@@ -334,6 +335,7 @@  static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 
 	engine->class = info->class;
 	engine->instance = info->instance;
+	engine->logical_mask = BIT(logical_instance);
 	__sprint_engine_name(engine);
 
 	engine->props.heartbeat_interval_ms =
@@ -572,6 +574,37 @@  static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
 	return info->engine_mask;
 }
 
+static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
+				 u8 class, const u8 *map, u8 num_instances)
+{
+	int i, j;
+	u8 current_logical_id = 0;
+
+	for (j = 0; j < num_instances; ++j) {
+		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+			if (!HAS_ENGINE(gt, i) ||
+			    intel_engines[i].class != class)
+				continue;
+
+			if (intel_engines[i].instance == map[j]) {
+				logical_ids[intel_engines[i].instance] =
+					current_logical_id++;
+				break;
+			}
+		}
+	}
+}
+
+static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
+{
+	int i;
+	u8 map[MAX_ENGINE_INSTANCE + 1];
+
+	for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+		map[i] = i;
+	populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+}
+
 /**
  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
  * @gt: pointer to struct intel_gt
@@ -583,7 +616,8 @@  int intel_engines_init_mmio(struct intel_gt *gt)
 	struct drm_i915_private *i915 = gt->i915;
 	const unsigned int engine_mask = init_engine_mask(gt);
 	unsigned int mask = 0;
-	unsigned int i;
+	unsigned int i, class;
+	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
 	int err;
 
 	drm_WARN_ON(&i915->drm, engine_mask == 0);
@@ -593,15 +627,23 @@  int intel_engines_init_mmio(struct intel_gt *gt)
 	if (i915_inject_probe_failure(i915))
 		return -ENODEV;
 
-	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
-		if (!HAS_ENGINE(gt, i))
-			continue;
+	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
+		setup_logical_ids(gt, logical_ids, class);
 
-		err = intel_engine_setup(gt, i);
-		if (err)
-			goto cleanup;
+		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+			u8 instance = intel_engines[i].instance;
+
+			if (intel_engines[i].class != class ||
+			    !HAS_ENGINE(gt, i))
+				continue;
 
-		mask |= BIT(i);
+			err = intel_engine_setup(gt, i,
+						 logical_ids[instance]);
+			if (err)
+				goto cleanup;
+
+			mask |= BIT(i);
+		}
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ed91bcff20eb..fddf35546b58 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -266,6 +266,11 @@  struct intel_engine_cs {
 	unsigned int guc_id;
 
 	intel_engine_mask_t mask;
+	/**
+	 * @logical_mask: logical mask of engine, reported to user space via
+	 * query IOCTL and used to communicate with the GuC in logical space
+	 */
+	intel_engine_mask_t logical_mask;
 
 	u8 class;
 	u8 instance;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index cafb0608ffb4..813a6de01382 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3875,6 +3875,7 @@  execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
 
 		ve->siblings[ve->num_siblings++] = sibling;
 		ve->base.mask |= sibling->mask;
+		ve->base.logical_mask |= sibling->logical_mask;
 
 		/*
 		 * All physical engines must be compatible for their emission
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 6926919bcac6..9f5f43a16182 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -176,7 +176,7 @@  static void guc_mapping_table_init(struct intel_gt *gt,
 	for_each_engine(engine, gt, id) {
 		u8 guc_class = engine_class_to_guc_class(engine->class);
 
-		system_info->mapping_table[guc_class][engine->instance] =
+		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
 			engine->instance;
 	}
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e0eed70f9b92..ffafbac7335e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1401,23 +1401,6 @@  static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
 	return __guc_action_deregister_context(guc, guc_id, loop);
 }
 
-static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
-{
-	switch (class) {
-	case RENDER_CLASS:
-		return mask >> RCS0;
-	case VIDEO_ENHANCEMENT_CLASS:
-		return mask >> VECS0;
-	case VIDEO_DECODE_CLASS:
-		return mask >> VCS0;
-	case COPY_ENGINE_CLASS:
-		return mask >> BCS0;
-	default:
-		MISSING_CASE(class);
-		return 0;
-	}
-}
-
 static void guc_context_policy_init(struct intel_engine_cs *engine,
 				    struct guc_lrc_desc *desc)
 {
@@ -1459,8 +1442,7 @@  static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
 
 	desc = __get_lrc_desc(guc, desc_idx);
 	desc->engine_class = engine_class_to_guc_class(engine->class);
-	desc->engine_submit_mask = adjust_engine_mask(engine->class,
-						      engine->mask);
+	desc->engine_submit_mask = engine->logical_mask;
 	desc->hw_context_desc = ce->lrc.lrca;
 	desc->priority = ce->guc_state.prio;
 	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
@@ -3260,6 +3242,7 @@  guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
 		}
 
 		ve->base.mask |= sibling->mask;
+		ve->base.logical_mask |= sibling->logical_mask;
 
 		if (n != 0 && ve->base.class != sibling->class) {
 			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",