diff mbox

[4/4] drm/i915: expose rcs topology through discovery uAPI

Message ID 20171108162238.4072-5-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin Nov. 8, 2017, 4:22 p.m. UTC
With the introduction of asymetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam. Here we introduce a more detailed
way of querying the Gen's GPU topology that doesn't aggregate numbers.

This is essential for monitoring parts of the GPU with the OA unit,
because counters need to be normalized to the number of
EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
not gives us sufficient information.

This change introduce a new way to query properties of the GPU, making
room for new queries (some media related topology could be exposed in
the future).

As a bonus we can draw representations of the GPU :

        https://imgur.com/a/vuqpa

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/intel_query_info.c | 64 +++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h             | 55 ++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)

Comments

Tvrtko Ursulin Nov. 9, 2017, 5:34 p.m. UTC | #1
On 08/11/2017 16:22, Lionel Landwerlin wrote:
> With the introduction of asymetric slices in CNL, we cannot rely on
> the previous SUBSLICE_MASK getparam. Here we introduce a more detailed
> way of querying the Gen's GPU topology that doesn't aggregate numbers.
> 
> This is essential for monitoring parts of the GPU with the OA unit,
> because counters need to be normalized to the number of
> EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
> not gives us sufficient information.
> 
> This change introduce a new way to query properties of the GPU, making
> room for new queries (some media related topology could be exposed in
> the future).
> 
> As a bonus we can draw representations of the GPU :
> 
>          https://imgur.com/a/vuqpa
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_query_info.c | 64 +++++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h             | 55 ++++++++++++++++++++++++++++
>   2 files changed, 119 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_query_info.c b/drivers/gpu/drm/i915/intel_query_info.c
> index 21434c393582..45c5b29e0988 100644
> --- a/drivers/gpu/drm/i915/intel_query_info.c
> +++ b/drivers/gpu/drm/i915/intel_query_info.c
> @@ -25,6 +25,68 @@
>   #include "i915_drv.h"
>   #include <uapi/drm/i915_drm.h>
>   
> +static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
> +				   struct drm_i915_query_info *args)
> +{
> +	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
> +	struct drm_i915_rcs_topology_info __user *user_topology =
> +		u64_to_user_ptr(args->info_ptr);
> +	struct drm_i915_rcs_topology_info topology;
> +	u32 data_size, total_size;
> +	const u8 *data = NULL;
> +	int ret;
> +
> +	/* Not supported on gen < 8. */
> +	if (sseu->max_slices == 0)
> +		return -ENODEV;
> +
> +	switch (args->query_params[0]) {
> +	case I915_RCS_TOPOLOGY_SLICE:
> +		topology.params[0] = sseu->max_slices;
> +		data_size = sizeof(sseu->slice_mask);
> +		data = &sseu->slice_mask;

Regardless of the solution, you will need to translate the internal 
kernel data into something strictly defined in the uAPI headers, 
otherwise we leak internal organization into ABI.

> +		break;
> +
> +	case I915_RCS_TOPOLOGY_SUBSLICE:
> +		topology.params[0] = sseu->max_slices;
> +		topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
> +		data_size = sseu->max_slices * topology.params[1];
> +		data = sseu->subslices_mask;
> +		break;
> +
> +	case I915_RCS_TOPOLOGY_EU:
> +		topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
> +		topology.params[1] = sseu->max_subslices * topology.params[2];
> +		topology.params[0] = sseu->max_slices;
> +		data_size = sseu->max_slices * topology.params[1];
> +		data = sseu->eu_mask;
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	total_size = sizeof(topology) + data_size;
> +
> +	if (args->info_ptr_len == 0) {
> +		args->info_ptr_len = total_size;
> +		return 0;
> +	}
> +
> +	if (args->info_ptr_len < total_size)
> +		return -EINVAL;
> +
> +	ret = copy_to_user(user_topology, &topology, sizeof(topology));
> +	if (ret)
> +		return -EFAULT;
> +
> +	ret = copy_to_user(user_topology + 1, data, data_size);
> +	if (ret)
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
>   	[I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
>   	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
> @@ -112,6 +174,8 @@ int i915_query_info_ioctl(struct drm_device *dev, void *data,
>   	switch (args->query) {
>   	case I915_QUERY_INFO_ENGINE:
>   		return query_info_engine(dev_priv, args);
> +	case I915_QUERY_INFO_RCS_TOPOLOGY:
> +		return query_info_rcs_topology(dev_priv, args);
>   	default:
>   		return -EINVAL;
>   	}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c6026616300e..5b6a8995a948 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1569,6 +1569,61 @@ struct drm_i915_engine_info {
>   	__u8 rsvd2[6];
>   };
>   
> +/* Query RCS topology.
> + *
> + * drm_i915_query_info.query_params[0] should be set to one of the
> + * I915_RCS_TOPOLOGY_* define.
> + *
> + * drm_i915_gem_query_info.info_ptr will be written to with
> + * drm_i915_rcs_topology_info.
> + */
> +#define I915_QUERY_INFO_RCS_TOPOLOGY	1 /* version 1 */
> +
> +/* Query RCS slice topology
> + *
> + * The meaning of the drm_i915_rcs_topology_info fields is :
> + *
> + * params[0]: number of slices
> + *
> + * data: Each bit indicates whether a slice is available (1) or fused off (0).
> + * Formula to tell if slice X is available :
> + *
> + *         (data[X / 8] >> (X % 8)) & 1
> + */
> +#define I915_RCS_TOPOLOGY_SLICE		0 /* version 1 */
> +/* Query RCS subslice topology
> + *
> + * The meaning of the drm_i915_rcs_topology_info fields is :
> + *
> + * params[0]: number of slices
> + * params[1]: slice stride
> + *
> + * data: each bit indicates whether a subslice is available (1) or fused off
> + * (0). Formula to tell if slice X subslice Y is available :
> + *
> + *         (data[(X * params[1]) + Y / 8] >> (Y % 8)) & 1
> + */
> +#define I915_RCS_TOPOLOGY_SUBSLICE	1 /* version 1 */
> +/* Query RCS EU topology
> + *
> + * The meaning of the drm_i915_rcs_topology_info fields is :
> + *
> + * params[0]: number of slices
> + * params[1]: slice stride
> + * params[2]: subslice stride
> + *
> + * data: Each bit indicates whether a subslice is available (1) or fused off
> + * (0). Formula to tell if slice X subslice Y eu Z is available :
> + *
> + *         (data[X * params[1] + Y * params[2] + Z / 8] >> (Z % 8)) & 1
> + */
> +#define I915_RCS_TOPOLOGY_EU		2 /* version 1 */
> +
> +struct drm_i915_rcs_topology_info {
> +	__u32 params[6];
> +
> +	__u8 data[];

You don't use this marker in the patch.

But in general would it be feasible to define and name the returned data 
more precisely? Like:

struct drm_engine_rcs_engine_info {
	...
	/existing_stuff/
	...

#define HAS_TOPOLOGY
	u32 flags;

	struct {
		u32 this;
		u32 that;
		...
		u8 mask[SOME_FUTURE_PROOF_NUMBER];
	} slice_topology;

	struct {
		u32 this;
		u32 that;
		...
		u8 mask[SOME_FUTURE_PROOF_NUMBER];
	} subslice_topology;

	struct {
		u32 this;
		u32 that;
		...
		u8 mask[SOME_FUTURE_PROOF_NUMBER];
	} eu_topology;
};

That said, perhaps RCS topology belongs more to the GPU than the render 
engine.

Regards,

Tvrtko

> +};
>   
>   struct drm_i915_query_info {
>   	/* in/out: Protocol version requested/supported. When set to 0, the
>
Lionel Landwerlin Nov. 10, 2017, 4:37 p.m. UTC | #2
On 09/11/17 17:34, Tvrtko Ursulin wrote:
>
> On 08/11/2017 16:22, Lionel Landwerlin wrote:
>> With the introduction of asymetric slices in CNL, we cannot rely on
>> the previous SUBSLICE_MASK getparam. Here we introduce a more detailed
>> way of querying the Gen's GPU topology that doesn't aggregate numbers.
>>
>> This is essential for monitoring parts of the GPU with the OA unit,
>> because counters need to be normalized to the number of
>> EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
>> not gives us sufficient information.
>>
>> This change introduce a new way to query properties of the GPU, making
>> room for new queries (some media related topology could be exposed in
>> the future).
>>
>> As a bonus we can draw representations of the GPU :
>>
>>          https://imgur.com/a/vuqpa
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_query_info.c | 64 
>> +++++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h             | 55 
>> ++++++++++++++++++++++++++++
>>   2 files changed, 119 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_query_info.c 
>> b/drivers/gpu/drm/i915/intel_query_info.c
>> index 21434c393582..45c5b29e0988 100644
>> --- a/drivers/gpu/drm/i915/intel_query_info.c
>> +++ b/drivers/gpu/drm/i915/intel_query_info.c
>> @@ -25,6 +25,68 @@
>>   #include "i915_drv.h"
>>   #include <uapi/drm/i915_drm.h>
>>   +static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
>> +                   struct drm_i915_query_info *args)
>> +{
>> +    const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
>> +    struct drm_i915_rcs_topology_info __user *user_topology =
>> +        u64_to_user_ptr(args->info_ptr);
>> +    struct drm_i915_rcs_topology_info topology;
>> +    u32 data_size, total_size;
>> +    const u8 *data = NULL;
>> +    int ret;
>> +
>> +    /* Not supported on gen < 8. */
>> +    if (sseu->max_slices == 0)
>> +        return -ENODEV;
>> +
>> +    switch (args->query_params[0]) {
>> +    case I915_RCS_TOPOLOGY_SLICE:
>> +        topology.params[0] = sseu->max_slices;
>> +        data_size = sizeof(sseu->slice_mask);
>> +        data = &sseu->slice_mask;
>
> Regardless of the solution, you will need to translate the internal 
> kernel data into something strictly defined in the uAPI headers, 
> otherwise we leak internal organization into ABI.

Sure, I'll add an assert to make sure that it is u8, so if it does 
switch to u16/u32 at some point, people will remember to maintain the API.

>
>> +        break;
>> +
>> +    case I915_RCS_TOPOLOGY_SUBSLICE:
>> +        topology.params[0] = sseu->max_slices;
>> +        topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
>> +        data_size = sseu->max_slices * topology.params[1];
>> +        data = sseu->subslices_mask;
>> +        break;
>> +
>> +    case I915_RCS_TOPOLOGY_EU:
>> +        topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
>> +        topology.params[1] = sseu->max_subslices * topology.params[2];
>> +        topology.params[0] = sseu->max_slices;
>> +        data_size = sseu->max_slices * topology.params[1];
>> +        data = sseu->eu_mask;
>> +        break;
>> +
>> +    default:
>> +        return -EINVAL;
>> +    }
>> +
>> +    total_size = sizeof(topology) + data_size;
>> +
>> +    if (args->info_ptr_len == 0) {
>> +        args->info_ptr_len = total_size;
>> +        return 0;
>> +    }
>> +
>> +    if (args->info_ptr_len < total_size)
>> +        return -EINVAL;
>> +
>> +    ret = copy_to_user(user_topology, &topology, sizeof(topology));
>> +    if (ret)
>> +        return -EFAULT;
>> +
>> +    ret = copy_to_user(user_topology + 1, data, data_size);
>> +    if (ret)
>> +        return -EFAULT;
>> +
>> +    return 0;
>> +}
>> +
>>   static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
>>       [I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
>>       [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
>> @@ -112,6 +174,8 @@ int i915_query_info_ioctl(struct drm_device *dev, 
>> void *data,
>>       switch (args->query) {
>>       case I915_QUERY_INFO_ENGINE:
>>           return query_info_engine(dev_priv, args);
>> +    case I915_QUERY_INFO_RCS_TOPOLOGY:
>> +        return query_info_rcs_topology(dev_priv, args);
>>       default:
>>           return -EINVAL;
>>       }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index c6026616300e..5b6a8995a948 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1569,6 +1569,61 @@ struct drm_i915_engine_info {
>>       __u8 rsvd2[6];
>>   };
>>   +/* Query RCS topology.
>> + *
>> + * drm_i915_query_info.query_params[0] should be set to one of the
>> + * I915_RCS_TOPOLOGY_* define.
>> + *
>> + * drm_i915_gem_query_info.info_ptr will be written to with
>> + * drm_i915_rcs_topology_info.
>> + */
>> +#define I915_QUERY_INFO_RCS_TOPOLOGY    1 /* version 1 */
>> +
>> +/* Query RCS slice topology
>> + *
>> + * The meaning of the drm_i915_rcs_topology_info fields is :
>> + *
>> + * params[0]: number of slices
>> + *
>> + * data: Each bit indicates whether a slice is available (1) or 
>> fused off (0).
>> + * Formula to tell if slice X is available :
>> + *
>> + *         (data[X / 8] >> (X % 8)) & 1
>> + */
>> +#define I915_RCS_TOPOLOGY_SLICE        0 /* version 1 */
>> +/* Query RCS subslice topology
>> + *
>> + * The meaning of the drm_i915_rcs_topology_info fields is :
>> + *
>> + * params[0]: number of slices
>> + * params[1]: slice stride
>> + *
>> + * data: each bit indicates whether a subslice is available (1) or 
>> fused off
>> + * (0). Formula to tell if slice X subslice Y is available :
>> + *
>> + *         (data[(X * params[1]) + Y / 8] >> (Y % 8)) & 1
>> + */
>> +#define I915_RCS_TOPOLOGY_SUBSLICE    1 /* version 1 */
>> +/* Query RCS EU topology
>> + *
>> + * The meaning of the drm_i915_rcs_topology_info fields is :
>> + *
>> + * params[0]: number of slices
>> + * params[1]: slice stride
>> + * params[2]: subslice stride
>> + *
>> + * data: Each bit indicates whether a subslice is available (1) or 
>> fused off
>> + * (0). Formula to tell if slice X subslice Y eu Z is available :
>> + *
>> + *         (data[X * params[1] + Y * params[2] + Z / 8] >> (Z % 8)) & 1
>> + */
>> +#define I915_RCS_TOPOLOGY_EU        2 /* version 1 */
>> +
>> +struct drm_i915_rcs_topology_info {
>> +    __u32 params[6];
>> +
>> +    __u8 data[];
>
> You don't use this marker in the patch.
>
> But in general would it be feasible to define and name the returned 
> data more precisely? Like:
>
> struct drm_engine_rcs_engine_info {
>     ...
>     /existing_stuff/
>     ...
>
> #define HAS_TOPOLOGY
>     u32 flags;
>
>     struct {
>         u32 this;
>         u32 that;
>         ...
>         u8 mask[SOME_FUTURE_PROOF_NUMBER];
>     } slice_topology;
>
>     struct {
>         u32 this;
>         u32 that;
>         ...
>         u8 mask[SOME_FUTURE_PROOF_NUMBER];
>     } subslice_topology;
>
>     struct {
>         u32 this;
>         u32 that;
>         ...
>         u8 mask[SOME_FUTURE_PROOF_NUMBER];
>     } eu_topology;
> };

I'm pretty sure we'll need to expose more than these 3 properties here 
(slice/subslice/eus) soon.
Some of the components residing in the subslice could be of interest.
So I'm reluctant to have all of this within a single struct which we 
can't change the size of.
Having an enum for each property and possibly an associated struct for 
each is fine though.

>
> That said, perhaps RCS topology belongs more to the GPU than the 
> render engine.
>
> Regards,
>
> Tvrtko
>
>> +};
>>     struct drm_i915_query_info {
>>       /* in/out: Protocol version requested/supported. When set to 0, 
>> the
>>
>
Chris Wilson Nov. 10, 2017, 4:47 p.m. UTC | #3
Quoting Lionel Landwerlin (2017-11-10 16:37:33)
> On 09/11/17 17:34, Tvrtko Ursulin wrote:
> >
> > On 08/11/2017 16:22, Lionel Landwerlin wrote:
> > But in general would it be feasible to define and name the returned 
> > data more precisely? Like:
> >
> > struct drm_engine_rcs_engine_info {
> >     ...
> >     /existing_stuff/
> >     ...
> >
> > #define HAS_TOPOLOGY
> >     u32 flags;
> >
> >     struct {
> >         u32 this;
> >         u32 that;
> >         ...
> >         u8 mask[SOME_FUTURE_PROOF_NUMBER];
> >     } slice_topology;
> >
> >     struct {
> >         u32 this;
> >         u32 that;
> >         ...
> >         u8 mask[SOME_FUTURE_PROOF_NUMBER];
> >     } subslice_topology;
> >
> >     struct {
> >         u32 this;
> >         u32 that;
> >         ...
> >         u8 mask[SOME_FUTURE_PROOF_NUMBER];
> >     } eu_topology;
> > };
> 
> I'm pretty sure we'll need to expose more than these 3 properties here 
> (slice/subslice/eus) soon.
> Some of the components residing in the subslice could be of interest.
> So I'm reluctant to have all of this within a single struct which we 
> can't change the size of.

The struct size doesn't have to be fixed, just reported. The underlying
question is can we construct an interface that is flexible enough?

Something along the lines of perf (GL even) where the output format is
constructed by request from the user, then we only need to declare how
long each field will be, get to the user allocate the buffer, then fill
on the second pass.

Alternatively we output some ASN string?

I don't want to overengineer, but at the same time this looks to be the
perfect excuse to require enough flexibility to cater for future
complexity without going too meta. :)
-Chris
Lionel Landwerlin Nov. 10, 2017, 6:29 p.m. UTC | #4
On 10/11/17 16:47, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2017-11-10 16:37:33)
>> On 09/11/17 17:34, Tvrtko Ursulin wrote:
>>> On 08/11/2017 16:22, Lionel Landwerlin wrote:
>>> But in general would it be feasible to define and name the returned
>>> data more precisely? Like:
>>>
>>> struct drm_engine_rcs_engine_info {
>>>      ...
>>>      /existing_stuff/
>>>      ...
>>>
>>> #define HAS_TOPOLOGY
>>>      u32 flags;
>>>
>>>      struct {
>>>          u32 this;
>>>          u32 that;
>>>          ...
>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>      } slice_topology;
>>>
>>>      struct {
>>>          u32 this;
>>>          u32 that;
>>>          ...
>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>      } subslice_topology;
>>>
>>>      struct {
>>>          u32 this;
>>>          u32 that;
>>>          ...
>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>      } eu_topology;
>>> };
>> I'm pretty sure we'll need to expose more than these 3 properties here
>> (slice/subslice/eus) soon.
>> Some of the components residing in the subslice could be of interest.
>> So I'm reluctant to have all of this within a single struct which we
>> can't change the size of.
> The struct size doesn't have to be fixed, just reported. The underlying
> question is can we construct an interface that is flexible enough?
>
> Something along the lines of perf (GL even) where the output format is
> constructed by request from the user, then we only need to declare how
> long each field will be, get to the user allocate the buffer, then fill
> on the second pass.
>
> Alternatively we output some ASN string?
>
> I don't want to overengineer, but at the same time this looks to be the
> perfect excuse to require enough flexibility to cater for future
> complexity without going too meta. :)
> -Chris
>
Heh, so one ioctl to get the string, another ioctl to get the data?
And a list of enum for all the properties you can list?

Unrelated question, have you considered ASN to describe the error state 
layout?

-
Lionel
Chris Wilson Nov. 10, 2017, 7:03 p.m. UTC | #5
Quoting Lionel Landwerlin (2017-11-10 18:29:45)
> On 10/11/17 16:47, Chris Wilson wrote:
> Unrelated question, have you considered ASN to describe the error state 
> layout?

I was thinking json (to make it semi-structured and extensible), but keep
reminding myself that if everything else speaks aub, we should make a
passable attempt too. Just keep it ascii though.

However, error-state needs large kick in the right direction for
userspace debugging; I am open for suggestions (in fact want suggestions
from people trying to do post-mortem debugging of GL/Vk, libva and CL).
(I quite liked the idea of the apitrace fdr recording the set of GL
commands that constructed the batch; possibly intermediate stages as
well.)
-Chris
Tvrtko Ursulin Nov. 13, 2017, 9:14 a.m. UTC | #6
On 10/11/2017 18:29, Lionel Landwerlin wrote:
> On 10/11/17 16:47, Chris Wilson wrote:
>> Quoting Lionel Landwerlin (2017-11-10 16:37:33)
>>> On 09/11/17 17:34, Tvrtko Ursulin wrote:
>>>> On 08/11/2017 16:22, Lionel Landwerlin wrote:
>>>> But in general would it be feasible to define and name the returned
>>>> data more precisely? Like:
>>>>
>>>> struct drm_engine_rcs_engine_info {
>>>>      ...
>>>>      /existing_stuff/
>>>>      ...
>>>>
>>>> #define HAS_TOPOLOGY
>>>>      u32 flags;
>>>>
>>>>      struct {
>>>>          u32 this;
>>>>          u32 that;
>>>>          ...
>>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>>      } slice_topology;
>>>>
>>>>      struct {
>>>>          u32 this;
>>>>          u32 that;
>>>>          ...
>>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>>      } subslice_topology;
>>>>
>>>>      struct {
>>>>          u32 this;
>>>>          u32 that;
>>>>          ...
>>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>>      } eu_topology;
>>>> };
>>> I'm pretty sure we'll need to expose more than these 3 properties here
>>> (slice/subslice/eus) soon.
>>> Some of the components residing in the subslice could be of interest.
>>> So I'm reluctant to have all of this within a single struct which we
>>> can't change the size of.
>> The struct size doesn't have to be fixed, just reported. The underlying
>> question is can we construct an interface that is flexible enough?
>>
>> Something along the lines of perf (GL even) where the output format is
>> constructed by request from the user, then we only need to declare how
>> long each field will be, get to the user allocate the buffer, then fill
>> on the second pass.
>>
>> Alternatively we output some ASN string?
>>
>> I don't want to overengineer, but at the same time this looks to be the
>> perfect excuse to require enough flexibility to cater for future
>> complexity without going too meta. :)
>> -Chris
>>
> Heh, so one ioctl to get the string, another ioctl to get the data?
> And a list of enum for all the properties you can list?
> 
> Unrelated question, have you considered ASN to describe the error state 
> layout?

Or we go with sysfs, plain and simple?

$ cat $i915root/engine/vcs0/info
hevc

$ cat $i915root/engine/vcs1/instance
1

$ cat $i915root/engine/rcs0/class
render

...

$i915root/gpu/topology/slice_mask

Should be able to design to avoid issues with extensibility and avoids 
the need to come up with complex binary structures or even adding new 
protocols like the ASN mentioned above.

?

Tvrtko
Lionel Landwerlin Nov. 13, 2017, 10 a.m. UTC | #7
On 13/11/17 09:14, Tvrtko Ursulin wrote:
>
> On 10/11/2017 18:29, Lionel Landwerlin wrote:
>> On 10/11/17 16:47, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2017-11-10 16:37:33)
>>>> On 09/11/17 17:34, Tvrtko Ursulin wrote:
>>>>> On 08/11/2017 16:22, Lionel Landwerlin wrote:
>>>>> But in general would it be feasible to define and name the returned
>>>>> data more precisely? Like:
>>>>>
>>>>> struct drm_engine_rcs_engine_info {
>>>>>      ...
>>>>>      /existing_stuff/
>>>>>      ...
>>>>>
>>>>> #define HAS_TOPOLOGY
>>>>>      u32 flags;
>>>>>
>>>>>      struct {
>>>>>          u32 this;
>>>>>          u32 that;
>>>>>          ...
>>>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>>>      } slice_topology;
>>>>>
>>>>>      struct {
>>>>>          u32 this;
>>>>>          u32 that;
>>>>>          ...
>>>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>>>      } subslice_topology;
>>>>>
>>>>>      struct {
>>>>>          u32 this;
>>>>>          u32 that;
>>>>>          ...
>>>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
>>>>>      } eu_topology;
>>>>> };
>>>> I'm pretty sure we'll need to expose more than these 3 properties here
>>>> (slice/subslice/eus) soon.
>>>> Some of the components residing in the subslice could be of interest.
>>>> So I'm reluctant to have all of this within a single struct which we
>>>> can't change the size of.
>>> The struct size doesn't have to be fixed, just reported. The underlying
>>> question is can we construct an interface that is flexible enough?
>>>
>>> Something along the lines of perf (GL even) where the output format is
>>> constructed by request from the user, then we only need to declare how
>>> long each field will be, get to the user allocate the buffer, then fill
>>> on the second pass.
>>>
>>> Alternatively we output some ASN string?
>>>
>>> I don't want to overengineer, but at the same time this looks to be the
>>> perfect excuse to require enough flexibility to cater for future
>>> complexity without going too meta. :)
>>> -Chris
>>>
>> Heh, so one ioctl to get the string, another ioctl to get the data?
>> And a list of enum for all the properties you can list?
>>
>> Unrelated question, have you considered ASN to describe the error 
>> state layout?
>
> Or we go with sysfs, plain and simple?
>
> $ cat $i915root/engine/vcs0/info
> hevc
>
> $ cat $i915root/engine/vcs1/instance
> 1
>
> $ cat $i915root/engine/rcs0/class
> render
>
> ...
>
> $i915root/gpu/topology/slice_mask
>
> Should be able to design to avoid issues with extensibility and avoids 
> the need to come up with complex binary structures or even adding new 
> protocols like the ASN mentioned above.
>
> ?
>
> Tvrtko
>
I guess that works too. What layout would fit for the other bits of 
topology though? :

topology/slice_mask
topology/slice0/subslice_mask
topology/slice0/subslice0/eu_mask
topology/slice0/subslice1/eu_mask
topology/slice0/subslice2/eu_mask
topology/slice1/subslice_mask
...
topology/slice2/subslice_mask
...

Or something flatter but then you could need lines in the files to split 
things by slice/subslice.

-
Lionel
Chris Wilson Nov. 13, 2017, 11:14 a.m. UTC | #8
Quoting Tvrtko Ursulin (2017-11-13 09:14:15)
> 
> On 10/11/2017 18:29, Lionel Landwerlin wrote:
> > On 10/11/17 16:47, Chris Wilson wrote:
> >> Quoting Lionel Landwerlin (2017-11-10 16:37:33)
> >>> On 09/11/17 17:34, Tvrtko Ursulin wrote:
> >>>> On 08/11/2017 16:22, Lionel Landwerlin wrote:
> >>>> But in general would it be feasible to define and name the returned
> >>>> data more precisely? Like:
> >>>>
> >>>> struct drm_engine_rcs_engine_info {
> >>>>      ...
> >>>>      /existing_stuff/
> >>>>      ...
> >>>>
> >>>> #define HAS_TOPOLOGY
> >>>>      u32 flags;
> >>>>
> >>>>      struct {
> >>>>          u32 this;
> >>>>          u32 that;
> >>>>          ...
> >>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
> >>>>      } slice_topology;
> >>>>
> >>>>      struct {
> >>>>          u32 this;
> >>>>          u32 that;
> >>>>          ...
> >>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
> >>>>      } subslice_topology;
> >>>>
> >>>>      struct {
> >>>>          u32 this;
> >>>>          u32 that;
> >>>>          ...
> >>>>          u8 mask[SOME_FUTURE_PROOF_NUMBER];
> >>>>      } eu_topology;
> >>>> };
> >>> I'm pretty sure we'll need to expose more than these 3 properties here
> >>> (slice/subslice/eus) soon.
> >>> Some of the components residing in the subslice could be of interest.
> >>> So I'm reluctant to have all of this within a single struct which we
> >>> can't change the size of.
> >> The struct size doesn't have to be fixed, just reported. The underlying
> >> question is can we construct an interface that is flexible enough?
> >>
> >> Something along the lines of perf (GL even) where the output format is
> >> constructed by request from the user, then we only need to declare how
> >> long each field will be, get to the user allocate the buffer, then fill
> >> on the second pass.
> >>
> >> Alternatively we output some ASN string?
> >>
> >> I don't want to overengineer, but at the same time this looks to be the
> >> perfect excuse to require enough flexibility to cater for future
> >> complexity without going too meta. :)
> >> -Chris
> >>
> > Heh, so one ioctl to get the string, another ioctl to get the data?
> > And a list of enum for all the properties you can list?
> > 
> > Unrelated question, have you considered ASN to describe the error state 
> > layout?
> 
> Or we go with sysfs, plain and simple?
> 
> $ cat $i915root/engine/vcs0/info
> hevc
> 
> $ cat $i915root/engine/vcs1/instance
> 1
> 
> $ cat $i915root/engine/rcs0/class
> render
> 
> ...
> 
> $i915root/gpu/topology/slice_mask
> 
> Should be able to design to avoid issues with extensibility and avoids 
> the need to come up with complex binary structures or even adding new 
> protocols like the ASN mentioned above.

That wins the flexibility argument hands down. (It is so flexible we have
to plan ahead to get the hierarchy right. We can even version it with a
$i915root/version. Except people will add it to their scripts and notice
if paths change. It has the benefit of being too convenient ;)

The only counter is, ugh that's a lot of work to gather the info on
userspace driver load. But everybody can cat their gpu details.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_query_info.c b/drivers/gpu/drm/i915/intel_query_info.c
index 21434c393582..45c5b29e0988 100644
--- a/drivers/gpu/drm/i915/intel_query_info.c
+++ b/drivers/gpu/drm/i915/intel_query_info.c
@@ -25,6 +25,68 @@ 
 #include "i915_drv.h"
 #include <uapi/drm/i915_drm.h>
 
+static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
+				   struct drm_i915_query_info *args)
+{
+	const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
+	struct drm_i915_rcs_topology_info __user *user_topology =
+		u64_to_user_ptr(args->info_ptr);
+	struct drm_i915_rcs_topology_info topology;
+	u32 data_size, total_size;
+	const u8 *data = NULL;
+	int ret;
+
+	/* Not supported on gen < 8. */
+	if (sseu->max_slices == 0)
+		return -ENODEV;
+
+	switch (args->query_params[0]) {
+	case I915_RCS_TOPOLOGY_SLICE:
+		topology.params[0] = sseu->max_slices;
+		data_size = sizeof(sseu->slice_mask);
+		data = &sseu->slice_mask;
+		break;
+
+	case I915_RCS_TOPOLOGY_SUBSLICE:
+		topology.params[0] = sseu->max_slices;
+		topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
+		data_size = sseu->max_slices * topology.params[1];
+		data = sseu->subslices_mask;
+		break;
+
+	case I915_RCS_TOPOLOGY_EU:
+		topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
+		topology.params[1] = sseu->max_subslices * topology.params[2];
+		topology.params[0] = sseu->max_slices;
+		data_size = sseu->max_slices * topology.params[1];
+		data = sseu->eu_mask;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	total_size = sizeof(topology) + data_size;
+
+	if (args->info_ptr_len == 0) {
+		args->info_ptr_len = total_size;
+		return 0;
+	}
+
+	if (args->info_ptr_len < total_size)
+		return -EINVAL;
+
+	ret = copy_to_user(user_topology, &topology, sizeof(topology));
+	if (ret)
+		return -EFAULT;
+
+	ret = copy_to_user(user_topology + 1, data, data_size);
+	if (ret)
+		return -EFAULT;
+
+	return 0;
+}
+
 static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
 	[I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
 	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
@@ -112,6 +174,8 @@  int i915_query_info_ioctl(struct drm_device *dev, void *data,
 	switch (args->query) {
 	case I915_QUERY_INFO_ENGINE:
 		return query_info_engine(dev_priv, args);
+	case I915_QUERY_INFO_RCS_TOPOLOGY:
+		return query_info_rcs_topology(dev_priv, args);
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c6026616300e..5b6a8995a948 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1569,6 +1569,61 @@  struct drm_i915_engine_info {
 	__u8 rsvd2[6];
 };
 
+/* Query RCS topology.
+ *
+ * drm_i915_query_info.query_params[0] should be set to one of the
+ * I915_RCS_TOPOLOGY_* define.
+ *
+ * drm_i915_gem_query_info.info_ptr will be written to with
+ * drm_i915_rcs_topology_info.
+ */
+#define I915_QUERY_INFO_RCS_TOPOLOGY	1 /* version 1 */
+
+/* Query RCS slice topology
+ *
+ * The meaning of the drm_i915_rcs_topology_info fields is :
+ *
+ * params[0]: number of slices
+ *
+ * data: Each bit indicates whether a slice is available (1) or fused off (0).
+ * Formula to tell if slice X is available :
+ *
+ *         (data[X / 8] >> (X % 8)) & 1
+ */
+#define I915_RCS_TOPOLOGY_SLICE		0 /* version 1 */
+/* Query RCS subslice topology
+ *
+ * The meaning of the drm_i915_rcs_topology_info fields is :
+ *
+ * params[0]: number of slices
+ * params[1]: slice stride
+ *
+ * data: each bit indicates whether a subslice is available (1) or fused off
+ * (0). Formula to tell if slice X subslice Y is available :
+ *
+ *         (data[(X * params[1]) + Y / 8] >> (Y % 8)) & 1
+ */
+#define I915_RCS_TOPOLOGY_SUBSLICE	1 /* version 1 */
+/* Query RCS EU topology
+ *
+ * The meaning of the drm_i915_rcs_topology_info fields is :
+ *
+ * params[0]: number of slices
+ * params[1]: slice stride
+ * params[2]: subslice stride
+ *
+ * data: Each bit indicates whether a subslice is available (1) or fused off
+ * (0). Formula to tell if slice X subslice Y eu Z is available :
+ *
+ *         (data[X * params[1] + Y * params[2] + Z / 8] >> (Z % 8)) & 1
+ */
+#define I915_RCS_TOPOLOGY_EU		2 /* version 1 */
+
+struct drm_i915_rcs_topology_info {
+	__u32 params[6];
+
+	__u8 data[];
+};
 
 struct drm_i915_query_info {
 	/* in/out: Protocol version requested/supported. When set to 0, the