diff mbox series

[v6] drm/i915: Engine discovery query

Message ID 20181004105118.7693-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [v6] drm/i915: Engine discovery query | expand

Commit Message

Tvrtko Ursulin Oct. 4, 2018, 10:51 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Engine discovery query allows userspace to enumerate engines, probe their
configuration features, all without needing to maintain the internal PCI
ID based database.

A new query for the generic i915 query ioctl is added named
DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
drm_i915_query_engine_info. The address of latter should be passed to the
kernel in the query.data_ptr field, and should be large enough for the
kernel to fill out all known engines as struct drm_i915_engine_info
elements trailing the query.

As with other queries, setting the item query length to zero allows
userspace to query minimum required buffer size.

Enumerated engines have common type mask which can be used to query all
hardware engines, versus engines userspace can submit to using the execbuf
uAPI.

Engines also have capabilities which are per engine class namespace of
bits describing features not present on all engine instances.

v2:
 * Fixed HEVC assignment.
 * Reorder some fields, rename type to flags, increase width. (Lionel)
 * No need to allocate temporary storage if we do it engine by engine.
   (Lionel)

v3:
 * Describe engine flags and mark mbz fields. (Lionel)
 * HEVC only applies to VCS.

v4:
 * Squash SFC flag into main patch.
 * Tidy some comments.

v5:
 * Add uabi_ prefix to engine capabilities. (Chris Wilson)
 * Report exact size of engine info array. (Chris Wilson)
 * Drop the engine flags. (Joonas Lahtinen)
 * Added some more reserved fields.
 * Move flags after class/instance.

v6:
 * Do not check engine info array was zeroed by userspace but zero the
   unused fields for them instead.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tony Ye <tony.ye@intel.com>
---
 drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
 4 files changed, 118 insertions(+)

Comments

Lionel Landwerlin Oct. 4, 2018, 11:03 a.m. UTC | #1
On 04/10/2018 12:51, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Engine discovery query allows userspace to enumerate engines, probe their
> configuration features, all without needing to maintain the internal PCI
> ID based database.
>
> A new query for the generic i915 query ioctl is added named
> DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
> drm_i915_query_engine_info. The address of latter should be passed to the
> kernel in the query.data_ptr field, and should be large enough for the
> kernel to fill out all known engines as struct drm_i915_engine_info
> elements trailing the query.
>
> As with other queries, setting the item query length to zero allows
> userspace to query minimum required buffer size.
>
> Enumerated engines have common type mask which can be used to query all
> hardware engines, versus engines userspace can submit to using the execbuf
> uAPI.
>
> Engines also have capabilities which are per engine class namespace of
> bits describing features not present on all engine instances.
>
> v2:
>   * Fixed HEVC assignment.
>   * Reorder some fields, rename type to flags, increase width. (Lionel)
>   * No need to allocate temporary storage if we do it engine by engine.
>     (Lionel)
>
> v3:
>   * Describe engine flags and mark mbz fields. (Lionel)
>   * HEVC only applies to VCS.
>
> v4:
>   * Squash SFC flag into main patch.
>   * Tidy some comments.
>
> v5:
>   * Add uabi_ prefix to engine capabilities. (Chris Wilson)
>   * Report exact size of engine info array. (Chris Wilson)
>   * Drop the engine flags. (Joonas Lahtinen)
>   * Added some more reserved fields.
>   * Move flags after class/instance.
>
> v6:
>   * Do not check engine info array was zeroed by userspace but zero the
>     unused fields for them instead.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tony Ye <tony.ye@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>   include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
>   4 files changed, 118 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 5821002cad42..5ac8ef9f5de4 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -84,9 +84,65 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
>   	return total_length;
>   }
>   
> +static int
> +query_engine_info(struct drm_i915_private *i915,
> +		  struct drm_i915_query_item *query_item)
> +{
> +	struct drm_i915_query_engine_info __user *query_ptr =
> +				u64_to_user_ptr(query_item->data_ptr);
> +	struct drm_i915_engine_info __user *info_ptr = &query_ptr->engines[0];
> +	struct drm_i915_query_engine_info query;
> +	struct drm_i915_engine_info info = { };


I would move the info variable down into the second for_each_engine() loop.


> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int len;
> +
> +	if (query_item->flags)
> +		return -EINVAL;
> +
> +	len = 0;
> +	for_each_engine(engine, i915, id)
> +		len++;
> +	len *= sizeof(struct drm_i915_engine_info);
> +	len += sizeof(struct drm_i915_query_engine_info);


Nitpicky, but what about :


len = sizeof(struct drm_i915_query_engine_info);

for_each_engine(engine, i915, id)

        len += sizeof(struct drm_i915_engine_info);


> +
> +	if (!query_item->length)
> +		return len;
> +	else if (query_item->length < len)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&query, query_ptr, sizeof(query)))
> +		return -EFAULT;
> +
> +	if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
> +	    query.rsvd[2])
> +		return -EINVAL;
> +
> +	if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))


Do we want to only verify only the length we're going to write (len)?


> +		return -EFAULT;
> +
> +	for_each_engine(engine, i915, id) {
> +		info.class = engine->uabi_class;
> +		info.instance = engine->instance;
> +		info.capabilities = engine->uabi_capabilities;
> +
> +		if (__copy_to_user(info_ptr, &info, sizeof(info)))
> +			return -EFAULT;
> +
> +		query.num_engines++;
> +		info_ptr++;
> +	}
> +
> +	if (__copy_to_user(query_ptr, &query, sizeof(query)))
> +		return -EFAULT;
> +
> +	return len;
> +}
> +
>   static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   					struct drm_i915_query_item *query_item) = {
>   	query_topology_info,
> +	query_engine_info,
>   };
>   
>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1c6143bdf5a4..134f0cec724c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -298,6 +298,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>   	engine->uabi_id = info->uabi_id;
>   	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>   
> +	if (engine->class == VIDEO_DECODE_CLASS) {
> +		/* HEVC support is present only on vcs0. */
> +		if (INTEL_GEN(dev_priv) >= 8 && info->instance == 0)
> +			engine->uabi_capabilities =
> +				I915_VCS_CLASS_CAPABILITY_HEVC;
> +
> +		/* SFC support is wired only to even VCS instances. */
> +		if (INTEL_GEN(dev_priv) >= 9 && !(info->instance & 1))
> +			engine->uabi_capabilities |=
> +				I915_VCS_CLASS_CAPABILITY_SFC;
> +	}
> +
>   	engine->context_size = __intel_engine_context_size(dev_priv,
>   							   engine->class);
>   	if (WARN_ON(engine->context_size > BIT(20)))
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index f6ec48a75a69..9dc738f1b175 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -370,6 +370,9 @@ struct intel_engine_cs {
>   
>   	u8 class;
>   	u8 instance;
> +
> +	u32 uabi_capabilities;
> +
>   	u32 context_size;
>   	u32 mmio_base;
>   
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 298b2e197744..3b0373fb0a93 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1650,6 +1650,7 @@ struct drm_i915_perf_oa_config {
>   struct drm_i915_query_item {
>   	__u64 query_id;
>   #define DRM_I915_QUERY_TOPOLOGY_INFO    1
> +#define DRM_I915_QUERY_ENGINE_INFO	2
>   
>   	/*
>   	 * When set to zero by userspace, this is filled with the size of the
> @@ -1747,6 +1748,52 @@ struct drm_i915_query_topology_info {
>   	__u8 data[];
>   };
>   
> +/**
> + * struct drm_i915_engine_info
> + *
> + * Describes one engine known to the driver, whether or not it is an user-
> + * accessible or hardware only engine, and what are it's capabilities where
> + * applicable.
> + */
> +struct drm_i915_engine_info {
> +	/** Engine class as in enum drm_i915_gem_engine_class. */
> +	__u16 class;
> +
> +	/** Engine instance number. */
> +	__u16 instance;
> +
> +	/** Reserved field must be cleared to zero. */
> +	__u32 rsvd0;
> +
> +	/** Engine flags. */
> +	__u64 flags;
> +
> +	/** Capabilities of this engine. */
> +	__u64 capabilities;
> +#define I915_VCS_CLASS_CAPABILITY_HEVC	(1 << 0)
> +#define I915_VCS_CLASS_CAPABILITY_SFC	(1 << 1)
> +
> +	/** Reserved fields must be cleared to zero. */
> +	__u64 rsvd1[4];
> +};
> +
> +/**
> + * struct drm_i915_query_engine_info
> + *
> + * Engine info query enumerates all engines known to the driver by filling in
> + * an array of struct drm_i915_engine_info structures.
> + */
> +struct drm_i915_query_engine_info {
> +	/** Number of struct drm_i915_engine_info structs following. */
> +	__u32 num_engines;
> +
> +	/** MBZ */
> +	__u32 rsvd[3];
> +
> +	/** Marker for drm_i915_engine_info structures. */
> +	struct drm_i915_engine_info engines[];
> +};
> +
>   #if defined(__cplusplus)
>   }
>   #endif
Lionel Landwerlin Oct. 4, 2018, 11:10 a.m. UTC | #2
Other than the nitpicking, I can't see anything wrong with this patch.

-
Lionel

On 04/10/2018 13:03, Lionel Landwerlin wrote:
> On 04/10/2018 12:51, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Engine discovery query allows userspace to enumerate engines, probe 
>> their
>> configuration features, all without needing to maintain the internal PCI
>> ID based database.
>>
>> A new query for the generic i915 query ioctl is added named
>> DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
>> drm_i915_query_engine_info. The address of latter should be passed to 
>> the
>> kernel in the query.data_ptr field, and should be large enough for the
>> kernel to fill out all known engines as struct drm_i915_engine_info
>> elements trailing the query.
>>
>> As with other queries, setting the item query length to zero allows
>> userspace to query minimum required buffer size.
>>
>> Enumerated engines have common type mask which can be used to query all
>> hardware engines, versus engines userspace can submit to using the 
>> execbuf
>> uAPI.
>>
>> Engines also have capabilities which are per engine class namespace of
>> bits describing features not present on all engine instances.
>>
>> v2:
>>   * Fixed HEVC assignment.
>>   * Reorder some fields, rename type to flags, increase width. (Lionel)
>>   * No need to allocate temporary storage if we do it engine by engine.
>>     (Lionel)
>>
>> v3:
>>   * Describe engine flags and mark mbz fields. (Lionel)
>>   * HEVC only applies to VCS.
>>
>> v4:
>>   * Squash SFC flag into main patch.
>>   * Tidy some comments.
>>
>> v5:
>>   * Add uabi_ prefix to engine capabilities. (Chris Wilson)
>>   * Report exact size of engine info array. (Chris Wilson)
>>   * Drop the engine flags. (Joonas Lahtinen)
>>   * Added some more reserved fields.
>>   * Move flags after class/instance.
>>
>> v6:
>>   * Do not check engine info array was zeroed by userspace but zero the
>>     unused fields for them instead.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Tony Ye <tony.ye@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>>   include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
>>   4 files changed, 118 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>> b/drivers/gpu/drm/i915/i915_query.c
>> index 5821002cad42..5ac8ef9f5de4 100644
>> --- a/drivers/gpu/drm/i915/i915_query.c
>> +++ b/drivers/gpu/drm/i915/i915_query.c
>> @@ -84,9 +84,65 @@ static int query_topology_info(struct 
>> drm_i915_private *dev_priv,
>>       return total_length;
>>   }
>>   +static int
>> +query_engine_info(struct drm_i915_private *i915,
>> +          struct drm_i915_query_item *query_item)
>> +{
>> +    struct drm_i915_query_engine_info __user *query_ptr =
>> +                u64_to_user_ptr(query_item->data_ptr);
>> +    struct drm_i915_engine_info __user *info_ptr = 
>> &query_ptr->engines[0];
>> +    struct drm_i915_query_engine_info query;
>> +    struct drm_i915_engine_info info = { };
>
>
> I would move the info variable down into the second for_each_engine() 
> loop.
>
>
>> +    struct intel_engine_cs *engine;
>> +    enum intel_engine_id id;
>> +    int len;
>> +
>> +    if (query_item->flags)
>> +        return -EINVAL;
>> +
>> +    len = 0;
>> +    for_each_engine(engine, i915, id)
>> +        len++;
>> +    len *= sizeof(struct drm_i915_engine_info);
>> +    len += sizeof(struct drm_i915_query_engine_info);
>
>
> Nitpicky, but what about :
>
>
> len = sizeof(struct drm_i915_query_engine_info);
>
> for_each_engine(engine, i915, id)
>
>        len += sizeof(struct drm_i915_engine_info);
>
>
>> +
>> +    if (!query_item->length)
>> +        return len;
>> +    else if (query_item->length < len)
>> +        return -EINVAL;
>> +
>> +    if (copy_from_user(&query, query_ptr, sizeof(query)))
>> +        return -EFAULT;
>> +
>> +    if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
>> +        query.rsvd[2])
>> +        return -EINVAL;
>> +
>> +    if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))
>
>
> Do we want to only verify only the length we're going to write (len)?
>
>
>> +        return -EFAULT;
>> +
>> +    for_each_engine(engine, i915, id) {
>> +        info.class = engine->uabi_class;
>> +        info.instance = engine->instance;
>> +        info.capabilities = engine->uabi_capabilities;
>> +
>> +        if (__copy_to_user(info_ptr, &info, sizeof(info)))
>> +            return -EFAULT;
>> +
>> +        query.num_engines++;
>> +        info_ptr++;
>> +    }
>> +
>> +    if (__copy_to_user(query_ptr, &query, sizeof(query)))
>> +        return -EFAULT;
>> +
>> +    return len;
>> +}
>> +
>>   static int (* const i915_query_funcs[])(struct drm_i915_private 
>> *dev_priv,
>>                       struct drm_i915_query_item *query_item) = {
>>       query_topology_info,
>> +    query_engine_info,
>>   };
>>     int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>> drm_file *file)
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 1c6143bdf5a4..134f0cec724c 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -298,6 +298,18 @@ intel_engine_setup(struct drm_i915_private 
>> *dev_priv,
>>       engine->uabi_id = info->uabi_id;
>>       engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>>   +    if (engine->class == VIDEO_DECODE_CLASS) {
>> +        /* HEVC support is present only on vcs0. */
>> +        if (INTEL_GEN(dev_priv) >= 8 && info->instance == 0)
>> +            engine->uabi_capabilities =
>> +                I915_VCS_CLASS_CAPABILITY_HEVC;
>> +
>> +        /* SFC support is wired only to even VCS instances. */
>> +        if (INTEL_GEN(dev_priv) >= 9 && !(info->instance & 1))
>> +            engine->uabi_capabilities |=
>> +                I915_VCS_CLASS_CAPABILITY_SFC;
>> +    }
>> +
>>       engine->context_size = __intel_engine_context_size(dev_priv,
>>                                  engine->class);
>>       if (WARN_ON(engine->context_size > BIT(20)))
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index f6ec48a75a69..9dc738f1b175 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -370,6 +370,9 @@ struct intel_engine_cs {
>>         u8 class;
>>       u8 instance;
>> +
>> +    u32 uabi_capabilities;
>> +
>>       u32 context_size;
>>       u32 mmio_base;
>>   diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 298b2e197744..3b0373fb0a93 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1650,6 +1650,7 @@ struct drm_i915_perf_oa_config {
>>   struct drm_i915_query_item {
>>       __u64 query_id;
>>   #define DRM_I915_QUERY_TOPOLOGY_INFO    1
>> +#define DRM_I915_QUERY_ENGINE_INFO    2
>>         /*
>>        * When set to zero by userspace, this is filled with the size 
>> of the
>> @@ -1747,6 +1748,52 @@ struct drm_i915_query_topology_info {
>>       __u8 data[];
>>   };
>>   +/**
>> + * struct drm_i915_engine_info
>> + *
>> + * Describes one engine known to the driver, whether or not it is an 
>> user-
>> + * accessible or hardware only engine, and what are it's 
>> capabilities where
>> + * applicable.
>> + */
>> +struct drm_i915_engine_info {
>> +    /** Engine class as in enum drm_i915_gem_engine_class. */
>> +    __u16 class;
>> +
>> +    /** Engine instance number. */
>> +    __u16 instance;
>> +
>> +    /** Reserved field must be cleared to zero. */
>> +    __u32 rsvd0;
>> +
>> +    /** Engine flags. */
>> +    __u64 flags;
>> +
>> +    /** Capabilities of this engine. */
>> +    __u64 capabilities;
>> +#define I915_VCS_CLASS_CAPABILITY_HEVC    (1 << 0)
>> +#define I915_VCS_CLASS_CAPABILITY_SFC    (1 << 1)
>> +
>> +    /** Reserved fields must be cleared to zero. */
>> +    __u64 rsvd1[4];
>> +};
>> +
>> +/**
>> + * struct drm_i915_query_engine_info
>> + *
>> + * Engine info query enumerates all engines known to the driver by 
>> filling in
>> + * an array of struct drm_i915_engine_info structures.
>> + */
>> +struct drm_i915_query_engine_info {
>> +    /** Number of struct drm_i915_engine_info structs following. */
>> +    __u32 num_engines;
>> +
>> +    /** MBZ */
>> +    __u32 rsvd[3];
>> +
>> +    /** Marker for drm_i915_engine_info structures. */
>> +    struct drm_i915_engine_info engines[];
>> +};
>> +
>>   #if defined(__cplusplus)
>>   }
>>   #endif
>
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tvrtko Ursulin Oct. 4, 2018, 11:14 a.m. UTC | #3
On 04/10/2018 12:03, Lionel Landwerlin wrote:
> On 04/10/2018 12:51, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Engine discovery query allows userspace to enumerate engines, probe their
>> configuration features, all without needing to maintain the internal PCI
>> ID based database.
>>
>> A new query for the generic i915 query ioctl is added named
>> DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
>> drm_i915_query_engine_info. The address of latter should be passed to the
>> kernel in the query.data_ptr field, and should be large enough for the
>> kernel to fill out all known engines as struct drm_i915_engine_info
>> elements trailing the query.
>>
>> As with other queries, setting the item query length to zero allows
>> userspace to query minimum required buffer size.
>>
>> Enumerated engines have common type mask which can be used to query all
>> hardware engines, versus engines userspace can submit to using the 
>> execbuf
>> uAPI.
>>
>> Engines also have capabilities which are per engine class namespace of
>> bits describing features not present on all engine instances.
>>
>> v2:
>>   * Fixed HEVC assignment.
>>   * Reorder some fields, rename type to flags, increase width. (Lionel)
>>   * No need to allocate temporary storage if we do it engine by engine.
>>     (Lionel)
>>
>> v3:
>>   * Describe engine flags and mark mbz fields. (Lionel)
>>   * HEVC only applies to VCS.
>>
>> v4:
>>   * Squash SFC flag into main patch.
>>   * Tidy some comments.
>>
>> v5:
>>   * Add uabi_ prefix to engine capabilities. (Chris Wilson)
>>   * Report exact size of engine info array. (Chris Wilson)
>>   * Drop the engine flags. (Joonas Lahtinen)
>>   * Added some more reserved fields.
>>   * Move flags after class/instance.
>>
>> v6:
>>   * Do not check engine info array was zeroed by userspace but zero the
>>     unused fields for them instead.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Tony Ye <tony.ye@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>>   include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
>>   4 files changed, 118 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>> b/drivers/gpu/drm/i915/i915_query.c
>> index 5821002cad42..5ac8ef9f5de4 100644
>> --- a/drivers/gpu/drm/i915/i915_query.c
>> +++ b/drivers/gpu/drm/i915/i915_query.c
>> @@ -84,9 +84,65 @@ static int query_topology_info(struct 
>> drm_i915_private *dev_priv,
>>       return total_length;
>>   }
>> +static int
>> +query_engine_info(struct drm_i915_private *i915,
>> +          struct drm_i915_query_item *query_item)
>> +{
>> +    struct drm_i915_query_engine_info __user *query_ptr =
>> +                u64_to_user_ptr(query_item->data_ptr);
>> +    struct drm_i915_engine_info __user *info_ptr = 
>> &query_ptr->engines[0];
>> +    struct drm_i915_query_engine_info query;
>> +    struct drm_i915_engine_info info = { };
> 
> 
> I would move the info variable down into the second for_each_engine() loop.

Hey this saves some CPU cycles! :))

> 
> 
>> +    struct intel_engine_cs *engine;
>> +    enum intel_engine_id id;
>> +    int len;
>> +
>> +    if (query_item->flags)
>> +        return -EINVAL;
>> +
>> +    len = 0;
>> +    for_each_engine(engine, i915, id)
>> +        len++;
>> +    len *= sizeof(struct drm_i915_engine_info);
>> +    len += sizeof(struct drm_i915_query_engine_info);
> 
> 
> Nitpicky, but what about :
> 
> 
> len = sizeof(struct drm_i915_query_engine_info);
> 
> for_each_engine(engine, i915, id)
> 
>         len += sizeof(struct drm_i915_engine_info);

Saves a multiply, makes sense, thanks!

> 
>> +
>> +    if (!query_item->length)
>> +        return len;
>> +    else if (query_item->length < len)
>> +        return -EINVAL;
>> +
>> +    if (copy_from_user(&query, query_ptr, sizeof(query)))
>> +        return -EFAULT;
>> +
>> +    if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
>> +        query.rsvd[2])
>> +        return -EINVAL;
>> +
>> +    if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))
> 
> 
> Do we want to only verify only the length we're going to write (len)?

I thought to check all userspace claims it provided, even though extra 
robustness feels like an only hand-wavy benefit.

Regards,

Tvrtko

> 
> 
>> +        return -EFAULT;
>> +
>> +    for_each_engine(engine, i915, id) {
>> +        info.class = engine->uabi_class;
>> +        info.instance = engine->instance;
>> +        info.capabilities = engine->uabi_capabilities;
>> +
>> +        if (__copy_to_user(info_ptr, &info, sizeof(info)))
>> +            return -EFAULT;
>> +
>> +        query.num_engines++;
>> +        info_ptr++;
>> +    }
>> +
>> +    if (__copy_to_user(query_ptr, &query, sizeof(query)))
>> +        return -EFAULT;
>> +
>> +    return len;
>> +}
>> +
>>   static int (* const i915_query_funcs[])(struct drm_i915_private 
>> *dev_priv,
>>                       struct drm_i915_query_item *query_item) = {
>>       query_topology_info,
>> +    query_engine_info,
>>   };
>>   int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>> drm_file *file)
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 1c6143bdf5a4..134f0cec724c 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -298,6 +298,18 @@ intel_engine_setup(struct drm_i915_private 
>> *dev_priv,
>>       engine->uabi_id = info->uabi_id;
>>       engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>> +    if (engine->class == VIDEO_DECODE_CLASS) {
>> +        /* HEVC support is present only on vcs0. */
>> +        if (INTEL_GEN(dev_priv) >= 8 && info->instance == 0)
>> +            engine->uabi_capabilities =
>> +                I915_VCS_CLASS_CAPABILITY_HEVC;
>> +
>> +        /* SFC support is wired only to even VCS instances. */
>> +        if (INTEL_GEN(dev_priv) >= 9 && !(info->instance & 1))
>> +            engine->uabi_capabilities |=
>> +                I915_VCS_CLASS_CAPABILITY_SFC;
>> +    }
>> +
>>       engine->context_size = __intel_engine_context_size(dev_priv,
>>                                  engine->class);
>>       if (WARN_ON(engine->context_size > BIT(20)))
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index f6ec48a75a69..9dc738f1b175 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -370,6 +370,9 @@ struct intel_engine_cs {
>>       u8 class;
>>       u8 instance;
>> +
>> +    u32 uabi_capabilities;
>> +
>>       u32 context_size;
>>       u32 mmio_base;
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 298b2e197744..3b0373fb0a93 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1650,6 +1650,7 @@ struct drm_i915_perf_oa_config {
>>   struct drm_i915_query_item {
>>       __u64 query_id;
>>   #define DRM_I915_QUERY_TOPOLOGY_INFO    1
>> +#define DRM_I915_QUERY_ENGINE_INFO    2
>>       /*
>>        * When set to zero by userspace, this is filled with the size 
>> of the
>> @@ -1747,6 +1748,52 @@ struct drm_i915_query_topology_info {
>>       __u8 data[];
>>   };
>> +/**
>> + * struct drm_i915_engine_info
>> + *
>> + * Describes one engine known to the driver, whether or not it is an 
>> user-
>> + * accessible or hardware only engine, and what are it's capabilities 
>> where
>> + * applicable.
>> + */
>> +struct drm_i915_engine_info {
>> +    /** Engine class as in enum drm_i915_gem_engine_class. */
>> +    __u16 class;
>> +
>> +    /** Engine instance number. */
>> +    __u16 instance;
>> +
>> +    /** Reserved field must be cleared to zero. */
>> +    __u32 rsvd0;
>> +
>> +    /** Engine flags. */
>> +    __u64 flags;
>> +
>> +    /** Capabilities of this engine. */
>> +    __u64 capabilities;
>> +#define I915_VCS_CLASS_CAPABILITY_HEVC    (1 << 0)
>> +#define I915_VCS_CLASS_CAPABILITY_SFC    (1 << 1)
>> +
>> +    /** Reserved fields must be cleared to zero. */
>> +    __u64 rsvd1[4];
>> +};
>> +
>> +/**
>> + * struct drm_i915_query_engine_info
>> + *
>> + * Engine info query enumerates all engines known to the driver by 
>> filling in
>> + * an array of struct drm_i915_engine_info structures.
>> + */
>> +struct drm_i915_query_engine_info {
>> +    /** Number of struct drm_i915_engine_info structs following. */
>> +    __u32 num_engines;
>> +
>> +    /** MBZ */
>> +    __u32 rsvd[3];
>> +
>> +    /** Marker for drm_i915_engine_info structures. */
>> +    struct drm_i915_engine_info engines[];
>> +};
>> +
>>   #if defined(__cplusplus)
>>   }
>>   #endif
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 4, 2018, 4:33 p.m. UTC | #4
Quoting Tvrtko Ursulin (2018-10-04 11:51:18)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Engine discovery query allows userspace to enumerate engines, probe their
> configuration features, all without needing to maintain the internal PCI
> ID based database.
> 
> A new query for the generic i915 query ioctl is added named
> DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
> drm_i915_query_engine_info. The address of latter should be passed to the
> kernel in the query.data_ptr field, and should be large enough for the
> kernel to fill out all known engines as struct drm_i915_engine_info
> elements trailing the query.
> 
> As with other queries, setting the item query length to zero allows
> userspace to query minimum required buffer size.
> 
> Enumerated engines have common type mask which can be used to query all
> hardware engines, versus engines userspace can submit to using the execbuf
> uAPI.
> 
> Engines also have capabilities which are per engine class namespace of
> bits describing features not present on all engine instances.
> 
> v2:
>  * Fixed HEVC assignment.
>  * Reorder some fields, rename type to flags, increase width. (Lionel)
>  * No need to allocate temporary storage if we do it engine by engine.
>    (Lionel)
> 
> v3:
>  * Describe engine flags and mark mbz fields. (Lionel)
>  * HEVC only applies to VCS.
> 
> v4:
>  * Squash SFC flag into main patch.
>  * Tidy some comments.
> 
> v5:
>  * Add uabi_ prefix to engine capabilities. (Chris Wilson)
>  * Report exact size of engine info array. (Chris Wilson)
>  * Drop the engine flags. (Joonas Lahtinen)
>  * Added some more reserved fields.
>  * Move flags after class/instance.
> 
> v6:
>  * Do not check engine info array was zeroed by userspace but zero the
>    unused fields for them instead.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tony Ye <tony.ye@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>  include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
>  4 files changed, 118 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 5821002cad42..5ac8ef9f5de4 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -84,9 +84,65 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
>         return total_length;
>  }
>  
> +static int
> +query_engine_info(struct drm_i915_private *i915,
> +                 struct drm_i915_query_item *query_item)
> +{
> +       struct drm_i915_query_engine_info __user *query_ptr =
> +                               u64_to_user_ptr(query_item->data_ptr);
> +       struct drm_i915_engine_info __user *info_ptr = &query_ptr->engines[0];
> +       struct drm_i915_query_engine_info query;
> +       struct drm_i915_engine_info info = { };
> +       struct intel_engine_cs *engine;
> +       enum intel_engine_id id;
> +       int len;
> +
> +       if (query_item->flags)
> +               return -EINVAL;
> +
> +       len = 0;
> +       for_each_engine(engine, i915, id)
> +               len++;

(Isn't this INTEL_INFO()->num_rings?)

> +       len *= sizeof(struct drm_i915_engine_info);
> +       len += sizeof(struct drm_i915_query_engine_info);
> +
> +       if (!query_item->length)
> +               return len;
> +       else if (query_item->length < len)
> +               return -EINVAL;
> +
> +       if (copy_from_user(&query, query_ptr, sizeof(query)))
> +               return -EFAULT;
> +
> +       if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
> +           query.rsvd[2])
> +               return -EINVAL;
> +
> +       if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))
> +               return -EFAULT;

Hmm, so length is the sizeof of the whole struct and not the space in
the pointed at block? Ok. I was just a little confused by the lack of
checking for info_ptr, as by this point the connection with query_ptr is
off the page.

May I beg
	info_ptr = &query_ptr->engines[0];
here. That should make it more obvious for feeble readers like myself to
see that info_ptr is contained within the access_ok check.

> +       for_each_engine(engine, i915, id) {
> +               info.class = engine->uabi_class;
> +               info.instance = engine->instance;
> +               info.capabilities = engine->uabi_capabilities;
> +

GEM_BUG_ON((void *)info_ptr > (void *)query_ptr + query_item->length - sizeof(info));

> +               if (__copy_to_user(info_ptr, &info, sizeof(info)))
> +                       return -EFAULT;
> +
> +               query.num_engines++;
> +               info_ptr++;
> +       }
> +
> +       if (__copy_to_user(query_ptr, &query, sizeof(query)))
> +               return -EFAULT;
> +
> +       return len;
> +}
> +
>  static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>                                         struct drm_i915_query_item *query_item) = {
>         query_topology_info,
> +       query_engine_info,
>  };
>  
>  int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1c6143bdf5a4..134f0cec724c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -298,6 +298,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>         engine->uabi_id = info->uabi_id;
>         engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>  
> +       if (engine->class == VIDEO_DECODE_CLASS) {
> +               /* HEVC support is present only on vcs0. */
> +               if (INTEL_GEN(dev_priv) >= 8 && info->instance == 0)
> +                       engine->uabi_capabilities =
> +                               I915_VCS_CLASS_CAPABILITY_HEVC;
> +
> +               /* SFC support is wired only to even VCS instances. */
> +               if (INTEL_GEN(dev_priv) >= 9 && !(info->instance & 1))
> +                       engine->uabi_capabilities |=
> +                               I915_VCS_CLASS_CAPABILITY_SFC;

I trust your judgement here.

> +/**
> + * struct drm_i915_query_engine_info
> + *
> + * Engine info query enumerates all engines known to the driver by filling in
> + * an array of struct drm_i915_engine_info structures.
> + */
> +struct drm_i915_query_engine_info {
> +       /** Number of struct drm_i915_engine_info structs following. */
> +       __u32 num_engines;
> +
> +       /** MBZ */
> +       __u32 rsvd[3];
> +
> +       /** Marker for drm_i915_engine_info structures. */
> +       struct drm_i915_engine_info engines[];

Do we need [0] for old-c/c++ compatibility?

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

I'd value an ack from Lionel in case I've forgotten a quirk about the
query api, and an ack from a user that this meets their needs. It will
certainly simplify some of our igt code!

Question for overall design, do we want a conjoint capability flag
space, or one per class? One per class gives us more room, so I think
should be preferred, but I wonder if a set of common flags would make
it easier for userspace to filter. (Though not hard to match on both
class and caps!)
-Chris
Tvrtko Ursulin Oct. 5, 2018, 8:26 a.m. UTC | #5
On 04/10/2018 17:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-10-04 11:51:18)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Engine discovery query allows userspace to enumerate engines, probe their
>> configuration features, all without needing to maintain the internal PCI
>> ID based database.
>>
>> A new query for the generic i915 query ioctl is added named
>> DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
>> drm_i915_query_engine_info. The address of latter should be passed to the
>> kernel in the query.data_ptr field, and should be large enough for the
>> kernel to fill out all known engines as struct drm_i915_engine_info
>> elements trailing the query.
>>
>> As with other queries, setting the item query length to zero allows
>> userspace to query minimum required buffer size.
>>
>> Enumerated engines have common type mask which can be used to query all
>> hardware engines, versus engines userspace can submit to using the execbuf
>> uAPI.
>>
>> Engines also have capabilities which are per engine class namespace of
>> bits describing features not present on all engine instances.
>>
>> v2:
>>   * Fixed HEVC assignment.
>>   * Reorder some fields, rename type to flags, increase width. (Lionel)
>>   * No need to allocate temporary storage if we do it engine by engine.
>>     (Lionel)
>>
>> v3:
>>   * Describe engine flags and mark mbz fields. (Lionel)
>>   * HEVC only applies to VCS.
>>
>> v4:
>>   * Squash SFC flag into main patch.
>>   * Tidy some comments.
>>
>> v5:
>>   * Add uabi_ prefix to engine capabilities. (Chris Wilson)
>>   * Report exact size of engine info array. (Chris Wilson)
>>   * Drop the engine flags. (Joonas Lahtinen)
>>   * Added some more reserved fields.
>>   * Move flags after class/instance.
>>
>> v6:
>>   * Do not check engine info array was zeroed by userspace but zero the
>>     unused fields for them instead.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Tony Ye <tony.ye@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>>   include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
>>   4 files changed, 118 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
>> index 5821002cad42..5ac8ef9f5de4 100644
>> --- a/drivers/gpu/drm/i915/i915_query.c
>> +++ b/drivers/gpu/drm/i915/i915_query.c
>> @@ -84,9 +84,65 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
>>          return total_length;
>>   }
>>   
>> +static int
>> +query_engine_info(struct drm_i915_private *i915,
>> +                 struct drm_i915_query_item *query_item)
>> +{
>> +       struct drm_i915_query_engine_info __user *query_ptr =
>> +                               u64_to_user_ptr(query_item->data_ptr);
>> +       struct drm_i915_engine_info __user *info_ptr = &query_ptr->engines[0];
>> +       struct drm_i915_query_engine_info query;
>> +       struct drm_i915_engine_info info = { };
>> +       struct intel_engine_cs *engine;
>> +       enum intel_engine_id id;
>> +       int len;
>> +
>> +       if (query_item->flags)
>> +               return -EINVAL;
>> +
>> +       len = 0;
>> +       for_each_engine(engine, i915, id)
>> +               len++;
> 
> (Isn't this INTEL_INFO()->num_rings?)

/o\

>> +       len *= sizeof(struct drm_i915_engine_info);
>> +       len += sizeof(struct drm_i915_query_engine_info);
>> +
>> +       if (!query_item->length)
>> +               return len;
>> +       else if (query_item->length < len)
>> +               return -EINVAL;
>> +
>> +       if (copy_from_user(&query, query_ptr, sizeof(query)))
>> +               return -EFAULT;
>> +
>> +       if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
>> +           query.rsvd[2])
>> +               return -EINVAL;
>> +
>> +       if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))
>> +               return -EFAULT;
> 
> Hmm, so length is the sizeof of the whole struct and not the space in
> the pointed at block? Ok. I was just a little confused by the lack of
> checking for info_ptr, as by this point the connection with query_ptr is
> off the page.
> 
> May I beg
> 	info_ptr = &query_ptr->engines[0];
> here. That should make it more obvious for feeble readers like myself to
> see that info_ptr is contained within the access_ok check.

Ok.

> 
>> +       for_each_engine(engine, i915, id) {
>> +               info.class = engine->uabi_class;
>> +               info.instance = engine->instance;
>> +               info.capabilities = engine->uabi_capabilities;
>> +
> 
> GEM_BUG_ON((void *)info_ptr > (void *)query_ptr + query_item->length - sizeof(info));

There is a check above that len fits in query_item->length. So unless 
the code distrusts itself in a space of a few lines, or we distrust 
INTEL_INFO->num_engines vs for_each_engine I don't see it is needed?

> 
>> +               if (__copy_to_user(info_ptr, &info, sizeof(info)))
>> +                       return -EFAULT;
>> +
>> +               query.num_engines++;
>> +               info_ptr++;
>> +       }
>> +
>> +       if (__copy_to_user(query_ptr, &query, sizeof(query)))
>> +               return -EFAULT;
>> +
>> +       return len;
>> +}
>> +
>>   static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>>                                          struct drm_i915_query_item *query_item) = {
>>          query_topology_info,
>> +       query_engine_info,
>>   };
>>   
>>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 1c6143bdf5a4..134f0cec724c 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -298,6 +298,18 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
>>          engine->uabi_id = info->uabi_id;
>>          engine->uabi_class = intel_engine_classes[info->class].uabi_class;
>>   
>> +       if (engine->class == VIDEO_DECODE_CLASS) {
>> +               /* HEVC support is present only on vcs0. */
>> +               if (INTEL_GEN(dev_priv) >= 8 && info->instance == 0)
>> +                       engine->uabi_capabilities =
>> +                               I915_VCS_CLASS_CAPABILITY_HEVC;
>> +
>> +               /* SFC support is wired only to even VCS instances. */
>> +               if (INTEL_GEN(dev_priv) >= 9 && !(info->instance & 1))
>> +                       engine->uabi_capabilities |=
>> +                               I915_VCS_CLASS_CAPABILITY_SFC;
> 
> I trust your judgement here.

And I trust pending feedback from media team. :)

> 
>> +/**
>> + * struct drm_i915_query_engine_info
>> + *
>> + * Engine info query enumerates all engines known to the driver by filling in
>> + * an array of struct drm_i915_engine_info structures.
>> + */
>> +struct drm_i915_query_engine_info {
>> +       /** Number of struct drm_i915_engine_info structs following. */
>> +       __u32 num_engines;
>> +
>> +       /** MBZ */
>> +       __u32 rsvd[3];
>> +
>> +       /** Marker for drm_i915_engine_info structures. */
>> +       struct drm_i915_engine_info engines[];
> 
> Do we need [0] for old-c/c++ compatibility?

Hm.. trying to remember. It is GNU C vs C99 for zero sized array vs 
flexible array member. We went for the latter in the topology query 
after some discussion but I don't remember the details now. Grepping the 
uapi headers there are both so don't know.. GCC documentation says 
flexible members are preferred.

> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I'd value an ack from Lionel in case I've forgotten a quirk about the
> query api, and an ack from a user that this meets their needs. It will
> certainly simplify some of our igt code!

Have r-b from Lionel already.

> Question for overall design, do we want a conjoint capability flag
> space, or one per class? One per class gives us more room, so I think
> should be preferred, but I wonder if a set of common flags would make
> it easier for userspace to filter. (Though not hard to match on both
> class and caps!)

Could split common and per class, 32-bits each with separate fields?

	__u32 capabilities;
	__u32 class_capabilities;

Or keep __u64 capabilities and say common start from bit0 and per class 
start at bit32? this option could be added/defined post factum as well, 
with no required changes now - once there is a first common cap. As long 
as there is a nice block of free bits at that point.

Regards,

Tvrtko
Chris Wilson Oct. 5, 2018, 10:35 a.m. UTC | #6
Quoting Tvrtko Ursulin (2018-10-05 09:26:37)
> 
> On 04/10/2018 17:33, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-10-04 11:51:18)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Engine discovery query allows userspace to enumerate engines, probe their
> >> configuration features, all without needing to maintain the internal PCI
> >> ID based database.
> >>
> >> A new query for the generic i915 query ioctl is added named
> >> DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure
> >> drm_i915_query_engine_info. The address of latter should be passed to the
> >> kernel in the query.data_ptr field, and should be large enough for the
> >> kernel to fill out all known engines as struct drm_i915_engine_info
> >> elements trailing the query.
> >>
> >> As with other queries, setting the item query length to zero allows
> >> userspace to query minimum required buffer size.
> >>
> >> Enumerated engines have common type mask which can be used to query all
> >> hardware engines, versus engines userspace can submit to using the execbuf
> >> uAPI.
> >>
> >> Engines also have capabilities which are per engine class namespace of
> >> bits describing features not present on all engine instances.
> >>
> >> v2:
> >>   * Fixed HEVC assignment.
> >>   * Reorder some fields, rename type to flags, increase width. (Lionel)
> >>   * No need to allocate temporary storage if we do it engine by engine.
> >>     (Lionel)
> >>
> >> v3:
> >>   * Describe engine flags and mark mbz fields. (Lionel)
> >>   * HEVC only applies to VCS.
> >>
> >> v4:
> >>   * Squash SFC flag into main patch.
> >>   * Tidy some comments.
> >>
> >> v5:
> >>   * Add uabi_ prefix to engine capabilities. (Chris Wilson)
> >>   * Report exact size of engine info array. (Chris Wilson)
> >>   * Drop the engine flags. (Joonas Lahtinen)
> >>   * Added some more reserved fields.
> >>   * Move flags after class/instance.
> >>
> >> v6:
> >>   * Do not check engine info array was zeroed by userspace but zero the
> >>     unused fields for them instead.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> >> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> Cc: Tony Ye <tony.ye@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_query.c       | 56 +++++++++++++++++++++++++
> >>   drivers/gpu/drm/i915/intel_engine_cs.c  | 12 ++++++
> >>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
> >>   include/uapi/drm/i915_drm.h             | 47 +++++++++++++++++++++
> >>   4 files changed, 118 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> >> index 5821002cad42..5ac8ef9f5de4 100644
> >> --- a/drivers/gpu/drm/i915/i915_query.c
> >> +++ b/drivers/gpu/drm/i915/i915_query.c
> >> @@ -84,9 +84,65 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
> >>          return total_length;
> >>   }
> >>   
> >> +static int
> >> +query_engine_info(struct drm_i915_private *i915,
> >> +                 struct drm_i915_query_item *query_item)
> >> +{
> >> +       struct drm_i915_query_engine_info __user *query_ptr =
> >> +                               u64_to_user_ptr(query_item->data_ptr);
> >> +       struct drm_i915_engine_info __user *info_ptr = &query_ptr->engines[0];
> >> +       struct drm_i915_query_engine_info query;
> >> +       struct drm_i915_engine_info info = { };
> >> +       struct intel_engine_cs *engine;
> >> +       enum intel_engine_id id;
> >> +       int len;
> >> +
> >> +       if (query_item->flags)
> >> +               return -EINVAL;
> >> +
> >> +       len = 0;
> >> +       for_each_engine(engine, i915, id)
> >> +               len++;
> > 
> > (Isn't this INTEL_INFO()->num_rings?)
> 
> /o\
> 
> >> +       len *= sizeof(struct drm_i915_engine_info);
> >> +       len += sizeof(struct drm_i915_query_engine_info);
> >> +
> >> +       if (!query_item->length)
> >> +               return len;
> >> +       else if (query_item->length < len)
> >> +               return -EINVAL;
> >> +
> >> +       if (copy_from_user(&query, query_ptr, sizeof(query)))
> >> +               return -EFAULT;
> >> +
> >> +       if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
> >> +           query.rsvd[2])
> >> +               return -EINVAL;
> >> +
> >> +       if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))
> >> +               return -EFAULT;
> > 
> > Hmm, so length is the sizeof of the whole struct and not the space in
> > the pointed at block? Ok. I was just a little confused by the lack of
> > checking for info_ptr, as by this point the connection with query_ptr is
> > off the page.
> > 
> > May I beg
> >       info_ptr = &query_ptr->engines[0];
> > here. That should make it more obvious for feeble readers like myself to
> > see that info_ptr is contained within the access_ok check.
> 
> Ok.
> 
> > 
> >> +       for_each_engine(engine, i915, id) {
> >> +               info.class = engine->uabi_class;
> >> +               info.instance = engine->instance;
> >> +               info.capabilities = engine->uabi_capabilities;
> >> +
> > 
> > GEM_BUG_ON((void *)info_ptr > (void *)query_ptr + query_item->length - sizeof(info));
> 
> There is a check above that len fits in query_item->length. So unless 
> the code distrusts itself in a space of a few lines, or we distrust 
> INTEL_INFO->num_engines vs for_each_engine I don't see it is needed?

My only point was to keep tying info_ptr back to the early checks on
query_ptr. From the standpoint of looking at this loop copying into
userspace, I wanted to reassure myself that everything was checked and
cross-checked.

> >> +/**
> >> + * struct drm_i915_query_engine_info
> >> + *
> >> + * Engine info query enumerates all engines known to the driver by filling in
> >> + * an array of struct drm_i915_engine_info structures.
> >> + */
> >> +struct drm_i915_query_engine_info {
> >> +       /** Number of struct drm_i915_engine_info structs following. */
> >> +       __u32 num_engines;
> >> +
> >> +       /** MBZ */
> >> +       __u32 rsvd[3];
> >> +
> >> +       /** Marker for drm_i915_engine_info structures. */
> >> +       struct drm_i915_engine_info engines[];
> > 
> > Do we need [0] for old-c/c++ compatibility?
> 
> Hm.. trying to remember. It is GNU C vs C99 for zero sized array vs 
> flexible array member. We went for the latter in the topology query 
> after some discussion but I don't remember the details now. Grepping the 
> uapi headers there are both so don't know.. GCC documentation says 
> flexible members are preferred.
> 
> > 
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > I'd value an ack from Lionel in case I've forgotten a quirk about the
> > query api, and an ack from a user that this meets their needs. It will
> > certainly simplify some of our igt code!
> 
> Have r-b from Lionel already.
> 
> > Question for overall design, do we want a conjoint capability flag
> > space, or one per class? One per class gives us more room, so I think
> > should be preferred, but I wonder if a set of common flags would make
> > it easier for userspace to filter. (Though not hard to match on both
> > class and caps!)
> 
> Could split common and per class, 32-bits each with separate fields?
> 
>         __u32 capabilities;
>         __u32 class_capabilities;
> 
> Or keep __u64 capabilities and say common start from bit0 and per class 
> start at bit32? this option could be added/defined post factum as well, 
> with no required changes now - once there is a first common cap. As long 
> as there is a nice block of free bits at that point.

I like the suggestion to carve out some bits ahead of time for common
caps. Then I just worry if 32 is enough :)

Hmm, how about a challenge of describing which engines are compatible
for v.eng? Trial-and-error strikes me as being the simplest discovery
method still.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 5821002cad42..5ac8ef9f5de4 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -84,9 +84,65 @@  static int query_topology_info(struct drm_i915_private *dev_priv,
 	return total_length;
 }
 
+static int
+query_engine_info(struct drm_i915_private *i915,
+		  struct drm_i915_query_item *query_item)
+{
+	struct drm_i915_query_engine_info __user *query_ptr =
+				u64_to_user_ptr(query_item->data_ptr);
+	struct drm_i915_engine_info __user *info_ptr = &query_ptr->engines[0];
+	struct drm_i915_query_engine_info query;
+	struct drm_i915_engine_info info = { };
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int len;
+
+	if (query_item->flags)
+		return -EINVAL;
+
+	len = 0;
+	for_each_engine(engine, i915, id)
+		len++;
+	len *= sizeof(struct drm_i915_engine_info);
+	len += sizeof(struct drm_i915_query_engine_info);
+
+	if (!query_item->length)
+		return len;
+	else if (query_item->length < len)
+		return -EINVAL;
+
+	if (copy_from_user(&query, query_ptr, sizeof(query)))
+		return -EFAULT;
+
+	if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
+	    query.rsvd[2])
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, query_ptr, query_item->length))
+		return -EFAULT;
+
+	for_each_engine(engine, i915, id) {
+		info.class = engine->uabi_class;
+		info.instance = engine->instance;
+		info.capabilities = engine->uabi_capabilities;
+
+		if (__copy_to_user(info_ptr, &info, sizeof(info)))
+			return -EFAULT;
+
+		query.num_engines++;
+		info_ptr++;
+	}
+
+	if (__copy_to_user(query_ptr, &query, sizeof(query)))
+		return -EFAULT;
+
+	return len;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
+	query_engine_info,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1c6143bdf5a4..134f0cec724c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -298,6 +298,18 @@  intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->uabi_id = info->uabi_id;
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
+	if (engine->class == VIDEO_DECODE_CLASS) {
+		/* HEVC support is present only on vcs0. */
+		if (INTEL_GEN(dev_priv) >= 8 && info->instance == 0)
+			engine->uabi_capabilities =
+				I915_VCS_CLASS_CAPABILITY_HEVC;
+
+		/* SFC support is wired only to even VCS instances. */
+		if (INTEL_GEN(dev_priv) >= 9 && !(info->instance & 1))
+			engine->uabi_capabilities |=
+				I915_VCS_CLASS_CAPABILITY_SFC;
+	}
+
 	engine->context_size = __intel_engine_context_size(dev_priv,
 							   engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a75a69..9dc738f1b175 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -370,6 +370,9 @@  struct intel_engine_cs {
 
 	u8 class;
 	u8 instance;
+
+	u32 uabi_capabilities;
+
 	u32 context_size;
 	u32 mmio_base;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 298b2e197744..3b0373fb0a93 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1650,6 +1650,7 @@  struct drm_i915_perf_oa_config {
 struct drm_i915_query_item {
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO    1
+#define DRM_I915_QUERY_ENGINE_INFO	2
 
 	/*
 	 * When set to zero by userspace, this is filled with the size of the
@@ -1747,6 +1748,52 @@  struct drm_i915_query_topology_info {
 	__u8 data[];
 };
 
+/**
+ * struct drm_i915_engine_info
+ *
+ * Describes one engine known to the driver, whether or not it is an user-
+ * accessible or hardware only engine, and what are it's capabilities where
+ * applicable.
+ */
+struct drm_i915_engine_info {
+	/** Engine class as in enum drm_i915_gem_engine_class. */
+	__u16 class;
+
+	/** Engine instance number. */
+	__u16 instance;
+
+	/** Reserved field must be cleared to zero. */
+	__u32 rsvd0;
+
+	/** Engine flags. */
+	__u64 flags;
+
+	/** Capabilities of this engine. */
+	__u64 capabilities;
+#define I915_VCS_CLASS_CAPABILITY_HEVC	(1 << 0)
+#define I915_VCS_CLASS_CAPABILITY_SFC	(1 << 1)
+
+	/** Reserved fields must be cleared to zero. */
+	__u64 rsvd1[4];
+};
+
+/**
+ * struct drm_i915_query_engine_info
+ *
+ * Engine info query enumerates all engines known to the driver by filling in
+ * an array of struct drm_i915_engine_info structures.
+ */
+struct drm_i915_query_engine_info {
+	/** Number of struct drm_i915_engine_info structs following. */
+	__u32 num_engines;
+
+	/** MBZ */
+	__u32 rsvd[3];
+
+	/** Marker for drm_i915_engine_info structures. */
+	struct drm_i915_engine_info engines[];
+};
+
 #if defined(__cplusplus)
 }
 #endif