diff mbox series

drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES

Message ID 20220310051853.30440-1-matthew.s.atwood@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES | expand

Commit Message

Matt Atwood March 10, 2022, 5:18 a.m. UTC
Newer platforms have DSS that aren't necessarily available for both
geometry and compute, two queries will need to exist. This introduces
the first, when passing a valid engine class and engine instance in the
flags returns a topology describing geometry.

v2: fix white space errors

Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
---
 drivers/gpu/drm/i915/i915_query.c | 68 ++++++++++++++++++++++---------
 include/uapi/drm/i915_drm.h       | 24 +++++++----
 2 files changed, 65 insertions(+), 27 deletions(-)

Comments

Tvrtko Ursulin March 10, 2022, 12:26 p.m. UTC | #1
On 10/03/2022 05:18, Matt Atwood wrote:
> Newer platforms have DSS that aren't necessarily available for both
> geometry and compute, two queries will need to exist. This introduces
> the first, when passing a valid engine class and engine instance in the
> flags returns a topology describing geometry.
> 
> v2: fix white space errors
> 
> Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
> Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_query.c | 68 ++++++++++++++++++++++---------
>   include/uapi/drm/i915_drm.h       | 24 +++++++----
>   2 files changed, 65 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 2dfbc22857a3..e4f35da28642 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -9,6 +9,7 @@
>   #include "i915_drv.h"
>   #include "i915_perf.h"
>   #include "i915_query.h"
> +#include "gt/intel_engine_user.h"
>   #include <uapi/drm/i915_drm.h>
>   
>   static int copy_query_item(void *query_hdr, size_t query_sz,
> @@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
>   	return 0;
>   }
>   
> -static int query_topology_info(struct drm_i915_private *dev_priv,
> -			       struct drm_i915_query_item *query_item)
> +static int fill_topology_info(const struct sseu_dev_info *sseu,
> +			      struct drm_i915_query_item *query_item,
> +			      const u8 *subslice_mask)
>   {
> -	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
>   	struct drm_i915_query_topology_info topo;
>   	u32 slice_length, subslice_length, eu_length, total_length;
>   	int ret;
>   
> -	if (query_item->flags != 0)
> -		return -EINVAL;
> +	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
>   
>   	if (sseu->max_slices == 0)
>   		return -ENODEV;
>   
> -	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
> -
>   	slice_length = sizeof(sseu->slice_mask);
>   	subslice_length = sseu->max_slices * sseu->ss_stride;
>   	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
>   	total_length = sizeof(topo) + slice_length + subslice_length +
>   		       eu_length;
>   
> -	ret = copy_query_item(&topo, sizeof(topo), total_length,
> -			      query_item);
> +	ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
> +
>   	if (ret != 0)
>   		return ret;
>   
> -	if (topo.flags != 0)
> -		return -EINVAL;
> -
>   	memset(&topo, 0, sizeof(topo));
>   	topo.max_slices = sseu->max_slices;
>   	topo.max_subslices = sseu->max_subslices;
> @@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
>   	topo.eu_stride = sseu->eu_stride;
>   
>   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
> -			   &topo, sizeof(topo)))
> +			 &topo, sizeof(topo)))
>   		return -EFAULT;
>   
>   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
> -			   &sseu->slice_mask, slice_length))
> +			 &sseu->slice_mask, slice_length))
>   		return -EFAULT;
>   
>   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> -					   sizeof(topo) + slice_length),
> -			   sseu->subslice_mask, subslice_length))
> +					 sizeof(topo) + slice_length),
> +			 subslice_mask, subslice_length))
>   		return -EFAULT;
>   
>   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> -					   sizeof(topo) +
> -					   slice_length + subslice_length),
> -			   sseu->eu_mask, eu_length))
> +					 sizeof(topo) +
> +					 slice_length + subslice_length),
> +			 sseu->eu_mask, eu_length))
>   		return -EFAULT;
>   
>   	return total_length;
>   }
>   
> +static int query_topology_info(struct drm_i915_private *dev_priv,
> +			       struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
> +
> +	if (query_item->flags != 0)
> +		return -EINVAL;
> +
> +	return fill_topology_info(sseu, query_item, sseu->subslice_mask);
> +}
> +
> +static int query_geometry_subslices(struct drm_i915_private *i915,
> +				    struct drm_i915_query_item *query_item)
> +{
> +	const struct sseu_dev_info *sseu;
> +	struct intel_engine_cs *engine;
> +	u8 engine_class, engine_instance;
> +
> +	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
> +		return -ENODEV;
> +
> +	engine_class = query_item->flags & 0xFF;
> +	engine_instance = (query_item->flags >> 8) & 0xFF;
> +
> +	engine = intel_engine_lookup_user(i915, engine_class, engine_instance);
> +
> +	if (!engine)
> +		return -EINVAL;
> +
> +	sseu = &engine->gt->info.sseu;
> +
> +	return fill_topology_info(sseu, query_item, sseu->geometry_subslice_mask);
> +}
> +
>   static int
>   query_engine_info(struct drm_i915_private *i915,
>   		  struct drm_i915_query_item *query_item)
> @@ -485,6 +514,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   	query_engine_info,
>   	query_perf_config,
>   	query_memregion_info,
> +	query_geometry_subslices,
>   };
>   
>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 05c3642aaece..1fa6022e1558 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2687,10 +2687,11 @@ struct drm_i915_perf_oa_config {
>   struct drm_i915_query_item {
>   	/** @query_id: The id for this query */
>   	__u64 query_id;
> -#define DRM_I915_QUERY_TOPOLOGY_INFO    1
> -#define DRM_I915_QUERY_ENGINE_INFO	2
> -#define DRM_I915_QUERY_PERF_CONFIG      3
> -#define DRM_I915_QUERY_MEMORY_REGIONS   4
> +#define DRM_I915_QUERY_TOPOLOGY_INFO		1
> +#define DRM_I915_QUERY_ENGINE_INFO		2
> +#define DRM_I915_QUERY_PERF_CONFIG		3
> +#define DRM_I915_QUERY_MEMORY_REGIONS		4
> +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES	5
>   /* Must be kept compact -- no holes and well documented */
>   
>   	/**
> @@ -2714,6 +2715,9 @@ struct drm_i915_query_item {
>   	 *	- DRM_I915_QUERY_PERF_CONFIG_LIST
>   	 *      - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
>   	 *      - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
> +	 *
> +	 * When query_id == DRM_I915_QUERY_GEOMETRY_SUBSLICES must have bits 0:7 set
> +	 * as a valid engine class, and bits 8:15 must have a valid engine instance.

Alternatively, all other uapi uses struct i915_engine_class_instance to 
address engines which uses u16:u16.

How ugly it is to stuff a struct into u32 flags is the question... But 
you could at least use u16:u16 for consistency. Unless you wanted to 
leave some bits free for the future?

Regards,

Tvrtko

>   	 */
>   	__u32 flags;
>   #define DRM_I915_QUERY_PERF_CONFIG_LIST          1
> @@ -2772,16 +2776,20 @@ struct drm_i915_query {
>   };
>   
>   /*
> - * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
> + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO,
> + * DRM_I915_QUERY_GEOMETRY_SUBSLICE:
>    *
>    * data: contains the 3 pieces of information :
>    *
> - * - the slice mask with one bit per slice telling whether a slice is
> - *   available. The availability of slice X can be queried with the following
> - *   formula :
> + * - For DRM_I915_QUERY_TOPOLOGY_INFO the slice mask with one bit per slice
> + *   telling whether a slice is available. The availability of slice X can be
> + *   queried with the following formula :
>    *
>    *           (data[X / 8] >> (X % 8)) & 1
>    *
> + * - For DRM_I915_QUERY_GEOMETRY_SUBSLICES Slices are equal to 1 and this field
> + *   is not used.
> + *
>    * - the subslice mask for each slice with one bit per subslice telling
>    *   whether a subslice is available. Gen12 has dual-subslices, which are
>    *   similar to two gen11 subslices. For gen12, this array represents dual-
Matt Atwood March 12, 2022, 4:16 a.m. UTC | #2
On Thu, Mar 10, 2022 at 12:26:12PM +0000, Tvrtko Ursulin wrote:
> 
> On 10/03/2022 05:18, Matt Atwood wrote:
> > Newer platforms have DSS that aren't necessarily available for both
> > geometry and compute, two queries will need to exist. This introduces
> > the first, when passing a valid engine class and engine instance in the
> > flags returns a topology describing geometry.
> > 
> > v2: fix white space errors
> > 
> > Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > Cc: Matt Roper <matthew.d.roper@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
> > Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_query.c | 68 ++++++++++++++++++++++---------
> >   include/uapi/drm/i915_drm.h       | 24 +++++++----
> >   2 files changed, 65 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> > index 2dfbc22857a3..e4f35da28642 100644
> > --- a/drivers/gpu/drm/i915/i915_query.c
> > +++ b/drivers/gpu/drm/i915/i915_query.c
> > @@ -9,6 +9,7 @@
> >   #include "i915_drv.h"
> >   #include "i915_perf.h"
> >   #include "i915_query.h"
> > +#include "gt/intel_engine_user.h"
> >   #include <uapi/drm/i915_drm.h>
> >   static int copy_query_item(void *query_hdr, size_t query_sz,
> > @@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
> >   	return 0;
> >   }
> > -static int query_topology_info(struct drm_i915_private *dev_priv,
> > -			       struct drm_i915_query_item *query_item)
> > +static int fill_topology_info(const struct sseu_dev_info *sseu,
> > +			      struct drm_i915_query_item *query_item,
> > +			      const u8 *subslice_mask)
> >   {
> > -	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
> >   	struct drm_i915_query_topology_info topo;
> >   	u32 slice_length, subslice_length, eu_length, total_length;
> >   	int ret;
> > -	if (query_item->flags != 0)
> > -		return -EINVAL;
> > +	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
> >   	if (sseu->max_slices == 0)
> >   		return -ENODEV;
> > -	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
> > -
> >   	slice_length = sizeof(sseu->slice_mask);
> >   	subslice_length = sseu->max_slices * sseu->ss_stride;
> >   	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
> >   	total_length = sizeof(topo) + slice_length + subslice_length +
> >   		       eu_length;
> > -	ret = copy_query_item(&topo, sizeof(topo), total_length,
> > -			      query_item);
> > +	ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
> > +
> >   	if (ret != 0)
> >   		return ret;
> > -	if (topo.flags != 0)
> > -		return -EINVAL;
> > -
> >   	memset(&topo, 0, sizeof(topo));
> >   	topo.max_slices = sseu->max_slices;
> >   	topo.max_subslices = sseu->max_subslices;
> > @@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
> >   	topo.eu_stride = sseu->eu_stride;
> >   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
> > -			   &topo, sizeof(topo)))
> > +			 &topo, sizeof(topo)))
> >   		return -EFAULT;
> >   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
> > -			   &sseu->slice_mask, slice_length))
> > +			 &sseu->slice_mask, slice_length))
> >   		return -EFAULT;
> >   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> > -					   sizeof(topo) + slice_length),
> > -			   sseu->subslice_mask, subslice_length))
> > +					 sizeof(topo) + slice_length),
> > +			 subslice_mask, subslice_length))
> >   		return -EFAULT;
> >   	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> > -					   sizeof(topo) +
> > -					   slice_length + subslice_length),
> > -			   sseu->eu_mask, eu_length))
> > +					 sizeof(topo) +
> > +					 slice_length + subslice_length),
> > +			 sseu->eu_mask, eu_length))
> >   		return -EFAULT;
> >   	return total_length;
> >   }
> > +static int query_topology_info(struct drm_i915_private *dev_priv,
> > +			       struct drm_i915_query_item *query_item)
> > +{
> > +	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
> > +
> > +	if (query_item->flags != 0)
> > +		return -EINVAL;
> > +
> > +	return fill_topology_info(sseu, query_item, sseu->subslice_mask);
> > +}
> > +
> > +static int query_geometry_subslices(struct drm_i915_private *i915,
> > +				    struct drm_i915_query_item *query_item)
> > +{
> > +	const struct sseu_dev_info *sseu;
> > +	struct intel_engine_cs *engine;
> > +	u8 engine_class, engine_instance;
> > +
> > +	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
> > +		return -ENODEV;
> > +
> > +	engine_class = query_item->flags & 0xFF;
> > +	engine_instance = (query_item->flags >> 8) & 0xFF;
> > +
> > +	engine = intel_engine_lookup_user(i915, engine_class, engine_instance);
> > +
> > +	if (!engine)
> > +		return -EINVAL;
> > +
> > +	sseu = &engine->gt->info.sseu;
> > +
> > +	return fill_topology_info(sseu, query_item, sseu->geometry_subslice_mask);
> > +}
> > +
> >   static int
> >   query_engine_info(struct drm_i915_private *i915,
> >   		  struct drm_i915_query_item *query_item)
> > @@ -485,6 +514,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
> >   	query_engine_info,
> >   	query_perf_config,
> >   	query_memregion_info,
> > +	query_geometry_subslices,
> >   };
> >   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 05c3642aaece..1fa6022e1558 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -2687,10 +2687,11 @@ struct drm_i915_perf_oa_config {
> >   struct drm_i915_query_item {
> >   	/** @query_id: The id for this query */
> >   	__u64 query_id;
> > -#define DRM_I915_QUERY_TOPOLOGY_INFO    1
> > -#define DRM_I915_QUERY_ENGINE_INFO	2
> > -#define DRM_I915_QUERY_PERF_CONFIG      3
> > -#define DRM_I915_QUERY_MEMORY_REGIONS   4
> > +#define DRM_I915_QUERY_TOPOLOGY_INFO		1
> > +#define DRM_I915_QUERY_ENGINE_INFO		2
> > +#define DRM_I915_QUERY_PERF_CONFIG		3
> > +#define DRM_I915_QUERY_MEMORY_REGIONS		4
> > +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES	5
> >   /* Must be kept compact -- no holes and well documented */
> >   	/**
> > @@ -2714,6 +2715,9 @@ struct drm_i915_query_item {
> >   	 *	- DRM_I915_QUERY_PERF_CONFIG_LIST
> >   	 *      - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
> >   	 *      - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
> > +	 *
> > +	 * When query_id == DRM_I915_QUERY_GEOMETRY_SUBSLICES must have bits 0:7 set
> > +	 * as a valid engine class, and bits 8:15 must have a valid engine instance.
> 
> Alternatively, all other uapi uses struct i915_engine_class_instance to
> address engines which uses u16:u16.
> 
> How ugly it is to stuff a struct into u32 flags is the question... But you
> could at least use u16:u16 for consistency. Unless you wanted to leave some
> bits free for the future?
Originally when I wrote this I was wanting to leave space in case it was
ever needed. I'm not particularly for or against keeping the space now. 
MattA
> 
> Regards,
> 
> Tvrtko
> 
> >   	 */
> >   	__u32 flags;
> >   #define DRM_I915_QUERY_PERF_CONFIG_LIST          1
> > @@ -2772,16 +2776,20 @@ struct drm_i915_query {
> >   };
> >   /*
> > - * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
> > + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO,
> > + * DRM_I915_QUERY_GEOMETRY_SUBSLICE:
> >    *
> >    * data: contains the 3 pieces of information :
> >    *
> > - * - the slice mask with one bit per slice telling whether a slice is
> > - *   available. The availability of slice X can be queried with the following
> > - *   formula :
> > + * - For DRM_I915_QUERY_TOPOLOGY_INFO the slice mask with one bit per slice
> > + *   telling whether a slice is available. The availability of slice X can be
> > + *   queried with the following formula :
> >    *
> >    *           (data[X / 8] >> (X % 8)) & 1
> >    *
> > + * - For DRM_I915_QUERY_GEOMETRY_SUBSLICES Slices are equal to 1 and this field
> > + *   is not used.
> > + *
> >    * - the subslice mask for each slice with one bit per subslice telling
> >    *   whether a subslice is available. Gen12 has dual-subslices, which are
> >    *   similar to two gen11 subslices. For gen12, this array represents dual-
Tvrtko Ursulin March 14, 2022, 3:35 p.m. UTC | #3
On 12/03/2022 04:16, Matt Atwood wrote:
> On Thu, Mar 10, 2022 at 12:26:12PM +0000, Tvrtko Ursulin wrote:
>>
>> On 10/03/2022 05:18, Matt Atwood wrote:
>>> Newer platforms have DSS that aren't necessarily available for both
>>> geometry and compute, two queries will need to exist. This introduces
>>> the first, when passing a valid engine class and engine instance in the
>>> flags returns a topology describing geometry.
>>>
>>> v2: fix white space errors
>>>
>>> Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
>>> Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_query.c | 68 ++++++++++++++++++++++---------
>>>    include/uapi/drm/i915_drm.h       | 24 +++++++----
>>>    2 files changed, 65 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
>>> index 2dfbc22857a3..e4f35da28642 100644
>>> --- a/drivers/gpu/drm/i915/i915_query.c
>>> +++ b/drivers/gpu/drm/i915/i915_query.c
>>> @@ -9,6 +9,7 @@
>>>    #include "i915_drv.h"
>>>    #include "i915_perf.h"
>>>    #include "i915_query.h"
>>> +#include "gt/intel_engine_user.h"
>>>    #include <uapi/drm/i915_drm.h>
>>>    static int copy_query_item(void *query_hdr, size_t query_sz,
>>> @@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
>>>    	return 0;
>>>    }
>>> -static int query_topology_info(struct drm_i915_private *dev_priv,
>>> -			       struct drm_i915_query_item *query_item)
>>> +static int fill_topology_info(const struct sseu_dev_info *sseu,
>>> +			      struct drm_i915_query_item *query_item,
>>> +			      const u8 *subslice_mask)
>>>    {
>>> -	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
>>>    	struct drm_i915_query_topology_info topo;
>>>    	u32 slice_length, subslice_length, eu_length, total_length;
>>>    	int ret;
>>> -	if (query_item->flags != 0)
>>> -		return -EINVAL;
>>> +	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
>>>    	if (sseu->max_slices == 0)
>>>    		return -ENODEV;
>>> -	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
>>> -
>>>    	slice_length = sizeof(sseu->slice_mask);
>>>    	subslice_length = sseu->max_slices * sseu->ss_stride;
>>>    	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
>>>    	total_length = sizeof(topo) + slice_length + subslice_length +
>>>    		       eu_length;
>>> -	ret = copy_query_item(&topo, sizeof(topo), total_length,
>>> -			      query_item);
>>> +	ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
>>> +
>>>    	if (ret != 0)
>>>    		return ret;
>>> -	if (topo.flags != 0)
>>> -		return -EINVAL;
>>> -
>>>    	memset(&topo, 0, sizeof(topo));
>>>    	topo.max_slices = sseu->max_slices;
>>>    	topo.max_subslices = sseu->max_subslices;
>>> @@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
>>>    	topo.eu_stride = sseu->eu_stride;
>>>    	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
>>> -			   &topo, sizeof(topo)))
>>> +			 &topo, sizeof(topo)))
>>>    		return -EFAULT;
>>>    	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
>>> -			   &sseu->slice_mask, slice_length))
>>> +			 &sseu->slice_mask, slice_length))
>>>    		return -EFAULT;
>>>    	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
>>> -					   sizeof(topo) + slice_length),
>>> -			   sseu->subslice_mask, subslice_length))
>>> +					 sizeof(topo) + slice_length),
>>> +			 subslice_mask, subslice_length))
>>>    		return -EFAULT;
>>>    	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
>>> -					   sizeof(topo) +
>>> -					   slice_length + subslice_length),
>>> -			   sseu->eu_mask, eu_length))
>>> +					 sizeof(topo) +
>>> +					 slice_length + subslice_length),
>>> +			 sseu->eu_mask, eu_length))
>>>    		return -EFAULT;
>>>    	return total_length;
>>>    }
>>> +static int query_topology_info(struct drm_i915_private *dev_priv,
>>> +			       struct drm_i915_query_item *query_item)
>>> +{
>>> +	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
>>> +
>>> +	if (query_item->flags != 0)
>>> +		return -EINVAL;
>>> +
>>> +	return fill_topology_info(sseu, query_item, sseu->subslice_mask);
>>> +}
>>> +
>>> +static int query_geometry_subslices(struct drm_i915_private *i915,
>>> +				    struct drm_i915_query_item *query_item)
>>> +{
>>> +	const struct sseu_dev_info *sseu;
>>> +	struct intel_engine_cs *engine;
>>> +	u8 engine_class, engine_instance;
>>> +
>>> +	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
>>> +		return -ENODEV;
>>> +
>>> +	engine_class = query_item->flags & 0xFF;
>>> +	engine_instance = (query_item->flags >> 8) & 0xFF;
>>> +
>>> +	engine = intel_engine_lookup_user(i915, engine_class, engine_instance);
>>> +
>>> +	if (!engine)
>>> +		return -EINVAL;
>>> +
>>> +	sseu = &engine->gt->info.sseu;
>>> +
>>> +	return fill_topology_info(sseu, query_item, sseu->geometry_subslice_mask);
>>> +}
>>> +
>>>    static int
>>>    query_engine_info(struct drm_i915_private *i915,
>>>    		  struct drm_i915_query_item *query_item)
>>> @@ -485,6 +514,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>>>    	query_engine_info,
>>>    	query_perf_config,
>>>    	query_memregion_info,
>>> +	query_geometry_subslices,
>>>    };
>>>    int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 05c3642aaece..1fa6022e1558 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -2687,10 +2687,11 @@ struct drm_i915_perf_oa_config {
>>>    struct drm_i915_query_item {
>>>    	/** @query_id: The id for this query */
>>>    	__u64 query_id;
>>> -#define DRM_I915_QUERY_TOPOLOGY_INFO    1
>>> -#define DRM_I915_QUERY_ENGINE_INFO	2
>>> -#define DRM_I915_QUERY_PERF_CONFIG      3
>>> -#define DRM_I915_QUERY_MEMORY_REGIONS   4
>>> +#define DRM_I915_QUERY_TOPOLOGY_INFO		1
>>> +#define DRM_I915_QUERY_ENGINE_INFO		2
>>> +#define DRM_I915_QUERY_PERF_CONFIG		3
>>> +#define DRM_I915_QUERY_MEMORY_REGIONS		4
>>> +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES	5
>>>    /* Must be kept compact -- no holes and well documented */
>>>    	/**
>>> @@ -2714,6 +2715,9 @@ struct drm_i915_query_item {
>>>    	 *	- DRM_I915_QUERY_PERF_CONFIG_LIST
>>>    	 *      - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
>>>    	 *      - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
>>> +	 *
>>> +	 * When query_id == DRM_I915_QUERY_GEOMETRY_SUBSLICES must have bits 0:7 set
>>> +	 * as a valid engine class, and bits 8:15 must have a valid engine instance.
>>
>> Alternatively, all other uapi uses struct i915_engine_class_instance to
>> address engines which uses u16:u16.
>>
>> How ugly it is to stuff a struct into u32 flags is the question... But you
>> could at least use u16:u16 for consistency. Unless you wanted to leave some
>> bits free for the future?
> Originally when I wrote this I was wanting to leave space in case it was
> ever needed. I'm not particularly for or against keeping the space now.

Yes, shrug... Neither I can't guess if we are ever likely to hit a 
problem by having fewer bits for class:instance here compared to other 
uapi, or if stuffing struct i915_engine_class_instance into flags would 
just be too ugly. I mean there is option to define a new struct and not 
use flags at all but that's probably to complicated for what it is.

Anyone else with an opinion? Consistency or should be fine even like it is?

Regards,

Tvrtko

> MattA
>>
>> Regards,
>>
>> Tvrtko
>>
>>>    	 */
>>>    	__u32 flags;
>>>    #define DRM_I915_QUERY_PERF_CONFIG_LIST          1
>>> @@ -2772,16 +2776,20 @@ struct drm_i915_query {
>>>    };
>>>    /*
>>> - * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
>>> + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO,
>>> + * DRM_I915_QUERY_GEOMETRY_SUBSLICE:
>>>     *
>>>     * data: contains the 3 pieces of information :
>>>     *
>>> - * - the slice mask with one bit per slice telling whether a slice is
>>> - *   available. The availability of slice X can be queried with the following
>>> - *   formula :
>>> + * - For DRM_I915_QUERY_TOPOLOGY_INFO the slice mask with one bit per slice
>>> + *   telling whether a slice is available. The availability of slice X can be
>>> + *   queried with the following formula :
>>>     *
>>>     *           (data[X / 8] >> (X % 8)) & 1
>>>     *
>>> + * - For DRM_I915_QUERY_GEOMETRY_SUBSLICES Slices are equal to 1 and this field
>>> + *   is not used.
>>> + *
>>>     * - the subslice mask for each slice with one bit per subslice telling
>>>     *   whether a subslice is available. Gen12 has dual-subslices, which are
>>>     *   similar to two gen11 subslices. For gen12, this array represents dual-
Dixit, Ashutosh March 14, 2022, 5:50 p.m. UTC | #4
On Mon, 14 Mar 2022 08:35:17 -0700, Tvrtko Ursulin wrote:
>
> >> Alternatively, all other uapi uses struct i915_engine_class_instance to
> >> address engines which uses u16:u16.
> >>
> >> How ugly it is to stuff a struct into u32 flags is the question... But you
> >> could at least use u16:u16 for consistency. Unless you wanted to leave some
> >> bits free for the future?
> > Originally when I wrote this I was wanting to leave space in case it was
> > ever needed. I'm not particularly for or against keeping the space now.
>
> Yes, shrug... Neither I can't guess if we are ever likely to hit a problem
> by having fewer bits for class:instance here compared to other uapi, or if
> stuffing struct i915_engine_class_instance into flags would just be too
> ugly. I mean there is option to define a new struct and not use flags at
> all but that's probably to complicated for what it is.
>
> Anyone else with an opinion? Consistency or should be fine even like it is?

Consistency. I'd prefer to stuff struct i915_engine_class_instance into
flags, fwiw.
Joonas Lahtinen March 16, 2022, 11:30 a.m. UTC | #5
Quoting Tvrtko Ursulin (2022-03-14 17:35:17)
> 
> On 12/03/2022 04:16, Matt Atwood wrote:
> > On Thu, Mar 10, 2022 at 12:26:12PM +0000, Tvrtko Ursulin wrote:
> >>
> >> On 10/03/2022 05:18, Matt Atwood wrote:
> >>> Newer platforms have DSS that aren't necessarily available for both
> >>> geometry and compute, two queries will need to exist. This introduces
> >>> the first, when passing a valid engine class and engine instance in the
> >>> flags returns a topology describing geometry.
> >>>
> >>> v2: fix white space errors
> >>>
> >>> Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
> >>> Cc: Matt Roper <matthew.d.roper@intel.com>
> >>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >>> UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
> >>> Signed-off-by: Matt Atwood <matthew.s.atwood@intel.com>

<SNIP>

> >>> @@ -2714,6 +2715,9 @@ struct drm_i915_query_item {
> >>>      *      - DRM_I915_QUERY_PERF_CONFIG_LIST
> >>>      *      - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
> >>>      *      - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
> >>> +    *
> >>> +    * When query_id == DRM_I915_QUERY_GEOMETRY_SUBSLICES must have bits 0:7 set
> >>> +    * as a valid engine class, and bits 8:15 must have a valid engine instance.
> >>
> >> Alternatively, all other uapi uses struct i915_engine_class_instance to
> >> address engines which uses u16:u16.
> >>
> >> How ugly it is to stuff a struct into u32 flags is the question... But you
> >> could at least use u16:u16 for consistency. Unless you wanted to leave some
> >> bits free for the future?
> > Originally when I wrote this I was wanting to leave space in case it was
> > ever needed. I'm not particularly for or against keeping the space now.
> 
> Yes, shrug... Neither I can't guess if we are ever likely to hit a 
> problem by having fewer bits for class:instance here compared to other 
> uapi, or if stuffing struct i915_engine_class_instance into flags would 
> just be too ugly. I mean there is option to define a new struct and not 
> use flags at all but that's probably to complicated for what it is.
> 
> Anyone else with an opinion? Consistency or should be fine even like it is?

Stuffing a full i915_engine_class_instance was definitely intended when
putting it into the flags was suggested.

If that is hit with a complication, the next proposed alternative was a
new struct. That's why the query interface was made easily extensible,
after all...

Regards, Joonas
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 2dfbc22857a3..e4f35da28642 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -9,6 +9,7 @@ 
 #include "i915_drv.h"
 #include "i915_perf.h"
 #include "i915_query.h"
+#include "gt/intel_engine_user.h"
 #include <uapi/drm/i915_drm.h>
 
 static int copy_query_item(void *query_hdr, size_t query_sz,
@@ -28,36 +29,30 @@  static int copy_query_item(void *query_hdr, size_t query_sz,
 	return 0;
 }
 
-static int query_topology_info(struct drm_i915_private *dev_priv,
-			       struct drm_i915_query_item *query_item)
+static int fill_topology_info(const struct sseu_dev_info *sseu,
+			      struct drm_i915_query_item *query_item,
+			      const u8 *subslice_mask)
 {
-	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
 	struct drm_i915_query_topology_info topo;
 	u32 slice_length, subslice_length, eu_length, total_length;
 	int ret;
 
-	if (query_item->flags != 0)
-		return -EINVAL;
+	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
 	if (sseu->max_slices == 0)
 		return -ENODEV;
 
-	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
 	slice_length = sizeof(sseu->slice_mask);
 	subslice_length = sseu->max_slices * sseu->ss_stride;
 	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
 	total_length = sizeof(topo) + slice_length + subslice_length +
 		       eu_length;
 
-	ret = copy_query_item(&topo, sizeof(topo), total_length,
-			      query_item);
+	ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
+
 	if (ret != 0)
 		return ret;
 
-	if (topo.flags != 0)
-		return -EINVAL;
-
 	memset(&topo, 0, sizeof(topo));
 	topo.max_slices = sseu->max_slices;
 	topo.max_subslices = sseu->max_subslices;
@@ -69,27 +64,61 @@  static int query_topology_info(struct drm_i915_private *dev_priv,
 	topo.eu_stride = sseu->eu_stride;
 
 	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
-			   &topo, sizeof(topo)))
+			 &topo, sizeof(topo)))
 		return -EFAULT;
 
 	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
-			   &sseu->slice_mask, slice_length))
+			 &sseu->slice_mask, slice_length))
 		return -EFAULT;
 
 	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					   sizeof(topo) + slice_length),
-			   sseu->subslice_mask, subslice_length))
+					 sizeof(topo) + slice_length),
+			 subslice_mask, subslice_length))
 		return -EFAULT;
 
 	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					   sizeof(topo) +
-					   slice_length + subslice_length),
-			   sseu->eu_mask, eu_length))
+					 sizeof(topo) +
+					 slice_length + subslice_length),
+			 sseu->eu_mask, eu_length))
 		return -EFAULT;
 
 	return total_length;
 }
 
+static int query_topology_info(struct drm_i915_private *dev_priv,
+			       struct drm_i915_query_item *query_item)
+{
+	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+
+	if (query_item->flags != 0)
+		return -EINVAL;
+
+	return fill_topology_info(sseu, query_item, sseu->subslice_mask);
+}
+
+static int query_geometry_subslices(struct drm_i915_private *i915,
+				    struct drm_i915_query_item *query_item)
+{
+	const struct sseu_dev_info *sseu;
+	struct intel_engine_cs *engine;
+	u8 engine_class, engine_instance;
+
+	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+		return -ENODEV;
+
+	engine_class = query_item->flags & 0xFF;
+	engine_instance = (query_item->flags >> 8) & 0xFF;
+
+	engine = intel_engine_lookup_user(i915, engine_class, engine_instance);
+
+	if (!engine)
+		return -EINVAL;
+
+	sseu = &engine->gt->info.sseu;
+
+	return fill_topology_info(sseu, query_item, sseu->geometry_subslice_mask);
+}
+
 static int
 query_engine_info(struct drm_i915_private *i915,
 		  struct drm_i915_query_item *query_item)
@@ -485,6 +514,7 @@  static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 	query_engine_info,
 	query_perf_config,
 	query_memregion_info,
+	query_geometry_subslices,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 05c3642aaece..1fa6022e1558 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2687,10 +2687,11 @@  struct drm_i915_perf_oa_config {
 struct drm_i915_query_item {
 	/** @query_id: The id for this query */
 	__u64 query_id;
-#define DRM_I915_QUERY_TOPOLOGY_INFO    1
-#define DRM_I915_QUERY_ENGINE_INFO	2
-#define DRM_I915_QUERY_PERF_CONFIG      3
-#define DRM_I915_QUERY_MEMORY_REGIONS   4
+#define DRM_I915_QUERY_TOPOLOGY_INFO		1
+#define DRM_I915_QUERY_ENGINE_INFO		2
+#define DRM_I915_QUERY_PERF_CONFIG		3
+#define DRM_I915_QUERY_MEMORY_REGIONS		4
+#define DRM_I915_QUERY_GEOMETRY_SUBSLICES	5
 /* Must be kept compact -- no holes and well documented */
 
 	/**
@@ -2714,6 +2715,9 @@  struct drm_i915_query_item {
 	 *	- DRM_I915_QUERY_PERF_CONFIG_LIST
 	 *      - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
 	 *      - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
+	 *
+	 * When query_id == DRM_I915_QUERY_GEOMETRY_SUBSLICES must have bits 0:7 set
+	 * as a valid engine class, and bits 8:15 must have a valid engine instance.
 	 */
 	__u32 flags;
 #define DRM_I915_QUERY_PERF_CONFIG_LIST          1
@@ -2772,16 +2776,20 @@  struct drm_i915_query {
 };
 
 /*
- * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO,
+ * DRM_I915_QUERY_GEOMETRY_SUBSLICE:
  *
  * data: contains the 3 pieces of information :
  *
- * - the slice mask with one bit per slice telling whether a slice is
- *   available. The availability of slice X can be queried with the following
- *   formula :
+ * - For DRM_I915_QUERY_TOPOLOGY_INFO the slice mask with one bit per slice
+ *   telling whether a slice is available. The availability of slice X can be
+ *   queried with the following formula :
  *
  *           (data[X / 8] >> (X % 8)) & 1
  *
+ * - For DRM_I915_QUERY_GEOMETRY_SUBSLICES Slices are equal to 1 and this field
+ *   is not used.
+ *
  * - the subslice mask for each slice with one bit per subslice telling
  *   whether a subslice is available. Gen12 has dual-subslices, which are
  *   similar to two gen11 subslices. For gen12, this array represents dual-