diff mbox series

[RFC] drm/i915: Add GuC submission interface version query

Message ID 20240207115612.1322778-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [RFC] drm/i915: Add GuC submission interface version query | expand

Commit Message

Tvrtko Ursulin Feb. 7, 2024, 11:56 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add a new query to the GuC submission interface version.

Mesa intends to use this information to check for old firmware versions
with a known bug where using the render and compute command streamers
simultaneously can cause GPU hangs due issues in firmware scheduling.

Based on patches from Vivaik and Joonas.

There is a little bit of an open around the width required for versions.
While the GuC FW iface tells they are u8, i915 GuC code uses u32:

 #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
 #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
 #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
...
 struct intel_uc_fw_ver {
         u32 major;
         u32 minor;
         u32 patch;
         u32 build;
 };

So we could make the query u8, and refactor the struct intel_uc_fw_ver
to use u8, or not. To avoid any doubts on why are we assigning u32 to
u8 I simply opted to use u64. Which avoids the need to add any padding
too.

Compile tested only.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jose Souza <jose.souza@intel.com>
Cc: Sagar Ghuge <sagar.ghuge@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
---
 drivers/gpu/drm/i915/i915_query.c | 32 +++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h       | 11 +++++++++++
 2 files changed, 43 insertions(+)

Comments

Joonas Lahtinen Feb. 7, 2024, 3:09 p.m. UTC | #1
Quoting Tvrtko Ursulin (2024-02-07 13:56:12)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Add a new query to the GuC submission interface version.
> 
> Mesa intends to use this information to check for old firmware versions
> with a known bug where using the render and compute command streamers
> simultaneously can cause GPU hangs due issues in firmware scheduling.
> 
> Based on patches from Vivaik and Joonas.
> 
> There is a little bit of an open around the width required for versions.
> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
> 
>  #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>  #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>  #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
> ...
>  struct intel_uc_fw_ver {
>          u32 major;
>          u32 minor;
>          u32 patch;
>          u32 build;
>  };
> 
> So we could make the query u8, and refactor the struct intel_uc_fw_ver
> to use u8, or not. To avoid any doubts on why are we assigning u32 to
> u8 I simply opted to use u64. Which avoids the need to add any padding
> too.

This a single-shot init time query so I guess u64 is fine too, to keep
the code straightforward.

> Compile tested only.

If Mesa folks confirm this is working for them and after you add link to
the Mesa PR, then you can add my:

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jose Souza <jose.souza@intel.com>
> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Cc: John Harrison <John.C.Harrison@Intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_query.c | 32 +++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h       | 11 +++++++++++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 00871ef99792..999687f6a3d4 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct drm_i915_private *i915,
>         return hwconfig->size;
>  }
>  
> +static int
> +query_guc_submission_version(struct drm_i915_private *i915,
> +                            struct drm_i915_query_item *query)
> +{
> +       struct drm_i915_query_guc_submission_version __user *query_ptr =
> +                                           u64_to_user_ptr(query->data_ptr);
> +       struct drm_i915_query_guc_submission_version ver;
> +       struct intel_guc *guc = &to_gt(i915)->uc.guc;
> +       const size_t size = sizeof(ver);
> +       int ret;
> +
> +       if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
> +               return -ENODEV;
> +
> +       ret = copy_query_item(&ver, size, size, query);
> +       if (ret != 0)
> +               return ret;
> +
> +       if (ver.major || ver.minor || ver.patch)
> +               return -EINVAL;
> +
> +       ver.major = guc->submission_version.major;
> +       ver.minor = guc->submission_version.minor;
> +       ver.patch = guc->submission_version.patch;
> +
> +       if (copy_to_user(query_ptr, &ver, size))
> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
>  static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>                                         struct drm_i915_query_item *query_item) = {
>         query_topology_info,
> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>         query_memregion_info,
>         query_hwconfig_blob,
>         query_geometry_subslices,
> +       query_guc_submission_version,
>  };
>  
>  int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 550c496ce76d..d80d9b5e1eda 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>          *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
>          *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
>          *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
> +        *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
>          */
>         __u64 query_id;
>  #define DRM_I915_QUERY_TOPOLOGY_INFO           1
> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>  #define DRM_I915_QUERY_MEMORY_REGIONS          4
>  #define DRM_I915_QUERY_HWCONFIG_BLOB           5
>  #define DRM_I915_QUERY_GEOMETRY_SUBSLICES      6
> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION  7
>  /* Must be kept compact -- no holes and well documented */
>  
>         /**
> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>         struct drm_i915_memory_region_info regions[];
>  };
>  
> +/**
> +* struct drm_i915_query_guc_submission_version - query GuC submission interface version
> +*/
> +struct drm_i915_query_guc_submission_version {
> +       __u64 major;
> +       __u64 minor;
> +       __u64 patch;
> +};
> +
>  /**
>   * DOC: GuC HWCONFIG blob uAPI
>   *
> -- 
> 2.40.1
>
John Harrison Feb. 7, 2024, 6:12 p.m. UTC | #2
On 2/7/2024 03:56, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Add a new query to the GuC submission interface version.
>
> Mesa intends to use this information to check for old firmware versions
> with a known bug where using the render and compute command streamers
> simultaneously can cause GPU hangs due issues in firmware scheduling.
>
> Based on patches from Vivaik and Joonas.
>
> There is a little bit of an open around the width required for versions.
> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>
>   #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>   #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>   #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
> ...
>   struct intel_uc_fw_ver {
>           u32 major;
>           u32 minor;
>           u32 patch;
>           u32 build;
>   };
This is copied from generic code which supports firmwares other than 
GuC. Only GuC promises to use 8-bit version components. Other firmwares 
very definitely do not. There is no open.

>
> So we could make the query u8, and refactor the struct intel_uc_fw_ver
> to use u8, or not. To avoid any doubts on why are we assigning u32 to
> u8 I simply opted to use u64. Which avoids the need to add any padding
> too.
I don't follow how potential 8 vs 32 confusion means jump to 64?!

>
> Compile tested only.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> Cc: Jose Souza <jose.souza@intel.com>
> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Cc: John Harrison <John.C.Harrison@Intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_query.c | 32 +++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h       | 11 +++++++++++
>   2 files changed, 43 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
> index 00871ef99792..999687f6a3d4 100644
> --- a/drivers/gpu/drm/i915/i915_query.c
> +++ b/drivers/gpu/drm/i915/i915_query.c
> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct drm_i915_private *i915,
>   	return hwconfig->size;
>   }
>   
> +static int
> +query_guc_submission_version(struct drm_i915_private *i915,
> +			     struct drm_i915_query_item *query)
> +{
> +	struct drm_i915_query_guc_submission_version __user *query_ptr =
> +					    u64_to_user_ptr(query->data_ptr);
> +	struct drm_i915_query_guc_submission_version ver;
> +	struct intel_guc *guc = &to_gt(i915)->uc.guc;
> +	const size_t size = sizeof(ver);
> +	int ret;
> +
> +	if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
> +		return -ENODEV;
> +
> +	ret = copy_query_item(&ver, size, size, query);
> +	if (ret != 0)
> +		return ret;
> +
> +	if (ver.major || ver.minor || ver.patch)
> +		return -EINVAL;
> +
> +	ver.major = guc->submission_version.major;
> +	ver.minor = guc->submission_version.minor;
> +	ver.patch = guc->submission_version.patch;
This needs to include the branch version (currently set to zero) in the 
definition. And the UMD needs to barf if branch comes back as non-zero. 
I.e. there is no guarantee that a branched version will have the w/a + 
fix that they are wanting.

John.


> +
> +	if (copy_to_user(query_ptr, &ver, size))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   					struct drm_i915_query_item *query_item) = {
>   	query_topology_info,
> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
>   	query_memregion_info,
>   	query_hwconfig_blob,
>   	query_geometry_subslices,
> +	query_guc_submission_version,
>   };
>   
>   int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 550c496ce76d..d80d9b5e1eda 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>   	 *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
>   	 *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
>   	 *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
> +	 *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
>   	 */
>   	__u64 query_id;
>   #define DRM_I915_QUERY_TOPOLOGY_INFO		1
> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>   #define DRM_I915_QUERY_MEMORY_REGIONS		4
>   #define DRM_I915_QUERY_HWCONFIG_BLOB		5
>   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES	6
> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION	7
>   /* Must be kept compact -- no holes and well documented */
>   
>   	/**
> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>   	struct drm_i915_memory_region_info regions[];
>   };
>   
> +/**
> +* struct drm_i915_query_guc_submission_version - query GuC submission interface version
> +*/
> +struct drm_i915_query_guc_submission_version {
> +	__u64 major;
> +	__u64 minor;
> +	__u64 patch;
> +};
> +
>   /**
>    * DOC: GuC HWCONFIG blob uAPI
>    *
Tvrtko Ursulin Feb. 7, 2024, 6:49 p.m. UTC | #3
On 07/02/2024 18:12, John Harrison wrote:
> On 2/7/2024 03:56, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Add a new query to the GuC submission interface version.
>>
>> Mesa intends to use this information to check for old firmware versions
>> with a known bug where using the render and compute command streamers
>> simultaneously can cause GPU hangs due issues in firmware scheduling.
>>
>> Based on patches from Vivaik and Joonas.
>>
>> There is a little bit of an open around the width required for versions.
>> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>>
>>   #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>>   #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>>   #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
>> ...
>>   struct intel_uc_fw_ver {
>>           u32 major;
>>           u32 minor;
>>           u32 patch;
>>           u32 build;
>>   };
> This is copied from generic code which supports firmwares other than 
> GuC. Only GuC promises to use 8-bit version components. Other firmwares 
> very definitely do not. There is no open.

Ack.

>>
>> So we could make the query u8, and refactor the struct intel_uc_fw_ver
>> to use u8, or not. To avoid any doubts on why are we assigning u32 to
>> u8 I simply opted to use u64. Which avoids the need to add any padding
>> too.
> I don't follow how potential 8 vs 32 confusion means jump to 64?!

Suggestion was to use u8 in the uapi in order to align with GuC FW ABI (or however it's called), in which case there would be:

    ver.major = guc->submission_version.major;

which would be:

    (u8) = (u32)

And I was anticipating someone not liking that either. Using too wide u64 simply avoids the need to add a padding element to the uapi struct.

If you are positive we need to include a branch number, even though it does not seem to be implemented in the code even(*) then I can make uapi 4x u32 and achieve the same.

(*)
static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 css_value)
{
	/* Get version numbers from the CSS header */
	ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
	ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
	ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
}

No branch field in the CSS header?

And Why is UMD supposed to reject a non-zero branch? Like how would 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can respin if you definitely confirm.

Regards,

Tvrtko

>>
>> Compile tested only.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Kenneth Graunke <kenneth@whitecape.org>
>> Cc: Jose Souza <jose.souza@intel.com>
>> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> Cc: John Harrison <John.C.Harrison@Intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Jani Nikula <jani.nikula@intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_query.c | 32 +++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h       | 11 +++++++++++
>>   2 files changed, 43 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>> b/drivers/gpu/drm/i915/i915_query.c
>> index 00871ef99792..999687f6a3d4 100644
>> --- a/drivers/gpu/drm/i915/i915_query.c
>> +++ b/drivers/gpu/drm/i915/i915_query.c
>> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct 
>> drm_i915_private *i915,
>>       return hwconfig->size;
>>   }
>> +static int
>> +query_guc_submission_version(struct drm_i915_private *i915,
>> +                 struct drm_i915_query_item *query)
>> +{
>> +    struct drm_i915_query_guc_submission_version __user *query_ptr =
>> +                        u64_to_user_ptr(query->data_ptr);
>> +    struct drm_i915_query_guc_submission_version ver;
>> +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
>> +    const size_t size = sizeof(ver);
>> +    int ret;
>> +
>> +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
>> +        return -ENODEV;
>> +
>> +    ret = copy_query_item(&ver, size, size, query);
>> +    if (ret != 0)
>> +        return ret;
>> +
>> +    if (ver.major || ver.minor || ver.patch)
>> +        return -EINVAL;
>> +
>> +    ver.major = guc->submission_version.major;
>> +    ver.minor = guc->submission_version.minor;
>> +    ver.patch = guc->submission_version.patch;
> This needs to include the branch version (currently set to zero) in the 
> definition. And the UMD needs to barf if branch comes back as non-zero. 
> I.e. there is no guarantee that a branched version will have the w/a + 
> fix that they are wanting.
> 
> John.
> 
> 
>> +
>> +    if (copy_to_user(query_ptr, &ver, size))
>> +        return -EFAULT;
>> +
>> +    return 0;
>> +}
>> +
>>   static int (* const i915_query_funcs[])(struct drm_i915_private 
>> *dev_priv,
>>                       struct drm_i915_query_item *query_item) = {
>>       query_topology_info,
>> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct 
>> drm_i915_private *dev_priv,
>>       query_memregion_info,
>>       query_hwconfig_blob,
>>       query_geometry_subslices,
>> +    query_guc_submission_version,
>>   };
>>   int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>> drm_file *file)
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 550c496ce76d..d80d9b5e1eda 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>>        *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct 
>> drm_i915_query_memory_regions)
>>        *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
>>        *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct 
>> drm_i915_query_topology_info)
>> +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct 
>> drm_i915_query_guc_submission_version)
>>        */
>>       __u64 query_id;
>>   #define DRM_I915_QUERY_TOPOLOGY_INFO        1
>> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>>   #define DRM_I915_QUERY_MEMORY_REGIONS        4
>>   #define DRM_I915_QUERY_HWCONFIG_BLOB        5
>>   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
>> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
>>   /* Must be kept compact -- no holes and well documented */
>>       /**
>> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>>       struct drm_i915_memory_region_info regions[];
>>   };
>> +/**
>> +* struct drm_i915_query_guc_submission_version - query GuC submission 
>> interface version
>> +*/
>> +struct drm_i915_query_guc_submission_version {
>> +    __u64 major;
>> +    __u64 minor;
>> +    __u64 patch;
>> +};
>> +
>>   /**
>>    * DOC: GuC HWCONFIG blob uAPI
>>    *
>
John Harrison Feb. 7, 2024, 7:34 p.m. UTC | #4
On 2/7/2024 10:49, Tvrtko Ursulin wrote:
> On 07/02/2024 18:12, John Harrison wrote:
>> On 2/7/2024 03:56, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> Add a new query to the GuC submission interface version.
>>>
>>> Mesa intends to use this information to check for old firmware versions
>>> with a known bug where using the render and compute command streamers
>>> simultaneously can cause GPU hangs due issues in firmware scheduling.
>>>
>>> Based on patches from Vivaik and Joonas.
>>>
>>> There is a little bit of an open around the width required for 
>>> versions.
>>> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>>>
>>>   #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>>>   #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>>>   #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
>>> ...
>>>   struct intel_uc_fw_ver {
>>>           u32 major;
>>>           u32 minor;
>>>           u32 patch;
>>>           u32 build;
>>>   };
>> This is copied from generic code which supports firmwares other than 
>> GuC. Only GuC promises to use 8-bit version components. Other 
>> firmwares very definitely do not. There is no open.
>
> Ack.
>
>>>
>>> So we could make the query u8, and refactor the struct intel_uc_fw_ver
>>> to use u8, or not. To avoid any doubts on why are we assigning u32 to
>>> u8 I simply opted to use u64. Which avoids the need to add any padding
>>> too.
>> I don't follow how potential 8 vs 32 confusion means jump to 64?!
>
> Suggestion was to use u8 in the uapi in order to align with GuC FW ABI 
> (or however it's called), in which case there would be:
>
>    ver.major = guc->submission_version.major;
>
> which would be:
>
>    (u8) = (u32)
>
> And I was anticipating someone not liking that either. Using too wide 
> u64 simply avoids the need to add a padding element to the uapi struct.
>
> If you are positive we need to include a branch number, even though it 
> does not seem to be implemented in the code even(*) then I can make 
> uapi 4x u32 and achieve the same.
It's not implemented in the code because we've never had to, and it is 
yet another train wreck waiting to happen. There are a bunch of issues 
at different levels that need to be resolved. But that is all in the 
kernel and/or firmware and so can be added by a later kernel update when 
necessary. However, if the UMDs are not already taking it into account 
or its not even in the UAPI, then we can't back fill in the kernel 
later, we are just broken.

>
> (*)
> static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 
> css_value)
> {
>     /* Get version numbers from the CSS header */
>     ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
>     ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
>     ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
> }
>
> No branch field in the CSS header?
I think there is, it's just not officially implemented yet.

>
> And Why is UMD supposed to reject a non-zero branch? Like how would 
> 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can 
> respin if you definitely confirm.
Because that is backwards. The branch number goes at the front.

So, for example (using made up numbers, I don't recall offhand what 
versions we have where) say we currently have 0.1.3.0 in tip and 0.1.1.0 
in the last LTS. We then need to ship a critical security fix and back 
port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't become 0.1.1.1 
because that version already exists in the history of tip and does not 
contain the fix. So the LTS gets branched to 1.1.0.0. We then have both 
branches potentially moving forwards with completely independent versioning.

Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel 
versioning. You cannot make any assumptions about what might be in 
1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a stack 
of security fixes but none of the features, workarounds or bug fixes 
that are in 0.1.2.3.

Hence, if the branch number changes then all bets are off. You have to 
start over and reject anything you do not explicitly know about.

This is why we were saying that exposing version numbers to UMDs breaks 
down horribly as soon as we have to start branching. There is no clean 
or simple way to do this.

John.


>
> Regards,
>
> Tvrtko
>
>>>
>>> Compile tested only.
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Kenneth Graunke <kenneth@whitecape.org>
>>> Cc: Jose Souza <jose.souza@intel.com>
>>> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
>>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>> Cc: John Harrison <John.C.Harrison@Intel.com>
>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>> Cc: Jani Nikula <jani.nikula@intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_query.c | 32 
>>> +++++++++++++++++++++++++++++++
>>>   include/uapi/drm/i915_drm.h       | 11 +++++++++++
>>>   2 files changed, 43 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>>> b/drivers/gpu/drm/i915/i915_query.c
>>> index 00871ef99792..999687f6a3d4 100644
>>> --- a/drivers/gpu/drm/i915/i915_query.c
>>> +++ b/drivers/gpu/drm/i915/i915_query.c
>>> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct 
>>> drm_i915_private *i915,
>>>       return hwconfig->size;
>>>   }
>>> +static int
>>> +query_guc_submission_version(struct drm_i915_private *i915,
>>> +                 struct drm_i915_query_item *query)
>>> +{
>>> +    struct drm_i915_query_guc_submission_version __user *query_ptr =
>>> +                        u64_to_user_ptr(query->data_ptr);
>>> +    struct drm_i915_query_guc_submission_version ver;
>>> +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
>>> +    const size_t size = sizeof(ver);
>>> +    int ret;
>>> +
>>> +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
>>> +        return -ENODEV;
>>> +
>>> +    ret = copy_query_item(&ver, size, size, query);
>>> +    if (ret != 0)
>>> +        return ret;
>>> +
>>> +    if (ver.major || ver.minor || ver.patch)
>>> +        return -EINVAL;
>>> +
>>> +    ver.major = guc->submission_version.major;
>>> +    ver.minor = guc->submission_version.minor;
>>> +    ver.patch = guc->submission_version.patch;
>> This needs to include the branch version (currently set to zero) in 
>> the definition. And the UMD needs to barf if branch comes back as 
>> non-zero. I.e. there is no guarantee that a branched version will 
>> have the w/a + fix that they are wanting.
>>
>> John.
>>
>>
>>> +
>>> +    if (copy_to_user(query_ptr, &ver, size))
>>> +        return -EFAULT;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static int (* const i915_query_funcs[])(struct drm_i915_private 
>>> *dev_priv,
>>>                       struct drm_i915_query_item *query_item) = {
>>>       query_topology_info,
>>> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct 
>>> drm_i915_private *dev_priv,
>>>       query_memregion_info,
>>>       query_hwconfig_blob,
>>>       query_geometry_subslices,
>>> +    query_guc_submission_version,
>>>   };
>>>   int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>>> drm_file *file)
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 550c496ce76d..d80d9b5e1eda 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>>>        *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct 
>>> drm_i915_query_memory_regions)
>>>        *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob 
>>> uAPI`)
>>>        *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct 
>>> drm_i915_query_topology_info)
>>> +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct 
>>> drm_i915_query_guc_submission_version)
>>>        */
>>>       __u64 query_id;
>>>   #define DRM_I915_QUERY_TOPOLOGY_INFO        1
>>> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>>>   #define DRM_I915_QUERY_MEMORY_REGIONS        4
>>>   #define DRM_I915_QUERY_HWCONFIG_BLOB        5
>>>   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
>>> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
>>>   /* Must be kept compact -- no holes and well documented */
>>>       /**
>>> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>>>       struct drm_i915_memory_region_info regions[];
>>>   };
>>> +/**
>>> +* struct drm_i915_query_guc_submission_version - query GuC 
>>> submission interface version
>>> +*/
>>> +struct drm_i915_query_guc_submission_version {
>>> +    __u64 major;
>>> +    __u64 minor;
>>> +    __u64 patch;
>>> +};
>>> +
>>>   /**
>>>    * DOC: GuC HWCONFIG blob uAPI
>>>    *
>>
Souza, Jose Feb. 7, 2024, 7:43 p.m. UTC | #5
On Wed, 2024-02-07 at 11:34 -0800, John Harrison wrote:
> On 2/7/2024 10:49, Tvrtko Ursulin wrote:
> > On 07/02/2024 18:12, John Harrison wrote:
> > > On 2/7/2024 03:56, Tvrtko Ursulin wrote:
> > > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > 
> > > > Add a new query to the GuC submission interface version.
> > > > 
> > > > Mesa intends to use this information to check for old firmware versions
> > > > with a known bug where using the render and compute command streamers
> > > > simultaneously can cause GPU hangs due issues in firmware scheduling.
> > > > 
> > > > Based on patches from Vivaik and Joonas.
> > > > 
> > > > There is a little bit of an open around the width required for 
> > > > versions.
> > > > While the GuC FW iface tells they are u8, i915 GuC code uses u32:
> > > > 
> > > >   #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
> > > >   #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
> > > >   #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
> > > > ...
> > > >   struct intel_uc_fw_ver {
> > > >           u32 major;
> > > >           u32 minor;
> > > >           u32 patch;
> > > >           u32 build;
> > > >   };
> > > This is copied from generic code which supports firmwares other than 
> > > GuC. Only GuC promises to use 8-bit version components. Other 
> > > firmwares very definitely do not. There is no open.
> > 
> > Ack.
> > 
> > > > 
> > > > So we could make the query u8, and refactor the struct intel_uc_fw_ver
> > > > to use u8, or not. To avoid any doubts on why are we assigning u32 to
> > > > u8 I simply opted to use u64. Which avoids the need to add any padding
> > > > too.
> > > I don't follow how potential 8 vs 32 confusion means jump to 64?!
> > 
> > Suggestion was to use u8 in the uapi in order to align with GuC FW ABI 
> > (or however it's called), in which case there would be:
> > 
> >    ver.major = guc->submission_version.major;
> > 
> > which would be:
> > 
> >    (u8) = (u32)
> > 
> > And I was anticipating someone not liking that either. Using too wide 
> > u64 simply avoids the need to add a padding element to the uapi struct.
> > 
> > If you are positive we need to include a branch number, even though it 
> > does not seem to be implemented in the code even(*) then I can make 
> > uapi 4x u32 and achieve the same.
> It's not implemented in the code because we've never had to, and it is 
> yet another train wreck waiting to happen. There are a bunch of issues 
> at different levels that need to be resolved. But that is all in the 
> kernel and/or firmware and so can be added by a later kernel update when 
> necessary. However, if the UMDs are not already taking it into account 
> or its not even in the UAPI, then we can't back fill in the kernel 
> later, we are just broken.

This sounds to me like a firmware version for internal testing or for pre-production HW, would any branched firmware be released to customers?

> 
> > 
> > (*)
> > static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 
> > css_value)
> > {
> >     /* Get version numbers from the CSS header */
> >     ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
> >     ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
> >     ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
> > }
> > 
> > No branch field in the CSS header?
> I think there is, it's just not officially implemented yet.
> 
> > 
> > And Why is UMD supposed to reject a non-zero branch? Like how would 
> > 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can 
> > respin if you definitely confirm.
> Because that is backwards. The branch number goes at the front.
> 
> So, for example (using made up numbers, I don't recall offhand what 
> versions we have where) say we currently have 0.1.3.0 in tip and 0.1.1.0 
> in the last LTS. We then need to ship a critical security fix and back 
> port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't become 0.1.1.1 
> because that version already exists in the history of tip and does not 
> contain the fix. So the LTS gets branched to 1.1.0.0. We then have both 
> branches potentially moving forwards with completely independent versioning.
> 
> Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel 
> versioning. You cannot make any assumptions about what might be in 
> 1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a stack 
> of security fixes but none of the features, workarounds or bug fixes 
> that are in 0.1.2.3.
> 
> Hence, if the branch number changes then all bets are off. You have to 
> start over and reject anything you do not explicitly know about.
> 
> This is why we were saying that exposing version numbers to UMDs breaks 
> down horribly as soon as we have to start branching. There is no clean 
> or simple way to do this.
> 
> John.
> 
> 
> > 
> > Regards,
> > 
> > Tvrtko
> > 
> > > > 
> > > > Compile tested only.
> > > > 
> > > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > Cc: Kenneth Graunke <kenneth@whitecape.org>
> > > > Cc: Jose Souza <jose.souza@intel.com>
> > > > Cc: Sagar Ghuge <sagar.ghuge@intel.com>
> > > > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > Cc: John Harrison <John.C.Harrison@Intel.com>
> > > > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
> > > > ---
> > > >   drivers/gpu/drm/i915/i915_query.c | 32 
> > > > +++++++++++++++++++++++++++++++
> > > >   include/uapi/drm/i915_drm.h       | 11 +++++++++++
> > > >   2 files changed, 43 insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_query.c 
> > > > b/drivers/gpu/drm/i915/i915_query.c
> > > > index 00871ef99792..999687f6a3d4 100644
> > > > --- a/drivers/gpu/drm/i915/i915_query.c
> > > > +++ b/drivers/gpu/drm/i915/i915_query.c
> > > > @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct 
> > > > drm_i915_private *i915,
> > > >       return hwconfig->size;
> > > >   }
> > > > +static int
> > > > +query_guc_submission_version(struct drm_i915_private *i915,
> > > > +                 struct drm_i915_query_item *query)
> > > > +{
> > > > +    struct drm_i915_query_guc_submission_version __user *query_ptr =
> > > > +                        u64_to_user_ptr(query->data_ptr);
> > > > +    struct drm_i915_query_guc_submission_version ver;
> > > > +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
> > > > +    const size_t size = sizeof(ver);
> > > > +    int ret;
> > > > +
> > > > +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
> > > > +        return -ENODEV;
> > > > +
> > > > +    ret = copy_query_item(&ver, size, size, query);
> > > > +    if (ret != 0)
> > > > +        return ret;
> > > > +
> > > > +    if (ver.major || ver.minor || ver.patch)
> > > > +        return -EINVAL;
> > > > +
> > > > +    ver.major = guc->submission_version.major;
> > > > +    ver.minor = guc->submission_version.minor;
> > > > +    ver.patch = guc->submission_version.patch;
> > > This needs to include the branch version (currently set to zero) in 
> > > the definition. And the UMD needs to barf if branch comes back as 
> > > non-zero. I.e. there is no guarantee that a branched version will 
> > > have the w/a + fix that they are wanting.
> > > 
> > > John.
> > > 
> > > 
> > > > +
> > > > +    if (copy_to_user(query_ptr, &ver, size))
> > > > +        return -EFAULT;
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > >   static int (* const i915_query_funcs[])(struct drm_i915_private 
> > > > *dev_priv,
> > > >                       struct drm_i915_query_item *query_item) = {
> > > >       query_topology_info,
> > > > @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct 
> > > > drm_i915_private *dev_priv,
> > > >       query_memregion_info,
> > > >       query_hwconfig_blob,
> > > >       query_geometry_subslices,
> > > > +    query_guc_submission_version,
> > > >   };
> > > >   int i915_query_ioctl(struct drm_device *dev, void *data, struct 
> > > > drm_file *file)
> > > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > > index 550c496ce76d..d80d9b5e1eda 100644
> > > > --- a/include/uapi/drm/i915_drm.h
> > > > +++ b/include/uapi/drm/i915_drm.h
> > > > @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
> > > >        *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct 
> > > > drm_i915_query_memory_regions)
> > > >        *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob 
> > > > uAPI`)
> > > >        *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct 
> > > > drm_i915_query_topology_info)
> > > > +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct 
> > > > drm_i915_query_guc_submission_version)
> > > >        */
> > > >       __u64 query_id;
> > > >   #define DRM_I915_QUERY_TOPOLOGY_INFO        1
> > > > @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
> > > >   #define DRM_I915_QUERY_MEMORY_REGIONS        4
> > > >   #define DRM_I915_QUERY_HWCONFIG_BLOB        5
> > > >   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
> > > > +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
> > > >   /* Must be kept compact -- no holes and well documented */
> > > >       /**
> > > > @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
> > > >       struct drm_i915_memory_region_info regions[];
> > > >   };
> > > > +/**
> > > > +* struct drm_i915_query_guc_submission_version - query GuC 
> > > > submission interface version
> > > > +*/
> > > > +struct drm_i915_query_guc_submission_version {
> > > > +    __u64 major;
> > > > +    __u64 minor;
> > > > +    __u64 patch;
> > > > +};
> > > > +
> > > >   /**
> > > >    * DOC: GuC HWCONFIG blob uAPI
> > > >    *
> > > 
>
John Harrison Feb. 7, 2024, 7:52 p.m. UTC | #6
On 2/7/2024 11:43, Souza, Jose wrote:
> On Wed, 2024-02-07 at 11:34 -0800, John Harrison wrote:
>> On 2/7/2024 10:49, Tvrtko Ursulin wrote:
>>> On 07/02/2024 18:12, John Harrison wrote:
>>>> On 2/7/2024 03:56, Tvrtko Ursulin wrote:
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> Add a new query to the GuC submission interface version.
>>>>>
>>>>> Mesa intends to use this information to check for old firmware versions
>>>>> with a known bug where using the render and compute command streamers
>>>>> simultaneously can cause GPU hangs due issues in firmware scheduling.
>>>>>
>>>>> Based on patches from Vivaik and Joonas.
>>>>>
>>>>> There is a little bit of an open around the width required for
>>>>> versions.
>>>>> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>>>>>
>>>>>    #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>>>>>    #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>>>>>    #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
>>>>> ...
>>>>>    struct intel_uc_fw_ver {
>>>>>            u32 major;
>>>>>            u32 minor;
>>>>>            u32 patch;
>>>>>            u32 build;
>>>>>    };
>>>> This is copied from generic code which supports firmwares other than
>>>> GuC. Only GuC promises to use 8-bit version components. Other
>>>> firmwares very definitely do not. There is no open.
>>> Ack.
>>>
>>>>> So we could make the query u8, and refactor the struct intel_uc_fw_ver
>>>>> to use u8, or not. To avoid any doubts on why are we assigning u32 to
>>>>> u8 I simply opted to use u64. Which avoids the need to add any padding
>>>>> too.
>>>> I don't follow how potential 8 vs 32 confusion means jump to 64?!
>>> Suggestion was to use u8 in the uapi in order to align with GuC FW ABI
>>> (or however it's called), in which case there would be:
>>>
>>>     ver.major = guc->submission_version.major;
>>>
>>> which would be:
>>>
>>>     (u8) = (u32)
>>>
>>> And I was anticipating someone not liking that either. Using too wide
>>> u64 simply avoids the need to add a padding element to the uapi struct.
>>>
>>> If you are positive we need to include a branch number, even though it
>>> does not seem to be implemented in the code even(*) then I can make
>>> uapi 4x u32 and achieve the same.
>> It's not implemented in the code because we've never had to, and it is
>> yet another train wreck waiting to happen. There are a bunch of issues
>> at different levels that need to be resolved. But that is all in the
>> kernel and/or firmware and so can be added by a later kernel update when
>> necessary. However, if the UMDs are not already taking it into account
>> or its not even in the UAPI, then we can't back fill in the kernel
>> later, we are just broken.
> This sounds to me like a firmware version for internal testing or for pre-production HW, would any branched firmware be released to customers?
See comments below. Branching is about back porting critical fixes to 
older releases. I.e. supporting LTS releases. There is absolutely 
nothing internal only or testing related about branching.

Just because we haven't had to do so yet doesn't mean we won't need to 
do so tomorrow.

John.

>
>>> (*)
>>> static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32
>>> css_value)
>>> {
>>>      /* Get version numbers from the CSS header */
>>>      ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
>>>      ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
>>>      ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
>>> }
>>>
>>> No branch field in the CSS header?
>> I think there is, it's just not officially implemented yet.
>>
>>> And Why is UMD supposed to reject a non-zero branch? Like how would
>>> 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can
>>> respin if you definitely confirm.
>> Because that is backwards. The branch number goes at the front.
>>
>> So, for example (using made up numbers, I don't recall offhand what
>> versions we have where) say we currently have 0.1.3.0 in tip and 0.1.1.0
>> in the last LTS. We then need to ship a critical security fix and back
>> port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't become 0.1.1.1
>> because that version already exists in the history of tip and does not
>> contain the fix. So the LTS gets branched to 1.1.0.0. We then have both
>> branches potentially moving forwards with completely independent versioning.
>>
>> Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel
>> versioning. You cannot make any assumptions about what might be in
>> 1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a stack
>> of security fixes but none of the features, workarounds or bug fixes
>> that are in 0.1.2.3.
>>
>> Hence, if the branch number changes then all bets are off. You have to
>> start over and reject anything you do not explicitly know about.
>>
>> This is why we were saying that exposing version numbers to UMDs breaks
>> down horribly as soon as we have to start branching. There is no clean
>> or simple way to do this.
>>
>> John.
>>
>>
>>> Regards,
>>>
>>> Tvrtko
>>>
>>>>> Compile tested only.
>>>>>
>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Kenneth Graunke <kenneth@whitecape.org>
>>>>> Cc: Jose Souza <jose.souza@intel.com>
>>>>> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
>>>>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>>>> Cc: John Harrison <John.C.Harrison@Intel.com>
>>>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>>> Cc: Jani Nikula <jani.nikula@intel.com>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/i915_query.c | 32
>>>>> +++++++++++++++++++++++++++++++
>>>>>    include/uapi/drm/i915_drm.h       | 11 +++++++++++
>>>>>    2 files changed, 43 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_query.c
>>>>> b/drivers/gpu/drm/i915/i915_query.c
>>>>> index 00871ef99792..999687f6a3d4 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_query.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_query.c
>>>>> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct
>>>>> drm_i915_private *i915,
>>>>>        return hwconfig->size;
>>>>>    }
>>>>> +static int
>>>>> +query_guc_submission_version(struct drm_i915_private *i915,
>>>>> +                 struct drm_i915_query_item *query)
>>>>> +{
>>>>> +    struct drm_i915_query_guc_submission_version __user *query_ptr =
>>>>> +                        u64_to_user_ptr(query->data_ptr);
>>>>> +    struct drm_i915_query_guc_submission_version ver;
>>>>> +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
>>>>> +    const size_t size = sizeof(ver);
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    ret = copy_query_item(&ver, size, size, query);
>>>>> +    if (ret != 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    if (ver.major || ver.minor || ver.patch)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ver.major = guc->submission_version.major;
>>>>> +    ver.minor = guc->submission_version.minor;
>>>>> +    ver.patch = guc->submission_version.patch;
>>>> This needs to include the branch version (currently set to zero) in
>>>> the definition. And the UMD needs to barf if branch comes back as
>>>> non-zero. I.e. there is no guarantee that a branched version will
>>>> have the w/a + fix that they are wanting.
>>>>
>>>> John.
>>>>
>>>>
>>>>> +
>>>>> +    if (copy_to_user(query_ptr, &ver, size))
>>>>> +        return -EFAULT;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>    static int (* const i915_query_funcs[])(struct drm_i915_private
>>>>> *dev_priv,
>>>>>                        struct drm_i915_query_item *query_item) = {
>>>>>        query_topology_info,
>>>>> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct
>>>>> drm_i915_private *dev_priv,
>>>>>        query_memregion_info,
>>>>>        query_hwconfig_blob,
>>>>>        query_geometry_subslices,
>>>>> +    query_guc_submission_version,
>>>>>    };
>>>>>    int i915_query_ioctl(struct drm_device *dev, void *data, struct
>>>>> drm_file *file)
>>>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>>>> index 550c496ce76d..d80d9b5e1eda 100644
>>>>> --- a/include/uapi/drm/i915_drm.h
>>>>> +++ b/include/uapi/drm/i915_drm.h
>>>>> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>>>>>         *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct
>>>>> drm_i915_query_memory_regions)
>>>>>         *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob
>>>>> uAPI`)
>>>>>         *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct
>>>>> drm_i915_query_topology_info)
>>>>> +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct
>>>>> drm_i915_query_guc_submission_version)
>>>>>         */
>>>>>        __u64 query_id;
>>>>>    #define DRM_I915_QUERY_TOPOLOGY_INFO        1
>>>>> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>>>>>    #define DRM_I915_QUERY_MEMORY_REGIONS        4
>>>>>    #define DRM_I915_QUERY_HWCONFIG_BLOB        5
>>>>>    #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
>>>>> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
>>>>>    /* Must be kept compact -- no holes and well documented */
>>>>>        /**
>>>>> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>>>>>        struct drm_i915_memory_region_info regions[];
>>>>>    };
>>>>> +/**
>>>>> +* struct drm_i915_query_guc_submission_version - query GuC
>>>>> submission interface version
>>>>> +*/
>>>>> +struct drm_i915_query_guc_submission_version {
>>>>> +    __u64 major;
>>>>> +    __u64 minor;
>>>>> +    __u64 patch;
>>>>> +};
>>>>> +
>>>>>    /**
>>>>>     * DOC: GuC HWCONFIG blob uAPI
>>>>>     *
Souza, Jose Feb. 7, 2024, 8:47 p.m. UTC | #7
On Wed, 2024-02-07 at 11:52 -0800, John Harrison wrote:
> On 2/7/2024 11:43, Souza, Jose wrote:
> > On Wed, 2024-02-07 at 11:34 -0800, John Harrison wrote:
> > > On 2/7/2024 10:49, Tvrtko Ursulin wrote:
> > > > On 07/02/2024 18:12, John Harrison wrote:
> > > > > On 2/7/2024 03:56, Tvrtko Ursulin wrote:
> > > > > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > > > 
> > > > > > Add a new query to the GuC submission interface version.
> > > > > > 
> > > > > > Mesa intends to use this information to check for old firmware versions
> > > > > > with a known bug where using the render and compute command streamers
> > > > > > simultaneously can cause GPU hangs due issues in firmware scheduling.
> > > > > > 
> > > > > > Based on patches from Vivaik and Joonas.
> > > > > > 
> > > > > > There is a little bit of an open around the width required for
> > > > > > versions.
> > > > > > While the GuC FW iface tells they are u8, i915 GuC code uses u32:
> > > > > > 
> > > > > >    #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
> > > > > >    #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
> > > > > >    #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
> > > > > > ...
> > > > > >    struct intel_uc_fw_ver {
> > > > > >            u32 major;
> > > > > >            u32 minor;
> > > > > >            u32 patch;
> > > > > >            u32 build;
> > > > > >    };
> > > > > This is copied from generic code which supports firmwares other than
> > > > > GuC. Only GuC promises to use 8-bit version components. Other
> > > > > firmwares very definitely do not. There is no open.
> > > > Ack.
> > > > 
> > > > > > So we could make the query u8, and refactor the struct intel_uc_fw_ver
> > > > > > to use u8, or not. To avoid any doubts on why are we assigning u32 to
> > > > > > u8 I simply opted to use u64. Which avoids the need to add any padding
> > > > > > too.
> > > > > I don't follow how potential 8 vs 32 confusion means jump to 64?!
> > > > Suggestion was to use u8 in the uapi in order to align with GuC FW ABI
> > > > (or however it's called), in which case there would be:
> > > > 
> > > >     ver.major = guc->submission_version.major;
> > > > 
> > > > which would be:
> > > > 
> > > >     (u8) = (u32)
> > > > 
> > > > And I was anticipating someone not liking that either. Using too wide
> > > > u64 simply avoids the need to add a padding element to the uapi struct.
> > > > 
> > > > If you are positive we need to include a branch number, even though it
> > > > does not seem to be implemented in the code even(*) then I can make
> > > > uapi 4x u32 and achieve the same.
> > > It's not implemented in the code because we've never had to, and it is
> > > yet another train wreck waiting to happen. There are a bunch of issues
> > > at different levels that need to be resolved. But that is all in the
> > > kernel and/or firmware and so can be added by a later kernel update when
> > > necessary. However, if the UMDs are not already taking it into account
> > > or its not even in the UAPI, then we can't back fill in the kernel
> > > later, we are just broken.
> > This sounds to me like a firmware version for internal testing or for pre-production HW, would any branched firmware be released to customers?
> See comments below. Branching is about back porting critical fixes to 
> older releases. I.e. supporting LTS releases. There is absolutely 
> nothing internal only or testing related about branching.
> 
> Just because we haven't had to do so yet doesn't mean we won't need to 
> do so tomorrow.
> 
> John.
> 
> > 
> > > > (*)
> > > > static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32
> > > > css_value)
> > > > {
> > > >      /* Get version numbers from the CSS header */
> > > >      ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
> > > >      ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
> > > >      ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
> > > > }
> > > > 
> > > > No branch field in the CSS header?
> > > I think there is, it's just not officially implemented yet.
> > > 
> > > > And Why is UMD supposed to reject a non-zero branch? Like how would
> > > > 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can
> > > > respin if you definitely confirm.
> > > Because that is backwards. The branch number goes at the front.
> > > 
> > > So, for example (using made up numbers, I don't recall offhand what
> > > versions we have where) say we currently have 0.1.3.0 in tip and 0.1.1.0
> > > in the last LTS. We then need to ship a critical security fix and back
> > > port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't become 0.1.1.1
> > > because that version already exists in the history of tip and does not
> > > contain the fix. So the LTS gets branched to 1.1.0.0. We then have both
> > > branches potentially moving forwards with completely independent versioning.
> > > 
> > > Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel
> > > versioning. You cannot make any assumptions about what might be in
> > > 1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a stack
> > > of security fixes but none of the features, workarounds or bug fixes
> > > that are in 0.1.2.3.
> > > 
> > > Hence, if the branch number changes then all bets are off. You have to
> > > start over and reject anything you do not explicitly know about.
> > > 
> > > This is why we were saying that exposing version numbers to UMDs breaks
> > > down horribly as soon as we have to start branching. There is no clean
> > > or simple way to do this.

Odd versioning, would expect that fixes on LTS would increase patch version.
Anyways so unless UMDs needs to check for a bug fixed in branched release we could just check for major.minor.patch?

Just to make sure I understood it correctly, see my examples below using version format branch.major.minor.patch:

- start
tip 0.1.4.0
LTS 0.1.0.0

- secure fix needed in tip and LTS
tip 0.1.5.0
LTS 1.1.0.0

- bug fix on tip
tip 0.1.6.0
LTS 1.1.0.0

- another secure fix needed in tip and LTS
tip 0.1.7.0
LTS 2.1.0.0

Is this how the version is supposed to work?

So yeah I think KMD needs to provide branch version in the uAPI even for i915.

> > > 
> > > John.
> > > 
> > > 
> > > > Regards,
> > > > 
> > > > Tvrtko
> > > > 
> > > > > > Compile tested only.
> > > > > > 
> > > > > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > > > Cc: Kenneth Graunke <kenneth@whitecape.org>
> > > > > > Cc: Jose Souza <jose.souza@intel.com>
> > > > > > Cc: Sagar Ghuge <sagar.ghuge@intel.com>
> > > > > > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > > > Cc: John Harrison <John.C.Harrison@Intel.com>
> > > > > > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > > > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > > > Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
> > > > > > ---
> > > > > >    drivers/gpu/drm/i915/i915_query.c | 32
> > > > > > +++++++++++++++++++++++++++++++
> > > > > >    include/uapi/drm/i915_drm.h       | 11 +++++++++++
> > > > > >    2 files changed, 43 insertions(+)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_query.c
> > > > > > b/drivers/gpu/drm/i915/i915_query.c
> > > > > > index 00871ef99792..999687f6a3d4 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_query.c
> > > > > > +++ b/drivers/gpu/drm/i915/i915_query.c
> > > > > > @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct
> > > > > > drm_i915_private *i915,
> > > > > >        return hwconfig->size;
> > > > > >    }
> > > > > > +static int
> > > > > > +query_guc_submission_version(struct drm_i915_private *i915,
> > > > > > +                 struct drm_i915_query_item *query)
> > > > > > +{
> > > > > > +    struct drm_i915_query_guc_submission_version __user *query_ptr =
> > > > > > +                        u64_to_user_ptr(query->data_ptr);
> > > > > > +    struct drm_i915_query_guc_submission_version ver;
> > > > > > +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
> > > > > > +    const size_t size = sizeof(ver);
> > > > > > +    int ret;
> > > > > > +
> > > > > > +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
> > > > > > +        return -ENODEV;
> > > > > > +
> > > > > > +    ret = copy_query_item(&ver, size, size, query);
> > > > > > +    if (ret != 0)
> > > > > > +        return ret;
> > > > > > +
> > > > > > +    if (ver.major || ver.minor || ver.patch)
> > > > > > +        return -EINVAL;
> > > > > > +
> > > > > > +    ver.major = guc->submission_version.major;
> > > > > > +    ver.minor = guc->submission_version.minor;
> > > > > > +    ver.patch = guc->submission_version.patch;
> > > > > This needs to include the branch version (currently set to zero) in
> > > > > the definition. And the UMD needs to barf if branch comes back as
> > > > > non-zero. I.e. there is no guarantee that a branched version will
> > > > > have the w/a + fix that they are wanting.
> > > > > 
> > > > > John.
> > > > > 
> > > > > 
> > > > > > +
> > > > > > +    if (copy_to_user(query_ptr, &ver, size))
> > > > > > +        return -EFAULT;
> > > > > > +
> > > > > > +    return 0;
> > > > > > +}
> > > > > > +
> > > > > >    static int (* const i915_query_funcs[])(struct drm_i915_private
> > > > > > *dev_priv,
> > > > > >                        struct drm_i915_query_item *query_item) = {
> > > > > >        query_topology_info,
> > > > > > @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct
> > > > > > drm_i915_private *dev_priv,
> > > > > >        query_memregion_info,
> > > > > >        query_hwconfig_blob,
> > > > > >        query_geometry_subslices,
> > > > > > +    query_guc_submission_version,
> > > > > >    };
> > > > > >    int i915_query_ioctl(struct drm_device *dev, void *data, struct
> > > > > > drm_file *file)
> > > > > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > > > > > index 550c496ce76d..d80d9b5e1eda 100644
> > > > > > --- a/include/uapi/drm/i915_drm.h
> > > > > > +++ b/include/uapi/drm/i915_drm.h
> > > > > > @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
> > > > > >         *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct
> > > > > > drm_i915_query_memory_regions)
> > > > > >         *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob
> > > > > > uAPI`)
> > > > > >         *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct
> > > > > > drm_i915_query_topology_info)
> > > > > > +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct
> > > > > > drm_i915_query_guc_submission_version)
> > > > > >         */
> > > > > >        __u64 query_id;
> > > > > >    #define DRM_I915_QUERY_TOPOLOGY_INFO        1
> > > > > > @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
> > > > > >    #define DRM_I915_QUERY_MEMORY_REGIONS        4
> > > > > >    #define DRM_I915_QUERY_HWCONFIG_BLOB        5
> > > > > >    #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
> > > > > > +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
> > > > > >    /* Must be kept compact -- no holes and well documented */
> > > > > >        /**
> > > > > > @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
> > > > > >        struct drm_i915_memory_region_info regions[];
> > > > > >    };
> > > > > > +/**
> > > > > > +* struct drm_i915_query_guc_submission_version - query GuC
> > > > > > submission interface version
> > > > > > +*/
> > > > > > +struct drm_i915_query_guc_submission_version {
> > > > > > +    __u64 major;
> > > > > > +    __u64 minor;
> > > > > > +    __u64 patch;
> > > > > > +};
> > > > > > +
> > > > > >    /**
> > > > > >     * DOC: GuC HWCONFIG blob uAPI
> > > > > >     *
>
John Harrison Feb. 7, 2024, 9:16 p.m. UTC | #8
On 2/7/2024 12:47, Souza, Jose wrote:
> On Wed, 2024-02-07 at 11:52 -0800, John Harrison wrote:
>> On 2/7/2024 11:43, Souza, Jose wrote:
>>> On Wed, 2024-02-07 at 11:34 -0800, John Harrison wrote:
>>>> On 2/7/2024 10:49, Tvrtko Ursulin wrote:
>>>>> On 07/02/2024 18:12, John Harrison wrote:
>>>>>> On 2/7/2024 03:56, Tvrtko Ursulin wrote:
>>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>>
>>>>>>> Add a new query to the GuC submission interface version.
>>>>>>>
>>>>>>> Mesa intends to use this information to check for old firmware versions
>>>>>>> with a known bug where using the render and compute command streamers
>>>>>>> simultaneously can cause GPU hangs due issues in firmware scheduling.
>>>>>>>
>>>>>>> Based on patches from Vivaik and Joonas.
>>>>>>>
>>>>>>> There is a little bit of an open around the width required for
>>>>>>> versions.
>>>>>>> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>>>>>>>
>>>>>>>     #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>>>>>>>     #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>>>>>>>     #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
>>>>>>> ...
>>>>>>>     struct intel_uc_fw_ver {
>>>>>>>             u32 major;
>>>>>>>             u32 minor;
>>>>>>>             u32 patch;
>>>>>>>             u32 build;
>>>>>>>     };
>>>>>> This is copied from generic code which supports firmwares other than
>>>>>> GuC. Only GuC promises to use 8-bit version components. Other
>>>>>> firmwares very definitely do not. There is no open.
>>>>> Ack.
>>>>>
>>>>>>> So we could make the query u8, and refactor the struct intel_uc_fw_ver
>>>>>>> to use u8, or not. To avoid any doubts on why are we assigning u32 to
>>>>>>> u8 I simply opted to use u64. Which avoids the need to add any padding
>>>>>>> too.
>>>>>> I don't follow how potential 8 vs 32 confusion means jump to 64?!
>>>>> Suggestion was to use u8 in the uapi in order to align with GuC FW ABI
>>>>> (or however it's called), in which case there would be:
>>>>>
>>>>>      ver.major = guc->submission_version.major;
>>>>>
>>>>> which would be:
>>>>>
>>>>>      (u8) = (u32)
>>>>>
>>>>> And I was anticipating someone not liking that either. Using too wide
>>>>> u64 simply avoids the need to add a padding element to the uapi struct.
>>>>>
>>>>> If you are positive we need to include a branch number, even though it
>>>>> does not seem to be implemented in the code even(*) then I can make
>>>>> uapi 4x u32 and achieve the same.
>>>> It's not implemented in the code because we've never had to, and it is
>>>> yet another train wreck waiting to happen. There are a bunch of issues
>>>> at different levels that need to be resolved. But that is all in the
>>>> kernel and/or firmware and so can be added by a later kernel update when
>>>> necessary. However, if the UMDs are not already taking it into account
>>>> or its not even in the UAPI, then we can't back fill in the kernel
>>>> later, we are just broken.
>>> This sounds to me like a firmware version for internal testing or for pre-production HW, would any branched firmware be released to customers?
>> See comments below. Branching is about back porting critical fixes to
>> older releases. I.e. supporting LTS releases. There is absolutely
>> nothing internal only or testing related about branching.
>>
>> Just because we haven't had to do so yet doesn't mean we won't need to
>> do so tomorrow.
>>
>> John.
>>
>>>>> (*)
>>>>> static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32
>>>>> css_value)
>>>>> {
>>>>>       /* Get version numbers from the CSS header */
>>>>>       ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
>>>>>       ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
>>>>>       ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
>>>>> }
>>>>>
>>>>> No branch field in the CSS header?
>>>> I think there is, it's just not officially implemented yet.
>>>>
>>>>> And Why is UMD supposed to reject a non-zero branch? Like how would
>>>>> 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can
>>>>> respin if you definitely confirm.
>>>> Because that is backwards. The branch number goes at the front.
>>>>
>>>> So, for example (using made up numbers, I don't recall offhand what
>>>> versions we have where) say we currently have 0.1.3.0 in tip and 0.1.1.0
>>>> in the last LTS. We then need to ship a critical security fix and back
>>>> port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't become 0.1.1.1
>>>> because that version already exists in the history of tip and does not
>>>> contain the fix. So the LTS gets branched to 1.1.0.0. We then have both
>>>> branches potentially moving forwards with completely independent versioning.
>>>>
>>>> Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel
>>>> versioning. You cannot make any assumptions about what might be in
>>>> 1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a stack
>>>> of security fixes but none of the features, workarounds or bug fixes
>>>> that are in 0.1.2.3.
>>>>
>>>> Hence, if the branch number changes then all bets are off. You have to
>>>> start over and reject anything you do not explicitly know about.
>>>>
>>>> This is why we were saying that exposing version numbers to UMDs breaks
>>>> down horribly as soon as we have to start branching. There is no clean
>>>> or simple way to do this.
> Odd versioning, would expect that fixes on LTS would increase patch version.
You can't. That would create multiple firmware entities with the same 
version number.

E.g. everything is on 0.1.2.3. Tip moves on to 0.1.2.4, 0.1.2.5, 
0.1.3.0, 0.1.4.0 but LTS stays on 0.1.2.3. We then find a critical 
security issue that must be fixed. So tip moves on to 0.1.4.1 but LTS 
cannot move to 0.1.2.4 because that version already exists out in user's 
systems and does not have the fix. But LTS also can't move to 0.1.4.1 
because that includes a whole bunch of new features that we are not 
allowed to push to the LTS branch. So we need to branch and create 
1.1.0.0 which is 0.1.2.3 + security fix. LTS is now on 1.1.0.0 and tip 
is on 0.1.4.1. Both branches can continue incrementing as more patches / 
updates are added to each as appropriate. But the version numbers are 
now totally diverged and have no meaning when compared to each other.

> Anyways so unless UMDs needs to check for a bug fixed in branched release we could just check for major.minor.patch?
As above, you have to check for the branch. Just because m.m.p has a 
higher number than you desire doesn't mean your fix/feature is present 
because that m.m.p might be on a different branch.

>
> Just to make sure I understood it correctly, see my examples below using version format branch.major.minor.patch:
>
> - start
> tip 0.1.4.0
> LTS 0.1.0.0
>
> - secure fix needed in tip and LTS
> tip 0.1.5.0
> LTS 1.1.0.0
>
> - bug fix on tip
> tip 0.1.6.0
> LTS 1.1.0.0
>
> - another secure fix needed in tip and LTS
> tip 0.1.7.0
> LTS 2.1.0.0
>
> Is this how the version is supposed to work?
More or less. Bug fixes are generally only patch level rather than minor 
level bumps but you could potentially have a security fix that requires 
a new interface addition as well.

And simply adding a new bug fix patch to the LTS does not necessarily 
require a branch bump. Assuming that you are talking about the same LTS 
Linux release, the second fix would go in to the same LTS firmware 
branch. I.e., as 1.1.0.1 not 2.1.0.0.

>
> So yeah I think KMD needs to provide branch version in the uAPI even for i915.
And the UMD needs to check it and reject any branch it does not 
explicitly recognise.

John.

>
>>>> John.
>>>>
>>>>
>>>>> Regards,
>>>>>
>>>>> Tvrtko
>>>>>
>>>>>>> Compile tested only.
>>>>>>>
>>>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>> Cc: Kenneth Graunke <kenneth@whitecape.org>
>>>>>>> Cc: Jose Souza <jose.souza@intel.com>
>>>>>>> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
>>>>>>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>>>>>> Cc: John Harrison <John.C.Harrison@Intel.com>
>>>>>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>>>>> Cc: Jani Nikula <jani.nikula@intel.com>
>>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/i915/i915_query.c | 32
>>>>>>> +++++++++++++++++++++++++++++++
>>>>>>>     include/uapi/drm/i915_drm.h       | 11 +++++++++++
>>>>>>>     2 files changed, 43 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_query.c
>>>>>>> b/drivers/gpu/drm/i915/i915_query.c
>>>>>>> index 00871ef99792..999687f6a3d4 100644
>>>>>>> --- a/drivers/gpu/drm/i915/i915_query.c
>>>>>>> +++ b/drivers/gpu/drm/i915/i915_query.c
>>>>>>> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct
>>>>>>> drm_i915_private *i915,
>>>>>>>         return hwconfig->size;
>>>>>>>     }
>>>>>>> +static int
>>>>>>> +query_guc_submission_version(struct drm_i915_private *i915,
>>>>>>> +                 struct drm_i915_query_item *query)
>>>>>>> +{
>>>>>>> +    struct drm_i915_query_guc_submission_version __user *query_ptr =
>>>>>>> +                        u64_to_user_ptr(query->data_ptr);
>>>>>>> +    struct drm_i915_query_guc_submission_version ver;
>>>>>>> +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
>>>>>>> +    const size_t size = sizeof(ver);
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
>>>>>>> +        return -ENODEV;
>>>>>>> +
>>>>>>> +    ret = copy_query_item(&ver, size, size, query);
>>>>>>> +    if (ret != 0)
>>>>>>> +        return ret;
>>>>>>> +
>>>>>>> +    if (ver.major || ver.minor || ver.patch)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    ver.major = guc->submission_version.major;
>>>>>>> +    ver.minor = guc->submission_version.minor;
>>>>>>> +    ver.patch = guc->submission_version.patch;
>>>>>> This needs to include the branch version (currently set to zero) in
>>>>>> the definition. And the UMD needs to barf if branch comes back as
>>>>>> non-zero. I.e. there is no guarantee that a branched version will
>>>>>> have the w/a + fix that they are wanting.
>>>>>>
>>>>>> John.
>>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +    if (copy_to_user(query_ptr, &ver, size))
>>>>>>> +        return -EFAULT;
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>     static int (* const i915_query_funcs[])(struct drm_i915_private
>>>>>>> *dev_priv,
>>>>>>>                         struct drm_i915_query_item *query_item) = {
>>>>>>>         query_topology_info,
>>>>>>> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct
>>>>>>> drm_i915_private *dev_priv,
>>>>>>>         query_memregion_info,
>>>>>>>         query_hwconfig_blob,
>>>>>>>         query_geometry_subslices,
>>>>>>> +    query_guc_submission_version,
>>>>>>>     };
>>>>>>>     int i915_query_ioctl(struct drm_device *dev, void *data, struct
>>>>>>> drm_file *file)
>>>>>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>>>>>> index 550c496ce76d..d80d9b5e1eda 100644
>>>>>>> --- a/include/uapi/drm/i915_drm.h
>>>>>>> +++ b/include/uapi/drm/i915_drm.h
>>>>>>> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>>>>>>>          *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct
>>>>>>> drm_i915_query_memory_regions)
>>>>>>>          *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob
>>>>>>> uAPI`)
>>>>>>>          *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct
>>>>>>> drm_i915_query_topology_info)
>>>>>>> +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct
>>>>>>> drm_i915_query_guc_submission_version)
>>>>>>>          */
>>>>>>>         __u64 query_id;
>>>>>>>     #define DRM_I915_QUERY_TOPOLOGY_INFO        1
>>>>>>> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>>>>>>>     #define DRM_I915_QUERY_MEMORY_REGIONS        4
>>>>>>>     #define DRM_I915_QUERY_HWCONFIG_BLOB        5
>>>>>>>     #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
>>>>>>> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
>>>>>>>     /* Must be kept compact -- no holes and well documented */
>>>>>>>         /**
>>>>>>> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>>>>>>>         struct drm_i915_memory_region_info regions[];
>>>>>>>     };
>>>>>>> +/**
>>>>>>> +* struct drm_i915_query_guc_submission_version - query GuC
>>>>>>> submission interface version
>>>>>>> +*/
>>>>>>> +struct drm_i915_query_guc_submission_version {
>>>>>>> +    __u64 major;
>>>>>>> +    __u64 minor;
>>>>>>> +    __u64 patch;
>>>>>>> +};
>>>>>>> +
>>>>>>>     /**
>>>>>>>      * DOC: GuC HWCONFIG blob uAPI
>>>>>>>      *
Tvrtko Ursulin Feb. 8, 2024, 8:41 a.m. UTC | #9
On 07/02/2024 19:34, John Harrison wrote:
> On 2/7/2024 10:49, Tvrtko Ursulin wrote:
>> On 07/02/2024 18:12, John Harrison wrote:
>>> On 2/7/2024 03:56, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Add a new query to the GuC submission interface version.
>>>>
>>>> Mesa intends to use this information to check for old firmware versions
>>>> with a known bug where using the render and compute command streamers
>>>> simultaneously can cause GPU hangs due issues in firmware scheduling.
>>>>
>>>> Based on patches from Vivaik and Joonas.
>>>>
>>>> There is a little bit of an open around the width required for 
>>>> versions.
>>>> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>>>>
>>>>   #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>>>>   #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>>>>   #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
>>>> ...
>>>>   struct intel_uc_fw_ver {
>>>>           u32 major;
>>>>           u32 minor;
>>>>           u32 patch;
>>>>           u32 build;
>>>>   };
>>> This is copied from generic code which supports firmwares other than 
>>> GuC. Only GuC promises to use 8-bit version components. Other 
>>> firmwares very definitely do not. There is no open.
>>
>> Ack.
>>
>>>>
>>>> So we could make the query u8, and refactor the struct intel_uc_fw_ver
>>>> to use u8, or not. To avoid any doubts on why are we assigning u32 to
>>>> u8 I simply opted to use u64. Which avoids the need to add any padding
>>>> too.
>>> I don't follow how potential 8 vs 32 confusion means jump to 64?!
>>
>> Suggestion was to use u8 in the uapi in order to align with GuC FW ABI 
>> (or however it's called), in which case there would be:
>>
>>    ver.major = guc->submission_version.major;
>>
>> which would be:
>>
>>    (u8) = (u32)
>>
>> And I was anticipating someone not liking that either. Using too wide 
>> u64 simply avoids the need to add a padding element to the uapi struct.
>>
>> If you are positive we need to include a branch number, even though it 
>> does not seem to be implemented in the code even(*) then I can make 
>> uapi 4x u32 and achieve the same.
> It's not implemented in the code because we've never had to, and it is 
> yet another train wreck waiting to happen. There are a bunch of issues 
> at different levels that need to be resolved. But that is all in the 
> kernel and/or firmware and so can be added by a later kernel update when 
> necessary. However, if the UMDs are not already taking it into account 
> or its not even in the UAPI, then we can't back fill in the kernel 
> later, we are just broken.
> 
>>
>> (*)
>> static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 
>> css_value)
>> {
>>     /* Get version numbers from the CSS header */
>>     ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
>>     ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
>>     ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
>> }
>>
>> No branch field in the CSS header?
> I think there is, it's just not officially implemented yet.
> 
>>
>> And Why is UMD supposed to reject a non-zero branch? Like how would 
>> 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I can 
>> respin if you definitely confirm.
> Because that is backwards. The branch number goes at the front.
> 
> So, for example (using made up numbers, I don't recall offhand what 
> versions we have where) say we currently have 0.1.3.0 in tip and 0.1.1.0 
> in the last LTS. We then need to ship a critical security fix and back 
> port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't become 0.1.1.1 
> because that version already exists in the history of tip and does not 
> contain the fix. So the LTS gets branched to 1.1.0.0. We then have both 
> branches potentially moving forwards with completely independent 
> versioning.
> 
> Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel 
> versioning. You cannot make any assumptions about what might be in 
> 1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a stack 
> of security fixes but none of the features, workarounds or bug fixes 
> that are in 0.1.2.3.
> 
> Hence, if the branch number changes then all bets are off. You have to 
> start over and reject anything you do not explicitly know about.
> 
> This is why we were saying that exposing version numbers to UMDs breaks 
> down horribly as soon as we have to start branching. There is no clean 
> or simple way to do this.

Right, thank you, I know we talked about the challenges with version 
numbers in the past and fully agreed. I just did not think to idea is to 
conceptually put the branch number first.

(It is called build btw in the i915 struct if that needs cleanup at some 
point. Or maybe name depends on the firmware type.)

But as the plan to piggy back on the existing semaphore capability flag 
has failed and i915 definitely does not want to keep a database of 
version branches to bugs fixes, and Mesa is immovable that they cannot 
ship without something, agreement was to let them have it that 
something. At least from the pretend level one can say it makes sense to 
expose the version and don't care what people are doing with it. Even if 
in practice it potentially never manages to see any old firmware.

Hmmm although now I can imagine that if the policy needs to be reject 
unknown branch numbers, it will make a "nice" maintenance burden to Mesa 
to ship updates synchronized with the firmware releases, in order to 
keep the feature active i.e. not inadvertently revert to disabled when 
say 0.1.1.3 goes to 1.1.1.3 (or 1.1.1.4) to unrelated fix getting added.

Jose could you double check that angle please? Will it just work or will 
it need constant Mesa updates in this situation otherwise we are back at 
square one - silently not exposing maximum performance.

Anyways, I've sent v2 with the branch included and fields shrunk to u32 
since as mentioned before four fields now align to quad-word nicely.

Regards,

Tvrtko


> 
> John.
> 
> 
>>
>> Regards,
>>
>> Tvrtko
>>
>>>>
>>>> Compile tested only.
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Cc: Kenneth Graunke <kenneth@whitecape.org>
>>>> Cc: Jose Souza <jose.souza@intel.com>
>>>> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
>>>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>>> Cc: John Harrison <John.C.Harrison@Intel.com>
>>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>> Cc: Jani Nikula <jani.nikula@intel.com>
>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_query.c | 32 
>>>> +++++++++++++++++++++++++++++++
>>>>   include/uapi/drm/i915_drm.h       | 11 +++++++++++
>>>>   2 files changed, 43 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>>>> b/drivers/gpu/drm/i915/i915_query.c
>>>> index 00871ef99792..999687f6a3d4 100644
>>>> --- a/drivers/gpu/drm/i915/i915_query.c
>>>> +++ b/drivers/gpu/drm/i915/i915_query.c
>>>> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct 
>>>> drm_i915_private *i915,
>>>>       return hwconfig->size;
>>>>   }
>>>> +static int
>>>> +query_guc_submission_version(struct drm_i915_private *i915,
>>>> +                 struct drm_i915_query_item *query)
>>>> +{
>>>> +    struct drm_i915_query_guc_submission_version __user *query_ptr =
>>>> +                        u64_to_user_ptr(query->data_ptr);
>>>> +    struct drm_i915_query_guc_submission_version ver;
>>>> +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
>>>> +    const size_t size = sizeof(ver);
>>>> +    int ret;
>>>> +
>>>> +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
>>>> +        return -ENODEV;
>>>> +
>>>> +    ret = copy_query_item(&ver, size, size, query);
>>>> +    if (ret != 0)
>>>> +        return ret;
>>>> +
>>>> +    if (ver.major || ver.minor || ver.patch)
>>>> +        return -EINVAL;
>>>> +
>>>> +    ver.major = guc->submission_version.major;
>>>> +    ver.minor = guc->submission_version.minor;
>>>> +    ver.patch = guc->submission_version.patch;
>>> This needs to include the branch version (currently set to zero) in 
>>> the definition. And the UMD needs to barf if branch comes back as 
>>> non-zero. I.e. there is no guarantee that a branched version will 
>>> have the w/a + fix that they are wanting.
>>>
>>> John.
>>>
>>>
>>>> +
>>>> +    if (copy_to_user(query_ptr, &ver, size))
>>>> +        return -EFAULT;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static int (* const i915_query_funcs[])(struct drm_i915_private 
>>>> *dev_priv,
>>>>                       struct drm_i915_query_item *query_item) = {
>>>>       query_topology_info,
>>>> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct 
>>>> drm_i915_private *dev_priv,
>>>>       query_memregion_info,
>>>>       query_hwconfig_blob,
>>>>       query_geometry_subslices,
>>>> +    query_guc_submission_version,
>>>>   };
>>>>   int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>>>> drm_file *file)
>>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>>> index 550c496ce76d..d80d9b5e1eda 100644
>>>> --- a/include/uapi/drm/i915_drm.h
>>>> +++ b/include/uapi/drm/i915_drm.h
>>>> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>>>>        *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct 
>>>> drm_i915_query_memory_regions)
>>>>        *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob 
>>>> uAPI`)
>>>>        *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct 
>>>> drm_i915_query_topology_info)
>>>> +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct 
>>>> drm_i915_query_guc_submission_version)
>>>>        */
>>>>       __u64 query_id;
>>>>   #define DRM_I915_QUERY_TOPOLOGY_INFO        1
>>>> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>>>>   #define DRM_I915_QUERY_MEMORY_REGIONS        4
>>>>   #define DRM_I915_QUERY_HWCONFIG_BLOB        5
>>>>   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
>>>> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
>>>>   /* Must be kept compact -- no holes and well documented */
>>>>       /**
>>>> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>>>>       struct drm_i915_memory_region_info regions[];
>>>>   };
>>>> +/**
>>>> +* struct drm_i915_query_guc_submission_version - query GuC 
>>>> submission interface version
>>>> +*/
>>>> +struct drm_i915_query_guc_submission_version {
>>>> +    __u64 major;
>>>> +    __u64 minor;
>>>> +    __u64 patch;
>>>> +};
>>>> +
>>>>   /**
>>>>    * DOC: GuC HWCONFIG blob uAPI
>>>>    *
>>>
>
John Harrison Feb. 8, 2024, 5:46 p.m. UTC | #10
On 2/8/2024 00:41, Tvrtko Ursulin wrote:
> On 07/02/2024 19:34, John Harrison wrote:
>> On 2/7/2024 10:49, Tvrtko Ursulin wrote:
>>> On 07/02/2024 18:12, John Harrison wrote:
>>>> On 2/7/2024 03:56, Tvrtko Ursulin wrote:
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> Add a new query to the GuC submission interface version.
>>>>>
>>>>> Mesa intends to use this information to check for old firmware 
>>>>> versions
>>>>> with a known bug where using the render and compute command streamers
>>>>> simultaneously can cause GPU hangs due issues in firmware scheduling.
>>>>>
>>>>> Based on patches from Vivaik and Joonas.
>>>>>
>>>>> There is a little bit of an open around the width required for 
>>>>> versions.
>>>>> While the GuC FW iface tells they are u8, i915 GuC code uses u32:
>>>>>
>>>>>   #define CSS_SW_VERSION_UC_MAJOR               (0xFF << 16)
>>>>>   #define CSS_SW_VERSION_UC_MINOR               (0xFF << 8)
>>>>>   #define CSS_SW_VERSION_UC_PATCH               (0xFF << 0)
>>>>> ...
>>>>>   struct intel_uc_fw_ver {
>>>>>           u32 major;
>>>>>           u32 minor;
>>>>>           u32 patch;
>>>>>           u32 build;
>>>>>   };
>>>> This is copied from generic code which supports firmwares other 
>>>> than GuC. Only GuC promises to use 8-bit version components. Other 
>>>> firmwares very definitely do not. There is no open.
>>>
>>> Ack.
>>>
>>>>>
>>>>> So we could make the query u8, and refactor the struct 
>>>>> intel_uc_fw_ver
>>>>> to use u8, or not. To avoid any doubts on why are we assigning u32 to
>>>>> u8 I simply opted to use u64. Which avoids the need to add any 
>>>>> padding
>>>>> too.
>>>> I don't follow how potential 8 vs 32 confusion means jump to 64?!
>>>
>>> Suggestion was to use u8 in the uapi in order to align with GuC FW 
>>> ABI (or however it's called), in which case there would be:
>>>
>>>    ver.major = guc->submission_version.major;
>>>
>>> which would be:
>>>
>>>    (u8) = (u32)
>>>
>>> And I was anticipating someone not liking that either. Using too 
>>> wide u64 simply avoids the need to add a padding element to the uapi 
>>> struct.
>>>
>>> If you are positive we need to include a branch number, even though 
>>> it does not seem to be implemented in the code even(*) then I can 
>>> make uapi 4x u32 and achieve the same.
>> It's not implemented in the code because we've never had to, and it 
>> is yet another train wreck waiting to happen. There are a bunch of 
>> issues at different levels that need to be resolved. But that is all 
>> in the kernel and/or firmware and so can be added by a later kernel 
>> update when necessary. However, if the UMDs are not already taking it 
>> into account or its not even in the UAPI, then we can't back fill in 
>> the kernel later, we are just broken.
>>
>>>
>>> (*)
>>> static void uc_unpack_css_version(struct intel_uc_fw_ver *ver, u32 
>>> css_value)
>>> {
>>>     /* Get version numbers from the CSS header */
>>>     ver->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css_value);
>>>     ver->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css_value);
>>>     ver->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css_value);
>>> }
>>>
>>> No branch field in the CSS header?
>> I think there is, it's just not officially implemented yet.
>>
>>>
>>> And Why is UMD supposed to reject a non-zero branch? Like how would 
>>> 1.1.3.0 be fine and 1.1.3.1 be bad? I don't get it. But anyway, I 
>>> can respin if you definitely confirm.
>> Because that is backwards. The branch number goes at the front.
>>
>> So, for example (using made up numbers, I don't recall offhand what 
>> versions we have where) say we currently have 0.1.3.0 in tip and 
>> 0.1.1.0 in the last LTS. We then need to ship a critical security fix 
>> and back port it to the LTS. Tip becomes 0.1.3.1 but the LTS can't 
>> become 0.1.1.1 because that version already exists in the history of 
>> tip and does not contain the fix. So the LTS gets branched to 
>> 1.1.0.0. We then have both branches potentially moving forwards with 
>> completely independent versioning.
>>
>> Exactly the same as 5.8.x, 5.9,y, 6.0.z, etc in the Linux kernel 
>> versioning. You cannot make any assumptions about what might be in 
>> 1.4.5.6 compared to 0.1.2.3. 1.4.5.6 could actually 0.1.0.3 with a 
>> stack of security fixes but none of the features, workarounds or bug 
>> fixes that are in 0.1.2.3.
>>
>> Hence, if the branch number changes then all bets are off. You have 
>> to start over and reject anything you do not explicitly know about.
>>
>> This is why we were saying that exposing version numbers to UMDs 
>> breaks down horribly as soon as we have to start branching. There is 
>> no clean or simple way to do this.
>
> Right, thank you, I know we talked about the challenges with version 
> numbers in the past and fully agreed. I just did not think to idea is 
> to conceptually put the branch number first.
>
> (It is called build btw in the i915 struct if that needs cleanup at 
> some point. Or maybe name depends on the firmware type.)
That's different. Some of the firmware files we have do have a build 
number. As I said before, branching isn't really implemented yet because 
we've never had to use for real. And generally, we don't spend time 
implementing stuff in the KMD that isn't being used. But we definitely 
need to make sure it is present in any relevant UAPIs so that if/when we 
do need to start using it, we can.

>
> But as the plan to piggy back on the existing semaphore capability 
> flag has failed and i915 definitely does not want to keep a database 
> of version branches to bugs fixes, and Mesa is immovable that they 
> cannot ship without something, agreement was to let them have it that 
> something. At least from the pretend level one can say it makes sense 
> to expose the version and don't care what people are doing with it. 
> Even if in practice it potentially never manages to see any old firmware.
>
> Hmmm although now I can imagine that if the policy needs to be reject 
> unknown branch numbers, it will make a "nice" maintenance burden to 
> Mesa to ship updates synchronized with the firmware releases, in order 
> to keep the feature active i.e. not inadvertently revert to disabled 
> when say 0.1.1.3 goes to 1.1.1.3 (or 1.1.1.4) to unrelated fix getting 
> added.
>
> Jose could you double check that angle please? Will it just work or 
> will it need constant Mesa updates in this situation otherwise we are 
> back at square one - silently not exposing maximum performance.
>
I believe that any UMD using this API will have to manually update 
whenever a new branch is released. There isn't really any way around 
that because we can't know in advance what random point in history will 
need to be the baseline for some new branch. All we can do is hope that 
we never actually need to do a branch! Or we have some other baseline 
mechanism that the UMDs can use to say 'if this other version is greater 
than X then it guarantees the GuC must be new enough'. E.g. check the 
LTS version and say that LTS X was release after GuC version Y therefore 
any branch baseline must be at least Y. But that is still a future 
update to be made when the next LTS is settled.

And of course, the hope is that we never have to create a GuC branch. 
Creating a branch means we've had some horrific issue with an LTS and a 
whole bunch of teams are having to jump through burning hoops to get 
everything in place to support the fix!

John.

> Anyways, I've sent v2 with the branch included and fields shrunk to 
> u32 since as mentioned before four fields now align to quad-word nicely.
>
> Regards,
>
> Tvrtko
>
>
>>
>> John.
>>
>>
>>>
>>> Regards,
>>>
>>> Tvrtko
>>>
>>>>>
>>>>> Compile tested only.
>>>>>
>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Kenneth Graunke <kenneth@whitecape.org>
>>>>> Cc: Jose Souza <jose.souza@intel.com>
>>>>> Cc: Sagar Ghuge <sagar.ghuge@intel.com>
>>>>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>>>> Cc: John Harrison <John.C.Harrison@Intel.com>
>>>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>>> Cc: Jani Nikula <jani.nikula@intel.com>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/i915_query.c | 32 
>>>>> +++++++++++++++++++++++++++++++
>>>>>   include/uapi/drm/i915_drm.h       | 11 +++++++++++
>>>>>   2 files changed, 43 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_query.c 
>>>>> b/drivers/gpu/drm/i915/i915_query.c
>>>>> index 00871ef99792..999687f6a3d4 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_query.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_query.c
>>>>> @@ -551,6 +551,37 @@ static int query_hwconfig_blob(struct 
>>>>> drm_i915_private *i915,
>>>>>       return hwconfig->size;
>>>>>   }
>>>>> +static int
>>>>> +query_guc_submission_version(struct drm_i915_private *i915,
>>>>> +                 struct drm_i915_query_item *query)
>>>>> +{
>>>>> +    struct drm_i915_query_guc_submission_version __user *query_ptr =
>>>>> + u64_to_user_ptr(query->data_ptr);
>>>>> +    struct drm_i915_query_guc_submission_version ver;
>>>>> +    struct intel_guc *guc = &to_gt(i915)->uc.guc;
>>>>> +    const size_t size = sizeof(ver);
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    ret = copy_query_item(&ver, size, size, query);
>>>>> +    if (ret != 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    if (ver.major || ver.minor || ver.patch)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ver.major = guc->submission_version.major;
>>>>> +    ver.minor = guc->submission_version.minor;
>>>>> +    ver.patch = guc->submission_version.patch;
>>>> This needs to include the branch version (currently set to zero) in 
>>>> the definition. And the UMD needs to barf if branch comes back as 
>>>> non-zero. I.e. there is no guarantee that a branched version will 
>>>> have the w/a + fix that they are wanting.
>>>>
>>>> John.
>>>>
>>>>
>>>>> +
>>>>> +    if (copy_to_user(query_ptr, &ver, size))
>>>>> +        return -EFAULT;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>   static int (* const i915_query_funcs[])(struct drm_i915_private 
>>>>> *dev_priv,
>>>>>                       struct drm_i915_query_item *query_item) = {
>>>>>       query_topology_info,
>>>>> @@ -559,6 +590,7 @@ static int (* const i915_query_funcs[])(struct 
>>>>> drm_i915_private *dev_priv,
>>>>>       query_memregion_info,
>>>>>       query_hwconfig_blob,
>>>>>       query_geometry_subslices,
>>>>> +    query_guc_submission_version,
>>>>>   };
>>>>>   int i915_query_ioctl(struct drm_device *dev, void *data, struct 
>>>>> drm_file *file)
>>>>> diff --git a/include/uapi/drm/i915_drm.h 
>>>>> b/include/uapi/drm/i915_drm.h
>>>>> index 550c496ce76d..d80d9b5e1eda 100644
>>>>> --- a/include/uapi/drm/i915_drm.h
>>>>> +++ b/include/uapi/drm/i915_drm.h
>>>>> @@ -3038,6 +3038,7 @@ struct drm_i915_query_item {
>>>>>        *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct 
>>>>> drm_i915_query_memory_regions)
>>>>>        *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob 
>>>>> uAPI`)
>>>>>        *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct 
>>>>> drm_i915_query_topology_info)
>>>>> +     *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct 
>>>>> drm_i915_query_guc_submission_version)
>>>>>        */
>>>>>       __u64 query_id;
>>>>>   #define DRM_I915_QUERY_TOPOLOGY_INFO        1
>>>>> @@ -3046,6 +3047,7 @@ struct drm_i915_query_item {
>>>>>   #define DRM_I915_QUERY_MEMORY_REGIONS        4
>>>>>   #define DRM_I915_QUERY_HWCONFIG_BLOB        5
>>>>>   #define DRM_I915_QUERY_GEOMETRY_SUBSLICES    6
>>>>> +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION    7
>>>>>   /* Must be kept compact -- no holes and well documented */
>>>>>       /**
>>>>> @@ -3591,6 +3593,15 @@ struct drm_i915_query_memory_regions {
>>>>>       struct drm_i915_memory_region_info regions[];
>>>>>   };
>>>>> +/**
>>>>> +* struct drm_i915_query_guc_submission_version - query GuC 
>>>>> submission interface version
>>>>> +*/
>>>>> +struct drm_i915_query_guc_submission_version {
>>>>> +    __u64 major;
>>>>> +    __u64 minor;
>>>>> +    __u64 patch;
>>>>> +};
>>>>> +
>>>>>   /**
>>>>>    * DOC: GuC HWCONFIG blob uAPI
>>>>>    *
>>>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 00871ef99792..999687f6a3d4 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -551,6 +551,37 @@  static int query_hwconfig_blob(struct drm_i915_private *i915,
 	return hwconfig->size;
 }
 
+static int
+query_guc_submission_version(struct drm_i915_private *i915,
+			     struct drm_i915_query_item *query)
+{
+	struct drm_i915_query_guc_submission_version __user *query_ptr =
+					    u64_to_user_ptr(query->data_ptr);
+	struct drm_i915_query_guc_submission_version ver;
+	struct intel_guc *guc = &to_gt(i915)->uc.guc;
+	const size_t size = sizeof(ver);
+	int ret;
+
+	if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+		return -ENODEV;
+
+	ret = copy_query_item(&ver, size, size, query);
+	if (ret != 0)
+		return ret;
+
+	if (ver.major || ver.minor || ver.patch)
+		return -EINVAL;
+
+	ver.major = guc->submission_version.major;
+	ver.minor = guc->submission_version.minor;
+	ver.patch = guc->submission_version.patch;
+
+	if (copy_to_user(query_ptr, &ver, size))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
@@ -559,6 +590,7 @@  static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 	query_memregion_info,
 	query_hwconfig_blob,
 	query_geometry_subslices,
+	query_guc_submission_version,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 550c496ce76d..d80d9b5e1eda 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3038,6 +3038,7 @@  struct drm_i915_query_item {
 	 *  - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
 	 *  - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
 	 *  - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+	 *  - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
 	 */
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO		1
@@ -3046,6 +3047,7 @@  struct drm_i915_query_item {
 #define DRM_I915_QUERY_MEMORY_REGIONS		4
 #define DRM_I915_QUERY_HWCONFIG_BLOB		5
 #define DRM_I915_QUERY_GEOMETRY_SUBSLICES	6
+#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION	7
 /* Must be kept compact -- no holes and well documented */
 
 	/**
@@ -3591,6 +3593,15 @@  struct drm_i915_query_memory_regions {
 	struct drm_i915_memory_region_info regions[];
 };
 
+/**
+* struct drm_i915_query_guc_submission_version - query GuC submission interface version
+*/
+struct drm_i915_query_guc_submission_version {
+	__u64 major;
+	__u64 minor;
+	__u64 patch;
+};
+
 /**
  * DOC: GuC HWCONFIG blob uAPI
  *