diff mbox series

[v5,1/4] drm/i915/guc: Add fetch of hwconfig table

Message ID 20220222103640.1006006-2-jordan.l.justen@intel.com (mailing list archive)
State New, archived
Headers show
Series GuC HWCONFIG with documentation | expand

Commit Message

Jordan Justen Feb. 22, 2022, 10:36 a.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

Implement support for fetching the hardware description table from the
GuC. The call is made twice - once without a destination buffer to
query the size and then a second time to fill in the buffer.

Note that the table is only available on ADL-P and later platforms.

v5 (of Jordan's posting):
 * Various changes made by Jordan and recommended by Michal
   - Makefile ordering
   - Adjust "struct intel_guc_hwconfig hwconfig" comment
   - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
   - Drop inline from hwconfig_to_guc()
   - Replace hwconfig param with guc in __guc_action_get_hwconfig()
   - Move zero size check into guc_hwconfig_discover_size()
   - Change comment to say zero size offset/size is needed to get size
   - Add has_guc_hwconfig to devinfo and drop has_table()
   - Change drm_err to notice in __uc_init_hw() and use %pe

Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
 .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   3 +
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 145 ++++++++++++++++++
 .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 +++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c         |   7 +
 drivers/gpu/drm/i915/i915_pci.c               |   1 +
 drivers/gpu/drm/i915/intel_device_info.h      |   1 +
 9 files changed, 182 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
 create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h

Comments

Michal Wajdeczko Feb. 24, 2022, 1:58 p.m. UTC | #1
On 22.02.2022 11:36, Jordan Justen wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> Implement support for fetching the hardware description table from the
> GuC. The call is made twice - once without a destination buffer to
> query the size and then a second time to fill in the buffer.
> 
> Note that the table is only available on ADL-P and later platforms.
> 
> v5 (of Jordan's posting):
>  * Various changes made by Jordan and recommended by Michal
>    - Makefile ordering
>    - Adjust "struct intel_guc_hwconfig hwconfig" comment
>    - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
>    - Drop inline from hwconfig_to_guc()
>    - Replace hwconfig param with guc in __guc_action_get_hwconfig()
>    - Move zero size check into guc_hwconfig_discover_size()
>    - Change comment to say zero size offset/size is needed to get size
>    - Add has_guc_hwconfig to devinfo and drop has_table()
>    - Change drm_err to notice in __uc_init_hw() and use %pe
> 
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
>  .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
>  drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   3 +
>  .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 145 ++++++++++++++++++
>  .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 +++
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c         |   7 +
>  drivers/gpu/drm/i915/i915_pci.c               |   1 +
>  drivers/gpu/drm/i915/intel_device_info.h      |   1 +
>  9 files changed, 182 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
>  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index e9ce09620eb5..661f1afb51d7 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -188,6 +188,7 @@ i915-y += gt/uc/intel_uc.o \
>  	  gt/uc/intel_guc_ct.o \
>  	  gt/uc/intel_guc_debugfs.o \
>  	  gt/uc/intel_guc_fw.o \
> +	  gt/uc/intel_guc_hwconfig.o \
>  	  gt/uc/intel_guc_log.o \
>  	  gt/uc/intel_guc_log_debugfs.o \
>  	  gt/uc/intel_guc_rc.o \
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> index fe5d7d261797..4a61c819f32b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> @@ -137,6 +137,7 @@ enum intel_guc_action {
>  	INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
>  	INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
>  	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
> +	INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
>  	INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
>  	INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
>  	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
> index 488b6061ee89..f9e2a6aaef4a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
> @@ -8,6 +8,10 @@
>  
>  enum intel_guc_response_status {
>  	INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
> +	INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
> +	INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
> +	INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
> +	INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
>  	INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
>  };
>  
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index f9240d4baa69..2058eb8c3d0c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -13,6 +13,7 @@
>  #include "intel_guc_fw.h"
>  #include "intel_guc_fwif.h"
>  #include "intel_guc_ct.h"
> +#include "intel_guc_hwconfig.h"
>  #include "intel_guc_log.h"
>  #include "intel_guc_reg.h"
>  #include "intel_guc_slpc_types.h"
> @@ -37,6 +38,8 @@ struct intel_guc {
>  	struct intel_guc_ct ct;
>  	/** @slpc: sub-structure containing SLPC related data and objects */
>  	struct intel_guc_slpc slpc;
> +	/** @hwconfig: data related to hardware configuration KLV blob */
> +	struct intel_guc_hwconfig hwconfig;
>  
>  	/** @sched_engine: Global engine used to submit requests to GuC */
>  	struct i915_sched_engine *sched_engine;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> new file mode 100644
> index 000000000000..ad289603460c
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> @@ -0,0 +1,145 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#include "gt/intel_gt.h"
> +#include "i915_drv.h"
> +#include "i915_memcpy.h"
> +#include "intel_guc_hwconfig.h"
> +
> +static struct intel_guc *hwconfig_to_guc(struct intel_guc_hwconfig *hwconfig)
> +{
> +	return container_of(hwconfig, struct intel_guc, hwconfig);
> +}
> +
> +/*
> + * GuC has a blob containing hardware configuration information (HWConfig).
> + * This is formatted as a simple and flexible KLV (Key/Length/Value) table.
> + *
> + * For example, a minimal version could be:
> + *   enum device_attr {
> + *     ATTR_SOME_VALUE = 0,
> + *     ATTR_SOME_MASK  = 1,
> + *   };
> + *
> + *   static const u32 hwconfig[] = {
> + *     ATTR_SOME_VALUE,
> + *     1,		// Value Length in DWords
> + *     8,		// Value
> + *
> + *     ATTR_SOME_MASK,
> + *     3,
> + *     0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000,
> + *   };
> + *
> + * The attribute ids are defined in a hardware spec.
> + */
> +
> +static int __guc_action_get_hwconfig(struct intel_guc *guc,
> +				     u32 ggtt_offset, u32 ggtt_size)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_GET_HWCONFIG,
> +		ggtt_offset,
> +		0, /* upper 32 bits of address */

nit: to avoid comments we can use

	lower_32_bits(ggtt_offset),
	upper_32_bits(ggtt_offset),

> +		ggtt_size,
> +	};
> +	int ret;
> +
> +	ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
> +	if (ret == -ENXIO)
> +		return -ENOENT;
> +
> +	return ret;
> +}
> +
> +static int guc_hwconfig_discover_size(struct intel_guc_hwconfig *hwconfig)
> +{
> +	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
> +	int ret;
> +
> +	/* Sending a query with zero offset and size will return the
> +	 * size of the blob.
> +	 */

nit: wrong format of multi line comment

> +	ret = __guc_action_get_hwconfig(guc, 0, 0);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (ret == 0)
> +		return -EINVAL;
> +
> +	hwconfig->size = ret;
> +	return 0;
> +}
> +
> +static int guc_hwconfig_fill_buffer(struct intel_guc_hwconfig *hwconfig)
> +{
> +	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
> +	struct i915_vma *vma;
> +	u32 ggtt_offset;
> +	void *vaddr;
> +	int ret;
> +
> +	GEM_BUG_ON(!hwconfig->size);
> +
> +	ret = intel_guc_allocate_and_map_vma(guc, hwconfig->size, &vma, &vaddr);
> +	if (ret)
> +		return ret;
> +
> +	ggtt_offset = intel_guc_ggtt_offset(guc, vma);
> +
> +	ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);
> +	if (ret >= 0)
> +		memcpy(hwconfig->ptr, vaddr, hwconfig->size);
> +
> +	i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
> +
> +	return ret;
> +}
> +
> +/**
> + * intel_guc_hwconfig_fini - Finalize the HWConfig
> + *
> + * Free up the memory allocation holding the table.
> + */
> +void intel_guc_hwconfig_fini(struct intel_guc_hwconfig *hwconfig)
> +{
> +	kfree(hwconfig->ptr);
> +	hwconfig->size = 0;
> +	hwconfig->ptr = NULL;
> +}
> +
> +/**
> + * intel_guc_hwconfig_init - Initialize the HWConfig
> + *
> + * Retrieve the HWConfig table from the GuC and save it away in a local memory

"local memory" may have different meaning, maybe ".. save it locally"

with comment reworded,

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>

> + * allocation. It can then be queried on demand by other users later on.
> + */
> +int intel_guc_hwconfig_init(struct intel_guc_hwconfig *hwconfig)
> +{
> +	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
> +	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> +	int ret;
> +
> +	if (!INTEL_INFO(i915)->has_guc_hwconfig)
> +		return 0;
> +
> +	ret = guc_hwconfig_discover_size(hwconfig);
> +	if (ret)
> +		return ret;
> +
> +	hwconfig->ptr = kmalloc(hwconfig->size, GFP_KERNEL);
> +	if (!hwconfig->ptr) {
> +		hwconfig->size = 0;
> +		return -ENOMEM;
> +	}
> +
> +	ret = guc_hwconfig_fill_buffer(hwconfig);
> +	if (ret < 0) {
> +		intel_guc_hwconfig_fini(hwconfig);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
> new file mode 100644
> index 000000000000..bfb90ae168dc
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#ifndef _INTEL_GUC_HWCONFIG_H_
> +#define _INTEL_GUC_HWCONFIG_H_
> +
> +#include <linux/types.h>
> +
> +struct intel_guc_hwconfig {
> +	u32 size;
> +	void *ptr;
> +};
> +
> +int intel_guc_hwconfig_init(struct intel_guc_hwconfig *hwconfig);
> +void intel_guc_hwconfig_fini(struct intel_guc_hwconfig *hwconfig);
> +
> +#endif /* _INTEL_GUC_HWCONFIG_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 09ed29df67bc..0cefa2a95190 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -489,6 +489,11 @@ static int __uc_init_hw(struct intel_uc *uc)
>  	if (ret)
>  		goto err_log_capture;
>  
> +	ret = intel_guc_hwconfig_init(&guc->hwconfig);
> +	if (ret)
> +		drm_notice(&i915->drm, "Failed to retrieve hwconfig table: %pe\n",
> +			   ERR_PTR(ret));
> +
>  	ret = guc_enable_communication(guc);
>  	if (ret)
>  		goto err_log_capture;
> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>  	if (intel_uc_uses_guc_submission(uc))
>  		intel_guc_submission_disable(guc);
>  
> +	intel_guc_hwconfig_fini(&guc->hwconfig);
> +
>  	__uc_sanitize(uc);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 76e590fcb903..1d31e35a5154 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = {
>  		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
>  	.ppgtt_size = 48,
>  	.dma_mask_size = 39,
> +	.has_guc_hwconfig = 1,
>  };
>  
>  #undef GEN
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 3699b1c539ea..82d8d6bc30ff 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -133,6 +133,7 @@ enum intel_ppgtt_type {
>  	func(gpu_reset_clobbers_display); \
>  	func(has_reset_engine); \
>  	func(has_global_mocs); \
> +	func(has_guc_hwconfig); \
>  	func(has_gt_uc); \
>  	func(has_l3_dpf); \
>  	func(has_llc); \
John Harrison Feb. 25, 2022, 2:17 a.m. UTC | #2
On 2/22/2022 02:36, Jordan Justen wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> Implement support for fetching the hardware description table from the
> GuC. The call is made twice - once without a destination buffer to
> query the size and then a second time to fill in the buffer.
>
> Note that the table is only available on ADL-P and later platforms.
>
> v5 (of Jordan's posting):
>   * Various changes made by Jordan and recommended by Michal
>     - Makefile ordering
>     - Adjust "struct intel_guc_hwconfig hwconfig" comment
>     - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
>     - Drop inline from hwconfig_to_guc()
>     - Replace hwconfig param with guc in __guc_action_get_hwconfig()
>     - Move zero size check into guc_hwconfig_discover_size()
>     - Change comment to say zero size offset/size is needed to get size
>     - Add has_guc_hwconfig to devinfo and drop has_table()
>     - Change drm_err to notice in __uc_init_hw() and use %pe
>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
>   .../gpu/drm/i915/gt/uc/abi/guc_errors_abi.h   |   4 +
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   3 +
>   .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.c   | 145 ++++++++++++++++++
>   .../gpu/drm/i915/gt/uc/intel_guc_hwconfig.h   |  19 +++
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c         |   7 +
>   drivers/gpu/drm/i915/i915_pci.c               |   1 +
>   drivers/gpu/drm/i915/intel_device_info.h      |   1 +
>   9 files changed, 182 insertions(+)
>   create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
>   create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index e9ce09620eb5..661f1afb51d7 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -188,6 +188,7 @@ i915-y += gt/uc/intel_uc.o \
>   	  gt/uc/intel_guc_ct.o \
>   	  gt/uc/intel_guc_debugfs.o \
>   	  gt/uc/intel_guc_fw.o \
> +	  gt/uc/intel_guc_hwconfig.o \
>   	  gt/uc/intel_guc_log.o \
>   	  gt/uc/intel_guc_log_debugfs.o \
>   	  gt/uc/intel_guc_rc.o \
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> index fe5d7d261797..4a61c819f32b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
> @@ -137,6 +137,7 @@ enum intel_guc_action {
>   	INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
>   	INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
>   	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
> +	INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
>   	INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
>   	INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
>   	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
> index 488b6061ee89..f9e2a6aaef4a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
> @@ -8,6 +8,10 @@
>   
>   enum intel_guc_response_status {
>   	INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
> +	INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
> +	INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
> +	INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
> +	INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
>   	INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
>   };
>   
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index f9240d4baa69..2058eb8c3d0c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -13,6 +13,7 @@
>   #include "intel_guc_fw.h"
>   #include "intel_guc_fwif.h"
>   #include "intel_guc_ct.h"
> +#include "intel_guc_hwconfig.h"
>   #include "intel_guc_log.h"
>   #include "intel_guc_reg.h"
>   #include "intel_guc_slpc_types.h"
> @@ -37,6 +38,8 @@ struct intel_guc {
>   	struct intel_guc_ct ct;
>   	/** @slpc: sub-structure containing SLPC related data and objects */
>   	struct intel_guc_slpc slpc;
> +	/** @hwconfig: data related to hardware configuration KLV blob */
> +	struct intel_guc_hwconfig hwconfig;
>   
>   	/** @sched_engine: Global engine used to submit requests to GuC */
>   	struct i915_sched_engine *sched_engine;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> new file mode 100644
> index 000000000000..ad289603460c
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
> @@ -0,0 +1,145 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#include "gt/intel_gt.h"
> +#include "i915_drv.h"
> +#include "i915_memcpy.h"
> +#include "intel_guc_hwconfig.h"
> +
> +static struct intel_guc *hwconfig_to_guc(struct intel_guc_hwconfig *hwconfig)
> +{
> +	return container_of(hwconfig, struct intel_guc, hwconfig);
> +}
> +
> +/*
> + * GuC has a blob containing hardware configuration information (HWConfig).
> + * This is formatted as a simple and flexible KLV (Key/Length/Value) table.
> + *
> + * For example, a minimal version could be:
> + *   enum device_attr {
> + *     ATTR_SOME_VALUE = 0,
> + *     ATTR_SOME_MASK  = 1,
> + *   };
> + *
> + *   static const u32 hwconfig[] = {
> + *     ATTR_SOME_VALUE,
> + *     1,		// Value Length in DWords
> + *     8,		// Value
> + *
> + *     ATTR_SOME_MASK,
> + *     3,
> + *     0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000,
> + *   };
> + *
> + * The attribute ids are defined in a hardware spec.
> + */
> +
> +static int __guc_action_get_hwconfig(struct intel_guc *guc,
> +				     u32 ggtt_offset, u32 ggtt_size)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_GET_HWCONFIG,
> +		ggtt_offset,
> +		0, /* upper 32 bits of address */
> +		ggtt_size,
> +	};
> +	int ret;
> +
> +	ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
> +	if (ret == -ENXIO)
> +		return -ENOENT;
> +
> +	return ret;
> +}
> +
> +static int guc_hwconfig_discover_size(struct intel_guc_hwconfig *hwconfig)
> +{
> +	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
> +	int ret;
> +
> +	/* Sending a query with zero offset and size will return the
> +	 * size of the blob.
> +	 */
> +	ret = __guc_action_get_hwconfig(guc, 0, 0);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (ret == 0)
> +		return -EINVAL;
> +
> +	hwconfig->size = ret;
> +	return 0;
> +}
> +
> +static int guc_hwconfig_fill_buffer(struct intel_guc_hwconfig *hwconfig)
> +{
> +	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
> +	struct i915_vma *vma;
> +	u32 ggtt_offset;
> +	void *vaddr;
> +	int ret;
> +
> +	GEM_BUG_ON(!hwconfig->size);
> +
> +	ret = intel_guc_allocate_and_map_vma(guc, hwconfig->size, &vma, &vaddr);
> +	if (ret)
> +		return ret;
> +
> +	ggtt_offset = intel_guc_ggtt_offset(guc, vma);
> +
> +	ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);
> +	if (ret >= 0)
> +		memcpy(hwconfig->ptr, vaddr, hwconfig->size);
> +
> +	i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
> +
> +	return ret;
> +}
> +
> +/**
> + * intel_guc_hwconfig_fini - Finalize the HWConfig
> + *
> + * Free up the memory allocation holding the table.
> + */
> +void intel_guc_hwconfig_fini(struct intel_guc_hwconfig *hwconfig)
> +{
> +	kfree(hwconfig->ptr);
> +	hwconfig->size = 0;
> +	hwconfig->ptr = NULL;
> +}
> +
> +/**
> + * intel_guc_hwconfig_init - Initialize the HWConfig
> + *
> + * Retrieve the HWConfig table from the GuC and save it away in a local memory
> + * allocation. It can then be queried on demand by other users later on.
> + */
> +int intel_guc_hwconfig_init(struct intel_guc_hwconfig *hwconfig)
> +{
> +	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
> +	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> +	int ret;
> +
> +	if (!INTEL_INFO(i915)->has_guc_hwconfig)
> +		return 0;
> +
> +	ret = guc_hwconfig_discover_size(hwconfig);
> +	if (ret)
> +		return ret;
> +
> +	hwconfig->ptr = kmalloc(hwconfig->size, GFP_KERNEL);
> +	if (!hwconfig->ptr) {
> +		hwconfig->size = 0;
> +		return -ENOMEM;
> +	}
> +
> +	ret = guc_hwconfig_fill_buffer(hwconfig);
> +	if (ret < 0) {
> +		intel_guc_hwconfig_fini(hwconfig);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
> new file mode 100644
> index 000000000000..bfb90ae168dc
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2022 Intel Corporation
> + */
> +
> +#ifndef _INTEL_GUC_HWCONFIG_H_
> +#define _INTEL_GUC_HWCONFIG_H_
> +
> +#include <linux/types.h>
> +
> +struct intel_guc_hwconfig {
> +	u32 size;
> +	void *ptr;
> +};
> +
> +int intel_guc_hwconfig_init(struct intel_guc_hwconfig *hwconfig);
> +void intel_guc_hwconfig_fini(struct intel_guc_hwconfig *hwconfig);
> +
> +#endif /* _INTEL_GUC_HWCONFIG_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 09ed29df67bc..0cefa2a95190 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -489,6 +489,11 @@ static int __uc_init_hw(struct intel_uc *uc)
>   	if (ret)
>   		goto err_log_capture;
>   
> +	ret = intel_guc_hwconfig_init(&guc->hwconfig);
> +	if (ret)
> +		drm_notice(&i915->drm, "Failed to retrieve hwconfig table: %pe\n",
Why only drm_notice? As you are keen to point out, the UMDs won't work 
if the table is not available. All the failure paths in your own 
verification function are 'drm_err'. So why is it only a 'notice' if 
there is no table at all?

Note that this function is called as part of the reset path. The reset 
path is not allowed to allocate memory. The table is stored in a 
dynamically allocated object. Hence the IGT test failure. The table 
query has to be done elsewhere at driver init time only.

> +			   ERR_PTR(ret));
> +
>   	ret = guc_enable_communication(guc);
>   	if (ret)
>   		goto err_log_capture;
> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>   	if (intel_uc_uses_guc_submission(uc))
>   		intel_guc_submission_disable(guc);
>   
> +	intel_guc_hwconfig_fini(&guc->hwconfig);
> +
>   	__uc_sanitize(uc);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 76e590fcb903..1d31e35a5154 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = {
>   		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
>   	.ppgtt_size = 48,
>   	.dma_mask_size = 39,
> +	.has_guc_hwconfig = 1,
Who requested this change? It was previously done this way but the 
instruction was that i915_pci.c is for hardware features only but that 
this, as you seem extremely keen about pointing out at every 
opportunity, is a software feature.

John.


>   };
>   
>   #undef GEN
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 3699b1c539ea..82d8d6bc30ff 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -133,6 +133,7 @@ enum intel_ppgtt_type {
>   	func(gpu_reset_clobbers_display); \
>   	func(has_reset_engine); \
>   	func(has_global_mocs); \
> +	func(has_guc_hwconfig); \
>   	func(has_gt_uc); \
>   	func(has_l3_dpf); \
>   	func(has_llc); \
Jordan Justen Feb. 25, 2022, 5:03 a.m. UTC | #3
John Harrison <john.c.harrison@intel.com> writes:

> On 2/22/2022 02:36, Jordan Justen wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> Implement support for fetching the hardware description table from the
>> GuC. The call is made twice - once without a destination buffer to
>> query the size and then a second time to fill in the buffer.
>>
>> Note that the table is only available on ADL-P and later platforms.
>>
>> v5 (of Jordan's posting):
>>   * Various changes made by Jordan and recommended by Michal
>>     - Makefile ordering
>>     - Adjust "struct intel_guc_hwconfig hwconfig" comment
>>     - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
>>     - Drop inline from hwconfig_to_guc()
>>     - Replace hwconfig param with guc in __guc_action_get_hwconfig()
>>     - Move zero size check into guc_hwconfig_discover_size()
>>     - Change comment to say zero size offset/size is needed to get size
>>     - Add has_guc_hwconfig to devinfo and drop has_table()
>>     - Change drm_err to notice in __uc_init_hw() and use %pe
>>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
>> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
>> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
>> ---
>>   
>> +	ret = intel_guc_hwconfig_init(&guc->hwconfig);
>> +	if (ret)
>> +		drm_notice(&i915->drm, "Failed to retrieve hwconfig table: %pe\n",
> Why only drm_notice? As you are keen to point out, the UMDs won't work 
> if the table is not available. All the failure paths in your own 
> verification function are 'drm_err'. So why is it only a 'notice' if 
> there is no table at all?

This was requested by Michal in my v3 posting:

https://patchwork.freedesktop.org/patch/472936/?series=99787&rev=3

I don't think that it should be a failure for i915 if it is unable to
read the table, or if the table read is invalid. I think it should be up
to the UMD to react to the missing hwconfig however they think is
appropriate, but I would like the i915 to guarantee & document the
format returned to userspace to whatever extent is feasible.

As you point out there is a discrepancy, and I think I should be
consistent with whatever is used here in my "drm/i915/guc: Verify
hwconfig blob matches supported format" patch.

I guess I'd tend to agree with Michal that "maybe drm_notice since we
continue probe", but I would go along with either if you two want to
discuss further.

> Note that this function is called as part of the reset path. The reset 
> path is not allowed to allocate memory. The table is stored in a 
> dynamically allocated object. Hence the IGT test failure. The table 
> query has to be done elsewhere at driver init time only.

Thanks for clearing this up. I did notice on dg2 that gpu resets were
causing a re-read of the hwconfig from GuC, but it definitely was not
clear to me that there would be a connection to the IGT failure that you
pointed out.

>
>> +			   ERR_PTR(ret));
>> +
>>   	ret = guc_enable_communication(guc);
>>   	if (ret)
>>   		goto err_log_capture;
>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>   	if (intel_uc_uses_guc_submission(uc))
>>   		intel_guc_submission_disable(guc);
>>   
>> +	intel_guc_hwconfig_fini(&guc->hwconfig);
>> +
>>   	__uc_sanitize(uc);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 76e590fcb903..1d31e35a5154 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = {
>>   		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
>>   	.ppgtt_size = 48,
>>   	.dma_mask_size = 39,
>> +	.has_guc_hwconfig = 1,
> Who requested this change? It was previously done this way but the 
> instruction was that i915_pci.c is for hardware features only but that 
> this, as you seem extremely keen about pointing out at every 
> opportunity, is a software feature.

This was requested by Michal as well. I definitely agree it is a
software feature, but I was not aware that "i915_pci.c is for hardware
features only".

Michal, do you agree with this and returning to the previous method for
enabling the feature?

-Jordan
Michal Wajdeczko Feb. 25, 2022, 9:44 a.m. UTC | #4
On 25.02.2022 06:03, Jordan Justen wrote:
> John Harrison <john.c.harrison@intel.com> writes:
> 
>> On 2/22/2022 02:36, Jordan Justen wrote:
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> Implement support for fetching the hardware description table from the
>>> GuC. The call is made twice - once without a destination buffer to
>>> query the size and then a second time to fill in the buffer.
>>>
>>> Note that the table is only available on ADL-P and later platforms.
>>>
>>> v5 (of Jordan's posting):
>>>   * Various changes made by Jordan and recommended by Michal
>>>     - Makefile ordering
>>>     - Adjust "struct intel_guc_hwconfig hwconfig" comment
>>>     - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
>>>     - Drop inline from hwconfig_to_guc()
>>>     - Replace hwconfig param with guc in __guc_action_get_hwconfig()
>>>     - Move zero size check into guc_hwconfig_discover_size()
>>>     - Change comment to say zero size offset/size is needed to get size
>>>     - Add has_guc_hwconfig to devinfo and drop has_table()
>>>     - Change drm_err to notice in __uc_init_hw() and use %pe
>>>
>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
>>> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
>>> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
>>> ---
>>>   
>>> +	ret = intel_guc_hwconfig_init(&guc->hwconfig);
>>> +	if (ret)
>>> +		drm_notice(&i915->drm, "Failed to retrieve hwconfig table: %pe\n",
>> Why only drm_notice? As you are keen to point out, the UMDs won't work 
>> if the table is not available. All the failure paths in your own 
>> verification function are 'drm_err'. So why is it only a 'notice' if 
>> there is no table at all?
> 
> This was requested by Michal in my v3 posting:
> 
> https://patchwork.freedesktop.org/patch/472936/?series=99787&rev=3
> 
> I don't think that it should be a failure for i915 if it is unable to
> read the table, or if the table read is invalid. I think it should be up
> to the UMD to react to the missing hwconfig however they think is
> appropriate, but I would like the i915 to guarantee & document the
> format returned to userspace to whatever extent is feasible.
> 
> As you point out there is a discrepancy, and I think I should be
> consistent with whatever is used here in my "drm/i915/guc: Verify
> hwconfig blob matches supported format" patch.
> 
> I guess I'd tend to agree with Michal that "maybe drm_notice since we
> continue probe", but I would go along with either if you two want to
> discuss further.

having consistent message level is a clear benefit but on other hand
these other 'errors' may indicate more serious problems related to use
of wrong/incompatible firmware that returns corrupted HWconfig (or we
use wrong actions), while since we are not using this HWconfig in the
driver we don't care that much that we failed to load HWconfig and
'notice' is enough.

but I'm fine with all messages being drm_err (as we will not have to
change that once again after HWconfig will be mandatory for the driver
as well)

> 
>> Note that this function is called as part of the reset path. The reset 
>> path is not allowed to allocate memory. The table is stored in a 
>> dynamically allocated object. Hence the IGT test failure. The table 
>> query has to be done elsewhere at driver init time only.
> 
> Thanks for clearing this up. I did notice on dg2 that gpu resets were
> causing a re-read of the hwconfig from GuC, but it definitely was not
> clear to me that there would be a connection to the IGT failure that you
> pointed out.
> 
>>
>>> +			   ERR_PTR(ret));
>>> +
>>>   	ret = guc_enable_communication(guc);
>>>   	if (ret)
>>>   		goto err_log_capture;
>>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>>   	if (intel_uc_uses_guc_submission(uc))
>>>   		intel_guc_submission_disable(guc);
>>>   
>>> +	intel_guc_hwconfig_fini(&guc->hwconfig);
>>> +
>>>   	__uc_sanitize(uc);
>>>   }
>>>   
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>> index 76e590fcb903..1d31e35a5154 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = {
>>>   		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
>>>   	.ppgtt_size = 48,
>>>   	.dma_mask_size = 39,
>>> +	.has_guc_hwconfig = 1,
>> Who requested this change? It was previously done this way but the 
>> instruction was that i915_pci.c is for hardware features only but that 
>> this, as you seem extremely keen about pointing out at every 
>> opportunity, is a software feature.
> 
> This was requested by Michal as well. I definitely agree it is a
> software feature, but I was not aware that "i915_pci.c is for hardware
> features only".
> 
> Michal, do you agree with this and returning to the previous method for
> enabling the feature?

now I'm little confused as some arch direction was to treat FW as
extension of the HW so for me it was natural to have 'has_guc_hwconfig'
flag in device_info

if still for some reason it is undesired to mix HW and FW/SW flags
inside single group of flags then maybe we should just add separate
group of immutable flags where has_guc_hwconfig could be defined.

let our maintainers decide

Michal

> 
> -Jordan
Tvrtko Ursulin Feb. 25, 2022, 1:26 p.m. UTC | #5
On 25/02/2022 09:44, Michal Wajdeczko wrote:
> On 25.02.2022 06:03, Jordan Justen wrote:
>> John Harrison <john.c.harrison@intel.com> writes:
>>
>>> On 2/22/2022 02:36, Jordan Justen wrote:
>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>
>>>> Implement support for fetching the hardware description table from the
>>>> GuC. The call is made twice - once without a destination buffer to
>>>> query the size and then a second time to fill in the buffer.
>>>>
>>>> Note that the table is only available on ADL-P and later platforms.
>>>>
>>>> v5 (of Jordan's posting):
>>>>    * Various changes made by Jordan and recommended by Michal
>>>>      - Makefile ordering
>>>>      - Adjust "struct intel_guc_hwconfig hwconfig" comment
>>>>      - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
>>>>      - Drop inline from hwconfig_to_guc()
>>>>      - Replace hwconfig param with guc in __guc_action_get_hwconfig()
>>>>      - Move zero size check into guc_hwconfig_discover_size()
>>>>      - Change comment to say zero size offset/size is needed to get size
>>>>      - Add has_guc_hwconfig to devinfo and drop has_table()
>>>>      - Change drm_err to notice in __uc_init_hw() and use %pe
>>>>
>>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
>>>> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
>>>> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
>>>> ---
>>>>    
>>>> +	ret = intel_guc_hwconfig_init(&guc->hwconfig);
>>>> +	if (ret)
>>>> +		drm_notice(&i915->drm, "Failed to retrieve hwconfig table: %pe\n",
>>> Why only drm_notice? As you are keen to point out, the UMDs won't work
>>> if the table is not available. All the failure paths in your own
>>> verification function are 'drm_err'. So why is it only a 'notice' if
>>> there is no table at all?
>>
>> This was requested by Michal in my v3 posting:
>>
>> https://patchwork.freedesktop.org/patch/472936/?series=99787&rev=3
>>
>> I don't think that it should be a failure for i915 if it is unable to
>> read the table, or if the table read is invalid. I think it should be up
>> to the UMD to react to the missing hwconfig however they think is
>> appropriate, but I would like the i915 to guarantee & document the
>> format returned to userspace to whatever extent is feasible.
>>
>> As you point out there is a discrepancy, and I think I should be
>> consistent with whatever is used here in my "drm/i915/guc: Verify
>> hwconfig blob matches supported format" patch.
>>
>> I guess I'd tend to agree with Michal that "maybe drm_notice since we
>> continue probe", but I would go along with either if you two want to
>> discuss further.
> 
> having consistent message level is a clear benefit but on other hand
> these other 'errors' may indicate more serious problems related to use
> of wrong/incompatible firmware that returns corrupted HWconfig (or we
> use wrong actions), while since we are not using this HWconfig in the
> driver we don't care that much that we failed to load HWconfig and
> 'notice' is enough.
> 
> but I'm fine with all messages being drm_err (as we will not have to
> change that once again after HWconfig will be mandatory for the driver
> as well)

I would be against drm_err.

#define KERN_EMERG      KERN_SOH "0"    /* system is unusable */
#define KERN_ALERT      KERN_SOH "1"    /* action must be taken immediately */
#define KERN_CRIT       KERN_SOH "2"    /* critical conditions */
#define KERN_ERR        KERN_SOH "3"    /* error conditions */
#define KERN_WARNING    KERN_SOH "4"    /* warning conditions */
#define KERN_NOTICE     KERN_SOH "5"    /* normal but significant condition */
#define KERN_INFO       KERN_SOH "6"    /* informational */
#define KERN_DEBUG      KERN_SOH "7"    /* debug-level messages */

 From the point of view of the kernel driver, this is not an error to its operation. It can at most be a warning, but notice is also fine by me. One could argue when reading "normal but significant condition" that it is not normal, when it is in fact unexpected, so if people prefer warning that is also okay by me. I still lean towards notice becuase of the hands-off nature i915 has with the pass-through of this blob.

>>> Note that this function is called as part of the reset path. The reset
>>> path is not allowed to allocate memory. The table is stored in a
>>> dynamically allocated object. Hence the IGT test failure. The table
>>> query has to be done elsewhere at driver init time only.
>>
>> Thanks for clearing this up. I did notice on dg2 that gpu resets were
>> causing a re-read of the hwconfig from GuC, but it definitely was not
>> clear to me that there would be a connection to the IGT failure that you
>> pointed out.
>>
>>>
>>>> +			   ERR_PTR(ret));
>>>> +
>>>>    	ret = guc_enable_communication(guc);
>>>>    	if (ret)
>>>>    		goto err_log_capture;
>>>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>>>    	if (intel_uc_uses_guc_submission(uc))
>>>>    		intel_guc_submission_disable(guc);
>>>>    
>>>> +	intel_guc_hwconfig_fini(&guc->hwconfig);
>>>> +
>>>>    	__uc_sanitize(uc);
>>>>    }
>>>>    
>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>>> index 76e590fcb903..1d31e35a5154 100644
>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>> @@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = {
>>>>    		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
>>>>    	.ppgtt_size = 48,
>>>>    	.dma_mask_size = 39,
>>>> +	.has_guc_hwconfig = 1,
>>> Who requested this change? It was previously done this way but the
>>> instruction was that i915_pci.c is for hardware features only but that
>>> this, as you seem extremely keen about pointing out at every
>>> opportunity, is a software feature.
>>
>> This was requested by Michal as well. I definitely agree it is a
>> software feature, but I was not aware that "i915_pci.c is for hardware
>> features only".
>>
>> Michal, do you agree with this and returning to the previous method for
>> enabling the feature?
> 
> now I'm little confused as some arch direction was to treat FW as
> extension of the HW so for me it was natural to have 'has_guc_hwconfig'
> flag in device_info
> 
> if still for some reason it is undesired to mix HW and FW/SW flags
> inside single group of flags then maybe we should just add separate
> group of immutable flags where has_guc_hwconfig could be defined.
> 
> let our maintainers decide

Bah.. :)

And what was the previous method?

[comes back later]

Okay it was:

+static bool has_table(struct drm_i915_private *i915)
+{
+	if (IS_ALDERLAKE_P(i915))
+		return true;

Which sucks a bit if we want to argue it does not belong in device info.

Why can't we ask the GuC if the blob exists? In fact what would happen if one would call __guc_action_get_hwconfig on any GuC platform?

Regards,

Tvrtko
John Harrison Feb. 25, 2022, 4:46 p.m. UTC | #6
On 2/25/2022 05:26, Tvrtko Ursulin wrote:
> On 25/02/2022 09:44, Michal Wajdeczko wrote:
>> On 25.02.2022 06:03, Jordan Justen wrote:
>>> John Harrison <john.c.harrison@intel.com> writes:
>>>
>>>> On 2/22/2022 02:36, Jordan Justen wrote:
>>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>>
>>>>> Implement support for fetching the hardware description table from 
>>>>> the
>>>>> GuC. The call is made twice - once without a destination buffer to
>>>>> query the size and then a second time to fill in the buffer.
>>>>>
>>>>> Note that the table is only available on ADL-P and later platforms.
>>>>>
>>>>> v5 (of Jordan's posting):
>>>>>    * Various changes made by Jordan and recommended by Michal
>>>>>      - Makefile ordering
>>>>>      - Adjust "struct intel_guc_hwconfig hwconfig" comment
>>>>>      - Set Copyright year to 2022 in intel_guc_hwconfig.c/.h
>>>>>      - Drop inline from hwconfig_to_guc()
>>>>>      - Replace hwconfig param with guc in __guc_action_get_hwconfig()
>>>>>      - Move zero size check into guc_hwconfig_discover_size()
>>>>>      - Change comment to say zero size offset/size is needed to 
>>>>> get size
>>>>>      - Add has_guc_hwconfig to devinfo and drop has_table()
>>>>>      - Change drm_err to notice in __uc_init_hw() and use %pe
>>>>>
>>>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>>>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>>> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
>>>>> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com>
>>>>> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
>>>>> ---
>>>>>    +    ret = intel_guc_hwconfig_init(&guc->hwconfig);
>>>>> +    if (ret)
>>>>> +        drm_notice(&i915->drm, "Failed to retrieve hwconfig 
>>>>> table: %pe\n",
>>>> Why only drm_notice? As you are keen to point out, the UMDs won't work
>>>> if the table is not available. All the failure paths in your own
>>>> verification function are 'drm_err'. So why is it only a 'notice' if
>>>> there is no table at all?
>>>
>>> This was requested by Michal in my v3 posting:
>>>
>>> https://patchwork.freedesktop.org/patch/472936/?series=99787&rev=3
>>>
>>> I don't think that it should be a failure for i915 if it is unable to
>>> read the table, or if the table read is invalid. I think it should 
>>> be up
>>> to the UMD to react to the missing hwconfig however they think is
>>> appropriate, but I would like the i915 to guarantee & document the
>>> format returned to userspace to whatever extent is feasible.
>>>
>>> As you point out there is a discrepancy, and I think I should be
>>> consistent with whatever is used here in my "drm/i915/guc: Verify
>>> hwconfig blob matches supported format" patch.
>>>
>>> I guess I'd tend to agree with Michal that "maybe drm_notice since we
>>> continue probe", but I would go along with either if you two want to
>>> discuss further.
>>
>> having consistent message level is a clear benefit but on other hand
>> these other 'errors' may indicate more serious problems related to use
>> of wrong/incompatible firmware that returns corrupted HWconfig (or we
>> use wrong actions), while since we are not using this HWconfig in the
As stated ad nauseam, you can rule out 'corrupted' hwconfig. The GuC 
firmware is signed and will not load if it has become corrupted somehow. 
Likewise, a 'wrong/incompatible' firmware will refuse to load. So it is 
physically impossible for the later verification stage to ever find an 
error.


>> driver we don't care that much that we failed to load HWconfig and
>> 'notice' is enough.
>>
>> but I'm fine with all messages being drm_err (as we will not have to
>> change that once again after HWconfig will be mandatory for the driver
>> as well)
>
> I would be against drm_err.
>
> #define KERN_EMERG      KERN_SOH "0"    /* system is unusable */
> #define KERN_ALERT      KERN_SOH "1"    /* action must be taken 
> immediately */
> #define KERN_CRIT       KERN_SOH "2"    /* critical conditions */
> #define KERN_ERR        KERN_SOH "3"    /* error conditions */
> #define KERN_WARNING    KERN_SOH "4"    /* warning conditions */
> #define KERN_NOTICE     KERN_SOH "5"    /* normal but significant 
> condition */
> #define KERN_INFO       KERN_SOH "6"    /* informational */
> #define KERN_DEBUG      KERN_SOH "7"    /* debug-level messages */
>
> From the point of view of the kernel driver, this is not an error to 
> its operation. It can at most be a warning, but notice is also fine by 
> me. One could argue when reading "normal but significant condition" 
> that it is not normal, when it is in fact unexpected, so if people 
> prefer warning that is also okay by me. I still lean towards notice 
> becuase of the hands-off nature i915 has with the pass-through of this 
> blob.
 From the point of view of the KMD, i915 will load and be 'functional' 
if it can't talk to the hardware at all. The UMDs won't work at all but 
the driver load will be 'fine'. That's a requirement to be able to get 
the user to a software fallback desktop in order to work out why the 
hardware isn't working (e.g. no GuC firmware file). I would view this as 
similar. The KMD might have loaded but the UMDs are not functional. That 
is definitely an error condition to me.

>
>>>> Note that this function is called as part of the reset path. The reset
>>>> path is not allowed to allocate memory. The table is stored in a
>>>> dynamically allocated object. Hence the IGT test failure. The table
>>>> query has to be done elsewhere at driver init time only.
>>>
>>> Thanks for clearing this up. I did notice on dg2 that gpu resets were
>>> causing a re-read of the hwconfig from GuC, but it definitely was not
>>> clear to me that there would be a connection to the IGT failure that 
>>> you
>>> pointed out.
>>>
>>>>
>>>>> +               ERR_PTR(ret));
>>>>> +
>>>>>        ret = guc_enable_communication(guc);
>>>>>        if (ret)
>>>>>            goto err_log_capture;
>>>>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>>>>        if (intel_uc_uses_guc_submission(uc))
>>>>>            intel_guc_submission_disable(guc);
>>>>>    +    intel_guc_hwconfig_fini(&guc->hwconfig);
>>>>> +
>>>>>        __uc_sanitize(uc);
>>>>>    }
>>>>>    diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>> index 76e590fcb903..1d31e35a5154 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>> @@ -990,6 +990,7 @@ static const struct intel_device_info 
>>>>> adl_p_info = {
>>>>>            BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | 
>>>>> BIT(VCS2),
>>>>>        .ppgtt_size = 48,
>>>>>        .dma_mask_size = 39,
>>>>> +    .has_guc_hwconfig = 1,
>>>> Who requested this change? It was previously done this way but the
>>>> instruction was that i915_pci.c is for hardware features only but that
>>>> this, as you seem extremely keen about pointing out at every
>>>> opportunity, is a software feature.
>>>
>>> This was requested by Michal as well. I definitely agree it is a
>>> software feature, but I was not aware that "i915_pci.c is for hardware
>>> features only".
>>>
>>> Michal, do you agree with this and returning to the previous method for
>>> enabling the feature?
>>
>> now I'm little confused as some arch direction was to treat FW as
>> extension of the HW so for me it was natural to have 'has_guc_hwconfig'
>> flag in device_info
>>
>> if still for some reason it is undesired to mix HW and FW/SW flags
>> inside single group of flags then maybe we should just add separate
>> group of immutable flags where has_guc_hwconfig could be defined.
>>
>> let our maintainers decide
>
> Bah.. :)
>
> And what was the previous method?
>
> [comes back later]
>
> Okay it was:
>
> +static bool has_table(struct drm_i915_private *i915)
> +{
> +    if (IS_ALDERLAKE_P(i915))
> +        return true;
>
> Which sucks a bit if we want to argue it does not belong in device info.
>
> Why can't we ask the GuC if the blob exists? In fact what would happen 
> if one would call __guc_action_get_hwconfig on any GuC platform?
That was how I originally wrote the code. However, other parties refuse 
to allow a H2G call to fail. The underlying CTB layers complain loudly 
on any CTB error. And the GuC architects insist that a call to query the 
table on an unsupported platform is an error and should return an 
'unsupported' error code.

John.

>
> Regards,
>
> Tvrtko
Tvrtko Ursulin Feb. 25, 2022, 5:18 p.m. UTC | #7
On 25/02/2022 16:46, John Harrison wrote:

>>> driver we don't care that much that we failed to load HWconfig and
>>> 'notice' is enough.
>>>
>>> but I'm fine with all messages being drm_err (as we will not have to
>>> change that once again after HWconfig will be mandatory for the driver
>>> as well)
>>
>> I would be against drm_err.
>>
>> #define KERN_EMERG      KERN_SOH "0"    /* system is unusable */
>> #define KERN_ALERT      KERN_SOH "1"    /* action must be taken 
>> immediately */
>> #define KERN_CRIT       KERN_SOH "2"    /* critical conditions */
>> #define KERN_ERR        KERN_SOH "3"    /* error conditions */
>> #define KERN_WARNING    KERN_SOH "4"    /* warning conditions */
>> #define KERN_NOTICE     KERN_SOH "5"    /* normal but significant 
>> condition */
>> #define KERN_INFO       KERN_SOH "6"    /* informational */
>> #define KERN_DEBUG      KERN_SOH "7"    /* debug-level messages */
>>
>> From the point of view of the kernel driver, this is not an error to 
>> its operation. It can at most be a warning, but notice is also fine by 
>> me. One could argue when reading "normal but significant condition" 
>> that it is not normal, when it is in fact unexpected, so if people 
>> prefer warning that is also okay by me. I still lean towards notice 
>> becuase of the hands-off nature i915 has with the pass-through of this 
>> blob.
>  From the point of view of the KMD, i915 will load and be 'functional' 
> if it can't talk to the hardware at all. The UMDs won't work at all but 

Well this reductio ad absurdum fails I think... :)

> the driver load will be 'fine'. That's a requirement to be able to get 
> the user to a software fallback desktop in order to work out why the 
> hardware isn't working (e.g. no GuC firmware file). I would view this as 
> similar. The KMD might have loaded but the UMDs are not functional. That 
> is definitely an error condition to me.

... If GuC fails to load there is no command submission and driver will 
obviously log that with drm_err.

If blob fails to verify it depends on the userspace stack what will 
happen. As stated before on some platforms, and/or after a certain time, 
Mesa will not look at the blob at all. So i915 is fine (it is after all 
just a conduit for opaque data!), system overall is fine, so it 
definitely isn't a KERN_ERR level event.

>>>>>> +               ERR_PTR(ret));
>>>>>> +
>>>>>>        ret = guc_enable_communication(guc);
>>>>>>        if (ret)
>>>>>>            goto err_log_capture;
>>>>>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>>>>>        if (intel_uc_uses_guc_submission(uc))
>>>>>>            intel_guc_submission_disable(guc);
>>>>>>    +    intel_guc_hwconfig_fini(&guc->hwconfig);
>>>>>> +
>>>>>>        __uc_sanitize(uc);
>>>>>>    }
>>>>>>    diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>>> index 76e590fcb903..1d31e35a5154 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>>> @@ -990,6 +990,7 @@ static const struct intel_device_info 
>>>>>> adl_p_info = {
>>>>>>            BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | 
>>>>>> BIT(VCS2),
>>>>>>        .ppgtt_size = 48,
>>>>>>        .dma_mask_size = 39,
>>>>>> +    .has_guc_hwconfig = 1,
>>>>> Who requested this change? It was previously done this way but the
>>>>> instruction was that i915_pci.c is for hardware features only but that
>>>>> this, as you seem extremely keen about pointing out at every
>>>>> opportunity, is a software feature.
>>>>
>>>> This was requested by Michal as well. I definitely agree it is a
>>>> software feature, but I was not aware that "i915_pci.c is for hardware
>>>> features only".
>>>>
>>>> Michal, do you agree with this and returning to the previous method for
>>>> enabling the feature?
>>>
>>> now I'm little confused as some arch direction was to treat FW as
>>> extension of the HW so for me it was natural to have 'has_guc_hwconfig'
>>> flag in device_info
>>>
>>> if still for some reason it is undesired to mix HW and FW/SW flags
>>> inside single group of flags then maybe we should just add separate
>>> group of immutable flags where has_guc_hwconfig could be defined.
>>>
>>> let our maintainers decide
>>
>> Bah.. :)
>>
>> And what was the previous method?
>>
>> [comes back later]
>>
>> Okay it was:
>>
>> +static bool has_table(struct drm_i915_private *i915)
>> +{
>> +    if (IS_ALDERLAKE_P(i915))
>> +        return true;
>>
>> Which sucks a bit if we want to argue it does not belong in device info.
>>
>> Why can't we ask the GuC if the blob exists? In fact what would happen 
>> if one would call __guc_action_get_hwconfig on any GuC platform?
> That was how I originally wrote the code. However, other parties refuse 
> to allow a H2G call to fail. The underlying CTB layers complain loudly 
> on any CTB error. And the GuC architects insist that a call to query the 
> table on an unsupported platform is an error and should return an 
> 'unsupported' error code.

Oh well, shrug, sounds silly but I will not pretend I am familiar with H2G

In this case has_table does sound better since it indeed isn't a 
hardware feature. It is a GuC fw thing and if we don't have a way to 
probe things there at runtime, then at least limit the knowledge to GuC 
files.

Regards,

Tvrtko
Michal Wajdeczko Feb. 25, 2022, 6:05 p.m. UTC | #8
On 25.02.2022 18:18, Tvrtko Ursulin wrote:
> 
> On 25/02/2022 16:46, John Harrison wrote:
> 
>>>> driver we don't care that much that we failed to load HWconfig and
>>>> 'notice' is enough.
>>>>
>>>> but I'm fine with all messages being drm_err (as we will not have to
>>>> change that once again after HWconfig will be mandatory for the driver
>>>> as well)
>>>
>>> I would be against drm_err.
>>>
>>> #define KERN_EMERG      KERN_SOH "0"    /* system is unusable */
>>> #define KERN_ALERT      KERN_SOH "1"    /* action must be taken
>>> immediately */
>>> #define KERN_CRIT       KERN_SOH "2"    /* critical conditions */
>>> #define KERN_ERR        KERN_SOH "3"    /* error conditions */
>>> #define KERN_WARNING    KERN_SOH "4"    /* warning conditions */
>>> #define KERN_NOTICE     KERN_SOH "5"    /* normal but significant
>>> condition */
>>> #define KERN_INFO       KERN_SOH "6"    /* informational */
>>> #define KERN_DEBUG      KERN_SOH "7"    /* debug-level messages */
>>>
>>> From the point of view of the kernel driver, this is not an error to
>>> its operation. It can at most be a warning, but notice is also fine
>>> by me. One could argue when reading "normal but significant
>>> condition" that it is not normal, when it is in fact unexpected, so
>>> if people prefer warning that is also okay by me. I still lean
>>> towards notice becuase of the hands-off nature i915 has with the
>>> pass-through of this blob.
>>  From the point of view of the KMD, i915 will load and be 'functional'
>> if it can't talk to the hardware at all. The UMDs won't work at all but 
> 
> Well this reductio ad absurdum fails I think... :)
> 
>> the driver load will be 'fine'. That's a requirement to be able to get
>> the user to a software fallback desktop in order to work out why the
>> hardware isn't working (e.g. no GuC firmware file). I would view this
>> as similar. The KMD might have loaded but the UMDs are not functional.
>> That is definitely an error condition to me.
> 
> ... If GuC fails to load there is no command submission and driver will
> obviously log that with drm_err.
> 
> If blob fails to verify it depends on the userspace stack what will
> happen. As stated before on some platforms, and/or after a certain time,
> Mesa will not look at the blob at all. So i915 is fine (it is after all
> just a conduit for opaque data!), system overall is fine, so it
> definitely isn't a KERN_ERR level event.
> 
>>>>>>> +               ERR_PTR(ret));
>>>>>>> +
>>>>>>>        ret = guc_enable_communication(guc);
>>>>>>>        if (ret)
>>>>>>>            goto err_log_capture;
>>>>>>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>>>>>>        if (intel_uc_uses_guc_submission(uc))
>>>>>>>            intel_guc_submission_disable(guc);
>>>>>>>    +    intel_guc_hwconfig_fini(&guc->hwconfig);
>>>>>>> +
>>>>>>>        __uc_sanitize(uc);
>>>>>>>    }
>>>>>>>    diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>>>> index 76e590fcb903..1d31e35a5154 100644
>>>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>>>> @@ -990,6 +990,7 @@ static const struct intel_device_info
>>>>>>> adl_p_info = {
>>>>>>>            BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) |
>>>>>>> BIT(VCS2),
>>>>>>>        .ppgtt_size = 48,
>>>>>>>        .dma_mask_size = 39,
>>>>>>> +    .has_guc_hwconfig = 1,
>>>>>> Who requested this change? It was previously done this way but the
>>>>>> instruction was that i915_pci.c is for hardware features only but
>>>>>> that
>>>>>> this, as you seem extremely keen about pointing out at every
>>>>>> opportunity, is a software feature.
>>>>>
>>>>> This was requested by Michal as well. I definitely agree it is a
>>>>> software feature, but I was not aware that "i915_pci.c is for hardware
>>>>> features only".
>>>>>
>>>>> Michal, do you agree with this and returning to the previous method
>>>>> for
>>>>> enabling the feature?
>>>>
>>>> now I'm little confused as some arch direction was to treat FW as
>>>> extension of the HW so for me it was natural to have 'has_guc_hwconfig'
>>>> flag in device_info
>>>>
>>>> if still for some reason it is undesired to mix HW and FW/SW flags
>>>> inside single group of flags then maybe we should just add separate
>>>> group of immutable flags where has_guc_hwconfig could be defined.
>>>>
>>>> let our maintainers decide
>>>
>>> Bah.. :)
>>>
>>> And what was the previous method?
>>>
>>> [comes back later]
>>>
>>> Okay it was:
>>>
>>> +static bool has_table(struct drm_i915_private *i915)
>>> +{
>>> +    if (IS_ALDERLAKE_P(i915))
>>> +        return true;
>>>
>>> Which sucks a bit if we want to argue it does not belong in device info.
>>>
>>> Why can't we ask the GuC if the blob exists? In fact what would
>>> happen if one would call __guc_action_get_hwconfig on any GuC platform?
>> That was how I originally wrote the code. However, other parties
>> refuse to allow a H2G call to fail. The underlying CTB layers complain
>> loudly on any CTB error. And the GuC architects insist that a call to
>> query the table on an unsupported platform is an error and should
>> return an 'unsupported' error code.

just for the background: IIRC the reason why arch didn't want 'query'
mode that wont fail was that HWconfig is not optional on these selected
platforms, so driver shall know up-front on which platforms it shall be
working (and driver shall even fail if blob is not available)

> 
> Oh well, shrug, sounds silly but I will not pretend I am familiar with H2G
> 
> In this case has_table does sound better since it indeed isn't a
> hardware feature. It is a GuC fw thing and if we don't have a way to
> probe things there at runtime, then at least limit the knowledge to GuC
> files.

note that arch expectation is that driver itself shall be using this
hwconfig (ie. will decode required data). while current approach is that
driver acts only as a proxy to UMD, this has to change and refactoring
will start (likely) soon. therefore flag has_guc_hwconfig fits better
IMHO as this wont be just 'GuC' fw thing.

Michal

> 
> Regards,
> 
> Tvrtko
Tvrtko Ursulin Feb. 25, 2022, 6:35 p.m. UTC | #9
On 25/02/2022 18:05, Michal Wajdeczko wrote:
> On 25.02.2022 18:18, Tvrtko Ursulin wrote:
>>
>> On 25/02/2022 16:46, John Harrison wrote:
>>
>>>>> driver we don't care that much that we failed to load HWconfig and
>>>>> 'notice' is enough.
>>>>>
>>>>> but I'm fine with all messages being drm_err (as we will not have to
>>>>> change that once again after HWconfig will be mandatory for the driver
>>>>> as well)
>>>>
>>>> I would be against drm_err.
>>>>
>>>> #define KERN_EMERG      KERN_SOH "0"    /* system is unusable */
>>>> #define KERN_ALERT      KERN_SOH "1"    /* action must be taken
>>>> immediately */
>>>> #define KERN_CRIT       KERN_SOH "2"    /* critical conditions */
>>>> #define KERN_ERR        KERN_SOH "3"    /* error conditions */
>>>> #define KERN_WARNING    KERN_SOH "4"    /* warning conditions */
>>>> #define KERN_NOTICE     KERN_SOH "5"    /* normal but significant
>>>> condition */
>>>> #define KERN_INFO       KERN_SOH "6"    /* informational */
>>>> #define KERN_DEBUG      KERN_SOH "7"    /* debug-level messages */
>>>>
>>>>  From the point of view of the kernel driver, this is not an error to
>>>> its operation. It can at most be a warning, but notice is also fine
>>>> by me. One could argue when reading "normal but significant
>>>> condition" that it is not normal, when it is in fact unexpected, so
>>>> if people prefer warning that is also okay by me. I still lean
>>>> towards notice becuase of the hands-off nature i915 has with the
>>>> pass-through of this blob.
>>>   From the point of view of the KMD, i915 will load and be 'functional'
>>> if it can't talk to the hardware at all. The UMDs won't work at all but
>>
>> Well this reductio ad absurdum fails I think... :)
>>
>>> the driver load will be 'fine'. That's a requirement to be able to get
>>> the user to a software fallback desktop in order to work out why the
>>> hardware isn't working (e.g. no GuC firmware file). I would view this
>>> as similar. The KMD might have loaded but the UMDs are not functional.
>>> That is definitely an error condition to me.
>>
>> ... If GuC fails to load there is no command submission and driver will
>> obviously log that with drm_err.
>>
>> If blob fails to verify it depends on the userspace stack what will
>> happen. As stated before on some platforms, and/or after a certain time,
>> Mesa will not look at the blob at all. So i915 is fine (it is after all
>> just a conduit for opaque data!), system overall is fine, so it
>> definitely isn't a KERN_ERR level event.
>>
>>>>>>>> +               ERR_PTR(ret));
>>>>>>>> +
>>>>>>>>         ret = guc_enable_communication(guc);
>>>>>>>>         if (ret)
>>>>>>>>             goto err_log_capture;
>>>>>>>> @@ -562,6 +567,8 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>>>>>>>         if (intel_uc_uses_guc_submission(uc))
>>>>>>>>             intel_guc_submission_disable(guc);
>>>>>>>>     +    intel_guc_hwconfig_fini(&guc->hwconfig);
>>>>>>>> +
>>>>>>>>         __uc_sanitize(uc);
>>>>>>>>     }
>>>>>>>>     diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>>>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>>>>> index 76e590fcb903..1d31e35a5154 100644
>>>>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>>>>> @@ -990,6 +990,7 @@ static const struct intel_device_info
>>>>>>>> adl_p_info = {
>>>>>>>>             BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) |
>>>>>>>> BIT(VCS2),
>>>>>>>>         .ppgtt_size = 48,
>>>>>>>>         .dma_mask_size = 39,
>>>>>>>> +    .has_guc_hwconfig = 1,
>>>>>>> Who requested this change? It was previously done this way but the
>>>>>>> instruction was that i915_pci.c is for hardware features only but
>>>>>>> that
>>>>>>> this, as you seem extremely keen about pointing out at every
>>>>>>> opportunity, is a software feature.
>>>>>>
>>>>>> This was requested by Michal as well. I definitely agree it is a
>>>>>> software feature, but I was not aware that "i915_pci.c is for hardware
>>>>>> features only".
>>>>>>
>>>>>> Michal, do you agree with this and returning to the previous method
>>>>>> for
>>>>>> enabling the feature?
>>>>>
>>>>> now I'm little confused as some arch direction was to treat FW as
>>>>> extension of the HW so for me it was natural to have 'has_guc_hwconfig'
>>>>> flag in device_info
>>>>>
>>>>> if still for some reason it is undesired to mix HW and FW/SW flags
>>>>> inside single group of flags then maybe we should just add separate
>>>>> group of immutable flags where has_guc_hwconfig could be defined.
>>>>>
>>>>> let our maintainers decide
>>>>
>>>> Bah.. :)
>>>>
>>>> And what was the previous method?
>>>>
>>>> [comes back later]
>>>>
>>>> Okay it was:
>>>>
>>>> +static bool has_table(struct drm_i915_private *i915)
>>>> +{
>>>> +    if (IS_ALDERLAKE_P(i915))
>>>> +        return true;
>>>>
>>>> Which sucks a bit if we want to argue it does not belong in device info.
>>>>
>>>> Why can't we ask the GuC if the blob exists? In fact what would
>>>> happen if one would call __guc_action_get_hwconfig on any GuC platform?
>>> That was how I originally wrote the code. However, other parties
>>> refuse to allow a H2G call to fail. The underlying CTB layers complain
>>> loudly on any CTB error. And the GuC architects insist that a call to
>>> query the table on an unsupported platform is an error and should
>>> return an 'unsupported' error code.
> 
> just for the background: IIRC the reason why arch didn't want 'query'
> mode that wont fail was that HWconfig is not optional on these selected
> platforms, so driver shall know up-front on which platforms it shall be
> working (and driver shall even fail if blob is not available)
> 
>>
>> Oh well, shrug, sounds silly but I will not pretend I am familiar with H2G
>>
>> In this case has_table does sound better since it indeed isn't a
>> hardware feature. It is a GuC fw thing and if we don't have a way to
>> probe things there at runtime, then at least limit the knowledge to GuC
>> files.
> 
> note that arch expectation is that driver itself shall be using this
> hwconfig (ie. will decode required data). while current approach is that
> driver acts only as a proxy to UMD, this has to change and refactoring
> will start (likely) soon. therefore flag has_guc_hwconfig fits better
> IMHO as this wont be just 'GuC' fw thing.

There we go, Jordan's validation gets a new unexpected use. :D

In this case it should maybe be like:

#define I915_NEEDS_GUC_HWCONFIG(i915) (IS_PLATFORM(xxx) || VER >= nnn)

Since it is still not a hardware feature like other device info flags.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e9ce09620eb5..661f1afb51d7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -188,6 +188,7 @@  i915-y += gt/uc/intel_uc.o \
 	  gt/uc/intel_guc_ct.o \
 	  gt/uc/intel_guc_debugfs.o \
 	  gt/uc/intel_guc_fw.o \
+	  gt/uc/intel_guc_hwconfig.o \
 	  gt/uc/intel_guc_log.o \
 	  gt/uc/intel_guc_log_debugfs.o \
 	  gt/uc/intel_guc_rc.o \
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index fe5d7d261797..4a61c819f32b 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -137,6 +137,7 @@  enum intel_guc_action {
 	INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
 	INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004,
 	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100,
 	INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502,
 	INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503,
 	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 488b6061ee89..f9e2a6aaef4a 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -8,6 +8,10 @@ 
 
 enum intel_guc_response_status {
 	INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+	INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20,
+	INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201,
+	INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202,
+	INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204,
 	INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index f9240d4baa69..2058eb8c3d0c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -13,6 +13,7 @@ 
 #include "intel_guc_fw.h"
 #include "intel_guc_fwif.h"
 #include "intel_guc_ct.h"
+#include "intel_guc_hwconfig.h"
 #include "intel_guc_log.h"
 #include "intel_guc_reg.h"
 #include "intel_guc_slpc_types.h"
@@ -37,6 +38,8 @@  struct intel_guc {
 	struct intel_guc_ct ct;
 	/** @slpc: sub-structure containing SLPC related data and objects */
 	struct intel_guc_slpc slpc;
+	/** @hwconfig: data related to hardware configuration KLV blob */
+	struct intel_guc_hwconfig hwconfig;
 
 	/** @sched_engine: Global engine used to submit requests to GuC */
 	struct i915_sched_engine *sched_engine;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
new file mode 100644
index 000000000000..ad289603460c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -0,0 +1,145 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "i915_drv.h"
+#include "i915_memcpy.h"
+#include "intel_guc_hwconfig.h"
+
+static struct intel_guc *hwconfig_to_guc(struct intel_guc_hwconfig *hwconfig)
+{
+	return container_of(hwconfig, struct intel_guc, hwconfig);
+}
+
+/*
+ * GuC has a blob containing hardware configuration information (HWConfig).
+ * This is formatted as a simple and flexible KLV (Key/Length/Value) table.
+ *
+ * For example, a minimal version could be:
+ *   enum device_attr {
+ *     ATTR_SOME_VALUE = 0,
+ *     ATTR_SOME_MASK  = 1,
+ *   };
+ *
+ *   static const u32 hwconfig[] = {
+ *     ATTR_SOME_VALUE,
+ *     1,		// Value Length in DWords
+ *     8,		// Value
+ *
+ *     ATTR_SOME_MASK,
+ *     3,
+ *     0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000,
+ *   };
+ *
+ * The attribute ids are defined in a hardware spec.
+ */
+
+static int __guc_action_get_hwconfig(struct intel_guc *guc,
+				     u32 ggtt_offset, u32 ggtt_size)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_GET_HWCONFIG,
+		ggtt_offset,
+		0, /* upper 32 bits of address */
+		ggtt_size,
+	};
+	int ret;
+
+	ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+	if (ret == -ENXIO)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int guc_hwconfig_discover_size(struct intel_guc_hwconfig *hwconfig)
+{
+	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+	int ret;
+
+	/* Sending a query with zero offset and size will return the
+	 * size of the blob.
+	 */
+	ret = __guc_action_get_hwconfig(guc, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		return -EINVAL;
+
+	hwconfig->size = ret;
+	return 0;
+}
+
+static int guc_hwconfig_fill_buffer(struct intel_guc_hwconfig *hwconfig)
+{
+	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+	struct i915_vma *vma;
+	u32 ggtt_offset;
+	void *vaddr;
+	int ret;
+
+	GEM_BUG_ON(!hwconfig->size);
+
+	ret = intel_guc_allocate_and_map_vma(guc, hwconfig->size, &vma, &vaddr);
+	if (ret)
+		return ret;
+
+	ggtt_offset = intel_guc_ggtt_offset(guc, vma);
+
+	ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size);
+	if (ret >= 0)
+		memcpy(hwconfig->ptr, vaddr, hwconfig->size);
+
+	i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP);
+
+	return ret;
+}
+
+/**
+ * intel_guc_hwconfig_fini - Finalize the HWConfig
+ *
+ * Free up the memory allocation holding the table.
+ */
+void intel_guc_hwconfig_fini(struct intel_guc_hwconfig *hwconfig)
+{
+	kfree(hwconfig->ptr);
+	hwconfig->size = 0;
+	hwconfig->ptr = NULL;
+}
+
+/**
+ * intel_guc_hwconfig_init - Initialize the HWConfig
+ *
+ * Retrieve the HWConfig table from the GuC and save it away in a local memory
+ * allocation. It can then be queried on demand by other users later on.
+ */
+int intel_guc_hwconfig_init(struct intel_guc_hwconfig *hwconfig)
+{
+	struct intel_guc *guc = hwconfig_to_guc(hwconfig);
+	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	int ret;
+
+	if (!INTEL_INFO(i915)->has_guc_hwconfig)
+		return 0;
+
+	ret = guc_hwconfig_discover_size(hwconfig);
+	if (ret)
+		return ret;
+
+	hwconfig->ptr = kmalloc(hwconfig->size, GFP_KERNEL);
+	if (!hwconfig->ptr) {
+		hwconfig->size = 0;
+		return -ENOMEM;
+	}
+
+	ret = guc_hwconfig_fill_buffer(hwconfig);
+	if (ret < 0) {
+		intel_guc_hwconfig_fini(hwconfig);
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
new file mode 100644
index 000000000000..bfb90ae168dc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.h
@@ -0,0 +1,19 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_HWCONFIG_H_
+#define _INTEL_GUC_HWCONFIG_H_
+
+#include <linux/types.h>
+
+struct intel_guc_hwconfig {
+	u32 size;
+	void *ptr;
+};
+
+int intel_guc_hwconfig_init(struct intel_guc_hwconfig *hwconfig);
+void intel_guc_hwconfig_fini(struct intel_guc_hwconfig *hwconfig);
+
+#endif /* _INTEL_GUC_HWCONFIG_H_ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 09ed29df67bc..0cefa2a95190 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -489,6 +489,11 @@  static int __uc_init_hw(struct intel_uc *uc)
 	if (ret)
 		goto err_log_capture;
 
+	ret = intel_guc_hwconfig_init(&guc->hwconfig);
+	if (ret)
+		drm_notice(&i915->drm, "Failed to retrieve hwconfig table: %pe\n",
+			   ERR_PTR(ret));
+
 	ret = guc_enable_communication(guc);
 	if (ret)
 		goto err_log_capture;
@@ -562,6 +567,8 @@  static void __uc_fini_hw(struct intel_uc *uc)
 	if (intel_uc_uses_guc_submission(uc))
 		intel_guc_submission_disable(guc);
 
+	intel_guc_hwconfig_fini(&guc->hwconfig);
+
 	__uc_sanitize(uc);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 76e590fcb903..1d31e35a5154 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -990,6 +990,7 @@  static const struct intel_device_info adl_p_info = {
 		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
 	.ppgtt_size = 48,
 	.dma_mask_size = 39,
+	.has_guc_hwconfig = 1,
 };
 
 #undef GEN
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 3699b1c539ea..82d8d6bc30ff 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -133,6 +133,7 @@  enum intel_ppgtt_type {
 	func(gpu_reset_clobbers_display); \
 	func(has_reset_engine); \
 	func(has_global_mocs); \
+	func(has_guc_hwconfig); \
 	func(has_gt_uc); \
 	func(has_l3_dpf); \
 	func(has_llc); \