diff mbox series

[02/33] drm/amdkfd: display debug capabilities

Message ID 20230525172745.702700-2-jonathan.kim@amd.com (mailing list archive)
State New, archived
Headers show
Series [01/33] drm/amdkfd: add debug and runtime enable interface | expand

Commit Message

Kim, Jonathan May 25, 2023, 5:27 p.m. UTC
Expose debug capabilities in the KFD topology node's HSA capabilities and
debug properties flags.

Ensure correct capabilities are exposed based on firmware support.

Flag definitions can be referenced in uapi/linux/kfd_sysfs.h.

v2: rebase topology fw check fix with kfd_node struct update

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 101 ++++++++++++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   6 ++
 include/uapi/linux/kfd_sysfs.h            |  15 ++++
 3 files changed, 117 insertions(+), 5 deletions(-)

Comments

Felix Kuehling May 30, 2023, 7:20 p.m. UTC | #1
Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> Expose debug capabilities in the KFD topology node's HSA capabilities and
> debug properties flags.
>
> Ensure correct capabilities are exposed based on firmware support.
>
> Flag definitions can be referenced in uapi/linux/kfd_sysfs.h.
>
> v2: rebase topology fw check fix with kfd_node struct update
>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 101 ++++++++++++++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   6 ++
>   include/uapi/linux/kfd_sysfs.h            |  15 ++++
>   3 files changed, 117 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 8302d8967158..3def25b2bdbb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -535,6 +535,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
>   				      dev->gpu->kfd->mec_fw_version);
>   		sysfs_show_32bit_prop(buffer, offs, "capability",
>   				      dev->node_props.capability);
> +		sysfs_show_64bit_prop(buffer, offs, "debug_prop",
> +				      dev->node_props.debug_prop);
>   		sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
>   				      dev->gpu->kfd->sdma_fw_version);
>   		sysfs_show_64bit_prop(buffer, offs, "unique_id",
> @@ -1857,6 +1859,97 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
>   	return res;
>   }
>   
> +static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev)
> +{
> +	bool firmware_supported = true;
> +
> +	if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
> +			KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) {
> +		firmware_supported =
> +			(dev->gpu->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 9;
> +		goto out;
> +	}
> +
> +	/*
> +	 * Note: Any unlisted devices here are assumed to support exception handling.
> +	 * Add additional checks here as needed.
> +	 */
> +	switch (KFD_GC_VERSION(dev->gpu)) {
> +	case IP_VERSION(9, 0, 1):
> +		firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768;
> +		break;
> +	case IP_VERSION(9, 1, 0):
> +	case IP_VERSION(9, 2, 1):
> +	case IP_VERSION(9, 2, 2):
> +	case IP_VERSION(9, 3, 0):
> +	case IP_VERSION(9, 4, 0):
> +		firmware_supported = dev->gpu->kfd->mec_fw_version >= 459;
> +		break;
> +	case IP_VERSION(9, 4, 1):
> +		firmware_supported = dev->gpu->kfd->mec_fw_version >= 60;
> +		break;
> +	case IP_VERSION(9, 4, 2):
> +		firmware_supported = dev->gpu->kfd->mec_fw_version >= 51;
> +		break;
> +	case IP_VERSION(10, 1, 10):
> +	case IP_VERSION(10, 1, 2):
> +	case IP_VERSION(10, 1, 1):
> +		firmware_supported = dev->gpu->kfd->mec_fw_version >= 144;
> +		break;
> +	case IP_VERSION(10, 3, 0):
> +	case IP_VERSION(10, 3, 2):
> +	case IP_VERSION(10, 3, 1):
> +	case IP_VERSION(10, 3, 4):
> +	case IP_VERSION(10, 3, 5):
> +		firmware_supported = dev->gpu->kfd->mec_fw_version >= 89;
> +		break;
> +	case IP_VERSION(10, 1, 3):
> +	case IP_VERSION(10, 3, 3):
> +		firmware_supported = false;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +out:
> +	if (firmware_supported)
> +		dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
> +}
> +
> +static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
> +{
> +	dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
> +				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
> +				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
> +
> +	dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT |
> +			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
> +			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
> +
> +	if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
> +		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
> +						HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
> +
> +		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2))
> +			dev->node_props.debug_prop |=
> +				HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> +		else
> +			dev->node_props.capability |=
> +				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
> +	} else {
> +		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
> +					HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
> +
> +		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
> +			dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
> +		else
> +			dev->node_props.capability |=
> +				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
> +	}
> +
> +	kfd_topology_set_dbg_firmware_support(dev);
> +}
> +
>   int kfd_topology_add_device(struct kfd_node *gpu)
>   {
>   	uint32_t gpu_id;
> @@ -1967,13 +2060,11 @@ int kfd_topology_add_device(struct kfd_node *gpu)
>   			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
>   		break;
>   	default:
> -		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1))
> -			dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
> -				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
> -				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
> -		else
> +		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1))
>   			WARN(1, "Unexpected ASIC family %u",
>   			     dev->gpu->adev->asic_type);
> +		else
> +			kfd_topology_set_capabilities(dev);
>   	}
>   
>   	/*
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> index 3b8afb6aba79..cba2cd5ed9d1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> @@ -31,6 +31,11 @@
>   
>   #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
>   
> +#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9	6
> +#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10	7
> +#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT  \
> +			(29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
> +
>   struct kfd_node_properties {
>   	uint64_t hive_id;
>   	uint32_t cpu_cores_count;
> @@ -42,6 +47,7 @@ struct kfd_node_properties {
>   	uint32_t cpu_core_id_base;
>   	uint32_t simd_id_base;
>   	uint32_t capability;
> +	uint64_t debug_prop;
>   	uint32_t max_waves_per_simd;
>   	uint32_t lds_size_in_kb;
>   	uint32_t gds_size_in_kb;
> diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
> index 3e330f368917..a51b7331e0b4 100644
> --- a/include/uapi/linux/kfd_sysfs.h
> +++ b/include/uapi/linux/kfd_sysfs.h
> @@ -43,6 +43,11 @@
>   #define HSA_CAP_DOORBELL_TYPE_2_0		0x2
>   #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
>   
> +#define HSA_CAP_TRAP_DEBUG_SUPPORT              0x00008000
> +#define HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED  0x00010000
> +#define HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED           0x00020000
> +#define HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED  0x00040000
> +
>   /* Old buggy user mode depends on this being 0 */
>   #define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED	0x00080000
>   
> @@ -53,8 +58,18 @@
>   #define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000
>   #define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
>   #define HSA_CAP_FLAGS_COHERENTHOSTACCESS	0x10000000
> +#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED   0x20000000
>   #define HSA_CAP_RESERVED			0xe00f8000
>   
> +/* debug_prop bits in node properties */
> +#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK     0x0000000f
> +#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_SHIFT    0
> +#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_MASK     0x000003f0
> +#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT    4
> +#define HSA_DBG_DISPATCH_INFO_ALWAYS_VALID      0x00000400
> +#define HSA_DBG_WATCHPOINTS_EXCLUSIVE           0x00000800
> +#define HSA_DBG_RESERVED                0xfffffffffffff000ull
> +
>   /* Heap types in memory properties */
>   #define HSA_MEM_HEAP_TYPE_SYSTEM	0
>   #define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 8302d8967158..3def25b2bdbb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -535,6 +535,8 @@  static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 				      dev->gpu->kfd->mec_fw_version);
 		sysfs_show_32bit_prop(buffer, offs, "capability",
 				      dev->node_props.capability);
+		sysfs_show_64bit_prop(buffer, offs, "debug_prop",
+				      dev->node_props.debug_prop);
 		sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
 				      dev->gpu->kfd->sdma_fw_version);
 		sysfs_show_64bit_prop(buffer, offs, "unique_id",
@@ -1857,6 +1859,97 @@  static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
 	return res;
 }
 
+static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev)
+{
+	bool firmware_supported = true;
+
+	if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) {
+		firmware_supported =
+			(dev->gpu->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 9;
+		goto out;
+	}
+
+	/*
+	 * Note: Any unlisted devices here are assumed to support exception handling.
+	 * Add additional checks here as needed.
+	 */
+	switch (KFD_GC_VERSION(dev->gpu)) {
+	case IP_VERSION(9, 0, 1):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768;
+		break;
+	case IP_VERSION(9, 1, 0):
+	case IP_VERSION(9, 2, 1):
+	case IP_VERSION(9, 2, 2):
+	case IP_VERSION(9, 3, 0):
+	case IP_VERSION(9, 4, 0):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 459;
+		break;
+	case IP_VERSION(9, 4, 1):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 60;
+		break;
+	case IP_VERSION(9, 4, 2):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 51;
+		break;
+	case IP_VERSION(10, 1, 10):
+	case IP_VERSION(10, 1, 2):
+	case IP_VERSION(10, 1, 1):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 144;
+		break;
+	case IP_VERSION(10, 3, 0):
+	case IP_VERSION(10, 3, 2):
+	case IP_VERSION(10, 3, 1):
+	case IP_VERSION(10, 3, 4):
+	case IP_VERSION(10, 3, 5):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 89;
+		break;
+	case IP_VERSION(10, 1, 3):
+	case IP_VERSION(10, 3, 3):
+		firmware_supported = false;
+		break;
+	default:
+		break;
+	}
+
+out:
+	if (firmware_supported)
+		dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
+}
+
+static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
+{
+	dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+
+	dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT |
+			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
+			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
+
+	if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
+		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
+						HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 4, 2))
+			dev->node_props.debug_prop |=
+				HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+		else
+			dev->node_props.capability |=
+				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+	} else {
+		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
+					HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
+			dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+		else
+			dev->node_props.capability |=
+				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+	}
+
+	kfd_topology_set_dbg_firmware_support(dev);
+}
+
 int kfd_topology_add_device(struct kfd_node *gpu)
 {
 	uint32_t gpu_id;
@@ -1967,13 +2060,11 @@  int kfd_topology_add_device(struct kfd_node *gpu)
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
 		break;
 	default:
-		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1))
-			dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
-				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
-				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
-		else
+		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1))
 			WARN(1, "Unexpected ASIC family %u",
 			     dev->gpu->adev->asic_type);
+		else
+			kfd_topology_set_capabilities(dev);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 3b8afb6aba79..cba2cd5ed9d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -31,6 +31,11 @@ 
 
 #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
 
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9	6
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10	7
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT  \
+			(29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+
 struct kfd_node_properties {
 	uint64_t hive_id;
 	uint32_t cpu_cores_count;
@@ -42,6 +47,7 @@  struct kfd_node_properties {
 	uint32_t cpu_core_id_base;
 	uint32_t simd_id_base;
 	uint32_t capability;
+	uint64_t debug_prop;
 	uint32_t max_waves_per_simd;
 	uint32_t lds_size_in_kb;
 	uint32_t gds_size_in_kb;
diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h
index 3e330f368917..a51b7331e0b4 100644
--- a/include/uapi/linux/kfd_sysfs.h
+++ b/include/uapi/linux/kfd_sysfs.h
@@ -43,6 +43,11 @@ 
 #define HSA_CAP_DOORBELL_TYPE_2_0		0x2
 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
 
+#define HSA_CAP_TRAP_DEBUG_SUPPORT              0x00008000
+#define HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED  0x00010000
+#define HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED           0x00020000
+#define HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED  0x00040000
+
 /* Old buggy user mode depends on this being 0 */
 #define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED	0x00080000
 
@@ -53,8 +58,18 @@ 
 #define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000
 #define HSA_CAP_SVMAPI_SUPPORTED		0x08000000
 #define HSA_CAP_FLAGS_COHERENTHOSTACCESS	0x10000000
+#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED   0x20000000
 #define HSA_CAP_RESERVED			0xe00f8000
 
+/* debug_prop bits in node properties */
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK     0x0000000f
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_SHIFT    0
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_MASK     0x000003f0
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT    4
+#define HSA_DBG_DISPATCH_INFO_ALWAYS_VALID      0x00000400
+#define HSA_DBG_WATCHPOINTS_EXCLUSIVE           0x00000800
+#define HSA_DBG_RESERVED                0xfffffffffffff000ull
+
 /* Heap types in memory properties */
 #define HSA_MEM_HEAP_TYPE_SYSTEM	0
 #define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1