diff mbox series

[libdrm,1/4] tests/amdgpu: add dispatch hang test

Message ID 20191113045812.24465-1-flora.cui@amd.com (mailing list archive)
State New, archived
Headers show
Series [libdrm,1/4] tests/amdgpu: add dispatch hang test | expand

Commit Message

Flora Cui Nov. 13, 2019, 4:58 a.m. UTC
Add compute and gfx dispatch hang tests for GFX9. Both tests are registered deactivated.

Signed-off-by: Flora Cui <flora.cui@amd.com>
---
 tests/amdgpu/amdgpu_test.c    | 12 +++++++
 tests/amdgpu/amdgpu_test.h    |  1 +
 tests/amdgpu/basic_tests.c    | 67 ++++++++++++++++++++++++++++-------
 tests/amdgpu/deadlock_tests.c | 14 ++++++++
 4 files changed, 81 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
index 94bc3056..3ac9d8d2 100644
--- a/tests/amdgpu/amdgpu_test.c
+++ b/tests/amdgpu/amdgpu_test.c
@@ -460,6 +460,18 @@  static void amdgpu_disable_suites()
 			"illegal mem access test (set amdgpu.vm_fault_stop=2)", CU_FALSE))
 		fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
 
+	/* Only run on GFX9 so far; the family check below is commented out, so the test is deactivated on every family. */
+	//if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV)
+		if (amdgpu_set_test_active(DEADLOCK_TESTS_STR,
+				"gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", CU_FALSE))
+			fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
+	/* Only run on GFX9 so far; keep this name identical to the compute entry in deadlock_tests[]. */
+	//if (family_id < AMDGPU_FAMILY_AI || family_id > AMDGPU_FAMILY_RV)
+		if (amdgpu_set_test_active(DEADLOCK_TESTS_STR,
+				"compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", CU_FALSE))
+			fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
+
 	if (amdgpu_set_test_active(BO_TESTS_STR, "Metadata", CU_FALSE))
 		fprintf(stderr, "test deactivation failed - %s\n", CU_get_error_msg());
 
diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
index 0cb6ee98..2b01bf41 100644
--- a/tests/amdgpu/amdgpu_test.h
+++ b/tests/amdgpu/amdgpu_test.h
@@ -241,6 +241,7 @@  CU_BOOL suite_syncobj_timeline_tests_enable(void);
  */
 extern CU_TestInfo syncobj_timeline_tests[];
 
+void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type);
 
 /**
  * Helper functions
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c
index a57dcbb4..71c9220d 100644
--- a/tests/amdgpu/basic_tests.c
+++ b/tests/amdgpu/basic_tests.c
@@ -311,7 +311,8 @@  static  uint32_t shader_bin[] = {
 
 enum cs_type {
 	CS_BUFFERCLEAR,
-	CS_BUFFERCOPY
+	CS_BUFFERCOPY,
+	CS_HANG
 };
 
 static const uint32_t bufferclear_cs_shader_gfx9[] = {
@@ -473,6 +474,14 @@  static const uint32_t cached_cmd_gfx9[] = {
 	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
 };
 
+unsigned int memcpy_ps_hang[] = { /* shader words loaded for CS_HANG by amdgpu_dispatch_load_cs_shader() */
+        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
+        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
+        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
+        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
+        0xF800180F, 0x03020100, 0xBF810000
+};
+
 int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
 			unsigned alignment, unsigned heap, uint64_t alloc_flags,
 			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
@@ -2189,6 +2198,10 @@  static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
 			shader = buffercopy_cs_shader_gfx9;
 			shader_size = sizeof(buffercopy_cs_shader_gfx9);
 			break;
+		case CS_HANG:
+			shader = memcpy_ps_hang;
+			shader_size = sizeof(memcpy_ps_hang);
+			break;
 		default:
 			return -1;
 			break;
@@ -2409,7 +2422,8 @@  static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
 
 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 					uint32_t ip_type,
-					uint32_t ring)
+					uint32_t ring,
+					int hang)
 {
 	amdgpu_context_handle context_handle;
 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
@@ -2425,7 +2439,8 @@  static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 	int bo_cmd_size = 4096;
 	struct amdgpu_cs_request ibs_request = {0};
 	struct amdgpu_cs_ib_info ib_info= {0};
-	uint32_t expired;
+	uint32_t expired, hang_state, hangs;
+	enum cs_type cs_type;
 	amdgpu_bo_list_handle bo_list;
 	struct amdgpu_cs_fence fence_status = {0};
 
@@ -2446,7 +2461,8 @@  static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 	CU_ASSERT_EQUAL(r, 0);
 	memset(ptr_shader, 0, bo_shader_size);
 
-	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCOPY );
+	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
+	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
 	CU_ASSERT_EQUAL(r, 0);
 
 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
@@ -2532,14 +2548,21 @@  static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 	r = amdgpu_cs_query_fence_status(&fence_status,
 					 AMDGPU_TIMEOUT_INFINITE,
 					 0, &expired);
-	CU_ASSERT_EQUAL(r, 0);
-	CU_ASSERT_EQUAL(expired, true);
 
-	/* verify if memcpy test result meets with expected */
-	i = 0;
-	while(i < bo_dst_size) {
-		CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
-		i++;
+	if (!hang) {
+		CU_ASSERT_EQUAL(r, 0);
+		CU_ASSERT_EQUAL(expired, true);
+
+		/* verify if memcpy test result meets with expected */
+		i = 0;
+		while(i < bo_dst_size) {
+			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
+			i++;
+		}
+	} else {
+		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
+		CU_ASSERT_EQUAL(r, 0);
+		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
 	}
 
 	r = amdgpu_bo_list_destroy(bo_list);
@@ -2573,7 +2596,7 @@  static void amdgpu_compute_dispatch_test(void)
 
 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
-		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
+		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
 	}
 }
 
@@ -2590,7 +2613,25 @@  static void amdgpu_gfx_dispatch_test(void)
 
 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
-		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
+		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
+	}
+}
+
+void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
+{
+	/* Zero-initialised so a failed query cannot leave available_rings holding garbage. */
+	struct drm_amdgpu_info_hw_ip info = {0};
+	uint32_t ring_id;
+	int r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
+
+	CU_ASSERT_EQUAL(r, 0);
+	if (!info.available_rings)
+		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
+	/* On each available ring: a normal copy, then the hanging dispatch, then a normal copy again. */
+	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
+		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
+		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
+		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
 	}
 }
 
diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c
index 7d028829..61342d1a 100644
--- a/tests/amdgpu/deadlock_tests.c
+++ b/tests/amdgpu/deadlock_tests.c
@@ -114,6 +114,8 @@  static void amdgpu_deadlock_compute(void);
 static void amdgpu_illegal_reg_access();
 static void amdgpu_illegal_mem_access();
 static void amdgpu_deadlock_sdma(void);
+static void amdgpu_dispatch_hang_gfx(void);
+static void amdgpu_dispatch_hang_compute(void);
 
 CU_BOOL suite_deadlock_tests_enable(void)
 {
@@ -188,6 +190,8 @@  CU_TestInfo deadlock_tests[] = {
 	{ "sdma ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_sdma },
 	{ "illegal reg access test", amdgpu_illegal_reg_access },
 	{ "illegal mem access test (set amdgpu.vm_fault_stop=2)", amdgpu_illegal_mem_access },
+	{ "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_gfx },
+	{ "compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_compute },
 	CU_TEST_INFO_NULL,
 };
 
@@ -488,3 +492,13 @@  static void amdgpu_illegal_mem_access()
 {
 	bad_access_helper(0);
 }
+
+static void amdgpu_dispatch_hang_gfx(void)
+{
+	amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX); /* dispatch hang sequence on the GFX rings */
+}
+
+static void amdgpu_dispatch_hang_compute(void)
+{
+	amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE); /* dispatch hang sequence on the compute rings */
+}