diff mbox series

[4/4] drm/amdgpu: Avoid accidental thread thread reactivation.

Message ID 1573062672-23698-4-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/4] Revert "drm/amdgpu: dont schedule jobs while in reset" | expand

Commit Message

Andrey Grodzovsky Nov. 6, 2019, 5:51 p.m. UTC
Problem:
During GPU reset we call the GPU scheduler to suspend it's
thread, those two functions in amdgpu also suspend and resume
the sceduler for their needs but this can collide with GPU
reset in progress and accidently restart a suspended thread
before time.

Fix:
Serialize with GPU reset.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 693f17e..8e6726e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -859,6 +859,9 @@  static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 	struct amdgpu_device *adev = dev->dev_private;
 	int r = 0, i;
 
+	/* Avoid accidently unparking the sched thread during GPU reset */
+	mutex_lock(&adev->lock_reset);
+
 	/* hold on the scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -884,6 +887,8 @@  static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 		kthread_unpark(ring->sched.thread);
 	}
 
+	mutex_unlock(&adev->lock_reset);
+
 	return 0;
 }
 
@@ -1036,6 +1041,9 @@  static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 	if (!fences)
 		return -ENOMEM;
 
+	/* Avoid accidently unparking the sched thread during GPU reset */
+	mutex_lock(&adev->lock_reset);
+
 	/* stop the scheduler */
 	kthread_park(ring->sched.thread);
 
@@ -1075,6 +1083,8 @@  static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 	/* restart the scheduler */
 	kthread_unpark(ring->sched.thread);
 
+	mutex_unlock(&adev->lock_reset);
+
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
 
 	kfree(fences);