diff mbox series

[v5,17/27] drm/amdgpu: Add rw_sem to pushing job into sched queue

Message ID 20210428151207.1212258-18-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series RFC Support hot device unplug in amdgpu | expand

Commit Message

Andrey Grodzovsky April 28, 2021, 3:11 p.m. UTC
Will later be used to block further submissions once the device is
removed. Also complete the scheduler fence if scheduling failed
due to submission blocking.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 13 ++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 14 +++++++++++++-
 4 files changed, 29 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3e4755fc10c8..0db0ba4fba89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1057,6 +1057,9 @@  struct amdgpu_device {
 
 	struct list_head                device_bo_list;
 
+	bool				stop_job_submissions;
+	struct rw_semaphore		sched_fence_completion_sem;
+
 	/* List of all MMIO BOs */
 	struct list_head                mmio_list;
 	struct mutex                    mmio_list_lock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 3e240b952e79..ac092a5eb4e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1256,7 +1256,18 @@  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	trace_amdgpu_cs_ioctl(job);
 	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
-	drm_sched_entity_push_job(&job->base, entity);
+
+	down_read(&p->adev->sched_fence_completion_sem);
+	if (!p->adev->stop_job_submissions) {
+		drm_sched_entity_push_job(&job->base, entity);
+	} else {
+		dma_fence_set_error(&job->base.s_fence->scheduled, -ENODEV);
+		dma_fence_set_error(&job->base.s_fence->finished, -ENODEV);
+		dma_fence_signal(&job->base.s_fence->scheduled);
+		dma_fence_signal(&job->base.s_fence->finished);
+	}
+
+	up_read(&p->adev->sched_fence_completion_sem);
 
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3ddad6cba62d..33e8e9e1d1fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3302,6 +3302,7 @@  int amdgpu_device_init(struct amdgpu_device *adev,
 	init_rwsem(&adev->reset_sem);
 	mutex_init(&adev->psp.mutex);
 	mutex_init(&adev->notifier_lock);
+	init_rwsem(&adev->sched_fence_completion_sem);
 
 	r = amdgpu_device_check_arguments(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d33e6d97cc89..26d8b79ea165 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -162,6 +162,7 @@  int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 		      void *owner, struct dma_fence **f)
 {
 	int r;
+	struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
 
 	if (!f)
 		return -EINVAL;
@@ -172,7 +173,18 @@  int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 
 	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
-	drm_sched_entity_push_job(&job->base, entity);
+
+	down_read(&ring->adev->sched_fence_completion_sem);
+	if (!ring->adev->stop_job_submissions) {
+		drm_sched_entity_push_job(&job->base, entity);
+	} else {
+		dma_fence_set_error(&job->base.s_fence->scheduled, -ENODEV);
+		dma_fence_set_error(&job->base.s_fence->finished, -ENODEV);
+		dma_fence_signal(&job->base.s_fence->scheduled);
+		dma_fence_signal(&job->base.s_fence->finished);
+
+	}
+	up_read(&ring->adev->sched_fence_completion_sem);
 
 	return 0;
 }