
[1/4] drm/sched: Add job popping API

Message ID 20250120165240.9105-2-tvrtko.ursulin@igalia.com (mailing list archive)
State New, archived
Series: A bit of struct drm_sched_job cleanup

Commit Message

Tvrtko Ursulin Jan. 20, 2025, 4:52 p.m. UTC
The idea is to add a helper for popping jobs from the entity, with the goal
of making amdgpu use it in the next patch. That way amdgpu will decouple
itself a tiny bit more from scheduler implementation details.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <phasta@kernel.org>
---
 drivers/gpu/drm/scheduler/sched_entity.c |  2 +-
 include/drm/gpu_scheduler.h              | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
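
For context, an illustrative sketch of what the helper enables for a driver
(not taken from patch 2/4, which is not shown here; handle_unsubmitted_job()
is a made-up placeholder):

#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

static void handle_unsubmitted_job(struct drm_sched_job *job); /* hypothetical */

/*
 * Before: the driver keeps a private copy of the scheduler's internal macro
 * and reaches directly into entity->job_queue. The loop only terminates on
 * an empty queue because queue_node happens to be the first member of
 * struct drm_sched_job, so container_of() on NULL stays NULL.
 */
#define to_drm_sched_job(sched_job) \
	container_of((sched_job), struct drm_sched_job, queue_node)

static void drain_entity_before(struct drm_sched_entity *entity)
{
	struct drm_sched_job *job;

	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue))))
		handle_unsubmitted_job(job);
}

/* After: the same loop via the new helper, no scheduler internals exposed. */
static void drain_entity_after(struct drm_sched_entity *entity)
{
	struct drm_sched_job *job;

	while ((job = __drm_sched_entity_pop_job(entity)))
		handle_unsubmitted_job(job);
}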

Comments

Danilo Krummrich Jan. 20, 2025, 5:13 p.m. UTC | #1
On Mon, Jan 20, 2025 at 04:52:37PM +0000, Tvrtko Ursulin wrote:
> Idea is to add a helper for popping jobs from the entity with the goal
> of making amdgpu use it in the next patch. That way amdgpu will decouple
> itself a tiny bit more from scheduler implementation details.

I object to adding this function if it's *only* used for something that looks
like an abuse of the API by amdgpu. Let's not make that more convenient.

> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Danilo Krummrich <dakr@kernel.org>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Philipp Stanner <phasta@kernel.org>
> ---
>  drivers/gpu/drm/scheduler/sched_entity.c |  2 +-
>  include/drm/gpu_scheduler.h              | 21 +++++++++++++++++++++
>  2 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 69bcf0e99d57..7c0d266a89ef 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -255,7 +255,7 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
>  	/* The entity is guaranteed to not be used by the scheduler */
>  	prev = rcu_dereference_check(entity->last_scheduled, true);
>  	dma_fence_get(prev);
> -	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
> +	while ((job = __drm_sched_entity_pop_job(entity))) {
>  		struct drm_sched_fence *s_fence = job->s_fence;
>  
>  		dma_fence_get(&s_fence->finished);
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index a0ff08123f07..092242f2464f 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -611,6 +611,27 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>  bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>  int drm_sched_entity_error(struct drm_sched_entity *entity);
>  
> +/**
> + * __drm_sched_entity_pop_job - Low level helper for popping queued jobs
> + *
> + * @entity: scheduler entity
> + *
> + * Low level helper for popping queued jobs. Drivers should not use it.
> + *
> + * Returns the job dequeued or NULL.
> + */
> +static inline struct drm_sched_job *
> +__drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> +{
> +	struct spsc_node *node;
> +
> +	node = spsc_queue_pop(&entity->job_queue);
> +	if (!node)
> +		return NULL;
> +
> +	return container_of(node, struct drm_sched_job, queue_node);
> +}
> +
>  struct drm_sched_fence *drm_sched_fence_alloc(
>  	struct drm_sched_entity *s_entity, void *owner);
>  void drm_sched_fence_init(struct drm_sched_fence *fence,
> -- 
> 2.47.1
>
Christian König Jan. 20, 2025, 6:49 p.m. UTC | #2
Am 20.01.25 um 18:13 schrieb Danilo Krummrich:
> On Mon, Jan 20, 2025 at 04:52:37PM +0000, Tvrtko Ursulin wrote:
>> Idea is to add a helper for popping jobs from the entity with the goal
>> of making amdgpu use it in the next patch. That way amdgpu will decouple
>> itself a tiny bit more from scheduler implementation details.
> I object to adding this function if it's *only* used for something that looks
> like an abuse of the API by amdgpu. Let's not make that more convenient.

Completely agree. Since when do we have that?

I don't remember agreeing to anything which messes so badly with 
scheduler internals.

Regards,
Christian.

>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>> Cc: Christian König <christian.koenig@amd.com>
>> Cc: Danilo Krummrich <dakr@kernel.org>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: Philipp Stanner <phasta@kernel.org>
>> ---
>>   drivers/gpu/drm/scheduler/sched_entity.c |  2 +-
>>   include/drm/gpu_scheduler.h              | 21 +++++++++++++++++++++
>>   2 files changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 69bcf0e99d57..7c0d266a89ef 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -255,7 +255,7 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
>>   	/* The entity is guaranteed to not be used by the scheduler */
>>   	prev = rcu_dereference_check(entity->last_scheduled, true);
>>   	dma_fence_get(prev);
>> -	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
>> +	while ((job = __drm_sched_entity_pop_job(entity))) {
>>   		struct drm_sched_fence *s_fence = job->s_fence;
>>   
>>   		dma_fence_get(&s_fence->finished);
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index a0ff08123f07..092242f2464f 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -611,6 +611,27 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
>>   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>   int drm_sched_entity_error(struct drm_sched_entity *entity);
>>   
>> +/**
>> + * __drm_sched_entity_pop_job - Low level helper for popping queued jobs
>> + *
>> + * @entity: scheduler entity
>> + *
>> + * Low level helper for popping queued jobs. Drivers should not use it.
>> + *
>> + * Returns the job dequeued or NULL.
>> + */
>> +static inline struct drm_sched_job *
>> +__drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>> +{
>> +	struct spsc_node *node;
>> +
>> +	node = spsc_queue_pop(&entity->job_queue);
>> +	if (!node)
>> +		return NULL;
>> +
>> +	return container_of(node, struct drm_sched_job, queue_node);
>> +}
>> +
>>   struct drm_sched_fence *drm_sched_fence_alloc(
>>   	struct drm_sched_entity *s_entity, void *owner);
>>   void drm_sched_fence_init(struct drm_sched_fence *fence,
>> -- 
>> 2.47.1
>>
Tvrtko Ursulin Jan. 21, 2025, 9:38 a.m. UTC | #3
On 20/01/2025 18:49, Christian König wrote:
> Am 20.01.25 um 18:13 schrieb Danilo Krummrich:
>> On Mon, Jan 20, 2025 at 04:52:37PM +0000, Tvrtko Ursulin wrote:
>>> Idea is to add a helper for popping jobs from the entity with the goal
>>> of making amdgpu use it in the next patch. That way amdgpu will decouple
>>> itself a tiny bit more from scheduler implementation details.
>> I object to adding this function if it's *only* used for something 
>> that looks
>> like an abuse of the API by amdgpu. Let's not make that more convenient.
> 
> Completely agree. Since when do we have that?
> 
> I don't remember agreeing to anything which messes so badly with 
> scheduler internals.

Since 7c6e68c777f1 ("drm/amdgpu: Avoid HW GPU reset for RAS."), where it
looks to be implementing a permanently wedged state - it drops everything
submitted so far and claims to block any future submission.
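
The core of it is popping every queued job straight off the entity and
completing its fences with an error so nothing ever reaches the hardware,
roughly along these lines (a simplified sketch, not the verbatim amdgpu code;
to_drm_sched_job() here is amdgpu's private copy of the scheduler macro):

	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
		struct drm_sched_fence *s_fence = job->s_fence;

		/* Pretend the job ran and fail it so waiters do not hang. */
		dma_fence_signal(&s_fence->scheduled);
		dma_fence_set_error(&s_fence->finished, -EHWPOISON);
		dma_fence_signal(&s_fence->finished);
	}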

Could we remove that part instead and let the unsubmitted jobs be cleaned up
when userspace clients exit? That would need specific hardware to test, so it
would have to be done on the AMD side.

Regards,

Tvrtko

>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>> Cc: Christian König <christian.koenig@amd.com>
>>> Cc: Danilo Krummrich <dakr@kernel.org>
>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>> Cc: Philipp Stanner <phasta@kernel.org>
>>> ---
>>>   drivers/gpu/drm/scheduler/sched_entity.c |  2 +-
>>>   include/drm/gpu_scheduler.h              | 21 +++++++++++++++++++++
>>>   2 files changed, 22 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 69bcf0e99d57..7c0d266a89ef 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -255,7 +255,7 @@ static void drm_sched_entity_kill(struct 
>>> drm_sched_entity *entity)
>>>       /* The entity is guaranteed to not be used by the scheduler */
>>>       prev = rcu_dereference_check(entity->last_scheduled, true);
>>>       dma_fence_get(prev);
>>> -    while ((job = 
>>> to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
>>> +    while ((job = __drm_sched_entity_pop_job(entity))) {
>>>           struct drm_sched_fence *s_fence = job->s_fence;
>>>           dma_fence_get(&s_fence->finished);
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index a0ff08123f07..092242f2464f 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -611,6 +611,27 @@ void drm_sched_entity_set_priority(struct 
>>> drm_sched_entity *entity,
>>>   bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>>   int drm_sched_entity_error(struct drm_sched_entity *entity);
>>> +/**
>>> + * __drm_sched_entity_pop_job - Low level helper for popping queued 
>>> jobs
>>> + *
>>> + * @entity: scheduler entity
>>> + *
>>> + * Low level helper for popping queued jobs. Drivers should not use it.
>>> + *
>>> + * Returns the job dequeued or NULL.
>>> + */
>>> +static inline struct drm_sched_job *
>>> +__drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>> +{
>>> +    struct spsc_node *node;
>>> +
>>> +    node = spsc_queue_pop(&entity->job_queue);
>>> +    if (!node)
>>> +        return NULL;
>>> +
>>> +    return container_of(node, struct drm_sched_job, queue_node);
>>> +}
>>> +
>>>   struct drm_sched_fence *drm_sched_fence_alloc(
>>>       struct drm_sched_entity *s_entity, void *owner);
>>>   void drm_sched_fence_init(struct drm_sched_fence *fence,
>>> -- 
>>> 2.47.1
>>>
>

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 69bcf0e99d57..7c0d266a89ef 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -255,7 +255,7 @@  static void drm_sched_entity_kill(struct drm_sched_entity *entity)
 	/* The entity is guaranteed to not be used by the scheduler */
 	prev = rcu_dereference_check(entity->last_scheduled, true);
 	dma_fence_get(prev);
-	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
+	while ((job = __drm_sched_entity_pop_job(entity))) {
 		struct drm_sched_fence *s_fence = job->s_fence;
 
 		dma_fence_get(&s_fence->finished);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index a0ff08123f07..092242f2464f 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -611,6 +611,27 @@  void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
 int drm_sched_entity_error(struct drm_sched_entity *entity);
 
+/**
+ * __drm_sched_entity_pop_job - Low level helper for popping queued jobs
+ *
+ * @entity: scheduler entity
+ *
+ * Low level helper for popping queued jobs. Drivers should not use it.
+ *
+ * Returns the job dequeued or NULL.
+ */
+static inline struct drm_sched_job *
+__drm_sched_entity_pop_job(struct drm_sched_entity *entity)
+{
+	struct spsc_node *node;
+
+	node = spsc_queue_pop(&entity->job_queue);
+	if (!node)
+		return NULL;
+
+	return container_of(node, struct drm_sched_job, queue_node);
+}
+
 struct drm_sched_fence *drm_sched_fence_alloc(
 	struct drm_sched_entity *s_entity, void *owner);
 void drm_sched_fence_init(struct drm_sched_fence *fence,