diff mbox series

[v2] drm/sched: Further optimise drm_sched_entity_push_job

Message ID 20240916173007.118-1-tursulin@igalia.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/sched: Further optimise drm_sched_entity_push_job | expand

Commit Message

Tvrtko Ursulin Sept. 16, 2024, 5:30 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>

Having removed one re-lock cycle on the entity->lock in a patch titled
"drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
larger refactoring we can do the same optimisation on the rq->lock.
(Currently both drm_sched_rq_add_entity() and
drm_sched_rq_update_fifo_locked() take and release the same lock.)

To achieve this we make drm_sched_rq_update_fifo_locked() and
drm_sched_rq_add_entity() expect the rq->lock to be held.

We also align drm_sched_rq_update_fifo_locked(),
drm_sched_rq_add_entity() and
drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
parameter to the latter.

v2:
 * Fix after rebase of the series.
 * Avoid naming inconsistency between drm_sched_rq_add/remove. (Christian)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
 drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++------------
 include/drm/gpu_scheduler.h              |  3 ++-
 3 files changed, 26 insertions(+), 18 deletions(-)

Comments

Tvrtko Ursulin Sept. 23, 2024, 2:35 p.m. UTC | #1
Ping Christian and Philipp - reasonably happy with v2? I think it's the 
only unreviewed patch from the series.

Regards,

Tvrtko

On 16/09/2024 18:30, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> 
> Having removed one re-lock cycle on the entity->lock in a patch titled
> "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
> larger refactoring we can do the same optimisation on the rq->lock.
> (Currently both drm_sched_rq_add_entity() and
> drm_sched_rq_update_fifo_locked() take and release the same lock.)
> 
> To achieve this we make drm_sched_rq_update_fifo_locked() and
> drm_sched_rq_add_entity() expect the rq->lock to be held.
> 
> We also align drm_sched_rq_update_fifo_locked(),
> drm_sched_rq_add_entity() and
> drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
> parameter to the latter.
> 
> v2:
>   * Fix after rebase of the series.
>   * Avoid naming inconsistency between drm_sched_rq_add/remove. (Christian)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Luben Tuikov <ltuikov89@gmail.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Philipp Stanner <pstanner@redhat.com>
> ---
>   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>   drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++------------
>   include/drm/gpu_scheduler.h              |  3 ++-
>   3 files changed, 26 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index d982cebc6bee..8ace1f1ea66b 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -515,9 +515,14 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   
>   		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>   		if (next) {
> +			struct drm_sched_rq *rq;
> +
>   			spin_lock(&entity->lock);
> -			drm_sched_rq_update_fifo_locked(entity,
> +			rq = entity->rq;
> +			spin_lock(&rq->lock);
> +			drm_sched_rq_update_fifo_locked(entity, rq,
>   							next->submit_ts);
> +			spin_unlock(&rq->lock);
>   			spin_unlock(&entity->lock);
>   		}
>   	}
> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
>   		sched = rq->sched;
>   
>   		atomic_inc(sched->score);
> +
> +		spin_lock(&rq->lock);
>   		drm_sched_rq_add_entity(rq, entity);
>   
>   		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -			drm_sched_rq_update_fifo_locked(entity, submit_ts);
> +			drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>   
> +		spin_unlock(&rq->lock);
>   		spin_unlock(&entity->lock);
>   
>   		drm_sched_wakeup(sched, entity);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 18a952f73ecb..5c83fb92bb89 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -153,17 +153,18 @@ static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
>   	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
>   }
>   
> -static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity)
> +static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
> +					    struct drm_sched_rq *rq)
>   {
> -	struct drm_sched_rq *rq = entity->rq;
> -
>   	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>   		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
>   		RB_CLEAR_NODE(&entity->rb_tree_node);
>   	}
>   }
>   
> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> +				     struct drm_sched_rq *rq,
> +				     ktime_t ts)
>   {
>   	/*
>   	 * Both locks need to be grabbed, one to protect from entity->rq change
> @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts
>   	 * other to update the rb tree structure.
>   	 */
>   	lockdep_assert_held(&entity->lock);
> +	lockdep_assert_held(&rq->lock);
>   
> -	spin_lock(&entity->rq->lock);
> -
> -	drm_sched_rq_remove_fifo_locked(entity);
> +	drm_sched_rq_remove_fifo_locked(entity, rq);
>   
>   	entity->oldest_job_waiting = ts;
>   
> -	rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
> +	rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>   		      drm_sched_entity_compare_before);
> -
> -	spin_unlock(&entity->rq->lock);
>   }
>   
>   /**
> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
>   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>   			     struct drm_sched_entity *entity)
>   {
> +	lockdep_assert_held(&entity->lock);
> +	lockdep_assert_held(&rq->lock);
> +
>   	if (!list_empty(&entity->list))
>   		return;
>   
> -	spin_lock(&rq->lock);
> -
>   	atomic_inc(rq->sched->score);
>   	list_add_tail(&entity->list, &rq->entities);
> -
> -	spin_unlock(&rq->lock);
>   }
>   
>   /**
> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   				struct drm_sched_entity *entity)
>   {
> +	lockdep_assert_held(&entity->lock);
> +
>   	if (list_empty(&entity->list))
>   		return;
>   
> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   		rq->current_entity = NULL;
>   
>   	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -		drm_sched_rq_remove_fifo_locked(entity);
> +		drm_sched_rq_remove_fifo_locked(entity, rq);
>   
>   	spin_unlock(&rq->lock);
>   }
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 80198e6cf537..b21806d5a8eb 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   				struct drm_sched_entity *entity);
>   
> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> +				     struct drm_sched_rq *rq, ktime_t ts);
>   
>   int drm_sched_entity_init(struct drm_sched_entity *entity,
>   			  enum drm_sched_priority priority,
Christian König Sept. 24, 2024, 8:20 a.m. UTC | #2
Am 16.09.24 um 19:30 schrieb Tvrtko Ursulin:
> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>
> Having removed one re-lock cycle on the entity->lock in a patch titled
> "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
> larger refactoring we can do the same optimisation on the rq->lock.
> (Currently both drm_sched_rq_add_entity() and
> drm_sched_rq_update_fifo_locked() take and release the same lock.)
>
> To achieve this we make drm_sched_rq_update_fifo_locked() and
> drm_sched_rq_add_entity() expect the rq->lock to be held.
>
> We also align drm_sched_rq_update_fifo_locked(),
> drm_sched_rq_add_entity() and
> drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
> parameter to the latter.
>
> v2:
>   * Fix after rebase of the series.
>   * Avoid naming inconsistency between drm_sched_rq_add/remove. (Christian)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Luben Tuikov <ltuikov89@gmail.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Philipp Stanner <pstanner@redhat.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>   drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++------------
>   include/drm/gpu_scheduler.h              |  3 ++-
>   3 files changed, 26 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index d982cebc6bee..8ace1f1ea66b 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -515,9 +515,14 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   
>   		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>   		if (next) {
> +			struct drm_sched_rq *rq;
> +
>   			spin_lock(&entity->lock);
> -			drm_sched_rq_update_fifo_locked(entity,
> +			rq = entity->rq;
> +			spin_lock(&rq->lock);
> +			drm_sched_rq_update_fifo_locked(entity, rq,
>   							next->submit_ts);
> +			spin_unlock(&rq->lock);
>   			spin_unlock(&entity->lock);
>   		}
>   	}
> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
>   		sched = rq->sched;
>   
>   		atomic_inc(sched->score);
> +
> +		spin_lock(&rq->lock);
>   		drm_sched_rq_add_entity(rq, entity);
>   
>   		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -			drm_sched_rq_update_fifo_locked(entity, submit_ts);
> +			drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>   
> +		spin_unlock(&rq->lock);
>   		spin_unlock(&entity->lock);
>   
>   		drm_sched_wakeup(sched, entity);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 18a952f73ecb..5c83fb92bb89 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -153,17 +153,18 @@ static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
>   	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
>   }
>   
> -static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity)
> +static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
> +					    struct drm_sched_rq *rq)
>   {
> -	struct drm_sched_rq *rq = entity->rq;
> -
>   	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>   		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
>   		RB_CLEAR_NODE(&entity->rb_tree_node);
>   	}
>   }
>   
> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> +				     struct drm_sched_rq *rq,
> +				     ktime_t ts)
>   {
>   	/*
>   	 * Both locks need to be grabbed, one to protect from entity->rq change
> @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts
>   	 * other to update the rb tree structure.
>   	 */
>   	lockdep_assert_held(&entity->lock);
> +	lockdep_assert_held(&rq->lock);
>   
> -	spin_lock(&entity->rq->lock);
> -
> -	drm_sched_rq_remove_fifo_locked(entity);
> +	drm_sched_rq_remove_fifo_locked(entity, rq);
>   
>   	entity->oldest_job_waiting = ts;
>   
> -	rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
> +	rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>   		      drm_sched_entity_compare_before);
> -
> -	spin_unlock(&entity->rq->lock);
>   }
>   
>   /**
> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
>   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>   			     struct drm_sched_entity *entity)
>   {
> +	lockdep_assert_held(&entity->lock);
> +	lockdep_assert_held(&rq->lock);
> +
>   	if (!list_empty(&entity->list))
>   		return;
>   
> -	spin_lock(&rq->lock);
> -
>   	atomic_inc(rq->sched->score);
>   	list_add_tail(&entity->list, &rq->entities);
> -
> -	spin_unlock(&rq->lock);
>   }
>   
>   /**
> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   				struct drm_sched_entity *entity)
>   {
> +	lockdep_assert_held(&entity->lock);
> +
>   	if (list_empty(&entity->list))
>   		return;
>   
> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   		rq->current_entity = NULL;
>   
>   	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> -		drm_sched_rq_remove_fifo_locked(entity);
> +		drm_sched_rq_remove_fifo_locked(entity, rq);
>   
>   	spin_unlock(&rq->lock);
>   }
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 80198e6cf537..b21806d5a8eb 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>   				struct drm_sched_entity *entity);
>   
> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
> +				     struct drm_sched_rq *rq, ktime_t ts);
>   
>   int drm_sched_entity_init(struct drm_sched_entity *entity,
>   			  enum drm_sched_priority priority,
Tvrtko Ursulin Sept. 24, 2024, 9:45 a.m. UTC | #3
On 24/09/2024 09:20, Christian König wrote:
> Am 16.09.24 um 19:30 schrieb Tvrtko Ursulin:
>> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>
>> Having removed one re-lock cycle on the entity->lock in a patch titled
>> "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
>> larger refactoring we can do the same optimisation on the rq->lock.
>> (Currently both drm_sched_rq_add_entity() and
>> drm_sched_rq_update_fifo_locked() take and release the same lock.)
>>
>> To achieve this we make drm_sched_rq_update_fifo_locked() and
>> drm_sched_rq_add_entity() expect the rq->lock to be held.
>>
>> We also align drm_sched_rq_update_fifo_locked(),
>> drm_sched_rq_add_entity() and
>> drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
>> parameter to the latter.
>>
>> v2:
>>   * Fix after rebase of the series.
>>   * Avoid naming inconsistency between drm_sched_rq_add/remove. 
>> (Christian)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>> Cc: Christian König <christian.koenig@amd.com>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Cc: Luben Tuikov <ltuikov89@gmail.com>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: Philipp Stanner <pstanner@redhat.com>
> 
> Reviewed-by: Christian König <christian.koenig@amd.com>

Thanks!

Are you okay to pull into drm-misc-next or we should do some more 
testing on this?

And/or should I resend the series once more in its entirety so this v2 
is not a reply-to to the original?

Regards,

Tvrtko

> 
>> ---
>>   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>>   drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++------------
>>   include/drm/gpu_scheduler.h              |  3 ++-
>>   3 files changed, 26 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>> b/drivers/gpu/drm/scheduler/sched_entity.c
>> index d982cebc6bee..8ace1f1ea66b 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -515,9 +515,14 @@ struct drm_sched_job 
>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>           next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>>           if (next) {
>> +            struct drm_sched_rq *rq;
>> +
>>               spin_lock(&entity->lock);
>> -            drm_sched_rq_update_fifo_locked(entity,
>> +            rq = entity->rq;
>> +            spin_lock(&rq->lock);
>> +            drm_sched_rq_update_fifo_locked(entity, rq,
>>                               next->submit_ts);
>> +            spin_unlock(&rq->lock);
>>               spin_unlock(&entity->lock);
>>           }
>>       }
>> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct 
>> drm_sched_job *sched_job)
>>           sched = rq->sched;
>>           atomic_inc(sched->score);
>> +
>> +        spin_lock(&rq->lock);
>>           drm_sched_rq_add_entity(rq, entity);
>>           if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>> -            drm_sched_rq_update_fifo_locked(entity, submit_ts);
>> +            drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>> +        spin_unlock(&rq->lock);
>>           spin_unlock(&entity->lock);
>>           drm_sched_wakeup(sched, entity);
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 18a952f73ecb..5c83fb92bb89 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -153,17 +153,18 @@ static __always_inline bool 
>> drm_sched_entity_compare_before(struct rb_node *a,
>>       return ktime_before(ent_a->oldest_job_waiting, 
>> ent_b->oldest_job_waiting);
>>   }
>> -static inline void drm_sched_rq_remove_fifo_locked(struct 
>> drm_sched_entity *entity)
>> +static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity 
>> *entity,
>> +                        struct drm_sched_rq *rq)
>>   {
>> -    struct drm_sched_rq *rq = entity->rq;
>> -
>>       if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>>           rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
>>           RB_CLEAR_NODE(&entity->rb_tree_node);
>>       }
>>   }
>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, 
>> ktime_t ts)
>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>> +                     struct drm_sched_rq *rq,
>> +                     ktime_t ts)
>>   {
>>       /*
>>        * Both locks need to be grabbed, one to protect from entity->rq 
>> change
>> @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct 
>> drm_sched_entity *entity, ktime_t ts
>>        * other to update the rb tree structure.
>>        */
>>       lockdep_assert_held(&entity->lock);
>> +    lockdep_assert_held(&rq->lock);
>> -    spin_lock(&entity->rq->lock);
>> -
>> -    drm_sched_rq_remove_fifo_locked(entity);
>> +    drm_sched_rq_remove_fifo_locked(entity, rq);
>>       entity->oldest_job_waiting = ts;
>> -    rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
>> +    rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>>                 drm_sched_entity_compare_before);
>> -
>> -    spin_unlock(&entity->rq->lock);
>>   }
>>   /**
>> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct 
>> drm_gpu_scheduler *sched,
>>   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>                    struct drm_sched_entity *entity)
>>   {
>> +    lockdep_assert_held(&entity->lock);
>> +    lockdep_assert_held(&rq->lock);
>> +
>>       if (!list_empty(&entity->list))
>>           return;
>> -    spin_lock(&rq->lock);
>> -
>>       atomic_inc(rq->sched->score);
>>       list_add_tail(&entity->list, &rq->entities);
>> -
>> -    spin_unlock(&rq->lock);
>>   }
>>   /**
>> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>                   struct drm_sched_entity *entity)
>>   {
>> +    lockdep_assert_held(&entity->lock);
>> +
>>       if (list_empty(&entity->list))
>>           return;
>> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct 
>> drm_sched_rq *rq,
>>           rq->current_entity = NULL;
>>       if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>> -        drm_sched_rq_remove_fifo_locked(entity);
>> +        drm_sched_rq_remove_fifo_locked(entity, rq);
>>       spin_unlock(&rq->lock);
>>   }
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 80198e6cf537..b21806d5a8eb 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>                   struct drm_sched_entity *entity);
>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, 
>> ktime_t ts);
>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>> +                     struct drm_sched_rq *rq, ktime_t ts);
>>   int drm_sched_entity_init(struct drm_sched_entity *entity,
>>                 enum drm_sched_priority priority,
>
Tvrtko Ursulin Sept. 24, 2024, 9:58 a.m. UTC | #4
On 24/09/2024 10:45, Tvrtko Ursulin wrote:
> 
> On 24/09/2024 09:20, Christian König wrote:
>> Am 16.09.24 um 19:30 schrieb Tvrtko Ursulin:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>>
>>> Having removed one re-lock cycle on the entity->lock in a patch titled
>>> "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
>>> larger refactoring we can do the same optimisation on the rq->lock.
>>> (Currently both drm_sched_rq_add_entity() and
>>> drm_sched_rq_update_fifo_locked() take and release the same lock.)
>>>
>>> To achieve this we make drm_sched_rq_update_fifo_locked() and
>>> drm_sched_rq_add_entity() expect the rq->lock to be held.
>>>
>>> We also align drm_sched_rq_update_fifo_locked(),
>>> drm_sched_rq_add_entity() and
>>> drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
>>> parameter to the latter.
>>>
>>> v2:
>>>   * Fix after rebase of the series.
>>>   * Avoid naming inconsistency between drm_sched_rq_add/remove. 
>>> (Christian)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>> Cc: Christian König <christian.koenig@amd.com>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: Luben Tuikov <ltuikov89@gmail.com>
>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>> Cc: Philipp Stanner <pstanner@redhat.com>
>>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
> 
> Thanks!
> 
> Are you okay to pull into drm-misc-next or we should do some more 
> testing on this?
> 
> And/or should I resend the series once more in its entirety so this v2 
> is not a reply-to to the original?

I have to respin for the drm_sched_wakeup fix that landed.

Regards,

Tvrtko

> 
> Regards,
> 
> Tvrtko
> 
>>
>>> ---
>>>   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>>>   drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++------------
>>>   include/drm/gpu_scheduler.h              |  3 ++-
>>>   3 files changed, 26 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index d982cebc6bee..8ace1f1ea66b 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -515,9 +515,14 @@ struct drm_sched_job 
>>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>>           next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>>>           if (next) {
>>> +            struct drm_sched_rq *rq;
>>> +
>>>               spin_lock(&entity->lock);
>>> -            drm_sched_rq_update_fifo_locked(entity,
>>> +            rq = entity->rq;
>>> +            spin_lock(&rq->lock);
>>> +            drm_sched_rq_update_fifo_locked(entity, rq,
>>>                               next->submit_ts);
>>> +            spin_unlock(&rq->lock);
>>>               spin_unlock(&entity->lock);
>>>           }
>>>       }
>>> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct 
>>> drm_sched_job *sched_job)
>>>           sched = rq->sched;
>>>           atomic_inc(sched->score);
>>> +
>>> +        spin_lock(&rq->lock);
>>>           drm_sched_rq_add_entity(rq, entity);
>>>           if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>> -            drm_sched_rq_update_fifo_locked(entity, submit_ts);
>>> +            drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>>> +        spin_unlock(&rq->lock);
>>>           spin_unlock(&entity->lock);
>>>           drm_sched_wakeup(sched, entity);
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 18a952f73ecb..5c83fb92bb89 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -153,17 +153,18 @@ static __always_inline bool 
>>> drm_sched_entity_compare_before(struct rb_node *a,
>>>       return ktime_before(ent_a->oldest_job_waiting, 
>>> ent_b->oldest_job_waiting);
>>>   }
>>> -static inline void drm_sched_rq_remove_fifo_locked(struct 
>>> drm_sched_entity *entity)
>>> +static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity 
>>> *entity,
>>> +                        struct drm_sched_rq *rq)
>>>   {
>>> -    struct drm_sched_rq *rq = entity->rq;
>>> -
>>>       if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>>>           rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
>>>           RB_CLEAR_NODE(&entity->rb_tree_node);
>>>       }
>>>   }
>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity 
>>> *entity, ktime_t ts)
>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>>> +                     struct drm_sched_rq *rq,
>>> +                     ktime_t ts)
>>>   {
>>>       /*
>>>        * Both locks need to be grabbed, one to protect from 
>>> entity->rq change
>>> @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct 
>>> drm_sched_entity *entity, ktime_t ts
>>>        * other to update the rb tree structure.
>>>        */
>>>       lockdep_assert_held(&entity->lock);
>>> +    lockdep_assert_held(&rq->lock);
>>> -    spin_lock(&entity->rq->lock);
>>> -
>>> -    drm_sched_rq_remove_fifo_locked(entity);
>>> +    drm_sched_rq_remove_fifo_locked(entity, rq);
>>>       entity->oldest_job_waiting = ts;
>>> -    rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
>>> +    rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>>>                 drm_sched_entity_compare_before);
>>> -
>>> -    spin_unlock(&entity->rq->lock);
>>>   }
>>>   /**
>>> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct 
>>> drm_gpu_scheduler *sched,
>>>   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>>                    struct drm_sched_entity *entity)
>>>   {
>>> +    lockdep_assert_held(&entity->lock);
>>> +    lockdep_assert_held(&rq->lock);
>>> +
>>>       if (!list_empty(&entity->list))
>>>           return;
>>> -    spin_lock(&rq->lock);
>>> -
>>>       atomic_inc(rq->sched->score);
>>>       list_add_tail(&entity->list, &rq->entities);
>>> -
>>> -    spin_unlock(&rq->lock);
>>>   }
>>>   /**
>>> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq 
>>> *rq,
>>>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>                   struct drm_sched_entity *entity)
>>>   {
>>> +    lockdep_assert_held(&entity->lock);
>>> +
>>>       if (list_empty(&entity->list))
>>>           return;
>>> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct 
>>> drm_sched_rq *rq,
>>>           rq->current_entity = NULL;
>>>       if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>> -        drm_sched_rq_remove_fifo_locked(entity);
>>> +        drm_sched_rq_remove_fifo_locked(entity, rq);
>>>       spin_unlock(&rq->lock);
>>>   }
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 80198e6cf537..b21806d5a8eb 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq 
>>> *rq,
>>>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>                   struct drm_sched_entity *entity);
>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity 
>>> *entity, ktime_t ts);
>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>>> +                     struct drm_sched_rq *rq, ktime_t ts);
>>>   int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>                 enum drm_sched_priority priority,
>>
Christian König Sept. 24, 2024, 12:02 p.m. UTC | #5
Am 24.09.24 um 11:58 schrieb Tvrtko Ursulin:
>
> On 24/09/2024 10:45, Tvrtko Ursulin wrote:
>>
>> On 24/09/2024 09:20, Christian König wrote:
>>> Am 16.09.24 um 19:30 schrieb Tvrtko Ursulin:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>>>
>>>> Having removed one re-lock cycle on the entity->lock in a patch titled
>>>> "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
>>>> larger refactoring we can do the same optimisation on the rq->lock.
>>>> (Currently both drm_sched_rq_add_entity() and
>>>> drm_sched_rq_update_fifo_locked() take and release the same lock.)
>>>>
>>>> To achieve this we make drm_sched_rq_update_fifo_locked() and
>>>> drm_sched_rq_add_entity() expect the rq->lock to be held.
>>>>
>>>> We also align drm_sched_rq_update_fifo_locked(),
>>>> drm_sched_rq_add_entity() and
>>>> drm_sched_rq_remove_fifo_locked() function signatures, by adding rq 
>>>> as a
>>>> parameter to the latter.
>>>>
>>>> v2:
>>>>   * Fix after rebase of the series.
>>>>   * Avoid naming inconsistency between drm_sched_rq_add/remove. 
>>>> (Christian)
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>>> Cc: Christian König <christian.koenig@amd.com>
>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>> Cc: Luben Tuikov <ltuikov89@gmail.com>
>>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>>> Cc: Philipp Stanner <pstanner@redhat.com>
>>>
>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>
>> Thanks!
>>
>> Are you okay to pull into drm-misc-next or we should do some more 
>> testing on this?
>>
>> And/or should I resend the series once more in its entirety so this 
>> v2 is not a reply-to to the original?
>
> I have to respin for the drm_sched_wakeup fix that landed.

If I should push the series to drm-misc-next then please send it to me 
once more.

On the other hand we should now have two maintainers for that.

Regards,
Christian.

>
> Regards,
>
> Tvrtko
>
>>
>> Regards,
>>
>> Tvrtko
>>
>>>
>>>> ---
>>>>   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>>>>   drivers/gpu/drm/scheduler/sched_main.c   | 29 
>>>> ++++++++++++------------
>>>>   include/drm/gpu_scheduler.h              |  3 ++-
>>>>   3 files changed, 26 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>>> index d982cebc6bee..8ace1f1ea66b 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>>> @@ -515,9 +515,14 @@ struct drm_sched_job 
>>>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>>>           next = 
>>>> to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>>>>           if (next) {
>>>> +            struct drm_sched_rq *rq;
>>>> +
>>>>               spin_lock(&entity->lock);
>>>> -            drm_sched_rq_update_fifo_locked(entity,
>>>> +            rq = entity->rq;
>>>> +            spin_lock(&rq->lock);
>>>> +            drm_sched_rq_update_fifo_locked(entity, rq,
>>>>                               next->submit_ts);
>>>> +            spin_unlock(&rq->lock);
>>>>               spin_unlock(&entity->lock);
>>>>           }
>>>>       }
>>>> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct 
>>>> drm_sched_job *sched_job)
>>>>           sched = rq->sched;
>>>>           atomic_inc(sched->score);
>>>> +
>>>> +        spin_lock(&rq->lock);
>>>>           drm_sched_rq_add_entity(rq, entity);
>>>>           if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>>> -            drm_sched_rq_update_fifo_locked(entity, submit_ts);
>>>> +            drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
>>>> +        spin_unlock(&rq->lock);
>>>>           spin_unlock(&entity->lock);
>>>>           drm_sched_wakeup(sched, entity);
>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>> index 18a952f73ecb..5c83fb92bb89 100644
>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>> @@ -153,17 +153,18 @@ static __always_inline bool 
>>>> drm_sched_entity_compare_before(struct rb_node *a,
>>>>       return ktime_before(ent_a->oldest_job_waiting, 
>>>> ent_b->oldest_job_waiting);
>>>>   }
>>>> -static inline void drm_sched_rq_remove_fifo_locked(struct 
>>>> drm_sched_entity *entity)
>>>> +static void drm_sched_rq_remove_fifo_locked(struct 
>>>> drm_sched_entity *entity,
>>>> +                        struct drm_sched_rq *rq)
>>>>   {
>>>> -    struct drm_sched_rq *rq = entity->rq;
>>>> -
>>>>       if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>>>>           rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
>>>>           RB_CLEAR_NODE(&entity->rb_tree_node);
>>>>       }
>>>>   }
>>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity 
>>>> *entity, ktime_t ts)
>>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>>>> +                     struct drm_sched_rq *rq,
>>>> +                     ktime_t ts)
>>>>   {
>>>>       /*
>>>>        * Both locks need to be grabbed, one to protect from 
>>>> entity->rq change
>>>> @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct 
>>>> drm_sched_entity *entity, ktime_t ts
>>>>        * other to update the rb tree structure.
>>>>        */
>>>>       lockdep_assert_held(&entity->lock);
>>>> +    lockdep_assert_held(&rq->lock);
>>>> -    spin_lock(&entity->rq->lock);
>>>> -
>>>> -    drm_sched_rq_remove_fifo_locked(entity);
>>>> +    drm_sched_rq_remove_fifo_locked(entity, rq);
>>>>       entity->oldest_job_waiting = ts;
>>>> -    rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
>>>> +    rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>>>>                 drm_sched_entity_compare_before);
>>>> -
>>>> -    spin_unlock(&entity->rq->lock);
>>>>   }
>>>>   /**
>>>> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct 
>>>> drm_gpu_scheduler *sched,
>>>>   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>>>                    struct drm_sched_entity *entity)
>>>>   {
>>>> +    lockdep_assert_held(&entity->lock);
>>>> +    lockdep_assert_held(&rq->lock);
>>>> +
>>>>       if (!list_empty(&entity->list))
>>>>           return;
>>>> -    spin_lock(&rq->lock);
>>>> -
>>>>       atomic_inc(rq->sched->score);
>>>>       list_add_tail(&entity->list, &rq->entities);
>>>> -
>>>> -    spin_unlock(&rq->lock);
>>>>   }
>>>>   /**
>>>> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct 
>>>> drm_sched_rq *rq,
>>>>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>>                   struct drm_sched_entity *entity)
>>>>   {
>>>> +    lockdep_assert_held(&entity->lock);
>>>> +
>>>>       if (list_empty(&entity->list))
>>>>           return;
>>>> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct 
>>>> drm_sched_rq *rq,
>>>>           rq->current_entity = NULL;
>>>>       if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>>> -        drm_sched_rq_remove_fifo_locked(entity);
>>>> +        drm_sched_rq_remove_fifo_locked(entity, rq);
>>>>       spin_unlock(&rq->lock);
>>>>   }
>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>>> index 80198e6cf537..b21806d5a8eb 100644
>>>> --- a/include/drm/gpu_scheduler.h
>>>> +++ b/include/drm/gpu_scheduler.h
>>>> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct 
>>>> drm_sched_rq *rq,
>>>>   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>>                   struct drm_sched_entity *entity);
>>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity 
>>>> *entity, ktime_t ts);
>>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>>>> +                     struct drm_sched_rq *rq, ktime_t ts);
>>>>   int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>>                 enum drm_sched_priority priority,
>>>
Philipp Stanner Sept. 26, 2024, 8:15 a.m. UTC | #6
On Mon, 2024-09-23 at 15:35 +0100, Tvrtko Ursulin wrote:
> 
> Ping Christian and Philipp - reasonably happy with v2? I think it's
> the 
> only unreviewed patch from the series.

Howdy,

sorry for the delay, I had been traveling.

I have a few nits below regarding the commit message. Besides, I'm OK
with that, thx for your work :)

> 
> Regards,
> 
> Tvrtko
> 
> On 16/09/2024 18:30, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> > 
> > Having removed one re-lock cycle on the entity->lock in a patch
> > titled
> > "drm/sched: Optimise drm_sched_entity_push_job", 
> > with only a tiny bit
> > larger refactoring we can do the same optimisation 

Well, the commit message does not state which optimization that is. One
would have to look for the previous patch, which you apparently cannot
provide a commit ID for yet because it's not in Big Boss's branch.

In this case I am for including a sentence about what is being
optimized also because

> > on the rq->lock.
> > (Currently both drm_sched_rq_add_entity() and
> > drm_sched_rq_update_fifo_locked() take and release the same lock.)
> > 
> > To achieve this we make drm_sched_rq_update_fifo_locked() and

it's not clear what the "this" that's being achieved is.

> > drm_sched_rq_add_entity() expect the rq->lock to be held.
> > 
> > We also align drm_sched_rq_update_fifo_locked(),
> > drm_sched_rq_add_entity() and
> > drm_sched_rq_remove_fifo_locked() function signatures, by adding rq
> > as a
> > parameter to the latter.
> > 
> > v2:
> >   * Fix after rebase of the series.
> >   * Avoid naming incosistency between drm_sched_rq_add/remove.
> > (Christian)
> > 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>

Reviewed-by: Philipp Stanner <pstanner@redhat.com>

> > Cc: Christian König <christian.koenig@amd.com>
> > Cc: Alex Deucher <alexander.deucher@amd.com>
> > Cc: Luben Tuikov <ltuikov89@gmail.com>
> > Cc: Matthew Brost <matthew.brost@intel.com>
> > Cc: Philipp Stanner <pstanner@redhat.com>
> > ---
> >   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
> >   drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++-------
> > -----
> >   include/drm/gpu_scheduler.h              |  3 ++-
> >   3 files changed, 26 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
> > b/drivers/gpu/drm/scheduler/sched_entity.c
> > index d982cebc6bee..8ace1f1ea66b 100644
> > --- a/drivers/gpu/drm/scheduler/sched_entity.c
> > +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> > @@ -515,9 +515,14 @@ struct drm_sched_job
> > *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> >   
> >   		next = to_drm_sched_job(spsc_queue_peek(&entity-
> > >job_queue));
> >   		if (next) {
> > +			struct drm_sched_rq *rq;
> > +
> >   			spin_lock(&entity->lock);
> > -			drm_sched_rq_update_fifo_locked(entity,
> > +			rq = entity->rq;
> > +			spin_lock(&rq->lock);
> > +			drm_sched_rq_update_fifo_locked(entity,
> > rq,
> >   							next-
> > >submit_ts);
> > +			spin_unlock(&rq->lock);
> >   			spin_unlock(&entity->lock);
> >   		}
> >   	}
> > @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct
> > drm_sched_job *sched_job)
> >   		sched = rq->sched;
> >   
> >   		atomic_inc(sched->score);
> > +
> > +		spin_lock(&rq->lock);
> >   		drm_sched_rq_add_entity(rq, entity);
> >   
> >   		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> > -			drm_sched_rq_update_fifo_locked(entity,
> > submit_ts);
> > +			drm_sched_rq_update_fifo_locked(entity,
> > rq, submit_ts);
> >   
> > +		spin_unlock(&rq->lock);
> >   		spin_unlock(&entity->lock);
> >   
> >   		drm_sched_wakeup(sched, entity);
> > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> > b/drivers/gpu/drm/scheduler/sched_main.c
> > index 18a952f73ecb..5c83fb92bb89 100644
> > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > @@ -153,17 +153,18 @@ static __always_inline bool
> > drm_sched_entity_compare_before(struct rb_node *a,
> >   	return ktime_before(ent_a->oldest_job_waiting, ent_b-
> > >oldest_job_waiting);
> >   }
> >   
> > -static inline void drm_sched_rq_remove_fifo_locked(struct
> > drm_sched_entity *entity)
> > +static void drm_sched_rq_remove_fifo_locked(struct

I think the commit message should contain a short sentence about why
you removed the inline.

AKA "As we're at it, remove the inline function specifier from
drm_sched_rq_remove_fifo_locked() because XYZ"


P.

> > drm_sched_entity *entity,
> > +					    struct drm_sched_rq
> > *rq)
> >   {
> > -	struct drm_sched_rq *rq = entity->rq;
> > -
> >   	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
> >   		rb_erase_cached(&entity->rb_tree_node, &rq-
> > >rb_tree_root);
> >   		RB_CLEAR_NODE(&entity->rb_tree_node);
> >   	}
> >   }
> >   
> > -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > *entity, ktime_t ts)
> > +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > *entity,
> > +				     struct drm_sched_rq *rq,
> > +				     ktime_t ts)
> >   {
> >   	/*
> >   	 * Both locks need to be grabbed, one to protect from
> > entity->rq change
> > @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct
> > drm_sched_entity *entity, ktime_t ts
> >   	 * other to update the rb tree structure.
> >   	 */
> >   	lockdep_assert_held(&entity->lock);
> > +	lockdep_assert_held(&rq->lock);
> >   
> > -	spin_lock(&entity->rq->lock);
> > -
> > -	drm_sched_rq_remove_fifo_locked(entity);
> > +	drm_sched_rq_remove_fifo_locked(entity, rq);
> >   
> >   	entity->oldest_job_waiting = ts;
> >   
> > -	rb_add_cached(&entity->rb_tree_node, &entity->rq-
> > >rb_tree_root,
> > +	rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
> >   		      drm_sched_entity_compare_before);
> > -
> > -	spin_unlock(&entity->rq->lock);
> >   }
> >   
> >   /**
> > @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct
> > drm_gpu_scheduler *sched,
> >   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
> >   			     struct drm_sched_entity *entity)
> >   {
> > +	lockdep_assert_held(&entity->lock);
> > +	lockdep_assert_held(&rq->lock);
> > +
> >   	if (!list_empty(&entity->list))
> >   		return;
> >   
> > -	spin_lock(&rq->lock);
> > -
> >   	atomic_inc(rq->sched->score);
> >   	list_add_tail(&entity->list, &rq->entities);
> > -
> > -	spin_unlock(&rq->lock);
> >   }
> >   
> >   /**
> > @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct
> > drm_sched_rq *rq,
> >   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> >   				struct drm_sched_entity *entity)
> >   {
> > +	lockdep_assert_held(&entity->lock);
> > +
> >   	if (list_empty(&entity->list))
> >   		return;
> >   
> > @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct
> > drm_sched_rq *rq,
> >   		rq->current_entity = NULL;
> >   
> >   	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> > -		drm_sched_rq_remove_fifo_locked(entity);
> > +		drm_sched_rq_remove_fifo_locked(entity, rq);
> >   
> >   	spin_unlock(&rq->lock);
> >   }
> > diff --git a/include/drm/gpu_scheduler.h
> > b/include/drm/gpu_scheduler.h
> > index 80198e6cf537..b21806d5a8eb 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct
> > drm_sched_rq *rq,
> >   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> >   				struct drm_sched_entity *entity);
> >   
> > -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > *entity, ktime_t ts);
> > +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > *entity,
> > +				     struct drm_sched_rq *rq,
> > ktime_t ts);
> >   
> >   int drm_sched_entity_init(struct drm_sched_entity *entity,
> >   			  enum drm_sched_priority priority,
>
Philipp Stanner Sept. 26, 2024, 8:18 a.m. UTC | #7
On Tue, 2024-09-24 at 14:02 +0200, Christian König wrote:
> Am 24.09.24 um 11:58 schrieb Tvrtko Ursulin:
> > 
> > On 24/09/2024 10:45, Tvrtko Ursulin wrote:
> > > 
> > > On 24/09/2024 09:20, Christian König wrote:
> > > > Am 16.09.24 um 19:30 schrieb Tvrtko Ursulin:
> > > > > From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> > > > > 
> > > > > Having removed one re-lock cycle on the entity->lock in a
> > > > > patch titled
> > > > > "drm/sched: Optimise drm_sched_entity_push_job", with only a
> > > > > tiny bit
> > > > > larger refactoring we can do the same optimisation on the rq-
> > > > > >lock.
> > > > > (Currently both drm_sched_rq_add_entity() and
> > > > > drm_sched_rq_update_fifo_locked() take and release the same
> > > > > lock.)
> > > > > 
> > > > > To achieve this we make drm_sched_rq_update_fifo_locked() and
> > > > > drm_sched_rq_add_entity() expect the rq->lock to be held.
> > > > > 
> > > > > We also align drm_sched_rq_update_fifo_locked(),
> > > > > drm_sched_rq_add_entity() and
> > > > > drm_sched_rq_remove_fifo_locked() function signatures, by
> > > > > adding rq 
> > > > > as a
> > > > > parameter to the latter.
> > > > > 
> > > > > v2:
> > > > >   * Fix after rebase of the series.
> > > > >   * Avoid naming incosistency between
> > > > > drm_sched_rq_add/remove. 
> > > > > (Christian)
> > > > > 
> > > > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> > > > > Cc: Christian König <christian.koenig@amd.com>
> > > > > Cc: Alex Deucher <alexander.deucher@amd.com>
> > > > > Cc: Luben Tuikov <ltuikov89@gmail.com>
> > > > > Cc: Matthew Brost <matthew.brost@intel.com>
> > > > > Cc: Philipp Stanner <pstanner@redhat.com>
> > > > 
> > > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > > 
> > > Thanks!
> > > 
> > > Are you okay to pull into drm-misc-next or we should do some more
> > > testing on this?
> > > 
> > > And/or should I resend the series once more in it's entirety so
> > > this 
> > > v2 is not a reply-to to the original?
> > 
> > I have to respin for the drm_sched_wakeup fix that landed.
> 
> When I should push the series to drm-misc-next then please send it to
> me 
> once more.
> 
> On the other hand we should now have to maintainers for that.

Yup, will pick up this responsibilities soonish. Danilo and I have been
at a conference recently and I'm out of office soon for a bit, but you
can expect me / us to take over that work soonish in early autumn.

Regards,
P.

> 
> Regards,
> Christian.
> 
> > 
> > Regards,
> > 
> > Tvrtko
> > 
> > > 
> > > Regards,
> > > 
> > > Tvrtko
> > > 
> > > > 
> > > > > ---
> > > > >   drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
> > > > >   drivers/gpu/drm/scheduler/sched_main.c   | 29 
> > > > > ++++++++++++------------
> > > > >   include/drm/gpu_scheduler.h              |  3 ++-
> > > > >   3 files changed, 26 insertions(+), 18 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
> > > > > b/drivers/gpu/drm/scheduler/sched_entity.c
> > > > > index d982cebc6bee..8ace1f1ea66b 100644
> > > > > --- a/drivers/gpu/drm/scheduler/sched_entity.c
> > > > > +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> > > > > @@ -515,9 +515,14 @@ struct drm_sched_job 
> > > > > *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> > > > >           next = 
> > > > > to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
> > > > >           if (next) {
> > > > > +            struct drm_sched_rq *rq;
> > > > > +
> > > > >               spin_lock(&entity->lock);
> > > > > -            drm_sched_rq_update_fifo_locked(entity,
> > > > > +            rq = entity->rq;
> > > > > +            spin_lock(&rq->lock);
> > > > > +            drm_sched_rq_update_fifo_locked(entity, rq,
> > > > >                               next->submit_ts);
> > > > > +            spin_unlock(&rq->lock);
> > > > >               spin_unlock(&entity->lock);
> > > > >           }
> > > > >       }
> > > > > @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct 
> > > > > drm_sched_job *sched_job)
> > > > >           sched = rq->sched;
> > > > >           atomic_inc(sched->score);
> > > > > +
> > > > > +        spin_lock(&rq->lock);
> > > > >           drm_sched_rq_add_entity(rq, entity);
> > > > >           if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> > > > > -            drm_sched_rq_update_fifo_locked(entity,
> > > > > submit_ts);
> > > > > +            drm_sched_rq_update_fifo_locked(entity, rq,
> > > > > submit_ts);
> > > > > +        spin_unlock(&rq->lock);
> > > > >           spin_unlock(&entity->lock);
> > > > >           drm_sched_wakeup(sched, entity);
> > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
> > > > > b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > index 18a952f73ecb..5c83fb92bb89 100644
> > > > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > @@ -153,17 +153,18 @@ static __always_inline bool 
> > > > > drm_sched_entity_compare_before(struct rb_node *a,
> > > > >       return ktime_before(ent_a->oldest_job_waiting, 
> > > > > ent_b->oldest_job_waiting);
> > > > >   }
> > > > > -static inline void drm_sched_rq_remove_fifo_locked(struct 
> > > > > drm_sched_entity *entity)
> > > > > +static void drm_sched_rq_remove_fifo_locked(struct 
> > > > > drm_sched_entity *entity,
> > > > > +                        struct drm_sched_rq *rq)
> > > > >   {
> > > > > -    struct drm_sched_rq *rq = entity->rq;
> > > > > -
> > > > >       if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
> > > > >           rb_erase_cached(&entity->rb_tree_node, &rq-
> > > > > >rb_tree_root);
> > > > >           RB_CLEAR_NODE(&entity->rb_tree_node);
> > > > >       }
> > > > >   }
> > > > > -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > > > > *entity, ktime_t ts)
> > > > > +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > > > > *entity,
> > > > > +                     struct drm_sched_rq *rq,
> > > > > +                     ktime_t ts)
> > > > >   {
> > > > >       /*
> > > > >        * Both locks need to be grabbed, one to protect from 
> > > > > entity->rq change
> > > > > @@ -171,17 +172,14 @@ void
> > > > > drm_sched_rq_update_fifo_locked(struct 
> > > > > drm_sched_entity *entity, ktime_t ts
> > > > >        * other to update the rb tree structure.
> > > > >        */
> > > > >       lockdep_assert_held(&entity->lock);
> > > > > +    lockdep_assert_held(&rq->lock);
> > > > > -    spin_lock(&entity->rq->lock);
> > > > > -
> > > > > -    drm_sched_rq_remove_fifo_locked(entity);
> > > > > +    drm_sched_rq_remove_fifo_locked(entity, rq);
> > > > >       entity->oldest_job_waiting = ts;
> > > > > -    rb_add_cached(&entity->rb_tree_node, &entity->rq-
> > > > > >rb_tree_root,
> > > > > +    rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
> > > > >                 drm_sched_entity_compare_before);
> > > > > -
> > > > > -    spin_unlock(&entity->rq->lock);
> > > > >   }
> > > > >   /**
> > > > > @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct 
> > > > > drm_gpu_scheduler *sched,
> > > > >   void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
> > > > >                    struct drm_sched_entity *entity)
> > > > >   {
> > > > > +    lockdep_assert_held(&entity->lock);
> > > > > +    lockdep_assert_held(&rq->lock);
> > > > > +
> > > > >       if (!list_empty(&entity->list))
> > > > >           return;
> > > > > -    spin_lock(&rq->lock);
> > > > > -
> > > > >       atomic_inc(rq->sched->score);
> > > > >       list_add_tail(&entity->list, &rq->entities);
> > > > > -
> > > > > -    spin_unlock(&rq->lock);
> > > > >   }
> > > > >   /**
> > > > > @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct 
> > > > > drm_sched_rq *rq,
> > > > >   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> > > > >                   struct drm_sched_entity *entity)
> > > > >   {
> > > > > +    lockdep_assert_held(&entity->lock);
> > > > > +
> > > > >       if (list_empty(&entity->list))
> > > > >           return;
> > > > > @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct 
> > > > > drm_sched_rq *rq,
> > > > >           rq->current_entity = NULL;
> > > > >       if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
> > > > > -        drm_sched_rq_remove_fifo_locked(entity);
> > > > > +        drm_sched_rq_remove_fifo_locked(entity, rq);
> > > > >       spin_unlock(&rq->lock);
> > > > >   }
> > > > > diff --git a/include/drm/gpu_scheduler.h
> > > > > b/include/drm/gpu_scheduler.h
> > > > > index 80198e6cf537..b21806d5a8eb 100644
> > > > > --- a/include/drm/gpu_scheduler.h
> > > > > +++ b/include/drm/gpu_scheduler.h
> > > > > @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct 
> > > > > drm_sched_rq *rq,
> > > > >   void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
> > > > >                   struct drm_sched_entity *entity);
> > > > > -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > > > > *entity, ktime_t ts);
> > > > > +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
> > > > > *entity,
> > > > > +                     struct drm_sched_rq *rq, ktime_t ts);
> > > > >   int drm_sched_entity_init(struct drm_sched_entity *entity,
> > > > >                 enum drm_sched_priority priority,
> > > > 
>
Christian König Sept. 26, 2024, 9:15 a.m. UTC | #8
Am 26.09.24 um 10:18 schrieb Philipp Stanner:
> On Tue, 2024-09-24 at 14:02 +0200, Christian König wrote:
>> Am 24.09.24 um 11:58 schrieb Tvrtko Ursulin:
>>> On 24/09/2024 10:45, Tvrtko Ursulin wrote:
>>>> On 24/09/2024 09:20, Christian König wrote:
>>>>> Am 16.09.24 um 19:30 schrieb Tvrtko Ursulin:
>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>>>>>
>>>>>> Having removed one re-lock cycle on the entity->lock in a
>>>>>> patch titled
>>>>>> "drm/sched: Optimise drm_sched_entity_push_job", with only a
>>>>>> tiny bit
>>>>>> larger refactoring we can do the same optimisation on the rq-
>>>>>>> lock.
>>>>>> (Currently both drm_sched_rq_add_entity() and
>>>>>> drm_sched_rq_update_fifo_locked() take and release the same
>>>>>> lock.)
>>>>>>
>>>>>> To achieve this we make drm_sched_rq_update_fifo_locked() and
>>>>>> drm_sched_rq_add_entity() expect the rq->lock to be held.
>>>>>>
>>>>>> We also align drm_sched_rq_update_fifo_locked(),
>>>>>> drm_sched_rq_add_entity() and
>>>>>> drm_sched_rq_remove_fifo_locked() function signatures, by
>>>>>> adding rq
>>>>>> as a
>>>>>> parameter to the latter.
>>>>>>
>>>>>> v2:
>>>>>>    * Fix after rebase of the series.
>>>>>>    * Avoid naming incosistency between
>>>>>> drm_sched_rq_add/remove.
>>>>>> (Christian)
>>>>>>
>>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>>>> Cc: Luben Tuikov <ltuikov89@gmail.com>
>>>>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>>>>> Cc: Philipp Stanner <pstanner@redhat.com>
>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>> Thanks!
>>>>
>>>> Are you okay to pull into drm-misc-next or we should do some more
>>>> testing on this?
>>>>
>>>> And/or should I resend the series once more in it's entirety so
>>>> this
>>>> v2 is not a reply-to to the original?
>>> I have to respin for the drm_sched_wakeup fix that landed.
>> When I should push the series to drm-misc-next then please send it to
>> me
>> once more.
>>
>> On the other hand we should now have to maintainers for that.
> Yup, will pick up this responsibilities soonish. Danilo and I have been
> on conference recently and I'm out of office soon for a bit, but you
> can expect me / us to take over that work soonish in early autumn.

But please don't push this set. I have messed up the push because I 
didn't realized that the first three patches were supposed to land in 
-fixes instead of -next.

Should be cleaned up my me by the end of the day.

Sorry for the noise,
Christian.

>
> Regards,
> P.
>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>
>>> Tvrtko
>>>
>>>> Regards,
>>>>
>>>> Tvrtko
>>>>
>>>>>> ---
>>>>>>    drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>>>>>>    drivers/gpu/drm/scheduler/sched_main.c   | 29
>>>>>> ++++++++++++------------
>>>>>>    include/drm/gpu_scheduler.h              |  3 ++-
>>>>>>    3 files changed, 26 insertions(+), 18 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
>>>>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>>>>> index d982cebc6bee..8ace1f1ea66b 100644
>>>>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>>>>> @@ -515,9 +515,14 @@ struct drm_sched_job
>>>>>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>>>>>            next =
>>>>>> to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
>>>>>>            if (next) {
>>>>>> +            struct drm_sched_rq *rq;
>>>>>> +
>>>>>>                spin_lock(&entity->lock);
>>>>>> -            drm_sched_rq_update_fifo_locked(entity,
>>>>>> +            rq = entity->rq;
>>>>>> +            spin_lock(&rq->lock);
>>>>>> +            drm_sched_rq_update_fifo_locked(entity, rq,
>>>>>>                                next->submit_ts);
>>>>>> +            spin_unlock(&rq->lock);
>>>>>>                spin_unlock(&entity->lock);
>>>>>>            }
>>>>>>        }
>>>>>> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct
>>>>>> drm_sched_job *sched_job)
>>>>>>            sched = rq->sched;
>>>>>>            atomic_inc(sched->score);
>>>>>> +
>>>>>> +        spin_lock(&rq->lock);
>>>>>>            drm_sched_rq_add_entity(rq, entity);
>>>>>>            if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>>>>> -            drm_sched_rq_update_fifo_locked(entity,
>>>>>> submit_ts);
>>>>>> +            drm_sched_rq_update_fifo_locked(entity, rq,
>>>>>> submit_ts);
>>>>>> +        spin_unlock(&rq->lock);
>>>>>>            spin_unlock(&entity->lock);
>>>>>>            drm_sched_wakeup(sched, entity);
>>>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> index 18a952f73ecb..5c83fb92bb89 100644
>>>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>>>>> @@ -153,17 +153,18 @@ static __always_inline bool
>>>>>> drm_sched_entity_compare_before(struct rb_node *a,
>>>>>>        return ktime_before(ent_a->oldest_job_waiting,
>>>>>> ent_b->oldest_job_waiting);
>>>>>>    }
>>>>>> -static inline void drm_sched_rq_remove_fifo_locked(struct
>>>>>> drm_sched_entity *entity)
>>>>>> +static void drm_sched_rq_remove_fifo_locked(struct
>>>>>> drm_sched_entity *entity,
>>>>>> +                        struct drm_sched_rq *rq)
>>>>>>    {
>>>>>> -    struct drm_sched_rq *rq = entity->rq;
>>>>>> -
>>>>>>        if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>>>>>>            rb_erase_cached(&entity->rb_tree_node, &rq-
>>>>>>> rb_tree_root);
>>>>>>            RB_CLEAR_NODE(&entity->rb_tree_node);
>>>>>>        }
>>>>>>    }
>>>>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>>>>> *entity, ktime_t ts)
>>>>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>>>>> *entity,
>>>>>> +                     struct drm_sched_rq *rq,
>>>>>> +                     ktime_t ts)
>>>>>>    {
>>>>>>        /*
>>>>>>         * Both locks need to be grabbed, one to protect from
>>>>>> entity->rq change
>>>>>> @@ -171,17 +172,14 @@ void
>>>>>> drm_sched_rq_update_fifo_locked(struct
>>>>>> drm_sched_entity *entity, ktime_t ts
>>>>>>         * other to update the rb tree structure.
>>>>>>         */
>>>>>>        lockdep_assert_held(&entity->lock);
>>>>>> +    lockdep_assert_held(&rq->lock);
>>>>>> -    spin_lock(&entity->rq->lock);
>>>>>> -
>>>>>> -    drm_sched_rq_remove_fifo_locked(entity);
>>>>>> +    drm_sched_rq_remove_fifo_locked(entity, rq);
>>>>>>        entity->oldest_job_waiting = ts;
>>>>>> -    rb_add_cached(&entity->rb_tree_node, &entity->rq-
>>>>>>> rb_tree_root,
>>>>>> +    rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>>>>>>                  drm_sched_entity_compare_before);
>>>>>> -
>>>>>> -    spin_unlock(&entity->rq->lock);
>>>>>>    }
>>>>>>    /**
>>>>>> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct
>>>>>> drm_gpu_scheduler *sched,
>>>>>>    void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>>>>>                     struct drm_sched_entity *entity)
>>>>>>    {
>>>>>> +    lockdep_assert_held(&entity->lock);
>>>>>> +    lockdep_assert_held(&rq->lock);
>>>>>> +
>>>>>>        if (!list_empty(&entity->list))
>>>>>>            return;
>>>>>> -    spin_lock(&rq->lock);
>>>>>> -
>>>>>>        atomic_inc(rq->sched->score);
>>>>>>        list_add_tail(&entity->list, &rq->entities);
>>>>>> -
>>>>>> -    spin_unlock(&rq->lock);
>>>>>>    }
>>>>>>    /**
>>>>>> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct
>>>>>> drm_sched_rq *rq,
>>>>>>    void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>>>>                    struct drm_sched_entity *entity)
>>>>>>    {
>>>>>> +    lockdep_assert_held(&entity->lock);
>>>>>> +
>>>>>>        if (list_empty(&entity->list))
>>>>>>            return;
>>>>>> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct
>>>>>> drm_sched_rq *rq,
>>>>>>            rq->current_entity = NULL;
>>>>>>        if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>>>>> -        drm_sched_rq_remove_fifo_locked(entity);
>>>>>> +        drm_sched_rq_remove_fifo_locked(entity, rq);
>>>>>>        spin_unlock(&rq->lock);
>>>>>>    }
>>>>>> diff --git a/include/drm/gpu_scheduler.h
>>>>>> b/include/drm/gpu_scheduler.h
>>>>>> index 80198e6cf537..b21806d5a8eb 100644
>>>>>> --- a/include/drm/gpu_scheduler.h
>>>>>> +++ b/include/drm/gpu_scheduler.h
>>>>>> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct
>>>>>> drm_sched_rq *rq,
>>>>>>    void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>>>>                    struct drm_sched_entity *entity);
>>>>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>>>>> *entity, ktime_t ts);
>>>>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>>>>> *entity,
>>>>>> +                     struct drm_sched_rq *rq, ktime_t ts);
>>>>>>    int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>>>>                  enum drm_sched_priority priority,
Tvrtko Ursulin Sept. 26, 2024, 1:47 p.m. UTC | #9
On 26/09/2024 09:15, Philipp Stanner wrote:
> On Mon, 2024-09-23 at 15:35 +0100, Tvrtko Ursulin wrote:
>>
>> Ping Christian and Philipp - reasonably happy with v2? I think it's
>> the
>> only unreviewed patch from the series.
> 
> Howdy,
> 
> sry for the delay, I had been traveling.
> 
> I have a few nits below regarding the commit message. Besides, I'm OK
> with that, thx for your work :)

No worries.

>> On 16/09/2024 18:30, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
>>>
>>> Having removed one re-lock cycle on the entity->lock in a patch
>>> titled
>>> "drm/sched: Optimise drm_sched_entity_push_job",
>>> with only a tiny bit
>>> larger refactoring we can do the same optimisation
> 
> Well, the commit message does not state which optimization that is. One
> would have to look for the previous patch, which you apparently cannot
> provide a commit ID for yet because it's not in Big Boss's branch.

With added emphasis:

"Having _removed one re-lock cycle_ on the entity->lock..."

"...do the same optimisation on the rq->lock."

How it is not clear?

> In this case I am for including a sentence about what is being
> optimized also because
> 
>>> on the rq->lock.
>>> (Currently both drm_sched_rq_add_entity() and
>>> drm_sched_rq_update_fifo_locked() take and release the same lock.)
>>>
>>> To achieve this we make drm_sched_rq_update_fifo_locked() and
> 
> it's not clear what the "this" that's being achieved is.

"This" is the optimisation previous paragraph talks about.

What/why followed by how.

I honestly think this part of the commit text is good enough.

>>> drm_sched_rq_add_entity() expect the rq->lock to be held.
>>>
>>> We also align drm_sched_rq_update_fifo_locked(),
>>> drm_sched_rq_add_entity() and
>>> drm_sched_rq_remove_fifo_locked() function signatures, by adding rq
>>> as a
>>> parameter to the latter.
>>>
>>> v2:
>>>    * Fix after rebase of the series.
>>>    * Avoid naming inconsistency between drm_sched_rq_add/remove.
>>> (Christian)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> 
> Reviewed-by: Philipp Stanner <pstanner@redhat.com>

Thank you!

>>> Cc: Christian König <christian.koenig@amd.com>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Cc: Luben Tuikov <ltuikov89@gmail.com>
>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>> Cc: Philipp Stanner <pstanner@redhat.com>
>>> ---
>>>    drivers/gpu/drm/scheduler/sched_entity.c | 12 ++++++++--
>>>    drivers/gpu/drm/scheduler/sched_main.c   | 29 ++++++++++++-------
>>> -----
>>>    include/drm/gpu_scheduler.h              |  3 ++-
>>>    3 files changed, 26 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index d982cebc6bee..8ace1f1ea66b 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -515,9 +515,14 @@ struct drm_sched_job
>>> *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>>>    
>>>    		next = to_drm_sched_job(spsc_queue_peek(&entity-
>>>> job_queue));
>>>    		if (next) {
>>> +			struct drm_sched_rq *rq;
>>> +
>>>    			spin_lock(&entity->lock);
>>> -			drm_sched_rq_update_fifo_locked(entity,
>>> +			rq = entity->rq;
>>> +			spin_lock(&rq->lock);
>>> +			drm_sched_rq_update_fifo_locked(entity,
>>> rq,
>>>    							next-
>>>> submit_ts);
>>> +			spin_unlock(&rq->lock);
>>>    			spin_unlock(&entity->lock);
>>>    		}
>>>    	}
>>> @@ -618,11 +623,14 @@ void drm_sched_entity_push_job(struct
>>> drm_sched_job *sched_job)
>>>    		sched = rq->sched;
>>>    
>>>    		atomic_inc(sched->score);
>>> +
>>> +		spin_lock(&rq->lock);
>>>    		drm_sched_rq_add_entity(rq, entity);
>>>    
>>>    		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>> -			drm_sched_rq_update_fifo_locked(entity,
>>> submit_ts);
>>> +			drm_sched_rq_update_fifo_locked(entity,
>>> rq, submit_ts);
>>>    
>>> +		spin_unlock(&rq->lock);
>>>    		spin_unlock(&entity->lock);
>>>    
>>>    		drm_sched_wakeup(sched, entity);
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 18a952f73ecb..5c83fb92bb89 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -153,17 +153,18 @@ static __always_inline bool
>>> drm_sched_entity_compare_before(struct rb_node *a,
>>>    	return ktime_before(ent_a->oldest_job_waiting, ent_b-
>>>> oldest_job_waiting);
>>>    }
>>>    
>>> -static inline void drm_sched_rq_remove_fifo_locked(struct
>>> drm_sched_entity *entity)
>>> +static void drm_sched_rq_remove_fifo_locked(struct
> 
> I think the commit message should contain a short sentence about why
> you removed the inline.
> 
> AKA "As we're at it, remove the inline function specifier from
> drm_sched_rq_remove_fifo_locked() because XYZ"

Fair play on this one, should have mentioned it. Probably just removed 
the inline by habit while touching the function signature. Under the 
"compiler knows better" mantra.

Regards,

Tvrtko

>>> drm_sched_entity *entity,
>>> +					    struct drm_sched_rq
>>> *rq)
>>>    {
>>> -	struct drm_sched_rq *rq = entity->rq;
>>> -
>>>    	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>>>    		rb_erase_cached(&entity->rb_tree_node, &rq-
>>>> rb_tree_root);
>>>    		RB_CLEAR_NODE(&entity->rb_tree_node);
>>>    	}
>>>    }
>>>    
>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>> *entity, ktime_t ts)
>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>> *entity,
>>> +				     struct drm_sched_rq *rq,
>>> +				     ktime_t ts)
>>>    {
>>>    	/*
>>>    	 * Both locks need to be grabbed, one to protect from
>>> entity->rq change
>>> @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct
>>> drm_sched_entity *entity, ktime_t ts
>>>    	 * other to update the rb tree structure.
>>>    	 */
>>>    	lockdep_assert_held(&entity->lock);
>>> +	lockdep_assert_held(&rq->lock);
>>>    
>>> -	spin_lock(&entity->rq->lock);
>>> -
>>> -	drm_sched_rq_remove_fifo_locked(entity);
>>> +	drm_sched_rq_remove_fifo_locked(entity, rq);
>>>    
>>>    	entity->oldest_job_waiting = ts;
>>>    
>>> -	rb_add_cached(&entity->rb_tree_node, &entity->rq-
>>>> rb_tree_root,
>>> +	rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>>>    		      drm_sched_entity_compare_before);
>>> -
>>> -	spin_unlock(&entity->rq->lock);
>>>    }
>>>    
>>>    /**
>>> @@ -213,15 +211,14 @@ static void drm_sched_rq_init(struct
>>> drm_gpu_scheduler *sched,
>>>    void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
>>>    			     struct drm_sched_entity *entity)
>>>    {
>>> +	lockdep_assert_held(&entity->lock);
>>> +	lockdep_assert_held(&rq->lock);
>>> +
>>>    	if (!list_empty(&entity->list))
>>>    		return;
>>>    
>>> -	spin_lock(&rq->lock);
>>> -
>>>    	atomic_inc(rq->sched->score);
>>>    	list_add_tail(&entity->list, &rq->entities);
>>> -
>>> -	spin_unlock(&rq->lock);
>>>    }
>>>    
>>>    /**
>>> @@ -235,6 +232,8 @@ void drm_sched_rq_add_entity(struct
>>> drm_sched_rq *rq,
>>>    void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>    				struct drm_sched_entity *entity)
>>>    {
>>> +	lockdep_assert_held(&entity->lock);
>>> +
>>>    	if (list_empty(&entity->list))
>>>    		return;
>>>    
>>> @@ -247,7 +246,7 @@ void drm_sched_rq_remove_entity(struct
>>> drm_sched_rq *rq,
>>>    		rq->current_entity = NULL;
>>>    
>>>    	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
>>> -		drm_sched_rq_remove_fifo_locked(entity);
>>> +		drm_sched_rq_remove_fifo_locked(entity, rq);
>>>    
>>>    	spin_unlock(&rq->lock);
>>>    }
>>> diff --git a/include/drm/gpu_scheduler.h
>>> b/include/drm/gpu_scheduler.h
>>> index 80198e6cf537..b21806d5a8eb 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -596,7 +596,8 @@ void drm_sched_rq_add_entity(struct
>>> drm_sched_rq *rq,
>>>    void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>    				struct drm_sched_entity *entity);
>>>    
>>> -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>> *entity, ktime_t ts);
>>> +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity
>>> *entity,
>>> +				     struct drm_sched_rq *rq,
>>> ktime_t ts);
>>>    
>>>    int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>    			  enum drm_sched_priority priority,
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index d982cebc6bee..8ace1f1ea66b 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -515,9 +515,14 @@  struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 
 		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
 		if (next) {
+			struct drm_sched_rq *rq;
+
 			spin_lock(&entity->lock);
-			drm_sched_rq_update_fifo_locked(entity,
+			rq = entity->rq;
+			spin_lock(&rq->lock);
+			drm_sched_rq_update_fifo_locked(entity, rq,
 							next->submit_ts);
+			spin_unlock(&rq->lock);
 			spin_unlock(&entity->lock);
 		}
 	}
@@ -618,11 +623,14 @@  void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 		sched = rq->sched;
 
 		atomic_inc(sched->score);
+
+		spin_lock(&rq->lock);
 		drm_sched_rq_add_entity(rq, entity);
 
 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
-			drm_sched_rq_update_fifo_locked(entity, submit_ts);
+			drm_sched_rq_update_fifo_locked(entity, rq, submit_ts);
 
+		spin_unlock(&rq->lock);
 		spin_unlock(&entity->lock);
 
 		drm_sched_wakeup(sched, entity);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 18a952f73ecb..5c83fb92bb89 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -153,17 +153,18 @@  static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
 	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
 }
 
-static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity)
+static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
+					    struct drm_sched_rq *rq)
 {
-	struct drm_sched_rq *rq = entity->rq;
-
 	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
 		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
 		RB_CLEAR_NODE(&entity->rb_tree_node);
 	}
 }
 
-void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
+void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
+				     struct drm_sched_rq *rq,
+				     ktime_t ts)
 {
 	/*
 	 * Both locks need to be grabbed, one to protect from entity->rq change
@@ -171,17 +172,14 @@  void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts
 	 * other to update the rb tree structure.
 	 */
 	lockdep_assert_held(&entity->lock);
+	lockdep_assert_held(&rq->lock);
 
-	spin_lock(&entity->rq->lock);
-
-	drm_sched_rq_remove_fifo_locked(entity);
+	drm_sched_rq_remove_fifo_locked(entity, rq);
 
 	entity->oldest_job_waiting = ts;
 
-	rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
+	rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
 		      drm_sched_entity_compare_before);
-
-	spin_unlock(&entity->rq->lock);
 }
 
 /**
@@ -213,15 +211,14 @@  static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 			     struct drm_sched_entity *entity)
 {
+	lockdep_assert_held(&entity->lock);
+	lockdep_assert_held(&rq->lock);
+
 	if (!list_empty(&entity->list))
 		return;
 
-	spin_lock(&rq->lock);
-
 	atomic_inc(rq->sched->score);
 	list_add_tail(&entity->list, &rq->entities);
-
-	spin_unlock(&rq->lock);
 }
 
 /**
@@ -235,6 +232,8 @@  void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 				struct drm_sched_entity *entity)
 {
+	lockdep_assert_held(&entity->lock);
+
 	if (list_empty(&entity->list))
 		return;
 
@@ -247,7 +246,7 @@  void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 		rq->current_entity = NULL;
 
 	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
-		drm_sched_rq_remove_fifo_locked(entity);
+		drm_sched_rq_remove_fifo_locked(entity, rq);
 
 	spin_unlock(&rq->lock);
 }
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 80198e6cf537..b21806d5a8eb 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -596,7 +596,8 @@  void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 				struct drm_sched_entity *entity);
 
-void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);
+void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
+				     struct drm_sched_rq *rq, ktime_t ts);
 
 int drm_sched_entity_init(struct drm_sched_entity *entity,
 			  enum drm_sched_priority priority,