diff mbox series

[1/2] drm/sched: add drm_sched_prealloc_dependency_slots v2

Message ID 20250321155852.15162-1-christian.koenig@amd.com (mailing list archive)
State New
Headers show
Series [1/2] drm/sched: add drm_sched_prealloc_dependency_slots v2 | expand

Commit Message

Christian König March 21, 2025, 3:58 p.m. UTC
Sometimes drivers need to be able to submit multiple jobs which depend on
each other to different schedulers at the same time, but using
drm_sched_job_add_dependency() can't fail any more after the first job is
initialized.

This function preallocates memory for dependency slots so that no ENOMEM
can come later while adding dependencies.

v2: rework implementation and documentation

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 44 ++++++++++++++++++++++++--
 include/drm/gpu_scheduler.h            |  2 ++
 2 files changed, 43 insertions(+), 3 deletions(-)

Comments

Philipp Stanner March 21, 2025, 6:05 p.m. UTC | #1
On Fri, 2025-03-21 at 16:58 +0100, Christian König wrote:
> Sometimes drivers need to be able to submit multiple jobs which
> depend on
> each other to different schedulers at the same time, but using
> drm_sched_job_add_dependency() can't fail any more after the first
> job is
> initialized.
> 
> This function preallocate memory for dependency slots so that no
> ENOMEM
> can come later while adding dependencies.
> 
> v2: rework implementation an documentation

For drm/sched Danilo & I think that changelogs shouldn't be in the
commit message anymore. The Link: applied by the DRM tools will be
sufficient to find the history in the archives if necessary.

> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 44
> ++++++++++++++++++++++++--
>  include/drm/gpu_scheduler.h            |  2 ++
>  2 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> b/drivers/gpu/drm/scheduler/sched_main.c
> index 4d4219fbe49d..ee3701f346b2 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -852,6 +852,39 @@ void drm_sched_job_arm(struct drm_sched_job
> *job)
>  }
>  EXPORT_SYMBOL(drm_sched_job_arm);
>  
> +/**
> + * drm_sched_job_prealloc_dependency_slots - avoid ENOMEM on adding
> dependencies
> + * @job: scheduler job where dependencies will be added
> + * @num_deps: number of dependencies to preallocate slots for
> +  *
> + * Sometimes drivers need to be able to submit multiple jobs which
> depend on
> + * each other to different schedulers at the same time, but using
> + * drm_sched_job_add_dependency() can't fail any more after the
> first job is
> + * initialized.
> + *
> + * This function preallocate memory for dependency slots so that no
> ENOMEM can
> + * come later while adding dependencies.
> + *
> + * Return:
> + * 0 on success, or an error on failing to expand the array.
> + */
> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job
> *job,
> +					    unsigned int num_deps)
> +{
> +	u32 id = 0;
> +	int ret;
> +
> +	while (num_deps--) {
> +		ret = xa_alloc(&job->dependencies, &id,
> XA_ZERO_ENTRY,
> +			       xa_limit_32b, GFP_KERNEL);

Fine by me, but out of curiosity about the xarray: you mentioned
xa_reserve() might work, too?


P.

> +		if (ret != 0)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(drm_sched_job_prealloc_dependency_slots);
> +
>  /**
>   * drm_sched_job_add_dependency - adds the fence as a job dependency
>   * @job: scheduler job to add the dependencies to
> @@ -878,10 +911,15 @@ int drm_sched_job_add_dependency(struct
> drm_sched_job *job,
>  	 * engines involved, rather than the number of BOs.
>  	 */
>  	xa_for_each(&job->dependencies, index, entry) {
> -		if (entry->context != fence->context)
> +		if (xa_is_zero(entry)) {
> +			/*
> +			 * Reserved entries must not alloc memory,
> but let's
> +			 * use GFP_ATOMIC just to be on the
> defensive side.
> +			*/
> +			xa_store(&job->dependencies, index, fence,
> GFP_ATOMIC);
> +		} else if (entry->context != fence->context) {
>  			continue;
> -
> -		if (dma_fence_is_later(fence, entry)) {
> +		} else if (dma_fence_is_later(fence, entry)) {
>  			dma_fence_put(entry);
>  			xa_store(&job->dependencies, index, fence,
> GFP_KERNEL);
>  		} else {
> diff --git a/include/drm/gpu_scheduler.h
> b/include/drm/gpu_scheduler.h
> index 1a7e377d4cbb..916e820b27ff 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -632,6 +632,8 @@ int drm_sched_job_init(struct drm_sched_job *job,
>  		       u32 credits, void *owner);
>  void drm_sched_job_arm(struct drm_sched_job *job);
>  void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job
> *job,
> +					    unsigned int num_deps);
>  int drm_sched_job_add_dependency(struct drm_sched_job *job,
>  				 struct dma_fence *fence);
>  int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
Tvrtko Ursulin March 24, 2025, 8:35 a.m. UTC | #2
On 21/03/2025 15:58, Christian König wrote:
> Sometimes drivers need to be able to submit multiple jobs which depend on
> each other to different schedulers at the same time, but using
> drm_sched_job_add_dependency() can't fail any more after the first job is
> initialized.
> 
> This function preallocate memory for dependency slots so that no ENOMEM
> can come later while adding dependencies.
> 
> v2: rework implementation an documentation
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 44 ++++++++++++++++++++++++--
>   include/drm/gpu_scheduler.h            |  2 ++
>   2 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 4d4219fbe49d..ee3701f346b2 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -852,6 +852,39 @@ void drm_sched_job_arm(struct drm_sched_job *job)
>   }
>   EXPORT_SYMBOL(drm_sched_job_arm);
>   
> +/**
> + * drm_sched_job_prealloc_dependency_slots - avoid ENOMEM on adding dependencies
> + * @job: scheduler job where dependencies will be added
> + * @num_deps: number of dependencies to preallocate slots for
> +  *
> + * Sometimes drivers need to be able to submit multiple jobs which depend on
> + * each other to different schedulers at the same time, but using
> + * drm_sched_job_add_dependency() can't fail any more after the first job is
> + * initialized.
> + *
> + * This function preallocate memory for dependency slots so that no ENOMEM can
> + * come later while adding dependencies.
> + *
> + * Return:
> + * 0 on success, or an error on failing to expand the array.
> + */
> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job *job,
> +					    unsigned int num_deps)
> +{
> +	u32 id = 0;
> +	int ret;
> +
> +	while (num_deps--) {
> +		ret = xa_alloc(&job->dependencies, &id, XA_ZERO_ENTRY,
> +			       xa_limit_32b, GFP_KERNEL);
> +		if (ret != 0)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(drm_sched_job_prealloc_dependency_slots);
> +
>   /**
>    * drm_sched_job_add_dependency - adds the fence as a job dependency
>    * @job: scheduler job to add the dependencies to
> @@ -878,10 +911,15 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
>   	 * engines involved, rather than the number of BOs.
>   	 */
>   	xa_for_each(&job->dependencies, index, entry) {
> -		if (entry->context != fence->context)
> +		if (xa_is_zero(entry)) {

If xa_for_each() exposes the reserved (zero) entries, I think we also need
to skip them in drm_sched_job_dependency() and drm_sched_job_cleanup(). Or
remove the unused ones at arm time. Hm, we could also add a noisy failure if
drm_sched_job_add_dependency() allocates after a job is armed. (I can imagine
a few unit tests for this once we merge them.)

Regards,

Tvrtko

> +			/*
> +			 * Reserved entries must not alloc memory, but let's
> +			 * use GFP_ATOMIC just to be on the defensive side.
> +			*/
> +			xa_store(&job->dependencies, index, fence, GFP_ATOMIC);
> +		} else if (entry->context != fence->context) {
>   			continue;
> -
> -		if (dma_fence_is_later(fence, entry)) {
> +		} else if (dma_fence_is_later(fence, entry)) {
>   			dma_fence_put(entry);
>   			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
>   		} else {
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 1a7e377d4cbb..916e820b27ff 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -632,6 +632,8 @@ int drm_sched_job_init(struct drm_sched_job *job,
>   		       u32 credits, void *owner);
>   void drm_sched_job_arm(struct drm_sched_job *job);
>   void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job *job,
> +					    unsigned int num_deps);
>   int drm_sched_job_add_dependency(struct drm_sched_job *job,
>   				 struct dma_fence *fence);
>   int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
Christian König March 24, 2025, 12:59 p.m. UTC | #3
Am 21.03.25 um 19:05 schrieb Philipp Stanner:
> On Fri, 2025-03-21 at 16:58 +0100, Christian König wrote:
>> Sometimes drivers need to be able to submit multiple jobs which
>> depend on
>> each other to different schedulers at the same time, but using
>> drm_sched_job_add_dependency() can't fail any more after the first
>> job is
>> initialized.
>>
>> This function preallocate memory for dependency slots so that no
>> ENOMEM
>> can come later while adding dependencies.
>>
>> v2: rework implementation an documentation
> For drm/sched Danilo & I think that changelogs shouldn't be in the
> commit message anymore. The Link: applied by the DRM tools will be
> sufficient to find the history in the archives if necessary.

Sure, going to drop that.

>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>  drivers/gpu/drm/scheduler/sched_main.c | 44
>> ++++++++++++++++++++++++--
>>  include/drm/gpu_scheduler.h            |  2 ++
>>  2 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 4d4219fbe49d..ee3701f346b2 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -852,6 +852,39 @@ void drm_sched_job_arm(struct drm_sched_job
>> *job)
>>  }
>>  EXPORT_SYMBOL(drm_sched_job_arm);
>>  
>> +/**
>> + * drm_sched_job_prealloc_dependency_slots - avoid ENOMEM on adding
>> dependencies
>> + * @job: scheduler job where dependencies will be added
>> + * @num_deps: number of dependencies to preallocate slots for
>> +  *
>> + * Sometimes drivers need to be able to submit multiple jobs which
>> depend on
>> + * each other to different schedulers at the same time, but using
>> + * drm_sched_job_add_dependency() can't fail any more after the
>> first job is
>> + * initialized.
>> + *
>> + * This function preallocate memory for dependency slots so that no
>> ENOMEM can
>> + * come later while adding dependencies.
>> + *
>> + * Return:
>> + * 0 on success, or an error on failing to expand the array.
>> + */
>> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job
>> *job,
>> +					    unsigned int num_deps)
>> +{
>> +	u32 id = 0;
>> +	int ret;
>> +
>> +	while (num_deps--) {
>> +		ret = xa_alloc(&job->dependencies, &id,
>> XA_ZERO_ENTRY,
>> +			       xa_limit_32b, GFP_KERNEL);
> Fine by me, but out of curiousity about the xarray: you mentioned
> xa_reserve() might work, too?

Different use case: xa_reserve() can only reserve a specific index; it cannot allocate one.

Allocating and reserving at the same time seems to be done by the pattern xa_alloc(...XA_ZERO_ENTRY...) like we use here.

Regards,
Christian.

>
>
> P.
>
>> +		if (ret != 0)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL(drm_sched_job_prealloc_dependency_slots);
>> +
>>  /**
>>   * drm_sched_job_add_dependency - adds the fence as a job dependency
>>   * @job: scheduler job to add the dependencies to
>> @@ -878,10 +911,15 @@ int drm_sched_job_add_dependency(struct
>> drm_sched_job *job,
>>  	 * engines involved, rather than the number of BOs.
>>  	 */
>>  	xa_for_each(&job->dependencies, index, entry) {
>> -		if (entry->context != fence->context)
>> +		if (xa_is_zero(entry)) {
>> +			/*
>> +			 * Reserved entries must not alloc memory,
>> but let's
>> +			 * use GFP_ATOMIC just to be on the
>> defensive side.
>> +			*/
>> +			xa_store(&job->dependencies, index, fence,
>> GFP_ATOMIC);
>> +		} else if (entry->context != fence->context) {
>>  			continue;
>> -
>> -		if (dma_fence_is_later(fence, entry)) {
>> +		} else if (dma_fence_is_later(fence, entry)) {
>>  			dma_fence_put(entry);
>>  			xa_store(&job->dependencies, index, fence,
>> GFP_KERNEL);
>>  		} else {
>> diff --git a/include/drm/gpu_scheduler.h
>> b/include/drm/gpu_scheduler.h
>> index 1a7e377d4cbb..916e820b27ff 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -632,6 +632,8 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>  		       u32 credits, void *owner);
>>  void drm_sched_job_arm(struct drm_sched_job *job);
>>  void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
>> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job
>> *job,
>> +					    unsigned int num_deps);
>>  int drm_sched_job_add_dependency(struct drm_sched_job *job,
>>  				 struct dma_fence *fence);
>>  int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
Christian König March 24, 2025, 1:01 p.m. UTC | #4
Am 24.03.25 um 09:35 schrieb Tvrtko Ursulin:
>
> On 21/03/2025 15:58, Christian König wrote:
>> Sometimes drivers need to be able to submit multiple jobs which depend on
>> each other to different schedulers at the same time, but using
>> drm_sched_job_add_dependency() can't fail any more after the first job is
>> initialized.
>>
>> This function preallocate memory for dependency slots so that no ENOMEM
>> can come later while adding dependencies.
>>
>> v2: rework implementation an documentation
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/sched_main.c | 44 ++++++++++++++++++++++++--
>>   include/drm/gpu_scheduler.h            |  2 ++
>>   2 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index 4d4219fbe49d..ee3701f346b2 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -852,6 +852,39 @@ void drm_sched_job_arm(struct drm_sched_job *job)
>>   }
>>   EXPORT_SYMBOL(drm_sched_job_arm);
>>   +/**
>> + * drm_sched_job_prealloc_dependency_slots - avoid ENOMEM on adding dependencies
>> + * @job: scheduler job where dependencies will be added
>> + * @num_deps: number of dependencies to preallocate slots for
>> +  *
>> + * Sometimes drivers need to be able to submit multiple jobs which depend on
>> + * each other to different schedulers at the same time, but using
>> + * drm_sched_job_add_dependency() can't fail any more after the first job is
>> + * initialized.
>> + *
>> + * This function preallocate memory for dependency slots so that no ENOMEM can
>> + * come later while adding dependencies.
>> + *
>> + * Return:
>> + * 0 on success, or an error on failing to expand the array.
>> + */
>> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job *job,
>> +                        unsigned int num_deps)
>> +{
>> +    u32 id = 0;
>> +    int ret;
>> +
>> +    while (num_deps--) {
>> +        ret = xa_alloc(&job->dependencies, &id, XA_ZERO_ENTRY,
>> +                   xa_limit_32b, GFP_KERNEL);
>> +        if (ret != 0)
>> +            return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +EXPORT_SYMBOL(drm_sched_job_prealloc_dependency_slots);
>> +
>>   /**
>>    * drm_sched_job_add_dependency - adds the fence as a job dependency
>>    * @job: scheduler job to add the dependencies to
>> @@ -878,10 +911,15 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
>>        * engines involved, rather than the number of BOs.
>>        */
>>       xa_for_each(&job->dependencies, index, entry) {
>> -        if (entry->context != fence->context)
>> +        if (xa_is_zero(entry)) {
>
> If xa_for_each shows them I think we also need to skip them in drm_sched_job_dependency() and drm_sched_job_cleanup().

Oh, really good point. I've completely forgotten about them.

> Or remove the unused ones at arm time. Hm, we could also put a noisy fail if add_dependency allocates after a job is armed. (I can imagine a few unit tests for this once we merge them.)

Good point as well. I was also waiting for your unit testing stuff to land in drm-misc-next so I can write a test for that.

Thanks,
Christian.

>
> Regards,
>
> Tvrtko
>
>> +            /*
>> +             * Reserved entries must not alloc memory, but let's
>> +             * use GFP_ATOMIC just to be on the defensive side.
>> +            */
>> +            xa_store(&job->dependencies, index, fence, GFP_ATOMIC);
>> +        } else if (entry->context != fence->context) {
>>               continue;
>> -
>> -        if (dma_fence_is_later(fence, entry)) {
>> +        } else if (dma_fence_is_later(fence, entry)) {
>>               dma_fence_put(entry);
>>               xa_store(&job->dependencies, index, fence, GFP_KERNEL);
>>           } else {
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 1a7e377d4cbb..916e820b27ff 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -632,6 +632,8 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>                  u32 credits, void *owner);
>>   void drm_sched_job_arm(struct drm_sched_job *job);
>>   void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
>> +int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job *job,
>> +                        unsigned int num_deps);
>>   int drm_sched_job_add_dependency(struct drm_sched_job *job,
>>                    struct dma_fence *fence);
>>   int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 4d4219fbe49d..ee3701f346b2 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -852,6 +852,39 @@  void drm_sched_job_arm(struct drm_sched_job *job)
 }
 EXPORT_SYMBOL(drm_sched_job_arm);
 
+/**
+ * drm_sched_job_prealloc_dependency_slots - avoid ENOMEM on adding dependencies
+ * @job: scheduler job where dependencies will be added
+ * @num_deps: number of dependencies to preallocate slots for
+ *
+ * Sometimes drivers need to be able to submit multiple jobs which depend on
+ * each other to different schedulers at the same time, but using
+ * drm_sched_job_add_dependency() can't fail any more after the first job is
+ * initialized.
+ *
+ * This function preallocates memory for dependency slots so that no ENOMEM can
+ * come later while adding dependencies.
+ *
+ * Return:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job *job,
+					    unsigned int num_deps)
+{
+	u32 id = 0;
+	int ret;
+
+	while (num_deps--) {
+		ret = xa_alloc(&job->dependencies, &id, XA_ZERO_ENTRY,
+			       xa_limit_32b, GFP_KERNEL);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_sched_job_prealloc_dependency_slots);
+
 /**
  * drm_sched_job_add_dependency - adds the fence as a job dependency
  * @job: scheduler job to add the dependencies to
@@ -878,10 +911,15 @@  int drm_sched_job_add_dependency(struct drm_sched_job *job,
 	 * engines involved, rather than the number of BOs.
 	 */
 	xa_for_each(&job->dependencies, index, entry) {
-		if (entry->context != fence->context)
+		if (xa_is_zero(entry)) {
+			/*
+			 * Reserved entries must not alloc memory, but let's
+			 * use GFP_ATOMIC just to be on the defensive side.
+			*/
+			xa_store(&job->dependencies, index, fence, GFP_ATOMIC);
+		} else if (entry->context != fence->context) {
 			continue;
-
-		if (dma_fence_is_later(fence, entry)) {
+		} else if (dma_fence_is_later(fence, entry)) {
 			dma_fence_put(entry);
 			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
 		} else {
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 1a7e377d4cbb..916e820b27ff 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -632,6 +632,8 @@  int drm_sched_job_init(struct drm_sched_job *job,
 		       u32 credits, void *owner);
 void drm_sched_job_arm(struct drm_sched_job *job);
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job);
+int drm_sched_job_prealloc_dependency_slots(struct drm_sched_job *job,
+					    unsigned int num_deps);
 int drm_sched_job_add_dependency(struct drm_sched_job *job,
 				 struct dma_fence *fence);
 int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,