[RFC] drm/sched: Make sure drm_sched_fence_ops don't vanish

Message ID 20240830104057.1479321-1-boris.brezillon@collabora.com (mailing list archive)
State New, archived
Series [RFC] drm/sched: Make sure drm_sched_fence_ops don't vanish

Commit Message

Boris Brezillon Aug. 30, 2024, 10:40 a.m. UTC
dma_fence objects created by drm_sched might hit other subsystems, which
means the fence object might potentially outlive the drm_sched module
lifetime, and at this point the dma_fence::ops points to a memory region
that no longer exists.

In order to fix that, let's make sure the drm_sched_fence code is always
statically linked, just like dma-fence{-chain}.c does.

Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Danilo Krummrich <dakr@redhat.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
Just like for the other UAF fix, this is an RFC, as I'm not sure that's
how we want it fixed. The other option we have is to retain a module ref
for each initialized fence and release it when the fence is freed.
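For the record, that alternative would look roughly like this (untested
sketch; drm_sched_fence_release() stands in for the real release
callbacks, and note it has its own subtlety, since module_put() itself
still runs from module text):

/* Hypothetical module-ref variant, not what this patch does. */
void drm_sched_fence_init(struct drm_sched_fence *fence,
                          struct drm_sched_entity *entity)
{
        /* ... existing init code ... */

        /* Pin the module for as long as the fence exists. */
        __module_get(THIS_MODULE);
}

static void drm_sched_fence_release(struct dma_fence *f)
{
        /* ... existing release code ... */

        /*
         * Unpin on the last fence ref. Still subtle: we are executing
         * module code at the point the module ref is dropped.
         */
        module_put(THIS_MODULE);
}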
---
 drivers/gpu/drm/Makefile                |  2 +-
 drivers/gpu/drm/scheduler/Makefile      |  7 ++++++-
 drivers/gpu/drm/scheduler/sched_fence.c | 21 +++++++++------------
 drivers/gpu/drm/scheduler/sched_main.c  |  3 +++
 4 files changed, 19 insertions(+), 14 deletions(-)

Comments

Matthew Brost Aug. 30, 2024, 10:28 p.m. UTC | #1
On Fri, Aug 30, 2024 at 12:40:57PM +0200, Boris Brezillon wrote:
> dma_fence objects created by drm_sched might hit other subsystems, which
> means the fence object might potentially outlive the drm_sched module
> lifetime, and at this point the dma_fence::ops points to a memory region
> that no longer exists.
> 
> In order to fix that, let's make sure the drm_sched_fence code is always
> statically linked, just like dma-fence{-chain}.c does.
> 
> Cc: Luben Tuikov <ltuikov89@gmail.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: "Christian König" <christian.koenig@amd.com>
> Cc: Danilo Krummrich <dakr@redhat.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> Just like for the other UAF fix, this is an RFC, as I'm not sure that's
> how we want it fixed. The other option we have is to retain a module ref
> for each initialized fence and release it when the fence is freed.

So this is different from your other patch. From the Xe PoV, the other
patch references an object which is tied to an IOCTL rather than a
module, whereas this one references a module. If a module can disappear
while fences tied to it still exist, well, that is a bit scary and Xe
might have some issues there - I'd have to audit our exporting of
internally created fences.

Based on Christian's feedback we really shouldn't be allowing this but
also don't really love the idea of a fence holding a module ref either.

Seems like we need a well-defined + documented rule - exported fences
need to be completely decoupled from the module upon signaling or
exported fences must retain a module ref. I'd probably lean towards the
former. One reason to support the former is that fences can be released
in IRQ context, and dropping a module ref in IRQ context seems a bit
problematic. Also, having some other part of the kernel hold on to a
fence ref and lazily block a module unload just seems wrong.

Sorry if we already have a well-defined rule for the above and I'm just not aware of it.

Matt 

Boris Brezillon Aug. 31, 2024, 7:13 a.m. UTC | #2
Hi Matthew,

On Fri, 30 Aug 2024 22:28:19 +0000
Matthew Brost <matthew.brost@intel.com> wrote:

> On Fri, Aug 30, 2024 at 12:40:57PM +0200, Boris Brezillon wrote:
> > dma_fence objects created by drm_sched might hit other subsystems, which
> > means the fence object might potentially outlive the drm_sched module
> > lifetime, and at this point the dma_fence::ops points to a memory region
> > that no longer exists.
> > 
> > In order to fix that, let's make sure the drm_sched_fence code is always
> > statically linked, just like dma-fence{-chain}.c does.
> > 
> > Cc: Luben Tuikov <ltuikov89@gmail.com>
> > Cc: Matthew Brost <matthew.brost@intel.com>
> > Cc: "Christian König" <christian.koenig@amd.com>
> > Cc: Danilo Krummrich <dakr@redhat.com>
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > ---
> > Just like for the other UAF fix, this is an RFC, as I'm not sure that's
> > how we want it fixed. The other option we have is to retain a module ref
> > for each initialized fence and release it when the fence is freed.  
> 
> So this is different from your other patch. From the Xe PoV, the other
> patch references an object which is tied to an IOCTL rather than a
> module, whereas this one references a module.

Well, it's not fixing the exact same problem either. My other patch was
about making sure the timeline name string doesn't disappear when a
scheduler is destroyed, which can happen while the module is still
loaded. As Christian pointed out, the "module unload while fences
exist" problem can be solved by binding the module refcounting to
the drm_sched_fence_timeline object lifetime, but I wanted to show a
simpler alternative to this approach with this patch.

> If a module can disappear
> while fences tied to it still exist, well, that is a bit scary and Xe
> might have some issues there - I'd have to audit our exporting of
> internally created fences.

Yeah, I moved away from exposing driver fences directly because of
that. Now all I expose to the outside world are drm_sched_fence
objects, which just moves the problem one level down, but at least we
can fix it generically if all the drivers take this precaution.

> 
> Based on Christian's feedback we really shouldn't be allowing this but
> also don't really love the idea of a fence holding a module ref either.
> 
> Seems like we need a well-defined + documented rule - exported fences
> need to be completely decoupled from the module upon signaling

That basically means patching drm_sched_fence::ops (with the lock held)
at signal time so it points to a dummy ops that's statically linked
(something in dma-fence.c, I guess). But that also means the names
returned by get_{driver,timeline}_name() no longer reflect the original
fence owner after signalling, or you have to do some allocation+copy
of these strings. Neither of these options sounds good to me.
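
For reference, the ops-patching variant would look roughly like this
(untested, all names made up), and it shows exactly how the name
information gets lost after signaling:

static const char *dma_fence_detached_name(struct dma_fence *f)
{
        /* The original owner is lost at this point. */
        return "detached";
}

/* Would live in dma-fence.c, so it can never be unloaded. */
static const struct dma_fence_ops dma_fence_detached_ops = {
        .get_driver_name = dma_fence_detached_name,
        .get_timeline_name = dma_fence_detached_name,
};

static void dma_fence_detach_ops(struct dma_fence *f)
{
        unsigned long flags;

        spin_lock_irqsave(f->lock, flags);
        /* No module code is ever reached through f->ops again. */
        f->ops = &dma_fence_detached_ops;
        spin_unlock_irqrestore(f->lock, flags);
}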

> or
> exported fences must retain a module ref.

Yes, and that's the option I was originally heading to, until I
realized we have a 3rd option that's already working well for the core
dma-fence code (AKA the stub, the chain, and other containers I might
have missed). If the code is not in a module but instead statically
linked, all our module-lifetime vs fence-lifetime issues go away
without resorting to this module refcounting trick. Given sched_fence.c
is pretty self-contained (no deps on other DRM functions that might be
linked as a module), I now think this is our best shot for drm_sched
fences.

The story is a bit different for driver fences exposed to the outside
world, but if you're using drm_sched, I see no good reason for not using
the drm_sched_fence proxy for all your fences. Note that the same kind
of proxy can be generalized at the dma-fence.c level if it's deemed
valuable for other subsystems.

The idea behind it being that:

- the dma_fence_proxy ops should live in dma-fence.c (or any other
  object that's statically linked)
- driver and timeline names attached to a proxy need to be dynamically
  allocated and owned by some dma_fence_proxy_context object
- the dma_fence_proxy_context object needs to be refcounted, and each
  fence attached to this 'fence-context' needs to hold a ref on this
  struct
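
To make that more concrete, a made-up sketch (none of this exists
today, it's just the shape I have in mind):

struct dma_fence_proxy_context {
        struct kref refcount;
        u64 context;
        /* kstrdup()-ed, so they outlive the driver module. */
        const char *driver_name;
        const char *timeline_name;
};

struct dma_fence_proxy {
        struct dma_fence base;
        struct dma_fence_proxy_context *ctx;    /* holds a ctx ref */
        spinlock_t lock;
};

static void dma_fence_proxy_context_release(struct kref *kref)
{
        struct dma_fence_proxy_context *ctx =
                container_of(kref, struct dma_fence_proxy_context, refcount);

        kfree(ctx->driver_name);
        kfree(ctx->timeline_name);
        kfree(ctx);
}

static const char *dma_fence_proxy_get_driver_name(struct dma_fence *f)
{
        struct dma_fence_proxy *p =
                container_of(f, struct dma_fence_proxy, base);

        /* Valid for as long as the fence holds its ctx ref. */
        return p->ctx->driver_name;
}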

> I'd probably lean towards the
> former.

For the reasons explained above, I don't think that's a viable option,
unless we decide we no longer care about the fence origin after
signalling has happened (which would be a mess for people looking at a
dma_buf's debug file, to be honest).

> One reason to support the former is that fences can be released
> in IRQ context, and dropping a module ref in IRQ context seems a bit
> problematic. Also, having some other part of the kernel hold on to a
> fence ref and lazily block a module unload just seems wrong.

There's always the option to defer the release if we're in a context
where we can't release immediately. But I think we're overthinking it.
For the drm_sched_fence, the simple answer is to make this code static,
just like dma-fence{-chain}.c.

> 
> Sorry if we already have a well-defined rule for the above and I'm just not aware of it.

AFAICT, it's not.

Regards,

Boris
Christian König Sept. 2, 2024, 10:58 a.m. UTC | #3
Am 31.08.24 um 00:28 schrieb Matthew Brost:
> On Fri, Aug 30, 2024 at 12:40:57PM +0200, Boris Brezillon wrote:
>> dma_fence objects created by drm_sched might hit other subsystems, which
>> means the fence object might potentially outlive the drm_sched module
>> lifetime, and at this point the dma_fence::ops points to a memory region
>> that no longer exists.
>>
>> In order to fix that, let's make sure the drm_sched_fence code is always
>> statically linked, just like dma-fence{-chain}.c does.
>>
>> Cc: Luben Tuikov <ltuikov89@gmail.com>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: "Christian König" <christian.koenig@amd.com>
>> Cc: Danilo Krummrich <dakr@redhat.com>
>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>> ---
>> Just like for the other UAF fix, this is an RFC, as I'm not sure that's
>> how we want it fixed. The other option we have is to retain a module ref
>> for each initialized fence and release it when the fence is freed.
> So this is different from your other patch. From the Xe PoV, the other
> patch references an object which is tied to an IOCTL rather than a
> module, whereas this one references a module. If a module can disappear
> while fences tied to it still exist, well, that is a bit scary and Xe
> might have some issues there - I'd have to audit our exporting of
> internally created fences.
>
> Based on Christian's feedback we really shouldn't be allowing this but
> also don't really love the idea of a fence holding a module ref either.

IIRC the initial proposal for dma_fences actually contained grabbing a 
module reference, similar to what we do for dma-bufs.

But I think that was dropped because of the circular dependency issues
and because it prevented module unload. After that nobody really looked
into it again.

I think using the scheduler fence to decouple the hardware fence
lifetime should work. We would just need to drop the hardware fence
reference once the scheduler fence has signaled, not when it is destroyed.
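
Something along these lines (made-up callback and field names, just to
show where the reference would be dropped):

static void drm_sched_hw_fence_signaled_cb(struct dma_fence *hw_fence,
                                           struct dma_fence_cb *cb)
{
        struct drm_sched_fence *s_fence =
                container_of(cb, struct drm_sched_fence, finished_cb);

        drm_sched_fence_finished(s_fence, hw_fence->error);

        /* The hardware fence isn't needed past this point anymore. */
        dma_fence_put(s_fence->parent);
        s_fence->parent = NULL;
}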

This unfortunately also creates another problem for error recovery, but
that is a different topic I think.

> Seems like we need a well-defined + documented rule - exported fences
> need to be completely decoupled from the module upon signaling or
> exported fences must retain a module ref. I'd probably lean towards the
> former. One reason to support the former is that fences can be released
> in IRQ context, and dropping a module ref in IRQ context seems a bit
> problematic. Also, having some other part of the kernel hold on to a
> fence ref and lazily block a module unload just seems wrong.

Modules are not auto-unloaded when their reference count becomes zero;
they are only unloaded when rmmod (or the corresponding system call) is
issued.

So dropping a module reference from interrupt context should be
unproblematic I think. But we should probably double check.

Fully decoupling fence destruction from the module is most likely not 
possible since we will always need the free callback from the ops for 
some use cases.

> Sorry if we already have a well-defined rule for the above and I'm just not aware of it.

No, it's basically just a well-known mess nobody cared much about.

Regards,
Christian.

Daniel Vetter Sept. 2, 2024, 1:40 p.m. UTC | #4
On Mon, Sep 02, 2024 at 12:58:54PM +0200, Christian König wrote:
> Am 31.08.24 um 00:28 schrieb Matthew Brost:
> > On Fri, Aug 30, 2024 at 12:40:57PM +0200, Boris Brezillon wrote:
> > > dma_fence objects created by drm_sched might hit other subsystems, which
> > > means the fence object might potentially outlive the drm_sched module
> > > lifetime, and at this point the dma_fence::ops points to a memory region
> > > that no longer exists.
> > > 
> > > In order to fix that, let's make sure the drm_sched_fence code is always
> > > statically linked, just like dma-fence{-chain}.c does.
> > > 
> > > Cc: Luben Tuikov <ltuikov89@gmail.com>
> > > Cc: Matthew Brost <matthew.brost@intel.com>
> > > Cc: "Christian König" <christian.koenig@amd.com>
> > > Cc: Danilo Krummrich <dakr@redhat.com>
> > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > > ---
> > > Just like for the other UAF fix, this is an RFC, as I'm not sure that's
> > > how we want it fixed. The other option we have is to retain a module ref
> > > for each initialized fence and release it when the fence is freed.
> > So this is different from your other patch. From the Xe PoV, the other
> > patch references an object which is tied to an IOCTL rather than a
> > module, whereas this one references a module. If a module can disappear
> > while fences tied to it still exist, well, that is a bit scary and Xe
> > might have some issues there - I'd have to audit our exporting of
> > internally created fences.
> > 
> > Based on Christian's feedback we really shouldn't be allowing this but
> > also don't really love the idea of a fence holding a module ref either.
> 
> IIRC the initial proposal for dma_fences actually contained grabbing a
> module reference, similar to what we do for dma-bufs.
> 
> But I think that was dropped because of the circular dependency issues
> and because it prevented module unload. After that nobody really looked
> into it again.
> 
> I think using the scheduler fence to decouple the hardware fence lifetime
> should work. We would just need to drop the hardware fence reference once
> the scheduler fence has signaled, not when it is destroyed.
> 
> This unfortunately also creates another problem for error recovery, but
> that is a different topic I think.
> 
> > Seems like we need a well-defined + documented rule - exported fences
> > need to be completely decoupled from the module upon signaling or
> > exported fences must retain a module ref. I'd probably lean towards the
> > former. One reason to support the former is that fences can be released
> > in IRQ context, and dropping a module ref in IRQ context seems a bit
> > problematic. Also, having some other part of the kernel hold on to a
> > fence ref and lazily block a module unload just seems wrong.
> 
> Modules are not auto-unloaded when their reference count becomes zero;
> they are only unloaded when rmmod (or the corresponding system call) is
> issued.
> 
> So dropping a module reference from interrupt context should be
> unproblematic I think. But we should probably double check.
> 
> Fully decoupling fence destruction from the module is most likely not
> possible since we will always need the free callback from the ops for some
> use cases.

I think the issue is that we don't garbage collect fences, so with a full
module refcount you can pin a module. Which isn't great.
-Sima

Simona Vetter Sept. 3, 2024, 8:29 a.m. UTC | #5
On Sat, Aug 31, 2024 at 09:13:52AM +0200, Boris Brezillon wrote:
> Hi Matthew,
> 
> On Fri, 30 Aug 2024 22:28:19 +0000
> Matthew Brost <matthew.brost@intel.com> wrote:
> 
> > On Fri, Aug 30, 2024 at 12:40:57PM +0200, Boris Brezillon wrote:
> > > dma_fence objects created by drm_sched might hit other subsystems, which
> > > means the fence object might potentially outlive the drm_sched module
> > > lifetime, and at this point the dma_fence::ops points to a memory region
> > > that no longer exists.
> > > 
> > > In order to fix that, let's make sure the drm_sched_fence code is always
> > > statically linked, just like dma-fence{-chain}.c does.
> > > 
> > > Cc: Luben Tuikov <ltuikov89@gmail.com>
> > > Cc: Matthew Brost <matthew.brost@intel.com>
> > > Cc: "Christian König" <christian.koenig@amd.com>
> > > Cc: Danilo Krummrich <dakr@redhat.com>
> > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > > ---
> > > Just like for the other UAF fix, this is an RFC, as I'm not sure that's
> > > how we want it fixed. The other option we have is to retain a module ref
> > > for each initialized fence and release it when the fence is freed.  
> > 
> > So this is different from your other patch. From the Xe PoV, the other
> > patch references an object which is tied to an IOCTL rather than a
> > module, whereas this one references a module.
> 
> Well, it's not fixing the exact same problem either. My other patch was
> about making sure the timeline name string doesn't disappear when a
> scheduler is destroyed, which can happen while the module is still
> loaded. As Christian pointed out, the "module unload while fences
> exist" problem can be solved by binding the module refcounting to
> the drm_sched_fence_timeline object lifetime, but I wanted to show a
> simpler alternative to this approach with this patch.
> 
> > If a module can disappear
> > while fences tied to it still exist, well, that is a bit scary and Xe
> > might have some issues there - I'd have to audit our exporting of
> > internally created fences.
> 
> Yeah, I moved away from exposing driver fences directly because of
> that. Now all I expose to the outside world are drm_sched_fence
> objects, which just moves the problem one level down, but at least we
> can fix it generically if all the drivers take this precaution.
> 
> > 
> > Based on Christian's feedback we really shouldn't be allowing this but
> > also don't really love the idea of a fence holding a module ref either.
> > 
> > Seems like we need a well-defined + documented rule - exported fences
> > need to be completely decoupled from the module upon signaling
> 
> That basically means patching drm_sched_fence::ops (with the lock held)
> at signal time so it points to a dummy ops that's statically linked
> (something in dma-fence.c, I guess). But that also means the names
> returned by get_{driver,timeline}_name() no longer reflect the original
> fence owner after signalling, or you have to do some allocation+copy
> of these strings. Neither of these options sounds good to me.

I replied in the other thread; my take is we should sort this out in
dma_fence.c. Essentially:

- split the fence cleanup into driver release and the final kfree_rcu.

- move the final kfree_rcu into dma_fence.c code. This means no more
  special slabs all over the place.

- fix up all the rules around the various ->ops callbacks so they don't
  race as much anymore, where we do race. I think the worst is ->wait(),
  we might want to really ditch that one (or at least deprecate it, I
  think for some de facto unfixable drivers it needs to stay).

- add a dma_fence guarantee that after dma_fence_signal() has returned,
  the core will never ever call into ->ops again.

It's a bit radical, but I don't think anything less really plugs this mess
for good.
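
As a sketch of what the split in the first two points could look like
(not actual dma-fence code, just the shape of it):

void dma_fence_release(struct kref *kref)
{
        struct dma_fence *fence =
                container_of(kref, struct dma_fence, refcount);

        /*
         * Optional driver hook, may live in a module. It only detaches
         * driver resources and must not free the fence itself.
         */
        if (fence->ops->release)
                fence->ops->release(fence);

        /* The final free always runs from built-in code. */
        kfree_rcu(fence, rcu);
}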

> > or
> > exported fences must retain a module ref.
> 
> Yes, and that's the option I was originally heading to, until I
> realized we have a 3rd option that's already working well for the core
> dma-fence code (AKA the stub, the chain, and other containers I might
> have missed). If the code is not in a module but instead statically
> linked, all our module-lifetime vs fence-lifetime issues go away
> without resorting to this module refcounting trick. Given sched_fence.c
> is pretty self-contained (no deps on other DRM functions that might be
> linked as a module), I now think this is our best shot for drm_sched
> fences.
> 
> The story is a bit different for driver fences exposed to the outside
> world, but if you're using drm_sched, I see no good reason for not using
> the drm_sched_fence proxy for all your fences. Note that the same kind
> of proxy can be generalized at the dma-fence.c level if it's deemed
> valuable for other subsystems.
> 
> The idea behind it being that:
> 
> - the dma_fence_proxy ops should live in dma-fence.c (or any other
>   object that's statically linked)

Proxy ops don't work, unless you fully serialize access to ->ops with
locking (or some other kind of serialization). At that point you might as
well have special code in dma_fence.c that just bypasses ->ops.
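
E.g. something like this (made-up helper):

const char *dma_fence_timeline_name(struct dma_fence *fence)
{
        /* Once signaled, never reach into module-provided ops again. */
        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                return "signaled";

        return fence->ops->get_timeline_name(fence);
}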

> - driver and timeline names attached to a proxy need to be dynamically
>   allocated and owned by some dma_fence_proxy_context object
> - the dma_fence_proxy_context object needs to be refcounted, and each
>   fence attached to this 'fence-context' needs to hold a ref on this
>   struct

Yeah, making the fence context/timeline a fully refcounted thing might be
needed, instead of just an integer we allocate with atomic_add. But we
would first need to add a proper special case for all the unsorted
dma_fences; currently each of those still just consumes a timeline ...

> > I'd probably lean towards the
> > former.
> 
> For the reasons explained above, I don't think that's a viable option,
> unless we decide we no longer care about the fence origin after
> signalling has happened (which would be a mess for people looking at a
> dma_buf's debug file, to be honest).

Why would that be a mess? A signalled fence shouldn't be an issue anymore,
so I'm not sure why you'd want to know where it came from. And as long as
they're stuck, the fences have meaningful debug info ...

> > One reason to support the former is that fences can be released
> > in IRQ context, and dropping a module ref in IRQ context seems a bit
> > problematic. Also, having some other part of the kernel hold on to a
> > fence ref and lazily block a module unload just seems wrong.
> 
> There's always the option to defer the release if we're in a context
> where we can't release immediately. But I think we're overthinking it.
> For the drm_sched_fence, the simple answer is to make this code static,
> just like dma-fence{-chain}.c.

Yeah, I think what we all agree on is that the final cleanup code must be
built-in for all fences, or this just won't work.

> > Sorry if we already have a well-defined rule for the above and I'm just not aware of it.
> 
> AFAICT, it's not.

Yeah we yolo'ed this for more than a decade. Sounds like time to sort it
out ...
-Sima

Patch

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 68cc9258ffc4..b97127da58b7 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -158,7 +158,7 @@  obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
 obj-y			+= arm/
 obj-y			+= display/
 obj-$(CONFIG_DRM_TTM)	+= ttm/
-obj-$(CONFIG_DRM_SCHED)	+= scheduler/
+obj-y			+= scheduler/
 obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile
index 53863621829f..bc18d26f27a1 100644
--- a/drivers/gpu/drm/scheduler/Makefile
+++ b/drivers/gpu/drm/scheduler/Makefile
@@ -20,6 +20,11 @@ 
 # OTHER DEALINGS IN THE SOFTWARE.
 #
 #
-gpu-sched-y := sched_main.o sched_fence.o sched_entity.o
+
+# sched_fence.c contains dma_fence_ops that can't go away, so make sure it's
+# statically linked when CONFIG_DRM_SCHED=m
+obj-y += $(if $(CONFIG_DRM_SCHED),sched_fence.o,)
+
+gpu-sched-y := sched_main.o sched_entity.o
 
 obj-$(CONFIG_DRM_SCHED) += gpu-sched.o
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index efa2a71d98eb..ac65589476dd 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -39,12 +39,7 @@  static int __init drm_sched_fence_slab_init(void)
 
 	return 0;
 }
-
-static void __exit drm_sched_fence_slab_fini(void)
-{
-	rcu_barrier();
-	kmem_cache_destroy(sched_fence_slab);
-}
+subsys_initcall(drm_sched_fence_slab_init);
 
 static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
 				       struct dma_fence *fence)
@@ -74,6 +69,7 @@  void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
 
 	dma_fence_signal(&fence->scheduled);
 }
+EXPORT_SYMBOL(drm_sched_fence_scheduled);
 
 void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
 {
@@ -81,6 +77,7 @@  void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
 		dma_fence_set_error(&fence->finished, result);
 	dma_fence_signal(&fence->finished);
 }
+EXPORT_SYMBOL(drm_sched_fence_finished);
 
 static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
 {
@@ -118,6 +115,7 @@  void drm_sched_fence_free(struct drm_sched_fence *fence)
 	if (!WARN_ON_ONCE(fence->timeline))
 		kmem_cache_free(sched_fence_slab, fence);
 }
+EXPORT_SYMBOL(drm_sched_fence_free);
 
 /**
  * drm_sched_fence_release_scheduled - callback that fence can be freed
@@ -210,6 +208,9 @@  struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
 {
 	struct drm_sched_fence *fence = NULL;
 
+	if (unlikely(!sched_fence_slab))
+		return NULL;
+
 	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
 	if (fence == NULL)
 		return NULL;
@@ -219,6 +220,7 @@  struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
 
 	return fence;
 }
+EXPORT_SYMBOL(drm_sched_fence_alloc);
 
 void drm_sched_fence_init(struct drm_sched_fence *fence,
 			  struct drm_sched_entity *entity)
@@ -234,9 +236,4 @@  void drm_sched_fence_init(struct drm_sched_fence *fence,
 	dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished,
 		       &fence->lock, entity->fence_context + 1, seq);
 }
-
-module_init(drm_sched_fence_slab_init);
-module_exit(drm_sched_fence_slab_fini);
-
-MODULE_DESCRIPTION("DRM GPU scheduler");
-MODULE_LICENSE("GPL and additional rights");
+EXPORT_SYMBOL(drm_sched_fence_init);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 1e31a9c8ce15..eaaf086eab30 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1467,3 +1467,6 @@  void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
 	queue_work(sched->submit_wq, &sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_start);
+
+MODULE_DESCRIPTION("DRM GPU scheduler");
+MODULE_LICENSE("GPL and additional rights");