diff mbox

dma-buf/fence: make fence context 64 bit

Message ID 1463648436-4624-1-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König May 19, 2016, 9 a.m. UTC
From: Christian König <christian.koenig@amd.com>

Fence contexts are created on the fly (for example) by the GPU scheduler used
in the amdgpu driver as a result of a userspace request. Because of this
userspace could in theory force a wrap around of the 32bit context number
if it doesn't behave well.

Avoid this by increasing the context number to 64bits. This way even when
userspace manages to allocate a billion contexts per second it takes more
than 500 years for the context number to wrap around.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/fence.c                 | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_fence.h | 3 ++-
 drivers/gpu/drm/radeon/radeon.h         | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   | 2 +-
 drivers/staging/android/sync.h          | 3 ++-
 include/linux/fence.h                   | 7 ++++---
 8 files changed, 16 insertions(+), 13 deletions(-)

Comments

Daniel Vetter May 19, 2016, 9:14 a.m. UTC | #1
On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
> 
> Fence contexts are created on the fly (for example) by the GPU scheduler used
> in the amdgpu driver as a result of an userspace request. Because of this
> userspace could in theory force a wrap around of the 32bit context number
> if it doesn't behave well.
> 
> Avoid this by increasing the context number to 64bits. This way even when
> userspace manages to allocate a billion contexts per second it takes more
> than 500 years for the context number to wrap around.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Hm, I think it'd be nice to wrap this up into a real struct and then
manage them with some idr or whatever. For debugging we might also want to
keep track of all fences on a given timeline and similar things, so
there will be a need for this in the future.

So if you go through every driver I think it's better to replace the type
with struct fence_context *context while we're at it. Makes it a notch
bigger since we need to add a little bit of error handling to all callers
of fence_context_alloc.

Volunteered? ;-)

Cheers, Daniel
> ---
>  drivers/dma-buf/fence.c                 | 8 ++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h   | 2 +-
>  drivers/gpu/drm/nouveau/nouveau_fence.h | 3 ++-
>  drivers/gpu/drm/radeon/radeon.h         | 2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   | 2 +-
>  drivers/staging/android/sync.h          | 3 ++-
>  include/linux/fence.h                   | 7 ++++---
>  8 files changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
> index 7b05dbe..4d51f9e 100644
> --- a/drivers/dma-buf/fence.c
> +++ b/drivers/dma-buf/fence.c
> @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit);
>   * context or not. One device can have multiple separate contexts,
>   * and they're used if some engine can run independently of another.
>   */
> -static atomic_t fence_context_counter = ATOMIC_INIT(0);
> +static atomic64_t fence_context_counter = ATOMIC64_INIT(0);
>  
>  /**
>   * fence_context_alloc - allocate an array of fence contexts
> @@ -44,10 +44,10 @@ static atomic_t fence_context_counter = ATOMIC_INIT(0);
>   * This function will return the first index of the number of fences allocated.
>   * The fence context is used for setting fence->context to a unique number.
>   */
> -unsigned fence_context_alloc(unsigned num)
> +u64 fence_context_alloc(unsigned num)
>  {
>  	BUG_ON(!num);
> -	return atomic_add_return(num, &fence_context_counter) - num;
> +	return atomic64_add_return(num, &fence_context_counter) - num;
>  }
>  EXPORT_SYMBOL(fence_context_alloc);
>  
> @@ -513,7 +513,7 @@ EXPORT_SYMBOL(fence_wait_any_timeout);
>   */
>  void
>  fence_init(struct fence *fence, const struct fence_ops *ops,
> -	     spinlock_t *lock, unsigned context, unsigned seqno)
> +	     spinlock_t *lock, u64 context, unsigned seqno)
>  {
>  	BUG_ON(!lock);
>  	BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 992f00b..da3d021 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -2032,7 +2032,7 @@ struct amdgpu_device {
>  	struct amdgpu_irq_src		hpd_irq;
>  
>  	/* rings */
> -	unsigned			fence_context;
> +	u64				fence_context;
>  	unsigned			num_rings;
>  	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
>  	bool				ib_pool_ready;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index f5321e2..a69cdd5 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -125,7 +125,7 @@ struct etnaviv_gpu {
>  	u32 completed_fence;
>  	u32 retired_fence;
>  	wait_queue_head_t fence_event;
> -	unsigned int fence_context;
> +	u64 fence_context;
>  	spinlock_t fence_spinlock;
>  
>  	/* worker for handling active-list retiring: */
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
> index 2e3a62d..64c4ce7 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> @@ -57,7 +57,8 @@ struct nouveau_fence_priv {
>  	int  (*context_new)(struct nouveau_channel *);
>  	void (*context_del)(struct nouveau_channel *);
>  
> -	u32 contexts, context_base;
> +	u32 contexts;
> +	u64 context_base;
>  	bool uevent;
>  };
>  
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 80b24a4..5633ee3 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2386,7 +2386,7 @@ struct radeon_device {
>  	struct radeon_mman		mman;
>  	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
>  	wait_queue_head_t		fence_queue;
> -	unsigned			fence_context;
> +	u64				fence_context;
>  	struct mutex			ring_lock;
>  	struct radeon_ring		ring[RADEON_NUM_RINGS];
>  	bool				ib_pool_ready;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index e959df6..26ac8e8 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -46,7 +46,7 @@ struct vmw_fence_manager {
>  	bool goal_irq_on; /* Protected by @goal_irq_mutex */
>  	bool seqno_valid; /* Protected by @lock, and may not be set to true
>  			     without the @goal_irq_mutex held. */
> -	unsigned ctx;
> +	u64 ctx;
>  };
>  
>  struct vmw_user_fence {
> diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h
> index d2a1734..ef1f7d4 100644
> --- a/drivers/staging/android/sync.h
> +++ b/drivers/staging/android/sync.h
> @@ -68,7 +68,8 @@ struct sync_timeline {
>  
>  	/* protected by child_list_lock */
>  	bool			destroyed;
> -	int			context, value;
> +	u64			context;
> +	int			value;
>  
>  	struct list_head	child_list_head;
>  	spinlock_t		child_list_lock;
> diff --git a/include/linux/fence.h b/include/linux/fence.h
> index 2b17698..350caaa 100644
> --- a/include/linux/fence.h
> +++ b/include/linux/fence.h
> @@ -75,7 +75,8 @@ struct fence {
>  	struct rcu_head rcu;
>  	struct list_head cb_list;
>  	spinlock_t *lock;
> -	unsigned context, seqno;
> +	u64 context;
> +	unsigned seqno;
>  	unsigned long flags;
>  	ktime_t timestamp;
>  	int status;
> @@ -178,7 +179,7 @@ struct fence_ops {
>  };
>  
>  void fence_init(struct fence *fence, const struct fence_ops *ops,
> -		spinlock_t *lock, unsigned context, unsigned seqno);
> +		spinlock_t *lock, u64 context, unsigned seqno);
>  
>  void fence_release(struct kref *kref);
>  void fence_free(struct fence *fence);
> @@ -352,7 +353,7 @@ static inline signed long fence_wait(struct fence *fence, bool intr)
>  	return ret < 0 ? ret : 0;
>  }
>  
> -unsigned fence_context_alloc(unsigned num);
> +u64 fence_context_alloc(unsigned num);
>  
>  #define FENCE_TRACE(f, fmt, args...) \
>  	do {								\
> -- 
> 2.5.0
>
Christian König May 19, 2016, 9:30 a.m. UTC | #2
Am 19.05.2016 um 11:14 schrieb Daniel Vetter:
> On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Fence contexts are created on the fly (for example) by the GPU scheduler used
>> in the amdgpu driver as a result of an userspace request. Because of this
>> userspace could in theory force a wrap around of the 32bit context number
>> if it doesn't behave well.
>>
>> Avoid this by increasing the context number to 64bits. This way even when
>> userspace manages to allocate a billion contexts per second it takes more
>> than 500 years for the context number to wrap around.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Hm, I think it'd be nice to wrap this up into a real struct and then
> manage them with some idr or whatever. For debugging we might also want to
> keep track of all fences on a given timeline and similar things, so
> there will be a need for this in the future.
>
> So if you go through every driver I think it's better to replace the type
> with struct fence_context *context while we're at it. Makes it a notch
> bigger since we need to add a little bit of error handling to all callers
> of fence_context_alloc.
>
> Volunteered? ;-)

Well, that's exactly what I wanted to avoid. 64bit numbers are fast to 
allocate and easy to compare.

If I make it a structure then we would need to kmalloc() it and make 
sure it is reference counted so it stays alive as long as any fence 
structure is alive which is referring to it.

The overhead sounds like too much to me, especially since we currently don't 
have a real use for that right now.

Christian.

>
> Cheers, Daniel
>> ---
>>   drivers/dma-buf/fence.c                 | 8 ++++----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 2 +-
>>   drivers/gpu/drm/etnaviv/etnaviv_gpu.h   | 2 +-
>>   drivers/gpu/drm/nouveau/nouveau_fence.h | 3 ++-
>>   drivers/gpu/drm/radeon/radeon.h         | 2 +-
>>   drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   | 2 +-
>>   drivers/staging/android/sync.h          | 3 ++-
>>   include/linux/fence.h                   | 7 ++++---
>>   8 files changed, 16 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
>> index 7b05dbe..4d51f9e 100644
>> --- a/drivers/dma-buf/fence.c
>> +++ b/drivers/dma-buf/fence.c
>> @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit);
>>    * context or not. One device can have multiple separate contexts,
>>    * and they're used if some engine can run independently of another.
>>    */
>> -static atomic_t fence_context_counter = ATOMIC_INIT(0);
>> +static atomic64_t fence_context_counter = ATOMIC64_INIT(0);
>>   
>>   /**
>>    * fence_context_alloc - allocate an array of fence contexts
>> @@ -44,10 +44,10 @@ static atomic_t fence_context_counter = ATOMIC_INIT(0);
>>    * This function will return the first index of the number of fences allocated.
>>    * The fence context is used for setting fence->context to a unique number.
>>    */
>> -unsigned fence_context_alloc(unsigned num)
>> +u64 fence_context_alloc(unsigned num)
>>   {
>>   	BUG_ON(!num);
>> -	return atomic_add_return(num, &fence_context_counter) - num;
>> +	return atomic64_add_return(num, &fence_context_counter) - num;
>>   }
>>   EXPORT_SYMBOL(fence_context_alloc);
>>   
>> @@ -513,7 +513,7 @@ EXPORT_SYMBOL(fence_wait_any_timeout);
>>    */
>>   void
>>   fence_init(struct fence *fence, const struct fence_ops *ops,
>> -	     spinlock_t *lock, unsigned context, unsigned seqno)
>> +	     spinlock_t *lock, u64 context, unsigned seqno)
>>   {
>>   	BUG_ON(!lock);
>>   	BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 992f00b..da3d021 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -2032,7 +2032,7 @@ struct amdgpu_device {
>>   	struct amdgpu_irq_src		hpd_irq;
>>   
>>   	/* rings */
>> -	unsigned			fence_context;
>> +	u64				fence_context;
>>   	unsigned			num_rings;
>>   	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
>>   	bool				ib_pool_ready;
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> index f5321e2..a69cdd5 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
>> @@ -125,7 +125,7 @@ struct etnaviv_gpu {
>>   	u32 completed_fence;
>>   	u32 retired_fence;
>>   	wait_queue_head_t fence_event;
>> -	unsigned int fence_context;
>> +	u64 fence_context;
>>   	spinlock_t fence_spinlock;
>>   
>>   	/* worker for handling active-list retiring: */
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
>> index 2e3a62d..64c4ce7 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
>> @@ -57,7 +57,8 @@ struct nouveau_fence_priv {
>>   	int  (*context_new)(struct nouveau_channel *);
>>   	void (*context_del)(struct nouveau_channel *);
>>   
>> -	u32 contexts, context_base;
>> +	u32 contexts;
>> +	u64 context_base;
>>   	bool uevent;
>>   };
>>   
>> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
>> index 80b24a4..5633ee3 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -2386,7 +2386,7 @@ struct radeon_device {
>>   	struct radeon_mman		mman;
>>   	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
>>   	wait_queue_head_t		fence_queue;
>> -	unsigned			fence_context;
>> +	u64				fence_context;
>>   	struct mutex			ring_lock;
>>   	struct radeon_ring		ring[RADEON_NUM_RINGS];
>>   	bool				ib_pool_ready;
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
>> index e959df6..26ac8e8 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
>> @@ -46,7 +46,7 @@ struct vmw_fence_manager {
>>   	bool goal_irq_on; /* Protected by @goal_irq_mutex */
>>   	bool seqno_valid; /* Protected by @lock, and may not be set to true
>>   			     without the @goal_irq_mutex held. */
>> -	unsigned ctx;
>> +	u64 ctx;
>>   };
>>   
>>   struct vmw_user_fence {
>> diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h
>> index d2a1734..ef1f7d4 100644
>> --- a/drivers/staging/android/sync.h
>> +++ b/drivers/staging/android/sync.h
>> @@ -68,7 +68,8 @@ struct sync_timeline {
>>   
>>   	/* protected by child_list_lock */
>>   	bool			destroyed;
>> -	int			context, value;
>> +	u64			context;
>> +	int			value;
>>   
>>   	struct list_head	child_list_head;
>>   	spinlock_t		child_list_lock;
>> diff --git a/include/linux/fence.h b/include/linux/fence.h
>> index 2b17698..350caaa 100644
>> --- a/include/linux/fence.h
>> +++ b/include/linux/fence.h
>> @@ -75,7 +75,8 @@ struct fence {
>>   	struct rcu_head rcu;
>>   	struct list_head cb_list;
>>   	spinlock_t *lock;
>> -	unsigned context, seqno;
>> +	u64 context;
>> +	unsigned seqno;
>>   	unsigned long flags;
>>   	ktime_t timestamp;
>>   	int status;
>> @@ -178,7 +179,7 @@ struct fence_ops {
>>   };
>>   
>>   void fence_init(struct fence *fence, const struct fence_ops *ops,
>> -		spinlock_t *lock, unsigned context, unsigned seqno);
>> +		spinlock_t *lock, u64 context, unsigned seqno);
>>   
>>   void fence_release(struct kref *kref);
>>   void fence_free(struct fence *fence);
>> @@ -352,7 +353,7 @@ static inline signed long fence_wait(struct fence *fence, bool intr)
>>   	return ret < 0 ? ret : 0;
>>   }
>>   
>> -unsigned fence_context_alloc(unsigned num);
>> +u64 fence_context_alloc(unsigned num);
>>   
>>   #define FENCE_TRACE(f, fmt, args...) \
>>   	do {								\
>> -- 
>> 2.5.0
>>
Daniel Vetter May 19, 2016, 12:07 p.m. UTC | #3
On Thu, May 19, 2016 at 11:30 AM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 19.05.2016 um 11:14 schrieb Daniel Vetter:
>>
>> On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote:
>>>
>>> From: Christian König <christian.koenig@amd.com>
>>>
>>> Fence contexts are created on the fly (for example) by the GPU scheduler
>>> used
>>> in the amdgpu driver as a result of an userspace request. Because of this
>>> userspace could in theory force a wrap around of the 32bit context number
>>> if it doesn't behave well.
>>>
>>> Avoid this by increasing the context number to 64bits. This way even when
>>> userspace manages to allocate a billion contexts per second it takes more
>>> than 500 years for the context number to wrap around.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>
>> Hm, I think it'd be nice to wrap this up into a real struct and then
>> manage them with some idr or whatever. For debugging we might also want to
>> keep track of all fences on a given timeline and similar things, so
>> there will be a need for this in the future.
>>
>> So if you go through every driver I think it's better to replace the type
>> with struct fence_context *context while we're at it. Makes it a notch
>> bigger since we need to add a little bit of error handling to all callers
>> of fence_context_alloc.
>>
>> Volunteered? ;-)
>
>
> Well, that's exactly what I wanted to avoid. 64bit numbers are fast to
> allocate and easy to compare.
>
> If I make it a structure then we would need to kmalloc() it and make sure it
> is reference counted so it stays alive as long as any fence structure is
> alive which is referring to it.
>
> The overhead sounds to much to me, especially since we currently don't have
> a real use for that right now.

Hm, I guess if you're worried about the kmalloc we could make
fence_context embeddable. At least I assume you have to allocate
something somewhere already to store the u64, and that something also
needs to be refcounted already (or cleaned up suitably) to make sure
it doesn't disappear before the fences go away.

I'm just raising this because the longer we wait with redoing this
interface the more painful it'll be. Android at least does have a
full-blown struct, and the reason is exclusively for debugging. And
from what I've heard from android devs debugging fence lockups is a
complete pain. That's why I think sooner or later there's no way
around a full blown struct.
-Daniel
Christian König May 19, 2016, 12:21 p.m. UTC | #4
Am 19.05.2016 um 14:07 schrieb Daniel Vetter:
> On Thu, May 19, 2016 at 11:30 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Am 19.05.2016 um 11:14 schrieb Daniel Vetter:
>>> On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote:
>>>> From: Christian König <christian.koenig@amd.com>
>>>>
>>>> Fence contexts are created on the fly (for example) by the GPU scheduler
>>>> used
>>>> in the amdgpu driver as a result of an userspace request. Because of this
>>>> userspace could in theory force a wrap around of the 32bit context number
>>>> if it doesn't behave well.
>>>>
>>>> Avoid this by increasing the context number to 64bits. This way even when
>>>> userspace manages to allocate a billion contexts per second it takes more
>>>> than 500 years for the context number to wrap around.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Hm, I think it'd be nice to wrap this up into a real struct and then
>>> manage them with some idr or whatever. For debugging we might also want to
>>> keep track of all fences on a given timeline and similar things, so
>>> there will be a need for this in the future.
>>>
>>> So if you go through every driver I think it's better to replace the type
>>> with struct fence_context *context while we're at it. Makes it a notch
>>> bigger since we need to add a little bit of error handling to all callers
>>> of fence_context_alloc.
>>>
>>> Volunteered? ;-)
>>
>> Well, that's exactly what I wanted to avoid. 64bit numbers are fast to
>> allocate and easy to compare.
>>
>> If I make it a structure then we would need to kmalloc() it and make sure it
>> is reference counted so it stays alive as long as any fence structure is
>> alive which is referring to it.
>>
>> The overhead sounds to much to me, especially since we currently don't have
>> a real use for that right now.
> Hm, I guess if you're worried about the kmalloc we could make
> fence_context embeddable. At least I assume you have to allcate
> something somewhere already to store the u64, and that something also
> needs to be refcounted already (or cleaned up suitably) to make sure
> it doesn't disappear before the fences go away.

Nope, while it's true that we allocate the fence context with the 
command submission context in an IOCTL for amdgpu freeing the command 
submission context happens while fences using the fence context are 
still around. E.g. the application could have died, but some hardware 
operations are still under way referencing the context.

Keeping it as a structure can cause all kind of problems in OOM 
situations. For example we don't have a limit on the number of contexts 
created. E.g. an application could allocate a lot of command submission 
contexts, submit a single waiting command and exit.

> I'm just raising this because the longer we wait with redoing this
> interface the more painful it'll be. Android at least does have a
> full-blown struct, and the reason is exclusively for debugging. And
> from what I've heard from android devs debugging fence lockups is a
> complete pain. That's why I think sooner or later there's no way
> around a full blown struct.
I actually don't think so. Having the context as a simple number gives 
much greater flexibility to us because we don't need to worry about any 
overhead.

For debugging you can just put the context number into a hashtable or 
tree if you need to store additional information to it.

Christian.

> -Daniel
Daniel Vetter May 19, 2016, 1:30 p.m. UTC | #5
On Thu, May 19, 2016 at 02:21:43PM +0200, Christian König wrote:
> Am 19.05.2016 um 14:07 schrieb Daniel Vetter:
> >On Thu, May 19, 2016 at 11:30 AM, Christian König
> ><deathsimple@vodafone.de> wrote:
> >>Am 19.05.2016 um 11:14 schrieb Daniel Vetter:
> >>>On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote:
> >>>>From: Christian König <christian.koenig@amd.com>
> >>>>
> >>>>Fence contexts are created on the fly (for example) by the GPU scheduler
> >>>>used
> >>>>in the amdgpu driver as a result of an userspace request. Because of this
> >>>>userspace could in theory force a wrap around of the 32bit context number
> >>>>if it doesn't behave well.
> >>>>
> >>>>Avoid this by increasing the context number to 64bits. This way even when
> >>>>userspace manages to allocate a billion contexts per second it takes more
> >>>>than 500 years for the context number to wrap around.
> >>>>
> >>>>Signed-off-by: Christian König <christian.koenig@amd.com>
> >>>Hm, I think it'd be nice to wrap this up into a real struct and then
> >>>manage them with some idr or whatever. For debugging we might also want to
> >>>keep track of all fences on a given timeline and similar things, so
> >>>there will be a need for this in the future.
> >>>
> >>>So if you go through every driver I think it's better to replace the type
> >>>with struct fence_context *context while we're at it. Makes it a notch
> >>>bigger since we need to add a little bit of error handling to all callers
> >>>of fence_context_alloc.
> >>>
> >>>Volunteered? ;-)
> >>
> >>Well, that's exactly what I wanted to avoid. 64bit numbers are fast to
> >>allocate and easy to compare.
> >>
> >>If I make it a structure then we would need to kmalloc() it and make sure it
> >>is reference counted so it stays alive as long as any fence structure is
> >>alive which is referring to it.
> >>
> >>The overhead sounds to much to me, especially since we currently don't have
> >>a real use for that right now.
> >Hm, I guess if you're worried about the kmalloc we could make
> >fence_context embeddable. At least I assume you have to allcate
> >something somewhere already to store the u64, and that something also
> >needs to be refcounted already (or cleaned up suitably) to make sure
> >it doesn't disappear before the fences go away.
> 
> Nope, while it's true that we allocate the fence context with the command
> submission context in an IOCTL for amdgpu freeing the command submission
> context happens while fences using the fence context are still around. E.g.
> the application could have dies, but some hardware operations are still
> under way referencing the context.
> 
> Keeping it as a structure can cause all kind of problems in OOM situations.
> For example we don't have a limit on the number of contexts created. E.g. an
> application could allocate a lot of command submissions context, submits a
> single waiting command and exits.
> 
> >I'm just raising this because the longer we wait with redoing this
> >interface the more painful it'll be. Android at least does have a
> >full-blown struct, and the reason is exclusively for debugging. And
> >from what I've heard from android devs debugging fence lockups is a
> >complete pain. That's why I think sooner or later there's no way
> >around a full blown struct.
> I actually don't think so. Having the context as a simple number gives much
> greater flexibility to us because we don't need to worry about any overhead.
> 
> For debugging you can just put the context number into a hashtable or tree
> if you need to store additional information to it.

The trouble is that you then still have the lifetime issues. But I guess
we can fix this later on, so Ack.
-Daniel
diff mbox

Patch

diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 7b05dbe..4d51f9e 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -35,7 +35,7 @@  EXPORT_TRACEPOINT_SYMBOL(fence_emit);
  * context or not. One device can have multiple separate contexts,
  * and they're used if some engine can run independently of another.
  */
-static atomic_t fence_context_counter = ATOMIC_INIT(0);
+static atomic64_t fence_context_counter = ATOMIC64_INIT(0);
 
 /**
  * fence_context_alloc - allocate an array of fence contexts
@@ -44,10 +44,10 @@  static atomic_t fence_context_counter = ATOMIC_INIT(0);
  * This function will return the first index of the number of fences allocated.
  * The fence context is used for setting fence->context to a unique number.
  */
-unsigned fence_context_alloc(unsigned num)
+u64 fence_context_alloc(unsigned num)
 {
 	BUG_ON(!num);
-	return atomic_add_return(num, &fence_context_counter) - num;
+	return atomic64_add_return(num, &fence_context_counter) - num;
 }
 EXPORT_SYMBOL(fence_context_alloc);
 
@@ -513,7 +513,7 @@  EXPORT_SYMBOL(fence_wait_any_timeout);
  */
 void
 fence_init(struct fence *fence, const struct fence_ops *ops,
-	     spinlock_t *lock, unsigned context, unsigned seqno)
+	     spinlock_t *lock, u64 context, unsigned seqno)
 {
 	BUG_ON(!lock);
 	BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 992f00b..da3d021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2032,7 +2032,7 @@  struct amdgpu_device {
 	struct amdgpu_irq_src		hpd_irq;
 
 	/* rings */
-	unsigned			fence_context;
+	u64				fence_context;
 	unsigned			num_rings;
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index f5321e2..a69cdd5 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -125,7 +125,7 @@  struct etnaviv_gpu {
 	u32 completed_fence;
 	u32 retired_fence;
 	wait_queue_head_t fence_event;
-	unsigned int fence_context;
+	u64 fence_context;
 	spinlock_t fence_spinlock;
 
 	/* worker for handling active-list retiring: */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 2e3a62d..64c4ce7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -57,7 +57,8 @@  struct nouveau_fence_priv {
 	int  (*context_new)(struct nouveau_channel *);
 	void (*context_del)(struct nouveau_channel *);
 
-	u32 contexts, context_base;
+	u32 contexts;
+	u64 context_base;
 	bool uevent;
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 80b24a4..5633ee3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2386,7 +2386,7 @@  struct radeon_device {
 	struct radeon_mman		mman;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
 	wait_queue_head_t		fence_queue;
-	unsigned			fence_context;
+	u64				fence_context;
 	struct mutex			ring_lock;
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	bool				ib_pool_ready;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index e959df6..26ac8e8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -46,7 +46,7 @@  struct vmw_fence_manager {
 	bool goal_irq_on; /* Protected by @goal_irq_mutex */
 	bool seqno_valid; /* Protected by @lock, and may not be set to true
 			     without the @goal_irq_mutex held. */
-	unsigned ctx;
+	u64 ctx;
 };
 
 struct vmw_user_fence {
diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h
index d2a1734..ef1f7d4 100644
--- a/drivers/staging/android/sync.h
+++ b/drivers/staging/android/sync.h
@@ -68,7 +68,8 @@  struct sync_timeline {
 
 	/* protected by child_list_lock */
 	bool			destroyed;
-	int			context, value;
+	u64			context;
+	int			value;
 
 	struct list_head	child_list_head;
 	spinlock_t		child_list_lock;
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 2b17698..350caaa 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -75,7 +75,8 @@  struct fence {
 	struct rcu_head rcu;
 	struct list_head cb_list;
 	spinlock_t *lock;
-	unsigned context, seqno;
+	u64 context;
+	unsigned seqno;
 	unsigned long flags;
 	ktime_t timestamp;
 	int status;
@@ -178,7 +179,7 @@  struct fence_ops {
 };
 
 void fence_init(struct fence *fence, const struct fence_ops *ops,
-		spinlock_t *lock, unsigned context, unsigned seqno);
+		spinlock_t *lock, u64 context, unsigned seqno);
 
 void fence_release(struct kref *kref);
 void fence_free(struct fence *fence);
@@ -352,7 +353,7 @@  static inline signed long fence_wait(struct fence *fence, bool intr)
 	return ret < 0 ? ret : 0;
 }
 
-unsigned fence_context_alloc(unsigned num);
+u64 fence_context_alloc(unsigned num);
 
 #define FENCE_TRACE(f, fmt, args...) \
 	do {								\