Message ID | 1463648436-4624-1-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote: > From: Christian König <christian.koenig@amd.com> > > Fence contexts are created on the fly (for example) by the GPU scheduler used > in the amdgpu driver as a result of an userspace request. Because of this > userspace could in theory force a wrap around of the 32bit context number > if it doesn't behave well. > > Avoid this by increasing the context number to 64bits. This way even when > userspace manages to allocate a billion contexts per second it takes more > than 500 years for the context number to wrap around. > > Signed-off-by: Christian König <christian.koenig@amd.com> Hm, I think it'd be nice to wrap this up into a real struct and then manage them with some idr or whatever. For debugging we might also want to keep track of all fences on a given timeline and similar things, so there will be a need for this in the future. So if you go through every driver I think it's better to replace the type with struct fence_context *context while we're at it. Makes it a notch bigger since we need to add a little bit of error handling to all callers of fence_context_alloc. Volunteered? ;-) Cheers, Daniel > --- > drivers/dma-buf/fence.c | 8 ++++---- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- > drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 2 +- > drivers/gpu/drm/nouveau/nouveau_fence.h | 3 ++- > drivers/gpu/drm/radeon/radeon.h | 2 +- > drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- > drivers/staging/android/sync.h | 3 ++- > include/linux/fence.h | 7 ++++--- > 8 files changed, 16 insertions(+), 13 deletions(-) > > diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c > index 7b05dbe..4d51f9e 100644 > --- a/drivers/dma-buf/fence.c > +++ b/drivers/dma-buf/fence.c > @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit); > * context or not. One device can have multiple separate contexts, > * and they're used if some engine can run independently of another. 
> */ > -static atomic_t fence_context_counter = ATOMIC_INIT(0); > +static atomic64_t fence_context_counter = ATOMIC64_INIT(0); > > /** > * fence_context_alloc - allocate an array of fence contexts > @@ -44,10 +44,10 @@ static atomic_t fence_context_counter = ATOMIC_INIT(0); > * This function will return the first index of the number of fences allocated. > * The fence context is used for setting fence->context to a unique number. > */ > -unsigned fence_context_alloc(unsigned num) > +u64 fence_context_alloc(unsigned num) > { > BUG_ON(!num); > - return atomic_add_return(num, &fence_context_counter) - num; > + return atomic64_add_return(num, &fence_context_counter) - num; > } > EXPORT_SYMBOL(fence_context_alloc); > > @@ -513,7 +513,7 @@ EXPORT_SYMBOL(fence_wait_any_timeout); > */ > void > fence_init(struct fence *fence, const struct fence_ops *ops, > - spinlock_t *lock, unsigned context, unsigned seqno) > + spinlock_t *lock, u64 context, unsigned seqno) > { > BUG_ON(!lock); > BUG_ON(!ops || !ops->wait || !ops->enable_signaling || > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 992f00b..da3d021 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -2032,7 +2032,7 @@ struct amdgpu_device { > struct amdgpu_irq_src hpd_irq; > > /* rings */ > - unsigned fence_context; > + u64 fence_context; > unsigned num_rings; > struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; > bool ib_pool_ready; > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > index f5321e2..a69cdd5 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h > @@ -125,7 +125,7 @@ struct etnaviv_gpu { > u32 completed_fence; > u32 retired_fence; > wait_queue_head_t fence_event; > - unsigned int fence_context; > + u64 fence_context; > spinlock_t fence_spinlock; > > /* worker for handling active-list retiring: */ > diff --git 
a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h > index 2e3a62d..64c4ce7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h > @@ -57,7 +57,8 @@ struct nouveau_fence_priv { > int (*context_new)(struct nouveau_channel *); > void (*context_del)(struct nouveau_channel *); > > - u32 contexts, context_base; > + u32 contexts; > + u64 context_base; > bool uevent; > }; > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 80b24a4..5633ee3 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -2386,7 +2386,7 @@ struct radeon_device { > struct radeon_mman mman; > struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; > wait_queue_head_t fence_queue; > - unsigned fence_context; > + u64 fence_context; > struct mutex ring_lock; > struct radeon_ring ring[RADEON_NUM_RINGS]; > bool ib_pool_ready; > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c > index e959df6..26ac8e8 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c > @@ -46,7 +46,7 @@ struct vmw_fence_manager { > bool goal_irq_on; /* Protected by @goal_irq_mutex */ > bool seqno_valid; /* Protected by @lock, and may not be set to true > without the @goal_irq_mutex held. 
*/ > - unsigned ctx; > + u64 ctx; > }; > > struct vmw_user_fence { > diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h > index d2a1734..ef1f7d4 100644 > --- a/drivers/staging/android/sync.h > +++ b/drivers/staging/android/sync.h > @@ -68,7 +68,8 @@ struct sync_timeline { > > /* protected by child_list_lock */ > bool destroyed; > - int context, value; > + u64 context; > + int value; > > struct list_head child_list_head; > spinlock_t child_list_lock; > diff --git a/include/linux/fence.h b/include/linux/fence.h > index 2b17698..350caaa 100644 > --- a/include/linux/fence.h > +++ b/include/linux/fence.h > @@ -75,7 +75,8 @@ struct fence { > struct rcu_head rcu; > struct list_head cb_list; > spinlock_t *lock; > - unsigned context, seqno; > + u64 context; > + unsigned seqno; > unsigned long flags; > ktime_t timestamp; > int status; > @@ -178,7 +179,7 @@ struct fence_ops { > }; > > void fence_init(struct fence *fence, const struct fence_ops *ops, > - spinlock_t *lock, unsigned context, unsigned seqno); > + spinlock_t *lock, u64 context, unsigned seqno); > > void fence_release(struct kref *kref); > void fence_free(struct fence *fence); > @@ -352,7 +353,7 @@ static inline signed long fence_wait(struct fence *fence, bool intr) > return ret < 0 ? ret : 0; > } > > -unsigned fence_context_alloc(unsigned num); > +u64 fence_context_alloc(unsigned num); > > #define FENCE_TRACE(f, fmt, args...) \ > do { \ > -- > 2.5.0 >
Am 19.05.2016 um 11:14 schrieb Daniel Vetter: > On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote: >> From: Christian König <christian.koenig@amd.com> >> >> Fence contexts are created on the fly (for example) by the GPU scheduler used >> in the amdgpu driver as a result of an userspace request. Because of this >> userspace could in theory force a wrap around of the 32bit context number >> if it doesn't behave well. >> >> Avoid this by increasing the context number to 64bits. This way even when >> userspace manages to allocate a billion contexts per second it takes more >> than 500 years for the context number to wrap around. >> >> Signed-off-by: Christian König <christian.koenig@amd.com> > Hm, I think it'd be nice to wrap this up into a real struct and then > manage them with some idr or whatever. For debugging we might also want to > keep track of all fences on a given timeline and similar things, so > there will be a need for this in the future. > > So if you go through every driver I think it's better to replace the type > with struct fence_context *context while we're at it. Makes it a notch > bigger since we need to add a little bit of error handling to all callers > of fence_context_alloc. > > Volunteered? ;-) Well, that's exactly what I wanted to avoid. 64bit numbers are fast to allocate and easy to compare. If I make it a structure then we would need to kmalloc() it and make sure it is reference counted so it stays alive as long as any fence structure is alive which is referring to it. The overhead sounds to much to me, especially since we currently don't have a real use for that right now. Christian. 
> > Cheers, Daniel >> --- >> drivers/dma-buf/fence.c | 8 ++++---- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- >> drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 2 +- >> drivers/gpu/drm/nouveau/nouveau_fence.h | 3 ++- >> drivers/gpu/drm/radeon/radeon.h | 2 +- >> drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- >> drivers/staging/android/sync.h | 3 ++- >> include/linux/fence.h | 7 ++++--- >> 8 files changed, 16 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c >> index 7b05dbe..4d51f9e 100644 >> --- a/drivers/dma-buf/fence.c >> +++ b/drivers/dma-buf/fence.c >> @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit); >> * context or not. One device can have multiple separate contexts, >> * and they're used if some engine can run independently of another. >> */ >> -static atomic_t fence_context_counter = ATOMIC_INIT(0); >> +static atomic64_t fence_context_counter = ATOMIC64_INIT(0); >> >> /** >> * fence_context_alloc - allocate an array of fence contexts >> @@ -44,10 +44,10 @@ static atomic_t fence_context_counter = ATOMIC_INIT(0); >> * This function will return the first index of the number of fences allocated. >> * The fence context is used for setting fence->context to a unique number. 
>> */ >> -unsigned fence_context_alloc(unsigned num) >> +u64 fence_context_alloc(unsigned num) >> { >> BUG_ON(!num); >> - return atomic_add_return(num, &fence_context_counter) - num; >> + return atomic64_add_return(num, &fence_context_counter) - num; >> } >> EXPORT_SYMBOL(fence_context_alloc); >> >> @@ -513,7 +513,7 @@ EXPORT_SYMBOL(fence_wait_any_timeout); >> */ >> void >> fence_init(struct fence *fence, const struct fence_ops *ops, >> - spinlock_t *lock, unsigned context, unsigned seqno) >> + spinlock_t *lock, u64 context, unsigned seqno) >> { >> BUG_ON(!lock); >> BUG_ON(!ops || !ops->wait || !ops->enable_signaling || >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index 992f00b..da3d021 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -2032,7 +2032,7 @@ struct amdgpu_device { >> struct amdgpu_irq_src hpd_irq; >> >> /* rings */ >> - unsigned fence_context; >> + u64 fence_context; >> unsigned num_rings; >> struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; >> bool ib_pool_ready; >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> index f5321e2..a69cdd5 100644 >> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> @@ -125,7 +125,7 @@ struct etnaviv_gpu { >> u32 completed_fence; >> u32 retired_fence; >> wait_queue_head_t fence_event; >> - unsigned int fence_context; >> + u64 fence_context; >> spinlock_t fence_spinlock; >> >> /* worker for handling active-list retiring: */ >> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h >> index 2e3a62d..64c4ce7 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h >> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h >> @@ -57,7 +57,8 @@ struct nouveau_fence_priv { >> int (*context_new)(struct nouveau_channel *); >> void (*context_del)(struct nouveau_channel *); >> >> - u32 contexts, context_base; >> + u32 
contexts; >> + u64 context_base; >> bool uevent; >> }; >> >> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h >> index 80b24a4..5633ee3 100644 >> --- a/drivers/gpu/drm/radeon/radeon.h >> +++ b/drivers/gpu/drm/radeon/radeon.h >> @@ -2386,7 +2386,7 @@ struct radeon_device { >> struct radeon_mman mman; >> struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; >> wait_queue_head_t fence_queue; >> - unsigned fence_context; >> + u64 fence_context; >> struct mutex ring_lock; >> struct radeon_ring ring[RADEON_NUM_RINGS]; >> bool ib_pool_ready; >> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c >> index e959df6..26ac8e8 100644 >> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c >> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c >> @@ -46,7 +46,7 @@ struct vmw_fence_manager { >> bool goal_irq_on; /* Protected by @goal_irq_mutex */ >> bool seqno_valid; /* Protected by @lock, and may not be set to true >> without the @goal_irq_mutex held. 
*/ >> - unsigned ctx; >> + u64 ctx; >> }; >> >> struct vmw_user_fence { >> diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h >> index d2a1734..ef1f7d4 100644 >> --- a/drivers/staging/android/sync.h >> +++ b/drivers/staging/android/sync.h >> @@ -68,7 +68,8 @@ struct sync_timeline { >> >> /* protected by child_list_lock */ >> bool destroyed; >> - int context, value; >> + u64 context; >> + int value; >> >> struct list_head child_list_head; >> spinlock_t child_list_lock; >> diff --git a/include/linux/fence.h b/include/linux/fence.h >> index 2b17698..350caaa 100644 >> --- a/include/linux/fence.h >> +++ b/include/linux/fence.h >> @@ -75,7 +75,8 @@ struct fence { >> struct rcu_head rcu; >> struct list_head cb_list; >> spinlock_t *lock; >> - unsigned context, seqno; >> + u64 context; >> + unsigned seqno; >> unsigned long flags; >> ktime_t timestamp; >> int status; >> @@ -178,7 +179,7 @@ struct fence_ops { >> }; >> >> void fence_init(struct fence *fence, const struct fence_ops *ops, >> - spinlock_t *lock, unsigned context, unsigned seqno); >> + spinlock_t *lock, u64 context, unsigned seqno); >> >> void fence_release(struct kref *kref); >> void fence_free(struct fence *fence); >> @@ -352,7 +353,7 @@ static inline signed long fence_wait(struct fence *fence, bool intr) >> return ret < 0 ? ret : 0; >> } >> >> -unsigned fence_context_alloc(unsigned num); >> +u64 fence_context_alloc(unsigned num); >> >> #define FENCE_TRACE(f, fmt, args...) \ >> do { \ >> -- >> 2.5.0 >>
On Thu, May 19, 2016 at 11:30 AM, Christian König <deathsimple@vodafone.de> wrote: > Am 19.05.2016 um 11:14 schrieb Daniel Vetter: >> >> On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote: >>> >>> From: Christian König <christian.koenig@amd.com> >>> >>> Fence contexts are created on the fly (for example) by the GPU scheduler >>> used >>> in the amdgpu driver as a result of an userspace request. Because of this >>> userspace could in theory force a wrap around of the 32bit context number >>> if it doesn't behave well. >>> >>> Avoid this by increasing the context number to 64bits. This way even when >>> userspace manages to allocate a billion contexts per second it takes more >>> than 500 years for the context number to wrap around. >>> >>> Signed-off-by: Christian König <christian.koenig@amd.com> >> >> Hm, I think it'd be nice to wrap this up into a real struct and then >> manage them with some idr or whatever. For debugging we might also want to >> keep track of all fences on a given timeline and similar things, so >> there will be a need for this in the future. >> >> So if you go through every driver I think it's better to replace the type >> with struct fence_context *context while we're at it. Makes it a notch >> bigger since we need to add a little bit of error handling to all callers >> of fence_context_alloc. >> >> Volunteered? ;-) > > > Well, that's exactly what I wanted to avoid. 64bit numbers are fast to > allocate and easy to compare. > > If I make it a structure then we would need to kmalloc() it and make sure it > is reference counted so it stays alive as long as any fence structure is > alive which is referring to it. > > The overhead sounds to much to me, especially since we currently don't have > a real use for that right now. Hm, I guess if you're worried about the kmalloc we could make fence_context embeddable. 
At least I assume you have to allocate something somewhere already to store the u64, and that something also needs to be refcounted already (or cleaned up suitably) to make sure it doesn't disappear before the fences go away. I'm just raising this because the longer we wait with redoing this interface the more painful it'll be. Android at least does have a full-blown struct, and the reason is exclusively for debugging. And from what I've heard from android devs debugging fence lockups is a complete pain. That's why I think sooner or later there's no way around a full blown struct. -Daniel
Am 19.05.2016 um 14:07 schrieb Daniel Vetter: > On Thu, May 19, 2016 at 11:30 AM, Christian König > <deathsimple@vodafone.de> wrote: >> Am 19.05.2016 um 11:14 schrieb Daniel Vetter: >>> On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote: >>>> From: Christian König <christian.koenig@amd.com> >>>> >>>> Fence contexts are created on the fly (for example) by the GPU scheduler >>>> used >>>> in the amdgpu driver as a result of an userspace request. Because of this >>>> userspace could in theory force a wrap around of the 32bit context number >>>> if it doesn't behave well. >>>> >>>> Avoid this by increasing the context number to 64bits. This way even when >>>> userspace manages to allocate a billion contexts per second it takes more >>>> than 500 years for the context number to wrap around. >>>> >>>> Signed-off-by: Christian König <christian.koenig@amd.com> >>> Hm, I think it'd be nice to wrap this up into a real struct and then >>> manage them with some idr or whatever. For debugging we might also want to >>> keep track of all fences on a given timeline and similar things, so >>> there will be a need for this in the future. >>> >>> So if you go through every driver I think it's better to replace the type >>> with struct fence_context *context while we're at it. Makes it a notch >>> bigger since we need to add a little bit of error handling to all callers >>> of fence_context_alloc. >>> >>> Volunteered? ;-) >> >> Well, that's exactly what I wanted to avoid. 64bit numbers are fast to >> allocate and easy to compare. >> >> If I make it a structure then we would need to kmalloc() it and make sure it >> is reference counted so it stays alive as long as any fence structure is >> alive which is referring to it. >> >> The overhead sounds to much to me, especially since we currently don't have >> a real use for that right now. > Hm, I guess if you're worried about the kmalloc we could make > fence_context embeddable. 
At least I assume you have to allcate > something somewhere already to store the u64, and that something also > needs to be refcounted already (or cleaned up suitably) to make sure > it doesn't disappear before the fences go away. Nope, while it's true that we allocate the fence context with the command submission context in an IOCTL for amdgpu freeing the command submission context happens while fences using the fence context are still around. E.g. the application could have died, but some hardware operations are still under way referencing the context. Keeping it as a structure can cause all kinds of problems in OOM situations. For example we don't have a limit on the number of contexts created. E.g. an application could allocate a lot of command submission contexts, submits a single waiting command and exits. > I'm just raising this because the longer we wait with redoing this > interface the more painful it'll be. Android at least does have a > full-blown struct, and the reason is exclusively for debugging. And > from what I've heard from android devs debugging fence lockups is a > complete pain. That's why I think sooner or later there's no way > around a full blown struct. I actually don't think so. Having the context as a simple number gives much greater flexibility to us because we don't need to worry about any overhead. For debugging you can just put the context number into a hashtable or tree if you need to store additional information to it. Christian. > -Daniel
On Thu, May 19, 2016 at 02:21:43PM +0200, Christian König wrote: > Am 19.05.2016 um 14:07 schrieb Daniel Vetter: > >On Thu, May 19, 2016 at 11:30 AM, Christian König > ><deathsimple@vodafone.de> wrote: > >>Am 19.05.2016 um 11:14 schrieb Daniel Vetter: > >>>On Thu, May 19, 2016 at 11:00:36AM +0200, Christian König wrote: > >>>>From: Christian König <christian.koenig@amd.com> > >>>> > >>>>Fence contexts are created on the fly (for example) by the GPU scheduler > >>>>used > >>>>in the amdgpu driver as a result of an userspace request. Because of this > >>>>userspace could in theory force a wrap around of the 32bit context number > >>>>if it doesn't behave well. > >>>> > >>>>Avoid this by increasing the context number to 64bits. This way even when > >>>>userspace manages to allocate a billion contexts per second it takes more > >>>>than 500 years for the context number to wrap around. > >>>> > >>>>Signed-off-by: Christian König <christian.koenig@amd.com> > >>>Hm, I think it'd be nice to wrap this up into a real struct and then > >>>manage them with some idr or whatever. For debugging we might also want to > >>>keep track of all fences on a given timeline and similar things, so > >>>there will be a need for this in the future. > >>> > >>>So if you go through every driver I think it's better to replace the type > >>>with struct fence_context *context while we're at it. Makes it a notch > >>>bigger since we need to add a little bit of error handling to all callers > >>>of fence_context_alloc. > >>> > >>>Volunteered? ;-) > >> > >>Well, that's exactly what I wanted to avoid. 64bit numbers are fast to > >>allocate and easy to compare. > >> > >>If I make it a structure then we would need to kmalloc() it and make sure it > >>is reference counted so it stays alive as long as any fence structure is > >>alive which is referring to it. > >> > >>The overhead sounds to much to me, especially since we currently don't have > >>a real use for that right now. 
> >Hm, I guess if you're worried about the kmalloc we could make > >fence_context embeddable. At least I assume you have to allcate > >something somewhere already to store the u64, and that something also > >needs to be refcounted already (or cleaned up suitably) to make sure > >it doesn't disappear before the fences go away. > > Nope, while it's true that we allocate the fence context with the command > submission context in an IOCTL for amdgpu freeing the command submission > context happens while fences using the fence context are still around. E.g. > the application could have dies, but some hardware operations are still > under way referencing the context. > > Keeping it as a structure can cause all kind of problems in OOM situations. > For example we don't have a limit on the number of contexts created. E.g. an > application could allocate a lot of command submissions context, submits a > single waiting command and exits. > > >I'm just raising this because the longer we wait with redoing this > >interface the more painful it'll be. Android at least does have a > >full-blown struct, and the reason is exclusively for debugging. And > >from what I've heard from android devs debugging fence lockups is a > >complete pain. That's why I think sooner or later there's no way > >around a full blown struct. > I actually don't think so. Having the context as a simple number gives much > greater flexibility to us because we don't need to worry about any overhead. > > For debugging you can just put the context number into a hashtable or tree > if you need to store additional information to it. The trouble is that you then still have the lifetime issues. But I guess we can fix this later on, so Ack. -Daniel
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index 7b05dbe..4d51f9e 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit); * context or not. One device can have multiple separate contexts, * and they're used if some engine can run independently of another. */ -static atomic_t fence_context_counter = ATOMIC_INIT(0); +static atomic64_t fence_context_counter = ATOMIC64_INIT(0); /** * fence_context_alloc - allocate an array of fence contexts @@ -44,10 +44,10 @@ static atomic_t fence_context_counter = ATOMIC_INIT(0); * This function will return the first index of the number of fences allocated. * The fence context is used for setting fence->context to a unique number. */ -unsigned fence_context_alloc(unsigned num) +u64 fence_context_alloc(unsigned num) { BUG_ON(!num); - return atomic_add_return(num, &fence_context_counter) - num; + return atomic64_add_return(num, &fence_context_counter) - num; } EXPORT_SYMBOL(fence_context_alloc); @@ -513,7 +513,7 @@ EXPORT_SYMBOL(fence_wait_any_timeout); */ void fence_init(struct fence *fence, const struct fence_ops *ops, - spinlock_t *lock, unsigned context, unsigned seqno) + spinlock_t *lock, u64 context, unsigned seqno) { BUG_ON(!lock); BUG_ON(!ops || !ops->wait || !ops->enable_signaling || diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 992f00b..da3d021 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2032,7 +2032,7 @@ struct amdgpu_device { struct amdgpu_irq_src hpd_irq; /* rings */ - unsigned fence_context; + u64 fence_context; unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; bool ib_pool_ready; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index f5321e2..a69cdd5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -125,7 +125,7 @@ struct etnaviv_gpu { 
u32 completed_fence; u32 retired_fence; wait_queue_head_t fence_event; - unsigned int fence_context; + u64 fence_context; spinlock_t fence_spinlock; /* worker for handling active-list retiring: */ diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 2e3a62d..64c4ce7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -57,7 +57,8 @@ struct nouveau_fence_priv { int (*context_new)(struct nouveau_channel *); void (*context_del)(struct nouveau_channel *); - u32 contexts, context_base; + u32 contexts; + u64 context_base; bool uevent; }; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 80b24a4..5633ee3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2386,7 +2386,7 @@ struct radeon_device { struct radeon_mman mman; struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; wait_queue_head_t fence_queue; - unsigned fence_context; + u64 fence_context; struct mutex ring_lock; struct radeon_ring ring[RADEON_NUM_RINGS]; bool ib_pool_ready; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index e959df6..26ac8e8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -46,7 +46,7 @@ struct vmw_fence_manager { bool goal_irq_on; /* Protected by @goal_irq_mutex */ bool seqno_valid; /* Protected by @lock, and may not be set to true without the @goal_irq_mutex held. 
*/ - unsigned ctx; + u64 ctx; }; struct vmw_user_fence { diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h index d2a1734..ef1f7d4 100644 --- a/drivers/staging/android/sync.h +++ b/drivers/staging/android/sync.h @@ -68,7 +68,8 @@ struct sync_timeline { /* protected by child_list_lock */ bool destroyed; - int context, value; + u64 context; + int value; struct list_head child_list_head; spinlock_t child_list_lock; diff --git a/include/linux/fence.h b/include/linux/fence.h index 2b17698..350caaa 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -75,7 +75,8 @@ struct fence { struct rcu_head rcu; struct list_head cb_list; spinlock_t *lock; - unsigned context, seqno; + u64 context; + unsigned seqno; unsigned long flags; ktime_t timestamp; int status; @@ -178,7 +179,7 @@ struct fence_ops { }; void fence_init(struct fence *fence, const struct fence_ops *ops, - spinlock_t *lock, unsigned context, unsigned seqno); + spinlock_t *lock, u64 context, unsigned seqno); void fence_release(struct kref *kref); void fence_free(struct fence *fence); @@ -352,7 +353,7 @@ static inline signed long fence_wait(struct fence *fence, bool intr) return ret < 0 ? ret : 0; } -unsigned fence_context_alloc(unsigned num); +u64 fence_context_alloc(unsigned num); #define FENCE_TRACE(f, fmt, args...) \ do { \