Message ID | 20240123072538.1290035-1-airlied@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nouveau: rip out fence irq allow/block sequences. | expand |
On Tue, Jan 23, 2024 at 05:25:38PM +1000, Dave Airlie wrote: > From: Dave Airlie <airlied@redhat.com> > > fences are signalled on nvidia hw using non-stall interrupts. > > non-stall interrupts are not latched from my reading. > > When nouveau emits a fence, it requests a NON_STALL signalling, > but it only calls the interface to allow the non-stall irq to happen > after it has already emitted the fence. A recent change > eacabb546271 ("nouveau: push event block/allowing out of the fence context") > made this worse by pushing out the fence allow/block to a workqueue. > > However I can't see how this could ever work great, since when > enable signalling is called, the semaphore has already been emitted > to the ring, and the hw could already have tried to set the bits, > but it's been masked off. Changing the allowed mask later won't make > the interrupt get called again. > > For now rip all of this out. > > This fixes a bunch of stalls seen running VK CTS sync tests. > > Signed-off-by: Dave Airlie <airlied@redhat.com> > --- > drivers/gpu/drm/nouveau/nouveau_fence.c | 77 +++++-------------------- > drivers/gpu/drm/nouveau/nouveau_fence.h | 2 - > 2 files changed, 16 insertions(+), 63 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c > index 5057d976fa57..d6d50cdccf75 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > @@ -50,24 +50,14 @@ nouveau_fctx(struct nouveau_fence *fence) > return container_of(fence->base.lock, struct nouveau_fence_chan, lock); > } > > -static int > +static void > nouveau_fence_signal(struct nouveau_fence *fence) > { > - int drop = 0; > - > dma_fence_signal_locked(&fence->base); > list_del(&fence->head); > rcu_assign_pointer(fence->channel, NULL); > > - if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { > - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); > - > - if (atomic_dec_and_test(&fctx->notify_ref)) > - drop 
= 1; > - } > - > dma_fence_put(&fence->base); > - return drop; > } > > static struct nouveau_fence * > @@ -93,8 +83,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) > if (error) > dma_fence_set_error(&fence->base, error); > > - if (nouveau_fence_signal(fence)) > - nvif_event_block(&fctx->event); > + nouveau_fence_signal(fence); > } > fctx->killed = 1; > spin_unlock_irqrestore(&fctx->lock, flags); > @@ -103,8 +92,8 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) > void > nouveau_fence_context_del(struct nouveau_fence_chan *fctx) > { > - cancel_work_sync(&fctx->allow_block_work); > nouveau_fence_context_kill(fctx, 0); > + nvif_event_block(&fctx->event); > nvif_event_dtor(&fctx->event); > fctx->dead = 1; > > @@ -127,11 +116,10 @@ nouveau_fence_context_free(struct nouveau_fence_chan *fctx) > kref_put(&fctx->fence_ref, nouveau_fence_context_put); > } > > -static int > +static void > nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) > { > struct nouveau_fence *fence; > - int drop = 0; > u32 seq = fctx->read(chan); > > while (!list_empty(&fctx->pending)) { > @@ -140,10 +128,8 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc > if ((int)(seq - fence->base.seqno) < 0) > break; > > - drop |= nouveau_fence_signal(fence); > + nouveau_fence_signal(fence); > } > - > - return drop; > } > > static int > @@ -160,26 +146,13 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc > > fence = list_entry(fctx->pending.next, typeof(*fence), head); > chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); > - if (nouveau_fence_update(chan, fctx)) > - ret = NVIF_EVENT_DROP; > + nouveau_fence_update(chan, fctx); > } > spin_unlock_irqrestore(&fctx->lock, flags); > > return ret; > } > > -static void > -nouveau_fence_work_allow_block(struct work_struct *work) > -{ > - struct nouveau_fence_chan *fctx = container_of(work, 
struct nouveau_fence_chan, > - allow_block_work); > - > - if (atomic_read(&fctx->notify_ref) == 0) > - nvif_event_block(&fctx->event); > - else > - nvif_event_allow(&fctx->event); > -} > - > void > nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) > { > @@ -191,7 +164,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha > } args; > int ret; > > - INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); > INIT_LIST_HEAD(&fctx->flip); > INIT_LIST_HEAD(&fctx->pending); > spin_lock_init(&fctx->lock); > @@ -216,6 +188,12 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha > &args.base, sizeof(args), &fctx->event); > > WARN_ON(ret); > + > + /* > + * Always allow non-stall irq events - previously this code tried to > + * enable/disable them, but that just seems racy as nonstall irqs are unlatched. > + */ > + nvif_event_allow(&fctx->event); > } > > int > @@ -247,8 +225,7 @@ nouveau_fence_emit(struct nouveau_fence *fence) > return -ENODEV; > } > > - if (nouveau_fence_update(chan, fctx)) > - nvif_event_block(&fctx->event); > + nouveau_fence_update(chan, fctx); > > list_add_tail(&fence->head, &fctx->pending); > spin_unlock_irq(&fctx->lock); > @@ -271,8 +248,8 @@ nouveau_fence_done(struct nouveau_fence *fence) > > spin_lock_irqsave(&fctx->lock, flags); > chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); > - if (chan && nouveau_fence_update(chan, fctx)) > - nvif_event_block(&fctx->event); > + if (chan) > + nouveau_fence_update(chan, fctx); > spin_unlock_irqrestore(&fctx->lock, flags); > } > return dma_fence_is_signaled(&fence->base); > @@ -530,32 +507,10 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = { > .release = nouveau_fence_release > }; > > -static bool nouveau_fence_enable_signaling(struct dma_fence *f) > -{ > - struct nouveau_fence *fence = from_fence(f); > - struct nouveau_fence_chan *fctx = 
nouveau_fctx(fence); > - bool ret; > - bool do_work; > - > - if (atomic_inc_return(&fctx->notify_ref) == 0) > - do_work = true; > - > - ret = nouveau_fence_no_signaling(f); > - if (ret) > - set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); > - else if (atomic_dec_and_test(&fctx->notify_ref)) > - do_work = true; > - > - if (do_work) > - schedule_work(&fctx->allow_block_work); > - > - return ret; > -} > - > static const struct dma_fence_ops nouveau_fence_ops_uevent = { > .get_driver_name = nouveau_fence_get_get_driver_name, > .get_timeline_name = nouveau_fence_get_timeline_name, > - .enable_signaling = nouveau_fence_enable_signaling, > + .enable_signaling = nouveau_fence_no_signaling, I think you can rip nouveau_fence_no_signaling out too, it doesn't do anything more than what the signalling codepath does too. But maybe separate patch since maybe this makes an existing leak more of a sieve, but it really should be an existing one since you cannot assume that someone external will ever look at whether your fence is signalled or not. -Sima > .signaled = nouveau_fence_is_signaled, > .release = nouveau_fence_release > }; > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h > index 28f5cf013b89..380bb0397ed2 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h > @@ -46,8 +46,6 @@ struct nouveau_fence_chan { > char name[32]; > > struct nvif_event event; > - struct work_struct allow_block_work; > - atomic_t notify_ref; > int dead, killed; > }; > > -- > 2.43.0 >
On Fri, 26 Jan 2024 at 04:28, Daniel Vetter <daniel@ffwll.ch> wrote: > > On Tue, Jan 23, 2024 at 05:25:38PM +1000, Dave Airlie wrote: > > From: Dave Airlie <airlied@redhat.com> > > > > fences are signalled on nvidia hw using non-stall interrupts. > > > > non-stall interrupts are not latched from my reading. > > > > When nouveau emits a fence, it requests a NON_STALL signalling, > > but it only calls the interface to allow the non-stall irq to happen > > after it has already emitted the fence. A recent change > > eacabb546271 ("nouveau: push event block/allowing out of the fence context") > > made this worse by pushing out the fence allow/block to a workqueue. > > > > However I can't see how this could ever work great, since when > > enable signalling is called, the semaphore has already been emitted > > to the ring, and the hw could already have tried to set the bits, > > but it's been masked off. Changing the allowed mask later won't make > > the interrupt get called again. > > > > For now rip all of this out. > > > > This fixes a bunch of stalls seen running VK CTS sync tests. 
> > > > Signed-off-by: Dave Airlie <airlied@redhat.com> > > --- > > drivers/gpu/drm/nouveau/nouveau_fence.c | 77 +++++-------------------- > > drivers/gpu/drm/nouveau/nouveau_fence.h | 2 - > > 2 files changed, 16 insertions(+), 63 deletions(-) > > > > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c > > index 5057d976fa57..d6d50cdccf75 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > > @@ -50,24 +50,14 @@ nouveau_fctx(struct nouveau_fence *fence) > > return container_of(fence->base.lock, struct nouveau_fence_chan, lock); > > } > > > > -static int > > +static void > > nouveau_fence_signal(struct nouveau_fence *fence) > > { > > - int drop = 0; > > - > > dma_fence_signal_locked(&fence->base); > > list_del(&fence->head); > > rcu_assign_pointer(fence->channel, NULL); > > > > - if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { > > - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); > > - > > - if (atomic_dec_and_test(&fctx->notify_ref)) > > - drop = 1; > > - } > > - > > dma_fence_put(&fence->base); > > - return drop; > > } > > > > static struct nouveau_fence * > > @@ -93,8 +83,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) > > if (error) > > dma_fence_set_error(&fence->base, error); > > > > - if (nouveau_fence_signal(fence)) > > - nvif_event_block(&fctx->event); > > + nouveau_fence_signal(fence); > > } > > fctx->killed = 1; > > spin_unlock_irqrestore(&fctx->lock, flags); > > @@ -103,8 +92,8 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) > > void > > nouveau_fence_context_del(struct nouveau_fence_chan *fctx) > > { > > - cancel_work_sync(&fctx->allow_block_work); > > nouveau_fence_context_kill(fctx, 0); > > + nvif_event_block(&fctx->event); > > nvif_event_dtor(&fctx->event); > > fctx->dead = 1; > > > > @@ -127,11 +116,10 @@ nouveau_fence_context_free(struct nouveau_fence_chan *fctx) > > 
kref_put(&fctx->fence_ref, nouveau_fence_context_put); > > } > > > > -static int > > +static void > > nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) > > { > > struct nouveau_fence *fence; > > - int drop = 0; > > u32 seq = fctx->read(chan); > > > > while (!list_empty(&fctx->pending)) { > > @@ -140,10 +128,8 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc > > if ((int)(seq - fence->base.seqno) < 0) > > break; > > > > - drop |= nouveau_fence_signal(fence); > > + nouveau_fence_signal(fence); > > } > > - > > - return drop; > > } > > > > static int > > @@ -160,26 +146,13 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc > > > > fence = list_entry(fctx->pending.next, typeof(*fence), head); > > chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); > > - if (nouveau_fence_update(chan, fctx)) > > - ret = NVIF_EVENT_DROP; > > + nouveau_fence_update(chan, fctx); > > } > > spin_unlock_irqrestore(&fctx->lock, flags); > > > > return ret; > > } > > > > -static void > > -nouveau_fence_work_allow_block(struct work_struct *work) > > -{ > > - struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, > > - allow_block_work); > > - > > - if (atomic_read(&fctx->notify_ref) == 0) > > - nvif_event_block(&fctx->event); > > - else > > - nvif_event_allow(&fctx->event); > > -} > > - > > void > > nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) > > { > > @@ -191,7 +164,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha > > } args; > > int ret; > > > > - INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); > > INIT_LIST_HEAD(&fctx->flip); > > INIT_LIST_HEAD(&fctx->pending); > > spin_lock_init(&fctx->lock); > > @@ -216,6 +188,12 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha > > &args.base, sizeof(args), &fctx->event); 
> > > > WARN_ON(ret); > > + > > + /* > > + * Always allow non-stall irq events - previously this code tried to > > + * enable/disable them, but that just seems racy as nonstall irqs are unlatched. > > + */ > > + nvif_event_allow(&fctx->event); > > } > > > > int > > @@ -247,8 +225,7 @@ nouveau_fence_emit(struct nouveau_fence *fence) > > return -ENODEV; > > } > > > > - if (nouveau_fence_update(chan, fctx)) > > - nvif_event_block(&fctx->event); > > + nouveau_fence_update(chan, fctx); > > > > list_add_tail(&fence->head, &fctx->pending); > > spin_unlock_irq(&fctx->lock); > > @@ -271,8 +248,8 @@ nouveau_fence_done(struct nouveau_fence *fence) > > > > spin_lock_irqsave(&fctx->lock, flags); > > chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); > > - if (chan && nouveau_fence_update(chan, fctx)) > > - nvif_event_block(&fctx->event); > > + if (chan) > > + nouveau_fence_update(chan, fctx); > > spin_unlock_irqrestore(&fctx->lock, flags); > > } > > return dma_fence_is_signaled(&fence->base); > > @@ -530,32 +507,10 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = { > > .release = nouveau_fence_release > > }; > > > > -static bool nouveau_fence_enable_signaling(struct dma_fence *f) > > -{ > > - struct nouveau_fence *fence = from_fence(f); > > - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); > > - bool ret; > > - bool do_work; > > - > > - if (atomic_inc_return(&fctx->notify_ref) == 0) > > - do_work = true; > > - > > - ret = nouveau_fence_no_signaling(f); > > - if (ret) > > - set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); > > - else if (atomic_dec_and_test(&fctx->notify_ref)) > > - do_work = true; > > - > > - if (do_work) > > - schedule_work(&fctx->allow_block_work); > > - > > - return ret; > > -} > > - > > static const struct dma_fence_ops nouveau_fence_ops_uevent = { > > .get_driver_name = nouveau_fence_get_get_driver_name, > > .get_timeline_name = nouveau_fence_get_timeline_name, > > - .enable_signaling = 
nouveau_fence_enable_signaling, > > + .enable_signaling = nouveau_fence_no_signaling, > > I think you can rip nouveau_fence_no_signaling out too, it doesn't do > anything more than what the signalling codepath does too. > > But maybe separate patch since maybe this makes an existing leak more of a > sieve, but it really should be an existing one since you cannot assume > that someone external will ever look at whether your fence is signalled or > not. > -Sima > I think it might be overkill to rip this out, but the fix I put in 6.7 is also having bad side effects, so I'm going to try and revert that and fix that problem first. I think I'd like to keep this irq handling stuff as it seems to matter, but I think the atomic in fctx is wrongly handled: it's a case of misusing atomics instead of locks. I'm going to spend next week considering it in a bit more depth. Dave.
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 5057d976fa57..d6d50cdccf75 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -50,24 +50,14 @@ nouveau_fctx(struct nouveau_fence *fence) return container_of(fence->base.lock, struct nouveau_fence_chan, lock); } -static int +static void nouveau_fence_signal(struct nouveau_fence *fence) { - int drop = 0; - dma_fence_signal_locked(&fence->base); list_del(&fence->head); rcu_assign_pointer(fence->channel, NULL); - if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - - if (atomic_dec_and_test(&fctx->notify_ref)) - drop = 1; - } - dma_fence_put(&fence->base); - return drop; } static struct nouveau_fence * @@ -93,8 +83,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) if (error) dma_fence_set_error(&fence->base, error); - if (nouveau_fence_signal(fence)) - nvif_event_block(&fctx->event); + nouveau_fence_signal(fence); } fctx->killed = 1; spin_unlock_irqrestore(&fctx->lock, flags); @@ -103,8 +92,8 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { - cancel_work_sync(&fctx->allow_block_work); nouveau_fence_context_kill(fctx, 0); + nvif_event_block(&fctx->event); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -127,11 +116,10 @@ nouveau_fence_context_free(struct nouveau_fence_chan *fctx) kref_put(&fctx->fence_ref, nouveau_fence_context_put); } -static int +static void nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) { struct nouveau_fence *fence; - int drop = 0; u32 seq = fctx->read(chan); while (!list_empty(&fctx->pending)) { @@ -140,10 +128,8 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc if ((int)(seq - fence->base.seqno) < 0) break; - drop |= nouveau_fence_signal(fence); 
+ nouveau_fence_signal(fence); } - - return drop; } static int @@ -160,26 +146,13 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (nouveau_fence_update(chan, fctx)) - ret = NVIF_EVENT_DROP; + nouveau_fence_update(chan, fctx); } spin_unlock_irqrestore(&fctx->lock, flags); return ret; } -static void -nouveau_fence_work_allow_block(struct work_struct *work) -{ - struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, - allow_block_work); - - if (atomic_read(&fctx->notify_ref) == 0) - nvif_event_block(&fctx->event); - else - nvif_event_allow(&fctx->event); -} - void nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) { @@ -191,7 +164,6 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; - INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -216,6 +188,12 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha &args.base, sizeof(args), &fctx->event); WARN_ON(ret); + + /* + * Always allow non-stall irq events - previously this code tried to + * enable/disable them, but that just seems racy as nonstall irqs are unlatched. 
+ */ + nvif_event_allow(&fctx->event); } int @@ -247,8 +225,7 @@ nouveau_fence_emit(struct nouveau_fence *fence) return -ENODEV; } - if (nouveau_fence_update(chan, fctx)) - nvif_event_block(&fctx->event); + nouveau_fence_update(chan, fctx); list_add_tail(&fence->head, &fctx->pending); spin_unlock_irq(&fctx->lock); @@ -271,8 +248,8 @@ nouveau_fence_done(struct nouveau_fence *fence) spin_lock_irqsave(&fctx->lock, flags); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (chan && nouveau_fence_update(chan, fctx)) - nvif_event_block(&fctx->event); + if (chan) + nouveau_fence_update(chan, fctx); spin_unlock_irqrestore(&fctx->lock, flags); } return dma_fence_is_signaled(&fence->base); @@ -530,32 +507,10 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = { .release = nouveau_fence_release }; -static bool nouveau_fence_enable_signaling(struct dma_fence *f) -{ - struct nouveau_fence *fence = from_fence(f); - struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - bool ret; - bool do_work; - - if (atomic_inc_return(&fctx->notify_ref) == 0) - do_work = true; - - ret = nouveau_fence_no_signaling(f); - if (ret) - set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); - else if (atomic_dec_and_test(&fctx->notify_ref)) - do_work = true; - - if (do_work) - schedule_work(&fctx->allow_block_work); - - return ret; -} - static const struct dma_fence_ops nouveau_fence_ops_uevent = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, - .enable_signaling = nouveau_fence_enable_signaling, + .enable_signaling = nouveau_fence_no_signaling, .signaled = nouveau_fence_is_signaled, .release = nouveau_fence_release }; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 28f5cf013b89..380bb0397ed2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -46,8 +46,6 @@ struct nouveau_fence_chan { char 
name[32]; struct nvif_event event; - struct work_struct allow_block_work; - atomic_t notify_ref; int dead, killed; };