Message ID | 1354384130-9022-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote: > Before queuing the flip but crucially after attaching the unpin-work to > the crtc, we continue to setup the unpin-work. However, should the > hardware fire early, we see the connected unpin-work and queue the task. > The task then promptly runs and unpins the fb before we finish taking > the required references or even pinning it... Havoc. > > To close the race, we use the flip-pending atomic to indicate when the > flip is finally setup and enqueued. So during the flip-done processing, > we can check more accurately whether the flip was expected. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Hm, can't this logic race? - emit the MI_FLIP - flip irq happens because the gpu is idle and completes it right away (or our thread is preempted), work->pending increments from 0 -> 1 - queue_flip sets work->pending to 1 So work->pending will be stuck at 1 forverer, the unpin never happens and stalls all subsequent flips. Then there's also the usual annoying maintainer questions: - do we have a way to readily reproduce this race? - do we have a chance to just shut out all these spurious pageflip interrupts? If they all come from MMIO access, we should be able to lock them out somehow ... Cheers, Daniel > --- > drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- > drivers/gpu/drm/i915/i915_irq.c | 4 +++- > drivers/gpu/drm/i915/intel_display.c | 23 ++++++++++++++++------- > drivers/gpu/drm/i915/intel_drv.h | 5 ++++- > 4 files changed, 25 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 8afc0dd..e6a11ca 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -317,7 +317,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) > seq_printf(m, "No flip due on pipe %c (plane %c)\n", > pipe, plane); > } else { > - if (!work->pending) { > + if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) { > seq_printf(m, "Flip queued on pipe %c (plane %c)\n", > pipe, plane); > } else { > @@ -328,7 +328,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) > seq_printf(m, "Stall check enabled, "); > else > seq_printf(m, "Stall check waiting for page flip ioctl, "); > - seq_printf(m, "%d prepares\n", work->pending); > + seq_printf(m, "%d prepares\n", atomic_read(&work->pending)); > > if (work->old_fb_obj) { > struct drm_i915_gem_object *obj = work->old_fb_obj; > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 6cd3dc9..a4dc97f 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1466,7 +1466,9 @@ static void i915_pageflip_stall_check(struct drm_device *dev, int pipe) > spin_lock_irqsave(&dev->event_lock, flags); > work = intel_crtc->unpin_work; > > - if (work == NULL || work->pending || !work->enable_stall_check) { > + if (work == NULL || > + atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE || > + !work->enable_stall_check) { > /* Either the pending flip IRQ arrived, or we're too early. Don't check */ > spin_unlock_irqrestore(&dev->event_lock, flags); > return; > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 78d12c4..2746b39 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -6929,7 +6929,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev, > > spin_lock_irqsave(&dev->event_lock, flags); > work = intel_crtc->unpin_work; > - if (work == NULL || !work->pending) { > + if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) { > spin_unlock_irqrestore(&dev->event_lock, flags); > return; > } > @@ -6977,13 +6977,13 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane) > to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]); > unsigned long flags; > > + /* NB: An MMIO update of the plane base pointer will also > + * generate a page-flip completion irq, i.e. every modeset > + * is also accompanied by a spurious intel_prepare_page_flip(). > + */ > spin_lock_irqsave(&dev->event_lock, flags); > - if (intel_crtc->unpin_work) { > - if ((++intel_crtc->unpin_work->pending) > 1) > - DRM_ERROR("Prepared flip multiple times\n"); > - } else { > - DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n"); > - } > + if (intel_crtc->unpin_work) > + atomic_inc_not_zero(&intel_crtc->unpin_work->pending); > spin_unlock_irqrestore(&dev->event_lock, flags); > } > > @@ -7020,6 +7020,8 @@ static int intel_gen2_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, fb->pitches[0]); > intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); > intel_ring_emit(ring, 0); /* aux display base address, unused */ > + > + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); > intel_ring_advance(ring); > return 0; > > @@ -7060,6 +7062,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); > intel_ring_emit(ring, MI_NOOP); > > + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); > intel_ring_advance(ring); > return 0; > > @@ -7106,6 +7109,8 @@ static int intel_gen4_queue_flip(struct drm_device *dev, > pf = 0; > pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; > intel_ring_emit(ring, pf | pipesrc); > + > + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); > intel_ring_advance(ring); > return 0; > > @@ -7148,6 +7153,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev, > pf = 0; > pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; > intel_ring_emit(ring, pf | pipesrc); > + > + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); > intel_ring_advance(ring); > return 0; > > @@ -7202,6 +7209,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); > intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); > intel_ring_emit(ring, (MI_NOOP)); > + > + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); > intel_ring_advance(ring); > return 0; > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 522061c..3915ca9 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -401,7 +401,10 @@ struct intel_unpin_work { > struct drm_i915_gem_object *old_fb_obj; > struct drm_i915_gem_object *pending_flip_obj; > struct drm_pending_vblank_event *event; > - int pending; > + atomic_t pending; > +#define INTEL_FLIP_INACTIVE 0 > +#define INTEL_FLIP_PENDING 1 > +#define INTEL_FLIP_COMPLETE 2 > bool enable_stall_check; > }; > > -- > 1.7.10.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Sat, 1 Dec 2012 21:35:21 +0100, Daniel Vetter <daniel@ffwll.ch> wrote: > On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote: > > Before queuing the flip but crucially after attaching the unpin-work to > > the crtc, we continue to setup the unpin-work. However, should the > > hardware fire early, we see the connected unpin-work and queue the task. > > The task then promptly runs and unpins the fb before we finish taking > > the required references or even pinning it... Havoc. > > > > To close the race, we use the flip-pending atomic to indicate when the > > flip is finally setup and enqueued. So during the flip-done processing, > > we can check more accurately whether the flip was expected. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Hm, can't this logic race? > > - emit the MI_FLIP > > - flip irq happens because the gpu is idle and completes it right away > (or our thread is preempted), work->pending increments from 0 -> 1 > > - queue_flip sets work->pending to 1 -> write RING_TAIL, flush the commands to CS, begin execution of MI_FLIP I'm not happy with the explanation, but I could reliably (100%) hit the race whilst loading a 2+GiB image using eog under compiz on an 965gm with only 2GIB of ram. As soon as it hit kswapd, the system would OOPS with an unpin leak. Which means that was a flip pending/done prior to the pinning + MI_FLIP. This patch adds a strong defence against that spurious flip done, but doesn't explain where it came from. -Chris
On Sat, Dec 1, 2012 at 11:32 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Sat, 1 Dec 2012 21:35:21 +0100, Daniel Vetter <daniel@ffwll.ch> wrote: >> On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote: >> > Before queuing the flip but crucially after attaching the unpin-work to >> > the crtc, we continue to setup the unpin-work. However, should the >> > hardware fire early, we see the connected unpin-work and queue the task. >> > The task then promptly runs and unpins the fb before we finish taking >> > the required references or even pinning it... Havoc. >> > >> > To close the race, we use the flip-pending atomic to indicate when the >> > flip is finally setup and enqueued. So during the flip-done processing, >> > we can check more accurately whether the flip was expected. >> > >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> >> Hm, can't this logic race? >> >> - emit the MI_FLIP >> >> - flip irq happens because the gpu is idle and completes it right away >> (or our thread is preempted), work->pending increments from 0 -> 1 >> >> - queue_flip sets work->pending to 1 > > -> write RING_TAIL, flush the commands to CS, begin execution of MI_FLIP Yeah, that should be the normal course of events where the MI_FLIP gets executed after we set work->pending to 1 (and after all the stuff has been done). The race I see is that the real MI_FLIP (not a spurious one this patch defends against) happens before we set work->pending to 1, so that we essentially lose the increment to 2 and so block any further flips on this crtc (or modesets for the matter, once the finish_fb stuff is fixed) indefinitely. Iow I think it's a bit too good at preventing unpins ;-) > I'm not happy with the explanation, but I could reliably (100%) hit the > race whilst loading a 2+GiB image using eog under compiz on an 965gm > with only 2GIB of ram. As soon as it hit kswapd, the system would OOPS > with an unpin leak. Which means that was a flip pending/done prior to > the pinning + MI_FLIP. This patch adds a strong defence against that > spurious flip done, but doesn't explain where it came from. Hm, I have no idea how that could cause the spurious flip - the most likely cause is that something introduces a nice delay somewhere (through kswapd), but I don't really see how that can happen. I guess I need to write a flip vs. swapping test. Was the swap due to unrelated memory pressue, or due to our own gem objects? -Daniel
On Sun, 2 Dec 2012 02:15:23 +0100, Daniel Vetter <daniel@ffwll.ch> wrote: > On Sat, Dec 1, 2012 at 11:32 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > On Sat, 1 Dec 2012 21:35:21 +0100, Daniel Vetter <daniel@ffwll.ch> wrote: > >> On Sat, Dec 01, 2012 at 05:48:50PM +0000, Chris Wilson wrote: > >> > Before queuing the flip but crucially after attaching the unpin-work to > >> > the crtc, we continue to setup the unpin-work. However, should the > >> > hardware fire early, we see the connected unpin-work and queue the task. > >> > The task then promptly runs and unpins the fb before we finish taking > >> > the required references or even pinning it... Havoc. > >> > > >> > To close the race, we use the flip-pending atomic to indicate when the > >> > flip is finally setup and enqueued. So during the flip-done processing, > >> > we can check more accurately whether the flip was expected. > >> > > >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >> > >> Hm, can't this logic race? > >> > >> - emit the MI_FLIP > >> > >> - flip irq happens because the gpu is idle and completes it right away > >> (or our thread is preempted), work->pending increments from 0 -> 1 > >> > >> - queue_flip sets work->pending to 1 > > > > -> write RING_TAIL, flush the commands to CS, begin execution of MI_FLIP > > Yeah, that should be the normal course of events where the MI_FLIP > gets executed after we set work->pending to 1 (and after all the stuff > has been done). The race I see is that the real MI_FLIP (not a > spurious one this patch defends against) happens before we set > work->pending to 1, so that we essentially lose the increment to 2 and > so block any further flips on this crtc (or modesets for the matter, > once the finish_fb stuff is fixed) indefinitely. > > Iow I think it's a bit too good at preventing unpins ;-) There isn't a race with hardware. So are you concerned about the write ordering, and so want some smb_mb()? > > I'm not happy with the explanation, but I could reliably (100%) hit the > > race whilst loading a 2+GiB image using eog under compiz on an 965gm > > with only 2GIB of ram. As soon as it hit kswapd, the system would OOPS > > with an unpin leak. Which means that was a flip pending/done prior to > > the pinning + MI_FLIP. This patch adds a strong defence against that > > spurious flip done, but doesn't explain where it came from. > > Hm, I have no idea how that could cause the spurious flip - the most > likely cause is that something introduces a nice delay somewhere > (through kswapd), but I don't really see how that can happen. I guess > I need to write a flip vs. swapping test. Was the swap due to > unrelated memory pressue, or due to our own gem objects? eog starts swapping long before it sends the image to X, but at the same time it continues to render its progress bar. -Chris
On Sun, Dec 2, 2012 at 10:26 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > There isn't a race with hardware. So are you concerned about the write > ordering, and so want some smb_mb()? Oops, I've misread the patch, there's no race where I've seen it. Barriers is a good topic though I think we need an smp_wmb before the atomic set in queue_flip to ensure that all stores to the work struct have landed before. Together with the implicit barriers of the spinlock we should be fine. -Daniel
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8afc0dd..e6a11ca 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -317,7 +317,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) seq_printf(m, "No flip due on pipe %c (plane %c)\n", pipe, plane); } else { - if (!work->pending) { + if (atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) { seq_printf(m, "Flip queued on pipe %c (plane %c)\n", pipe, plane); } else { @@ -328,7 +328,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) seq_printf(m, "Stall check enabled, "); else seq_printf(m, "Stall check waiting for page flip ioctl, "); - seq_printf(m, "%d prepares\n", work->pending); + seq_printf(m, "%d prepares\n", atomic_read(&work->pending)); if (work->old_fb_obj) { struct drm_i915_gem_object *obj = work->old_fb_obj; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6cd3dc9..a4dc97f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1466,7 +1466,9 @@ static void i915_pageflip_stall_check(struct drm_device *dev, int pipe) spin_lock_irqsave(&dev->event_lock, flags); work = intel_crtc->unpin_work; - if (work == NULL || work->pending || !work->enable_stall_check) { + if (work == NULL || + atomic_read(&work->pending) >= INTEL_FLIP_COMPLETE || + !work->enable_stall_check) { /* Either the pending flip IRQ arrived, or we're too early. Don't check */ spin_unlock_irqrestore(&dev->event_lock, flags); return; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 78d12c4..2746b39 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6929,7 +6929,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev, spin_lock_irqsave(&dev->event_lock, flags); work = intel_crtc->unpin_work; - if (work == NULL || !work->pending) { + if (work == NULL || atomic_read(&work->pending) < INTEL_FLIP_COMPLETE) { spin_unlock_irqrestore(&dev->event_lock, flags); return; } @@ -6977,13 +6977,13 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane) to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]); unsigned long flags; + /* NB: An MMIO update of the plane base pointer will also + * generate a page-flip completion irq, i.e. every modeset + * is also accompanied by a spurious intel_prepare_page_flip(). + */ spin_lock_irqsave(&dev->event_lock, flags); - if (intel_crtc->unpin_work) { - if ((++intel_crtc->unpin_work->pending) > 1) - DRM_ERROR("Prepared flip multiple times\n"); - } else { - DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n"); - } + if (intel_crtc->unpin_work) + atomic_inc_not_zero(&intel_crtc->unpin_work->pending); spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -7020,6 +7020,8 @@ static int intel_gen2_queue_flip(struct drm_device *dev, intel_ring_emit(ring, fb->pitches[0]); intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); intel_ring_emit(ring, 0); /* aux display base address, unused */ + + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); intel_ring_advance(ring); return 0; @@ -7060,6 +7062,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); intel_ring_emit(ring, MI_NOOP); + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); intel_ring_advance(ring); return 0; @@ -7106,6 +7109,8 @@ static int intel_gen4_queue_flip(struct drm_device *dev, pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; intel_ring_emit(ring, pf | pipesrc); + + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); intel_ring_advance(ring); return 0; @@ -7148,6 +7153,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev, pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; intel_ring_emit(ring, pf | pipesrc); + + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); intel_ring_advance(ring); return 0; @@ -7202,6 +7209,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset); intel_ring_emit(ring, (MI_NOOP)); + + atomic_set(&intel_crtc->unpin_work->pending, INTEL_FLIP_PENDING); intel_ring_advance(ring); return 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 522061c..3915ca9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -401,7 +401,10 @@ struct intel_unpin_work { struct drm_i915_gem_object *old_fb_obj; struct drm_i915_gem_object *pending_flip_obj; struct drm_pending_vblank_event *event; - int pending; + atomic_t pending; +#define INTEL_FLIP_INACTIVE 0 +#define INTEL_FLIP_PENDING 1 +#define INTEL_FLIP_COMPLETE 2 bool enable_stall_check; };
Before queuing the flip but crucially after attaching the unpin-work to the crtc, we continue to setup the unpin-work. However, should the hardware fire early, we see the connected unpin-work and queue the task. The task then promptly runs and unpins the fb before we finish taking the required references or even pinning it... Havoc. To close the race, we use the flip-pending atomic to indicate when the flip is finally setup and enqueued. So during the flip-done processing, we can check more accurately whether the flip was expected. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/intel_display.c | 23 ++++++++++++++++------- drivers/gpu/drm/i915/intel_drv.h | 5 ++++- 4 files changed, 25 insertions(+), 11 deletions(-)