diff mbox

[4/6] drm/i915: Serialize all register access

Message ID 1373648907-28774-4-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 12, 2013, 5:08 p.m. UTC
In theory, the different register blocks were meant to be only ever
touched when holding either the struct_mutex, mode_config.lock or even a
specific localised lock. This does not seem to be the case, and the
hardware reacts extremely badly if we attempt to concurrently access two
registers within the same cacheline.

v2: Rebase onto uncore

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63914
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_uncore.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Ben Widawsky July 14, 2013, 8:23 p.m. UTC | #1
On Fri, Jul 12, 2013 at 06:08:25PM +0100, Chris Wilson wrote:
> In theory, the different register blocks were meant to be only ever
> touched when holding either the struct_mutex, mode_config.lock or even a
> specific localised lock. This does not seem to be the case, and the
> hardware reacts extremely badly if we attempt to concurrently access two
> registers within the same cacheline.
> 
> v2: Rebase onto uncore
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63914
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index d7989b8..a89efc6 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -342,21 +342,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
>  
>  #define __i915_read(x, y) \
>  u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace) { \
> +	unsigned long irqflags; \
>  	u##x val = 0; \
> +	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
>  	if (IS_GEN5(dev_priv->dev)) \
>  		ilk_dummy_write(dev_priv); \
>  	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> -		unsigned long irqflags; \
> -		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \

Looking at this now, it looks like the old code was wrong. I think we
needed the lock before ilk_dummy_write when introduced in

commit a8b1397d717e36abd9e45f8fee61d800f7d236ec
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Thu Oct 18 14:16:09 2012 +0200

    drm/i915: implement WaIssueDummyWriteToWakeupFromRC6

While on this topic, did we really need a dummy write or a write? That
seems very weird. That also has a bug where we don't issue the dummy
write before doing a read of the fifo free entries.  Anyway, it seems we
had no bugs associated with it, so meh.

I also think it might be time for per gen MMIO functions, but don't care
enough to do anything more than state it.

>  		if (dev_priv->uncore.forcewake_count == 0) \
>  			dev_priv->uncore.funcs.force_wake_get(dev_priv); \
>  		val = read##y(dev_priv->regs + reg); \
>  		if (dev_priv->uncore.forcewake_count == 0) \
>  			dev_priv->uncore.funcs.force_wake_put(dev_priv); \
> -		spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
>  	} else { \
>  		val = read##y(dev_priv->regs + reg); \
>  	} \
> +	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
>  	if (trace) trace_i915_reg_rw(false, reg, val, sizeof(val)); \
>  	return val; \
>  }
> @@ -369,8 +369,10 @@ __i915_read(64, q)
>  
>  #define __i915_write(x, y) \
>  void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace) { \
> +	unsigned long irqflags; \
>  	u32 __fifo_ret = 0; \
>  	if (trace) trace_i915_reg_rw(true, reg, val, sizeof(val)); \
> +	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
>  	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
>  		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
>  	} \

I think for the sake of timing, doing the trace after the lock is more
desirable.

> @@ -382,6 +384,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool tr
>  		gen6_gt_check_fifodbg(dev_priv); \
>  	} \
>  	hsw_unclaimed_reg_check(dev_priv, reg); \
> +	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
>  }
>  __i915_write(8, b)
>  __i915_write(16, w)

Is there any reason you kept read##y instead of using the new raw
functions? I would like to have only one place where we readl/writel if
possible.

Anyway, I can't find anything wrong with the patch otherwise, and I
think we can try throwing it at all hangs from SNB->HSW since it should
serialize the display accesses too.

So let's say with all my comments at least read by someone, it's
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Chris Wilson July 16, 2013, 4:16 p.m. UTC | #2
On Sun, Jul 14, 2013 at 01:23:52PM -0700, Ben Widawsky wrote:
> On Fri, Jul 12, 2013 at 06:08:25PM +0100, Chris Wilson wrote:
> > In theory, the different register blocks were meant to be only ever
> > touched when holding either the struct_mutex, mode_config.lock or even a
> > specific localised lock. This does not seem to be the case, and the
> > hardware reacts extremely badly if we attempt to concurrently access two
> > registers within the same cacheline.
> > 
> > v2: Rebase onto uncore
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63914
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/intel_uncore.c | 9 ++++++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> > index d7989b8..a89efc6 100644
> > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > @@ -342,21 +342,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
> >  
> >  #define __i915_read(x, y) \
> >  u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace) { \
> > +	unsigned long irqflags; \
> >  	u##x val = 0; \
> > +	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
> >  	if (IS_GEN5(dev_priv->dev)) \
> >  		ilk_dummy_write(dev_priv); \
> >  	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> > -		unsigned long irqflags; \
> > -		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
> 
> Looking at this now, it looks like the old code was wrong. I think we
> needed the lock before ilk_dummy_write when introduced in
> 
> commit a8b1397d717e36abd9e45f8fee61d800f7d236ec
> Author: Daniel Vetter <daniel.vetter@ffwll.ch>
> Date:   Thu Oct 18 14:16:09 2012 +0200
> 
>     drm/i915: implement WaIssueDummyWriteToWakeupFromRC6

Yes, there is a risk of pre-emption in the current code that could cause
us to screw up.
 
> While on this topic, did we really need a dummy write or a write, that
> seems very weird. That also has a bug where we don't issue the dummy
> write before doing a read of the fifo free entries.  Anyway, it seems we
> had no bugs associated with it, so meh.

The fifo entries are for a different platform than the dummy write, so
no trouble there. I leave the concern of ilk rc6 w/a in your capable
hands. :)
 
> I also think it might be time for per gen MMIO functions, but don't care
> enough to do anything more than state it.

True for everyone. Let us remember to flame the next person who tries to
add an extra w/a here!
 
> >  		if (dev_priv->uncore.forcewake_count == 0) \
> >  			dev_priv->uncore.funcs.force_wake_get(dev_priv); \
> >  		val = read##y(dev_priv->regs + reg); \
> >  		if (dev_priv->uncore.forcewake_count == 0) \
> >  			dev_priv->uncore.funcs.force_wake_put(dev_priv); \
> > -		spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
> >  	} else { \
> >  		val = read##y(dev_priv->regs + reg); \
> >  	} \
> > +	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
> >  	if (trace) trace_i915_reg_rw(false, reg, val, sizeof(val)); \
> >  	return val; \
> >  }
> > @@ -369,8 +369,10 @@ __i915_read(64, q)
> >  
> >  #define __i915_write(x, y) \
> >  void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace) { \
> > +	unsigned long irqflags; \
> >  	u32 __fifo_ret = 0; \
> >  	if (trace) trace_i915_reg_rw(true, reg, val, sizeof(val)); \
> > +	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
> >  	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> >  		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
> >  	} \
> 
> I think for the sake of timing, doing the trace after the lock is more
> desirable.

I err on the side of keeping the locked section small; timing in debug
statements is less critical, imo.
 
> > @@ -382,6 +384,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool tr
> >  		gen6_gt_check_fifodbg(dev_priv); \
> >  	} \
> >  	hsw_unclaimed_reg_check(dev_priv, reg); \
> > +	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
> >  }
> >  __i915_write(8, b)
> >  __i915_write(16, w)
> 
> Is there any reason you kept read##y instead of using the new raw
> functions? I would like to have only one place where we readl/writel if
> possible.

Done.
 
> Anyway, I can't find anything wrong with the patch otherwise, and I
> think we can try throwing it at all hangs from SNB->HSW since it should
> serialize the display accesses too.

Aye, all those rc6 dropped mmio for starters. And maybe even Stephane's
snb/ivb rc6 lockups.
 
> So let's say with all my comments at least read by someone, it's
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Thanks for the review,
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index d7989b8..a89efc6 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -342,21 +342,21 @@  hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
 
 #define __i915_read(x, y) \
 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace) { \
+	unsigned long irqflags; \
 	u##x val = 0; \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
 	if (IS_GEN5(dev_priv->dev)) \
 		ilk_dummy_write(dev_priv); \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		unsigned long irqflags; \
-		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
 		if (dev_priv->uncore.forcewake_count == 0) \
 			dev_priv->uncore.funcs.force_wake_get(dev_priv); \
 		val = read##y(dev_priv->regs + reg); \
 		if (dev_priv->uncore.forcewake_count == 0) \
 			dev_priv->uncore.funcs.force_wake_put(dev_priv); \
-		spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
 	} else { \
 		val = read##y(dev_priv->regs + reg); \
 	} \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
 	if (trace) trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -369,8 +369,10 @@  __i915_read(64, q)
 
 #define __i915_write(x, y) \
 void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace) { \
+	unsigned long irqflags; \
 	u32 __fifo_ret = 0; \
 	if (trace) trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
@@ -382,6 +384,7 @@  void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool tr
 		gen6_gt_check_fifodbg(dev_priv); \
 	} \
 	hsw_unclaimed_reg_check(dev_priv, reg); \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
 }
 __i915_write(8, b)
 __i915_write(16, w)