diff mbox

[02/62] drm/i915/bdw: Handle forcewake for writes on gen8

Message ID 1383451680-11173-3-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Nov. 3, 2013, 4:07 a.m. UTC
GEN8 removes the GT FIFO which we've all come to know and love. Instead
it offers a wider range of optimized registers which always keep a
shadowed copy, and are fed to the GPU when it wakes.

How this is implemented in hardware is still somewhat of a mystery. As
far as I can tell, the basic design is as follows:

If the register is not optimized, you must use the old forcewake
mechanism to bring the GT out of sleep. [1]

If register is in the optimized list the write will signal that the
GT should begin to come out of whatever sleep state it is in.

While the GT is coming out of sleep, the requested write will be stored
in an intermediate shadow register.

Do to the fact that the implementation details are not clear, I see
several risks:
1. Order is not preserved as it is with GT FIFO. If we issue multiple
writes to optimized registers, where order matters, we may need to
serialize it with forcewake.
2. The optimized registers have only 1 shadowed slot, meaning if we
issue multiple writes to the same register, and those values need to
reach the GPU in order, forcewake will be required.

[1] We could implement a SW queue the way the GT FIFO used to work if
desired.

NOTE: Compile tested only until we get real silicon.

v2:
- Use a default case to make future platforms also work.
- Get rid of IS_BROADWELL since that's not yet defined, but we want to
  MMIO as soon as possible.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_uncore.c | 53 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

Comments

Chris Wilson Nov. 4, 2013, 2:19 p.m. UTC | #1
On Sat, Nov 02, 2013 at 09:07:00PM -0700, Ben Widawsky wrote:
> +static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg)
> +{
> +	int i;
> +	for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++)
> +		if (reg == gen8_optimized_regs[i])
> +			return false;
> +
> +	return true;
> +}

The sense of this function is reversed, it really should be
is_gen8_legacy_register()? I'd prefer this to be an explicit switch() in
ascending order -- the compiler should emit an efficient bsearch.
-Chris
Mika Kuoppala Nov. 5, 2013, 9:24 a.m. UTC | #2
Ben Widawsky <benjamin.widawsky@intel.com> writes:

> GEN8 removes the GT FIFO which we've all come to know and love. Instead
> it offers a wider range of optimized registers which always keep a
> shadowed copy, and are fed to the GPU when it wakes.
>
> How this is implemented in hardware is still somewhat of a mystery. As
> far as I can tell, the basic design is as follows:
>
> If the register is not optimized, you must use the old forcewake
> mechanism to bring the GT out of sleep. [1]
>
> If register is in the optimized list the write will signal that the
> GT should begin to come out of whatever sleep state it is in.
>
> While the GT is coming out of sleep, the requested write will be stored
> in an intermediate shadow register.
>
> Do to the fact that the implementation details are not clear, I see
> several risks:
> 1. Order is not preserved as it is with GT FIFO. If we issue multiple
> writes to optimized registers, where order matters, we may need to
> serialize it with forcewake.
> 2. The optimized registers have only 1 shadowed slot, meaning if we
> issue multiple writes to the same register, and those values need to
> reach the GPU in order, forcewake will be required.
>
> [1] We could implement a SW queue the way the GT FIFO used to work if
> desired.
>
> NOTE: Compile tested only until we get real silicon.
>
> v2:
> - Use a default case to make future platforms also work.
> - Get rid of IS_BROADWELL since that's not yet defined, but we want to
>   MMIO as soon as possible.
>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 53 ++++++++++++++++++++++++++++++++++++-
>  1 file changed, 52 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index f6fae35..fa06ce4 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -93,7 +93,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
>  {
>  	u32 forcewake_ack;
>  
> -	if (IS_HASWELL(dev_priv->dev))
> +	if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev))
>  		forcewake_ack = FORCEWAKE_ACK_HSW;
>  	else
>  		forcewake_ack = FORCEWAKE_MT_ACK;
> @@ -459,6 +459,47 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace)
>  	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
>  }
>  
> +static const u32 gen8_optimized_regs[] = {
> +	FORCEWAKE_MT,
> +	GEN6_RPNSWREQ,
> +	GEN6_RC_VIDEO_FREQ,
> +	RING_TAIL(RENDER_RING_BASE),
> +	RING_TAIL(GEN6_BSD_RING_BASE),
> +	RING_TAIL(VEBOX_RING_BASE),
> +	RING_TAIL(BLT_RING_BASE),
> +	/* TODO: Other registers are not yet used */
> +};
> +

s/optimized/shadowed would make more sense i think.

> +static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg)
> +{
> +	int i;
> +	for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++)
> +		if (reg == gen8_optimized_regs[i])
> +			return false;

The logic is reversed here with regards to function name.

> +
> +	return true;
> +}
> +
> +#define __gen8_write(x) \
> +static void \
> +gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
> +	bool __needs_put = false; \
                       
This initialization is unnecessary.

> +	REG_WRITE_HEADER; \
> +	__needs_put = is_gen8_optimized(dev_priv, reg); \

This logic is backwards.
Please consider:

        __needs_put = !is_gen8_shadowed(dev_priv, reg);


-- Mika

> +	if (__needs_put) { \
> +		dev_priv->uncore.funcs.force_wake_get(dev_priv); \
> +	} \
> +	__raw_i915_write##x(dev_priv, reg, val); \
> +	if (__needs_put) { \
> +		dev_priv->uncore.funcs.force_wake_put(dev_priv); \
> +	} \
> +	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
> +}
> +
> +__gen8_write(8)
> +__gen8_write(16)
> +__gen8_write(32)
> +__gen8_write(64)
>  __hsw_write(8)
>  __hsw_write(16)
>  __hsw_write(32)
> @@ -476,6 +517,7 @@ __gen4_write(16)
>  __gen4_write(32)
>  __gen4_write(64)
>  
> +#undef __gen8_write
>  #undef __hsw_write
>  #undef __gen6_write
>  #undef __gen5_write
> @@ -534,6 +576,15 @@ void intel_uncore_init(struct drm_device *dev)
>  	}
>  
>  	switch (INTEL_INFO(dev)->gen) {
> +	default:
> +		dev_priv->uncore.funcs.mmio_writeb  = gen8_write8;
> +		dev_priv->uncore.funcs.mmio_writew  = gen8_write16;
> +		dev_priv->uncore.funcs.mmio_writel  = gen8_write32;
> +		dev_priv->uncore.funcs.mmio_writeq  = gen8_write64;
> +		dev_priv->uncore.funcs.mmio_readb  = gen6_read8;
> +		dev_priv->uncore.funcs.mmio_readw  = gen6_read16;
> +		dev_priv->uncore.funcs.mmio_readl  = gen6_read32;
> +		dev_priv->uncore.funcs.mmio_readq  = gen6_read64;
>  	case 7:
>  	case 6:
>  		if (IS_HASWELL(dev)) {
> -- 
> 1.8.4.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index f6fae35..fa06ce4 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -93,7 +93,7 @@  static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
 {
 	u32 forcewake_ack;
 
-	if (IS_HASWELL(dev_priv->dev))
+	if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev))
 		forcewake_ack = FORCEWAKE_ACK_HSW;
 	else
 		forcewake_ack = FORCEWAKE_MT_ACK;
@@ -459,6 +459,47 @@  hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
 }
 
+static const u32 gen8_optimized_regs[] = {
+	FORCEWAKE_MT,
+	GEN6_RPNSWREQ,
+	GEN6_RC_VIDEO_FREQ,
+	RING_TAIL(RENDER_RING_BASE),
+	RING_TAIL(GEN6_BSD_RING_BASE),
+	RING_TAIL(VEBOX_RING_BASE),
+	RING_TAIL(BLT_RING_BASE),
+	/* TODO: Other registers are not yet used */
+};
+
+static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++)
+		if (reg == gen8_optimized_regs[i])
+			return false;
+
+	return true;
+}
+
+#define __gen8_write(x) \
+static void \
+gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+	bool __needs_put = false; \
+	REG_WRITE_HEADER; \
+	__needs_put = is_gen8_optimized(dev_priv, reg); \
+	if (__needs_put) { \
+		dev_priv->uncore.funcs.force_wake_get(dev_priv); \
+	} \
+	__raw_i915_write##x(dev_priv, reg, val); \
+	if (__needs_put) { \
+		dev_priv->uncore.funcs.force_wake_put(dev_priv); \
+	} \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
+}
+
+__gen8_write(8)
+__gen8_write(16)
+__gen8_write(32)
+__gen8_write(64)
 __hsw_write(8)
 __hsw_write(16)
 __hsw_write(32)
@@ -476,6 +517,7 @@  __gen4_write(16)
 __gen4_write(32)
 __gen4_write(64)
 
+#undef __gen8_write
 #undef __hsw_write
 #undef __gen6_write
 #undef __gen5_write
@@ -534,6 +576,15 @@  void intel_uncore_init(struct drm_device *dev)
 	}
 
 	switch (INTEL_INFO(dev)->gen) {
+	default:
+		dev_priv->uncore.funcs.mmio_writeb  = gen8_write8;
+		dev_priv->uncore.funcs.mmio_writew  = gen8_write16;
+		dev_priv->uncore.funcs.mmio_writel  = gen8_write32;
+		dev_priv->uncore.funcs.mmio_writeq  = gen8_write64;
+		dev_priv->uncore.funcs.mmio_readb  = gen6_read8;
+		dev_priv->uncore.funcs.mmio_readw  = gen6_read16;
+		dev_priv->uncore.funcs.mmio_readl  = gen6_read32;
+		dev_priv->uncore.funcs.mmio_readq  = gen6_read64;
 	case 7:
 	case 6:
 		if (IS_HASWELL(dev)) {