Message ID | 1383451680-11173-3-git-send-email-benjamin.widawsky@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, Nov 02, 2013 at 09:07:00PM -0700, Ben Widawsky wrote: > +static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg) > +{ > + int i; > + for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++) > + if (reg == gen8_optimized_regs[i]) > + return false; > + > + return true; > +} The sense of this function is reversed, it really should be is_gen8_legacy_register()? I'd prefer this to be an explicit switch() in ascending order -- the compiler should emit an efficient bsearch. -Chris
Ben Widawsky <benjamin.widawsky@intel.com> writes: > GEN8 removes the GT FIFO which we've all come to know and love. Instead > it offers a wider range of optimized registers which always keep a > shadowed copy, and are fed to the GPU when it wakes. > > How this is implemented in hardware is still somewhat of a mystery. As > far as I can tell, the basic design is as follows: > > If the register is not optimized, you must use the old forcewake > mechanism to bring the GT out of sleep. [1] > > If register is in the optimized list the write will signal that the > GT should begin to come out of whatever sleep state it is in. > > While the GT is coming out of sleep, the requested write will be stored > in an intermediate shadow register. > > Do to the fact that the implementation details are not clear, I see > several risks: > 1. Order is not preserved as it is with GT FIFO. If we issue multiple > writes to optimized registers, where order matters, we may need to > serialize it with forcewake. > 2. The optimized registers have only 1 shadowed slot, meaning if we > issue multiple writes to the same register, and those values need to > reach the GPU in order, forcewake will be required. > > [1] We could implement a SW queue the way the GT FIFO used to work if > desired. > > NOTE: Compile tested only until we get real silicon. > > v2: > - Use a default case to make future platforms also work. > - Get rid of IS_BROADWELL since that's not yet defined, but we want to > MMIO as soon as possible. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1) > Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> > --- > drivers/gpu/drm/i915/intel_uncore.c | 53 ++++++++++++++++++++++++++++++++++++- > 1 file changed, 52 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index f6fae35..fa06ce4 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -93,7 +93,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv) > { > u32 forcewake_ack; > > - if (IS_HASWELL(dev_priv->dev)) > + if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev)) > forcewake_ack = FORCEWAKE_ACK_HSW; > else > forcewake_ack = FORCEWAKE_MT_ACK; > @@ -459,6 +459,47 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) > spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ > } > > +static const u32 gen8_optimized_regs[] = { > + FORCEWAKE_MT, > + GEN6_RPNSWREQ, > + GEN6_RC_VIDEO_FREQ, > + RING_TAIL(RENDER_RING_BASE), > + RING_TAIL(GEN6_BSD_RING_BASE), > + RING_TAIL(VEBOX_RING_BASE), > + RING_TAIL(BLT_RING_BASE), > + /* TODO: Other registers are not yet used */ > +}; > + s/optimized/shadowed would make more sense i think. > +static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg) > +{ > + int i; > + for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++) > + if (reg == gen8_optimized_regs[i]) > + return false; The logic is reversed here with regards to function name. > + > + return true; > +} > + > +#define __gen8_write(x) \ > +static void \ > +gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \ > + bool __needs_put = false; \ This initialization is unnecessary. > + REG_WRITE_HEADER; \ > + __needs_put = is_gen8_optimized(dev_priv, reg); \ This logic is backwards. Please consider: __needs_put = !is_gen8_shadowed(dev_priv, reg); -- Mika > + if (__needs_put) { \ > + dev_priv->uncore.funcs.force_wake_get(dev_priv); \ > + } \ > + __raw_i915_write##x(dev_priv, reg, val); \ > + if (__needs_put) { \ > + dev_priv->uncore.funcs.force_wake_put(dev_priv); \ > + } \ > + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ > +} > + > +__gen8_write(8) > +__gen8_write(16) > +__gen8_write(32) > +__gen8_write(64) > __hsw_write(8) > __hsw_write(16) > __hsw_write(32) > @@ -476,6 +517,7 @@ __gen4_write(16) > __gen4_write(32) > __gen4_write(64) > > +#undef __gen8_write > #undef __hsw_write > #undef __gen6_write > #undef __gen5_write > @@ -534,6 +576,15 @@ void intel_uncore_init(struct drm_device *dev) > } > > switch (INTEL_INFO(dev)->gen) { > + default: > + dev_priv->uncore.funcs.mmio_writeb = gen8_write8; > + dev_priv->uncore.funcs.mmio_writew = gen8_write16; > + dev_priv->uncore.funcs.mmio_writel = gen8_write32; > + dev_priv->uncore.funcs.mmio_writeq = gen8_write64; > + dev_priv->uncore.funcs.mmio_readb = gen6_read8; > + dev_priv->uncore.funcs.mmio_readw = gen6_read16; > + dev_priv->uncore.funcs.mmio_readl = gen6_read32; > + dev_priv->uncore.funcs.mmio_readq = gen6_read64; > case 7: > case 6: > if (IS_HASWELL(dev)) { > -- > 1.8.4.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index f6fae35..fa06ce4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -93,7 +93,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv) { u32 forcewake_ack; - if (IS_HASWELL(dev_priv->dev)) + if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev)) forcewake_ack = FORCEWAKE_ACK_HSW; else forcewake_ack = FORCEWAKE_MT_ACK; @@ -459,6 +459,47 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ } +static const u32 gen8_optimized_regs[] = { + FORCEWAKE_MT, + GEN6_RPNSWREQ, + GEN6_RC_VIDEO_FREQ, + RING_TAIL(RENDER_RING_BASE), + RING_TAIL(GEN6_BSD_RING_BASE), + RING_TAIL(VEBOX_RING_BASE), + RING_TAIL(BLT_RING_BASE), + /* TODO: Other registers are not yet used */ +}; + +static bool is_gen8_optimized(struct drm_i915_private *dev_priv, u32 reg) +{ + int i; + for (i = 0; i < ARRAY_SIZE(gen8_optimized_regs); i++) + if (reg == gen8_optimized_regs[i]) + return false; + + return true; +} + +#define __gen8_write(x) \ +static void \ +gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \ + bool __needs_put = false; \ + REG_WRITE_HEADER; \ + __needs_put = is_gen8_optimized(dev_priv, reg); \ + if (__needs_put) { \ + dev_priv->uncore.funcs.force_wake_get(dev_priv); \ + } \ + __raw_i915_write##x(dev_priv, reg, val); \ + if (__needs_put) { \ + dev_priv->uncore.funcs.force_wake_put(dev_priv); \ + } \ + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ +} + +__gen8_write(8) +__gen8_write(16) +__gen8_write(32) +__gen8_write(64) __hsw_write(8) __hsw_write(16) __hsw_write(32) @@ -476,6 +517,7 @@ __gen4_write(16) __gen4_write(32) __gen4_write(64) +#undef __gen8_write #undef __hsw_write #undef __gen6_write #undef __gen5_write @@ -534,6 +576,15 @@ void intel_uncore_init(struct drm_device *dev) } switch (INTEL_INFO(dev)->gen) { + default: + dev_priv->uncore.funcs.mmio_writeb = gen8_write8; + dev_priv->uncore.funcs.mmio_writew = gen8_write16; + dev_priv->uncore.funcs.mmio_writel = gen8_write32; + dev_priv->uncore.funcs.mmio_writeq = gen8_write64; + dev_priv->uncore.funcs.mmio_readb = gen6_read8; + dev_priv->uncore.funcs.mmio_readw = gen6_read16; + dev_priv->uncore.funcs.mmio_readl = gen6_read32; + dev_priv->uncore.funcs.mmio_readq = gen6_read64; case 7: case 6: if (IS_HASWELL(dev)) {