From patchwork Thu Sep 18 14:58:33 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mika Kuoppala X-Patchwork-Id: 4931241 Return-Path: X-Original-To: patchwork-intel-gfx@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork2.web.kernel.org (Postfix) with ESMTP id 16B7ABEEA5 for ; Thu, 18 Sep 2014 14:58:27 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 6910A201E4 for ; Thu, 18 Sep 2014 14:58:25 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by mail.kernel.org (Postfix) with ESMTP id B8773201B4 for ; Thu, 18 Sep 2014 14:58:23 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 3A9F86E6CA; Thu, 18 Sep 2014 07:58:23 -0700 (PDT) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by gabe.freedesktop.org (Postfix) with ESMTP id 2676D6E6C9 for ; Thu, 18 Sep 2014 07:58:22 -0700 (PDT) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga103.fm.intel.com with ESMTP; 18 Sep 2014 07:49:08 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.04,547,1406617200"; d="scan'208";a="593295443" Received: from rosetta.fi.intel.com (HELO rosetta) ([10.237.72.93]) by fmsmga001.fm.intel.com with ESMTP; 18 Sep 2014 07:58:01 -0700 Received: by rosetta (Postfix, from userid 1000) id E1ED780057; Thu, 18 Sep 2014 17:58:41 +0300 (EEST) From: Mika Kuoppala To: intel-gfx@lists.freedesktop.org Date: Thu, 18 Sep 2014 17:58:33 +0300 Message-Id: <1411052315-22979-5-git-send-email-mika.kuoppala@intel.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1411052315-22979-1-git-send-email-mika.kuoppala@intel.com> References: 
<1411052315-22979-1-git-send-email-mika.kuoppala@intel.com> Subject: [Intel-gfx] [PATCH 4/6] drm/i915: Build workaround list in ring initialization X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Spam-Status: No, score=-4.8 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP to disassociate workaround list init from the actual writing of values. This is needed as not all workarounds will be masked bit enables and we want full control on when the read part of RMW will happen. Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 18 +-- drivers/gpu/drm/i915/i915_drv.h | 28 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 188 ++++++++++++++++++-------------- 3 files changed, 129 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 89b740b..c35c6ce 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2657,18 +2657,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused) gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs); - for (i = 0; i < dev_priv->num_wa_regs; ++i) { + seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count); + for (i = 0; i < dev_priv->workarounds.count; ++i) { u32 addr, mask; - addr = dev_priv->intel_wa_regs[i].addr; - mask = dev_priv->intel_wa_regs[i].mask; - dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask; - if (dev_priv->intel_wa_regs[i].addr) + addr = 
dev_priv->workarounds.reg[i].addr; + mask = dev_priv->workarounds.reg[i].mask; + dev_priv->workarounds.reg[i].value = I915_READ(addr) | mask; + if (dev_priv->workarounds.reg[i].addr) seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n", - dev_priv->intel_wa_regs[i].addr, - dev_priv->intel_wa_regs[i].value, - dev_priv->intel_wa_regs[i].mask); + dev_priv->workarounds.reg[i].addr, + dev_priv->workarounds.reg[i].value, + dev_priv->workarounds.reg[i].mask); } gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 49b45ec..3087d5a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1445,6 +1445,20 @@ struct i915_frontbuffer_tracking { unsigned flip_bits; }; +struct i915_wa_reg { + u32 addr; + u32 value; + /* bitmask representing WA bits */ + u32 mask; +}; + +#define I915_MAX_WA_REGS 16 + +struct i915_workarounds { + struct i915_wa_reg reg[I915_MAX_WA_REGS]; + u32 count; +}; + struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1587,19 +1601,7 @@ struct drm_i915_private { struct intel_shared_dpll shared_dplls[I915_NUM_PLLS]; int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; - /* - * workarounds are currently applied at different places and - * changes are being done to consolidate them so exact count is - * not clear at this point, use a max value for now. 
- */ -#define I915_MAX_WA_REGS 16 - struct { - u32 addr; - u32 value; - /* bitmask representing WA bits */ - u32 mask; - } intel_wa_regs[I915_MAX_WA_REGS]; - u32 num_wa_regs; + struct i915_workarounds workarounds; /* Reclocking support */ bool render_reclock_avail; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 46cd0f9..4f336e23 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,87 +665,113 @@ err: return ret; } -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, - u32 addr, u32 value) +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring) { + int ret, i; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_workarounds *w = &dev_priv->workarounds; - if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS)) - return; + if (WARN_ON(w->count == 0)) + return 0; - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(ring, addr); - intel_ring_emit(ring, value); + ring->gpu_caches_dirty = true; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF; - /* value is updated with the status of remaining bits of this - * register when it is read from debugfs file - */ - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; - dev_priv->num_wa_regs++; + ret = intel_ring_begin(ring, w->count * 3); + if (ret) + return ret; + + for (i = 0; i < w->count; i++) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); + } + + intel_ring_advance(ring); + + ring->gpu_caches_dirty = true; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - return; + return 0; +} + +static 
int wa_add(struct drm_i915_private *dev_priv, + const u32 addr, const u32 val, const u32 mask) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; } +#define WA_REG(addr, val, mask) { \ + const int r = wa_add(dev_priv, (addr), (val), (mask)); \ + if (r) return r; } + +#define WA_SET_BIT_MASKED(addr, mask) WA_REG(addr, \ + _MASKED_BIT_ENABLE(mask), (mask) & 0xffff) + +#define WA_CLR_BIT_MASKED(addr, mask) WA_REG(addr, \ + _MASKED_BIT_DISABLE(mask), (mask) & 0xffff) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask) + +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff) + static int bdw_init_workarounds(struct intel_engine_cs *ring) { - int ret; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - /* - * update the number of dwords required based on the - * actual number of workarounds applied - */ - ret = intel_ring_begin(ring, 24); - if (ret) - return ret; - /* WaDisablePartialInstShootdown:bdw */ /* WaDisableThreadStallDopClockGating:bdw */ - /* FIXME: Unclear whether we really need this on production bdw. 
*/ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE - | STALL_DOP_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | + STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw May not be needed for production */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); /* * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for * pre-production hardware */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS - | GEN8_SAMPLER_POWER_BYPASS_DIS)); + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_CENTROID_PIXEL_OPT_DIS | GEN8_SAMPLER_POWER_BYPASS_DIS); - intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE); - intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ - intel_ring_emit_wa(ring, HDC_CHICKEN0, - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); /* Wa4x4STCOptimizationDisable:bdw */ - intel_ring_emit_wa(ring, CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + WA_SET_BIT_MASKED(CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE); /* * BSpec recommends 8x4 when MSAA is used, @@ -755,52 +781,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
*/ - intel_ring_emit_wa(ring, GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - intel_ring_advance(ring); - - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", - dev_priv->num_wa_regs); + WA_SET_BIT_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); return 0; } static int chv_init_workarounds(struct intel_engine_cs *ring) { - int ret; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - ret = intel_ring_begin(ring, 12); - if (ret) - return ret; - /* WaDisablePartialInstShootdown:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaDisableThreadStallDopClockGating:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:chv (pre-production hw) */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); - intel_ring_advance(ring); + return 0; +} + +static int init_workarounds_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + WARN_ON(ring->id != RCS); + + dev_priv->workarounds.count = 0; + + if (IS_BROADWELL(dev)) + 
return bdw_init_workarounds(ring); + + if (IS_CHERRYVIEW(dev)) + return chv_init_workarounds(ring); return 0; } @@ -864,7 +888,7 @@ static int init_render_ring(struct intel_engine_cs *ring) gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); - return ret; + return init_workarounds_ring(ring); } static void render_ring_cleanup(struct intel_engine_cs *ring) @@ -2305,10 +2329,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - if (IS_CHERRYVIEW(dev)) - ring->init_context = chv_init_workarounds; - else - ring->init_context = bdw_init_workarounds; + + ring->init_context = intel_ring_workarounds_emit; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq;