[1/2] drm/i915/bdw: Apply workarounds using the golden render state

Message ID	1408544358-26735-2-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Arun Siluvery <arun.siluvery@linux.intel.com> To: intel-gfx@lists.freedesktop.org Date: Wed, 20 Aug 2014 15:19:17 +0100 Message-Id: <1408544358-26735-2-git-send-email-arun.siluvery@linux.intel.com> In-Reply-To: <1408544358-26735-1-git-send-email-arun.siluvery@linux.intel.com> References: <1408544358-26735-1-git-send-email-arun.siluvery@linux.intel.com> Subject: [Intel-gfx] [PATCH 1/2] drm/i915/bdw: Apply workarounds using the golden render state Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c8f744c..bcae3dc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5507,101 +5507,52 @@ static void gen8_init_clock_gating(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe; I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); /* FIXME(BDW): Check all the w/a, some might only apply to * pre-production hw. */ - /* WaDisablePartialInstShootdown:bdw */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - - /* WaDisableThreadStallDopClockGating:bdw */ - /* FIXME: Unclear whether we really need this on production bdw. */ - I915_WRITE(GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - - /* - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for - * pre-production hardware - */ - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); - I915_WRITE(HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); - I915_WRITE(COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); - - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); - - /* WaDisableDopClockGating:bdw May not be needed for production */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); /* WaPsrDPAMaskVBlankInSRD:bdw */ I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ for_each_pipe(pipe) { I915_WRITE(CHICKEN_PIPESL_1(pipe), I915_READ(CHICKEN_PIPESL_1(pipe)) | 
BDW_DPRS_MASK_VBLANK_SRD); } - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - I915_WRITE(HDC_CHICKEN0, - I915_READ(HDC_CHICKEN0) | - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); - /* WaVSRefCountFullforceMissDisable:bdw */ /* WaDSRefCountFullforceMissDisable:bdw */ I915_WRITE(GEN7_FF_THREAD_MODE, I915_READ(GEN7_FF_THREAD_MODE) & ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); /* WaDisableSDEUnitClockGating:bdw */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); } static void haswell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; ilk_init_lp_watermarks(dev); /* L3 caching of data atomics doesn't work -- disable it. 
*/ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c index 75ef1b5d..617be0f 100644 --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c @@ -1,21 +1,78 @@ #include "intel_renderstate.h" static const u32 gen8_null_state_relocs[] = { - 0x00000048, - 0x00000050, - 0x00000060, - 0x000003ec, + 0x000000a8, + 0x000000b0, + 0x000000c0, + 0x0000044c, -1, }; static const u32 gen8_null_state_batch[] = { + 0x11000001, /* Apply workarounds - start */ + /* GEN8_ROW_CHICKEN + * WaDisablePartialInstShootdown:bdw + * WaDisableThreadStallDopClockGating:bdw + */ + 0x0000e4f0, + 0x83208320, + 0x11000001, + /* GEN7_ROW_CHICKEN2 + * WaDisableDopClockGating:bdw, may not be needed for production. + */ + 0x0000e4f4, + 0x00010001, + 0x11000001, + /* HALF_SLICE_CHICKEN3 + * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for + * pre-production hardware + */ + 0x0000e184, + 0x01020102, + 0x11000001, + /* GEN7_HALF_SLICE_CHICKEN1 + * Wa: GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE + */ + 0x0000e100, + 0x04000400, + 0x11000001, + /* COMMON_SLICE_CHICKEN2 + * Wa: GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE + */ + 0x00007014, + 0x00010001, + 0x11000001, + /* HDC_CHICKEN0 + * Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + 0x00007300, + 0x00100010, + 0x11000001, + /* CACHE_MODE_1 + * Wa4x4STCOptimizationDisable:bdw + */ + 0x00007004, + 0x00400040, + 0x11000001, + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
+ */ + 0x00007008, + 0x02800200, /* Apply workarounds - end */ 0x69040000, 0x61020001, 0x00000000, 0x00000000, 0x79120000, 0x00000000, 0x79130000, 0x00000000, 0x79140000, 0x00000000, @@ -33,35 +90,35 @@ static const u32 gen8_null_state_batch[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000001, /* reloc */ 0x00000000, 0xfffff001, 0x00001001, 0xfffff001, 0x00001001, 0x78230000, - 0x000006e0, + 0x00000720, 0x78210000, - 0x00000700, + 0x00000740, 0x78300000, 0x08010040, 0x78330000, 0x08000000, 0x78310000, 0x08000000, 0x78320000, 0x08000000, 0x78240000, - 0x00000641, + 0x00000681, 0x780e0000, - 0x00000601, + 0x00000641, 0x780d0000, 0x00000000, 0x78180000, 0x00000001, 0x78520003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x78190009, @@ -192,54 +249,54 @@ static const u32 gen8_null_state_batch[] = { 0x78500003, 0x00210000, 0x00000000, 0x00000000, 0x00000000, 0x78130002, 0x00000000, 0x00000000, 0x00000000, 0x782a0000, - 0x00000480, + 0x000004c0, 0x782f0000, - 0x00000540, + 0x00000580, 0x78140000, 0x00000800, 0x78170009, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x7820000a, - 0x00000580, + 0x000005c0, 0x00000000, 0x08080000, 0x00000000, 0x00000000, 0x1f000002, 0x00060000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x784d0000, 0x40000000, 0x784f0000, 0x80000100, 0x780f0000, - 0x00000740, + 0x00000780, 0x78050006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x78070003, 0x00000000, @@ -253,21 +310,21 @@ static const u32 gen8_null_state_batch[] = { 0x00000000, 0x78040001, 0x00000000, 0x00000001, 0x79000002, 0xffffffff, 0x00000000, 0x00000000, 0x78080003, 0x00006000, - 0x000005e0, /* reloc */ + 0x00000620, /* reloc */ 0x00000000, 0x00000000, 0x78090005, 0x02000000, 0x22220000, 0x02f60000, 0x11230000, 0x02850004, 0x11230000, 0x784b0000, @@ -282,30 +339,22 @@ static const u32 gen8_null_state_batch[] = { 0x00000001, 0x00000000, 0x00000000, 0x05000000, /* 
cmds end */ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x000004c0, /* state start */ - 0x00000500, + 0x00000500, /* state start */ + 0x00000540, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

[1/2] drm/i915/bdw: Apply workarounds using the golden render state

Commit Message

Comments

Patch