diff mbox

[01/22] drm/i915/icl: Introduce initial Icelake Workarounds

Message ID 1524256446-28490-2-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com April 20, 2018, 8:33 p.m. UTC
Inherit workarounds from previous platforms that are still valid for
Icelake.

v2: GEN7_ROW_CHICKEN2 is masked
v3:
  - Since it has been fixed already in upstream, removed the TODO
    comment about WA_SET_BIT for WaInPlaceDecompressionHang.
  - Squashed with this patch:
      drm/i915/icl: add icelake_init_clock_gating()
    from Paulo Zanoni <paulo.r.zanoni@intel.com>
  - Squashed with this patch:
      drm/i915/icl: WaForceEnableNonCoherent
    from Oscar Mateo <oscar.mateo@intel.com>
  - WaPushConstantDereferenceHoldDisable is now Wa_1604370585 and
    applies to B0 as well.
  - WaPipeControlBefore3DStateSamplePattern WABB was being applied
    to ICL incorrectly.
v4:
  - Wrap the commit message
  - s/dev_priv/p to please checkpatch
v5: Rebased on top of the WA refactoring
v6: Rebased on top of further whitelist registers refactoring (Michel)
v7: Added WaRsForcewakeAddDelayForAck

Cc: Tomasz Lis <tomasz.lis@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  9 +++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  4 +--
 drivers/gpu/drm/i915/i915_reg.h          |  1 +
 drivers/gpu/drm/i915/intel_lrc.c         |  2 ++
 drivers/gpu/drm/i915/intel_pm.c          |  4 ++-
 drivers/gpu/drm/i915/intel_uncore.c      |  7 +++--
 drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++++++++++
 7 files changed, 68 insertions(+), 5 deletions(-)

Comments

Mika Kuoppala April 26, 2018, 3:01 p.m. UTC | #1
Oscar Mateo <oscar.mateo@intel.com> writes:

> Inherit workarounds from previous platforms that are still valid for
> Icelake.
>
> v2: GEN7_ROW_CHICKEN2 is masked
> v3:
>   - Since it has been fixed already in upstream, removed the TODO
>     comment about WA_SET_BIT for WaInPlaceDecompressionHang.
>   - Squashed with this patch:
>       drm/i915/icl: add icelake_init_clock_gating()
>     from Paulo Zanoni <paulo.r.zanoni@intel.com>
>   - Squashed with this patch:
>       drm/i915/icl: WaForceEnableNonCoherent
>     from Oscar Mateo <oscar.mateo@intel.com>
>   - WaPushConstantDereferenceHoldDisable is now Wa_1604370585 and
>     applies to B0 as well.
>   - WaPipeControlBefore3DStateSamplePattern WABB was being applied
>     to ICL incorrectly.
> v4:
>   - Wrap the commit message
>   - s/dev_priv/p to please checkpatch
> v5: Rebased on top of the WA refactoring
> v6: Rebased on top of further whitelist registers refactoring (Michel)
> v7: Added WaRsForcewakeAddDelayForAck
>
> Cc: Tomasz Lis <tomasz.lis@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  9 +++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.c      |  4 +--
>  drivers/gpu/drm/i915/i915_reg.h          |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c         |  2 ++
>  drivers/gpu/drm/i915/intel_pm.c          |  4 ++-
>  drivers/gpu/drm/i915/intel_uncore.c      |  7 +++--
>  drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++++++++++
>  7 files changed, 68 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0286911..1dc157f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2462,6 +2462,15 @@ static inline unsigned int i915_sg_segment_size(void)
>  #define IS_CNL_REVID(p, since, until) \
>  	(IS_CANNONLAKE(p) && IS_REVID(p, since, until))
>  
> +#define ICL_REVID_A0		0x0
> +#define ICL_REVID_A2		0x1

Just noted that for some reason bspec puts A0 and A2 under
same revid. Bspec err?

> +#define ICL_REVID_B0		0x3
> +#define ICL_REVID_B2		0x4
> +#define ICL_REVID_C0		0x5
> +
> +#define IS_ICL_REVID(p, since, until) \
> +	(IS_ICELAKE(p) && IS_REVID(p, since, until))
> +
>  /*
>   * The genX designation typically refers to the render engine, so render
>   * capability related checks should use IS_GEN, while display and other checks
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 21d72f6..221b873 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2140,12 +2140,12 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
>  	 * called on driver load and after a GPU reset, so you can place
>  	 * workarounds here even if they get overwritten by GPU reset.
>  	 */
> -	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
> +	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
>  	if (IS_BROADWELL(dev_priv))
>  		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
>  	else if (IS_CHERRYVIEW(dev_priv))
>  		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
> -	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
> +	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv) || IS_GEN11(dev_priv))
>  		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
>  	else if (IS_GEN9_LP(dev_priv))
>  		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index fb10602..f2ee225 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7203,6 +7203,7 @@ enum {
>  /* GEN8 chicken */
>  #define HDC_CHICKEN0				_MMIO(0x7300)
>  #define CNL_HDC_CHICKEN0			_MMIO(0xE5F0)
> +#define ICL_HDC_CHICKEN0			_MMIO(0xE5F4)
>  #define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
>  #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
>  #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 029901a..2d6572a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1636,6 +1636,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
>  		return -EINVAL;
>  
>  	switch (INTEL_GEN(engine->i915)) {
> +	case 11:
> +		return 0;
>  	case 10:
>  		wa_bb_fn[0] = gen10_init_indirectctx_bb;
>  		wa_bb_fn[1] = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 4baab85..3b7d804 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -9123,7 +9123,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
>   */
>  void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
>  {
> -	if (IS_CANNONLAKE(dev_priv))
> +	if (IS_ICELAKE(dev_priv))
> +		dev_priv->display.init_clock_gating = nop_init_clock_gating;
> +	else if (IS_CANNONLAKE(dev_priv))
>  		dev_priv->display.init_clock_gating = cnl_init_clock_gating;
>  	else if (IS_COFFEELAKE(dev_priv))
>  		dev_priv->display.init_clock_gating = cfl_init_clock_gating;
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index d6e20f0..448293e 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -139,7 +139,9 @@ enum ack_type {
>  	 * in the hope that the original ack will be delivered along with
>  	 * the fallback ack.
>  	 *
> -	 * This workaround is described in HSDES #1604254524
> +	 * This workaround is described in HSDES #1604254524 and it's known as:
> +	 * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl
> +	 * although the name is a bit misleading.

Just for the record:

When I implemented this there was recommendation to do both, delaying
for ack and then this method of using a reserver bit. My interpretation
was that the delay was used as a first weapon to combat the issue. And
then later, reserve bit method appeared.

I did not use WaRsForcewakeAddDelayForAck as I thought that this will be
named differently. And also I think this method is a superset,
making delaying irrelevant. As we fallback to reserve is we miss ack
so no need to delay before polling. And adding delay to hotpath should
be the last resort anyways.

I think this is the evolution of WaRsForcewakeAddDelayForAck
(v2) and there is no better name, we should keep it.

>  	 */
>  
>  	pass = 1;
> @@ -1394,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>  	if (INTEL_GEN(dev_priv) >= 11) {
>  		int i;
>  
> -		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
> +		dev_priv->uncore.funcs.force_wake_get =
> +			fw_domains_get_with_fallback;
>  		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>  		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>  			       FORCEWAKE_RENDER_GEN9,
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index ec9d340..3f00623 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -441,6 +441,27 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> +{
> +	/* Wa_1604370585:icl (pre-prod)
> +	 * Formerly known as WaPushConstantDereferenceHoldDisable
> +	 */
> +	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
> +		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
> +				  PUSH_CONSTANT_DEREF_DISABLE);

Inherited from CNL and had to check if we have that on cnl. We do.

> +
> +	/* WaForceEnableNonCoherent:icl
> +	 * This is not the same workaround as in early Gen9 platforms, where
> +	 * lacking this could cause system hangs, but coherency performance
> +	 * overhead is high and only a few compute workloads really need it
> +	 * (the register is whitelisted in hardware now, so UMDs can opt in
> +	 * for coherency if they have a good reason).
> +	 */
> +	WA_SET_BIT_MASKED(ICL_HDC_CHICKEN0, HDC_FORCE_NON_COHERENT);

Right, but the register name should be ICL_HDC_MODE.

> +
> +	return 0;
> +}
> +
>  int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>  {
>  	int err = 0;
> @@ -465,6 +486,8 @@ int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>  		err = cfl_ctx_workarounds_init(dev_priv);
>  	else if (IS_CANNONLAKE(dev_priv))
>  		err = cnl_ctx_workarounds_init(dev_priv);
> +	else if (IS_ICELAKE(dev_priv))
> +		err = icl_ctx_workarounds_init(dev_priv);
>  	else
>  		MISSING_CASE(INTEL_GEN(dev_priv));
>  	if (err)
> @@ -663,6 +686,21 @@ static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>  		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
>  }
>  
> +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> +	/* This is not an Wa. Enable for better image quality */
> +	I915_WRITE(_3D_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
> +
> +	/* WaInPlaceDecompressionHang:icl */
> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> +					     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> +
> +	/* WaPipelineFlushCoherentLines:icl */
> +	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
> +				    GEN8_LQSC_FLUSH_COHERENT_LINES));

Didn't find a HSDES entry for this. The workaround name and the reg/bit
matches tho.

But the real question in here is that do we need to set this through
indirect bb like we do with gen[8,9].

And just to note that cnl is missing this too. But that can be done
as a followup when we first figure out that should we use the indirect
bb for all >= gen8.

-Mika

> +}
> +
>  void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>  {
>  	if (INTEL_GEN(dev_priv) < 8)
> @@ -683,6 +721,8 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>  		cfl_gt_workarounds_apply(dev_priv);
>  	else if (IS_CANNONLAKE(dev_priv))
>  		cnl_gt_workarounds_apply(dev_priv);
> +	else if (IS_ICELAKE(dev_priv))
> +		icl_gt_workarounds_apply(dev_priv);
>  	else
>  		MISSING_CASE(INTEL_GEN(dev_priv));
>  }
> @@ -761,6 +801,10 @@ static void cnl_whitelist_build(struct whitelist *w)
>  	whitelist_reg(w, GEN8_CS_CHICKEN1);
>  }
>  
> +static void icl_whitelist_build(struct whitelist *w)
> +{
> +}
> +
>  static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
>  					 struct whitelist *w)
>  {
> @@ -789,6 +833,8 @@ static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
>  		cfl_whitelist_build(w);
>  	else if (IS_CANNONLAKE(i915))
>  		cnl_whitelist_build(w);
> +	else if (IS_ICELAKE(i915))
> +		icl_whitelist_build(w);
>  	else
>  		MISSING_CASE(INTEL_GEN(i915));
>  
> -- 
> 1.9.1
oscar.mateo@intel.com April 30, 2018, 5:12 p.m. UTC | #2
On 04/26/2018 08:01 AM, Mika Kuoppala wrote:
> Oscar Mateo <oscar.mateo@intel.com> writes:
>
>> Inherit workarounds from previous platforms that are still valid for
>> Icelake.
>>
>> v2: GEN7_ROW_CHICKEN2 is masked
>> v3:
>>    - Since it has been fixed already in upstream, removed the TODO
>>      comment about WA_SET_BIT for WaInPlaceDecompressionHang.
>>    - Squashed with this patch:
>>        drm/i915/icl: add icelake_init_clock_gating()
>>      from Paulo Zanoni <paulo.r.zanoni@intel.com>
>>    - Squashed with this patch:
>>        drm/i915/icl: WaForceEnableNonCoherent
>>      from Oscar Mateo <oscar.mateo@intel.com>
>>    - WaPushConstantDereferenceHoldDisable is now Wa_1604370585 and
>>      applies to B0 as well.
>>    - WaPipeControlBefore3DStateSamplePattern WABB was being applied
>>      to ICL incorrectly.
>> v4:
>>    - Wrap the commit message
>>    - s/dev_priv/p to please checkpatch
>> v5: Rebased on top of the WA refactoring
>> v6: Rebased on top of further whitelist registers refactoring (Michel)
>> v7: Added WaRsForcewakeAddDelayForAck
>>
>> Cc: Tomasz Lis <tomasz.lis@intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |  9 +++++++
>>   drivers/gpu/drm/i915/i915_gem_gtt.c      |  4 +--
>>   drivers/gpu/drm/i915/i915_reg.h          |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         |  2 ++
>>   drivers/gpu/drm/i915/intel_pm.c          |  4 ++-
>>   drivers/gpu/drm/i915/intel_uncore.c      |  7 +++--
>>   drivers/gpu/drm/i915/intel_workarounds.c | 46 ++++++++++++++++++++++++++++++++
>>   7 files changed, 68 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 0286911..1dc157f 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2462,6 +2462,15 @@ static inline unsigned int i915_sg_segment_size(void)
>>   #define IS_CNL_REVID(p, since, until) \
>>   	(IS_CANNONLAKE(p) && IS_REVID(p, since, until))
>>   
>> +#define ICL_REVID_A0		0x0
>> +#define ICL_REVID_A2		0x1
> Just noted that for some reason bspec puts A0 and A2 under
> same revid. Bspec err?

That's what I hope. I have opened a bug against the BSpec to be 100% sure.

>> +#define ICL_REVID_B0		0x3
>> +#define ICL_REVID_B2		0x4
>> +#define ICL_REVID_C0		0x5
>> +
>> +#define IS_ICL_REVID(p, since, until) \
>> +	(IS_ICELAKE(p) && IS_REVID(p, since, until))
>> +
>>   /*
>>    * The genX designation typically refers to the render engine, so render
>>    * capability related checks should use IS_GEN, while display and other checks
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 21d72f6..221b873 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -2140,12 +2140,12 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
>>   	 * called on driver load and after a GPU reset, so you can place
>>   	 * workarounds here even if they get overwritten by GPU reset.
>>   	 */
>> -	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
>> +	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
>>   	if (IS_BROADWELL(dev_priv))
>>   		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
>>   	else if (IS_CHERRYVIEW(dev_priv))
>>   		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
>> -	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
>> +	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv) || IS_GEN11(dev_priv))
>>   		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
>>   	else if (IS_GEN9_LP(dev_priv))
>>   		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index fb10602..f2ee225 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7203,6 +7203,7 @@ enum {
>>   /* GEN8 chicken */
>>   #define HDC_CHICKEN0				_MMIO(0x7300)
>>   #define CNL_HDC_CHICKEN0			_MMIO(0xE5F0)
>> +#define ICL_HDC_CHICKEN0			_MMIO(0xE5F4)
>>   #define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
>>   #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
>>   #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 029901a..2d6572a 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1636,6 +1636,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
>>   		return -EINVAL;
>>   
>>   	switch (INTEL_GEN(engine->i915)) {
>> +	case 11:
>> +		return 0;
>>   	case 10:
>>   		wa_bb_fn[0] = gen10_init_indirectctx_bb;
>>   		wa_bb_fn[1] = NULL;
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 4baab85..3b7d804 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -9123,7 +9123,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
>>    */
>>   void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
>>   {
>> -	if (IS_CANNONLAKE(dev_priv))
>> +	if (IS_ICELAKE(dev_priv))
>> +		dev_priv->display.init_clock_gating = nop_init_clock_gating;
>> +	else if (IS_CANNONLAKE(dev_priv))
>>   		dev_priv->display.init_clock_gating = cnl_init_clock_gating;
>>   	else if (IS_COFFEELAKE(dev_priv))
>>   		dev_priv->display.init_clock_gating = cfl_init_clock_gating;
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index d6e20f0..448293e 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -139,7 +139,9 @@ enum ack_type {
>>   	 * in the hope that the original ack will be delivered along with
>>   	 * the fallback ack.
>>   	 *
>> -	 * This workaround is described in HSDES #1604254524
>> +	 * This workaround is described in HSDES #1604254524 and it's known as:
>> +	 * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl
>> +	 * although the name is a bit misleading.
> Just for the record:
>
> When I implemented this there was recommendation to do both, delaying
> for ack and then this method of using a reserver bit. My interpretation
> was that the delay was used as a first weapon to combat the issue. And
> then later, reserve bit method appeared.
>
> I did not use WaRsForcewakeAddDelayForAck as I thought that this will be
> named differently. And also I think this method is a superset,
> making delaying irrelevant. As we fallback to reserve is we miss ack
> so no need to delay before polling. And adding delay to hotpath should
> be the last resort anyways.
>
> I think this is the evolution of WaRsForcewakeAddDelayForAck
> (v2) and there is no better name, we should keep it.

Yes, I read the mailing lists comments about this. AFAICT, there is no 
better name.

>>   	 */
>>   
>>   	pass = 1;
>> @@ -1394,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>>   	if (INTEL_GEN(dev_priv) >= 11) {
>>   		int i;
>>   
>> -		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
>> +		dev_priv->uncore.funcs.force_wake_get =
>> +			fw_domains_get_with_fallback;
>>   		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>>   		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>>   			       FORCEWAKE_RENDER_GEN9,
>> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
>> index ec9d340..3f00623 100644
>> --- a/drivers/gpu/drm/i915/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
>> @@ -441,6 +441,27 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>>   	return 0;
>>   }
>>   
>> +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>> +{
>> +	/* Wa_1604370585:icl (pre-prod)
>> +	 * Formerly known as WaPushConstantDereferenceHoldDisable
>> +	 */
>> +	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
>> +		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
>> +				  PUSH_CONSTANT_DEREF_DISABLE);
> Inherited from CNL and had to check if we have that on cnl. We do.
>
>> +
>> +	/* WaForceEnableNonCoherent:icl
>> +	 * This is not the same workaround as in early Gen9 platforms, where
>> +	 * lacking this could cause system hangs, but coherency performance
>> +	 * overhead is high and only a few compute workloads really need it
>> +	 * (the register is whitelisted in hardware now, so UMDs can opt in
>> +	 * for coherency if they have a good reason).
>> +	 */
>> +	WA_SET_BIT_MASKED(ICL_HDC_CHICKEN0, HDC_FORCE_NON_COHERENT);
> Right, but the register name should be ICL_HDC_MODE.

ACK

>> +
>> +	return 0;
>> +}
>> +
>>   int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>>   {
>>   	int err = 0;
>> @@ -465,6 +486,8 @@ int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
>>   		err = cfl_ctx_workarounds_init(dev_priv);
>>   	else if (IS_CANNONLAKE(dev_priv))
>>   		err = cnl_ctx_workarounds_init(dev_priv);
>> +	else if (IS_ICELAKE(dev_priv))
>> +		err = icl_ctx_workarounds_init(dev_priv);
>>   	else
>>   		MISSING_CASE(INTEL_GEN(dev_priv));
>>   	if (err)
>> @@ -663,6 +686,21 @@ static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>>   		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
>>   }
>>   
>> +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>> +{
>> +	/* This is not an Wa. Enable for better image quality */
>> +	I915_WRITE(_3D_CHICKEN3,
>> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
>> +
>> +	/* WaInPlaceDecompressionHang:icl */
>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> +					     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
>> +
>> +	/* WaPipelineFlushCoherentLines:icl */
>> +	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
>> +				    GEN8_LQSC_FLUSH_COHERENT_LINES));
> Didn't find a HSDES entry for this. The workaround name and the reg/bit
> matches tho.
>
> But the real question in here is that do we need to set this through
> indirect bb like we do with gen[8,9].
>
> And just to note that cnl is missing this too. But that can be done
> as a followup when we first figure out that should we use the indirect
> bb for all >= gen8.
>
> -Mika

Hmmmm... looking at it more carefully: isn't that a slightly different 
WA: WaPipelineFlushCoherentLines (not needed starting CNL)?
This is equivalent to a different WA: WaOCLCoherentLineFlush.
But indeed, it seems to be required for CNL as well. In fact, I'm pretty 
sure I added it to this patch because I saw it in CNL. Did it get lost?

>> +}
>> +
>>   void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>>   {
>>   	if (INTEL_GEN(dev_priv) < 8)
>> @@ -683,6 +721,8 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>>   		cfl_gt_workarounds_apply(dev_priv);
>>   	else if (IS_CANNONLAKE(dev_priv))
>>   		cnl_gt_workarounds_apply(dev_priv);
>> +	else if (IS_ICELAKE(dev_priv))
>> +		icl_gt_workarounds_apply(dev_priv);
>>   	else
>>   		MISSING_CASE(INTEL_GEN(dev_priv));
>>   }
>> @@ -761,6 +801,10 @@ static void cnl_whitelist_build(struct whitelist *w)
>>   	whitelist_reg(w, GEN8_CS_CHICKEN1);
>>   }
>>   
>> +static void icl_whitelist_build(struct whitelist *w)
>> +{
>> +}
>> +
>>   static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
>>   					 struct whitelist *w)
>>   {
>> @@ -789,6 +833,8 @@ static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
>>   		cfl_whitelist_build(w);
>>   	else if (IS_CANNONLAKE(i915))
>>   		cnl_whitelist_build(w);
>> +	else if (IS_ICELAKE(i915))
>> +		icl_whitelist_build(w);
>>   	else
>>   		MISSING_CASE(INTEL_GEN(i915));
>>   
>> -- 
>> 1.9.1
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0286911..1dc157f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2462,6 +2462,15 @@  static inline unsigned int i915_sg_segment_size(void)
 #define IS_CNL_REVID(p, since, until) \
 	(IS_CANNONLAKE(p) && IS_REVID(p, since, until))
 
+#define ICL_REVID_A0		0x0
+#define ICL_REVID_A2		0x1
+#define ICL_REVID_B0		0x3
+#define ICL_REVID_B2		0x4
+#define ICL_REVID_C0		0x5
+
+#define IS_ICL_REVID(p, since, until) \
+	(IS_ICELAKE(p) && IS_REVID(p, since, until))
+
 /*
  * The genX designation typically refers to the render engine, so render
  * capability related checks should use IS_GEN, while display and other checks
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 21d72f6..221b873 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2140,12 +2140,12 @@  static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
 	 * called on driver load and after a GPU reset, so you can place
 	 * workarounds here even if they get overwritten by GPU reset.
 	 */
-	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
+	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
 	if (IS_BROADWELL(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
 	else if (IS_CHERRYVIEW(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
-	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
+	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv) || IS_GEN11(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
 	else if (IS_GEN9_LP(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index fb10602..f2ee225 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7203,6 +7203,7 @@  enum {
 /* GEN8 chicken */
 #define HDC_CHICKEN0				_MMIO(0x7300)
 #define CNL_HDC_CHICKEN0			_MMIO(0xE5F0)
+#define ICL_HDC_CHICKEN0			_MMIO(0xE5F4)
 #define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
 #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 029901a..2d6572a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1636,6 +1636,8 @@  static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 		return -EINVAL;
 
 	switch (INTEL_GEN(engine->i915)) {
+	case 11:
+		return 0;
 	case 10:
 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
 		wa_bb_fn[1] = NULL;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4baab85..3b7d804 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9123,7 +9123,9 @@  static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
  */
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		dev_priv->display.init_clock_gating = nop_init_clock_gating;
+	else if (IS_CANNONLAKE(dev_priv))
 		dev_priv->display.init_clock_gating = cnl_init_clock_gating;
 	else if (IS_COFFEELAKE(dev_priv))
 		dev_priv->display.init_clock_gating = cfl_init_clock_gating;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index d6e20f0..448293e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -139,7 +139,9 @@  enum ack_type {
 	 * in the hope that the original ack will be delivered along with
 	 * the fallback ack.
 	 *
-	 * This workaround is described in HSDES #1604254524
+	 * This workaround is described in HSDES #1604254524 and it's known as:
+	 * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl
+	 * although the name is a bit misleading.
 	 */
 
 	pass = 1;
@@ -1394,7 +1396,8 @@  static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	if (INTEL_GEN(dev_priv) >= 11) {
 		int i;
 
-		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
+		dev_priv->uncore.funcs.force_wake_get =
+			fw_domains_get_with_fallback;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_RENDER_GEN9,
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index ec9d340..3f00623 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -441,6 +441,27 @@  static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	/* Wa_1604370585:icl (pre-prod)
+	 * Formerly known as WaPushConstantDereferenceHoldDisable
+	 */
+	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
+		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+				  PUSH_CONSTANT_DEREF_DISABLE);
+
+	/* WaForceEnableNonCoherent:icl
+	 * This is not the same workaround as in early Gen9 platforms, where
+	 * lacking this could cause system hangs, but coherency performance
+	 * overhead is high and only a few compute workloads really need it
+	 * (the register is whitelisted in hardware now, so UMDs can opt in
+	 * for coherency if they have a good reason).
+	 */
+	WA_SET_BIT_MASKED(ICL_HDC_CHICKEN0, HDC_FORCE_NON_COHERENT);
+
+	return 0;
+}
+
 int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
 {
 	int err = 0;
@@ -465,6 +486,8 @@  int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
 		err = cfl_ctx_workarounds_init(dev_priv);
 	else if (IS_CANNONLAKE(dev_priv))
 		err = cnl_ctx_workarounds_init(dev_priv);
+	else if (IS_ICELAKE(dev_priv))
+		err = icl_ctx_workarounds_init(dev_priv);
 	else
 		MISSING_CASE(INTEL_GEN(dev_priv));
 	if (err)
@@ -663,6 +686,21 @@  static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
 }
 
+static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	/* This is not an Wa. Enable for better image quality */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
+
+	/* WaInPlaceDecompressionHang:icl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+					     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+
+	/* WaPipelineFlushCoherentLines:icl */
+	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
+				    GEN8_LQSC_FLUSH_COHERENT_LINES));
+}
+
 void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 {
 	if (INTEL_GEN(dev_priv) < 8)
@@ -683,6 +721,8 @@  void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 		cfl_gt_workarounds_apply(dev_priv);
 	else if (IS_CANNONLAKE(dev_priv))
 		cnl_gt_workarounds_apply(dev_priv);
+	else if (IS_ICELAKE(dev_priv))
+		icl_gt_workarounds_apply(dev_priv);
 	else
 		MISSING_CASE(INTEL_GEN(dev_priv));
 }
@@ -761,6 +801,10 @@  static void cnl_whitelist_build(struct whitelist *w)
 	whitelist_reg(w, GEN8_CS_CHICKEN1);
 }
 
+static void icl_whitelist_build(struct whitelist *w)
+{
+}
+
 static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
 					 struct whitelist *w)
 {
@@ -789,6 +833,8 @@  static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
 		cfl_whitelist_build(w);
 	else if (IS_CANNONLAKE(i915))
 		cnl_whitelist_build(w);
+	else if (IS_ICELAKE(i915))
+		icl_whitelist_build(w);
 	else
 		MISSING_CASE(INTEL_GEN(i915));