diff mbox

[v7,1/2] drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads

Message ID 1523913758-32381-1-git-send-email-yunwei.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yunwei Zhang April 16, 2018, 9:22 p.m. UTC
WaProgramMgsrForCorrectSliceSpecificMmioReads dictates that before any MMIO
read into slice/subslice specific registers, the MCR packet control
register (0xFDC) needs to be programmed to point to any enabled
slice/subslice pair. Otherwise, an incorrect value will be returned.

However, that means each subsequent MMIO read will be forwarded to a
specific slice/subslice combination, because reads are unicast. This is OK
since slice/subslice specific register values are consistent across
slices/subslices in almost all cases. There are rare exceptions, such as
INSTDONE, where the value depends on the slice/subslice combination; in such
cases we need to program 0xFDC and restore it afterwards. This is already
covered by read_subslice_reg.

Also, 0xFDC loses its contents after a TDR, an engine reset, or a power
state change, so it has to be reprogrammed after each of these events.

References: HSD#1405586840, BSID#0575

v2:
 - use fls() instead of find_last_bit() (Chris)
 - added INTEL_SSEU to extract sseu from device info. (Chris)
v3:
 - rebase on latest tip
v5:
 - Added references (Mika)
 - Changed the order of the passed arguments, etc. (Ursulin)
v7:
 - Rebased.

Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Yunwei Zhang <yunwei.zhang@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/intel_engine_cs.c   | 30 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_workarounds.c | 12 ++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

Comments

oscar.mateo@intel.com April 16, 2018, 10:09 p.m. UTC | #1
On 04/16/2018 02:22 PM, Yunwei Zhang wrote:
> WaProgramMgsrForCorrectSliceSpecificMmioReads dictate that before any MMIO
> read into Slice/Subslice specific registers, MCR packet control
> register(0xFDC) needs to be programmed to point to any enabled
> slice/subslice pair. Otherwise, incorrect value will be returned.
>
> However, that means each subsequent MMIO read will be forwarded to a
> specific slice/subslice combination as read is unicast. This is OK since
> slice/subslice specific register values are consistent in almost all cases
> across slice/subslice. There are rare occasions such as INSTDONE that this
> value will be dependent on slice/subslice combo, in such cases, we need to
> program 0xFDC and recover this after. This is already covered by
> read_subslice_reg.
>
> Also, 0xFDC will lose its information after TDR/engine reset/power state
> change.
>
> References: HSD#1405586840, BSID#0575
>
> v2:
>   - use fls() instead of find_last_bit() (Chris)
>   - added INTEL_SSEU to extract sseu from device info. (Chris)
> v3:
>   - rebase on latest tip
> v5:
>   - Added references (Mika)
>   - Change the ordered of passing arguments and etc. (Ursulin)
> v7:
>   - Rebased.
>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Yunwei Zhang <yunwei.zhang@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>   drivers/gpu/drm/i915/intel_engine_cs.c   | 30 +++++++++++++++++++++++++++---
>   drivers/gpu/drm/i915/intel_workarounds.c | 12 ++++++++++++
>   3 files changed, 41 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8e8667d..43498a47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2725,6 +2725,8 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
>   int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
>   int intel_engines_init(struct drm_i915_private *dev_priv);
>   
> +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);
> +

As a global function, this could use a better prefix (intel_something_)

Or, alternatively, make it local and store the calculation somewhere.

>   /* intel_hotplug.c */
>   void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
>   			   u32 pin_mask, u32 long_mask);
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1a83707..3b6bc5e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -799,6 +799,18 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
>   	}
>   }
>   
> +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)
> +{
> +	const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
> +	u32 slice = fls(sseu->slice_mask);
> +	u32 subslice = fls(sseu->subslice_mask[slice]);
> +
> +	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
> +	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
> +
> +	return mcr;
> +}
> +
>   static inline uint32_t
>   read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
>   		  int subslice, i915_reg_t reg)
> @@ -831,18 +843,30 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
>   	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
>   
>   	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
> +
>   	/*
>   	 * The HW expects the slice and sublice selectors to be reset to 0
> -	 * after reading out the registers.
> +	 * before GEN10 or to a enabled s/ss post GEN10 after reading out the
> +	 * registers.
>   	 */
> -	WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
> +	WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
> +		     (mcr & mcr_slice_subslice_mask));

Advantage of storing the calculation: you can assert here for the 
expected value, independently of the platform.

>   	mcr &= ~mcr_slice_subslice_mask;
>   	mcr |= mcr_slice_subslice_select;
>   	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
>   
>   	ret = I915_READ_FW(reg);
>   
> -	mcr &= ~mcr_slice_subslice_mask;
> +	/*
> +	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
> +	 * expects mcr to be programed to a enabled slice/subslice pair
> +	 * before any MMIO read into slice/subslice register
> +	 */
> +	if (INTEL_GEN(dev_priv) < 10)
> +		mcr &= ~mcr_slice_subslice_mask;
> +	else
> +		mcr = calculate_mcr(dev_priv, mcr);

Another advantage: no branching here either.

> +
>   	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
>   
>   	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index ec9d340..8a2354e 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -645,8 +645,20 @@ static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>   		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>   }
>   
> +static void wa_init_mcr(struct drm_i915_private *dev_priv)
> +{
> +	u32 mcr;
> +
> +	mcr = I915_READ(GEN8_MCR_SELECTOR);
> +	mcr = calculate_mcr(dev_priv, mcr);
> +	I915_WRITE(GEN8_MCR_SELECTOR, mcr);
> +}
> +
>   static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>   {
> +	/* WaProgramMgsrForCorrectSliceSpecificMmioReads: cnl */
> +	wa_init_mcr(dev_priv);
> +
>   	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
>   	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
>   		I915_WRITE(GAMT_CHKN_BIT_REG,

With one of the two above (appropriate prefix or store value), this is:

Reviewed-by: Oscar Mateo <oscar.mateo@intel.com>

And as a side note: this is also needed for Icelake.
Yunwei Zhang April 17, 2018, 3:54 p.m. UTC | #2
On 4/16/2018 3:09 PM, Oscar Mateo wrote:
>
>
> On 04/16/2018 02:22 PM, Yunwei Zhang wrote:
>> WaProgramMgsrForCorrectSliceSpecificMmioReads dictate that before any 
>> MMIO
>> read into Slice/Subslice specific registers, MCR packet control
>> register(0xFDC) needs to be programmed to point to any enabled
>> slice/subslice pair. Otherwise, incorrect value will be returned.
>>
>> However, that means each subsequent MMIO read will be forwarded to a
>> specific slice/subslice combination as read is unicast. This is OK since
>> slice/subslice specific register values are consistent in almost all 
>> cases
>> across slice/subslice. There are rare occasions such as INSTDONE that 
>> this
>> value will be dependent on slice/subslice combo, in such cases, we 
>> need to
>> program 0xFDC and recover this after. This is already covered by
>> read_subslice_reg.
>>
>> Also, 0xFDC will lose its information after TDR/engine reset/power state
>> change.
>>
>> References: HSD#1405586840, BSID#0575
>>
>> v2:
>>   - use fls() instead of find_last_bit() (Chris)
>>   - added INTEL_SSEU to extract sseu from device info. (Chris)
>> v3:
>>   - rebase on latest tip
>> v5:
>>   - Added references (Mika)
>>   - Change the ordered of passing arguments and etc. (Ursulin)
>> v7:
>>   - Rebased.
>>
>> Cc: Oscar Mateo <oscar.mateo@intel.com>
>> Cc: Michel Thierry <michel.thierry@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> Signed-off-by: Yunwei Zhang <yunwei.zhang@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>   drivers/gpu/drm/i915/intel_engine_cs.c   | 30 
>> +++++++++++++++++++++++++++---
>>   drivers/gpu/drm/i915/intel_workarounds.c | 12 ++++++++++++
>>   3 files changed, 41 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 8e8667d..43498a47 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2725,6 +2725,8 @@ int vlv_force_gfx_clock(struct drm_i915_private 
>> *dev_priv, bool on);
>>   int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
>>   int intel_engines_init(struct drm_i915_private *dev_priv);
>>   +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);
>> +
>
> As a global function, this could use a better prefix (intel_something_)
>
> Or, alternatively, make it local and store the calculation somewhere.
Good suggestion. Do you think intel_device_info would be a good place to
store it? It is deduced from that structure, after all. Or should I put it
in drm_i915_private?
>
>>   /* intel_hotplug.c */
>>   void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
>>                  u32 pin_mask, u32 long_mask);
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 1a83707..3b6bc5e 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -799,6 +799,18 @@ const char *i915_cache_level_str(struct 
>> drm_i915_private *i915, int type)
>>       }
>>   }
>>   +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)
>> +{
>> +    const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
>> +    u32 slice = fls(sseu->slice_mask);
>> +    u32 subslice = fls(sseu->subslice_mask[slice]);
>> +
>> +    mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
>> +    mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
>> +
>> +    return mcr;
>> +}
>> +
>>   static inline uint32_t
>>   read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
>>             int subslice, i915_reg_t reg)
>> @@ -831,18 +843,30 @@ read_subslice_reg(struct drm_i915_private 
>> *dev_priv, int slice,
>>       intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
>>         mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
>> +
>>       /*
>>        * The HW expects the slice and sublice selectors to be reset to 0
>> -     * after reading out the registers.
>> +     * before GEN10 or to a enabled s/ss post GEN10 after reading 
>> out the
>> +     * registers.
>>        */
>> -    WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
>> +    WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
>> +             (mcr & mcr_slice_subslice_mask));
>
> Advantage of storing the calculation: you can assert here for the 
> expected value, independently of the platform.
>
>>       mcr &= ~mcr_slice_subslice_mask;
>>       mcr |= mcr_slice_subslice_select;
>>       I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
>>         ret = I915_READ_FW(reg);
>>   -    mcr &= ~mcr_slice_subslice_mask;
>> +    /*
>> +     * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
>> +     * expects mcr to be programed to a enabled slice/subslice pair
>> +     * before any MMIO read into slice/subslice register
>> +     */
>> +    if (INTEL_GEN(dev_priv) < 10)
>> +        mcr &= ~mcr_slice_subslice_mask;
>> +    else
>> +        mcr = calculate_mcr(dev_priv, mcr);
>
> Another advantage: no branching here either.
>
>> +
>>       I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
>>         intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
>> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c 
>> b/drivers/gpu/drm/i915/intel_workarounds.c
>> index ec9d340..8a2354e 100644
>> --- a/drivers/gpu/drm/i915/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
>> @@ -645,8 +645,20 @@ static void cfl_gt_workarounds_apply(struct 
>> drm_i915_private *dev_priv)
>>              GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>   }
>>   +static void wa_init_mcr(struct drm_i915_private *dev_priv)
>> +{
>> +    u32 mcr;
>> +
>> +    mcr = I915_READ(GEN8_MCR_SELECTOR);
>> +    mcr = calculate_mcr(dev_priv, mcr);
>> +    I915_WRITE(GEN8_MCR_SELECTOR, mcr);
>> +}
>> +
>>   static void cnl_gt_workarounds_apply(struct drm_i915_private 
>> *dev_priv)
>>   {
>> +    /* WaProgramMgsrForCorrectSliceSpecificMmioReads: cnl */
>> +    wa_init_mcr(dev_priv);
>> +
>>       /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
>>       if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
>>           I915_WRITE(GAMT_CHKN_BIT_REG,
>
> With one of the two above (appropriate prefix or store value), this is:
>
> Reviewed-by: Oscar Mateo <oscar.mateo@intel.com>
>
> And as a side note: this is also needed for Icelake.
Will do in a separate patch; when I first floated this patch, ICL was
still internal-only.

Thanks,
Yunwei
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8e8667d..43498a47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2725,6 +2725,8 @@  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
 
+u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);
+
 /* intel_hotplug.c */
 void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 			   u32 pin_mask, u32 long_mask);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1a83707..3b6bc5e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -799,6 +799,18 @@  const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 	}
 }
 
+u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)
+{
+	const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+	/* fls() is 1-based, 0 if the mask is empty; convert to indices */
+	u32 slice = max(fls(sseu->slice_mask), 1) - 1;
+	u32 subslice = max(fls(sseu->subslice_mask[slice]), 1) - 1;
+
+	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+	return mcr;
+}
+
 static inline uint32_t
 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 		  int subslice, i915_reg_t reg)
@@ -831,18 +843,30 @@  read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 
 	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
+
 	/*
 	 * The HW expects the slice and sublice selectors to be reset to 0
-	 * after reading out the registers.
+	 * before GEN10 or to an enabled s/ss post GEN10 after reading out the
+	 * registers.
 	 */
-	WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+	WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
+		     (mcr & mcr_slice_subslice_mask));
 	mcr &= ~mcr_slice_subslice_mask;
 	mcr |= mcr_slice_subslice_select;
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
 	ret = I915_READ_FW(reg);
 
-	mcr &= ~mcr_slice_subslice_mask;
+	/*
+	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
+	 * expects mcr to be programmed to an enabled slice/subslice pair
+	 * before any MMIO read into slice/subslice register
+	 */
+	if (INTEL_GEN(dev_priv) < 10)
+		mcr &= ~mcr_slice_subslice_mask;
+	else
+		mcr = calculate_mcr(dev_priv, mcr);
+
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
 	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index ec9d340..8a2354e 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -645,8 +645,20 @@  static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
+static void wa_init_mcr(struct drm_i915_private *dev_priv)
+{
+	u32 mcr;
+
+	mcr = I915_READ(GEN8_MCR_SELECTOR); /* read-modify-write of 0xFDC */
+	mcr = calculate_mcr(dev_priv, mcr); /* only s/ss select bits change */
+	I915_WRITE(GEN8_MCR_SELECTOR, mcr);
+}
+
 static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 {
+	/* WaProgramMgsrForCorrectSliceSpecificMmioReads: cnl */
+	wa_init_mcr(dev_priv);
+
 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
 	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
 		I915_WRITE(GAMT_CHKN_BIT_REG,