[SKL-DMC-BUGFIX,1/5] drm/i915/gen9: Removed byte swapping for csr firmware
diff mbox

Message ID DFCE9515A6E7D340BE366A94B943DE1310C9CBE9@BGSMSX103.gar.corp.intel.com
State New
Headers show

Commit Message

vathsala nagaraju Aug. 4, 2015, 3:46 a.m. UTC
"This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.

-----Original Message-----
From: Manna, Animesh 
Sent: Monday, August 3, 2015 9:56 PM
To: intel-gfx@lists.freedesktop.org
Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware

This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.

v1: Initial version.

v2: Corrected firmware size during memcpy(). (Suggested by Sunil)

Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Damien Lespiau <damien.lespiau@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
 2 files changed, 5 insertions(+), 13 deletions(-)

 	for (i = 0; i < fw_size; i++)
 		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
-			(u32 __force)payload[i]);
+			payload[i]);
 
 	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
 		I915_WRITE(dev_priv->csr.mmioaddr[i],
@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
 	char substepping = intel_get_substepping(dev);
 	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
 	uint32_t i;
-	__be32 *dmc_payload;
+	uint32_t *dmc_payload;
 	bool fw_loaded = false;
 
 	if (!fw) {
@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
 	}
 
 	dmc_payload = csr->dmc_payload;
-	for (i = 0; i < dmc_header->fw_size; i++) {
-		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
-		/*
-		 * The firmware payload is an array of 32 bit words stored in
-		 * little-endian format in the firmware image and programmed
-		 * as 32 bit big-endian format to memory.
-		 */
-		dmc_payload[i] = cpu_to_be32(*tmp);
-	}
+	memcpy(dmc_payload, &fw->data[readcount], nbytes);
 
 	/* load csr program during system boot, as needed for DC states */
 	intel_csr_load_program(dev);
--
2.0.2

Comments

Manna, Animesh Aug. 4, 2015, 5:55 a.m. UTC | #1
On 8/4/2015 9:16 AM, Nagaraju, Vathsala wrote:
> "This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
> Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.

Packaging of firmware binary completely changed after api version 1.0, so by old firmware I want to mean
the initial version before dmc 1.0.

>
> -----Original Message-----
> From: Manna, Animesh
> Sent: Monday, August 3, 2015 9:56 PM
> To: intel-gfx@lists.freedesktop.org
> Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
> Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware
>
> This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
> While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.
>
> v1: Initial version.
>
> v2: Corrected firmware size during memcpy(). (Suggested by Sunil)
>
> Cc: Daniel Vetter <daniel.vetter@intel.com>
> Cc: Damien Lespiau <damien.lespiau@intel.com>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Sunil Kamath <sunil.kamath@intel.com>
> Signed-off-by: Animesh Manna <animesh.manna@intel.com>
> Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
>   2 files changed, 5 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -742,7 +742,7 @@ enum csr_state {
>   
>   struct intel_csr {
>   	const char *fw_path;
> -	__be32 *dmc_payload;
> +	uint32_t *dmc_payload;
>   	uint32_t dmc_fw_size;
>   	uint32_t mmio_count;
>   	uint32_t mmioaddr[8];
> diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
> index 6d8a7bf..ba1ae03 100644
> --- a/drivers/gpu/drm/i915/intel_csr.c
> +++ b/drivers/gpu/drm/i915/intel_csr.c
> @@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> -	__be32 *payload = dev_priv->csr.dmc_payload;
> +	u32 *payload = dev_priv->csr.dmc_payload;
>   	uint32_t i, fw_size;
>   
>   	if (!IS_GEN9(dev)) {
> @@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
>   	fw_size = dev_priv->csr.dmc_fw_size;
>   	for (i = 0; i < fw_size; i++)
>   		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
> -			(u32 __force)payload[i]);
> +			payload[i]);
>   
>   	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
>   		I915_WRITE(dev_priv->csr.mmioaddr[i],
> @@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>   	char substepping = intel_get_substepping(dev);
>   	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
>   	uint32_t i;
> -	__be32 *dmc_payload;
> +	uint32_t *dmc_payload;
>   	bool fw_loaded = false;
>   
>   	if (!fw) {
> @@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>   	}
>   
>   	dmc_payload = csr->dmc_payload;
> -	for (i = 0; i < dmc_header->fw_size; i++) {
> -		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
> -		/*
> -		 * The firmware payload is an array of 32 bit words stored in
> -		 * little-endian format in the firmware image and programmed
> -		 * as 32 bit big-endian format to memory.
> -		 */
> -		dmc_payload[i] = cpu_to_be32(*tmp);
> -	}
> +	memcpy(dmc_payload, &fw->data[readcount], nbytes);
>   
>   	/* load csr program during system boot, as needed for DC states */
>   	intel_csr_load_program(dev);
> --
> 2.0.2
>
Daniel Vetter Aug. 5, 2015, 9:01 a.m. UTC | #2
On Tue, Aug 04, 2015 at 11:25:40AM +0530, Animesh Manna wrote:
> 
> 
> On 8/4/2015 9:16 AM, Nagaraju, Vathsala wrote:
> >"This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
> >Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.
> 
> Packaging of firmware binary completely changed after api version 1.0, so by old firmware I want to mean
> the initial version before dmc 1.0.

This kind of information really must be included in the commit message.
Very likely someone with old firmware will bisect to this commit, and if
you don't include that people need latest dmc firmware there will be
confusion.

Commit message _must_ be complete and contain everything that was
discussed while reviewing and developing a patch.

I added a note while merging the patch.
-Daniel

> 
> >
> >-----Original Message-----
> >From: Manna, Animesh
> >Sent: Monday, August 3, 2015 9:56 PM
> >To: intel-gfx@lists.freedesktop.org
> >Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
> >Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware
> >
> >This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
> >While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.
> >
> >v1: Initial version.
> >
> >v2: Corrected firmware size during memcpy(). (Suggested by Sunil)
> >
> >Cc: Daniel Vetter <daniel.vetter@intel.com>
> >Cc: Damien Lespiau <damien.lespiau@intel.com>
> >Cc: Imre Deak <imre.deak@intel.com>
> >Cc: Sunil Kamath <sunil.kamath@intel.com>
> >Signed-off-by: Animesh Manna <animesh.manna@intel.com>
> >Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
> >  2 files changed, 5 insertions(+), 13 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -742,7 +742,7 @@ enum csr_state {
> >  struct intel_csr {
> >  	const char *fw_path;
> >-	__be32 *dmc_payload;
> >+	uint32_t *dmc_payload;
> >  	uint32_t dmc_fw_size;
> >  	uint32_t mmio_count;
> >  	uint32_t mmioaddr[8];
> >diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
> >index 6d8a7bf..ba1ae03 100644
> >--- a/drivers/gpu/drm/i915/intel_csr.c
> >+++ b/drivers/gpu/drm/i915/intel_csr.c
> >@@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >-	__be32 *payload = dev_priv->csr.dmc_payload;
> >+	u32 *payload = dev_priv->csr.dmc_payload;
> >  	uint32_t i, fw_size;
> >  	if (!IS_GEN9(dev)) {
> >@@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
> >  	fw_size = dev_priv->csr.dmc_fw_size;
> >  	for (i = 0; i < fw_size; i++)
> >  		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
> >-			(u32 __force)payload[i]);
> >+			payload[i]);
> >  	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
> >  		I915_WRITE(dev_priv->csr.mmioaddr[i],
> >@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
> >  	char substepping = intel_get_substepping(dev);
> >  	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
> >  	uint32_t i;
> >-	__be32 *dmc_payload;
> >+	uint32_t *dmc_payload;
> >  	bool fw_loaded = false;
> >  	if (!fw) {
> >@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
> >  	}
> >  	dmc_payload = csr->dmc_payload;
> >-	for (i = 0; i < dmc_header->fw_size; i++) {
> >-		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
> >-		/*
> >-		 * The firmware payload is an array of 32 bit words stored in
> >-		 * little-endian format in the firmware image and programmed
> >-		 * as 32 bit big-endian format to memory.
> >-		 */
> >-		dmc_payload[i] = cpu_to_be32(*tmp);
> >-	}
> >+	memcpy(dmc_payload, &fw->data[readcount], nbytes);
> >  	/* load csr program during system boot, as needed for DC states */
> >  	intel_csr_load_program(dev);
> >--
> >2.0.2
> >
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Manna, Animesh Aug. 6, 2015, 9:20 a.m. UTC | #3
On 8/5/2015 2:31 PM, Daniel Vetter wrote:
> On Tue, Aug 04, 2015 at 11:25:40AM +0530, Animesh Manna wrote:
>>
>> On 8/4/2015 9:16 AM, Nagaraju, Vathsala wrote:
>>> "This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
>>> Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.
>> Packaging of firmware binary completely changed after api version 1.0, so by old firmware I want to mean
>> the initial version before dmc 1.0.
> This kind of information really must be included in the commit message.
> Very likely someone with old firmware will bisect to this commit, and if
> you don't include that people need latest dmc firmware there will be
> confusion.
>
> Commit message _must_ be complete and contain everything that was
> discussed while reviewing and developing a patch.
>
> I added a note while merging the patch.
> -Daniel

Thanks Daniel. Sure, will follow your suggestion in future.

-Animesh


>>> -----Original Message-----
>>> From: Manna, Animesh
>>> Sent: Monday, August 3, 2015 9:56 PM
>>> To: intel-gfx@lists.freedesktop.org
>>> Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
>>> Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware
>>>
>>> This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
>>> While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.
>>>
>>> v1: Initial version.
>>>
>>> v2: Corrected firmware size during memcpy(). (Suggested by Sunil)
>>>
>>> Cc: Daniel Vetter <daniel.vetter@intel.com>
>>> Cc: Damien Lespiau <damien.lespiau@intel.com>
>>> Cc: Imre Deak <imre.deak@intel.com>
>>> Cc: Sunil Kamath <sunil.kamath@intel.com>
>>> Signed-off-by: Animesh Manna <animesh.manna@intel.com>
>>> Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
>>>   2 files changed, 5 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -742,7 +742,7 @@ enum csr_state {
>>>   struct intel_csr {
>>>   	const char *fw_path;
>>> -	__be32 *dmc_payload;
>>> +	uint32_t *dmc_payload;
>>>   	uint32_t dmc_fw_size;
>>>   	uint32_t mmio_count;
>>>   	uint32_t mmioaddr[8];
>>> diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
>>> index 6d8a7bf..ba1ae03 100644
>>> --- a/drivers/gpu/drm/i915/intel_csr.c
>>> +++ b/drivers/gpu/drm/i915/intel_csr.c
>>> @@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
>>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>> -	__be32 *payload = dev_priv->csr.dmc_payload;
>>> +	u32 *payload = dev_priv->csr.dmc_payload;
>>>   	uint32_t i, fw_size;
>>>   	if (!IS_GEN9(dev)) {
>>> @@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
>>>   	fw_size = dev_priv->csr.dmc_fw_size;
>>>   	for (i = 0; i < fw_size; i++)
>>>   		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
>>> -			(u32 __force)payload[i]);
>>> +			payload[i]);
>>>   	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
>>>   		I915_WRITE(dev_priv->csr.mmioaddr[i],
>>> @@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>>>   	char substepping = intel_get_substepping(dev);
>>>   	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
>>>   	uint32_t i;
>>> -	__be32 *dmc_payload;
>>> +	uint32_t *dmc_payload;
>>>   	bool fw_loaded = false;
>>>   	if (!fw) {
>>> @@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>>>   	}
>>>   	dmc_payload = csr->dmc_payload;
>>> -	for (i = 0; i < dmc_header->fw_size; i++) {
>>> -		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
>>> -		/*
>>> -		 * The firmware payload is an array of 32 bit words stored in
>>> -		 * little-endian format in the firmware image and programmed
>>> -		 * as 32 bit big-endian format to memory.
>>> -		 */
>>> -		dmc_payload[i] = cpu_to_be32(*tmp);
>>> -	}
>>> +	memcpy(dmc_payload, &fw->data[readcount], nbytes);
>>>   	/* load csr program during system boot, as needed for DC states */
>>>   	intel_csr_load_program(dev);
>>> --
>>> 2.0.2
>>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Mika Kuoppala Sept. 11, 2015, 3:29 p.m. UTC | #4
Daniel Vetter <daniel@ffwll.ch> writes:

> On Tue, Aug 04, 2015 at 11:25:40AM +0530, Animesh Manna wrote:
>> 
>> 
>> On 8/4/2015 9:16 AM, Nagaraju, Vathsala wrote:
>> >"This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
>> >Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.
>> 
>> Packaging of firmware binary completely changed after api version 1.0, so by old firmware I want to mean
>> the initial version before dmc 1.0.
>
> This kind of information really must be included in the commit message.
> Very likely someone with old firmware will bisect to this commit, and if
> you don't include that people need latest dmc firmware there will be
> confusion.
>
> Commit message _must_ be complete and contain everything that was
> discussed while reviewing and developing a patch.
>
> I added a note while merging the patch.
> -Daniel
>

Hi,

There is problem with gem_exec_nop throwing gpu hang with GPU HANG:
ecode 9:0:0xfffffffe on some skls. Looks like faster ones are
more suspectible.

My bisect pointed to this commit. As this looks like commit
to fix the loading, one could assume that at this point the firmware
started to actually do something.

So my failure hypothesis is that our driver runtime power well
bookkeeping and handover to dmc is racy. Lots of speculation here but
i suspect that during initialization, when the gpu is busy with the
workarounds and null state batch, handover to dmc happens
and something bad with it, like render side power toggles
while the dmc initializes?

I haven't yet tested the redesign series for cure/clue, but changing
the firmware from 1.19 to 1.21 makes it very hard to reproduce
the issue.

On a related note, we need the firmware version to be part of
error state. Also the driver could warn if not-new-enough firmware
is found, to atleast push the unlucky bisecter to a right
direction.

- Mika

>> 
>> >
>> >-----Original Message-----
>> >From: Manna, Animesh
>> >Sent: Monday, August 3, 2015 9:56 PM
>> >To: intel-gfx@lists.freedesktop.org
>> >Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
>> >Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware
>> >
>> >This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
>> >While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.
>> >
>> >v1: Initial version.
>> >
>> >v2: Corrected firmware size during memcpy(). (Suggested by Sunil)
>> >
>> >Cc: Daniel Vetter <daniel.vetter@intel.com>
>> >Cc: Damien Lespiau <damien.lespiau@intel.com>
>> >Cc: Imre Deak <imre.deak@intel.com>
>> >Cc: Sunil Kamath <sunil.kamath@intel.com>
>> >Signed-off-by: Animesh Manna <animesh.manna@intel.com>
>> >Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
>> >---
>> >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
>> >  2 files changed, 5 insertions(+), 13 deletions(-)
>> >
>> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
>> >--- a/drivers/gpu/drm/i915/i915_drv.h
>> >+++ b/drivers/gpu/drm/i915/i915_drv.h
>> >@@ -742,7 +742,7 @@ enum csr_state {
>> >  struct intel_csr {
>> >  	const char *fw_path;
>> >-	__be32 *dmc_payload;
>> >+	uint32_t *dmc_payload;
>> >  	uint32_t dmc_fw_size;
>> >  	uint32_t mmio_count;
>> >  	uint32_t mmioaddr[8];
>> >diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
>> >index 6d8a7bf..ba1ae03 100644
>> >--- a/drivers/gpu/drm/i915/intel_csr.c
>> >+++ b/drivers/gpu/drm/i915/intel_csr.c
>> >@@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
>> >  	struct drm_i915_private *dev_priv = dev->dev_private;
>> >-	__be32 *payload = dev_priv->csr.dmc_payload;
>> >+	u32 *payload = dev_priv->csr.dmc_payload;
>> >  	uint32_t i, fw_size;
>> >  	if (!IS_GEN9(dev)) {
>> >@@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
>> >  	fw_size = dev_priv->csr.dmc_fw_size;
>> >  	for (i = 0; i < fw_size; i++)
>> >  		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
>> >-			(u32 __force)payload[i]);
>> >+			payload[i]);
>> >  	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
>> >  		I915_WRITE(dev_priv->csr.mmioaddr[i],
>> >@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>> >  	char substepping = intel_get_substepping(dev);
>> >  	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
>> >  	uint32_t i;
>> >-	__be32 *dmc_payload;
>> >+	uint32_t *dmc_payload;
>> >  	bool fw_loaded = false;
>> >  	if (!fw) {
>> >@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>> >  	}
>> >  	dmc_payload = csr->dmc_payload;
>> >-	for (i = 0; i < dmc_header->fw_size; i++) {
>> >-		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
>> >-		/*
>> >-		 * The firmware payload is an array of 32 bit words stored in
>> >-		 * little-endian format in the firmware image and programmed
>> >-		 * as 32 bit big-endian format to memory.
>> >-		 */
>> >-		dmc_payload[i] = cpu_to_be32(*tmp);
>> >-	}
>> >+	memcpy(dmc_payload, &fw->data[readcount], nbytes);
>> >  	/* load csr program during system boot, as needed for DC states */
>> >  	intel_csr_load_program(dev);
>> >--
>> >2.0.2
>> >
>> 
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Sept. 14, 2015, 7:35 a.m. UTC | #5
On Fri, Sep 11, 2015 at 06:29:19PM +0300, Mika Kuoppala wrote:
> Daniel Vetter <daniel@ffwll.ch> writes:
> 
> > On Tue, Aug 04, 2015 at 11:25:40AM +0530, Animesh Manna wrote:
> >> 
> >> 
> >> On 8/4/2015 9:16 AM, Nagaraju, Vathsala wrote:
> >> >"This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
> >> >Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.
> >> 
> >> Packaging of firmware binary completely changed after api version 1.0, so by old firmware I want to mean
> >> the initial version before dmc 1.0.
> >
> > This kind of information really must be included in the commit message.
> > Very likely someone with old firmware will bisect to this commit, and if
> > you don't include that people need latest dmc firmware there will be
> > confusion.
> >
> > Commit message _must_ be complete and contain everything that was
> > discussed while reviewing and developing a patch.
> >
> > I added a note while merging the patch.
> > -Daniel
> >
> 
> Hi,
> 
> There is problem with gem_exec_nop throwing gpu hang with GPU HANG:
> ecode 9:0:0xfffffffe on some skls. Looks like faster ones are
> more suspectible.
> 
> My bisect pointed to this commit. As this looks like commit
> to fix the loading, one could assume that at this point the firmware
> started to actually do something.
> 
> So my failure hypothesis is that our driver runtime power well
> bookkeeping and handover to dmc is racy. Lots of speculation here but
> i suspect that during initialization, when the gpu is busy with the
> workarounds and null state batch, handover to dmc happens
> and something bad with it, like render side power toggles
> while the dmc initializes?
> 
> I haven't yet tested the redesign series for cure/clue, but changing
> the firmware from 1.19 to 1.21 makes it very hard to reproduce
> the issue.
> 
> On a related note, we need the firmware version to be part of
> error state. Also the driver could warn if not-new-enough firmware
> is found, to atleast push the unlucky bisecter to a right
> direction.

Does a revert remedy the problem?
-Daniel

> 
> - Mika
> 
> >> 
> >> >
> >> >-----Original Message-----
> >> >From: Manna, Animesh
> >> >Sent: Monday, August 3, 2015 9:56 PM
> >> >To: intel-gfx@lists.freedesktop.org
> >> >Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
> >> >Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware
> >> >
> >> >This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
> >> >While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.
> >> >
> >> >v1: Initial version.
> >> >
> >> >v2: Corrected firmware size during memcpy(). (Suggested by Sunil)
> >> >
> >> >Cc: Daniel Vetter <daniel.vetter@intel.com>
> >> >Cc: Damien Lespiau <damien.lespiau@intel.com>
> >> >Cc: Imre Deak <imre.deak@intel.com>
> >> >Cc: Sunil Kamath <sunil.kamath@intel.com>
> >> >Signed-off-by: Animesh Manna <animesh.manna@intel.com>
> >> >Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
> >> >---
> >> >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
> >> >  2 files changed, 5 insertions(+), 13 deletions(-)
> >> >
> >> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
> >> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >> >@@ -742,7 +742,7 @@ enum csr_state {
> >> >  struct intel_csr {
> >> >  	const char *fw_path;
> >> >-	__be32 *dmc_payload;
> >> >+	uint32_t *dmc_payload;
> >> >  	uint32_t dmc_fw_size;
> >> >  	uint32_t mmio_count;
> >> >  	uint32_t mmioaddr[8];
> >> >diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
> >> >index 6d8a7bf..ba1ae03 100644
> >> >--- a/drivers/gpu/drm/i915/intel_csr.c
> >> >+++ b/drivers/gpu/drm/i915/intel_csr.c
> >> >@@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
> >> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >> >-	__be32 *payload = dev_priv->csr.dmc_payload;
> >> >+	u32 *payload = dev_priv->csr.dmc_payload;
> >> >  	uint32_t i, fw_size;
> >> >  	if (!IS_GEN9(dev)) {
> >> >@@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
> >> >  	fw_size = dev_priv->csr.dmc_fw_size;
> >> >  	for (i = 0; i < fw_size; i++)
> >> >  		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
> >> >-			(u32 __force)payload[i]);
> >> >+			payload[i]);
> >> >  	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
> >> >  		I915_WRITE(dev_priv->csr.mmioaddr[i],
> >> >@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
> >> >  	char substepping = intel_get_substepping(dev);
> >> >  	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
> >> >  	uint32_t i;
> >> >-	__be32 *dmc_payload;
> >> >+	uint32_t *dmc_payload;
> >> >  	bool fw_loaded = false;
> >> >  	if (!fw) {
> >> >@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
> >> >  	}
> >> >  	dmc_payload = csr->dmc_payload;
> >> >-	for (i = 0; i < dmc_header->fw_size; i++) {
> >> >-		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
> >> >-		/*
> >> >-		 * The firmware payload is an array of 32 bit words stored in
> >> >-		 * little-endian format in the firmware image and programmed
> >> >-		 * as 32 bit big-endian format to memory.
> >> >-		 */
> >> >-		dmc_payload[i] = cpu_to_be32(*tmp);
> >> >-	}
> >> >+	memcpy(dmc_payload, &fw->data[readcount], nbytes);
> >> >  	/* load csr program during system boot, as needed for DC states */
> >> >  	intel_csr_load_program(dev);
> >> >--
> >> >2.0.2
> >> >
> >> 
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> > -- 
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Mika Kuoppala Sept. 17, 2015, 9:36 a.m. UTC | #6
Daniel Vetter <daniel@ffwll.ch> writes:

> On Fri, Sep 11, 2015 at 06:29:19PM +0300, Mika Kuoppala wrote:
>> Daniel Vetter <daniel@ffwll.ch> writes:
>> 
>> > On Tue, Aug 04, 2015 at 11:25:40AM +0530, Animesh Manna wrote:
>> >> 
>> >> 
>> >> On 8/4/2015 9:16 AM, Nagaraju, Vathsala wrote:
>> >> >"This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware."
>> >> >Which version of DMC need reversal  logic?  Atleast , 4,1.16,1.18 follow the same format.
>> >> 
>> >> Packaging of firmware binary completely changed after api version 1.0, so by old firmware I want to mean
>> >> the initial version before dmc 1.0.
>> >
>> > This kind of information really must be included in the commit message.
>> > Very likely someone with old firmware will bisect to this commit, and if
>> > you don't include that people need latest dmc firmware there will be
>> > confusion.
>> >
>> > Commit message _must_ be complete and contain everything that was
>> > discussed while reviewing and developing a patch.
>> >
>> > I added a note while merging the patch.
>> > -Daniel
>> >
>> 
>> Hi,
>> 
>> There is problem with gem_exec_nop throwing gpu hang with GPU HANG:
>> ecode 9:0:0xfffffffe on some skls. Looks like faster ones are
>> more suspectible.
>> 
>> My bisect pointed to this commit. As this looks like commit
>> to fix the loading, one could assume that at this point the firmware
>> started to actually do something.
>> 
>> So my failure hypothesis is that our driver runtime power well
>> bookkeeping and handover to dmc is racy. Lots of speculation here but
>> i suspect that during initialization, when the gpu is busy with the
>> workarounds and null state batch, handover to dmc happens
>> and something bad with it, like render side power toggles
>> while the dmc initializes?
>> 
>> I haven't yet tested the redesign series for cure/clue, but changing
>> the firmware from 1.19 to 1.21 makes it very hard to reproduce
>> the issue.
>> 
>> On a related note, we need the firmware version to be part of
>> error state. Also the driver could warn if not-new-enough firmware
>> is found, to atleast push the unlucky bisecter to a right
>> direction.
>
> Does a revert remedy the problem?
> -Daniel

Yes. By making the loading fail and firmware not starting.

With current knowledge I would say that upgrading from 1.19
to 1.21 is better than revert.

-Mika

>
>> 
>> - Mika
>> 
>> >> 
>> >> >
>> >> >-----Original Message-----
>> >> >From: Manna, Animesh
>> >> >Sent: Monday, August 3, 2015 9:56 PM
>> >> >To: intel-gfx@lists.freedesktop.org
>> >> >Cc: Manna, Animesh; Vetter, Daniel; Lespiau, Damien; Deak, Imre; Kamath, Sunil; Nagaraju, Vathsala
>> >> >Subject: [SKL-DMC-BUGFIX 1/5] drm/i915/gen9: Removed byte swapping for csr firmware
>> >> >
>> >> >This patch contains the changes to remove the byte swapping logic introduced with old dmc firmware.
>> >> >While debugging PC10 entry issue for skylake found with latest dmc firmware version 1.18 without byte swapping dmc is working fine and able to enter PC10.
>> >> >
>> >> >v1: Initial version.
>> >> >
>> >> >v2: Corrected firmware size during memcpy(). (Suggested by Sunil)
>> >> >
>> >> >Cc: Daniel Vetter <daniel.vetter@intel.com>
>> >> >Cc: Damien Lespiau <damien.lespiau@intel.com>
>> >> >Cc: Imre Deak <imre.deak@intel.com>
>> >> >Cc: Sunil Kamath <sunil.kamath@intel.com>
>> >> >Signed-off-by: Animesh Manna <animesh.manna@intel.com>
>> >> >Signed-off-by: Vathsala Nagaraju <vathsala.nagaraju@intel.com>
>> >> >---
>> >> >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-  drivers/gpu/drm/i915/intel_csr.c | 16 ++++------------
>> >> >  2 files changed, 5 insertions(+), 13 deletions(-)
>> >> >
>> >> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
>> >> >--- a/drivers/gpu/drm/i915/i915_drv.h
>> >> >+++ b/drivers/gpu/drm/i915/i915_drv.h
>> >> >@@ -742,7 +742,7 @@ enum csr_state {
>> >> >  struct intel_csr {
>> >> >  	const char *fw_path;
>> >> >-	__be32 *dmc_payload;
>> >> >+	uint32_t *dmc_payload;
>> >> >  	uint32_t dmc_fw_size;
>> >> >  	uint32_t mmio_count;
>> >> >  	uint32_t mmioaddr[8];
>> >> >diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
>> >> >index 6d8a7bf..ba1ae03 100644
>> >> >--- a/drivers/gpu/drm/i915/intel_csr.c
>> >> >+++ b/drivers/gpu/drm/i915/intel_csr.c
>> >> >@@ -244,7 +244,7 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
>> >> >  	struct drm_i915_private *dev_priv = dev->dev_private;
>> >> >-	__be32 *payload = dev_priv->csr.dmc_payload;
>> >> >+	u32 *payload = dev_priv->csr.dmc_payload;
>> >> >  	uint32_t i, fw_size;
>> >> >  	if (!IS_GEN9(dev)) {
>> >> >@@ -256,7 +256,7 @@ void intel_csr_load_program(struct drm_device *dev)
>> >> >  	fw_size = dev_priv->csr.dmc_fw_size;
>> >> >  	for (i = 0; i < fw_size; i++)
>> >> >  		I915_WRITE(CSR_PROGRAM_BASE + i * 4,
>> >> >-			(u32 __force)payload[i]);
>> >> >+			payload[i]);
>> >> >  	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
>> >> >  		I915_WRITE(dev_priv->csr.mmioaddr[i],
>> >> >@@ -279,7 +279,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>> >> >  	char substepping = intel_get_substepping(dev);
>> >> >  	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
>> >> >  	uint32_t i;
>> >> >-	__be32 *dmc_payload;
>> >> >+	uint32_t *dmc_payload;
>> >> >  	bool fw_loaded = false;
>> >> >  	if (!fw) {
>> >> >@@ -375,15 +375,7 @@ static void finish_csr_load(const struct firmware *fw, void *context)
>> >> >  	}
>> >> >  	dmc_payload = csr->dmc_payload;
>> >> >-	for (i = 0; i < dmc_header->fw_size; i++) {
>> >> >-		uint32_t *tmp = (u32 *)&fw->data[readcount + i * 4];
>> >> >-		/*
>> >> >-		 * The firmware payload is an array of 32 bit words stored in
>> >> >-		 * little-endian format in the firmware image and programmed
>> >> >-		 * as 32 bit big-endian format to memory.
>> >> >-		 */
>> >> >-		dmc_payload[i] = cpu_to_be32(*tmp);
>> >> >-	}
>> >> >+	memcpy(dmc_payload, &fw->data[readcount], nbytes);
>> >> >  	/* load csr program during system boot, as needed for DC states */
>> >> >  	intel_csr_load_program(dev);
>> >> >--
>> >> >2.0.2
>> >> >
>> >> 
>> >> _______________________________________________
>> >> Intel-gfx mailing list
>> >> Intel-gfx@lists.freedesktop.org
>> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> >
>> > -- 
>> > Daniel Vetter
>> > Software Engineer, Intel Corporation
>> > http://blog.ffwll.ch
>> > _______________________________________________
>> > Intel-gfx mailing list
>> > Intel-gfx@lists.freedesktop.org
>> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b94ada9..9d0ee58 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -742,7 +742,7 @@  enum csr_state {
 
 struct intel_csr {
 	const char *fw_path;
-	__be32 *dmc_payload;
+	uint32_t *dmc_payload;
 	uint32_t dmc_fw_size;
 	uint32_t mmio_count;
 	uint32_t mmioaddr[8];
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 6d8a7bf..ba1ae03 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -244,7 +244,7 @@  void intel_csr_load_status_set(struct drm_i915_private *dev_priv,  void intel_csr_load_program(struct drm_device *dev)  {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	__be32 *payload = dev_priv->csr.dmc_payload;
+	u32 *payload = dev_priv->csr.dmc_payload;
 	uint32_t i, fw_size;
 
 	if (!IS_GEN9(dev)) {
@@ -256,7 +256,7 @@  void intel_csr_load_program(struct drm_device *dev)
 	fw_size = dev_priv->csr.dmc_fw_size;