diff mbox series

drm/i915/guc: Add debug capture of GuC exception

Message ID 20190621200940.32665-1-robert.m.fosha@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/guc: Add debug capture of GuC exception | expand

Commit Message

Fosha, Robert M June 21, 2019, 8:09 p.m. UTC
Detect GuC firmware load failure due to an exception during execution
in GuC firmware. Output the GuC EIP where excpetion occured to dmesg
for GuC debug information.

Signed-off-by: Robert M. Fosha <robert.m.fosha@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_fw.c  | 7 +++++++
 drivers/gpu/drm/i915/intel_guc_reg.h | 1 +
 2 files changed, 8 insertions(+)

Comments

Michal Wajdeczko June 21, 2019, 8:40 p.m. UTC | #1
On Fri, 21 Jun 2019 22:09:40 +0200, Robert M. Fosha  
<robert.m.fosha@intel.com> wrote:

> Detect GuC firmware load failure due to an exception during execution
> in GuC firmware. Output the GuC EIP where excpetion occured to dmesg

two typos here

> for GuC debug information.
>
> Signed-off-by: Robert M. Fosha <robert.m.fosha@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_guc_fw.c  | 7 +++++++
>  drivers/gpu/drm/i915/intel_guc_reg.h | 1 +
>  2 files changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c  
> b/drivers/gpu/drm/i915/intel_guc_fw.c
> index 72cdafd9636a..90cb65641d60 100644
> --- a/drivers/gpu/drm/i915/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/intel_guc_fw.c
> @@ -197,6 +197,7 @@ static inline bool guc_ready(struct intel_guc *guc,  
> u32 *status)
> static int guc_wait_ucode(struct intel_guc *guc)
>  {
> +	struct drm_i915_private *i915 = guc_to_i915(guc);
>  	u32 status;
>  	int ret;
> @@ -216,6 +217,12 @@ static int guc_wait_ucode(struct intel_guc *guc)
>  		ret = -ENOEXEC;
>  	}
> +	if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {

I guess GuC exception could also happen after fw boot time,
can we add support to detect and log such exceptions?
and maybe such unified approach will cover this case as well?

> +		DRM_ERROR("GuC fw exception. GuC uKernel EIP: %#x\n",

do we need to use two names "fw" and "uKernel" here? maybe just:

	DRM_ERROR("GuC firmware exception. EIP: %#x\n",

> +			  intel_uncore_read(&i915->uncore, SOFT_SCRATCH(13)));
> +		ret = -ENOEXEC;

as this is not a blob problem, maybe -ENXIO will fit better ?

> +	}
> +
>  	if (ret == 0 && !guc_xfer_completed(guc, &status)) {
>  		DRM_ERROR("GuC is ready, but the xfer %08x is incomplete\n",
>  			  status);
> diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h  
> b/drivers/gpu/drm/i915/intel_guc_reg.h
> index a214f8b71929..d90b88fadb5e 100644
> --- a/drivers/gpu/drm/i915/intel_guc_reg.h
> +++ b/drivers/gpu/drm/i915/intel_guc_reg.h
> @@ -37,6 +37,7 @@
>  #define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
>  #define   GS_UKERNEL_LAPIC_DONE		  (0x30 << GS_UKERNEL_SHIFT)
>  #define   GS_UKERNEL_DPC_ERROR		  (0x60 << GS_UKERNEL_SHIFT)
> +#define   GS_UKERNEL_EXCEPTION		  (0x70 << GS_UKERNEL_SHIFT)
>  #define   GS_UKERNEL_READY		  (0xF0 << GS_UKERNEL_SHIFT)
>  #define   GS_MIA_SHIFT			16
>  #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
Fosha, Robert M June 21, 2019, 8:58 p.m. UTC | #2
On 6/21/19 1:40 PM, Michal Wajdeczko wrote:
> On Fri, 21 Jun 2019 22:09:40 +0200, Robert M. Fosha 
> <robert.m.fosha@intel.com> wrote:
>
>> Detect GuC firmware load failure due to an exception during execution
>> in GuC firmware. Output the GuC EIP where excpetion occured to dmesg
>
> two typos here
>
I'll correct the typos.

>> for GuC debug information.
>>
>> Signed-off-by: Robert M. Fosha <robert.m.fosha@intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> ---
>>  drivers/gpu/drm/i915/intel_guc_fw.c  | 7 +++++++
>>  drivers/gpu/drm/i915/intel_guc_reg.h | 1 +
>>  2 files changed, 8 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c 
>> b/drivers/gpu/drm/i915/intel_guc_fw.c
>> index 72cdafd9636a..90cb65641d60 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_fw.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_fw.c
>> @@ -197,6 +197,7 @@ static inline bool guc_ready(struct intel_guc 
>> *guc, u32 *status)
>> static int guc_wait_ucode(struct intel_guc *guc)
>>  {
>> +    struct drm_i915_private *i915 = guc_to_i915(guc);
>>      u32 status;
>>      int ret;
>> @@ -216,6 +217,12 @@ static int guc_wait_ucode(struct intel_guc *guc)
>>          ret = -ENOEXEC;
>>      }
>> +    if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
>
> I guess GuC exception could also happen after fw boot time,
> can we add support to detect and log such exceptions?
> and maybe such unified approach will cover this case as well?
>
Initial plan was to add the check during load and boot. Additional 
checks may be added later.

>> +        DRM_ERROR("GuC fw exception. GuC uKernel EIP: %#x\n",
>
> do we need to use two names "fw" and "uKernel" here? maybe just:
>
I'll change to use your suggestion.
>     DRM_ERROR("GuC firmware exception. EIP: %#x\n",
>
>> + intel_uncore_read(&i915->uncore, SOFT_SCRATCH(13)));
>> +        ret = -ENOEXEC;
>
> as this is not a blob problem, maybe -ENXIO will fit better ?
>
Good question. I can change to -ENXIO if you think it makes more sense.
>> +    }
>> +
>>      if (ret == 0 && !guc_xfer_completed(guc, &status)) {
>>          DRM_ERROR("GuC is ready, but the xfer %08x is incomplete\n",
>>                status);
>> diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h 
>> b/drivers/gpu/drm/i915/intel_guc_reg.h
>> index a214f8b71929..d90b88fadb5e 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_reg.h
>> +++ b/drivers/gpu/drm/i915/intel_guc_reg.h
>> @@ -37,6 +37,7 @@
>>  #define   GS_UKERNEL_MASK          (0xFF << GS_UKERNEL_SHIFT)
>>  #define   GS_UKERNEL_LAPIC_DONE          (0x30 << GS_UKERNEL_SHIFT)
>>  #define   GS_UKERNEL_DPC_ERROR          (0x60 << GS_UKERNEL_SHIFT)
>> +#define   GS_UKERNEL_EXCEPTION          (0x70 << GS_UKERNEL_SHIFT)
>>  #define   GS_UKERNEL_READY          (0xF0 << GS_UKERNEL_SHIFT)
>>  #define   GS_MIA_SHIFT            16
>>  #define   GS_MIA_MASK              (0x07 << GS_MIA_SHIFT)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c
index 72cdafd9636a..90cb65641d60 100644
--- a/drivers/gpu/drm/i915/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/intel_guc_fw.c
@@ -197,6 +197,7 @@  static inline bool guc_ready(struct intel_guc *guc, u32 *status)
 
 static int guc_wait_ucode(struct intel_guc *guc)
 {
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 	u32 status;
 	int ret;
 
@@ -216,6 +217,12 @@  static int guc_wait_ucode(struct intel_guc *guc)
 		ret = -ENOEXEC;
 	}
 
+	if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
+		DRM_ERROR("GuC fw exception. GuC uKernel EIP: %#x\n",
+			  intel_uncore_read(&i915->uncore, SOFT_SCRATCH(13)));
+		ret = -ENOEXEC;
+	}
+
 	if (ret == 0 && !guc_xfer_completed(guc, &status)) {
 		DRM_ERROR("GuC is ready, but the xfer %08x is incomplete\n",
 			  status);
diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h
index a214f8b71929..d90b88fadb5e 100644
--- a/drivers/gpu/drm/i915/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/intel_guc_reg.h
@@ -37,6 +37,7 @@ 
 #define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
 #define   GS_UKERNEL_LAPIC_DONE		  (0x30 << GS_UKERNEL_SHIFT)
 #define   GS_UKERNEL_DPC_ERROR		  (0x60 << GS_UKERNEL_SHIFT)
+#define   GS_UKERNEL_EXCEPTION		  (0x70 << GS_UKERNEL_SHIFT)
 #define   GS_UKERNEL_READY		  (0xF0 << GS_UKERNEL_SHIFT)
 #define   GS_MIA_SHIFT			16
 #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)