diff mbox series

[2/2] drm/i915: Make GEM errors non-fatal by default

Message ID 20210114113434.8229-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915: Add DEBUG_GEM to the recommended CI config | expand

Commit Message

Chris Wilson Jan. 14, 2021, 11:34 a.m. UTC
While it is immensely convenient during development to only tackle the
first error, and not be flooded by repeated or secondary issues, many more
casual testers are not set up to remotely capture debug traces. For those
testers, it is more beneficial to keep the system running in the remote
chance that they are able to extract the original debug logs.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.h    |  9 ++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

Comments

Mika Kuoppala Jan. 19, 2021, 11:33 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> While it is immensely convenient during development to only tackle the
> first error, and not be flooded by repeated or secondary issues, many more
> casual testers are not set up to remotely capture debug traces. For those
> testers, it is more beneficial to keep the system running in the remote
> chance that they are able to extract the original debug logs.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig.debug | 16 ++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem.h    |  9 ++++++++-
>  2 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 3701bae5b855..4005f6619bec 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -41,6 +41,7 @@ config DRM_I915_DEBUG
>  	select SW_SYNC # signaling validation framework (igt/syncobj*)
>  	select DRM_I915_WERROR
>  	select DRM_I915_DEBUG_GEM
> +	select DRM_I915_DEBUG_GEM_ONCE
>  	select DRM_I915_DEBUG_MMIO
>  	select DRM_I915_DEBUG_RUNTIME_PM
>  	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
> @@ -80,6 +81,21 @@ config DRM_I915_DEBUG_GEM
>  
>  	  If in doubt, say "N".
>  
> +config DRM_I915_DEBUG_GEM_ONCE
> +	bool "Make a GEM debug failure fatal"
> +	default n
> +	depends on DRM_I915_DEBUG_GEM
> +	help
> +	  During development, we often only want the very first failure
> +	  as that would otherwise be lost in the deluge of subsequent
> +	  failures. However, more casual testers may not want to trigger
> +	  a hard BUG_ON, and instead hope that the system remains usable
> +	  enough to capture a bug report in situ.

Yes. It also sometimes lets us check the state the hardware ended up in,
e.g. unusual conditions that are not captured by hang/hangcheck.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +
> +	  Recommended for driver developers only.
> +
> +	  If in doubt, say "N".
> +
>  config DRM_I915_ERRLOG_GEM
>  	bool "Insert extra logging (very verbose) for common GEM errors"
>  	default n
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index a4cad3f154ca..e622aee6e4be 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -38,11 +38,18 @@ struct drm_i915_private;
>  
>  #define GEM_SHOW_DEBUG() drm_debug_enabled(DRM_UT_DRIVER)
>  
> +#ifdef CONFIG_DRM_I915_DEBUG_GEM_ONCE
> +#define __GEM_BUG(cond) BUG()
> +#else
> +#define __GEM_BUG(cond) \
> +	WARN(1, "%s:%d GEM_BUG_ON(%s)\n", __func__, __LINE__, __stringify(cond))
> +#endif
> +
>  #define GEM_BUG_ON(condition) do { if (unlikely((condition))) {	\
>  		GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \
>  			      __func__, __LINE__, __stringify(condition)); \
>  		GEM_TRACE_DUMP(); \
> -		BUG(); \
> +		__GEM_BUG(condition); \
>  		} \
>  	} while(0)
>  #define GEM_WARN_ON(expr) WARN_ON(expr)
> -- 
> 2.20.1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 3701bae5b855..4005f6619bec 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -41,6 +41,7 @@  config DRM_I915_DEBUG
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_WERROR
 	select DRM_I915_DEBUG_GEM
+	select DRM_I915_DEBUG_GEM_ONCE
 	select DRM_I915_DEBUG_MMIO
 	select DRM_I915_DEBUG_RUNTIME_PM
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
@@ -80,6 +81,21 @@  config DRM_I915_DEBUG_GEM
 
 	  If in doubt, say "N".
 
+config DRM_I915_DEBUG_GEM_ONCE
+	bool "Make a GEM debug failure fatal"
+	default n
+	depends on DRM_I915_DEBUG_GEM
+	help
+	  During development, we often only want the very first failure
+	  as that would otherwise be lost in the deluge of subsequent
+	  failures. However, more casual testers may not want to trigger
+	  a hard BUG_ON, and instead hope that the system remains usable
+	  enough to capture a bug report in situ.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_I915_ERRLOG_GEM
 	bool "Insert extra logging (very verbose) for common GEM errors"
 	default n
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index a4cad3f154ca..e622aee6e4be 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -38,11 +38,18 @@  struct drm_i915_private;
 
 #define GEM_SHOW_DEBUG() drm_debug_enabled(DRM_UT_DRIVER)
 
+#ifdef CONFIG_DRM_I915_DEBUG_GEM_ONCE
+#define __GEM_BUG(cond) BUG()
+#else
+#define __GEM_BUG(cond) \
+	WARN(1, "%s:%d GEM_BUG_ON(%s)\n", __func__, __LINE__, __stringify(cond))
+#endif
+
 #define GEM_BUG_ON(condition) do { if (unlikely((condition))) {	\
 		GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \
 			      __func__, __LINE__, __stringify(condition)); \
 		GEM_TRACE_DUMP(); \
-		BUG(); \
+		__GEM_BUG(condition); \
 		} \
 	} while(0)
 #define GEM_WARN_ON(expr) WARN_ON(expr)