diff mbox series

[v3] drm/i915: Skip error capture when wedged on init

Message ID 20211111130634.266098-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [v3] drm/i915: Skip error capture when wedged on init | expand

Commit Message

Tvrtko Ursulin Nov. 11, 2021, 1:06 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Trying to capture uninitialised engines after we have wedged on init ends
in tears. Skip engine capture, together with uC capture, since failure to
initialise the uC can actually be one of the reasons for wedging on init.

v2:
 * Use i915_disable_error_state when wedging on init/fini.

v3:
 * Handle mock tests.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com> # v1
---
 drivers/gpu/drm/i915/gt/intel_reset.c            | 2 ++
 drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 ++
 2 files changed, 4 insertions(+)

Comments

Matthew Auld Nov. 12, 2021, 3:44 p.m. UTC | #1
On 11/11/2021 13:06, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Trying to capture uninitialised engines when we wedged on init ends in
> tears. Skip that together with uC capture, since failure to initialise the
> latter can actually be one of the reasons for wedging on init.
> 
> v2:
>   * Use i915_disable_error_state when wedging on init/fini.
> 
> v3:
>   * Handle mock tests.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Matthew Auld <matthew.auld@intel.com> # v1

Assuming this works locally, r-b still stands.

> ---
>   drivers/gpu/drm/i915/gt/intel_reset.c            | 2 ++
>   drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 ++
>   2 files changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 51b56b8e5003..0fbd6dbadce7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -1448,6 +1448,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
>   	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
>   		     I915_WEDGED_ON_INIT);
>   	intel_gt_set_wedged(gt);
> +	i915_disable_error_state(gt->i915, -ENODEV);
>   	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
>   
>   	/* Wedged on init is non-recoverable */
> @@ -1457,6 +1458,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
>   void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
>   {
>   	intel_gt_set_wedged(gt);
> +	i915_disable_error_state(gt->i915, -ENODEV);
>   	set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
>   	intel_gt_retire_requests(gt); /* cleanup any wedged requests */
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 9ab3f284d1dd..d0e2e61de8d4 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -177,6 +177,8 @@ struct drm_i915_private *mock_gem_device(void)
>   
>   	mock_uncore_init(&i915->uncore, i915);
>   
> +	spin_lock_init(&i915->gpu_error.lock);
> +
>   	i915_gem_init__mm(i915);
>   	intel_gt_init_early(&i915->gt, i915);
>   	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 51b56b8e5003..0fbd6dbadce7 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1448,6 +1448,7 @@  void intel_gt_set_wedged_on_init(struct intel_gt *gt)
 	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
 		     I915_WEDGED_ON_INIT);
 	intel_gt_set_wedged(gt);
+	i915_disable_error_state(gt->i915, -ENODEV);
 	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
 
 	/* Wedged on init is non-recoverable */
@@ -1457,6 +1458,7 @@  void intel_gt_set_wedged_on_init(struct intel_gt *gt)
 void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
 {
 	intel_gt_set_wedged(gt);
+	i915_disable_error_state(gt->i915, -ENODEV);
 	set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
 	intel_gt_retire_requests(gt); /* cleanup any wedged requests */
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9ab3f284d1dd..d0e2e61de8d4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -177,6 +177,8 @@  struct drm_i915_private *mock_gem_device(void)
 
 	mock_uncore_init(&i915->uncore, i915);
 
+	spin_lock_init(&i915->gpu_error.lock);
+
 	i915_gem_init__mm(i915);
 	intel_gt_init_early(&i915->gt, i915);
 	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */