diff mbox

[4/5] drm/i915: After reset on sanitization, reset the engine backends

Message ID 20180529132922.6831-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 29, 2018, 1:29 p.m. UTC
As we reset the GPU on suspend/resume, we also need to reset the
engine state tracking, so call into the engine backends. This is
especially important so that we can also sanitize the state tracking
across resume.

References: https://bugs.freedesktop.org/show_bug.cgi?id=106702
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

kernel test robot May 31, 2018, 1:11 p.m. UTC | #1
Hi Chris,

Thank you for the patch! However, there is something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on next-20180530]
[cannot apply to v4.17-rc7]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-Remove-stale-asserts-from-i915_gem_find_active_request/20180531-202540
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-x019-201821 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_gem.c: In function 'i915_gem_sanitize':
>> drivers/gpu/drm/i915/i915_gem.c:5035:15: error: 'struct intel_engine_cs' has no member named 'reset'; did you mean 'reset_hw'?
      if (engine->reset.reset)
                  ^~~~~
                  reset_hw
   drivers/gpu/drm/i915/i915_gem.c:5036:12: error: 'struct intel_engine_cs' has no member named 'reset'; did you mean 'reset_hw'?
       engine->reset.reset(engine, NULL);
               ^~~~~
               reset_hw

vim +5035 drivers/gpu/drm/i915/i915_gem.c

  5000	
  5001	void i915_gem_sanitize(struct drm_i915_private *i915)
  5002	{
  5003		struct intel_engine_cs *engine;
  5004		enum intel_engine_id id;
  5005	
  5006		GEM_TRACE("\n");
  5007	
  5008		mutex_lock(&i915->drm.struct_mutex);
  5009	
  5010		intel_runtime_pm_get(i915);
  5011		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
  5012	
  5013		/*
  5014		 * As we have just resumed the machine and woken the device up from
  5015		 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
  5016		 * back to defaults, recovering from whatever wedged state we left it
  5017		 * in and so worth trying to use the device once more.
  5018		 */
  5019		if (i915_terminally_wedged(&i915->gpu_error))
  5020			i915_gem_unset_wedged(i915);
  5021	
  5022		/*
  5023		 * If we inherit context state from the BIOS or earlier occupants
  5024		 * of the GPU, the GPU may be in an inconsistent state when we
  5025		 * try to take over. The only way to remove the earlier state
  5026		 * is by resetting. However, resetting on earlier gen is tricky as
  5027		 * it may impact the display and we are uncertain about the stability
  5028		 * of the reset, so this could be applied to even earlier gen.
  5029		 */
  5030		if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
  5031			WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
  5032	
  5033		/* Reset the submission backend after resume as well as the GPU reset */
  5034		for_each_engine(engine, i915, id) {
> 5035			if (engine->reset.reset)
  5036				engine->reset.reset(engine, NULL);
  5037		}
  5038	
  5039		intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
  5040		intel_runtime_pm_put(i915);
  5041	
  5042		i915_gem_contexts_lost(i915);
  5043		mutex_unlock(&i915->drm.struct_mutex);
  5044	}
  5045	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot May 31, 2018, 2:34 p.m. UTC | #2
Hi Chris,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on next-20180530]
[cannot apply to v4.17-rc7]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-Remove-stale-asserts-from-i915_gem_find_active_request/20180531-202540
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-x006-201821 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:10:0,
                    from include/linux/list.h:9,
                    from include/linux/agp_backend.h:33,
                    from include/drm/drmP.h:35,
                    from drivers/gpu//drm/i915/i915_gem.c:28:
   drivers/gpu//drm/i915/i915_gem.c: In function 'i915_gem_sanitize':
   drivers/gpu//drm/i915/i915_gem.c:5035:15: error: 'struct intel_engine_cs' has no member named 'reset'; did you mean 'reset_hw'?
      if (engine->reset.reset)
                  ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> drivers/gpu//drm/i915/i915_gem.c:5035:3: note: in expansion of macro 'if'
      if (engine->reset.reset)
      ^~
   drivers/gpu//drm/i915/i915_gem.c:5035:15: error: 'struct intel_engine_cs' has no member named 'reset'; did you mean 'reset_hw'?
      if (engine->reset.reset)
                  ^
   include/linux/compiler.h:58:42: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                             ^~~~
>> drivers/gpu//drm/i915/i915_gem.c:5035:3: note: in expansion of macro 'if'
      if (engine->reset.reset)
      ^~
   drivers/gpu//drm/i915/i915_gem.c:5035:15: error: 'struct intel_engine_cs' has no member named 'reset'; did you mean 'reset_hw'?
      if (engine->reset.reset)
                  ^
   include/linux/compiler.h:69:16: note: in definition of macro '__trace_if'
      ______r = !!(cond);     \
                   ^~~~
>> drivers/gpu//drm/i915/i915_gem.c:5035:3: note: in expansion of macro 'if'
      if (engine->reset.reset)
      ^~
   drivers/gpu//drm/i915/i915_gem.c:5036:12: error: 'struct intel_engine_cs' has no member named 'reset'; did you mean 'reset_hw'?
       engine->reset.reset(engine, NULL);
               ^~~~~
               reset_hw

vim +/if +5035 drivers/gpu//drm/i915/i915_gem.c

  5000	
  5001	void i915_gem_sanitize(struct drm_i915_private *i915)
  5002	{
  5003		struct intel_engine_cs *engine;
  5004		enum intel_engine_id id;
  5005	
  5006		GEM_TRACE("\n");
  5007	
  5008		mutex_lock(&i915->drm.struct_mutex);
  5009	
  5010		intel_runtime_pm_get(i915);
  5011		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
  5012	
  5013		/*
  5014		 * As we have just resumed the machine and woken the device up from
  5015		 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
  5016		 * back to defaults, recovering from whatever wedged state we left it
  5017		 * in and so worth trying to use the device once more.
  5018		 */
  5019		if (i915_terminally_wedged(&i915->gpu_error))
  5020			i915_gem_unset_wedged(i915);
  5021	
  5022		/*
  5023		 * If we inherit context state from the BIOS or earlier occupants
  5024		 * of the GPU, the GPU may be in an inconsistent state when we
  5025		 * try to take over. The only way to remove the earlier state
  5026		 * is by resetting. However, resetting on earlier gen is tricky as
  5027		 * it may impact the display and we are uncertain about the stability
  5028		 * of the reset, so this could be applied to even earlier gen.
  5029		 */
  5030		if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
  5031			WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
  5032	
  5033		/* Reset the submission backend after resume as well as the GPU reset */
  5034		for_each_engine(engine, i915, id) {
> 5035			if (engine->reset.reset)
  5036				engine->reset.reset(engine, NULL);
  5037		}
  5038	
  5039		intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
  5040		intel_runtime_pm_put(i915);
  5041	
  5042		i915_gem_contexts_lost(i915);
  5043		mutex_unlock(&i915->drm.struct_mutex);
  5044	}
  5045	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 773a5910cc29..75bdfafc97a2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4954,7 +4954,22 @@  static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	GEM_TRACE("\n");
+
 	mutex_lock(&i915->drm.struct_mutex);
+
+	intel_runtime_pm_get(i915);
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+
+	/*
+	 * As we have just resumed the machine and woken the device up from
+	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
+	 * back to defaults, recovering from whatever wedged state we left it
+	 * in and so worth trying to use the device once more.
+	 */
 	if (i915_terminally_wedged(&i915->gpu_error))
 		i915_gem_unset_wedged(i915);
 
@@ -4969,6 +4984,15 @@  void i915_gem_sanitize(struct drm_i915_private *i915)
 	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
 		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
 
+	/* Reset the submission backend after resume as well as the GPU reset */
+	for_each_engine(engine, i915, id) {
+		if (engine->reset.reset)
+			engine->reset.reset(engine, NULL);
+	}
+
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+	intel_runtime_pm_put(i915);
+
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 }