diff mbox series

drm/i915: Watchdog timeout: Include threshold value in error state

Message ID 20190105024001.37629-6-carlos.santa@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Watchdog timeout: Include threshold value in error state | expand

Commit Message

Santa, Carlos Jan. 5, 2019, 2:39 a.m. UTC
From: Michel Thierry <michel.thierry@intel.com>

Save the per-engine watchdog threshold in the context record of each engine's error state, and print it in microseconds (us) when the error state is dumped.

v2: Only do it for gen8+ (and prevent a missing-case warning).
v3: use ctx->__engine.
v4: Rebase.

Cc: Antonio Argenziano <antonio.argenziano@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Carlos Santa <carlos.santa@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 12 ++++++++----
 drivers/gpu/drm/i915/i915_gpu_error.h |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

Comments

kernel test robot Jan. 5, 2019, 4:19 a.m. UTC | #1
Hi Michel,

Thank you for the patch! However, there is still something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.20 next-20190103]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Carlos-Santa/drm-i915-Watchdog-timeout-Include-threshold-value-in-error-state/20190105-112649
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-x019-201900 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_gpu_error.c: In function 'error_print_context':
>> drivers/gpu/drm/i915/i915_gpu_error.c:466:4: error: implicit declaration of function 'watchdog_to_us'; did you mean 'wq_watchdog_touch'? [-Werror=implicit-function-declaration]
       watchdog_to_us(m->i915, ctx->watchdog_threshold) : 0);
       ^
   drivers/gpu/drm/i915/i915_gpu_error.c:189:49: note: in definition of macro 'err_printf'
    #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
                                                    ^~~~~~~~~~~
   drivers/gpu/drm/i915/i915_gpu_error.c: In function 'record_context':
>> drivers/gpu/drm/i915/i915_gpu_error.c:1481:50: error: 'struct intel_context' has no member named 'watchdog_threshold'
     e->watchdog_threshold = ctx->__engine[engine_id].watchdog_threshold;
                                                     ^
   cc1: all warnings being treated as errors

vim +466 drivers/gpu/drm/i915/i915_gpu_error.c

   456	
   457	static void error_print_context(struct drm_i915_error_state_buf *m,
   458					const char *header,
   459					const struct drm_i915_error_context *ctx)
   460	{
   461		err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d, watchdog %dus\n",
   462			   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
   463			   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
   464			   ctx->guilty, ctx->active,
   465			   INTEL_GEN(m->i915) >= 8 ?
 > 466				watchdog_to_us(m->i915, ctx->watchdog_threshold) : 0);
   467	}
   468	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Jan. 5, 2019, 4:39 a.m. UTC | #2
Hi Michel,

Thank you for the patch! However, there is still something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.20 next-20190103]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Carlos-Santa/drm-i915-Watchdog-timeout-Include-threshold-value-in-error-state/20190105-112649
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s1-201900 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/gpu//drm/i915/i915_gpu_error.c: In function 'error_print_context':
>> drivers/gpu//drm/i915/i915_gpu_error.c:466:4: error: implicit declaration of function 'watchdog_to_us' [-Werror=implicit-function-declaration]
       watchdog_to_us(m->i915, ctx->watchdog_threshold) : 0);
       ^
   drivers/gpu//drm/i915/i915_gpu_error.c:189:49: note: in definition of macro 'err_printf'
    #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
                                                    ^~~~~~~~~~~
   drivers/gpu//drm/i915/i915_gpu_error.c: In function 'record_context':
   drivers/gpu//drm/i915/i915_gpu_error.c:1481:50: error: 'struct intel_context' has no member named 'watchdog_threshold'
     e->watchdog_threshold = ctx->__engine[engine_id].watchdog_threshold;
                                                     ^
   cc1: some warnings being treated as errors

vim +/watchdog_to_us +466 drivers/gpu//drm/i915/i915_gpu_error.c

   456	
   457	static void error_print_context(struct drm_i915_error_state_buf *m,
   458					const char *header,
   459					const struct drm_i915_error_context *ctx)
   460	{
   461		err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d, watchdog %dus\n",
   462			   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
   463			   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
   464			   ctx->guilty, ctx->active,
   465			   INTEL_GEN(m->i915) >= 8 ?
 > 466				watchdog_to_us(m->i915, ctx->watchdog_threshold) : 0);
   467	}
   468	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5533a741abeb..f97379606b1b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -458,10 +458,12 @@  static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
+	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d, watchdog %dus\n",
 		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
 		   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
-		   ctx->guilty, ctx->active);
+		   ctx->guilty, ctx->active,
+		   INTEL_GEN(m->i915) >= 8 ?
+			watchdog_to_us(m->i915, ctx->watchdog_threshold) : 0);
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -1451,7 +1453,8 @@  static void error_record_engine_execlists(struct intel_engine_cs *engine,
 }
 
 static void record_context(struct drm_i915_error_context *e,
-			   struct i915_gem_context *ctx)
+			   struct i915_gem_context *ctx,
+			   u32 engine_id)
 {
 	if (ctx->pid) {
 		struct task_struct *task;
@@ -1472,6 +1475,7 @@  static void record_context(struct drm_i915_error_context *e,
 	e->bannable = i915_gem_context_is_bannable(ctx);
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
+	e->watchdog_threshold =	ctx->__engine[engine_id].watchdog_threshold;
 }
 
 static void request_record_user_bo(struct i915_request *request,
@@ -1556,7 +1560,7 @@  static void gem_record_rings(struct i915_gpu_state *error)
 
 			ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm;
 
-			record_context(&ee->context, ctx);
+			record_context(&ee->context, ctx, engine->id);
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 7130786aa5b4..affd12e17f39 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -129,6 +129,7 @@  struct i915_gpu_state {
 			int ban_score;
 			int active;
 			int guilty;
+			int watchdog_threshold;
 			bool bannable;
 			struct i915_sched_attr sched_attr;
 		} context;