[v2,2/3] drm/i915: Print caller when tainting for CI
diff mbox series

Message ID 20200706140125.172844-2-michal@hardline.pl
State New
Headers show
Series
  • [v3,1/3] drm/i915: Reboot CI if we get wedged during driver init
Related show

Commit Message

Michał Winiarski July 6, 2020, 2:01 p.m. UTC
From: Michał Winiarski <michal.winiarski@intel.com>

We can add taint from multiple places; printing the caller gives us a
better overview of what exactly caused the tainting.

v2: Tweak format and print the device (Chris)

Suggested-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Petri Latvala <petri.latvala@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_reset.c  |  6 +++---
 drivers/gpu/drm/i915/gt/selftest_rc6.c |  2 +-
 drivers/gpu/drm/i915/i915_gem.h        |  2 +-
 drivers/gpu/drm/i915/i915_utils.h      | 12 ++++++++++--
 drivers/gpu/drm/i915/intel_uncore.c    |  4 ++--
 5 files changed, 17 insertions(+), 9 deletions(-)

Comments

Chris Wilson July 6, 2020, 2:09 p.m. UTC | #1
Quoting Michał Winiarski (2020-07-06 15:01:24)
> From: Michał Winiarski <michal.winiarski@intel.com>
> 
> We can add taint from multiple places, printing the caller allows us to
> have a better overview of what exactly caused us to do the tainting.
> 
> v2: Tweak format and print the device (Chris)
> 
> Suggested-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Petri Latvala <petri.latvala@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_reset.c  |  6 +++---
>  drivers/gpu/drm/i915/gt/selftest_rc6.c |  2 +-
>  drivers/gpu/drm/i915/i915_gem.h        |  2 +-
>  drivers/gpu/drm/i915/i915_utils.h      | 12 ++++++++++--
>  drivers/gpu/drm/i915/intel_uncore.c    |  4 ++--
>  5 files changed, 17 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 6f94b6479a2f..121bf39a6f3e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -930,7 +930,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
>                  * Warn CI about the unrecoverable wedged condition.
>                  * Time for a reboot.
>                  */
> -               add_taint_for_CI(TAINT_WARN);
> +               add_taint_for_CI(gt->i915, TAINT_WARN);
>                 return false;
>         }
>  
> @@ -1097,7 +1097,7 @@ void intel_gt_reset(struct intel_gt *gt,
>          * rather than continue on into oblivion. For everyone else,
>          * the system should still plod along, but they have been warned!
>          */
> -       add_taint_for_CI(TAINT_WARN);
> +       add_taint_for_CI(gt->i915, TAINT_WARN);
>  error:
>         __intel_gt_set_wedged(gt);
>         goto finish;
> @@ -1362,7 +1362,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
>         set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
>  
>         /* Wedged on init is non-recoverable */
> -       add_taint_for_CI(TAINT_WARN);
> +       add_taint_for_CI(gt->i915, TAINT_WARN);
>  }
>  
>  void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
> index 3c8434846fa1..64ef5ee5decf 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
> @@ -233,7 +233,7 @@ int live_rc6_ctx_wa(void *arg)
>                             i915_reset_engine_count(error, engine)) {
>                                 pr_err("%s: GPU reset required\n",
>                                        engine->name);
> -                               add_taint_for_CI(TAINT_WARN);
> +                               add_taint_for_CI(gt->i915, TAINT_WARN);
>                                 err = -EIO;
>                                 goto out;
>                         }
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 1753c84d6c0d..f333e88a2b6e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -72,7 +72,7 @@ struct drm_i915_private;
>         trace_printk(__VA_ARGS__);                                      \
>  } while (0)
>  #define GEM_TRACE_DUMP() \
> -       do { ftrace_dump(DUMP_ALL); add_taint_for_CI(TAINT_WARN); } while (0)
> +       do { ftrace_dump(DUMP_ALL); __add_taint_for_CI(TAINT_WARN); } while (0)
>  #define GEM_TRACE_DUMP_ON(expr) \
>         do { if (expr) GEM_TRACE_DUMP(); } while (0)
>  #else
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 03a73d2bd50d..82fada1e7552 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -436,7 +436,13 @@ static inline const char *enableddisabled(bool v)
>         return v ? "enabled" : "disabled";
>  }
>  
> -static inline void add_taint_for_CI(unsigned int taint)
> +static inline void __add_taint_for_CI(unsigned int taint)
> +{
> +       add_taint(taint, LOCKDEP_STILL_OK);
> +}
> +
> +static inline void
> +add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint)
>  {
>         /*
>          * The system is "ok", just about surviving for the user, but
> @@ -444,7 +450,9 @@ static inline void add_taint_for_CI(unsigned int taint)
>          * CI checks the taint state after every test and will reboot
>          * the machine if the kernel is tainted.
>          */
> -       add_taint(taint, LOCKDEP_STILL_OK);
> +       __i915_printk(i915, KERN_NOTICE, "CI tainted:%x by %pS\n",
> +                     taint, (void *)_RET_IP_);
> +       __add_taint_for_CI(taint);
>  }

The only change I made was to put this out-of-line in i915_utils.c
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 6f94b6479a2f..121bf39a6f3e 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -930,7 +930,7 @@  static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 * Warn CI about the unrecoverable wedged condition.
 		 * Time for a reboot.
 		 */
-		add_taint_for_CI(TAINT_WARN);
+		add_taint_for_CI(gt->i915, TAINT_WARN);
 		return false;
 	}
 
@@ -1097,7 +1097,7 @@  void intel_gt_reset(struct intel_gt *gt,
 	 * rather than continue on into oblivion. For everyone else,
 	 * the system should still plod along, but they have been warned!
 	 */
-	add_taint_for_CI(TAINT_WARN);
+	add_taint_for_CI(gt->i915, TAINT_WARN);
 error:
 	__intel_gt_set_wedged(gt);
 	goto finish;
@@ -1362,7 +1362,7 @@  void intel_gt_set_wedged_on_init(struct intel_gt *gt)
 	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
 
 	/* Wedged on init is non-recoverable */
-	add_taint_for_CI(TAINT_WARN);
+	add_taint_for_CI(gt->i915, TAINT_WARN);
 }
 
 void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 3c8434846fa1..64ef5ee5decf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -233,7 +233,7 @@  int live_rc6_ctx_wa(void *arg)
 			    i915_reset_engine_count(error, engine)) {
 				pr_err("%s: GPU reset required\n",
 				       engine->name);
-				add_taint_for_CI(TAINT_WARN);
+				add_taint_for_CI(gt->i915, TAINT_WARN);
 				err = -EIO;
 				goto out;
 			}
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 1753c84d6c0d..f333e88a2b6e 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -72,7 +72,7 @@  struct drm_i915_private;
 	trace_printk(__VA_ARGS__);					\
 } while (0)
 #define GEM_TRACE_DUMP() \
-	do { ftrace_dump(DUMP_ALL); add_taint_for_CI(TAINT_WARN); } while (0)
+	do { ftrace_dump(DUMP_ALL); __add_taint_for_CI(TAINT_WARN); } while (0)
 #define GEM_TRACE_DUMP_ON(expr) \
 	do { if (expr) GEM_TRACE_DUMP(); } while (0)
 #else
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 03a73d2bd50d..82fada1e7552 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -436,7 +436,13 @@  static inline const char *enableddisabled(bool v)
 	return v ? "enabled" : "disabled";
 }
 
-static inline void add_taint_for_CI(unsigned int taint)
+static inline void __add_taint_for_CI(unsigned int taint)
+{
+	add_taint(taint, LOCKDEP_STILL_OK);
+}
+
+static inline void
+add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint)
 {
 	/*
 	 * The system is "ok", just about surviving for the user, but
@@ -444,7 +450,9 @@  static inline void add_taint_for_CI(unsigned int taint)
 	 * CI checks the taint state after every test and will reboot
 	 * the machine if the kernel is tainted.
 	 */
-	add_taint(taint, LOCKDEP_STILL_OK);
+	__i915_printk(i915, KERN_NOTICE, "CI tainted:%x by %pS\n",
+		      taint, (void *)_RET_IP_);
+	__add_taint_for_CI(taint);
 }
 
 void cancel_timer(struct timer_list *t);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 592364aed2da..8e2c073da1aa 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -142,7 +142,7 @@  fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
 	if (wait_ack_clear(d, FORCEWAKE_KERNEL)) {
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
-		add_taint_for_CI(TAINT_WARN); /* CI now unreliable */
+		add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
 	}
 }
 
@@ -219,7 +219,7 @@  fw_domain_wait_ack_set(const struct intel_uncore_forcewake_domain *d)
 	if (wait_ack_set(d, FORCEWAKE_KERNEL)) {
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
-		add_taint_for_CI(TAINT_WARN); /* CI now unreliable */
+		add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
 	}
 }