diff mbox

[RFC,8/8] drm/i915: Force preemption to complete via engine reset

Message ID 20180321172625.6415-9-jeff.mcgee@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

jeff.mcgee@intel.com March 21, 2018, 5:26 p.m. UTC
From: Jeff McGee <jeff.mcgee@intel.com>

The hardware can complete the requested preemption at only certain points
in execution. Thus an uncooperative request that avoids those points can
block a preemption indefinitely. Our only option to bound the preemption
latency is to trigger reset and recovery just as we would if a request had
hung the hardware. This is so-called forced preemption. This change adds
that capability as an option for systems with extremely strict scheduling
latency requirements for its high priority requests. This option must be
used with great care. The force-preempted requests will be discarded at
the point of reset, resulting in various degrees of disruption to the
owning application up to and including crash.

The option is enabled by specifying a non-zero value for new i915 module
parameter fpreempt_timeout. This value becomes the time in milliseconds
after initiation of preemption at which the reset is triggered if the
preemption has not completed normally.

GuC mode is not supported. The fpreempt_timeout parameter is simply
ignored.

Signed-off-by: Jeff McGee <jeff.mcgee@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         | 27 ++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_params.c      |  3 +++
 drivers/gpu/drm/i915/i915_params.h      |  1 +
 drivers/gpu/drm/i915/intel_engine_cs.c  | 40 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 13 +++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++++
 6 files changed, 86 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 38f7160d99c9..0ad448792bfb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2896,8 +2896,24 @@  i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return active;
 }
 
-static bool engine_stalled(struct intel_engine_cs *engine)
+static bool engine_stalled(struct intel_engine_cs *engine,
+			   struct i915_request *request)
 {
+	if (engine->fpreempt_stalled) {
+		/* Pardon the request if it managed to yield the
+		 * engine by completing just prior to the reset. We
+		 * could be even more sophisticated here and pardon
+		 * the request if it preempted out (mid-batch) prior
+		 * to the reset, but that's not so straight-forward
+		 * to detect. Perhaps not worth splitting hairs when
+		 * the request had clearly behaved badly to get here.
+		 */
+		if (i915_request_completed(request))
+			return false;
+
+		return true;
+	}
+
 	if (!engine->hangcheck.stalled)
 		return false;
 
@@ -3038,7 +3054,7 @@  i915_gem_reset_request(struct intel_engine_cs *engine,
 	 * subsequent hangs.
 	 */
 
-	if (engine_stalled(engine)) {
+	if (engine_stalled(engine, request)) {
 		i915_gem_context_mark_guilty(request->ctx);
 		skip_request(request);
 
@@ -3046,6 +3062,13 @@  i915_gem_reset_request(struct intel_engine_cs *engine,
 		if (i915_gem_context_is_banned(request->ctx))
 			engine_skip_context(request);
 	} else {
+		/* If the request that we just pardoned was the target of a
+		 * force preemption there is no possibility of the next
+		 * request in line having started.
+		 */
+		if (engine->fpreempt_stalled)
+			return NULL;
+
 		/*
 		 * Since this is not the hung engine, it may have advanced
 		 * since the hang declaration. Double check by refinding
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 08108ce5be21..71bc8213acb2 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -178,6 +178,9 @@  i915_param_named(enable_dpcd_backlight, bool, 0600,
 i915_param_named(enable_gvt, bool, 0400,
 	"Enable support for Intel GVT-g graphics virtualization host support(default:false)");
 
+i915_param_named(fpreempt_timeout, uint, 0600,
+	"Wait time in msecs before forcing a preemption with reset (0:never force [default])");
+
 static __always_inline void _print_param(struct drm_printer *p,
 					 const char *name,
 					 const char *type,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c96360398072..1d50f223b637 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -55,6 +55,7 @@  struct drm_printer;
 	param(int, edp_vswing, 0) \
 	param(int, reset, 2) \
 	param(unsigned int, inject_load_failure, 0) \
+	param(unsigned int, fpreempt_timeout, 0) \
 	/* leave bools at the end to not create holes */ \
 	param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \
 	param(bool, enable_cmd_parser, true) \
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 337dfa56a738..81358afd1957 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -493,6 +493,45 @@  static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	execlists->first = NULL;
 }
 
+static enum hrtimer_restart
+intel_engine_fpreempt_timer(struct hrtimer *hrtimer)
+{
+	struct intel_engine_cs *engine =
+		container_of(hrtimer, struct intel_engine_cs,
+			     fpreempt_timer);
+
+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+		queue_work(system_highpri_wq, &engine->fpreempt_work);
+
+	return HRTIMER_NORESTART;
+}
+
+static void intel_engine_fpreempt_work(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, struct intel_engine_cs,
+			     fpreempt_work);
+
+	tasklet_kill(&engine->execlists.tasklet);
+	tasklet_disable(&engine->execlists.tasklet);
+
+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) {
+		engine->fpreempt_stalled = true;
+		i915_handle_error(engine->i915, intel_engine_flag(engine),
+				  0, "force preemption");
+	}
+
+	tasklet_enable(&engine->execlists.tasklet);
+}
+
+static void intel_engine_init_fpreempt(struct intel_engine_cs *engine)
+{
+	hrtimer_init(&engine->fpreempt_timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	engine->fpreempt_timer.function = intel_engine_fpreempt_timer;
+	INIT_WORK(&engine->fpreempt_work, intel_engine_fpreempt_work);
+}
+
 /**
  * intel_engines_setup_common - setup engine state not requiring hw access
  * @engine: Engine to setup.
@@ -508,6 +547,7 @@  void intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_timeline(engine);
 	intel_engine_init_hangcheck(engine);
 	intel_engine_init_batch_pool(engine);
+	intel_engine_init_fpreempt(engine);
 	intel_engine_init_cmd_parser(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d420c2ecb50a..0de687856434 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -533,15 +533,25 @@  static void inject_preempt_context(struct intel_engine_cs *engine)
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+
+	if (i915_modparams.fpreempt_timeout)
+		hrtimer_start(&engine->fpreempt_timer,
+			      ms_to_ktime(i915_modparams.fpreempt_timeout),
+			      HRTIMER_MODE_REL);
 }
 
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
+	struct intel_engine_cs *engine =
+		container_of(execlists, struct intel_engine_cs, execlists);
+
 	execlists_cancel_port_requests(execlists);
 	execlists_unwind_incomplete_requests(execlists);
 
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+
+	hrtimer_try_to_cancel(&engine->fpreempt_timer);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -1844,6 +1854,9 @@  static void execlists_reset(struct intel_engine_cs *engine,
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
 {
+	/* Mark any force preemption as resolved */
+	engine->fpreempt_stalled = false;
+
 	tasklet_enable(&engine->execlists.tasklet);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e2681303ce21..51286796def7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -565,6 +565,10 @@  struct intel_engine_cs {
 
 	struct intel_engine_hangcheck hangcheck;
 
+	struct hrtimer fpreempt_timer;
+	struct work_struct fpreempt_work;
+	bool fpreempt_stalled;
+
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 	unsigned int flags;