@@ -4183,7 +4183,7 @@ i915_wedged_set(void *data, u64 val)
intel_runtime_pm_get(dev_priv);
- i915_handle_error(dev, 0x0, val,
+ i915_handle_error(dev, 0x0, false, val,
"Manually setting wedged to %llu", val);
intel_runtime_pm_put(dev_priv);
@@ -791,6 +791,64 @@ i915_hangcheck_init(struct drm_device *dev)
}
}
+void i915_watchdog_init(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ int freq;
+ int i;
+
+ /*
+ * Based on the pre-defined timeout value (60ms) calculate
+ * timer count thresholds needed based on core frequency.
+ *
+ * For RCS.
+ * The timestamp resolution changed in Gen7 and beyond to 80ns
+ * for all pipes. Before that it was 640ns.
+ */
+
+#define KM_RCS_ENGINE_TIMEOUT_VALUE_IN_MS 60
+#define KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS 60
+#define KM_TIMER_MILLISECOND 1000
+
+ /*
+ * Timestamp timer resolution = 0.080 uSec,
+ * or 12500000 counts per second
+ */
+#define KM_TIMESTAMP_CNTS_PER_SEC_80NS 12500000
+
+ /*
+ * Timestamp timer resolution = 0.640 uSec,
+ * or 1562500 counts per second
+ */
+#define KM_TIMESTAMP_CNTS_PER_SEC_640NS 1562500
+
+ if (INTEL_INFO(dev)->gen >= 7)
+ freq = KM_TIMESTAMP_CNTS_PER_SEC_80NS;
+ else
+ freq = KM_TIMESTAMP_CNTS_PER_SEC_640NS;
+
+ dev_priv->ring[RCS].watchdog_threshold =
+ ((KM_RCS_ENGINE_TIMEOUT_VALUE_IN_MS) *
+ (freq / KM_TIMER_MILLISECOND));
+
+ dev_priv->ring[VCS].watchdog_threshold =
+ ((KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS) *
+ (freq / KM_TIMER_MILLISECOND));
+
+ dev_priv->ring[VCS2].watchdog_threshold =
+ ((KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS) *
+ (freq / KM_TIMER_MILLISECOND));
+
+ for (i = 0; i < I915_NUM_RINGS; i++)
+ dev_priv->ring[i].hangcheck.watchdog_count = 0;
+
+ /* Timeouts are plain millisecond values: log them in decimal */
+ DRM_INFO("Watchdog Timeout [ms], RCS: %d, VCS: %d, VCS2: %d\n",
+ KM_RCS_ENGINE_TIMEOUT_VALUE_IN_MS,
+ KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS,
+ KM_BSD_ENGINE_TIMEOUT_VALUE_IN_MS);
+}
+
/**
* i915_driver_load - setup chip and create an initial config
* @dev: DRM device
@@ -972,6 +1030,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
i915_gem_load(dev);
i915_hangcheck_init(dev);
+ i915_watchdog_init(dev);
/* On the 945G/GM, the chipset reports the MSI capability on the
* integrated graphics even though the support isn't actually there
@@ -2563,6 +2563,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
+void i915_watchdog_init(struct drm_device *dev);
static inline void i915_hangcheck_reinit(struct intel_engine_cs *engine)
{
struct intel_ring_hangcheck *hc = &engine->hangcheck;
@@ -2578,9 +2579,9 @@ static inline void i915_hangcheck_reinit(struct intel_engine_cs *engine)
/* i915_irq.c */
void i915_queue_hangcheck(struct drm_device *dev);
-__printf(4, 5)
-void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged,
- const char *fmt, ...);
+__printf(5, 6)
+void i915_handle_error(struct drm_device *dev, u32 engine_mask,
+ bool watchdog, bool wedged, const char *fmt, ...);
extern void intel_irq_init(struct drm_i915_private *dev_priv);
extern void intel_hpd_init(struct drm_i915_private *dev_priv);
@@ -1289,6 +1289,18 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
intel_lrc_irq_handler(&dev_priv->ring[RCS]);
if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
notify_ring(&dev_priv->ring[RCS]);
+ if (tmp & (GT_GEN8_RCS_WATCHDOG_INTERRUPT << GEN8_RCS_IRQ_SHIFT)) {
+ struct intel_engine_cs *ring;
+
+ /* Stop the counter to prevent further interrupts */
+ ring = &dev_priv->ring[RCS];
+ I915_WRITE(RING_CNTR(ring->mmio_base),
+ GEN6_RCS_WATCHDOG_DISABLE);
+
+ ring->hangcheck.watchdog_count++;
+ i915_handle_error(ring->dev, intel_ring_flag(ring), true, true,
+ "Render engine watchdog timed out");
+ }
if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[BCS]);
@@ -1308,11 +1320,35 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
intel_lrc_irq_handler(&dev_priv->ring[VCS]);
if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
notify_ring(&dev_priv->ring[VCS]);
+ if (tmp & (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS1_IRQ_SHIFT)) {
+ struct intel_engine_cs *ring;
+
+ /* Stop the counter to prevent further interrupts */
+ ring = &dev_priv->ring[VCS];
+ I915_WRITE(RING_CNTR(ring->mmio_base),
+ GEN8_VCS_WATCHDOG_DISABLE);
+
+ ring->hangcheck.watchdog_count++;
+ i915_handle_error(ring->dev, intel_ring_flag(ring), true, true,
+ "Media engine watchdog timed out");
+ }
if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
notify_ring(&dev_priv->ring[VCS2]);
+ if (tmp & (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS2_IRQ_SHIFT)) {
+ struct intel_engine_cs *ring;
+
+ /* Stop the counter to prevent further interrupts */
+ ring = &dev_priv->ring[VCS2];
+ I915_WRITE(RING_CNTR(ring->mmio_base),
+ GEN8_VCS_WATCHDOG_DISABLE);
+
+ ring->hangcheck.watchdog_count++;
+ i915_handle_error(ring->dev, intel_ring_flag(ring), true, true,
+ "Media engine 2 watchdog timed out");
+ }
} else
DRM_ERROR("The master control interrupt lied (GT1)!\n");
}
@@ -2563,6 +2599,7 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
* or if one of the current engine resets fails we fall
* back to legacy full GPU reset.
*
+ * @watchdog: true = Engine hang detected by hardware watchdog.
* @wedged: true = Hang detected, invoke hang recovery.
* @fmt, ...: Error message describing reason for error.
*
@@ -2574,8 +2611,8 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
* reset the associated engine. Failing that, try to fall back to legacy
* full GPU reset recovery mode.
*/
-void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged,
- const char *fmt, ...)
+void i915_handle_error(struct drm_device *dev, u32 engine_mask,
+ bool watchdog, bool wedged, const char *fmt, ...)
{
struct drm_i915_private *dev_priv = dev->dev_private;
va_list args;
@@ -2607,20 +2644,27 @@ void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged,
u32 i;
for_each_ring(engine, dev_priv, i) {
- u32 now, last_engine_reset_timediff;
if (!(intel_ring_flag(engine) & engine_mask))
continue;
- /* Measure the time since this engine was last reset */
- now = get_seconds();
- last_engine_reset_timediff =
- now - engine->hangcheck.last_engine_reset_time;
-
- full_reset = last_engine_reset_timediff <
- i915.gpu_reset_promotion_time;
-
- engine->hangcheck.last_engine_reset_time = now;
+ if (!watchdog) {
+ /* Measure the time since this engine was last reset */
+ u32 now = get_seconds();
+ u32 last_engine_reset_timediff =
+ now - engine->hangcheck.last_engine_reset_time;
+
+ full_reset = last_engine_reset_timediff <
+ i915.gpu_reset_promotion_time;
+
+ engine->hangcheck.last_engine_reset_time = now;
+ } else {
+ /*
+ * Watchdog timeout always results
+ * in engine reset.
+ */
+ full_reset = false;
+ }
/*
* This engine was not reset too recently - go ahead
@@ -2631,10 +2675,11 @@ void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged,
* This can still be overridden by a global
* reset e.g. if per-engine reset fails.
*/
- if (!full_reset)
+ if (watchdog || !full_reset)
atomic_set_mask(I915_ENGINE_RESET_IN_PROGRESS,
&engine->hangcheck.flags);
- else
+
+ if (full_reset)
break;
} /* for_each_ring */
@@ -2642,7 +2687,7 @@ void i915_handle_error(struct drm_device *dev, u32 engine_mask, bool wedged,
if (full_reset) {
atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
- &dev_priv->gpu_error.reset_counter);
+ &dev_priv->gpu_error.reset_counter);
}
/*
@@ -2980,7 +3025,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
*/
tmp = I915_READ_CTL(ring);
if (tmp & RING_WAIT) {
- i915_handle_error(dev, intel_ring_flag(ring), false,
+ i915_handle_error(dev, intel_ring_flag(ring), false, false,
"Kicking stuck wait on %s",
ring->name);
I915_WRITE_CTL(ring, tmp);
@@ -2992,7 +3037,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
default:
return HANGCHECK_HUNG;
case 1:
- i915_handle_error(dev, intel_ring_flag(ring), false,
+ i915_handle_error(dev, intel_ring_flag(ring), false, false,
"Kicking stuck semaphore on %s",
ring->name);
I915_WRITE_CTL(ring, tmp);
@@ -3134,9 +3179,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
}
if (engine_mask)
- i915_handle_error(dev, engine_mask, true, "Ring hung (0x%02x)", engine_mask);
+ i915_handle_error(dev, engine_mask, false, true, "Ring hung (0x%02x)", engine_mask);
else if (force_full_gpu_reset)
- i915_handle_error(dev, 0x0, true,
+ i915_handle_error(dev, 0x0, false, true,
"Hang recovery ineffective, falling back to full GPU reset");
if (busy_count)
@@ -3591,11 +3636,14 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv)
{
/* These are interrupts we'll toggle with the ring mask register */
uint32_t gt_interrupts[] = {
+ GT_GEN8_RCS_WATCHDOG_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
GT_RENDER_L3_PARITY_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT |
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT,
+ GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
+ GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS2_IRQ_SHIFT |
GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT |
@@ -1181,6 +1181,8 @@ enum skl_disp_power_wells {
#define RING_HEAD(base) ((base)+0x34)
#define RING_START(base) ((base)+0x38)
#define RING_CTL(base) ((base)+0x3c)
+#define RING_CNTR(base) ((base)+0x178)
+#define RING_THRESH(base) ((base)+0x17C)
#define RING_SYNC_0(base) ((base)+0x40)
#define RING_SYNC_1(base) ((base)+0x44)
#define RING_SYNC_2(base) ((base)+0x48)
@@ -1584,6 +1586,11 @@ enum skl_disp_power_wells {
#define GT_BSD_USER_INTERRUPT (1 << 12)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8)
+#define GT_GEN6_RENDER_WATCHDOG_INTERRUPT (1 << 6)
+#define GT_GEN8_RCS_WATCHDOG_INTERRUPT (1 << 6)
+#define GEN6_RCS_WATCHDOG_DISABLE 1
+#define GT_GEN8_VCS_WATCHDOG_INTERRUPT (1 << 6)
+#define GEN8_VCS_WATCHDOG_DISABLE 0xFFFFFFFF
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)
#define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3)
@@ -1122,6 +1122,78 @@ static int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf,
return 0;
}
+static int
+gen8_ring_start_watchdog(struct intel_ringbuffer *ringbuf, struct intel_context *ctx)
+{
+ int ret;
+ struct intel_engine_cs *ring = ringbuf->ring;
+
+ ret = intel_logical_ring_begin(ringbuf, ctx, 10);
+ if (ret)
+ return ret;
+
+ /*
+ * Per the note in i915_reg.h an MI_NOOP must precede an
+ * MI_LOAD_REGISTER_IMM; two are emitted here to pad the sequence
+ * to the 10 dwords reserved above.
+ */
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+
+ /* Program the counter period (threshold in timestamp-counter ticks) */
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+ intel_logical_ring_emit(ringbuf, RING_THRESH(ring->mmio_base));
+ intel_logical_ring_emit(ringbuf, ring->watchdog_threshold);
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+
+ /* Arm the watchdog counter (0 == enable for this register) */
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+ intel_logical_ring_emit(ringbuf, RING_CNTR(ring->mmio_base));
+ intel_logical_ring_emit(ringbuf, I915_WATCHDOG_ENABLE);
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_advance(ringbuf);
+
+ return 0;
+}
+
+static int
+gen8_ring_stop_watchdog(struct intel_ringbuffer *ringbuf, struct intel_context *ctx)
+{
+ int ret;
+ struct intel_engine_cs *ring = ringbuf->ring;
+
+ ret = intel_logical_ring_begin(ringbuf, ctx, 6);
+ if (ret)
+ return ret;
+
+ /*
+ * i915_reg.h includes a warning to place a MI_NOOP
+ * before a MI_LOAD_REGISTER_IMM
+ */
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+
+ intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+ intel_logical_ring_emit(ringbuf, RING_CNTR(ring->mmio_base));
+
+ switch (ring->id) {
+ default:
+ WARN(1, "%s does not support watchdog timeout! "
+ "Defaulting to render engine.\n", ring->name); /* fall through */
+ case RCS:
+ intel_logical_ring_emit(ringbuf, GEN6_RCS_WATCHDOG_DISABLE);
+ break;
+ case VCS:
+ case VCS2:
+ intel_logical_ring_emit(ringbuf, GEN8_VCS_WATCHDOG_DISABLE);
+ break;
+ }
+
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ intel_logical_ring_advance(ringbuf);
+
+ return 0;
+}
+
/**
* execlists_submission() - submit a batchbuffer for execution, Execlists style
* @dev: DRM device.
@@ -1152,6 +1224,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
int instp_mode;
u32 instp_mask;
int ret;
+ bool watchdog_running = false;
instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
instp_mask = I915_EXEC_CONSTANTS_MASK;
@@ -1203,6 +1276,18 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
if (ret)
return ret;
+ /* Start watchdog timer */
+ if (args->flags & I915_EXEC_ENABLE_WATCHDOG) {
+ if (!intel_ring_supports_watchdog(ring))
+ return -EINVAL;
+
+ ret = gen8_ring_start_watchdog(ringbuf, ctx);
+ if (ret)
+ return ret;
+
+ watchdog_running = true;
+ }
+
if (ring == &dev_priv->ring[RCS] &&
instp_mode != dev_priv->relative_constants_mode) {
ret = intel_logical_ring_begin(ringbuf, ctx, 4);
@@ -1224,6 +1309,13 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
+ /* Cancel watchdog timer */
+ if (watchdog_running) {
+ ret = gen8_ring_stop_watchdog(ringbuf, ctx);
+ if (ret)
+ return ret;
+ }
+
i915_gem_execbuffer_move_to_active(vmas, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
@@ -1892,6 +1984,9 @@ static int logical_render_ring_init(struct drm_device *dev)
if (HAS_L3_DPF(dev))
ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+ ring->irq_keep_mask |=
+ (GT_GEN8_RCS_WATCHDOG_INTERRUPT << GEN8_RCS_IRQ_SHIFT);
+
if (INTEL_INFO(dev)->gen >= 9)
ring->init_hw = gen9_init_render_ring;
else
@@ -1930,6 +2025,8 @@ static int logical_bsd_ring_init(struct drm_device *dev)
GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
ring->irq_keep_mask =
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
+ ring->irq_keep_mask |=
+ (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS1_IRQ_SHIFT);
ring->init_hw = gen8_init_common_ring;
ring->get_seqno = gen8_get_seqno;
@@ -1959,6 +2056,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
ring->irq_keep_mask =
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
+ ring->irq_keep_mask |=
+ (GT_GEN8_VCS_WATCHDOG_INTERRUPT << GEN8_VCS2_IRQ_SHIFT);
ring->init_hw = gen8_init_common_ring;
ring->get_seqno = gen8_get_seqno;
@@ -30,6 +30,8 @@ struct intel_hw_status_page {
struct drm_i915_gem_object *obj;
};
+#define I915_WATCHDOG_ENABLE 0
+
#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)
@@ -136,6 +138,9 @@ struct intel_ring_hangcheck {
/* Number of TDR hang detections */
u32 tdr_count;
+
+ /* Number of watchdog hang detections for this ring */
+ u32 watchdog_count;
};
struct intel_ringbuffer {
@@ -338,6 +343,12 @@ struct intel_engine_cs {
/* Saved head value to be restored after reset */
u32 saved_head;
+ /*
+ * Watchdog timer threshold values
+ * only RCS, VCS, VCS2 rings have watchdog timeout support
+ */
+ uint32_t watchdog_threshold;
+
struct {
struct drm_i915_gem_object *obj;
u32 gtt_offset;
@@ -484,6 +495,26 @@ int intel_ring_save(struct intel_engine_cs *ring,
int intel_ring_restore(struct intel_engine_cs *ring,
struct drm_i915_gem_request *req);
+/* Only RCS, VCS and VCS2 have hardware watchdog timeout support */
+static inline bool intel_ring_supports_watchdog(struct intel_engine_cs *ring)
+{
+ bool ret = false;
+
+ if (WARN_ON(!ring))
+ goto exit;
+
+ ret = (ring->id == RCS ||
+ ring->id == VCS ||
+ ring->id == VCS2);
+
+ if (!ret)
+ DRM_ERROR("%s does not support watchdog timeout!\n", ring->name);
+
+exit:
+ return ret;
+}
+int intel_ring_start_watchdog(struct intel_engine_cs *ring);
+int intel_ring_stop_watchdog(struct intel_engine_cs *ring);
+
int __must_check intel_ring_idle(struct intel_engine_cs *ring);
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
@@ -760,7 +760,10 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_BSD_RING1 (1<<13)
#define I915_EXEC_BSD_RING2 (2<<13)
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/* Enable watchdog timer for this batch buffer */
+#define I915_EXEC_ENABLE_WATCHDOG (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \