@@ -412,7 +412,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_POLL (1<<15)
+#define MI_SEMAPHORE_SAD_GT_SDD (0<<12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12)
+#define MI_SEMAPHORE_SAD_LT_SDD (2<<12)
+#define MI_SEMAPHORE_SAD_LTE_SDD (3<<12)
+#define MI_SEMAPHORE_SAD_EQ_SDD (4<<12)
+#define MI_SEMAPHORE_SAD_NEQ_SDD (5<<12)
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
@@ -505,6 +505,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
goto done;
if (p->priority > max(last->priotree.priority, 0)) {
+ u32 *gpu_sema = &engine->status_page.page_addr[I915_GEM_HWS_INDEX + 2];
+	/* Suspend seqno updates from the GPU: raise the semaphore dword that each breadcrumb's MI_SEMAPHORE_WAIT polls; the posting read below flushes the write */
+ WRITE_ONCE(*gpu_sema, 1);
+ readl(engine->i915->regs +
+ i915_mmio_reg_offset(RING_ACTHD(engine->mmio_base)));
+
list_for_each_entry_safe_reverse(rq, rn,
&engine->timeline->requests,
link) {
@@ -613,11 +619,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
static void switch_to_preempt(struct intel_engine_cs *engine)
{
+ u32 *gpu_sema = &engine->status_page.page_addr[I915_GEM_HWS_INDEX + 2];
+
memcpy(engine->execlist_port,
engine->execlist_preempt,
sizeof(engine->execlist_preempt));
memset(engine->execlist_preempt, 0,
sizeof(engine->execlist_preempt));
+
+	/* Lower the semaphore so the breadcrumbs' MI_SEMAPHORE_WAIT completes and seqno updates from the GPU resume */
+ WRITE_ONCE(*gpu_sema, 0);
}
/*
@@ -1667,6 +1678,14 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0; /* continue if zero (preempt == 0) */
+ *cs++ = intel_hws_seqno_address(request->engine) + 8;
+ *cs++ = 0;
+
*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
*cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
*cs++ = 0;
@@ -1679,7 +1698,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
u32 *cs)
@@ -1687,6 +1706,14 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0; /* continue if zero (preempt == 0) */
+ *cs++ = intel_hws_seqno_address(request->engine) + 8;
+ *cs++ = 0;
+
/* w/a for post sync ops following a GPGPU operation we
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
@@ -1707,7 +1734,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
gen8_emit_wa_tail(request, cs);
}
-static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
+static const int gen8_emit_breadcrumb_render_sz = 12 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{