Invalidate the ring TLB and increase the delay required for Baytrail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_ring_submission.c | 29 +++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)

@@ -1396,6 +1396,25 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
 	return 0;
 }
 
+static int flush_tlb(struct i915_request *rq)
+{
+	const struct intel_engine_cs * const engine = rq->engine;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+	*cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
 {
 	struct drm_i915_private *i915 = rq->i915;
@@ -1578,7 +1597,7 @@ static int switch_context(struct i915_request *rq)
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (vm) {
-		int loops = 4; /* 2 for Haswell? 4 for Baytrail! */
+		int loops = 8; /* 2 for Haswell? 8 for Baytrail! */
 
 		/*
 		 * Not only do we need a full barrier (post-sync write) after
@@ -1588,8 +1607,12 @@ static int switch_context(struct i915_request *rq)
 		 * post-sync op, this extra pass appears vital before a
 		 * mm switch!
 		 */
 
+		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+		if (ret)
+			return ret;
+
 		do {
-			ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+			ret = flush_tlb(rq);
 			if (ret)
 				return ret;
@@ -1598,7 +1621,7 @@ static int switch_context(struct i915_request *rq)
 				return ret;
 		} while (--loops);
 
-		ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+		ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
 		if (ret)
 			return ret;
 	}
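A note on the mechanism: INSTPM is a masked register, so the upper 16 bits
of each write select which of the lower 16 bits the hardware actually
latches. That is why flush_tlb() emits _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE)
through MI_LOAD_REGISTER_IMM rather than writing a plain bit. Below is a
minimal standalone sketch of that encoding; the macros mirror i915's
_MASKED_FIELD()/_MASKED_BIT_ENABLE(), and the bit position is an assumption
taken from i915_reg.h, not a definition this patch introduces:

#include <stdio.h>

/* Illustration only; mirrors the i915 masked-register helpers. */
#define MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
#define MASKED_BIT_ENABLE(a)		MASKED_FIELD((a), (a))

#define INSTPM_TLB_INVALIDATE		(1 << 9)	/* assumed, per i915_reg.h */

int main(void)
{
	unsigned int v = MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);

	/* Upper half: which bits to update; lower half: their new values. */
	printf("INSTPM write: 0x%08x (mask 0x%04x, value 0x%04x)\n",
	       v, v >> 16, v & 0xffff);
	return 0;
}

The single LRI therefore sets the invalidate bit while leaving every other
INSTPM bit untouched, with no read-modify-write on the ring.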
Invalidate the ring TLB and increase the delay required for Baytrail. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- .../gpu/drm/i915/gt/intel_ring_submission.c | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-)
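As for the overall ordering, the mm switch now emits one EMIT_INVALIDATE,
then several passes of ring-TLB invalidate plus page-directory reload, then
a final EMIT_FLUSH. The standalone sketch below shows only that shape: the
stubs stand in for the driver's emit_flush()/flush_tlb()/load_pd_dir(), and
load_pd_dir() inside the loop is presumed from the context elided between
the last two hunks:

#include <stdio.h>

static int emit_invalidate(void) { puts("emit_flush(EMIT_INVALIDATE)"); return 0; }
static int emit_post_flush(void) { puts("emit_flush(EMIT_FLUSH)"); return 0; }
static int flush_tlb(void) { puts("  flush_tlb: LRI to INSTPM, TLB invalidate"); return 0; }
static int load_pd_dir(void) { puts("  load_pd_dir: reload the page directory"); return 0; }

int main(void)
{
	int loops = 8;	/* 2 for Haswell? 8 for Baytrail! */
	int ret;

	ret = emit_invalidate();	/* invalidate before touching the PD */
	if (ret)
		return ret;

	do {	/* Baytrail needs the invalidate+reload repeated to stick */
		ret = flush_tlb();
		if (ret)
			return ret;
		ret = load_pd_dir();
		if (ret)
			return ret;
	} while (--loops);

	return emit_post_flush();	/* post-sync write as the full barrier */
}

Ending on EMIT_FLUSH rather than EMIT_INVALIDATE fits the comment in
switch_context(): the invalidate has to happen before the page directory is
reloaded, while the post-sync write afterwards is the barrier that makes
the reload stick.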