@@ -569,8 +569,18 @@ static inline bool should_skip_switch(struct intel_engine_cs *ring,
struct intel_context *from,
struct intel_context *to)
{
- if (from == to && !to->remap_slice)
- return true;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+ if (to->remap_slice)
+ return false;
+
+ if (to->ppgtt) {
+ if (from == to && !test_bit(ring->id, &to->ppgtt->pd_dirty_rings))
+ return true;
+ } else if (dev_priv->mm.aliasing_ppgtt) {
+ /* Guard: with no PPGTT at all (GGTT-only mode) aliasing_ppgtt is
+ * NULL and must not be dereferenced. */
+ if (from == to && !test_bit(ring->id, &dev_priv->mm.aliasing_ppgtt->pd_dirty_rings))
+ return true;
+ }
return false;
}
@@ -587,9 +597,8 @@ needs_pd_load_pre(struct intel_engine_cs *ring, struct intel_context *to)
static bool
needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to)
{
- return (!to->legacy_hw_ctx.initialized ||
- i915_gem_context_is_default(to)) &&
- to->ppgtt && IS_GEN8(ring->dev);
+ /* XXX: '&to->ppgtt->pd_dirty_rings' takes an address, which is always
+ * non-NULL, so the parenthesized operand is always true and the whole
+ * expression reduces to IS_GEN8(ring->dev) — confirm the intended
+ * condition (likely 'to->ppgtt && to->ppgtt->pd_dirty_rings' or an
+ * MI_RESTORE_INHIBIT check). */
+ return IS_GEN8(ring->dev) &&
+ (to->ppgtt || &to->ppgtt->pd_dirty_rings);
}
static int do_switch(struct intel_engine_cs *ring,
@@ -634,6 +643,12 @@ static int do_switch(struct intel_engine_cs *ring,
ret = to->ppgtt->switch_mm(to->ppgtt, ring);
if (ret)
goto unpin_out;
+
+ /* Doing a PD load always reloads the page dirs */
+ if (to->ppgtt)
+ clear_bit(ring->id, &to->ppgtt->pd_dirty_rings);
+ else
+ clear_bit(ring->id, &dev_priv->mm.aliasing_ppgtt->pd_dirty_rings);
}
if (ring != &dev_priv->ring[RCS]) {
@@ -672,6 +687,8 @@ static int do_switch(struct intel_engine_cs *ring,
*/
if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
hw_flags |= MI_RESTORE_INHIBIT;
+ else if (to->ppgtt && test_and_clear_bit(ring->id, &to->ppgtt->pd_dirty_rings))
+ hw_flags |= MI_FORCE_RESTORE;
ret = mi_set_context(ring, to, hw_flags);
if (ret)
@@ -1198,6 +1198,13 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
if (ret)
goto error;
+ if (ctx->ppgtt)
+ WARN(ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
+ "%s didn't clear reload\n", ring->name);
+ else if (dev_priv->mm.aliasing_ppgtt)
+ /* Guard against NULL aliasing_ppgtt in GGTT-only mode. */
+ WARN(dev_priv->mm.aliasing_ppgtt->pd_dirty_rings &
+ (1<<ring->id), "%s didn't clear reload\n", ring->name);
+
instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
instp_mask = I915_EXEC_CONSTANTS_MASK;
switch (instp_mode) {
@@ -1445,6 +1452,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
goto err;
+ /* XXX: Reserve may have changed PDEs, which means we must do a
+ * context switch before we can coherently read some of the reserved
+ * VMAs. */
+
/* The objects are in their final locations, apply the relocations. */
if (need_relocs)
ret = i915_gem_execbuffer_relocate(eb);
@@ -1110,6 +1110,16 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
4096, PCI_DMA_BIDIRECTIONAL);
}
+/* PDE TLBs are a pain to invalidate pre GEN8. It requires a context reload.
+ * If we are switching between contexts with the same LRCA, we also must do
+ * a force restore.
+ */
+static inline void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
+{
+ /* Mark every ring's PD TLBs stale so the next context switch on each
+ * ring reloads the page directories. (Stray '\' continuation and the
+ * dangling "If current vm != vm," fragment removed.) */
+ ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
+}
+
static int gen6_alloc_va_range(struct i915_address_space *vm,
uint64_t start, uint64_t length)
{
@@ -1128,6 +1138,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
I915_PPGTT_PT_ENTRIES);
}
+ mark_tlbs_dirty(ppgtt);
return 0;
}
@@ -1143,6 +1154,8 @@ static void gen6_teardown_va_range(struct i915_address_space *vm,
bitmap_clear(pt->used_ptes, gen6_pte_index(start),
gen6_pte_count(start, length));
}
+
+ mark_tlbs_dirty(ppgtt);
}
static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
@@ -300,6 +300,7 @@ struct i915_hw_ppgtt {
struct i915_address_space base;
struct kref ref;
struct drm_mm_node node;
+ unsigned long pd_dirty_rings;
unsigned num_pd_entries;
unsigned num_pd_pages; /* gen8+ */
union {