@@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
} while (tmp != rq);
}
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
- return tl->seqno += 1 + tl->has_initial_breadcrumb;
-}
-
static void move_to_timeline(struct i915_request *request,
struct i915_timeline *timeline)
{
@@ -532,8 +527,10 @@ struct i915_request *
i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
{
struct drm_i915_private *i915 = engine->i915;
- struct i915_request *rq;
struct intel_context *ce;
+ struct i915_timeline *tl;
+ struct i915_request *rq;
+ u32 seqno;
int ret;
lockdep_assert_held(&i915->drm.struct_mutex);
@@ -610,24 +607,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
}
}
- rq->rcustate = get_state_synchronize_rcu();
-
INIT_LIST_HEAD(&rq->active_list);
+
+ tl = ce->ring->timeline;
+ ret = i915_timeline_get_seqno(tl, rq, &seqno);
+ if (ret)
+ goto err_free;
+
rq->i915 = i915;
rq->engine = engine;
rq->gem_context = ctx;
rq->hw_context = ce;
rq->ring = ce->ring;
- rq->timeline = ce->ring->timeline;
+ rq->timeline = tl;
GEM_BUG_ON(rq->timeline == &engine->timeline);
- rq->hwsp_seqno = rq->timeline->hwsp_seqno;
+ rq->hwsp_seqno = tl->hwsp_seqno;
+ rq->hwsp_cacheline = tl->hwsp_cacheline;
+ rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
spin_lock_init(&rq->lock);
- dma_fence_init(&rq->fence,
- &i915_fence_ops,
- &rq->lock,
- rq->timeline->fence_context,
- timeline_get_seqno(rq->timeline));
+ dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+ tl->fence_context, seqno);
/* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -687,6 +687,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
+err_free:
kmem_cache_free(global.slab_requests, rq);
err_unreserve:
mutex_unlock(&ce->ring->timeline->mutex);
@@ -38,6 +38,7 @@ struct drm_file;
struct drm_i915_gem_object;
struct i915_request;
struct i915_timeline;
+struct i915_timeline_cacheline;
struct i915_capture_list {
struct i915_capture_list *next;
@@ -148,6 +149,16 @@ struct i915_request {
*/
const u32 *hwsp_seqno;
+ /*
+ * If we need to access the timeline's seqno for this request in
+ * another request, we need to keep a read reference to this associated
+ * cacheline, so that we do not free and recycle it before the foriegn
+ * observers have completed. Hence, we keep a pointer to the cacheline
+ * inside the timeline's HWSP vma, but it is only valid while this
+ * request has not completed and guarded by the timeline mutex.
+ */
+ struct i915_timeline_cacheline *hwsp_cacheline;
+
/** Position in the ring of the start of the request */
u32 head;
@@ -6,19 +6,32 @@
#include "i915_drv.h"
-#include "i915_timeline.h"
+#include "i915_active.h"
#include "i915_syncmap.h"
+#include "i915_timeline.h"
+
+#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
+#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
struct i915_timeline_hwsp {
- struct i915_vma *vma;
+ struct i915_gt_timelines *gt;
struct list_head free_link;
+ struct i915_vma *vma;
u64 free_bitmap;
};
-static inline struct i915_timeline_hwsp *
-i915_timeline_hwsp(const struct i915_timeline *tl)
+struct i915_timeline_cacheline {
+ struct i915_active active;
+ struct i915_timeline_hwsp *hwsp;
+ void *vaddr;
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+};
+
+static inline struct drm_i915_private *
+hwsp_to_i915(struct i915_timeline_hwsp *hwsp)
{
- return tl->hwsp_ggtt->private;
+ return container_of(hwsp->gt, struct drm_i915_private, gt.timelines);
}
static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -71,6 +84,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
vma->private = hwsp;
hwsp->vma = vma;
hwsp->free_bitmap = ~0ull;
+ hwsp->gt = gt;
spin_lock(>->hwsp_lock);
list_add(&hwsp->free_link, >->hwsp_free_list);
@@ -88,14 +102,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
return hwsp->vma;
}
-static void hwsp_free(struct i915_timeline *timeline)
+static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
{
- struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
- struct i915_timeline_hwsp *hwsp;
-
- hwsp = i915_timeline_hwsp(timeline);
- if (!hwsp) /* leave global HWSP alone! */
- return;
+ struct i915_gt_timelines *gt = hwsp->gt;
spin_lock(>->hwsp_lock);
@@ -103,7 +112,8 @@ static void hwsp_free(struct i915_timeline *timeline)
if (!hwsp->free_bitmap)
list_add_tail(&hwsp->free_link, >->hwsp_free_list);
- hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+ GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
+ hwsp->free_bitmap |= BIT_ULL(cacheline);
/* And if no one is left using it, give the page back to the system */
if (hwsp->free_bitmap == ~0ull) {
@@ -115,6 +125,76 @@ static void hwsp_free(struct i915_timeline *timeline)
spin_unlock(>->hwsp_lock);
}
+static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+
+ i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+ i915_vma_put(cl->hwsp->vma);
+ __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
+static void __cacheline_retire(struct i915_active *active)
+{
+ struct i915_timeline_cacheline *cl =
+ container_of(active, typeof(*cl), active);
+
+ i915_vma_unpin(cl->hwsp->vma);
+ if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
+ __idle_cacheline_free(cl);
+}
+
+static struct i915_timeline_cacheline *
+cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
+{
+ struct i915_timeline_cacheline *cl;
+ void *vaddr;
+
+ GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
+
+ cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+ if (!cl)
+ return ERR_PTR(-ENOMEM);
+
+ vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ kfree(cl);
+ return ERR_CAST(vaddr);
+ }
+
+ i915_vma_get(hwsp->vma);
+ cl->hwsp = hwsp;
+ cl->vaddr = page_pack_bits(vaddr, cacheline);
+
+ i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire);
+
+ return cl;
+}
+
+static void cacheline_acquire(struct i915_timeline_cacheline *cl)
+{
+ if (cl && i915_active_acquire(&cl->active))
+ __i915_vma_pin(cl->hwsp->vma);
+}
+
+static void cacheline_release(struct i915_timeline_cacheline *cl)
+{
+ if (cl)
+ i915_active_release(&cl->active);
+}
+
+static void cacheline_free(struct i915_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
+ cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
+
+ if (i915_active_is_idle(&cl->active))
+ __idle_cacheline_free(cl);
+}
+
int i915_timeline_init(struct drm_i915_private *i915,
struct i915_timeline *timeline,
const char *name,
@@ -136,29 +216,40 @@ int i915_timeline_init(struct drm_i915_private *i915,
timeline->name = name;
timeline->pin_count = 0;
timeline->has_initial_breadcrumb = !hwsp;
+ timeline->hwsp_cacheline = NULL;
- timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
if (!hwsp) {
+ struct i915_timeline_cacheline *cl;
unsigned int cacheline;
hwsp = hwsp_alloc(timeline, &cacheline);
if (IS_ERR(hwsp))
return PTR_ERR(hwsp);
+ cl = cacheline_alloc(hwsp->private, cacheline);
+ if (IS_ERR(cl)) {
+ __idle_hwsp_free(hwsp->private, cacheline);
+ return PTR_ERR(cl);
+ }
+
+ timeline->hwsp_cacheline = cl;
timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
- }
- timeline->hwsp_ggtt = i915_vma_get(hwsp);
- vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- hwsp_free(timeline);
- i915_vma_put(hwsp);
- return PTR_ERR(vaddr);
+ vaddr = page_mask_bits(cl->vaddr);
+ } else {
+ timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+
+ vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
}
timeline->hwsp_seqno =
memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
+ timeline->hwsp_ggtt = i915_vma_get(hwsp);
+ GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
timeline->fence_context = dma_fence_context_alloc(1);
spin_lock_init(&timeline->lock);
@@ -240,9 +331,12 @@ void i915_timeline_fini(struct i915_timeline *timeline)
GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
i915_syncmap_free(&timeline->sync);
- hwsp_free(timeline);
- i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+ if (timeline->hwsp_cacheline)
+ cacheline_free(timeline->hwsp_cacheline);
+ else
+ i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
i915_vma_put(timeline->hwsp_ggtt);
}
@@ -285,6 +379,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
i915_ggtt_offset(tl->hwsp_ggtt) +
offset_in_page(tl->hwsp_offset);
+ cacheline_acquire(tl->hwsp_cacheline);
timeline_add_to_active(tl);
return 0;
@@ -294,6 +389,157 @@ int i915_timeline_pin(struct i915_timeline *tl)
return err;
}
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+ GEM_BUG_ON(!tl->pin_count);
+ GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+ return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+ tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ struct i915_timeline_cacheline *cl;
+ unsigned int cacheline;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err;
+
+ /*
+ * If there is an outstanding GPU reference to this cacheline,
+ * such as it being sampled by a HW semaphore on another timeline,
+ * we cannot wraparound our seqno value (the HW semaphore does
+ * a strict greater-than-or-equals compare, not i915_seqno_passed).
+ * So if the cacheline is still busy, we must detach ourselves
+ * from it and leave it inflight alongside its users.
+ *
+ * However, if nobody is watching and we can guarantee that nobody
+ * will, we could simply reuse the same cacheline.
+ *
+ * if (i915_active_request_is_signaled(&tl->last_request) &&
+ * i915_active_is_signaled(&tl->hwsp_cacheline->active))
+ * return 0;
+ *
+ * That seems unlikely for a busy timeline that needed to wrap in
+ * the first place, so just replace the cacheline.
+ */
+
+ vma = hwsp_alloc(tl, &cacheline);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_rollback;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err) {
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_rollback;
+ }
+
+ cl = cacheline_alloc(vma->private, cacheline);
+ if (IS_ERR(cl)) {
+ err = PTR_ERR(cl);
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_unpin;
+ }
+ GEM_BUG_ON(cl->hwsp->vma != vma);
+
+ /*
+ * Attach the old cacheline to the current request, so that we only
+ * free it after the current request is retired, which ensures that
+ * all writes into the cacheline from previous requests are complete.
+ */
+ err = i915_active_ref(&tl->hwsp_cacheline->active,
+ tl->fence_context, rq);
+ if (err)
+ goto err_cacheline;
+
+ cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
+ cacheline_free(tl->hwsp_cacheline);
+
+ i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
+ i915_vma_put(tl->hwsp_ggtt);
+
+ tl->hwsp_ggtt = i915_vma_get(vma);
+
+ vaddr = page_mask_bits(cl->vaddr);
+ tl->hwsp_offset = cacheline * CACHELINE_BYTES;
+ tl->hwsp_seqno =
+ memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+
+ tl->hwsp_offset += i915_ggtt_offset(vma);
+
+ cacheline_acquire(cl);
+ tl->hwsp_cacheline = cl;
+
+ *seqno = timeline_advance(tl);
+ GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+ return 0;
+
+err_cacheline:
+ cacheline_free(cl);
+err_unpin:
+ i915_vma_unpin(vma);
+err_rollback:
+ timeline_rollback(tl);
+ return err;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ *seqno = timeline_advance(tl);
+
+ /* Replace the HWSP on wraparound for HW semaphores */
+ if (unlikely(!*seqno && tl->hwsp_cacheline))
+ return __i915_timeline_get_seqno(tl, rq, seqno);
+
+ return 0;
+}
+
+static int cacheline_ref(struct i915_timeline_cacheline *cl,
+ struct i915_request *rq)
+{
+ return i915_active_ref(&cl->active, rq->fence.context, rq);
+}
+
+int i915_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *to,
+ u32 *hwsp)
+{
+ struct i915_timeline_cacheline *cl = from->hwsp_cacheline;
+ struct i915_timeline *tl = from->timeline;
+ int err;
+
+ GEM_BUG_ON(to->timeline == tl);
+
+ mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
+ err = i915_request_completed(from);
+ if (!err)
+ err = cacheline_ref(cl, to);
+ if (!err) {
+ if (likely(cl == tl->hwsp_cacheline)) {
+ *hwsp = tl->hwsp_offset;
+ } else { /* across a seqno wrap, recover the original offset */
+ *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+ ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
+ CACHELINE_BYTES;
+ }
+ }
+ mutex_unlock(&tl->mutex);
+
+ return err;
+}
+
void i915_timeline_unpin(struct i915_timeline *tl)
{
GEM_BUG_ON(!tl->pin_count);
@@ -301,6 +547,7 @@ void i915_timeline_unpin(struct i915_timeline *tl)
return;
timeline_remove_from_active(tl);
+ cacheline_release(tl->hwsp_cacheline);
/*
* Since this timeline is idle, all bariers upon which we were waiting
@@ -34,7 +34,7 @@
#include "i915_utils.h"
struct i915_vma;
-struct i915_timeline_hwsp;
+struct i915_timeline_cacheline;
struct i915_timeline {
u64 fence_context;
@@ -51,6 +51,8 @@ struct i915_timeline {
struct i915_vma *hwsp_ggtt;
u32 hwsp_offset;
+ struct i915_timeline_cacheline *hwsp_cacheline;
+
bool has_initial_breadcrumb;
/**
@@ -162,8 +164,15 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
}
int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno);
void i915_timeline_unpin(struct i915_timeline *tl);
+int i915_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *until,
+ u32 *hwsp_offset);
+
void i915_timelines_init(struct drm_i915_private *i915);
void i915_timelines_park(struct drm_i915_private *i915);
void i915_timelines_fini(struct drm_i915_private *i915);
@@ -641,6 +641,118 @@ static int live_hwsp_alternate(void *arg)
#undef NUM_TIMELINES
}
+static int live_hwsp_wrap(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct i915_timeline *tl;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = 0;
+
+ /*
+ * Across a seqno wrap, we need to keep the old cacheline alive for
+ * foreign GPU references.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ tl = i915_timeline_create(i915, __func__, NULL);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto out_rpm;
+ }
+ if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+ goto out_free;
+
+ err = i915_timeline_pin(tl);
+ if (err)
+ goto out_free;
+
+ for_each_engine(engine, i915, id) {
+ const u32 *hwsp_seqno[2];
+ struct i915_request *rq;
+ u32 seqno[2];
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ rq = i915_request_alloc(engine, i915->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ tl->seqno = -4u;
+
+ err = i915_timeline_get_seqno(tl, rq, &seqno[0]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
+ seqno[0], tl->hwsp_offset);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ hwsp_seqno[0] = tl->hwsp_seqno;
+
+ err = i915_timeline_get_seqno(tl, rq, &seqno[1]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
+ seqno[1], tl->hwsp_offset);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ hwsp_seqno[1] = tl->hwsp_seqno;
+
+ /* With wrap should come a new hwsp */
+ GEM_BUG_ON(seqno[1] >= seqno[0]);
+ GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
+
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+ pr_err("Wait for timeline writes timed out!\n");
+ err = -EIO;
+ goto out;
+ }
+
+ if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
+ pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
+ *hwsp_seqno[0], *hwsp_seqno[1],
+ seqno[0], seqno[1]);
+ err = -EINVAL;
+ goto out;
+ }
+
+ i915_retire_requests(i915); /* recycle HWSP */
+ }
+
+out:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ i915_timeline_unpin(tl);
+out_free:
+ i915_timeline_put(tl);
+out_rpm:
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ return err;
+}
+
static int live_hwsp_recycle(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -723,6 +835,7 @@ int i915_timeline_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_hwsp_recycle),
SUBTEST(live_hwsp_engine),
SUBTEST(live_hwsp_alternate),
+ SUBTEST(live_hwsp_wrap),
};
return i915_subtests(tests, i915);