Message ID | 20190226102404.29153-6-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Series | [01/11] drm/i915: Skip scanning for signalers if we are already inflight |
On 26/02/2019 10:23, Chris Wilson wrote:
> In preparation for enabling HW semaphores, we need to keep in flight
> timeline HWSP alive until its use across entire system has completed,
> as any other timeline active on the GPU may still refer back to the
> already retired timeline. We both have to delay recycling available
> cachelines and unpinning old HWSP until the next idle point.
>
> An easy option would be to simply keep all used HWSP until the system as
> a whole was idle, i.e. we could release them all at once on parking.
> However, on a busy system, we may never see a global idle point,
> essentially meaning the resource will be leaked until we are forced to
> do a GC pass. We already employ a fine-grained idle detection mechanism
> for vma, which we can reuse here so that each cacheline can be freed
> immediately after the last request using it is retired.
>
> v3: Keep track of the activity of each cacheline.
> v4: cacheline_free() on canceling the seqno tracking
> v5: Finally with a testcase to exercise wraparound
> v6: Pack cacheline into empty bits of page-aligned vaddr

Have you considered using page_(mask/unmask/pack/unpack)_bits helpers
instead of open coding it?

Is the additional sharing of bits for two variables a thing which makes
it not as elegant?

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v5
> ---
>  drivers/gpu/drm/i915/i915_request.c           |  31 +-
>  drivers/gpu/drm/i915/i915_request.h           |  11 +
>  drivers/gpu/drm/i915/i915_timeline.c          | 290 ++++++++++++++++--
>  drivers/gpu/drm/i915/i915_timeline.h          |  11 +-
>  .../gpu/drm/i915/selftests/i915_timeline.c    | 113 +++++++
>  5 files changed, 417 insertions(+), 39 deletions(-)
>
> [snip]
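The helpers Tvrtko mentions come from drivers/gpu/drm/i915/i915_utils.h and rely on an aligned pointer having known-zero low bits. Below is a minimal userspace sketch of that pattern; the names mirror the kernel's ptr_*_bits helpers, but these simplified definitions are my own illustration, not the verbatim macros, and the page_* variants in that header are, as I read it, thin wrappers that fix n to PAGE_SHIFT.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12 /* assume 4KiB pages */

/* Clear the low n bits, recovering the original aligned pointer. */
static void *ptr_mask_bits(void *ptr, unsigned int n)
{
	return (void *)((uintptr_t)ptr & ~((1ul << n) - 1));
}

/* Read back the value stored in the low n bits. */
static unsigned long ptr_unmask_bits(void *ptr, unsigned int n)
{
	return (uintptr_t)ptr & ((1ul << n) - 1);
}

/* Stash a small value in the unused low bits of an aligned pointer. */
static void *ptr_pack_bits(void *ptr, unsigned long bits, unsigned int n)
{
	assert(bits < (1ul << n));		/* value must fit */
	assert(ptr_unmask_bits(ptr, n) == 0);	/* pointer must be aligned */
	return (void *)((uintptr_t)ptr | bits);
}

int main(void)
{
	void *vaddr = (void *)(uintptr_t)0x7f0000ff8000; /* page aligned */
	unsigned int cacheline = 37;	/* 0..63, fits easily in 12 bits */

	void *packed = ptr_pack_bits(vaddr, cacheline, PAGE_SHIFT);

	printf("vaddr=%p cacheline=%lu\n",
	       ptr_mask_bits(packed, PAGE_SHIFT),
	       ptr_unmask_bits(packed, PAGE_SHIFT));
	return 0;
}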
Quoting Tvrtko Ursulin (2019-02-27 10:44:42)
> On 26/02/2019 10:23, Chris Wilson wrote:
> > [snip]
> > v6: Pack cacheline into empty bits of page-aligned vaddr
>
> Have you considered using page_(mask/unmask/pack/unpack)_bits helpers
> instead of open coding it?

I forgot what they were called and was too lazy to read i915_utils.h

> Is the additional sharing of bits for two variables a thing which makes
> it not as elegant?

Or the opposite: Do those wasted bits not stand out as being inelegant? :)
-Chris
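The "sharing of bits for two variables" in this exchange is the part the generic helpers do not directly express: the patch packs both a 6-bit cacheline index and a free flag into the seven spare low bits of the page-aligned vaddr. Here is a standalone sketch of that layout; CACHELINE_MASK and CACHELINE_FREE are copied from the patch, everything else is illustrative.

#include <stdio.h>

#define BIT(n)		(1ul << (n))
#define GENMASK(h, l)	(((1ul << ((h) - (l) + 1)) - 1) << (l))

#define CACHELINE_MASK	GENMASK(5, 0)	/* 64 cachelines per 4KiB page */
#define CACHELINE_FREE	BIT(6)		/* free flag shares the spare bits */
#define PAGE_MASK	(~(BIT(12) - 1))	/* assume 4KiB pages */

int main(void)
{
	unsigned long vaddr = 0x7f0000ff8000ul; /* page-aligned mapping */

	vaddr |= 21;			/* cacheline_alloc(): pack the index */
	vaddr |= CACHELINE_FREE;	/* cacheline_free(): mark for release */

	printf("page=%#lx cacheline=%lu free=%d\n",
	       vaddr & PAGE_MASK,
	       vaddr & CACHELINE_MASK,
	       !!(vaddr & CACHELINE_FREE));
	return 0;
}

Because the two fields are only ever read back independently (the index in __idle_cacheline_free(), the flag in __cacheline_retire()), sharing the spare bits costs nothing beyond masking twice.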
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 719d1a5ab082..d354967d6ae8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
-	return tl->seqno += 1 + tl->has_initial_breadcrumb;
-}
-
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -532,8 +527,10 @@ struct i915_request *
 i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_request *rq;
 	struct intel_context *ce;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	u32 seqno;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -610,24 +607,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		}
 	}
 
-	rq->rcustate = get_state_synchronize_rcu();
-
 	INIT_LIST_HEAD(&rq->active_list);
+
+	tl = ce->ring->timeline;
+	ret = i915_timeline_get_seqno(tl, rq, &seqno);
+	if (ret)
+		goto err_free;
+
 	rq->i915 = i915;
 	rq->engine = engine;
 	rq->gem_context = ctx;
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
-	rq->timeline = ce->ring->timeline;
+	rq->timeline = tl;
 	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
+	rq->hwsp_seqno = tl->hwsp_seqno;
+	rq->hwsp_cacheline = tl->hwsp_cacheline;
+	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
 	spin_lock_init(&rq->lock);
-	dma_fence_init(&rq->fence,
-		       &i915_fence_ops,
-		       &rq->lock,
-		       rq->timeline->fence_context,
-		       timeline_get_seqno(rq->timeline));
+	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+		       tl->fence_context, seqno);
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -687,6 +687,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
+err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
 	mutex_unlock(&ce->ring->timeline->mutex);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index be3ded6bcf56..ea1e6f0ade53 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -38,6 +38,7 @@ struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
 struct i915_timeline;
+struct i915_timeline_cacheline;
 
 struct i915_capture_list {
 	struct i915_capture_list *next;
@@ -148,6 +149,16 @@ struct i915_request {
 	 */
 	const u32 *hwsp_seqno;
 
+	/*
+	 * If we need to access the timeline's seqno for this request in
+	 * another request, we need to keep a read reference to this associated
+	 * cacheline, so that we do not free and recycle it before the foriegn
+	 * observers have completed. Hence, we keep a pointer to the cacheline
+	 * inside the timeline's HWSP vma, but it is only valid while this
+	 * request has not completed and guarded by the timeline mutex.
+	 */
+	struct i915_timeline_cacheline *hwsp_cacheline;
+
 	/** Position in the ring of the start of the request */
 	u32 head;
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 87a80558da28..f976b5a13a47 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -6,19 +6,29 @@
 
 #include "i915_drv.h"
 
-#include "i915_timeline.h"
+#include "i915_active.h"
 #include "i915_syncmap.h"
+#include "i915_timeline.h"
 
 struct i915_timeline_hwsp {
-	struct i915_vma *vma;
+	struct i915_gt_timelines *gt;
 	struct list_head free_link;
+	struct i915_vma *vma;
 	u64 free_bitmap;
 };
 
-static inline struct i915_timeline_hwsp *
-i915_timeline_hwsp(const struct i915_timeline *tl)
+struct i915_timeline_cacheline {
+	struct i915_active active;
+	struct i915_timeline_hwsp *hwsp;
+	unsigned long vaddr;
+#define CACHELINE_MASK GENMASK(5, 0)
+#define CACHELINE_FREE BIT(6)
+};
+
+static inline struct drm_i915_private *
+hwsp_to_i915(struct i915_timeline_hwsp *hwsp)
 {
-	return tl->hwsp_ggtt->private;
+	return container_of(hwsp->gt, struct drm_i915_private, gt.timelines);
 }
 
 static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -71,6 +81,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 		vma->private = hwsp;
 		hwsp->vma = vma;
 		hwsp->free_bitmap = ~0ull;
+		hwsp->gt = gt;
 
 		spin_lock(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -88,14 +99,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	return hwsp->vma;
 }
 
-static void hwsp_free(struct i915_timeline *timeline)
+static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 {
-	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
-	struct i915_timeline_hwsp *hwsp;
-
-	hwsp = i915_timeline_hwsp(timeline);
-	if (!hwsp) /* leave global HWSP alone! */
-		return;
+	struct i915_gt_timelines *gt = hwsp->gt;
 
 	spin_lock(&gt->hwsp_lock);
 
@@ -103,7 +109,8 @@ static void hwsp_free(struct i915_timeline *timeline)
 	if (!hwsp->free_bitmap)
 		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
 
-	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
+	hwsp->free_bitmap |= BIT_ULL(cacheline);
 
 	/* And if no one is left using it, give the page back to the system */
 	if (hwsp->free_bitmap == ~0ull) {
@@ -115,6 +122,77 @@ static void hwsp_free(struct i915_timeline *timeline)
 	spin_unlock(&gt->hwsp_lock);
 }
 
+static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
+{
+	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+
+	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+	i915_vma_put(cl->hwsp->vma);
+	__idle_hwsp_free(cl->hwsp, cl->vaddr & CACHELINE_MASK);
+
+	i915_active_fini(&cl->active);
+	kfree(cl);
+}
+
+static void __cacheline_retire(struct i915_active *active)
+{
+	struct i915_timeline_cacheline *cl =
+		container_of(active, typeof(*cl), active);
+
+	i915_vma_unpin(cl->hwsp->vma);
+	if (cl->vaddr & CACHELINE_FREE)
+		__idle_cacheline_free(cl);
+}
+
+static struct i915_timeline_cacheline *
+cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
+{
+	struct i915_timeline_cacheline *cl;
+	void *vaddr;
+
+	GEM_BUG_ON(cacheline > CACHELINE_MASK);
+
+	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+	if (!cl)
+		return ERR_PTR(-ENOMEM);
+
+	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		kfree(cl);
+		return ERR_CAST(vaddr);
+	}
+
+	i915_vma_get(hwsp->vma);
+	cl->hwsp = hwsp;
+	cl->vaddr = (unsigned long)vaddr;
+	cl->vaddr |= cacheline;
+
+	i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire);
+
+	return cl;
+}
+
+static void cacheline_acquire(struct i915_timeline_cacheline *cl)
+{
+	if (cl && i915_active_acquire(&cl->active))
+		__i915_vma_pin(cl->hwsp->vma);
+}
+
+static void cacheline_release(struct i915_timeline_cacheline *cl)
+{
+	if (cl)
+		i915_active_release(&cl->active);
+}
+
+static void cacheline_free(struct i915_timeline_cacheline *cl)
+{
+	GEM_BUG_ON(cl->vaddr & CACHELINE_FREE);
+	cl->vaddr |= CACHELINE_FREE;
+
+	if (i915_active_is_idle(&cl->active))
+		__idle_cacheline_free(cl);
+}
+
 int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_timeline *timeline,
 		       const char *name,
@@ -136,29 +214,40 @@ int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->name = name;
 	timeline->pin_count = 0;
 	timeline->has_initial_breadcrumb = !hwsp;
+	timeline->hwsp_cacheline = NULL;
 
-	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
 	if (!hwsp) {
+		struct i915_timeline_cacheline *cl;
 		unsigned int cacheline;
 
 		hwsp = hwsp_alloc(timeline, &cacheline);
 		if (IS_ERR(hwsp))
 			return PTR_ERR(hwsp);
 
+		cl = cacheline_alloc(hwsp->private, cacheline);
+		if (IS_ERR(cl)) {
+			__idle_hwsp_free(hwsp->private, cacheline);
+			return PTR_ERR(cl);
+		}
+
+		timeline->hwsp_cacheline = cl;
 		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
-	}
-	timeline->hwsp_ggtt = i915_vma_get(hwsp);
 
-	vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr)) {
-		hwsp_free(timeline);
-		i915_vma_put(hwsp);
-		return PTR_ERR(vaddr);
+		vaddr = (void *)(cl->vaddr & PAGE_MASK);
+	} else {
+		timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+
+		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+		if (IS_ERR(vaddr))
+			return PTR_ERR(vaddr);
 	}
 
 	timeline->hwsp_seqno =
 		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
 
+	timeline->hwsp_ggtt = i915_vma_get(hwsp);
+	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
 	timeline->fence_context = dma_fence_context_alloc(1);
 
 	spin_lock_init(&timeline->lock);
@@ -240,9 +329,12 @@ void i915_timeline_fini(struct i915_timeline *timeline)
 	GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
-	hwsp_free(timeline);
 
-	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+	if (timeline->hwsp_cacheline)
+		cacheline_free(timeline->hwsp_cacheline);
+	else
+		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
 	i915_vma_put(timeline->hwsp_ggtt);
 }
 
@@ -285,6 +377,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
 		i915_ggtt_offset(tl->hwsp_ggtt) +
 		offset_in_page(tl->hwsp_offset);
 
+	cacheline_acquire(tl->hwsp_cacheline);
 	timeline_add_to_active(tl);
 
 	return 0;
@@ -294,6 +387,156 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	return err;
 }
 
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+	return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+	tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl,
+			  struct i915_request *rq,
+			  u32 *seqno)
+{
+	struct i915_timeline_cacheline *cl;
+	unsigned int cacheline;
+	struct i915_vma *vma;
+	void *vaddr;
+	int err;
+
+	/*
+	 * If there is an outstanding GPU reference to this cacheline,
+	 * such as it being sampled by a HW semaphore on another timeline,
+	 * we cannot wraparound our seqno value (the HW semaphore does
+	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
+	 * So if the cacheline is still busy, we must detach ourselves
+	 * from it and leave it inflight alongside its users.
+	 *
+	 * However, if nobody is watching and we can guarantee that nobody
+	 * will, we could simply reuse the same cacheline.
+	 *
+	 * if (i915_active_request_is_signaled(&tl->last_request) &&
+	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
+	 *	return 0;
+	 *
+	 * That seems unlikely for a busy timeline that needed to wrap in
+	 * the first place, so just replace the cacheline.
+	 */
+
+	vma = hwsp_alloc(tl, &cacheline);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_rollback;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (err) {
+		__idle_hwsp_free(vma->private, cacheline);
+		goto err_rollback;
+	}
+
+	cl = cacheline_alloc(vma->private, cacheline);
+	if (IS_ERR(cl)) {
+		err = PTR_ERR(cl);
+		__idle_hwsp_free(vma->private, cacheline);
+		goto err_unpin;
+	}
+	GEM_BUG_ON(cl->hwsp->vma != vma);
+
+	/*
+	 * Attach the old cacheline to the current request, so that we only
+	 * free it after the current request is retired, which ensures that
+	 * all writes into the cacheline from previous requests are complete.
+	 */
+	err = i915_active_ref(&tl->hwsp_cacheline->active,
+			      tl->fence_context, rq);
+	if (err)
+		goto err_cacheline;
+
+	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
+	cacheline_free(tl->hwsp_cacheline);
+
+	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
+	i915_vma_put(tl->hwsp_ggtt);
+
+	tl->hwsp_ggtt = i915_vma_get(vma);
+
+	vaddr = (void *)(cl->vaddr & PAGE_MASK);
+	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
+	tl->hwsp_seqno =
+		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+
+	tl->hwsp_offset += i915_ggtt_offset(vma);
+
+	cacheline_acquire(cl);
+	tl->hwsp_cacheline = cl;
+
+	*seqno = timeline_advance(tl);
+	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+	return 0;
+
+err_cacheline:
+	cacheline_free(cl);
+err_unpin:
+	i915_vma_unpin(vma);
+err_rollback:
+	timeline_rollback(tl);
+	return err;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+			    struct i915_request *rq,
+			    u32 *seqno)
+{
+	*seqno = timeline_advance(tl);
+
+	/* Replace the HWSP on wraparound for HW semaphores */
+	if (unlikely(!*seqno && tl->hwsp_cacheline))
+		return __i915_timeline_get_seqno(tl, rq, seqno);
+
+	return 0;
+}
+
+static int cacheline_ref(struct i915_timeline_cacheline *cl,
+			 struct i915_request *rq)
+{
+	return i915_active_ref(&cl->active, rq->fence.context, rq);
+}
+
+int i915_timeline_read_hwsp(struct i915_request *from,
+			    struct i915_request *to,
+			    u32 *hwsp)
+{
+	struct i915_timeline_cacheline *cl = from->hwsp_cacheline;
+	struct i915_timeline *tl = from->timeline;
+	int err;
+
+	GEM_BUG_ON(to->timeline == tl);
+
+	mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
+	err = i915_request_completed(from);
+	if (!err)
+		err = cacheline_ref(cl, to);
+	if (!err) {
+		if (likely(cl == tl->hwsp_cacheline)) {
+			*hwsp = tl->hwsp_offset;
+		} else { /* across a seqno wrap, recover the original offset */
+			*hwsp = (cl->vaddr & CACHELINE_MASK) * CACHELINE_BYTES;
+			*hwsp += i915_ggtt_offset(cl->hwsp->vma);
+		}
+	}
+	mutex_unlock(&tl->mutex);
+
+	return err;
+}
+
 void i915_timeline_unpin(struct i915_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -301,6 +544,7 @@ void i915_timeline_unpin(struct i915_timeline *tl)
 		return;
 
 	timeline_remove_from_active(tl);
+	cacheline_release(tl->hwsp_cacheline);
 
 	/*
 	 * Since this timeline is idle, all bariers upon which we were waiting
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 36c3849f7108..60b1dfad93ed 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -34,7 +34,7 @@
 #include "i915_utils.h"
 
 struct i915_vma;
-struct i915_timeline_hwsp;
+struct i915_timeline_cacheline;
 
 struct i915_timeline {
 	u64 fence_context;
@@ -51,6 +51,8 @@ struct i915_timeline {
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
 
+	struct i915_timeline_cacheline *hwsp_cacheline;
+
 	bool has_initial_breadcrumb;
 
 	/**
@@ -162,8 +164,15 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 }
 
 int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+			    struct i915_request *rq,
+			    u32 *seqno);
 void i915_timeline_unpin(struct i915_timeline *tl);
 
+int i915_timeline_read_hwsp(struct i915_request *from,
+			    struct i915_request *until,
+			    u32 *hwsp_offset);
+
 void i915_timelines_init(struct drm_i915_private *i915);
 void i915_timelines_park(struct drm_i915_private *i915);
 void i915_timelines_fini(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index 12ea69b1a1e5..844701759ffc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -641,6 +641,118 @@ static int live_hwsp_alternate(void *arg)
 #undef NUM_TIMELINES
 }
 
+static int live_hwsp_wrap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_timeline *tl;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	/*
+	 * Across a seqno wrap, we need to keep the old cacheline alive for
+	 * foreign GPU references.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	tl = i915_timeline_create(i915, __func__, NULL);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
+		goto out_rpm;
+	}
+	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+		goto out_free;
+
+	err = i915_timeline_pin(tl);
+	if (err)
+		goto out_free;
+
+	for_each_engine(engine, i915, id) {
+		const u32 *hwsp_seqno[2];
+		struct i915_request *rq;
+		u32 seqno[2];
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		rq = i915_request_alloc(engine, i915->kernel_context);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out;
+		}
+
+		tl->seqno = -4u;
+
+		err = i915_timeline_get_seqno(tl, rq, &seqno[0]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
+			 seqno[0], tl->hwsp_offset);
+
+		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		hwsp_seqno[0] = tl->hwsp_seqno;
+
+		err = i915_timeline_get_seqno(tl, rq, &seqno[1]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
+			 seqno[1], tl->hwsp_offset);
+
+		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+		hwsp_seqno[1] = tl->hwsp_seqno;
+
+		/* With wrap should come a new hwsp */
+		GEM_BUG_ON(seqno[1] >= seqno[0]);
+		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
+
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+			pr_err("Wait for timeline writes timed out!\n");
+			err = -EIO;
+			goto out;
+		}
+
+		if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
+			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
+			       *hwsp_seqno[0], *hwsp_seqno[1],
+			       seqno[0], seqno[1]);
+			err = -EINVAL;
+			goto out;
+		}
+
+		i915_retire_requests(i915); /* recycle HWSP */
+	}
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	i915_timeline_unpin(tl);
+out_free:
+	i915_timeline_put(tl);
+out_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return err;
+}
+
 static int live_hwsp_recycle(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -723,6 +835,7 @@ int i915_timeline_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_hwsp_recycle),
 		SUBTEST(live_hwsp_engine),
 		SUBTEST(live_hwsp_alternate),
+		SUBTEST(live_hwsp_wrap),
 	};
 
 	return i915_subtests(tests, i915);
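One closing note on why __i915_timeline_get_seqno() above replaces the cacheline instead of reusing it: per the comment in that function, a HW semaphore polls the HWSP with a strict unsigned greater-than-or-equals compare, while the driver compares seqnos with wraparound-aware arithmetic. The sketch below contrasts the two; i915_seqno_passed() is shown in its usual two's-complement form, and hw_semaphore_signaled() is only a hypothetical model of the semaphore-wait comparison, not real hardware behaviour.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wraparound-aware: "has seq1 passed seq2?" via signed difference. */
static bool i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

/* What a semaphore poll against the HWSP effectively evaluates. */
static bool hw_semaphore_signaled(uint32_t hwsp_value, uint32_t wait_for)
{
	return hwsp_value >= wait_for;
}

int main(void)
{
	uint32_t before_wrap = 0xfffffffe, after_wrap = 2;

	/* The CPU side knows that 2 comes "after" 0xfffffffe... */
	printf("driver sees passed:   %d\n",
	       i915_seqno_passed(after_wrap, before_wrap));

	/*
	 * ...but a semaphore still waiting on the old cacheline would
	 * never see hwsp >= 0xfffffffe once the seqno restarts from
	 * small values; hence the old cacheline is kept alive for its
	 * waiters and a fresh one is installed on wrap.
	 */
	printf("hw semaphore signals: %d\n",
	       hw_semaphore_signaled(after_wrap, before_wrap));
	return 0;
}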