[09/25] drm/i915: Use per object locking in execbuf, v9.

Message ID	20200429123307.1449297-9-maarten.lankhorst@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=NN/q=6N=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org D1CA52072A From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> To: intel-gfx@lists.freedesktop.org Date: Wed, 29 Apr 2020 14:32:51 +0200 Message-Id: <20200429123307.1449297-9-maarten.lankhorst@linux.intel.com> In-Reply-To: <20200429123307.1449297-1-maarten.lankhorst@linux.intel.com> References: <20200429123307.1449297-1-maarten.lankhorst@linux.intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 09/25] drm/i915: Use per object locking in execbuf, v9. Precedence: list Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/25] perf/core: Only copy-to-user after completely unlocking all locks, v3. \| expand [01/25] perf/core: Only copy-to-user after completely unlocking all locks, v3. [03/25] Revert "drm/i915/gem: Drop relocation slowpath" [04/25] drm/i915: Add an implementation for i915_gem_ww_ctx locking, v2. [05/25] drm/i915: Remove locking from i915_gem_object_prepare_read/write [06/25] drm/i915: Parse command buffer earlier in eb_relocate(slow) [07/25] Revert "drm/i915/gem: Split eb_vma into its own allocation" [08/25] drm/i915/gem: Make eb_add_lut interruptible wait on object lock. [09/25] drm/i915: Use per object locking in execbuf, v9. [10/25] drm/i915: Use ww locking in intel_renderstate. [11/25] drm/i915: Add ww context handling to context_barrier_task [12/25] drm/i915: Nuke arguments to eb_pin_engine [13/25] drm/i915: Pin engine before pinning all objects, v3. [14/25] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex [15/25] drm/i915: Make sure execbuffer always passes ww state to i915_vma_pin. [16/25] drm/i915: Convert i915_gem_object/client_blt.c to use ww locking as well, v2. [17/25] drm/i915: Kill last user of intel_context_create_request outside of selftests [18/25] drm/i915: Convert i915_perf to ww locking as well [19/25] drm/i915: Dirty hack to fix selftests locking inversion [20/25] drm/i915/selftests: Fix locking inversion in lrc selftest. [21/25] drm/i915: Use ww pinning for intel_context_create_request() [22/25] drm/i915: Move i915_vma_lock in the selftests to avoid lock inversion, v2. [23/25] drm/i915: Add ww locking to vm_fault_gtt [24/25] drm/i915: Add ww locking to pin_to_display_plane [25/25] drm/i915: Ensure we hold the pin mutex

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5b4f6fb1428c..96b172f9b9f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -249,6 +249,8 @@ struct i915_execbuffer { /** list of vma that have execobj.relocation_count */ struct list_head relocs; + struct i915_gem_ww_ctx ww; + /** * Track the most recently used object for relocations, as we * frequently have to perform multiple relocations within the same @@ -267,14 +269,18 @@ struct i915_execbuffer { struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; + struct intel_gt_buffer_pool_node *pool; } reloc_cache; + struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ + u64 invalid_flags; /** Set of execobj.flags that are invalid */ u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u32 batch_start_offset; /** Location within object of batch */ u32 batch_len; /** Length of batch within object */ u32 batch_flags; /** Flags composed for emit_bb_start() */ + struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ /** * Indicate either the size of the hastable used to resolve @@ -441,24 +447,18 @@ eb_pin_vma(struct i915_execbuffer *eb, return !eb_vma_misplaced(entry, vma, ev->flags); } -static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) -{ - GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); - - if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - __i915_vma_unpin_fence(vma); - - __i915_vma_unpin(vma); -} - static inline void eb_unreserve_vma(struct eb_vma *ev) { if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) return; - __eb_unreserve_vma(ev->vma, ev->flags); ev->flags &= ~__EXEC_OBJECT_RESERVED; + + if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(ev->vma); + + __i915_vma_unpin(ev->vma); } static int @@ -552,16 +552,6 @@ eb_add_vma(struct i915_execbuffer *eb, eb->batch = ev; } - - if (eb_pin_vma(eb, entry, ev)) { - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; - eb->args->flags |= __EXEC_HAS_RELOC; - } - } else { - eb_unreserve_vma(ev); - list_add_tail(&ev->bind_link, &eb->unbound); - } } static inline int use_cpu_reloc(const struct reloc_cache *cache, @@ -646,10 +636,6 @@ static int eb_reserve(struct i915_execbuffer *eb) * This avoid unnecessary unbinding of later objects in order to make * room for the earlier objects *unless* we need to defragment. */ - - if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex)) - return -EINTR; - pass = 0; do { list_for_each_entry(ev, &eb->unbound, bind_link) { @@ -657,8 +643,8 @@ static int eb_reserve(struct i915_execbuffer *eb) if (err) break; } - if (!(err == -ENOSPC || err == -EAGAIN)) - break; + if (err != -ENOSPC) + return err; /* Resort *all* the objects into priority order */ INIT_LIST_HEAD(&eb->unbound); @@ -688,13 +674,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } list_splice_tail(&last, &eb->unbound); - if (err == -EAGAIN) { - mutex_unlock(&eb->i915->drm.struct_mutex); - flush_workqueue(eb->i915->mm.userptr_wq); - mutex_lock(&eb->i915->drm.struct_mutex); - continue; - } - switch (pass++) { case 0: break; @@ -705,20 +684,15 @@ static int eb_reserve(struct i915_execbuffer *eb) err = i915_gem_evict_vm(eb->context->vm); mutex_unlock(&eb->context->vm->mutex); if (err) - goto unlock; + return err; break; default: - err = -ENOSPC; - goto unlock; + return -ENOSPC; } pin_flags = PIN_USER; } while (1); - -unlock: - mutex_unlock(&eb->i915->drm.struct_mutex); - return err; } static unsigned int eb_batch_index(const struct i915_execbuffer *eb) @@ -846,7 +820,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) int err = 0; INIT_LIST_HEAD(&eb->relocs); - INIT_LIST_HEAD(&eb->unbound); for (i = 0; i < eb->buffer_count; i++) { struct i915_vma *vma; @@ -889,6 +862,48 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) return err; } +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + err = i915_gem_object_lock(vma->obj, &eb->ww); + if (err) + return err; + + if (eb_pin_vma(eb, entry, ev)) { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } else { + eb_unreserve_vma(ev); + + list_add_tail(&ev->bind_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) { + err = i915_vma_unbind(vma); + if (err) + return err; + } + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); + } + + if (!list_empty(&eb->unbound)) + return eb_reserve(eb); + + return 0; +} + static struct eb_vma * eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) { @@ -909,7 +924,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(const struct i915_execbuffer *eb) +static void eb_release_vmas(const struct i915_execbuffer *eb, bool final) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -921,12 +936,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb) if (!vma) break; - eb->vma[i].vma = NULL; - - if (ev->flags & __EXEC_OBJECT_HAS_PIN) - __eb_unreserve_vma(vma, ev->flags); + eb_unreserve_vma(ev); - i915_vma_put(vma); + if (final) + i915_vma_put(vma); } } @@ -945,6 +958,14 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target->node.start); } +static void reloc_cache_clear(struct reloc_cache *cache) +{ + cache->rq = NULL; + cache->rq_cmd = NULL; + cache->pool = NULL; + cache->rq_size = 0; +} + static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -957,8 +978,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - cache->rq = NULL; - cache->rq_size = 0; + reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -982,7 +1002,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) static void reloc_gpu_flush(struct reloc_cache *cache) { - struct drm_i915_gem_object *obj = cache->rq->batch->obj; + struct drm_i915_gem_object *obj = cache->pool->obj; GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; @@ -993,7 +1013,16 @@ static void reloc_gpu_flush(struct reloc_cache *cache) intel_gt_chipset_flush(cache->rq->engine->gt); i915_request_add(cache->rq); - cache->rq = NULL; + + /* + * This is a bit nasty, normally we keep objects locked until the end + * of execbuffer, but we already submit this, and have to unlock before + * dropping the reference. Fortunately we can only hold 1 pool node at + * a time, so this should be harmless. + */ + i915_gem_ww_unlock_single(obj); + intel_gt_buffer_pool_put(cache->pool); + reloc_cache_clear(cache); } static void reloc_cache_reset(struct reloc_cache *cache) @@ -1015,7 +1044,6 @@ static void reloc_cache_reset(struct reloc_cache *cache) kunmap_atomic(vaddr); i915_gem_object_finish_access(obj); - i915_gem_object_unlock(obj); } else { struct i915_ggtt *ggtt = cache_to_ggtt(cache); @@ -1050,15 +1078,9 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, unsigned int flushes; int err; - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - return ERR_PTR(err); - err = i915_gem_object_prepare_write(obj, &flushes); - if (err) { - i915_gem_object_unlock(obj); + if (err) return ERR_PTR(err); - } BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); @@ -1097,9 +1119,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (use_cpu_reloc(cache, obj)) return NULL; - i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); if (err) return ERR_PTR(err); @@ -1188,7 +1208,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) struct drm_i915_gem_object *obj = vma->obj; int err; - i915_vma_lock(vma); + assert_vma_held(vma); if (obj->cache_dirty & ~obj->cache_coherent) i915_gem_clflush_object(obj, 0); @@ -1198,8 +1218,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - return err; } @@ -1208,15 +1226,22 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool; + struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_gt_get_buffer_pool(eb->engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(eb->engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + eb->reloc_pool = NULL; + + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err_pool; cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? @@ -1224,7 +1249,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, I915_MAP_FORCE_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_pool; + goto err_pool; } batch = i915_vma_instance(pool->obj, vma->vm, NULL); @@ -1257,11 +1282,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto skip_request; - i915_vma_lock(batch); + assert_vma_held(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto skip_request; @@ -1270,10 +1294,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; - cache->rq_size = 0; + cache->pool = pool; /* Return with batch mapping (cmd) still pinned */ - goto out_pool; + return 0; skip_request: i915_request_set_error_once(rq, err); @@ -1283,8 +1307,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, i915_vma_unpin(batch); err_unmap: i915_gem_object_unpin_map(pool->obj); -out_pool: - intel_gt_buffer_pool_put(pool); +err_pool: + eb->reloc_pool = pool; return err; } @@ -1351,7 +1375,9 @@ relocate_entry(struct i915_vma *vma, len = 3; batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) + if (batch == ERR_PTR(-EDEADLK)) + return (s64)-EDEADLK; + else if (IS_ERR(batch)) goto repeat; addr = gen8_canonical_addr(vma->node.start + offset); @@ -1754,6 +1780,10 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) goto out; } + /* We may process another execbuffer during the unlock... */ + eb_release_vmas(eb, false); + i915_gem_ww_ctx_fini(&eb->ww); + /* * We take 3 passes through the slowpatch. * @@ -1776,15 +1806,29 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) cond_resched(); err = 0; } + + flush_workqueue(eb->i915->mm.userptr_wq); + + i915_gem_ww_ctx_init(&eb->ww, true); if (err) goto out; + /* reacquire the objects */ +repeat_validate: + err = eb_validate_vmas(eb); + if (err) + goto err; + + GEM_BUG_ON(!eb->batch); + list_for_each_entry(ev, &eb->relocs, reloc_link) { if (!have_copy) { pagefault_disable(); err = eb_relocate_vma(eb, ev); pagefault_enable(); - if (err) + if (err == -EDEADLK) + goto err; + else if (err) goto repeat; } else { err = eb_relocate_vma_slow(eb, ev); @@ -1806,6 +1850,13 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb) */ err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); + if (!err) + goto repeat_validate; + } + if (err == -EAGAIN) goto repeat; @@ -1834,61 +1885,59 @@ static int eb_relocate_parse(struct i915_execbuffer *eb) { int err; - err = eb_lookup_vmas(eb); - if (err) - return err; - - if (!list_empty(&eb->unbound)) { - err = eb_reserve(eb); - if (err) - return err; - } +retry: + err = eb_validate_vmas(eb); + if (err == -EAGAIN) + goto slow; + else if (err) + goto err; /* The objects are in their final locations, apply the relocations. */ if (eb->args->flags & __EXEC_HAS_RELOC) { struct eb_vma *ev; list_for_each_entry(ev, &eb->relocs, reloc_link) { - if (eb_relocate_vma(eb, ev)) - return eb_relocate_parse_slow(eb); + err = eb_relocate_vma(eb, ev); + if (err == -EDEADLK) + goto err; + else if (err) + goto slow; } } - return eb_parse(eb); + err = eb_parse(eb); + +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); + if (!err) + goto retry; + } + + return err; + +slow: + err = eb_relocate_parse_slow(eb); + if (err) + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + eb->args->flags &= ~__EXEC_HAS_RELOC; + + return err; } static int eb_move_to_gpu(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; - struct ww_acquire_ctx acquire; - unsigned int i; + unsigned int i = count; int err = 0; - ww_acquire_init(&acquire, &reservation_ww_class); - - for (i = 0; i < count; i++) { - struct eb_vma *ev = &eb->vma[i]; - struct i915_vma *vma = ev->vma; - - err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); - if (err == -EDEADLK) { - GEM_BUG_ON(i == 0); - do { - int j = i - 1; - - ww_mutex_unlock(&eb->vma[j].vma->resv->lock); - - swap(eb->vma[i], eb->vma[j]); - } while (--i); - - err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, - &acquire); - } - if (err) - break; - } - ww_acquire_done(&acquire); - while (i--) { struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; @@ -1932,21 +1981,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (err == 0) err = i915_vma_move_to_active(vma, eb->request, flags); - - i915_vma_unlock(vma); - - __eb_unreserve_vma(vma, flags); - i915_vma_put(vma); - - ev->vma = NULL; } - ww_acquire_fini(&acquire); if (unlikely(err)) goto err_skip; - eb->exec = NULL; - /* Unconditionally flush any chipset caches (for streaming writes). */ intel_gt_chipset_flush(eb->engine->gt); return 0; @@ -2096,10 +2135,6 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, pw->shadow = shadow; pw->trampoline = trampoline; - err = dma_resv_lock_interruptible(pw->batch->resv, NULL); - if (err) - goto err_trampoline; - err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) goto err_batch_unlock; @@ -2114,19 +2149,14 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - dma_resv_unlock(pw->batch->resv); - /* Force execution to wait for completion of the parser */ - dma_resv_lock(shadow->resv, NULL); dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - dma_resv_unlock(shadow->resv); dma_fence_work_commit_imm(&pw->base); return 0; err_batch_unlock: dma_resv_unlock(pw->batch->resv); -err_trampoline: if (trampoline) i915_active_release(&trampoline->active); err_shadow: @@ -2141,7 +2171,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_gt_buffer_pool_node *pool; + struct intel_gt_buffer_pool_node *pool = eb->batch_pool; struct i915_vma *shadow, *trampoline; unsigned int len; int err; @@ -2164,9 +2194,16 @@ static int eb_parse(struct i915_execbuffer *eb) len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + eb->batch_pool = pool; + } + + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err; shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); if (IS_ERR(shadow)) { @@ -2202,7 +2239,6 @@ static int eb_parse(struct i915_execbuffer *eb) eb->trampoline = trampoline; eb->batch_start_offset = 0; - shadow->private = pool; return 0; err_trampoline: @@ -2211,7 +2247,6 @@ static int eb_parse(struct i915_execbuffer *eb) err_shadow: i915_vma_unpin(shadow); err: - intel_gt_buffer_pool_put(pool); return err; } @@ -2723,6 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.exec = exec; eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); eb.vma[0].vma = NULL; + eb.reloc_pool = eb.batch_pool = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2790,6 +2826,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_context; + err = eb_lookup_vmas(&eb); + if (err) { + eb_release_vmas(&eb, true); + goto err_engine; + } + + i915_gem_ww_ctx_init(&eb.ww, true); + err = eb_relocate_parse(&eb); if (err) { /* @@ -2803,6 +2847,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + ww_acquire_done(&eb.ww.ctx); + /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. @@ -2823,7 +2869,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err_parse; + goto err_vma; } batch = vma; @@ -2876,8 +2922,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = batch; - if (batch->private) - intel_gt_buffer_pool_mark_active(batch->private, eb.request); + if (eb.batch_pool) + intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); @@ -2904,14 +2950,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) i915_vma_unpin(batch); -err_parse: - if (batch->private) - intel_gt_buffer_pool_put(batch->private); err_vma: - if (eb.exec) - eb_release_vmas(&eb); + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); + WARN_ON(err == -EDEADLK); + i915_gem_ww_ctx_fini(&eb.ww); + + if (eb.batch_pool) + intel_gt_buffer_pool_put(eb.batch_pool); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); +err_engine: eb_unpin_engine(&eb); err_context: i915_gem_context_put(eb.gem_context); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6846241f9079..470a89761fd7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1360,6 +1360,12 @@ static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) } } +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) +{ + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); +} + void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) { i915_gem_ww_ctx_unlock_all(ww); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 988755dbf4be..f6bef9894111 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -126,5 +126,6 @@ struct i915_gem_ww_ctx { void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); #endif /* __I915_GEM_H__ */

[09/25] drm/i915: Use per object locking in execbuf, v9.

Commit Message

Patch