@@ -18,6 +18,7 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
#include "gt/intel_ring.h"
#include "i915_drv.h"
@@ -44,6 +45,12 @@ struct eb_vma {
u32 handle;
};
+struct eb_bind_vma {
+ struct eb_vma *ev;
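+ /* reserves the evicted range in the drm_mm until the worker rebinds it */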
+ struct drm_mm_node hole;
+ unsigned int bind_flags;
+};
+
struct eb_vma_array {
struct kref kref;
struct eb_vma vma[];
@@ -67,11 +74,12 @@ struct eb_vma_array {
I915_EXEC_RESOURCE_STREAMER)
/* Catch emission of unexpected errors for CI! */
+#define __EINVAL__ 22
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#undef EINVAL
#define EINVAL ({ \
DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
- 22; \
+ __EINVAL__; \
})
#endif
@@ -323,6 +331,12 @@ static struct eb_vma_array *eb_vma_array_create(unsigned int count)
return arr;
}
+static struct eb_vma_array *eb_vma_array_get(struct eb_vma_array *arr)
+{
+ kref_get(&arr->kref);
+ return arr;
+}
+
static inline void eb_unreserve_vma(struct eb_vma *ev)
{
struct i915_vma *vma = ev->vma;
@@ -456,7 +470,10 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
const struct i915_vma *vma,
unsigned int flags)
{
- if (vma->node.size < entry->pad_to_size)
+ if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
+ return true;
+
+ if (vma->node.size < max(vma->size, entry->pad_to_size))
return true;
if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
@@ -481,32 +498,6 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
return false;
}
-static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
- unsigned int exec_flags)
-{
- u64 pin_flags = 0;
-
- if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
- pin_flags |= PIN_GLOBAL;
-
- /*
- * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
- * limit address to the first 4GBs for unflagged objects.
- */
- if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- pin_flags |= PIN_ZONE_4G;
-
- if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
- pin_flags |= PIN_MAPPABLE;
-
- if (exec_flags & EXEC_OBJECT_PINNED)
- pin_flags |= entry->offset | PIN_OFFSET_FIXED;
- else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
- pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-
- return pin_flags;
-}
-
static bool eb_pin_vma_fence_inplace(struct eb_vma *ev)
{
return false; /* We need to add some new fence serialisation */
@@ -520,6 +511,10 @@ eb_pin_vma_inplace(struct i915_execbuffer *eb,
struct i915_vma *vma = ev->vma;
unsigned int pin_flags;
+ /* Concurrent async binds in progress, get in the queue */
+ if (!i915_active_is_idle(&vma->vm->binding))
+ return false;
+
if (eb_vma_misplaced(entry, vma, ev->flags))
return false;
@@ -640,45 +635,463 @@ eb_add_vma(struct i915_execbuffer *eb,
}
}
-static int eb_reserve_vma(const struct i915_execbuffer *eb,
- struct eb_vma *ev,
- u64 pin_flags)
+struct eb_vm_work {
+ struct dma_fence_work base;
+ struct eb_vma_array *array;
+ struct eb_bind_vma *bind;
+ struct i915_address_space *vm;
+ struct i915_vm_pt_stash stash;
+ struct list_head evict_list;
+ u64 *p_flags;
+ u64 id;
+ unsigned long count;
+};
+
+static inline u64 node_end(const struct drm_mm_node *node)
+{
+ return node->start + node->size;
+}
+
+static int set_bind_fence(struct i915_vma *vma, struct eb_vm_work *work)
+{
+ struct dma_fence *prev;
+ int err = 0;
+
+ lockdep_assert_held(&vma->vm->mutex);
+ prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+ if (unlikely(prev)) {
+ err = i915_sw_fence_await_dma_fence(&work->base.chain, prev, 0,
+ GFP_NOWAIT | __GFP_NOWARN);
+ dma_fence_put(prev);
+ }
+
+ return err < 0 ? err : 0;
+}
+
+static int await_evict(struct eb_vm_work *work, struct i915_vma *vma)
{
- struct drm_i915_gem_exec_object2 *entry = ev->exec;
- struct i915_vma *vma = ev->vma;
int err;
- if (drm_mm_node_allocated(&vma->node) &&
- eb_vma_misplaced(entry, vma, ev->flags)) {
- err = i915_vma_unbind(vma);
+ GEM_BUG_ON(rcu_access_pointer(vma->active.excl.fence) == &work->base.dma);
+
+ /* Wait for all other previous activity */
+ err = i915_sw_fence_await_active(&work->base.chain,
+ &vma->active,
+ I915_ACTIVE_AWAIT_ACTIVE);
+ /* Then insert along the exclusive vm->mutex timeline */
+ if (err == 0)
+ err = set_bind_fence(vma, work);
+
+ return err;
+}
+
+static int
+evict_for_node(struct eb_vm_work *work,
+ struct eb_bind_vma *const target,
+ unsigned int flags)
+{
+ struct i915_vma *target_vma = target->ev->vma;
+ struct i915_address_space *vm = target_vma->vm;
+ const unsigned long color = target_vma->node.color;
+ const u64 start = target_vma->node.start;
+ const u64 end = start + target_vma->node.size;
+ u64 hole_start = start, hole_end = end;
+ struct i915_vma *vma, *next;
+ struct drm_mm_node *node;
+ LIST_HEAD(evict_list);
+ LIST_HEAD(steal_list);
+ int err = 0;
+
+ lockdep_assert_held(&vm->mutex);
+ GEM_BUG_ON(drm_mm_node_allocated(&target_vma->node));
+ GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+
+ if (i915_vm_has_cache_coloring(vm)) {
+ /* Expand search to cover neighbouring guard pages (or lack!) */
+ if (hole_start)
+ hole_start -= I915_GTT_PAGE_SIZE;
+
+ /* Always look at the page afterwards to avoid the end-of-GTT */
+ hole_end += I915_GTT_PAGE_SIZE;
+ }
+ GEM_BUG_ON(hole_start >= hole_end);
+
+ drm_mm_for_each_node_in_range(node, &vm->mm, hole_start, hole_end) {
+ GEM_BUG_ON(node == &target_vma->node);
+ err = -ENOSPC;
+
+ /* If we find any non-objects (!vma), we cannot evict them */
+ if (node->color == I915_COLOR_UNEVICTABLE)
+ goto err;
+
+ /*
+ * If we are using coloring to insert guard pages between
+ * different cache domains within the address space, we have
+ * to check whether the objects on either side of our range
+ * abut and conflict. If they are in conflict, then we evict
+ * those as well to make room for our guard pages.
+ */
+ if (i915_vm_has_cache_coloring(vm)) {
+ if (node_end(node) == start && node->color == color)
+ continue;
+
+ if (node->start == end && node->color == color)
+ continue;
+ }
+
+ GEM_BUG_ON(!drm_mm_node_allocated(node));
+ vma = container_of(node, typeof(*vma), node);
+
+ if (flags & PIN_NOEVICT || i915_vma_is_pinned(vma))
+ goto err;
+
+ /* If this VMA is already being freed, or idle, steal it! */
+ if (!i915_active_acquire_if_busy(&vma->active)) {
+ list_move(&vma->vm_link, &steal_list);
+ continue;
+ }
+
+ if (!(flags & PIN_NONBLOCK))
+ err = await_evict(work, vma);
+ i915_active_release(&vma->active);
if (err)
- return err;
+ goto err;
+
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+ list_move(&vma->vm_link, &evict_list);
}
- err = i915_vma_pin(vma,
- entry->pad_to_size, entry->alignment,
- eb_pin_flags(entry, ev->flags) | pin_flags);
- if (err)
- return err;
+ list_for_each_entry_safe(vma, next, &steal_list, vm_link) {
+ GEM_BUG_ON(i915_vma_is_pinned(vma));
+ GEM_BUG_ON(i915_vma_is_active(vma));
+ __i915_vma_evict(vma);
+ drm_mm_remove_node(&vma->node);
+ /* No ref held; vma may now be concurrently freed */
+ }
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start | UPDATE;
- eb->args->flags |= __EXEC_HAS_RELOC;
+ /* No overlapping nodes to evict, claim the slot for ourselves! */
+ if (list_empty(&evict_list))
+ return drm_mm_reserve_node(&vm->mm, &target_vma->node);
+
+ /*
+ * Mark this range as reserved.
+ *
+ * We have not yet removed the PTEs for the old evicted nodes, so
+ * must prevent this range from being reused for anything else. The
+ * PTE will be cleared when the range is idle (during the rebind
+ * phase in the worker).
+ */
+ target->hole.color = I915_COLOR_UNEVICTABLE;
+ target->hole.start = start;
+ target->hole.size = end;
+
+ list_for_each_entry(vma, &evict_list, vm_link) {
+ target->hole.start =
+ min(target->hole.start, vma->node.start);
+ target->hole.size =
+ max(target->hole.size, node_end(&vma->node));
+
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+ GEM_BUG_ON(vma->node.mm != &vm->mm);
+ set_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma));
+ drm_mm_remove_node(&vma->node);
+ GEM_BUG_ON(i915_vma_is_pinned(vma));
}
+ list_splice(&evict_list, &work->evict_list);
- if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
- err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
- return err;
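+ /* Until now hole.size held the hole's end address; convert it into a length */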
+ target->hole.size -= target->hole.start;
+
+ return drm_mm_reserve_node(&vm->mm, &target->hole);
+
+err:
+ list_splice(&evict_list, &vm->bound_list);
+ list_splice(&steal_list, &vm->bound_list);
+ return err;
+}
+
+static int
+evict_in_range(struct eb_vm_work *work,
+ struct eb_bind_vma * const target,
+ u64 start, u64 end, u64 align)
+{
+ struct i915_vma *target_vma = target->ev->vma;
+ struct i915_address_space *vm = target_vma->vm;
+ struct i915_vma *active = NULL;
+ struct i915_vma *vma, *next;
+ struct drm_mm_scan scan;
+ LIST_HEAD(evict_list);
+ bool found = false;
+
+ lockdep_assert_held(&vm->mutex);
+
+ drm_mm_scan_init_with_range(&scan, &vm->mm,
+ target_vma->node.size,
+ align,
+ target_vma->node.color,
+ start, end,
+ DRM_MM_INSERT_BEST);
+
+ list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+ if (i915_vma_is_pinned(vma))
+ continue;
+
+ if (vma == active)
+ active = ERR_PTR(-EAGAIN);
+
+ /* Prefer to reuse idle nodes; push all active vma to the end */
+ if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
+ if (!active)
+ active = vma;
+
+ list_move_tail(&vma->vm_link, &vm->bound_list);
+ continue;
+ }
+
+ list_move(&vma->vm_link, &evict_list);
+ if (drm_mm_scan_add_block(&scan, &vma->node)) {
+ target_vma->node.start =
+ round_up(scan.hit_start, align);
+ found = true;
+ break;
+ }
+ }
+
+ list_for_each_entry(vma, &evict_list, vm_link)
+ drm_mm_scan_remove_block(&scan, &vma->node);
+ list_splice(&evict_list, &vm->bound_list);
+ if (!found)
+ return -ENOSPC;
+
+ return evict_for_node(work, target, 0);
+}
+
+static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
+{
+ u64 range, addr;
+
+ GEM_BUG_ON(range_overflows(start, len, end));
+ GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
+
+ range = round_down(end - len, align) - round_up(start, align);
+ if (range) {
+ if (sizeof(unsigned long) == sizeof(u64)) {
+ addr = get_random_long();
+ } else {
+ addr = get_random_int();
+ if (range > U32_MAX) {
+ addr <<= 32;
+ addr |= get_random_int();
+ }
}
+ div64_u64_rem(addr, range, &addr);
+ start += addr;
+ }
+
+ return round_up(start, align);
+}
+
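+/* Report no alignment to drm_mm when only the minimum GTT alignment is required */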
+static u64 align0(u64 align)
+{
+ return align <= I915_GTT_MIN_ALIGNMENT ? 0 : align;
+}
+
+static struct drm_mm_node *__best_hole(struct drm_mm *mm, u64 size)
+{
+ struct rb_node *rb = mm->holes_size.rb_root.rb_node;
+ struct drm_mm_node *best = NULL;
+
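+ /* Search the hole-size rbtree for the smallest hole that can still fit the node */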
+ while (rb) {
+ struct drm_mm_node *node =
+ rb_entry(rb, struct drm_mm_node, rb_hole_size);
+
+ if (size <= node->hole_size) {
+ best = node;
+ rb = rb->rb_right;
+ } else {
+ rb = rb->rb_left;
+ }
+ }
+
+ return best;
+}
+
+static int best_hole(struct drm_mm *mm, struct drm_mm_node *node,
+ u64 start, u64 end, u64 align)
+{
+ struct drm_mm_node *hole;
+ u64 size = node->size;
+
+ do {
+ hole = __best_hole(mm, size);
+ if (!hole)
+ return -ENOSPC;
+
+ node->start = round_up(max(start, drm_mm_hole_node_start(hole)),
+ align);
+ if (min(drm_mm_hole_node_end(hole), end) >=
+ node->start + node->size)
+ return drm_mm_reserve_node(mm, node);
+
+ /*
+ * Too expensive to search for every single hole every time,
+ * so just look for the next bigger hole, introducing enough
+ * space for alignments. Finding the smallest hole with ideal
+ * alignment scales very poorly, so we choose to waste space
+ * if an alignment is forced. On the other hand, simply
+ * randomly selecting an offset in 48b space will cause us
+ * to use the majority of that space and exhaust all memory
+ * in storing the page directories. Compromise is required.
+ */
+ size = hole->hole_size + align;
+ } while (1);
+}
+
+static int eb_reserve_vma(struct eb_vm_work *work, struct eb_bind_vma *bind)
+{
+ struct drm_i915_gem_exec_object2 *entry = bind->ev->exec;
+ const unsigned int exec_flags = bind->ev->flags;
+ struct i915_vma *vma = bind->ev->vma;
+ struct i915_address_space *vm = vma->vm;
+ u64 start = 0, end = vm->total;
+ u64 align = entry->alignment ?: I915_GTT_MIN_ALIGNMENT;
+ unsigned int bind_flags;
+ int err;
+
+ lockdep_assert_held(&vm->mutex);
+
+ bind_flags = PIN_USER;
+ if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+ bind_flags |= PIN_GLOBAL;
+
+ if (drm_mm_node_allocated(&vma->node))
+ goto pin;
+
+ GEM_BUG_ON(i915_vma_is_pinned(vma));
+ GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
+ GEM_BUG_ON(i915_active_fence_isset(&vma->active.excl));
+ GEM_BUG_ON(!vma->size);
+
+ /* Reuse old address (if it doesn't conflict with new requirements) */
+ if (eb_vma_misplaced(entry, vma, exec_flags)) {
+ vma->node.start = entry->offset & PIN_OFFSET_MASK;
+ vma->node.size = max(entry->pad_to_size, vma->size);
+ vma->node.color = 0;
+ if (i915_vm_has_cache_coloring(vm))
+ vma->node.color = vma->obj->cache_level;
+ }
+
+ /*
+ * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to the first 4GBs for unflagged objects.
+ */
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
+
+ align = max(align, vma->display_alignment);
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) {
+ vma->node.size = max_t(u64, vma->node.size, vma->fence_size);
+ end = min_t(u64, end, i915_vm_to_ggtt(vm)->mappable_end);
+ align = max_t(u64, align, vma->fence_alignment);
+ }
+
+ if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
+ start = BATCH_OFFSET_BIAS;
+
+ GEM_BUG_ON(!vma->node.size);
+ if (vma->node.size > end - start)
+ return -E2BIG;
+
+ /* Try the user's preferred location first (mandatory if soft-pinned) */
+ err = -__EINVAL__;
+ if (vma->node.start >= start &&
+ IS_ALIGNED(vma->node.start, align) &&
+ !range_overflows(vma->node.start, vma->node.size, end)) {
+ unsigned int pin_flags;
+
+ /*
+ * Prefer to relocate and spread objects around.
+ *
+ * If we relocate and continue to use the new location in
+ * future batches, we only pay the relocation cost once.
+ *
+ * If we instead keep reusing the same address for different
+ * objects, each batch must remove/insert objects into the GTT,
+ * which is more expensive than performing a relocation.
+ */
+ pin_flags = 0;
+ if (!(exec_flags & EXEC_OBJECT_PINNED))
+ pin_flags = PIN_NOEVICT;
+
+ err = evict_for_node(work, bind, pin_flags);
+ if (err == 0)
+ goto pin;
+ }
+ if (exec_flags & EXEC_OBJECT_PINNED)
+ return err;
+
+ /* Try the first available free space */
+ if (!best_hole(&vm->mm, &vma->node, start, end, align))
+ goto pin;
+
+ /* Pick a random slot and see if it's available [O(N) worst case] */
+ vma->node.start = random_offset(start, end, vma->node.size, align);
+ if (evict_for_node(work, bind, PIN_NONBLOCK) == 0)
+ goto pin;
+
+ /* Otherwise search all free space [degrades to O(N^2)] */
+ if (drm_mm_insert_node_in_range(&vm->mm, &vma->node,
+ vma->node.size,
+ align0(align),
+ vma->node.color,
+ start, end,
+ DRM_MM_INSERT_BEST) == 0)
+ goto pin;
+
+ /* Pretty busy! Loop over "LRU" and evict oldest in our search range */
+ err = evict_in_range(work, bind, start, end, align);
+ if (unlikely(err))
+ return err;
+
+pin:
+ if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ err = __i915_vma_pin_fence(vma); /* XXX no waiting */
+ if (unlikely(err))
+ return err;
if (vma->fence)
- ev->flags |= __EXEC_OBJECT_HAS_FENCE;
+ bind->ev->flags |= __EXEC_OBJECT_HAS_FENCE;
}
- ev->flags |= __EXEC_OBJECT_HAS_PIN;
- GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
+ bind_flags &= ~atomic_read(&vma->flags);
+ if (bind_flags) {
+ err = set_bind_fence(vma, work);
+ if (unlikely(err))
+ return err;
+
+ atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
+ atomic_or(bind_flags, &vma->flags);
+
+ if (i915_vma_is_ggtt(vma))
+ __i915_vma_set_map_and_fenceable(vma);
+
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+ list_move_tail(&vma->vm_link, &vm->bound_list);
+ bind->bind_flags = bind_flags;
+ }
+ __i915_vma_pin(vma); /* and release */
+
+ GEM_BUG_ON(!bind_flags && !drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(!(drm_mm_node_allocated(&vma->node) ^
+ drm_mm_node_allocated(&bind->hole)));
+
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ *work->p_flags |= __EXEC_HAS_RELOC;
+ }
+
+ bind->ev->flags |= __EXEC_OBJECT_HAS_PIN;
+ GEM_BUG_ON(eb_vma_misplaced(entry, vma, bind->ev->flags));
return 0;
}
@@ -712,13 +1125,241 @@ static int wait_for_timeline(struct intel_timeline *tl)
} while (1);
}
+static void __eb_bind_vma(struct eb_vm_work *work)
+{
+ struct i915_address_space *vm = work->vm;
+ unsigned long n;
+
+ GEM_BUG_ON(!intel_gt_pm_is_awake(vm->gt));
+
+ /*
+ * We have to wait until the stale nodes are completely idle before
+ * we can remove their PTE and unbind their pages. Hence, after
+ * claiming their slot in the drm_mm, we defer their removal to
+ * after the fences are signaled.
+ */
+ if (!list_empty(&work->evict_list)) {
+ struct i915_vma *vma, *vn;
+
+ mutex_lock(&vm->mutex);
+ list_for_each_entry_safe(vma, vn, &work->evict_list, vm_link) {
+ GEM_BUG_ON(vma->vm != vm);
+ __i915_vma_evict(vma);
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+ }
+ mutex_unlock(&vm->mutex);
+ }
+
+ /*
+ * Now we know the nodes we require in drm_mm are idle, we can
+ * replace the PTE in those ranges with our own.
+ */
+ for (n = 0; n < work->count; n++) {
+ struct eb_bind_vma *bind = &work->bind[n];
+ struct i915_vma *vma = bind->ev->vma;
+
+ if (!bind->bind_flags)
+ goto put;
+
+ GEM_BUG_ON(vma->vm != vm);
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+
+ vma->ops->bind_vma(vm, &work->stash, vma,
+ vma->obj->cache_level, bind->bind_flags);
+
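+ /* Swap the temporary hole reservation for the vma's real node now that the range is idle */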
+ if (drm_mm_node_allocated(&bind->hole)) {
+ mutex_lock(&vm->mutex);
+ GEM_BUG_ON(bind->hole.mm != &vm->mm);
+ GEM_BUG_ON(bind->hole.color != I915_COLOR_UNEVICTABLE);
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+
+ drm_mm_remove_node(&bind->hole);
+ drm_mm_reserve_node(&vm->mm, &vma->node);
+
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ mutex_unlock(&vm->mutex);
+ }
+ bind->bind_flags = 0;
+
+put:
+ GEM_BUG_ON(drm_mm_node_allocated(&bind->hole));
+ i915_vma_put_pages(vma);
+ }
+ work->count = 0;
+}
+
+static int eb_bind_vma(struct dma_fence_work *base)
+{
+ struct eb_vm_work *work = container_of(base, typeof(*work), base);
+
+ __eb_bind_vma(work);
+ return 0;
+}
+
+static void eb_vma_work_release(struct dma_fence_work *base)
+{
+ struct eb_vm_work *work = container_of(base, typeof(*work), base);
+
+ __eb_bind_vma(work);
+ kvfree(work->bind);
+
+ if (work->id)
+ i915_active_release(&work->vm->binding);
+
+ eb_vma_array_put(work->array);
+
+ i915_vm_free_pt_stash(work->vm, &work->stash);
+ i915_vm_put(work->vm);
+}
+
+static const struct dma_fence_work_ops eb_bind_ops = {
+ .name = "eb_bind",
+ .work = eb_bind_vma,
+ .release = eb_vma_work_release,
+};
+
+static int eb_vm_work_cancel(struct eb_vm_work *work, int err)
+{
+ work->base.dma.error = err;
+ dma_fence_work_commit_imm(&work->base);
+
+ return err;
+}
+
+static struct eb_vm_work *eb_vm_work(struct i915_execbuffer *eb,
+ unsigned long count)
+{
+ struct eb_vm_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return NULL;
+
+ work->bind = kvmalloc(sizeof(*work->bind) * count, GFP_KERNEL);
+ if (!work->bind) {
+ kfree(work);
+ return NULL;
+ }
+ work->count = count;
+
+ INIT_LIST_HEAD(&work->evict_list);
+
+ dma_fence_work_init(&work->base, &eb_bind_ops);
+ work->array = eb_vma_array_get(eb->array);
+ work->p_flags = &eb->args->flags;
+ work->vm = i915_vm_get(eb->context->vm);
+ memset(&work->stash, 0, sizeof(work->stash));
+
+ /* Preallocate our slot in vm->binding, outside of vm->mutex */
+ work->id = i915_gem_context_async_id(eb->gem_context);
+ if (i915_active_acquire_for_context(&work->vm->binding, work->id)) {
+ work->id = 0;
+ eb_vm_work_cancel(work, -ENOMEM);
+ return NULL;
+ }
+
+ return work;
+}
+
+static int eb_vm_throttle(struct eb_vm_work *work)
+{
+ struct dma_fence *p;
+ int err;
+
+ /* Keep async work queued per context */
+ p = __i915_active_ref(&work->vm->binding, work->id, &work->base.dma);
+ if (IS_ERR_OR_NULL(p))
+ return PTR_ERR_OR_ZERO(p);
+
+ err = i915_sw_fence_await_dma_fence(&work->base.chain, p, 0,
+ GFP_NOWAIT | __GFP_NOWARN);
+ dma_fence_put(p);
+
+ return err < 0 ? err : 0;
+}
+
+static int eb_prepare_vma(struct eb_vm_work *work,
+ unsigned long idx,
+ struct eb_vma *ev)
+{
+ struct eb_bind_vma *bind = &work->bind[idx];
+ struct i915_vma *vma = ev->vma;
+ int err;
+
+ bind->ev = ev;
+ bind->hole.flags = 0;
+ bind->bind_flags = 0;
+
+ /* Allocate enough page directories to cover the PTEs used */
+ if (work->vm->allocate_va_range) {
+ err = i915_vm_alloc_pt_stash(work->vm, &work->stash, vma->size);
+ if (err)
+ return err;
+ }
+
+ return i915_vma_get_pages(vma);
+}
+
+static int wait_for_unbinds(struct i915_execbuffer *eb,
+ struct list_head *unbound,
+ int pass)
+{
+ struct eb_vma *ev;
+ int err;
+
+ list_for_each_entry(ev, unbound, unbound_link) {
+ struct i915_vma *vma = ev->vma;
+
+ GEM_BUG_ON(ev->flags & __EXEC_OBJECT_HAS_PIN);
+
+ if (drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(ev->exec, vma, ev->flags)) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+
+ /* Wait for the previous async work to complete so vma->node can be reused */
+ err = i915_vma_wait_for_unbind(vma);
+ if (err)
+ return err;
+ }
+
+ switch (pass) {
+ default:
+ return -ENOSPC;
+
+ case 2:
+ /*
+ * Too fragmented, retire everything on the timeline and so
+ * make it all [contexts included] available to evict.
+ */
+ err = wait_for_timeline(eb->context->timeline);
+ if (err)
+ return err;
+
+ fallthrough;
+ case 1:
+ /* XXX ticket lock */
+ if (i915_active_wait(&eb->context->vm->binding))
+ return -EINTR;
+
+ fallthrough;
+ case 0:
+ return 0;
+ }
+}
+
static int eb_reserve_vm(struct i915_execbuffer *eb)
{
- unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
+ struct i915_address_space *vm = eb->context->vm;
struct list_head last, unbound;
+ unsigned long count;
struct eb_vma *ev;
unsigned int pass;
+ int err = 0;
+ count = 0;
INIT_LIST_HEAD(&unbound);
list_for_each_entry(ev, &eb->bind_list, bind_link) {
struct drm_i915_gem_exec_object2 *entry = ev->exec;
@@ -735,29 +1376,15 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
list_add(&ev->unbound_link, &unbound);
else
list_add_tail(&ev->unbound_link, &unbound);
+ count++;
}
}
-
- if (list_empty(&unbound))
+ if (count == 0)
return 0;
- /*
- * Attempt to pin all of the buffers into the GTT.
- * This is done in 3 phases:
- *
- * 1a. Unbind all objects that do not match the GTT constraints for
- * the execbuffer (fenceable, mappable, alignment etc).
- * 1b. Increment pin count for already bound objects.
- * 2. Bind new objects.
- * 3. Decrement pin count.
- *
- * This avoid unnecessary unbinding of later objects in order to make
- * room for the earlier objects *unless* we need to defragment.
- */
-
pass = 0;
do {
- int err = 0;
+ struct eb_vm_work *work;
/*
* We need to hold one lock as we bind all the vma so that
@@ -771,23 +1398,87 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
* find space for new buffers, we know that extra pressure
* from contention is likely.
*
- * In lieu of being able to hold vm->mutex for the entire
- * sequence (it's complicated!), we opt for struct_mutex.
+ * vm->mutex is complicated, as we are not allowed to allocate
+ * beneath it, so we have to stage and preallocate all the
+ * resources we may require before taking the mutex.
*/
- if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
- return -EINTR;
+ work = eb_vm_work(eb, count);
+ if (!work)
+ return -ENOMEM;
+ count = 0;
list_for_each_entry(ev, &unbound, unbound_link) {
- err = eb_reserve_vma(eb, ev, pin_flags);
+ err = eb_prepare_vma(work, count++, ev);
+ if (err) {
+ work->count = count - 1;
+
+ if (eb_vm_work_cancel(work, err) == -EAGAIN)
+ goto retry;
+
+ return err;
+ }
+ }
+
+ err = i915_vm_pin_pt_stash(work->vm, &work->stash);
+ if (err)
+ return eb_vm_work_cancel(work, err);
+
+ /* No allocations allowed beyond this point */
+ if (mutex_lock_interruptible(&vm->mutex))
+ return eb_vm_work_cancel(work, -EINTR);
+
+ err = eb_vm_throttle(work);
+ if (err) {
+ mutex_unlock(&vm->mutex);
+ return eb_vm_work_cancel(work, err);
+ }
+
+ for (count = 0; count < work->count; count++) {
+ struct eb_bind_vma *bind = &work->bind[count];
+ struct i915_vma *vma;
+
+ ev = bind->ev;
+ vma = ev->vma;
+
+ /*
+ * Check if this node is being evicted or must be.
+ *
+ * As we use the single node inside the vma to track
+ * both the eviction and where to insert the new node,
+ * we cannot handle migrating the vma inside the worker.
+ */
+ if (drm_mm_node_allocated(&vma->node)) {
+ if (eb_vma_misplaced(ev->exec, vma, ev->flags)) {
+ err = -ENOSPC;
+ break;
+ }
+ } else {
+ if (i915_vma_is_active(vma)) {
+ err = -ENOSPC;
+ break;
+ }
+ }
+
+ err = i915_active_acquire(&vma->active);
+ if (!err) {
+ err = eb_reserve_vma(work, bind);
+ i915_active_release(&vma->active);
+ }
if (err)
break;
+
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
}
- if (!(err == -ENOSPC || err == -EAGAIN)) {
- mutex_unlock(&eb->i915->drm.struct_mutex);
+
+ mutex_unlock(&vm->mutex);
+
+ dma_fence_work_commit_imm(&work->base);
+ if (err != -ENOSPC)
return err;
- }
+retry:
/* Resort *all* the objects into priority order */
+ count = 0;
INIT_LIST_HEAD(&unbound);
INIT_LIST_HEAD(&last);
list_for_each_entry(ev, &eb->bind_list, bind_link) {
@@ -798,6 +1489,7 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
continue;
eb_unreserve_vma(ev);
+ count++;
if (flags & EXEC_OBJECT_PINNED)
/* Pinned must have their slot */
@@ -812,11 +1504,16 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
list_add_tail(&ev->unbound_link, &last);
}
list_splice_tail(&last, &unbound);
- mutex_unlock(&eb->i915->drm.struct_mutex);
+ GEM_BUG_ON(!count);
+
+ if (signal_pending(current))
+ return -EINTR;
+
+ /* Now safe to wait with no reservations held */
if (err == -EAGAIN) {
flush_workqueue(eb->i915->mm.userptr_wq);
- continue;
+ pass = 0;
}
/*
@@ -834,27 +1531,9 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
* there is no guarantee that we will succeed in claiming
* total ownership of the vm.
*/
- switch (pass++) {
- case 0:
- break;
-
- case 1:
- /*
- * Too fragmented, retire everything on the timeline
- * and so make it all [contexts included] available to
- * evict.
- */
- err = wait_for_timeline(eb->context->timeline);
- if (err)
- return err;
-
- break;
-
- default:
- return -ENOSPC;
- }
-
- pin_flags = PIN_USER;
+ err = wait_for_unbinds(eb, &unbound, pass++);
+ if (err)
+ return err;
} while (1);
}
@@ -1452,6 +2131,29 @@ relocate_entry(struct i915_execbuffer *eb,
return target->node.start | UPDATE;
}
+static int gen6_fixup_ggtt(struct i915_vma *vma)
+{
+ int err;
+
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ return 0;
+
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ return err;
+
+ mutex_lock(&vma->vm->mutex);
+ if (!(atomic_fetch_or(I915_VMA_GLOBAL_BIND, &vma->flags) & I915_VMA_GLOBAL_BIND)) {
+ __i915_gem_object_pin_pages(vma->obj);
+ vma->ops->bind_vma(vma->vm, NULL, vma,
+ vma->obj->cache_level,
+ I915_VMA_GLOBAL_BIND);
+ }
+ mutex_unlock(&vma->vm->mutex);
+
+ return 0;
+}
+
static u64
eb_relocate_entry(struct i915_execbuffer *eb,
struct eb_vma *ev,
@@ -1466,6 +2168,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
if (unlikely(!target))
return -ENOENT;
+ GEM_BUG_ON(!i915_vma_is_pinned(target->vma));
+
/* Validate that the target is in a valid r/w GPU domain */
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
drm_dbg(&i915->drm, "reloc with multiple write domains: "
@@ -1500,9 +2204,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
*/
if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
IS_GEN(eb->i915, 6)) {
- err = i915_vma_bind(target->vma,
- target->vma->obj->cache_level,
- PIN_GLOBAL, NULL);
+ err = gen6_fixup_ggtt(target->vma);
if (err)
return err;
}
@@ -1672,6 +2374,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
struct drm_i915_gem_object *obj = vma->obj;
assert_vma_held(vma);
+ GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
+ GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND));
if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_capture_list *capture;
@@ -1710,7 +2414,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
err = i915_vma_move_to_active(vma, eb->request, flags);
i915_vma_unlock(vma);
- eb_unreserve_vma(ev);
}
ww_acquire_fini(&acquire);
@@ -2826,7 +3529,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- batch = eb.batch->vma;
+ batch = i915_vma_get(eb.batch->vma);
if (eb.batch_flags & I915_DISPATCH_SECURE) {
struct i915_vma *vma;
@@ -2846,6 +3549,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_parse;
}
+ GEM_BUG_ON(vma->obj != batch->obj);
batch = vma;
}
@@ -2914,6 +3618,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err_parse:
if (batch->private)
intel_gt_buffer_pool_put(batch->private);
+ i915_vma_put(batch);
err_vma:
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
@@ -362,6 +362,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
atomic_set(&vma->flags, I915_VMA_GGTT);
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+ INIT_LIST_HEAD(&vma->vm_link);
INIT_LIST_HEAD(&vma->obj_link);
INIT_LIST_HEAD(&vma->closed_link);
@@ -58,6 +58,8 @@ void __i915_vm_close(struct i915_address_space *vm)
void i915_address_space_fini(struct i915_address_space *vm)
{
+ i915_active_fini(&vm->binding);
+
drm_mm_takedown(&vm->mm);
mutex_destroy(&vm->mutex);
}
@@ -103,6 +105,8 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
drm_mm_init(&vm->mm, 0, vm->total);
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
+ i915_active_init(&vm->binding, NULL, NULL);
+
INIT_LIST_HEAD(&vm->bound_list);
}
@@ -247,6 +247,8 @@ struct i915_address_space {
*/
struct list_head bound_list;
+ struct i915_active binding;
+
/* Global GTT */
bool is_ggtt:1;
@@ -997,6 +997,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return vma;
if (i915_vma_misplaced(vma, size, alignment, flags)) {
+ if (flags & PIN_NOEVICT)
+ return ERR_PTR(-ENOSPC);
+
if (flags & PIN_NONBLOCK) {
if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
return ERR_PTR(-ENOSPC);
@@ -1016,6 +1019,10 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
+ if (flags & PIN_NONBLOCK &&
+ i915_active_fence_isset(&vma->active.excl))
+ return ERR_PTR(-EAGAIN);
+
ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
if (ret)
return ERR_PTR(ret);
@@ -221,6 +221,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
mode = DRM_MM_INSERT_HIGHEST;
if (flags & PIN_MAPPABLE)
mode = DRM_MM_INSERT_LOW;
+ if (flags & PIN_NOSEARCH)
+ mode |= DRM_MM_INSERT_ONCE;
/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
* so we know that we always have a minimum alignment of 4096.
@@ -238,6 +240,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (err != -ENOSPC)
return err;
+ if (flags & PIN_NOSEARCH)
+ return -ENOSPC;
+
if (mode & DRM_MM_INSERT_ONCE) {
err = drm_mm_insert_node_in_range(&vm->mm, node,
size, alignment, color,
@@ -133,6 +133,7 @@ vma_create(struct drm_i915_gem_object *obj,
fs_reclaim_release(GFP_KERNEL);
}
+ INIT_LIST_HEAD(&vma->vm_link);
INIT_LIST_HEAD(&vma->closed_link);
if (view && view->type != I915_GGTT_VIEW_NORMAL) {
@@ -341,25 +342,37 @@ struct i915_vma_work *i915_vma_work(void)
return vw;
}
-int i915_vma_wait_for_bind(struct i915_vma *vma)
+static int
+__i915_vma_wait_excl(struct i915_vma *vma, bool bound, unsigned int flags)
{
+ struct dma_fence *fence;
int err = 0;
- if (rcu_access_pointer(vma->active.excl.fence)) {
- struct dma_fence *fence;
+ fence = i915_active_fence_get(&vma->active.excl);
+ if (!fence)
+ return 0;
- rcu_read_lock();
- fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
- rcu_read_unlock();
- if (fence) {
- err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
- dma_fence_put(fence);
- }
+ if (drm_mm_node_allocated(&vma->node) == bound) {
+ if (flags & PIN_NOEVICT)
+ err = -EBUSY;
+ else
+ err = dma_fence_wait(fence, true);
}
+ dma_fence_put(fence);
return err;
}
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+ return __i915_vma_wait_excl(vma, true, 0);
+}
+
+int i915_vma_wait_for_unbind(struct i915_vma *vma)
+{
+ return __i915_vma_wait_excl(vma, false, 0);
+}
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
@@ -624,8 +637,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
u64 start, end;
int ret;
- GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
+ GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(i915_active_fence_isset(&vma->active.excl));
size = max(size, vma->size);
alignment = max(alignment, vma->display_alignment);
@@ -721,7 +735,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
- list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+ list_move_tail(&vma->vm_link, &vma->vm->bound_list);
return 0;
}
@@ -729,15 +743,12 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
static void
i915_vma_detach(struct i915_vma *vma)
{
- GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
-
/*
* And finally now the object is completely decoupled from this
* vma, we can drop its hold on the backing storage and allow
* it to be reaped by the shrinker.
*/
- list_del(&vma->vm_link);
+ list_del_init(&vma->vm_link);
}
bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags)
@@ -759,7 +770,7 @@ bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags)
return true;
}
-static int vma_get_pages(struct i915_vma *vma)
+int i915_vma_get_pages(struct i915_vma *vma)
{
int err = 0;
@@ -806,7 +817,7 @@ static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
mutex_unlock(&vma->pages_mutex);
}
-static void vma_put_pages(struct i915_vma *vma)
+void i915_vma_put_pages(struct i915_vma *vma)
{
if (atomic_add_unless(&vma->pages_count, -1, 1))
return;
@@ -823,9 +834,13 @@ static void vma_unbind_pages(struct i915_vma *vma)
/* The upper portion of pages_count is the number of bindings */
count = atomic_read(&vma->pages_count);
count >>= I915_VMA_PAGES_BIAS;
- GEM_BUG_ON(!count);
+ if (count)
+ __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+}
- __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+static int __wait_for_unbind(struct i915_vma *vma, unsigned int flags)
+{
+ return __i915_vma_wait_excl(vma, false, flags);
}
int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
@@ -844,13 +859,17 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (i915_vma_pin_inplace(vma, flags & I915_VMA_BIND_MASK))
return 0;
- err = vma_get_pages(vma);
+ err = i915_vma_get_pages(vma);
if (err)
return err;
if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+ err = __wait_for_unbind(vma, flags);
+ if (err)
+ goto err_rpm;
+
if (flags & vma->vm->bind_async_flags) {
work = i915_vma_work();
if (!work) {
@@ -923,6 +942,10 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
goto err_unlock;
if (!(bound & I915_VMA_BIND_MASK)) {
+ err = __wait_for_unbind(vma, flags);
+ if (err)
+ goto err_active;
+
err = i915_vma_insert(vma, size, alignment, flags);
if (err)
goto err_active;
@@ -942,6 +965,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+ GEM_BUG_ON(!i915_vma_is_active(vma));
__i915_vma_pin(vma);
GEM_BUG_ON(!i915_vma_is_pinned(vma));
@@ -963,7 +987,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
err_rpm:
if (wakeref)
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
- vma_put_pages(vma);
+ i915_vma_put_pages(vma);
return err;
}
@@ -1067,6 +1091,7 @@ void i915_vma_release(struct kref *ref)
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
}
GEM_BUG_ON(i915_vma_is_active(vma));
+ GEM_BUG_ON(!list_empty(&vma->vm_link));
if (vma->obj) {
struct drm_i915_gem_object *obj = vma->obj;
@@ -1126,7 +1151,7 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_pinned(vma));
- if (vma->iomap == NULL)
+ if (!vma->iomap)
return;
io_mapping_unmap(vma->iomap);
@@ -236,6 +236,9 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
dma_resv_unlock(vma->resv);
}
+int i915_vma_get_pages(struct i915_vma *vma);
+void i915_vma_put_pages(struct i915_vma *vma);
+
bool i915_vma_pin_inplace(struct i915_vma *vma, unsigned int flags);
int __must_check
@@ -379,6 +382,7 @@ void i915_vma_make_shrinkable(struct i915_vma *vma);
void i915_vma_make_purgeable(struct i915_vma *vma);
int i915_vma_wait_for_bind(struct i915_vma *vma);
+int i915_vma_wait_for_unbind(struct i915_vma *vma);
static inline int i915_vma_sync(struct i915_vma *vma)
{