[172/190] drm/i915: Eliminate lots of iterations over the execobjects array

Message ID 1452510091-6833-31-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Jan. 11, 2016, 11:01 a.m. UTC
The major scaling bottleneck in execbuffer is the processing of the
execobjects. Creating an auxiliary list is inefficient compared to
using the execobject array we already have allocated.
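
A minimal sketch of what that means in practice (using the to_ptr()
helper and rsvd2 field from the patch below; declarations elided): the
VMA pointer is stashed in each execobject's reserved field at lookup
time, so later passes walk eb->exec[] directly instead of a side list.

	#define to_ptr(T, x) ((T *)(uintptr_t)(x))

	/* at lookup: remember which vma backs this execobject ... */
	entry->rsvd2 = (uintptr_t)vma;

	/* ... so any later pass over eb->exec[] needs no auxiliary list */
	vma = to_ptr(struct i915_vma, entry->rsvd2);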

Reservation is then split into phases. As we look up each VMA, we try
to bind it back into its previous (active) location. Only if that fails
do we add it to the unbound list for phase 2. In phase 2, we try to
bind all those objects that could not fit into their previous location,
falling back to retrying all objects and evicting the VM in case of
severe fragmentation. (This is the same as before, except that phase 1
is now done inline with looking up the VMA, avoiding an iteration over
the execobject array. In the ideal case, we eliminate the separate
reservation phase.) During the reservation phase, we only evict from
the VM between passes (rather than, as currently, while trying to fit
every new VMA). In testing with Unreal Engine's Atlantis demo, which
stresses the eviction logic on gen7-class hardware, this speeds up the
framerate by a factor of 2.
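
Illustrative sketch only of the shape of phase 2, using the
eb_reserve_vma()/eb->unbound names introduced by the patch but leaving
out the re-sorting of the list between passes. Phase 1 is the
eb_pin_vma()/eb_vma_misplaced() check performed while each VMA is
looked up; only misplaced VMAs ever reach eb->unbound.

static int sketch_reserve(struct i915_execbuffer *eb)
{
	struct i915_vma *vma;
	int pass = -1, ret;

	if (list_empty(&eb->unbound))
		return 0;

	do {
		/* try to bind everything that lost its old slot */
		list_for_each_entry(vma, &eb->unbound, exec_link) {
			ret = eb_reserve_vma(eb, vma, pass);
			if (ret)
				break;
		}
		if (ret != -ENOSPC || pass++ > 0)
			return ret;

		/* too fragmented: flush the whole VM and try again */
		ret = i915_gem_evict_vm(eb->vm);
		if (ret)
			return ret;
	} while (1);
}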

The second loop amalgamation is between move_to_gpu and move_to_active.
As we always submit the request, even if incomplete, we can use the
current request to track the active VMAs as we perform the required
flushes and synchronisation.
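
That is, one walk over eb->exec[] now does both jobs; schematically
(the real loop in eb_move_to_gpu() below also handles clflushes,
domain tracking and skips the sync for objects idle on other rings):

	for (i = 0; i < count; i++) {
		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
		struct i915_vma *vma = to_ptr(struct i915_vma, entry->rsvd2);

		/* flushes/synchronisation, as move_to_gpu used to do */
		ret = i915_gem_object_sync(vma->obj, eb->request,
					   entry->flags & EXEC_OBJECT_WRITE);
		if (ret)
			return ret;

		/* and, in the same pass, the old move_to_active bookkeeping */
		i915_vma_move_to_active(vma, eb->request, entry->flags);
	}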

The next big advancement is to avoid copying back to the user any
execobjects and relocations that have not changed.
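
The mechanism is to mark only the entries whose offset actually moved,
reusing a spare low bit of the offset as the UPDATE flag, so the
copy-back loop in the ioctl can skip everything else; roughly:

	/* kernel side, while (re)binding: note only real moves */
	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start | UPDATE;
		eb->args->flags |= __EXEC_HAS_RELOC;
	}

	/* ioctl epilogue: write back nothing unless it changed */
	for (i = 0; i < args->buffer_count; i++) {
		if ((exec2_list[i].offset & UPDATE) == 0)
			continue;

		exec2_list[i].offset =
			gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
		if (__copy_to_user(&user_exec_list[i].offset,
				   &exec2_list[i].offset,
				   sizeof(user_exec_list[i].offset)))
			break;
	}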

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    3 +-
 drivers/gpu/drm/i915/i915_gem.c            |    4 +-
 drivers/gpu/drm/i915/i915_gem_evict.c      |   71 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 1310 ++++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |    2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h        |    4 +-
 6 files changed, 713 insertions(+), 681 deletions(-)

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2ceefce0e731..601ef7412cf9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2733,6 +2733,7 @@  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 void i915_gem_load(struct drm_device *dev);
 void *i915_gem_object_alloc(struct drm_device *dev);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
+bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
@@ -3078,7 +3079,7 @@  int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  unsigned long end,
 					  unsigned flags);
 int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+int i915_gem_evict_vm(struct i915_address_space *vm);
 
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3eeca1fb89d2..0bd6db4e83d9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2393,7 +2393,7 @@  out:
  * write domains, emitting any outstanding lazy request and retiring and
  * completed requests.
  */
-static bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
+bool i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
 	int i;
 
@@ -2821,7 +2821,7 @@  i915_vma_insert(struct i915_vma *vma,
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
-		return -E2BIG;
+		return -ENOSPC;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d40bcb81c922..e71b89bac168 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -72,7 +72,7 @@  mark_free(struct i915_vma *vma, unsigned flags, struct list_head *unwind)
 	if (flags & PIN_NOFAULT && vma->obj->fault_mappable)
 		return false;
 
-	list_add(&vma->exec_list, unwind);
+	list_add(&vma->evict_link, unwind);
 	return drm_mm_scan_add_block(&vma->node);
 }
 
@@ -154,11 +154,11 @@  search_again:
 	while (!list_empty(&eviction_list)) {
 		vma = list_first_entry(&eviction_list,
 				       struct i915_vma,
-				       exec_list);
+				       evict_link);
 		ret = drm_mm_scan_remove_block(&vma->node);
 		BUG_ON(ret);
 
-		list_del(&vma->exec_list);
+		list_del(&vma->evict_link);
 	}
 
 	/* Can we unpin some objects such as idle hw contents,
@@ -201,16 +201,16 @@  found:
 	 * calling unbind (which may remove the active reference
 	 * of any of our objects, thus corrupting the list).
 	 */
-	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		if (drm_mm_scan_remove_block(&vma->node))
 			drm_gem_object_reference(&vma->obj->base);
 		else
-			list_del(&vma->exec_list);
+			list_del(&vma->evict_link);
 	}
 
 	/* Unbinding will emit any required flushes */
 	ret = 0;
-	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 		if (ret == 0)
 			ret = i915_vma_unbind(vma);
@@ -261,14 +261,13 @@  i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
 			break;
 		}
 
-		list_add(&vma->exec_list, &eviction_list);
+		list_add(&vma->evict_link, &eviction_list);
 		drm_gem_object_reference(&vma->obj->base);
 	}
 
 	ret = 0;
-	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		list_del(&vma->exec_list);
 		if (ret == 0)
 			ret = i915_vma_unbind(vma);
 		drm_gem_object_unreference(&obj->base);
@@ -291,37 +290,49 @@  i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
  * To clarify: This is for freeing up virtual address space, not for freeing
  * memory in e.g. the shrinker.
  */
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
+int i915_gem_evict_vm(struct i915_address_space *vm)
 {
+	struct list_head *phases[] = {
+		&vm->inactive_list,
+		&vm->active_list,
+		NULL
+	}, **phase;
+	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&vm->dev->struct_mutex));
 	trace_i915_gem_evict_vm(vm);
 
-	if (do_idle) {
-		/* Switch back to the default context in order to unpin
-		 * the existing context objects. However, such objects only
-		 * pin themselves inside the global GTT and performing the
-		 * switch otherwise is ineffective.
-		 */
-		if (i915_is_ggtt(vm)) {
-			ret = switch_to_pinned_context(to_i915(vm->dev));
-			if (ret)
-				return ret;
-		}
-
-		ret = i915_gpu_idle(vm->dev);
+	/* Switch back to the default context in order to unpin
+	 * the existing context objects. However, such objects only
+	 * pin themselves inside the global GTT and performing the
+	 * switch otherwise is ineffective.
+	 */
+	if (i915_is_ggtt(vm)) {
+		ret = switch_to_pinned_context(to_i915(vm->dev));
 		if (ret < 0)
 			return ret;
-
-		i915_gem_retire_requests(vm->dev);
-		WARN_ON(!list_empty(&vm->active_list));
 	}
 
-	list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
-		if (vma->pin_count == 0)
-			WARN_ON(i915_vma_unbind(vma));
+	INIT_LIST_HEAD(&eviction_list);
+	phase = phases;
+	do {
+		list_for_each_entry(vma, *phase, vm_link) {
+			if (vma->pin_count)
+				continue;
 
-	return 0;
+			list_add(&vma->evict_link, &eviction_list);
+			drm_gem_object_reference(&vma->obj->base);
+		}
+	} while (*++phase);
+
+	ret = 0;
+	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+		if (ret == 0)
+			ret = i915_vma_unbind(vma);
+		drm_gem_object_unreference(&obj->base);
+	}
+	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2868e094f67c..f40d3254249a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -40,6 +40,10 @@ 
 #define  __EXEC_OBJECT_HAS_FENCE (1U<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1U<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1U<<28)
+#define __EB_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
+
+#define __EXEC_HAS_RELOC (1ULL<<31)
+#define UPDATE PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -52,21 +56,44 @@  struct i915_execbuffer {
 	struct intel_context *ctx;
 	struct i915_address_space *vm;
 	struct i915_vma *batch_vma;
-	uint32_t batch_start_offset;
 	struct drm_i915_gem_request *request;
-	unsigned dispatch_flags;
-	bool need_relocs;
-	struct list_head vmas;
+	struct list_head unbound;
+	struct list_head relocs;
 	struct reloc_cache {
 		unsigned long vaddr;
 		unsigned page;
 		struct drm_mm_node node;
 		bool use_64bit_reloc;
+		bool has_llc;
+		bool has_fence;
 	} reloc_cache;
+	u64 invalid_flags;
+	u32 context_flags;
+	u32 dispatch_flags;
 	int lut_mask;
 	struct hlist_head *buckets;
 };
 
+#define to_ptr(T, x) ((T *)(uintptr_t)(x))
+
+/* Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline uint64_t gen8_canonical_addr(uint64_t address)
+{
+	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline uint64_t gen8_noncanonical_addr(uint64_t address)
+{
+	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
+}
+
 static int
 eb_create(struct i915_execbuffer *eb)
 {
@@ -91,78 +118,317 @@  eb_create(struct i915_execbuffer *eb)
 	return 0;
 }
 
+static bool
+eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
+		 const struct i915_vma *vma)
+{
+	if ((entry->flags & __EXEC_OBJECT_HAS_PIN) == 0)
+		return true;
+
+	if (vma->node.size < entry->pad_to_size)
+		return true;
+
+	if (entry->alignment && vma->node.start & (entry->alignment - 1))
+		return true;
+
+	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+	    vma->node.start < BATCH_OFFSET_BIAS)
+		return true;
+
+	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+	    (vma->node.start + vma->node.size - 1) >> 32)
+		return true;
+
+	return false;
+}
+
+static void
+eb_pin_vma(struct i915_execbuffer *eb,
+	   struct drm_i915_gem_exec_object2 *entry,
+	   struct i915_vma *vma)
+{
+	u64 flags;
+
+	flags = PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED;
+	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+		flags |= PIN_GLOBAL;
+	if (entry->flags & EXEC_OBJECT_PINNED)
+		flags |= entry->offset;
+	else
+		flags |= vma->node.start;
+	if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
+		return;
+
+	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+		if (unlikely(i915_vma_get_fence(vma))) {
+			i915_vma_unpin(vma);
+			return;
+		}
+
+		if (i915_vma_pin_fence(vma))
+			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+	}
+
+	if (entry->offset != vma->node.start) {
+		entry->offset = vma->node.start | UPDATE;
+		eb->args->flags |= __EXEC_HAS_RELOC;
+	}
+	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+}
+
 static inline void
 __eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+		 const struct drm_i915_gem_exec_object2 *entry)
 {
+	GEM_BUG_ON((entry->flags & __EXEC_OBJECT_HAS_PIN) == 0);
+
 	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);
 
-	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-		__i915_vma_unpin(vma);
+	__i915_vma_unpin(vma);
 }
 
-static void
-eb_unreserve_vma(struct i915_vma *vma)
+static inline void
+eb_unreserve_vma(struct i915_vma *vma,
+		 struct drm_i915_gem_exec_object2 *entry)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+	if (entry->flags & __EXEC_OBJECT_HAS_PIN) {
+		__eb_unreserve_vma(vma, entry);
+		entry->flags &= ~__EB_RESERVED;
+	}
 }
 
-static void
-eb_reset(struct i915_execbuffer *eb)
+static int
+eb_add_vma(struct i915_execbuffer *eb,
+	   struct drm_i915_gem_exec_object2 *entry,
+	   struct i915_vma *vma)
 {
-	struct i915_vma *vma;
+	int ret;
 
-	list_for_each_entry(vma, &eb->vmas, exec_list) {
-		eb_unreserve_vma(vma);
-		vma->exec_entry = NULL;
-	}
+	GEM_BUG_ON(vma->closed);
 
-	if (eb->lut_mask >= 0)
-		memset(eb->buckets, 0,
-		       (1<<eb->lut_mask)*sizeof(struct hlist_head));
-}
+	if (unlikely(entry->flags & eb->invalid_flags))
+		return -EINVAL;
 
-#define to_ptr(T, x) ((T *)(uintptr_t)(x))
+	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
+		return -EINVAL;
+
+	/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
+	 * any non-page-aligned or non-canonical addresses.
+	 */
+	if (entry->flags & EXEC_OBJECT_PINNED) {
+		if (unlikely(entry->offset !=
+			     gen8_canonical_addr(entry->offset & PAGE_MASK)))
+			return -EINVAL;
+
+		/* From drm_mm perspective address space is continuous,
+		 * so from this point we're always using non-canonical
+		 * form internally.
+		 */
+		entry->offset = gen8_noncanonical_addr(entry->offset);
+	}
+
+	/* pad_to_size was once a reserved field, so sanitize it */
+	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
+		if (unlikely(offset_in_page(entry->pad_to_size)))
+			return -EINVAL;
+	} else
+		entry->pad_to_size = 0;
 
-static bool
-eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i)
-{
 	if (unlikely(vma->exec_entry)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
-			  eb->exec[i].handle, i);
-		return false;
+			  entry->handle, (int)(entry - eb->exec));
+		return -EINVAL;
 	}
-	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	vma->exec_entry = &eb->exec[i];
+	vma->exec_entry = entry;
+	entry->rsvd2 = (uintptr_t)vma;
+
 	if (eb->lut_mask >= 0) {
-		vma->exec_handle = eb->exec[i].handle;
+		vma->exec_handle = entry->handle;
 		hlist_add_head(&vma->exec_node,
-			       &eb->buckets[hash_32(vma->exec_handle,
+			       &eb->buckets[hash_32(entry->handle,
 						    eb->lut_mask)]);
 	}
 
-	eb->exec[i].rsvd2 = (uintptr_t)vma;
-	return true;
+	if (entry->relocation_count)
+		list_add_tail(&vma->reloc_link, &eb->relocs);
+
+	if (!eb->reloc_cache.has_fence) {
+		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+	} else {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		    vma->obj->tiling_mode != I915_TILING_NONE)
+			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+	}
+
+	if ((entry->flags & EXEC_OBJECT_PINNED) == 0)
+		entry->flags |= eb->context_flags;
+
+	ret = 0;
+	if (vma->node.size)
+		eb_pin_vma(eb, entry, vma);
+	if (eb_vma_misplaced(entry, vma)) {
+		eb_unreserve_vma(vma, entry);
+
+		list_add_tail(&vma->exec_link, &eb->unbound);
+		if (drm_mm_node_allocated(&vma->node))
+			ret = i915_vma_unbind(vma);
+	}
+	return ret;
+}
+
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+				const struct drm_i915_gem_object *obj)
+{
+	return (DBG_USE_CPU_RELOC ||
+		cache->has_llc ||
+		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+		obj->cache_level != I915_CACHE_NONE);
+}
+
+static int
+eb_reserve_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int pass)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	u64 flags;
+	int ret;
+
+	flags = PIN_USER | PIN_NONBLOCK;
+	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+		flags |= PIN_GLOBAL;
+
+	if (!drm_mm_node_allocated(&vma->node)) {
+		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+		 * limit address to the first 4GBs for unflagged objects.
+		 */
+		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
+			flags |= PIN_ZONE_4G;
+
+		if (vma->is_ggtt) {
+			if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) {
+				flags |= PIN_MAPPABLE;
+			} else if (entry->relocation_count &&
+				   !use_cpu_reloc(&eb->reloc_cache, vma->obj)) {
+				if (pass <= 0)
+					flags |= PIN_MAPPABLE;
+			} else {
+				flags |= PIN_HIGH;
+			}
+		}
+
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			flags |= entry->offset | PIN_OFFSET_FIXED;
+		else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
+			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	}
+
+	ret = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+	if (ret)
+		return ret;
+
+	if ((entry->offset & PIN_OFFSET_MASK) != vma->node.start) {
+		entry->offset = vma->node.start | UPDATE;
+		eb->args->flags |= __EXEC_HAS_RELOC;
+	}
+	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
+	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+		ret = i915_vma_get_fence(vma);
+		if (ret)
+			return ret;
+
+		if (i915_vma_pin_fence(vma))
+			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+	}
+
+	return 0;
 }
 
-static inline struct hlist_head *ht_head(struct intel_context *ctx, u32 handle)
+static int eb_reserve(struct i915_execbuffer *eb)
+{
+	const unsigned count = eb->args->buffer_count;
+	struct i915_vma *vma;
+	unsigned i;
+	int pass;
+	int ret;
+
+	/* Attempt to pin all of the buffers into the GTT.
+	 * This is done in 3 phases:
+	 *
+	 * 1a. Unbind all objects that do not match the GTT constraints for
+	 *     the execbuffer (fenceable, mappable, alignment etc).
+	 * 1b. Increment pin count for already bound objects.
+	 * 2.  Bind new objects.
+	 * 3.  Decrement pin count.
+	 *
+	 * This avoid unnecessary unbinding of later objects in order to make
+	 * room for the earlier objects *unless* we need to defragment.
+	 */
+	if (list_empty(&eb->unbound))
+		return 0;
+
+	pass = -1;
+	do {
+		struct list_head last;
+
+		i915_gem_retire_requests(eb->i915->dev);
+
+		list_for_each_entry(vma, &eb->unbound, exec_link) {
+			ret = eb_reserve_vma(eb, vma, pass);
+			if (ret)
+				break;
+		}
+		if (ret != -ENOSPC || pass++ > 0)
+			return ret;
+
+		/* Resort *all* the objects into priority order */
+		INIT_LIST_HEAD(&eb->unbound);
+		INIT_LIST_HEAD(&last);
+		for (i = 0; i < count; i++) {
+			struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+			vma = to_ptr(struct i915_vma, entry->rsvd2);
+
+			eb_unreserve_vma(vma, entry);
+
+			if (entry->flags & EXEC_OBJECT_PINNED)
+				list_add(&vma->exec_link, &eb->unbound);
+			else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+				list_add_tail(&vma->exec_link, &eb->unbound);
+			else
+				list_add_tail(&vma->exec_link, &last);
+		}
+		list_splice_tail(&last, &eb->unbound);
+
+		/* Too fragmented, unbind everything and retry */
+		ret = i915_gem_evict_vm(eb->vm);
+		if (ret)
+			return ret;
+	} while (1);
+}
+
+static inline struct hlist_head *
+ht_head(const struct intel_context *ctx, u32 handle)
 {
 	return &ctx->vma_ht[hash_32(handle, ctx->vma_ht_bits)];
 }
 
+static int eb_batch_index(const struct i915_execbuffer *eb)
+{
+	return eb->args->buffer_count - 1;
+}
+
 static int
 eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	const int count = eb->args->buffer_count;
 	struct i915_vma *vma;
+	struct idr *idr;
 	int slow_pass = -1;
-	int i;
+	int i, ret;
 
-	INIT_LIST_HEAD(&eb->vmas);
+	INIT_LIST_HEAD(&eb->relocs);
+	INIT_LIST_HEAD(&eb->unbound);
 
 	if (unlikely(eb->ctx->vma_ht_size & 1))
 		flush_work(&eb->ctx->vma_ht_resize);
@@ -175,8 +441,9 @@  eb_lookup_vmas(struct i915_execbuffer *eb)
 			if (vma->ctx_handle != eb->exec[i].handle)
 				continue;
 
-			if (!eb_add_vma(eb, vma, i))
-				return -EINVAL;
+			ret = eb_add_vma(eb, &eb->exec[i], vma);
+			if (unlikely(ret))
+				return ret;
 
 			goto next_vma;
 		}
@@ -187,24 +454,25 @@  next_vma: ;
 	}
 
 	if (slow_pass < 0)
-		return 0;
+		goto out;
 
 	spin_lock(&eb->file->table_lock);
 	/* Grab a reference to the object and release the lock so we can lookup
 	 * or create the VMA without using GFP_ATOMIC */
+	idr = &eb->file->object_idr;
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
 
 		if (eb->exec[i].rsvd2)
 			continue;
 
-		obj = to_intel_bo(idr_find(&eb->file->object_idr,
-					   eb->exec[i].handle));
+		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
 		if (unlikely(obj == NULL)) {
 			spin_unlock(&eb->file->table_lock);
 			DRM_DEBUG("Invalid object handle %d at index %d\n",
 				  eb->exec[i].handle, i);
-			return -ENOENT;
+			ret = -ENOENT;
+			goto err;
 		}
 
 		eb->exec[i].rsvd2 = 1 | (uintptr_t)obj;
@@ -225,11 +493,12 @@  next_vma: ;
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		obj = to_ptr(struct drm_i915_gem_object, eb->exec[i].rsvd2 & ~1);
+		obj = to_ptr(typeof(*obj), eb->exec[i].rsvd2 & ~1);
 		vma = i915_gem_obj_lookup_or_create_vma(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
-			return PTR_ERR(vma);
+			ret = PTR_ERR(vma);
+			goto err;
 		}
 
 		/* First come, first served */
@@ -240,28 +509,24 @@  next_vma: ;
 				       ht_head(eb->ctx, eb->exec[i].handle));
 			eb->ctx->vma_ht_count++;
 			if (vma->is_ggtt) {
-				BUG_ON(obj->vma_hashed);
+				GEM_BUG_ON(obj->vma_hashed);
 				obj->vma_hashed = vma;
 			}
 		}
 
-		if (!eb_add_vma(eb, vma, i))
-			return -EINVAL;
+		ret = eb_add_vma(eb, &eb->exec[i], vma);
+		if (unlikely(ret))
+			goto err;
 	}
 	if (4*eb->ctx->vma_ht_count > 3*eb->ctx->vma_ht_size) {
 		eb->ctx->vma_ht_size |= 1;
 		queue_work(system_highpri_wq, &eb->ctx->vma_ht_resize);
 	}
 
-	return 0;
-}
-
-static struct i915_vma *
-eb_get_batch(struct i915_execbuffer *eb)
-{
-	struct i915_vma *vma;
-
-	vma = to_ptr(struct i915_vma, eb->exec[eb->args->buffer_count-1].rsvd2);
+out:
+	/* take note of the batch buffer before we might reorder the lists */
+	i = eb_batch_index(eb);
+	eb->batch_vma = to_ptr(struct i915_vma, eb->exec[i].rsvd2);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -272,14 +537,23 @@  eb_get_batch(struct i915_execbuffer *eb)
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if ((eb->exec[i].flags & EXEC_OBJECT_PINNED) == 0)
+		eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if (eb->reloc_cache.has_fence)
+		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
 
-	return vma;
+	return eb_reserve(eb);
+
+err:
+	for (i = slow_pass; i < count; i++) {
+		if (eb->exec[i].rsvd2 & 1)
+			eb->exec[i].rsvd2 = 0;
+	}
+	return ret;
 }
 
 static struct i915_vma *
-eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
+eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 {
 	if (eb->lut_mask < 0) {
 		if (handle >= -eb->lut_mask)
@@ -301,46 +575,51 @@  eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
 	}
 }
 
-static void eb_destroy(struct i915_execbuffer *eb)
+static void
+eb_reset(const struct i915_execbuffer *eb)
 {
-	struct i915_vma *vma;
+	const unsigned count = eb->args->buffer_count;
+	unsigned i;
 
-	list_for_each_entry(vma, &eb->vmas, exec_list) {
-		if (vma->exec_entry == NULL)
-			continue;
+	for (i = 0; i < count; i++) {
+		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		struct i915_vma *vma = to_ptr(struct i915_vma, entry->rsvd2);
+
+		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, entry);
+		entry->flags &= ~eb->invalid_flags;
 
-		__eb_unreserve_vma(vma, vma->exec_entry);
 		vma->exec_entry = NULL;
 	}
 
 	if (eb->lut_mask >= 0)
-		kfree(eb->buckets);
+		memset(eb->buckets, 0,
+		       (1<<eb->lut_mask)*sizeof(struct hlist_head));
 }
 
-static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
+static void eb_destroy(const struct i915_execbuffer *eb)
 {
-	return (DBG_USE_CPU_RELOC ||
-		HAS_LLC(obj->base.dev) ||
-		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
-		obj->cache_level != I915_CACHE_NONE);
-}
+	const unsigned count = eb->args->buffer_count;
+	unsigned i;
 
-/* Used to convert any address to canonical form.
- * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
- * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
- * addresses to be in a canonical form:
- * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
-#define GEN8_HIGH_ADDRESS_BIT 47
-static inline uint64_t gen8_canonical_addr(uint64_t address)
-{
-	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
-}
+	if (eb->lut_mask >= 0)
+		kfree(eb->buckets);
 
-static inline uint64_t gen8_noncanonical_addr(uint64_t address)
-{
-	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
+	if (eb->exec == NULL)
+		return;
+
+	for (i = 0; i < count; i++) {
+		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		struct i915_vma *vma = to_ptr(struct i915_vma, entry->rsvd2);
+
+		if (vma == NULL || vma->exec_entry == NULL)
+			continue;
+
+		GEM_BUG_ON(vma->exec_entry != entry);
+		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, entry);
+		vma->exec_entry = NULL;
+	}
 }
 
 static inline uint64_t
@@ -355,7 +634,9 @@  static void reloc_cache_init(struct reloc_cache *cache,
 {
 	cache->page = -1;
 	cache->vaddr = 0;
+	cache->has_llc = HAS_LLC(i915);
 	cache->use_64bit_reloc = INTEL_INFO(i915)->gen >= 8;
+	cache->has_fence = INTEL_INFO(i915)->gen < 4;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -442,7 +723,7 @@  static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		struct i915_vma *vma;
 		int ret;
 
-		if (use_cpu_reloc(obj))
+		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -542,10 +823,10 @@  repeat:
 	return 0;
 }
 
-static int
-eb_relocate_entry(struct i915_vma *vma,
-		  struct i915_execbuffer *eb,
-		  struct drm_i915_gem_relocation_entry *reloc)
+static uint64_t
+eb_relocate_entry(struct i915_execbuffer *eb,
+		  const struct i915_vma *vma,
+		  const struct drm_i915_gem_relocation_entry *reloc)
 {
 	struct i915_vma *target;
 	u64 target_offset;
@@ -618,318 +899,127 @@  eb_relocate_entry(struct i915_vma *vma,
 		return -EINVAL;
 	}
 
-	/* We can't wait for rendering with pagefaults disabled */
-	if (i915_gem_object_is_active(vma->obj) && pagefault_disabled())
-		return -EFAULT;
-
-	ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
-	if (ret)
-		return ret;
-
-	/* and update the user's relocation entry */
-	reloc->presumed_offset = target_offset;
-	return 0;
-}
-
-static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb)
-{
-#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
-	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
-	struct drm_i915_gem_relocation_entry __user *user_relocs;
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	int remain, ret = 0;
-
-	user_relocs = to_user_ptr(entry->relocs_ptr);
-
-	remain = entry->relocation_count;
-	while (remain) {
-		struct drm_i915_gem_relocation_entry *r = stack_reloc;
-		int count = remain;
-		if (count > ARRAY_SIZE(stack_reloc))
-			count = ARRAY_SIZE(stack_reloc);
-		remain -= count;
-
-		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		do {
-			u64 offset = r->presumed_offset;
-
-			ret = eb_relocate_entry(vma, eb, r);
-			if (ret)
-				goto out;
-
-			if (r->presumed_offset != offset &&
-			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
-						    &r->presumed_offset,
-						    sizeof(r->presumed_offset))) {
-				ret = -EFAULT;
-				goto out;
-			}
-
-			user_relocs++;
-			r++;
-		} while (--count);
-	}
-
-out:
-	reloc_cache_reset(&eb->reloc_cache);
-	return ret;
-#undef N_RELOC
-}
-
-static int
-eb_relocate_vma_slow(struct i915_vma *vma,
-		     struct i915_execbuffer *eb,
-		     struct drm_i915_gem_relocation_entry *relocs)
-{
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	int i, ret = 0;
-
-	for (i = 0; i < entry->relocation_count; i++) {
-		ret = eb_relocate_entry(vma, eb, &relocs[i]);
-		if (ret)
-			break;
-	}
-	reloc_cache_reset(&eb->reloc_cache);
-	return ret;
-}
-
-static int eb_relocate(struct i915_execbuffer *eb)
-{
-	struct i915_vma *vma;
-	int ret = 0;
-
-	/* This is the fast path and we cannot handle a pagefault whilst
-	 * holding the struct mutex lest the user pass in the relocations
-	 * contained within a mmaped bo. For in such a case we, the page
-	 * fault handler would call i915_gem_fault() and we would try to
-	 * acquire the struct mutex again. Obviously this is bad and so
-	 * lockdep complains vehemently.
-	 */
-	pagefault_disable();
-	list_for_each_entry(vma, &eb->vmas, exec_list) {
-		ret = eb_relocate_vma(vma, eb);
-		if (ret)
-			break;
-	}
-	pagefault_enable();
-
-	return ret;
-}
-
-static bool only_mappable_for_reloc(unsigned int flags)
-{
-	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
-		__EXEC_OBJECT_NEEDS_MAP;
-}
-
-static int
-eb_reserve_vma(struct i915_vma *vma,
-	       struct intel_engine_cs *ring,
-	       bool *need_reloc)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	uint64_t flags;
-	int ret;
-
-	flags = PIN_USER;
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-		flags |= PIN_GLOBAL;
-
-	if (!drm_mm_node_allocated(&vma->node)) {
-		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
-		 * limit address to the first 4GBs for unflagged objects.
-		 */
-		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
-			flags |= PIN_ZONE_4G;
-		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
-			flags |= PIN_GLOBAL | PIN_MAPPABLE;
-		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
-			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-		if (entry->flags & EXEC_OBJECT_PINNED)
-			flags |= entry->offset | PIN_OFFSET_FIXED;
-		if ((flags & PIN_MAPPABLE) == 0)
-			flags |= PIN_HIGH;
-	}
-
-	ret = i915_vma_pin(vma,
-			   entry->pad_to_size,
-			   entry->alignment,
-			   flags);
-	if ((ret == -ENOSPC || ret == -E2BIG) &&
-	    only_mappable_for_reloc(entry->flags))
-		ret = i915_vma_pin(vma,
-				   entry->pad_to_size,
-				   entry->alignment,
-				   flags & ~PIN_MAPPABLE);
-	if (ret)
-		return ret;
-
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
-
-	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-		ret = i915_vma_get_fence(vma);
-		if (ret)
-			return ret;
-
-		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
-	}
-
-	if (entry->offset != vma->node.start) {
-		entry->offset = vma->node.start;
-		*need_reloc = true;
-	}
-
-	return 0;
-}
-
-static bool
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-
-	if (entry->relocation_count == 0)
-		return false;
-
-	if (!vma->is_ggtt)
-		return false;
-
-	/* See also use_cpu_reloc() */
-	if (HAS_LLC(vma->obj->base.dev))
-		return false;
-
-	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
-		return false;
-
-	return true;
-}
-
-static bool
-eb_vma_misplaced(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-
-	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);
-
-	if (entry->alignment &&
-	    vma->node.start & (entry->alignment - 1))
-		return true;
-
-	if (vma->node.size < entry->pad_to_size)
-		return true;
-
-	if (entry->flags & EXEC_OBJECT_PINNED &&
-	    vma->node.start != entry->offset)
-		return true;
-
-	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
-	    vma->node.start < BATCH_OFFSET_BIAS)
-		return true;
-
-	/* avoid costly ping-pong once a batch bo ended up non-mappable */
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->map_and_fenceable)
-		return !only_mappable_for_reloc(entry->flags);
+	/* We can't wait for rendering with pagefaults disabled */
+	if (i915_gem_object_is_active(vma->obj) && pagefault_disabled()) {
+		if (i915_gem_object_flush_active(vma->obj))
+			return -EBUSY;
+	}
 
-	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
-	    (vma->node.start + vma->node.size - 1) >> 32)
-		return true;
+	ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
+	if (ret)
+		return ret;
 
-	return false;
+	/* and update the user's relocation entry */
+	return target_offset | 1;
 }
 
-static int eb_reserve(struct i915_execbuffer *eb)
+static int eb_relocate_vma(struct i915_execbuffer *eb,
+			   const struct i915_vma *vma)
 {
-	const bool has_fenced_gpu_access = INTEL_INFO(eb->i915)->gen < 4;
-	struct i915_vma *vma;
-	struct list_head ordered_vmas;
-	struct list_head pinned_vmas;
-	int retry;
-
-	INIT_LIST_HEAD(&ordered_vmas);
-	INIT_LIST_HEAD(&pinned_vmas);
-	while (!list_empty(&eb->vmas)) {
-		struct drm_i915_gem_exec_object2 *entry;
-		bool need_fence, need_mappable;
-
-		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
-		entry = vma->exec_entry;
-
-		if (eb->ctx->flags & CONTEXT_NO_ZEROMAP)
-			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-		if (!has_fenced_gpu_access)
-			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
-		need_fence =
-			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-			vma->obj->tiling_mode != I915_TILING_NONE;
-		need_mappable = need_fence || need_reloc_mappable(vma);
+#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
+	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
+	struct drm_i915_gem_relocation_entry __user *user_relocs;
+	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	int remain;
 
-		if (entry->flags & EXEC_OBJECT_PINNED)
-			list_move_tail(&vma->exec_list, &pinned_vmas);
-		else if (need_mappable) {
-			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
-			list_move(&vma->exec_list, &ordered_vmas);
-		} else
-			list_move_tail(&vma->exec_list, &ordered_vmas);
-	}
-	list_splice(&ordered_vmas, &eb->vmas);
-	list_splice(&pinned_vmas, &eb->vmas);
+	user_relocs = to_user_ptr(entry->relocs_ptr);
+	remain = entry->relocation_count;
 
-	/* Attempt to pin all of the buffers into the GTT.
-	 * This is done in 3 phases:
-	 *
-	 * 1a. Unbind all objects that do not match the GTT constraints for
-	 *     the execbuffer (fenceable, mappable, alignment etc).
-	 * 1b. Increment pin count for already bound objects.
-	 * 2.  Bind new objects.
-	 * 3.  Decrement pin count.
-	 *
-	 * This avoid unnecessary unbinding of later objects in order to make
-	 * room for the earlier objects *unless* we need to defragment.
+	/*
+	 * We must check that the entire relocation array is safe
+	 * to read, but since we may need to update the presumed
+	 * offsets during execution, check for full write access.
 	 */
-	retry = 0;
-	do {
-		int ret = 0;
+	if (!access_ok(VERIFY_WRITE, user_relocs, remain*sizeof(*user_relocs)))
+		return -EFAULT;
 
-		/* Unbind any ill-fitting objects or pin. */
-		list_for_each_entry(vma, &eb->vmas, exec_list) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
+	do {
+		struct drm_i915_gem_relocation_entry *r = stack_reloc;
+		int count = min_t(int, remain, ARRAY_SIZE(stack_reloc));
 
-			if (eb_vma_misplaced(vma))
-				ret = i915_vma_unbind(vma);
-			else
-				ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs);
-			if (ret)
-				goto err;
+		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
+			remain = -EFAULT;
+			goto out;
 		}
 
-		/* Bind fresh objects */
-		list_for_each_entry(vma, &eb->vmas, exec_list) {
-			if (drm_mm_node_allocated(&vma->node))
-				continue;
+		remain -= count;
+		do {
+			uint64_t offset = eb_relocate_entry(eb, vma, r);
+			if (offset == 0) {
+			} else if ((int64_t)offset < 0) {
+				remain = (int64_t)offset;
+				goto out;
+			} else {
+				offset &= ~1;
+				if (__copy_to_user_inatomic(&user_relocs[r-stack_reloc].presumed_offset,
+							    &offset,
+							    sizeof(offset))) {
+					remain = -EFAULT;
+					goto out;
+				}
+			}
+		} while (r++, --count);
+		user_relocs += ARRAY_SIZE(stack_reloc);
+	} while (remain);
+out:
+	reloc_cache_reset(&eb->reloc_cache);
+	return remain;
+#undef N_RELOC
+}
 
-			ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs);
-			if (ret)
-				goto err;
-		}
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb,
+		     const struct i915_vma *vma)
+{
+	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	struct drm_i915_gem_relocation_entry *relocs =
+		to_ptr(typeof(*relocs), entry->relocs_ptr);
+	int i, ret;
 
+	for (i = 0; i < entry->relocation_count; i++) {
+		uint64_t offset = eb_relocate_entry(eb, vma, &relocs[i]);
+		if ((int64_t)offset < 0) {
+			ret = (int64_t)offset;
+			goto err;
+		}
+	}
+	ret = 0;
 err:
-		if (ret != -ENOSPC || retry++)
-			return ret;
+	reloc_cache_reset(&eb->reloc_cache);
+	return ret;
+}
 
-		/* Decrement pin count for bound objects */
-		list_for_each_entry(vma, &eb->vmas, exec_list)
-			eb_unreserve_vma(vma);
+static int eb_relocate(struct i915_execbuffer *eb)
+{
+	const struct i915_vma *vma;
+	int ret = 0;
 
-		ret = i915_gem_evict_vm(eb->vm, true);
-		if (ret)
-			return ret;
-	} while (1);
+	/* This is the fast path and we cannot handle a pagefault whilst
+	 * holding the struct mutex lest the user pass in the relocations
+	 * contained within a mmaped bo. For in such a case we, the page
+	 * fault handler would call i915_gem_fault() and we would try to
+	 * acquire the struct mutex again. Obviously this is bad and so
+	 * lockdep complains vehemently.
+	 */
+	pagefault_disable();
+	list_for_each_entry(vma, &eb->relocs, reloc_link) {
+retry:
+		ret = eb_relocate_vma(eb, vma);
+		if (ret == 0)
+			continue;
+
+		if (ret == -EBUSY) {
+			pagefault_enable();
+			ret = i915_gem_object_wait_rendering(vma->obj, false);
+			pagefault_disable();
+			if (ret == 0)
+				goto retry;
+		}
+		break;
+	}
+	pagefault_enable();
+
+	return ret;
 }
 
 static int eb_select_context(struct i915_execbuffer *eb)
@@ -950,49 +1040,52 @@  static int eb_select_context(struct i915_execbuffer *eb)
 	eb->ctx = ctx;
 	eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->gtt.base;
 
+	eb->context_flags = 0;
+	if (ctx->flags & CONTEXT_NO_ZEROMAP)
+		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
 	return 0;
 }
 
-static int
-eb_relocate_slow(struct i915_execbuffer *eb)
+static struct drm_i915_gem_relocation_entry *
+eb_copy_relocations(const struct i915_execbuffer *eb)
 {
+	const unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
 	const unsigned count = eb->args->buffer_count;
-	struct drm_device *dev = eb->i915->dev;
 	struct drm_i915_gem_relocation_entry *reloc;
-	struct i915_vma *vma;
-	int *reloc_offset;
-	int i, total, ret;
-
-	/* We may process another execbuffer during the unlock... */
-	eb_reset(eb);
-	mutex_unlock(&dev->struct_mutex);
+	unsigned total, i;
 
 	total = 0;
-	for (i = 0; i < count; i++)
-		total += eb->exec[i].relocation_count;
-
-	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
-	reloc = drm_malloc_ab(total, sizeof(*reloc));
-	if (reloc == NULL || reloc_offset == NULL) {
-		drm_free_large(reloc);
-		drm_free_large(reloc_offset);
-		mutex_lock(&dev->struct_mutex);
-		return -ENOMEM;
+	for (i = 0; i < count; i++) {
+		unsigned nreloc = eb->exec[i].relocation_count;
+
+		if (total > relocs_max - nreloc)
+			return ERR_PTR(-EINVAL);
+
+		total += nreloc;
 	}
+	if (total == 0)
+		return NULL;
+
+	reloc = drm_malloc_gfp(total, sizeof(*reloc), GFP_TEMPORARY);
+	if (reloc == NULL)
+		return ERR_PTR(-ENOMEM);
 
 	total = 0;
 	for (i = 0; i < count; i++) {
 		struct drm_i915_gem_relocation_entry __user *user_relocs;
+		unsigned nreloc = eb->exec[i].relocation_count, j;
 		u64 invalid_offset = (u64)-1;
-		int j;
+
+		if (nreloc == 0)
+			continue;
 
 		user_relocs = to_user_ptr(eb->exec[i].relocs_ptr);
 
 		if (copy_from_user(reloc+total, user_relocs,
-				   eb->exec[i].relocation_count * sizeof(*reloc))) {
-			ret = -EFAULT;
-			mutex_lock(&dev->struct_mutex);
-			goto err;
+				   nreloc * sizeof(*reloc))) {
+			drm_free_large(reloc);
+			return ERR_PTR(-EFAULT);
 		}
 
 		/* As we do not update the known relocation offsets after
@@ -1004,18 +1097,40 @@  eb_relocate_slow(struct i915_execbuffer *eb)
 		 * happened we would make the mistake of assuming that the
 		 * relocations were valid.
 		 */
-		for (j = 0; j < eb->exec[i].relocation_count; j++) {
+		for (j = 0; j < nreloc; j++) {
 			if (__copy_to_user(&user_relocs[j].presumed_offset,
 					   &invalid_offset,
 					   sizeof(invalid_offset))) {
-				ret = -EFAULT;
-				mutex_lock(&dev->struct_mutex);
-				goto err;
+				drm_free_large(reloc);
+				return ERR_PTR(-EFAULT);
 			}
 		}
 
-		reloc_offset[i] = total;
-		total += eb->exec[i].relocation_count;
+		eb->exec[i].relocs_ptr = (uintptr_t)(reloc + total);
+		total += nreloc;
+	}
+
+	return reloc;
+}
+
+static int eb_relocate_slow(struct i915_execbuffer *eb)
+{
+	struct drm_device *dev = eb->i915->dev;
+	struct drm_i915_gem_relocation_entry *reloc = NULL;
+	const struct i915_vma *vma;
+	int ret;
+
+repeat:
+	/* We may process another execbuffer during the unlock... */
+	eb_reset(eb);
+	mutex_unlock(&dev->struct_mutex);
+
+	if (reloc == NULL) {
+		reloc = eb_copy_relocations(eb);
+		if (IS_ERR(reloc)) {
+			mutex_lock(&dev->struct_mutex);
+			return PTR_ERR(reloc);
+		}
 	}
 
 	ret = i915_mutex_lock_interruptible(dev);
@@ -1033,13 +1148,8 @@  eb_relocate_slow(struct i915_execbuffer *eb)
 	if (ret)
 		goto err;
 
-	ret = eb_reserve(eb);
-	if (ret)
-		goto err;
-
-	list_for_each_entry(vma, &eb->vmas, exec_list) {
-		int offset = vma->exec_entry - eb->exec;
-		ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[offset]);
+	list_for_each_entry(vma, &eb->relocs, reloc_link) {
+		ret = eb_relocate_vma_slow(eb, vma);
 		if (ret)
 			goto err;
 	}
@@ -1051,8 +1161,12 @@  eb_relocate_slow(struct i915_execbuffer *eb)
 	 */
 
 err:
+	if (ret == -EAGAIN) {
+		cond_resched();
+		goto repeat;
+	}
 	drm_free_large(reloc);
-	drm_free_large(reloc_offset);
+
 	return ret;
 }
 
@@ -1060,27 +1174,38 @@  static int
 eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned other_rings = (~intel_engine_flag(eb->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
-	struct i915_vma *vma;
-	uint32_t flush_domains = 0;
+	const unsigned count = eb->args->buffer_count;
 	bool flush_chipset = false;
+	unsigned i;
 	int ret;
 
-	list_for_each_entry(vma, &eb->vmas, exec_list) {
+	for (i = 0; i < count; i++) {
+		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		struct i915_vma *vma = to_ptr(struct i915_vma, entry->rsvd2);
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj,
-						   eb->request,
-						   vma->exec_entry->flags & EXEC_OBJECT_WRITE);
+			ret = i915_gem_object_sync(obj, eb->request,
+						   entry->flags & EXEC_OBJECT_WRITE);
 			if (ret)
 				return ret;
 		}
 
-		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			flush_chipset |= i915_gem_clflush_object(obj, false);
+		if (!i915_gem_object_is_active(obj)) {
+			if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+				flush_chipset |= i915_gem_clflush_object(obj, false);
+
+			obj->base.write_domain = 0;
+			if (entry->flags & EXEC_OBJECT_WRITE)
+				obj->base.read_domains = 0;
+			obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
+		}
 
-		flush_domains |= obj->base.write_domain;
+		i915_vma_move_to_active(vma, eb->request, entry->flags);
+		__eb_unreserve_vma(vma, entry);
+		vma->exec_entry = NULL;
 	}
+	eb->exec = NULL;
 
 	if (flush_chipset)
 		i915_gem_chipset_flush(eb->i915->dev);
@@ -1115,79 +1240,6 @@  i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 	return true;
 }
 
-static int
-validate_exec_list(struct drm_device *dev,
-		   struct drm_i915_gem_exec_object2 *exec,
-		   int count)
-{
-	unsigned relocs_total = 0;
-	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
-	unsigned invalid_flags;
-	int i;
-
-	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
-	if (USES_FULL_PPGTT(dev))
-		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
-
-	for (i = 0; i < count; i++) {
-		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
-		int length; /* limited by fault_in_pages_readable() */
-
-		if (exec[i].flags & invalid_flags)
-			return -EINVAL;
-
-		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
-		 * any non-page-aligned or non-canonical addresses.
-		 */
-		if (exec[i].flags & EXEC_OBJECT_PINNED) {
-			if (exec[i].offset !=
-			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
-				return -EINVAL;
-
-			/* From drm_mm perspective address space is continuous,
-			 * so from this point we're always using non-canonical
-			 * form internally.
-			 */
-			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
-		}
-
-		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
-			return -EINVAL;
-
-		/* pad_to_size was once a reserved field, so sanitize it */
-		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
-			if (offset_in_page(exec[i].pad_to_size))
-				return -EINVAL;
-		} else
-			exec[i].pad_to_size = 0;
-
-		/* First check for malicious input causing overflow in
-		 * the worst case where we need to allocate the entire
-		 * relocation tree as a single array.
-		 */
-		if (exec[i].relocation_count > relocs_max - relocs_total)
-			return -EINVAL;
-		relocs_total += exec[i].relocation_count;
-
-		length = exec[i].relocation_count *
-			sizeof(struct drm_i915_gem_relocation_entry);
-		/*
-		 * We must check that the entire relocation array is safe
-		 * to read, but since we may need to update the presumed
-		 * offsets during execution, check for full write access.
-		 */
-		if (!access_ok(VERIFY_WRITE, ptr, length))
-			return -EFAULT;
-
-		if (likely(!i915.prefault_disable)) {
-			if (fault_in_multipages_readable(ptr, length))
-				return -EFAULT;
-		}
-	}
-
-	return 0;
-}
-
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req,
 			     unsigned flags)
@@ -1224,26 +1276,6 @@  void i915_vma_move_to_active(struct i915_vma *vma,
 	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
-static void
-eb_move_to_active(struct i915_execbuffer *eb)
-{
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, &eb->vmas, exec_list) {
-		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
-
-		obj->base.write_domain = 0;
-		if (vma->exec_entry->flags & EXEC_OBJECT_WRITE)
-			obj->base.read_domains = 0;
-		obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
-
-		i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags);
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
-	}
-}
-
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
@@ -1255,25 +1287,22 @@  i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 		return -EINVAL;
 	}
 
-	ret = intel_ring_begin(req, 4 * 3);
+	ret = intel_ring_begin(req, 4 * 2 + 2);
 	if (ret)
 		return ret;
 
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(4));
 	for (i = 0; i < 4; i++) {
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
 		intel_ring_emit(ring, 0);
 	}
-
+	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
 	return 0;
 }
 
-static struct i915_vma *
-eb_parse(struct i915_execbuffer *eb,
-	 struct drm_i915_gem_exec_object2 *shadow_exec_entry,
-	 bool is_master)
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 {
 	struct drm_i915_gem_object *shadow_batch_obj;
 	struct i915_vma *vma;
@@ -1304,11 +1333,11 @@  eb_parse(struct i915_execbuffer *eb,
 		goto err;
 	}
 
-	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
-
-	vma->exec_entry = shadow_exec_entry;
+	vma->exec_entry =
+	       	memset(&eb->exec[eb->args->buffer_count++],
+		       0, sizeof(*vma->exec_entry));
 	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
-	list_add_tail(&vma->exec_list, &eb->vmas);
+	vma->exec_entry->rsvd2 = (uintptr_t)vma;
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -1324,70 +1353,81 @@  add_to_client(struct drm_i915_gem_request *req,
 }
 
 static int
-execbuf_submit(struct i915_execbuffer *eb)
+eb_set_constants_offset(struct i915_execbuffer *eb)
 {
-	struct intel_ring *ring = eb->request->ring;
 	struct drm_i915_private *dev_priv = eb->i915;
-	int instp_mode;
-	u32 instp_mask;
+	struct intel_ring *ring;
+	u32 mode, mask;
 	int ret;
 
-	ret = eb_move_to_gpu(eb);
-	if (ret)
-		return ret;
-
-	ret = i915_switch_context(eb->request);
-	if (ret)
-		return ret;
-
-	instp_mode = eb->args->flags & I915_EXEC_CONSTANTS_MASK;
-	instp_mask = I915_EXEC_CONSTANTS_MASK;
-	switch (instp_mode) {
+	mode = eb->args->flags & I915_EXEC_CONSTANTS_MASK;
+	switch (mode) {
 	case I915_EXEC_CONSTANTS_REL_GENERAL:
 	case I915_EXEC_CONSTANTS_ABSOLUTE:
 	case I915_EXEC_CONSTANTS_REL_SURFACE:
-		if (instp_mode != 0 && eb->engine->id != RCS) {
-			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
-			return -EINVAL;
-		}
-
-		if (instp_mode != dev_priv->relative_constants_mode) {
-			if (INTEL_INFO(dev_priv)->gen < 4) {
-				DRM_DEBUG("no rel constants on pre-gen4\n");
-				return -EINVAL;
-			}
-
-			if (INTEL_INFO(dev_priv)->gen > 5 &&
-			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
-				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
-				return -EINVAL;
-			}
-
-			/* The HW changed the meaning on this bit on gen6 */
-			if (INTEL_INFO(dev_priv)->gen >= 6)
-				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
-		}
 		break;
 	default:
-		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
+		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
 		return -EINVAL;
 	}
 
-	if (eb->engine->id == RCS &&
-	    instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_ring_begin(eb->request, 4);
-		if (ret)
-			return ret;
+	if (mode == dev_priv->relative_constants_mode)
+		return 0;
+
+	if (eb->engine->id != RCS) {
+		DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
+		return -EINVAL;
+	}
 
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit_reg(ring, INSTPM);
-		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
-		intel_ring_advance(ring);
+	if (INTEL_INFO(dev_priv)->gen < 4) {
+		DRM_DEBUG("no rel constants on pre-gen4\n");
+		return -EINVAL;
+	}
 
-		dev_priv->relative_constants_mode = instp_mode;
+	if (INTEL_INFO(dev_priv)->gen > 5 &&
+	    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
+		DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
+		return -EINVAL;
 	}
 
+	/* The HW changed the meaning on this bit on gen6 */
+	mask = I915_EXEC_CONSTANTS_MASK;
+	if (INTEL_INFO(dev_priv)->gen >= 6)
+		mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
+
+	ret = intel_ring_begin(eb->request, 4);
+	if (ret)
+		return ret;
+
+	ring = eb->request->ring;
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit_reg(ring, INSTPM);
+	intel_ring_emit(ring, mask << 16 | mode);
+	intel_ring_advance(ring);
+
+	dev_priv->relative_constants_mode = mode;
+
+	return 0;
+}
+
+static int
+execbuf_submit(struct i915_execbuffer *eb)
+{
+	int ret;
+
+	ret = eb_move_to_gpu(eb);
+	if (ret)
+		return ret;
+
+	ret = i915_switch_context(eb->request);
+	if (ret)
+		return ret;
+
+	ret = eb_set_constants_offset(eb);
+	if (ret)
+		return ret;
+
 	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		ret = i915_reset_gen7_sol_offsets(eb->request);
 		if (ret)
@@ -1396,15 +1436,13 @@  execbuf_submit(struct i915_execbuffer *eb)
 
 	ret = eb->engine->emit_bb_start(eb->request,
 					eb->batch_vma->node.start +
-					eb->batch_start_offset,
+					eb->args->batch_start_offset,
 					eb->args->batch_len,
 					eb->dispatch_flags);
 	if (ret)
 		return ret;
 
 	trace_i915_gem_ring_dispatch(eb->request, eb->dispatch_flags);
-
-	eb_move_to_active(eb);
 	add_to_client(eb->request, eb->file);
 
 	return 0;
@@ -1448,16 +1486,11 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct i915_execbuffer eb;
-	struct drm_i915_gem_exec_object2 shadow_exec_entry;
 	int ret;
 
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
 
-	ret = validate_exec_list(dev, exec, args->buffer_count);
-	if (ret)
-		return ret;
-
 	eb.dispatch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
 		if (!file->is_master || !capable(CAP_SYS_ADMIN))
@@ -1485,7 +1518,11 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.file = file;
 	eb.args = args;
 	eb.exec = exec;
-	eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
+	if ((args->flags & I915_EXEC_NO_RELOC) == 0)
+		args->flags |= __EXEC_HAS_RELOC;
+	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
+	if (USES_FULL_PPGTT(eb.i915))
+		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
 
 	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
@@ -1561,22 +1598,11 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	if (ret)
 		goto err;
 
-	/* take note of the batch buffer before we might reorder the lists */
-	eb.batch_vma = eb_get_batch(&eb);
-
-	/* Move the objects en-masse into the GTT, evicting if necessary. */
-	ret = eb_reserve(&eb);
-	if (ret)
-		goto err;
-
 	/* The objects are in their final locations, apply the relocations. */
-	if (eb.need_relocs)
+	if (args->flags & __EXEC_HAS_RELOC) {
 		ret = eb_relocate(&eb);
-	if (ret) {
-		if (ret == -EFAULT) {
+		if (ret == -EAGAIN || ret == -EFAULT)
 			ret = eb_relocate_slow(&eb);
-			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-		}
 		if (ret)
 			goto err;
 	}
@@ -1588,11 +1614,10 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err;
 	}
 
-	eb.batch_start_offset = args->batch_start_offset;
 	if (intel_engine_needs_cmd_parser(eb.engine) && args->batch_len) {
 		struct i915_vma *vma;
 
-		vma = eb_parse(&eb, &shadow_exec_entry, file->is_master);
+		vma = eb_parse(&eb, file->is_master);
 		if (IS_ERR(vma)) {
 			ret = PTR_ERR(vma);
 			goto err;
@@ -1609,7 +1634,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 			 * command parser has accepted.
 			 */
 			eb.dispatch_flags |= I915_DISPATCH_SECURE;
-			eb.batch_start_offset = 0;
+			eb.args->batch_start_offset = 0;
 			eb.batch_vma = vma;
 		}
 	}
@@ -1700,7 +1725,7 @@  i915_gem_execbuffer(struct drm_device *dev, void *data,
 
 	/* Copy in the exec list from userland */
 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
-	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
+	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count + 1);
 	if (exec_list == NULL || exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
 			  args->buffer_count);
@@ -1743,24 +1768,22 @@  i915_gem_execbuffer(struct drm_device *dev, void *data,
 	i915_execbuffer2_set_context_id(exec2, 0);
 
 	ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
-	if (!ret) {
+	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			to_user_ptr(args->buffers_ptr);
 
 		/* Copy the new buffer offsets back to the user's exec list. */
 		for (i = 0; i < args->buffer_count; i++) {
+			if ((exec2_list[i].offset & UPDATE) == 0)
+				continue;
+
 			exec2_list[i].offset =
-				gen8_canonical_addr(exec2_list[i].offset);
-			ret = __copy_to_user(&user_exec_list[i].offset,
-					     &exec2_list[i].offset,
-					     sizeof(user_exec_list[i].offset));
-			if (ret) {
-				ret = -EFAULT;
-				DRM_DEBUG("failed to copy %d exec entries "
-					  "back to user (%d)\n",
-					  args->buffer_count, ret);
+				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+			exec2_list[i].offset &= PIN_OFFSET_MASK;
+			if (__copy_to_user(&user_exec_list[i].offset,
+					   &exec2_list[i].offset,
+					   sizeof(user_exec_list[i].offset)))
 				break;
-			}
 		}
 	}
 
@@ -1789,43 +1812,38 @@  i915_gem_execbuffer2(struct drm_device *dev, void *data,
 	}
 
 	exec2_list = drm_malloc_gfp(sizeof(*exec2_list),
-				    args->buffer_count,
+				    args->buffer_count + 1,
 				    GFP_TEMPORARY);
 	if (exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
 			  args->buffer_count);
 		return -ENOMEM;
 	}
-	ret = copy_from_user(exec2_list,
-			     to_user_ptr(args->buffers_ptr),
-			     sizeof(*exec2_list) * args->buffer_count);
-	if (ret != 0) {
-		DRM_DEBUG("copy %d exec entries failed %d\n",
-			  args->buffer_count, ret);
+	if (copy_from_user(exec2_list,
+			   to_user_ptr(args->buffers_ptr),
+			   sizeof(*exec2_list) * args->buffer_count)) {
+		DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count);
 		drm_free_large(exec2_list);
 		return -EFAULT;
 	}
 
 	ret = i915_gem_do_execbuffer(dev, file, args, exec2_list);
-	if (!ret) {
+	if (args->flags & __EXEC_HAS_RELOC) {
 		/* Copy the new buffer offsets back to the user's exec list. */
 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
-				   to_user_ptr(args->buffers_ptr);
+			to_user_ptr(args->buffers_ptr);
 		int i;
 
 		for (i = 0; i < args->buffer_count; i++) {
+			if ((exec2_list[i].offset & UPDATE) == 0)
+				continue;
+
 			exec2_list[i].offset =
-				gen8_canonical_addr(exec2_list[i].offset);
-			ret = __copy_to_user(&user_exec_list[i].offset,
-					     &exec2_list[i].offset,
-					     sizeof(user_exec_list[i].offset));
-			if (ret) {
-				ret = -EFAULT;
-				DRM_DEBUG("failed to copy %d exec entries "
-					  "back to user\n",
-					  args->buffer_count);
+				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+			if (__copy_to_user(&user_exec_list[i].offset,
+					   &exec2_list[i].offset,
+					   sizeof(user_exec_list[i].offset)))
 				break;
-			}
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cb3a6e272e22..a9b547e4ea6f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3594,7 +3594,7 @@  void *i915_vma_iomap(struct drm_i915_private *dev_priv,
 			int ret;
 
 			/* Too many areas already allocated? */
-			ret = i915_gem_evict_vm(vma->vm, true);
+			ret = i915_gem_evict_vm(vma->vm);
 			if (ret)
 				return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3080033b722c..6996d79175a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -244,7 +244,9 @@  struct i915_vma {
 	struct hlist_node obj_node;
 
 	/** This vma's place in the batchbuffer or on the eviction list */
-	struct list_head exec_list;
+	struct list_head exec_link;
+	struct list_head reloc_link;
+	struct list_head evict_link;
 
 	/**
 	 * Used for performing relocations during execbuffer insertion.