[05/18] drm/i915: Move GEM activity tracking into a common struct reservation_object

Message ID	20160914065250.15482-6-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Wed, 14 Sep 2016 07:52:37 +0100 Message-Id: <20160914065250.15482-6-chris@chris-wilson.co.uk> In-Reply-To: <20160914065250.15482-1-chris@chris-wilson.co.uk> References: <20160914065250.15482-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 05/18] drm/i915: Move GEM activity tracking into a common struct reservation_object Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 64702cc68e3a..c4e7532c5b6a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -134,15 +134,13 @@ static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct intel_engine_cs *engine; struct i915_vma *vma; unsigned int frontbuffer_bits; int pin_count = 0; - enum intel_engine_id id; lockdep_assert_held(&obj->base.dev->struct_mutex); - seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ", + seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s", &obj->base, get_active_flag(obj), get_pin_flag(obj), @@ -151,14 +149,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) get_pin_mapped_flag(obj), obj->base.size / 1024, obj->base.read_domains, - obj->base.write_domain); - for_each_engine_id(engine, dev_priv, id) - seq_printf(m, "%x ", - i915_gem_active_get_seqno(&obj->last_read[id], - &obj->base.dev->struct_mutex)); - seq_printf(m, "] %x %s%s%s", - i915_gem_active_get_seqno(&obj->last_write, - &obj->base.dev->struct_mutex), + obj->base.write_domain, i915_cache_level_str(dev_priv, obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -198,11 +189,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (%s mappable)", s); } - engine = i915_gem_active_get_engine(&obj->last_write, - &dev_priv->drm.struct_mutex); - if (engine) - seq_printf(m, " (%s)", engine->name); - frontbuffer_bits = atomic_read(&obj->frontbuffer_bits); if (frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index af0212032b64..2bcab3087e8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -41,6 +41,7 @@ #include <linux/intel-iommu.h> #include <linux/kref.h> #include <linux/pm_qos.h> +#include <linux/reservation.h> #include <linux/shmem_fs.h> #include <drm/drmP.h> @@ -2198,15 +2199,6 @@ struct drm_i915_gem_object { struct list_head batch_pool_link; unsigned long flags; - /** - * This is set if the object is on the active lists (has pending - * rendering and so a non-zero seqno), and is not set if it i s on - * inactive (ready to be unbound) list. - */ -#define I915_BO_ACTIVE_SHIFT 0 -#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) -#define __I915_BO_ACTIVE(bo) \ - ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * This is set if the object has been written to since last bound @@ -2245,6 +2237,7 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ unsigned int bind_count; + unsigned int active_count; unsigned int pin_display; struct sg_table *pages; @@ -2265,8 +2258,7 @@ struct drm_i915_gem_object { * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. */ - struct i915_gem_active last_read[I915_NUM_ENGINES]; - struct i915_gem_active last_write; + struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ unsigned long framebuffer_references; @@ -2289,6 +2281,8 @@ struct drm_i915_gem_object { struct work_struct *work; } userptr; }; + + struct reservation_object __builtin_resv; }; static inline struct drm_i915_gem_object * @@ -2347,35 +2341,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; } -static inline unsigned long -i915_gem_object_get_active(const struct drm_i915_gem_object *obj) -{ - return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; -} - static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { - return i915_gem_object_get_active(obj); -} - -static inline void -i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) -{ - obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT); -} - -static inline void -i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine) -{ - obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT); -} - -static inline bool -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, - int engine) -{ - return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); + return obj->active_count; } static inline unsigned int diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d9214c9d31d2..ab8d10388581 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,7 +29,6 @@ #include <drm/drm_vma_manager.h> #include <drm/i915_drm.h> #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" @@ -441,11 +440,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, long timeout, struct intel_rps_client *rps) { - struct reservation_object *resv; - struct i915_gem_active *active; - unsigned long active_mask; - int idx; - might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) GEM_BUG_ON(!!lockdep_is_held(&obj->base.dev->struct_mutex) != @@ -453,33 +447,9 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, #endif GEM_BUG_ON(timeout < 0); - if (flags & I915_WAIT_ALL) { - active = obj->last_read; - active_mask = i915_gem_object_get_active(obj); - } else { - active_mask = 1; - active = &obj->last_write; - } - - for_each_active(active_mask, idx) { - struct drm_i915_gem_request *request; - - request = i915_gem_active_get_unlocked(&active[idx]); - if (request) { - timeout = i915_gem_object_wait_fence(&request->fence, - flags, timeout, - rps); - i915_gem_request_put(request); - } - if (timeout < 0) - return timeout; - } - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) - timeout = i915_gem_object_wait_reservation(resv, - flags, timeout, - rps); + timeout = i915_gem_object_wait_reservation(obj->resv, + flags, timeout, + rps); return timeout < 0 ? timeout : timeout > 0 ? 0 : -ETIME; } @@ -2586,41 +2556,6 @@ err: return ERR_PTR(ret); } -static void -i915_gem_object_retire__write(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - struct drm_i915_gem_object *obj = - container_of(active, struct drm_i915_gem_object, last_write); - - intel_fb_obj_flush(obj, true, ORIGIN_CS); -} - -static void -i915_gem_object_retire__read(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - int idx = request->engine->id; - struct drm_i915_gem_object *obj = - container_of(active, struct drm_i915_gem_object, last_read[idx]); - - GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); - - i915_gem_object_clear_active(obj, idx); - if (i915_gem_object_is_active(obj)) - return; - - /* Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) - */ - if (obj->bind_count) - list_move_tail(&obj->global_list, - &request->i915->mm.bound_list); - - i915_gem_object_put(obj); -} - static bool i915_context_is_banned(const struct i915_gem_context *ctx) { unsigned long elapsed; @@ -2923,6 +2858,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (!obj) return -ENOENT; + if (reservation_object_test_signaled_rcu(obj->resv, true)) { + ret = 0; + goto out; + } + + if (!args->timeout_ns) { + ret = -ETIME; + goto out; + } + start = ktime_get(); ret = i915_gem_object_wait(obj, @@ -2936,8 +2881,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns = 0; } +out: i915_gem_object_put_unlocked(obj); - return ret; } @@ -3956,173 +3901,18 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return vma; } -static __always_inline unsigned int __busy_read_flag(unsigned int id) -{ - /* Note that we could alias engines in the execbuf API, but - * that would be very unwise as it prevents userspace from - * fine control over engine selection. Ahem. - * - * This should be something like EXEC_MAX_ENGINE instead of - * I915_NUM_ENGINES. - */ - BUILD_BUG_ON(I915_NUM_ENGINES > 16); - return 0x10000 << id; -} - -static __always_inline unsigned int __busy_write_id(unsigned int id) -{ - /* The uABI guarantees an active writer is also amongst the read - * engines. This would be true if we accessed the activity tracking - * under the lock, but as we perform the lookup of the object and - * its activity locklessly we can not guarantee that the last_write - * being active implies that we have set the same engine flag from - * last_read - hence we always set both read and write busy for - * last_write. - */ - return id | __busy_read_flag(id); -} - -static __always_inline unsigned int -__busy_set_if_active(const struct i915_gem_active *active, - unsigned int (*flag)(unsigned int id)) -{ - struct drm_i915_gem_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return 0; - - /* This is racy. See __i915_gem_active_get_rcu() for an in detail - * discussion of how to handle the race correctly, but for reporting - * the busy state we err on the side of potentially reporting the - * wrong engine as being busy (but we guarantee that the result - * is at least self-consistent). - * - * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated - * whilst we are inspecting it, even under the RCU read lock as we are. - * This means that there is a small window for the engine and/or the - * seqno to have been overwritten. The seqno will always be in the - * future compared to the intended, and so we know that if that - * seqno is idle (on whatever engine) our request is idle and the - * return 0 above is correct. - * - * The issue is that if the engine is switched, it is just as likely - * to report that it is busy (but since the switch happened, we know - * the request should be idle). So there is a small chance that a busy - * result is actually the wrong engine. - * - * So why don't we care? - * - * For starters, the busy ioctl is a heuristic that is by definition - * racy. Even with perfect serialisation in the driver, the hardware - * state is constantly advancing - the state we report to the user - * is stale. - * - * The critical information for the busy-ioctl is whether the object - * is idle as userspace relies on that to detect whether its next - * access will stall, or if it has missed submitting commands to - * the hardware allowing the GPU to stall. We never generate a - * false-positive for idleness, thus busy-ioctl is reliable at the - * most fundamental level, and we maintain the guarantee that a - * busy object left to itself will eventually become idle (and stay - * idle!). - * - * We allow ourselves the leeway of potentially misreporting the busy - * state because that is an optimisation heuristic that is constantly - * in flux. Being quickly able to detect the busy/idle state is much - * more important than accurate logging of exactly which engines were - * busy. - * - * For accuracy in reporting the engine, we could use - * - * result = 0; - * request = __i915_gem_active_get_rcu(active); - * if (request) { - * if (!i915_gem_request_completed(request)) - * result = flag(request->engine->exec_id); - * i915_gem_request_put(request); - * } - * - * but that still remains susceptible to both hardware and userspace - * races. So we accept making the result of that race slightly worse, - * given the rarity of the race and its low impact on the result. - */ - return flag(READ_ONCE(request->engine->exec_id)); -} - -static __always_inline unsigned int -busy_check_reader(const struct i915_gem_active *active) -{ - return __busy_set_if_active(active, __busy_read_flag); -} - -static __always_inline unsigned int -busy_check_writer(const struct i915_gem_active *active) -{ - return __busy_set_if_active(active, __busy_write_id); -} - int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - unsigned long active; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; - args->busy = 0; - active = __I915_BO_ACTIVE(obj); - if (active) { - int idx; - - /* Yes, the lookups are intentionally racy. - * - * First, we cannot simply rely on __I915_BO_ACTIVE. We have - * to regard the value as stale and as our ABI guarantees - * forward progress, we confirm the status of each active - * request with the hardware. - * - * Even though we guard the pointer lookup by RCU, that only - * guarantees that the pointer and its contents remain - * dereferencable and does *not* mean that the request we - * have is the same as the one being tracked by the object. - * - * Consider that we lookup the request just as it is being - * retired and freed. We take a local copy of the pointer, - * but before we add its engine into the busy set, the other - * thread reallocates it and assigns it to a task on another - * engine with a fresh and incomplete seqno. Guarding against - * that requires careful serialisation and reference counting, - * i.e. using __i915_gem_active_get_request_rcu(). We don't, - * instead we expect that if the result is busy, which engines - * are busy is not completely reliable - we only guarantee - * that the object was busy. - */ - rcu_read_lock(); - - for_each_active(active, idx) - args->busy |= busy_check_reader(&obj->last_read[idx]); - - /* For ABI sanity, we only care that the write engine is in - * the set of read engines. This should be ensured by the - * ordering of setting last_read/last_write in - * i915_vma_move_to_active(), and then in reverse in retire. - * However, for good measure, we always report the last_write - * request as a busy read as well as being a busy write. - * - * We don't care that the set of active read/write engines - * may change during construction of the result, as it is - * equally liable to change before userspace can inspect - * the result. - */ - args->busy |= busy_check_writer(&obj->last_write); - - rcu_read_unlock(); - } + args->busy = !reservation_object_test_signaled_rcu(obj->resv, true); i915_gem_object_put_unlocked(obj); return 0; @@ -4189,20 +3979,16 @@ unlock: void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops) { - int i; - INIT_LIST_HEAD(&obj->global_list); - for (i = 0; i < I915_NUM_ENGINES; i++) - init_request_active(&obj->last_read[i], - i915_gem_object_retire__read); - init_request_active(&obj->last_write, - i915_gem_object_retire__write); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; + reservation_object_init(&obj->__builtin_resv); + obj->resv = &obj->__builtin_resv; + obj->frontbuffer_ggtt_origin = ORIGIN_GTT; obj->madv = I915_MADV_WILLNEED; @@ -4349,6 +4135,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->ops->release) obj->ops->release(obj); + reservation_object_fini(&obj->__builtin_resv); + drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev_priv, obj->base.size); diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index ed989596d9a3..bbfea965c593 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -114,8 +114,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, list_for_each_entry_safe(tmp, next, list, batch_pool_link) { /* The batches are strictly LRU ordered */ - if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id], - &tmp->base.dev->struct_mutex)) + if (i915_gem_object_is_active(tmp)) break; /* While we're looping, do some clean up */ diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 10265bb35604..b0a429aeafd9 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -222,49 +222,6 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; -static void export_fences(struct drm_i915_gem_object *obj, - struct dma_buf *dma_buf) -{ - struct reservation_object *resv = dma_buf->resv; - struct drm_i915_gem_request *req; - unsigned long active; - int idx; - - active = __I915_BO_ACTIVE(obj); - if (!active) - return; - - /* Serialise with execbuf to prevent concurrent fence-loops */ - mutex_lock(&obj->base.dev->struct_mutex); - - /* Mark the object for future fences before racily adding old fences */ - obj->base.dma_buf = dma_buf; - - ww_mutex_lock(&resv->lock, NULL); - - for_each_active(active, idx) { - req = i915_gem_active_get(&obj->last_read[idx], - &obj->base.dev->struct_mutex); - if (!req) - continue; - - if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); - - i915_gem_request_put(req); - } - - req = i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req) { - reservation_object_add_excl_fence(resv, &req->fence); - i915_gem_request_put(req); - } - - ww_mutex_unlock(&resv->lock); - mutex_unlock(&obj->base.dev->struct_mutex); -} - struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { @@ -276,6 +233,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, exp_info.size = gem_obj->size; exp_info.flags = flags; exp_info.priv = gem_obj; + exp_info.resv = obj->resv; if (obj->ops->dmabuf_export) { int ret = obj->ops->dmabuf_export(obj); @@ -287,7 +245,6 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, if (IS_ERR(dma_buf)) return dma_buf; - export_fences(obj, dma_buf); return dma_buf; } @@ -350,6 +307,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, dma_buf->size); i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); obj->base.import_attach = attach; + obj->resv = dma_buf->resv; /* We use GTT as shorthand for a coherent domain, one that is * neither in the GPU cache nor in the CPU cache, where all diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/i915_gem_dmabuf.h deleted file mode 100644 index 91315557e421..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef _I915_GEM_DMABUF_H_ -#define _I915_GEM_DMABUF_H_ - -#include <linux/dma-buf.h> - -static inline struct reservation_object * -i915_gem_object_get_dmabuf_resv(struct drm_i915_gem_object *obj) -{ - struct dma_buf *dma_buf; - - if (obj->base.dma_buf) - dma_buf = obj->base.dma_buf; - else if (obj->base.import_attach) - dma_buf = obj->base.import_attach->dmabuf; - else - return NULL; - - return dma_buf->resv; -} - -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 33c85227643d..6b5175ee824c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,7 +34,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -552,20 +551,6 @@ repeat: return 0; } -static bool object_is_idle(struct drm_i915_gem_object *obj) -{ - unsigned long active = i915_gem_object_get_active(obj); - int idx; - - for_each_active(active, idx) { - if (!i915_gem_active_is_idle(&obj->last_read[idx], - &obj->base.dev->struct_mutex)) - return false; - } - - return true; -} - static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, @@ -650,7 +635,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } /* We can't wait for rendering with pagefaults disabled */ - if (pagefault_disabled() && !object_is_idle(obj)) + if (pagefault_disabled() && + !reservation_object_test_signaled_rcu(obj->resv, true)) return -EFAULT; ret = relocate_entry(obj, reloc, cache, target_offset); @@ -1111,44 +1097,20 @@ err: return ret; } -static unsigned int eb_other_engines(struct drm_i915_gem_request *req) -{ - unsigned int mask; - - mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; - mask <<= I915_BO_ACTIVE_SHIFT; - - return mask; -} - static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; int ret; list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - struct reservation_object *resv; - - if (obj->flags & other_rings) { - ret = i915_gem_request_await_object - (req, obj, obj->base.pending_write_domain); - if (ret) - return ret; - } - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - ret = i915_sw_fence_await_reservation - (&req->submit, resv, &i915_fence_ops, - obj->base.pending_write_domain, 10*HZ, - GFP_KERNEL | __GFP_NOWARN); - if (ret < 0) - return ret; - } + ret = i915_gem_request_await_object + (req, obj, obj->base.pending_write_domain); + if (ret) + return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj, false); @@ -1290,8 +1252,6 @@ void i915_vma_move_to_active(struct i915_vma *vma, GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - obj->dirty = 1; /* be paranoid */ - /* Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is * vital here: mark_active adds to the start of the callback list, @@ -1299,13 +1259,14 @@ void i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_object_is_active(obj)) + if (!i915_vma_is_active(vma) && !obj->active_count++) i915_gem_object_get(obj); - i915_gem_object_set_active(obj, idx); - i915_gem_active_set(&obj->last_read[idx], req); + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); + list_move_tail(&vma->vm_link, &vma->vm->active_list); if (flags & EXEC_OBJECT_WRITE) { - i915_gem_active_set(&obj->last_write, req); + i915_gem_active_set(&vma->last_write, req); intel_fb_obj_invalidate(obj, ORIGIN_CS); @@ -1315,21 +1276,13 @@ void i915_vma_move_to_active(struct i915_vma *vma, if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_gem_active_set(&vma->last_fence, req); - - i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], req); - list_move_tail(&vma->vm_link, &vma->vm->active_list); } static void eb_export_fence(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *req, unsigned int flags) { - struct reservation_object *resv; - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (!resv) - return; + struct reservation_object *resv = obj->resv; /* Ignore errors from failing to allocate the new fence, we can't * handle an error right now. Worst case should be missed diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0bb4232f66bc..92b36ab79771 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -31,6 +31,7 @@ #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM) @@ -3303,6 +3304,7 @@ i915_vma_retire(struct i915_gem_active *active, const unsigned int idx = rq->engine->id; struct i915_vma *vma = container_of(active, struct i915_vma, last_read[idx]); + struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); @@ -3313,6 +3315,31 @@ i915_vma_retire(struct i915_gem_active *active, list_move_tail(&vma->vm_link, &vma->vm->inactive_list); if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) WARN_ON(i915_vma_unbind(vma)); + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + if (--obj->active_count) + return; + + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + if (obj->bind_count) + list_move_tail(&obj->global_list, &rq->i915->mm.bound_list); + + obj->dirty = 1; /* be paranoid */ + + i915_gem_object_put(obj); +} + +static void +i915_ggtt_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + struct i915_vma *vma = + container_of(active, struct i915_vma, last_write); + + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); } void i915_vma_destroy(struct i915_vma *vma) @@ -3356,6 +3383,8 @@ __i915_vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->exec_list); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) init_request_active(&vma->last_read[i], i915_vma_retire); + init_request_active(&vma->last_write, + i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); init_request_active(&vma->last_fence, NULL); list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index ec78be2f8c77..21f5d9657271 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -211,6 +211,7 @@ struct i915_vma { unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; struct i915_gem_active last_fence; /** diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 687537d91be8..bc27c80be1e4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -193,6 +193,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) } i915_gem_context_put(request->ctx); + + fence_signal(&request->fence); i915_gem_request_put(request); } @@ -538,33 +540,41 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, bool write) { - struct i915_gem_active *active; - unsigned long active_mask; - int idx; + struct fence *excl; + int ret = 0; if (write) { - active_mask = i915_gem_object_get_active(obj); - active = obj->last_read; + struct fence **shared; + unsigned int count, i; + + ret = reservation_object_get_fences_rcu(obj->resv, + &excl, &count, &shared); + if (ret) + return ret; + + for (i = 0; i < count; i++) { + ret = i915_gem_request_await_fence(to, shared[i]); + if (ret) + break; + + fence_put(shared[i]); + } + + for (; i < count; i++) + fence_put(shared[i]); + kfree(shared); } else { - active_mask = 1; - active = &obj->last_write; + excl = reservation_object_get_excl_rcu(obj->resv); } - for_each_active(active_mask, idx) { - struct drm_i915_gem_request *request; - int ret; - - request = i915_gem_active_peek(&active[idx], - &obj->base.dev->struct_mutex); - if (!request) - continue; + if (excl) { + if (ret == 0) + ret = i915_gem_request_await_fence(to, excl); - ret = i915_gem_request_await_request(to, request); - if (ret) - return ret; + fence_put(excl); } - return 0; + return ret; } static void i915_gem_mark_busy(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 45998eedda2c..fe75026c60e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -554,22 +554,7 @@ i915_gem_active_isset(const struct i915_gem_active *active) } /** - * i915_gem_active_is_idle - report whether the active tracker is idle - * @active - the active tracker - * - * i915_gem_active_is_idle() returns true if the active tracker is currently - * unassigned or if the request is complete (but not yet retired). Requires - * the caller to hold struct_mutex (but that can be relaxed if desired). - */ -static inline bool -i915_gem_active_is_idle(const struct i915_gem_active *active, - struct mutex *mutex) -{ - return !i915_gem_active_peek(active, mutex); -} - -/** - * i915_gem_active_wait- waits until the request is completed + * i915_gem_active_wait - waits until the request is completed * @active - the active request on which to wait * @flags - how to wait * @timeout - how long to wait at most @@ -639,24 +624,6 @@ i915_gem_active_retire(struct i915_gem_active *active, return 0; } -/* Convenience functions for peeking at state inside active's request whilst - * guarded by the struct_mutex. - */ - -static inline uint32_t -i915_gem_active_get_seqno(const struct i915_gem_active *active, - struct mutex *mutex) -{ - return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex)); -} - -static inline struct intel_engine_cs * -i915_gem_active_get_engine(const struct i915_gem_active *active, - struct mutex *mutex) -{ - return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex)); -} - #define for_each_active(mask, idx) \ for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 334f15df7c8d..3d3f410d9aa9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -795,9 +795,9 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->name = obj->base.name; for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = __active_get_seqno(&obj->last_read[i]); - err->wseqno = __active_get_seqno(&obj->last_write); - err->engine = __active_get_engine_id(&obj->last_write); + err->rseqno[i] = __active_get_seqno(&vma->last_read[i]); + err->wseqno = __active_get_seqno(&vma->last_write); + err->engine = __active_get_engine_id(&vma->last_write); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index b82de3072d4f..a8927929c740 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -84,7 +84,6 @@ intel_plane_duplicate_state(struct drm_plane *plane) state = &intel_state->base; __drm_atomic_helper_plane_duplicate_state(plane, state); - intel_state->wait_req = NULL; return state; } @@ -101,7 +100,6 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { - WARN_ON(state && to_intel_plane_state(state)->wait_req); drm_atomic_helper_plane_destroy_state(plane, state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a5d61f0eea1d..a3dbeb50c41d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,7 +37,6 @@ #include "intel_frontbuffer.h" #include <drm/i915_drm.h> #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "intel_dsi.h" #include "i915_trace.h" #include <drm/drm_atomic.h> @@ -11917,8 +11916,6 @@ static int intel_gen7_queue_flip(struct drm_device *dev, static bool use_mmio_flip(struct intel_engine_cs *engine, struct drm_i915_gem_object *obj) { - struct reservation_object *resv; - /* * This is not being used for older platforms, because * non-availability of flip done interrupt forces us to use @@ -11935,17 +11932,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (i915.use_mmio_flip < 0) return false; - else if (i915.use_mmio_flip > 0) - return true; - else if (i915.enable_execlists) - return true; - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv && !reservation_object_test_signaled_rcu(resv, false)) - return true; - - return engine != i915_gem_active_get_engine(&obj->last_write, - &obj->base.dev->struct_mutex); + return true; } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -12018,17 +12006,8 @@ static void intel_mmio_flip_work_func(struct work_struct *w) struct intel_framebuffer *intel_fb = to_intel_framebuffer(crtc->base.primary->fb); struct drm_i915_gem_object *obj = intel_fb->obj; - struct reservation_object *resv; - if (work->flip_queued_req) - WARN_ON(i915_wait_request(work->flip_queued_req, - 0, MAX_SCHEDULE_TIMEOUT) < 0); - - /* For framebuffer backed by dmabuf, wait for fence */ - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) - WARN_ON(reservation_object_wait_timeout_rcu(resv, false, false, - MAX_SCHEDULE_TIMEOUT) < 0); + WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0); intel_pipe_update_start(crtc); @@ -12226,13 +12205,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (fb->modifier[0] != old_fb->modifier[0]) /* vlv: DISPLAY_FLIP fails to change tiling */ engine = NULL; - } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { - engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_active_get_engine(&obj->last_write, - &obj->base.dev->struct_mutex); - if (engine == NULL || engine->id != RCS) - engine = &dev_priv->engine[BCS]; + engine = &dev_priv->engine[BCS]; } else { engine = &dev_priv->engine[RCS]; } @@ -12262,9 +12236,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - - work->flip_queued_req = i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); schedule_work(&work->mmio_work); } else { request = i915_gem_request_alloc(engine, engine->last_context); @@ -14036,13 +14007,10 @@ static int intel_atomic_check(struct drm_device *dev, } static int intel_atomic_prepare_commit(struct drm_device *dev, - struct drm_atomic_state *state, - bool nonblock) + struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_plane_state *plane_state; struct drm_crtc_state *crtc_state; - struct drm_plane *plane; struct drm_crtc *crtc; int i, ret; @@ -14065,30 +14033,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, ret = drm_atomic_helper_prepare_planes(dev, state); mutex_unlock(&dev->struct_mutex); - if (!ret && !nonblock) { - for_each_plane_in_state(state, plane, plane_state, i) { - struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); - long timeout; - - if (!intel_plane_state->wait_req) - continue; - - timeout = i915_wait_request(intel_plane_state->wait_req, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) { - /* Any hang should be swallowed by the wait */ - WARN_ON(timeout == -EIO); - mutex_lock(&dev->struct_mutex); - drm_atomic_helper_cleanup_planes(dev, state); - mutex_unlock(&dev->struct_mutex); - ret = timeout; - break; - } - } - } - return ret; } @@ -14280,26 +14224,11 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc_state *old_crtc_state; struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; - struct drm_plane *plane; - struct drm_plane_state *plane_state; bool hw_check = intel_state->modeset; unsigned long put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; - for_each_plane_in_state(state, plane, plane_state, i) { - struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); - - if (!intel_plane_state->wait_req) - continue; - - /* EIO should be eaten, and we can't get interrupted in the - * worker, and blocking commits have waited already. */ - WARN_ON(i915_wait_request(intel_plane_state->wait_req, - 0, MAX_SCHEDULE_TIMEOUT) < 0); - } - drm_atomic_helper_wait_for_dependencies(state); if (intel_state->modeset) { @@ -14507,7 +14436,7 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); - ret = intel_atomic_prepare_commit(dev, state, nonblock); + ret = intel_atomic_prepare_commit(dev, state); if (ret) { DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); return ret; @@ -14639,7 +14568,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_framebuffer *fb = new_state->fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); - struct reservation_object *resv; + long lret; int ret = 0; if (!obj && !old_obj) @@ -14678,39 +14607,34 @@ intel_prepare_plane_fb(struct drm_plane *plane, return 0; /* For framebuffer backed by dmabuf, wait for fence */ - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - long lret; - - lret = reservation_object_wait_timeout_rcu(resv, false, true, - MAX_SCHEDULE_TIMEOUT); - if (lret == -ERESTARTSYS) - return lret; + lret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT, + NULL); + if (lret == -ERESTARTSYS) + return lret; - WARN(lret < 0, "waiting returns %li\n", lret); - } + WARN(lret < 0, "waiting returns %li\n", lret); if (plane->type == DRM_PLANE_TYPE_CURSOR && INTEL_INFO(dev)->cursor_needs_physical) { int align = IS_I830(dev) ? 16 * 1024 : 256; ret = i915_gem_object_attach_phys(obj, align); - if (ret) + if (ret) { DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } } else { struct i915_vma *vma; vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) - ret = PTR_ERR(vma); - } - - if (ret == 0) { - to_intel_plane_state(new_state)->wait_req = - i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } } - return ret; + return 0; } /** @@ -14728,7 +14652,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct intel_plane_state *old_intel_state; - struct intel_plane_state *intel_state = to_intel_plane_state(plane->state); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@ -14740,9 +14663,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane, if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR || !INTEL_INFO(dev)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); - - i915_gem_request_assign(&intel_state->wait_req, NULL); - i915_gem_request_assign(&old_intel_state->wait_req, NULL); } int diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index abe7a4df2e43..15e3050edeb9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -393,9 +393,6 @@ struct intel_plane_state { int scaler_id; struct drm_intel_sprite_colorkey ckey; - - /* async flip related structures */ - struct drm_i915_gem_request *wait_req; }; struct intel_initial_plane_config { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 03725fe89859..93ad6f9d2496 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -860,39 +860,6 @@ struct drm_i915_gem_busy { * long as no new GPU commands are executed upon it). Due to the * asynchronous nature of the hardware, an object reported * as busy may become idle before the ioctl is completed. - * - * Furthermore, if the object is busy, which engine is busy is only - * provided as a guide. There are race conditions which prevent the - * report of which engines are busy from being always accurate. - * However, the converse is not true. If the object is idle, the - * result of the ioctl, that all engines are idle, is accurate. - * - * The returned dword is split into two fields to indicate both - * the engines on which the object is being read, and the - * engine on which it is currently being written (if any). - * - * The low word (bits 0:15) indicate if the object is being written - * to by any engine (there can only be one, as the GEM implicit - * synchronisation rules force writes to be serialised). Only the - * engine for the last write is reported. - * - * The high word (bits 16:31) are a bitmask of which engines are - * currently reading from the object. Multiple engines may be - * reading from the object simultaneously. - * - * The value of each engine is the same as specified in the - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to - * the I915_EXEC_RENDER engine for execution, and so it is never - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same identifier in the EXECBUFFER2 ioctl and - * so are not separately reported for busyness. - * - * Caveat emptor: - * Only the boolean result of this query is reliable; that is whether - * the object is idle or busy. The report of which engines are busy - * should be only used as a heuristic. */ __u32 busy; };

[05/18] drm/i915: Move GEM activity tracking into a common struct reservation_object

Commit Message

Comments

Patch