diff mbox

[05/18] drm/i915: Move GEM activity tracking into a common struct reservation_object

Message ID 20160914065250.15482-6-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Sept. 14, 2016, 6:52 a.m. UTC
In preparation to support many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)

Caveats:

 * busy-ioctl: the modern interface is completely thrown away,
regressing us back to before

commit e9808edd98679680804dfbc42c5ee8f1aa91f617 [v3.10]
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jul 4 12:25:08 2012 +0100

    drm/i915: Return a mask of the active rings in the high word of busy_ioctl

 * local engine selection and semaphore avoidance is lost for CS flips

 * non-blocking atomic modesets take a step backwards as the wait for
render completion blocks the ioctl. This is fixed in a subsequent
patch to use a fence instead for awaiting on the rendering.

 * dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 72s)

 * loss of object-level retirement callbacks

 * minor loss of object-level last activity information from debugfs,
could be replaced with per-vma information if desired

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  18 +--
 drivers/gpu/drm/i915/i915_drv.h            |  43 +----
 drivers/gpu/drm/i915/i915_gem.c            | 252 +++--------------------------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |   3 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  46 +-----
 drivers/gpu/drm/i915/i915_gem_dmabuf.h     |  45 ------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  71 ++------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  29 ++++
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   1 +
 drivers/gpu/drm/i915/i915_gem_request.c    |  48 +++---
 drivers/gpu/drm/i915/i915_gem_request.h    |  35 +---
 drivers/gpu/drm/i915/i915_gpu_error.c      |   6 +-
 drivers/gpu/drm/i915/intel_atomic_plane.c  |   2 -
 drivers/gpu/drm/i915/intel_display.c       | 122 +++-----------
 drivers/gpu/drm/i915/intel_drv.h           |   3 -
 include/uapi/drm/i915_drm.h                |  33 ----
 16 files changed, 127 insertions(+), 630 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.h

Comments

Joonas Lahtinen Sept. 14, 2016, 9:44 a.m. UTC | #1
On ke, 2016-09-14 at 07:52 +0100, Chris Wilson wrote:
> In preparation to support many distinct timelines, we need to expand the
> activity tracking on the GEM object to handle more than just a request
> per engine. We already use the struct reservation_object on the dma-buf
> to handle many fence contexts, so integrating that into the GEM object
> itself is the preferred solution. (For example, we can now share the same
> reservation_object between every consumer/producer using this buffer and
> skip the manual import/export via dma-buf.)
> 
> Caveats:

I'd make comments which patch in the series addresses each introduced
problem, which are fixable in future and which are taken as "a
permanent hit" for achieving multiple timelines. With a bit of
reasoning for each (now only a few points include some of this).

>  static inline struct drm_i915_gem_object *
> @@ -2347,35 +2341,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
>  	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
>  }
>  
> -static inline unsigned long
> -i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
> -{
> -	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
> -}
> -
>  static inline bool
>  i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
>  {
> -	return i915_gem_object_get_active(obj);
> -}
> -
> -static inline void
> -i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> -{
> -	obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
> -}
> -
> -static inline void
> -i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
> -{
> -	obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
> -}
> -
> -static inline bool
> -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> -				  int engine)
> -{
> -	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
> +	return obj->active_count;

our type is bool, so !!obj->active_count;

>  }
> 

<SNIP>

> 
>  static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned int other_rings = eb_other_engines(req);
>  	struct i915_vma *vma;
>  	int ret;
>  
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
> -		struct reservation_object *resv;
> -
> -		if (obj->flags & other_rings) {
> -			ret = i915_gem_request_await_object
> -				(req, obj, obj->base.pending_write_domain);
> -			if (ret)
> -				return ret;
> -		}
>  
> -		resv = i915_gem_object_get_dmabuf_resv(obj);
> -		if (resv) {
> -			ret = i915_sw_fence_await_reservation
> -				(&req->submit, resv, &i915_fence_ops,
> -				 obj->base.pending_write_domain, 10*HZ,
> -				 GFP_KERNEL | __GFP_NOWARN);
> -			if (ret < 0)
> -				return ret;
> -		}
> +		ret = i915_gem_request_await_object
> +			(req, obj, obj->base.pending_write_domain);

I know it was previously like this, but I'm not sure I agree on this
style at all.

> @@ -11935,17 +11932,8 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
>  
>  	if (i915.use_mmio_flip < 0)
>  		return false;
> -	else if (i915.use_mmio_flip > 0)
> -		return true;
> -	else if (i915.enable_execlists)
> -		return true;
>  
> -	resv = i915_gem_object_get_dmabuf_resv(obj);
> -	if (resv && !reservation_object_test_signaled_rcu(resv, false))
> -		return true;
> -
> -	return engine != i915_gem_active_get_engine(&obj->last_write,
> -						    &obj->base.dev->struct_mutex);
> +	return true;

	return i915_use_mmio_flip >= 0; // ?

> @@ -860,39 +860,6 @@ struct drm_i915_gem_busy {
>  	 * long as no new GPU commands are executed upon it). Due to the
>  	 * asynchronous nature of the hardware, an object reported
>  	 * as busy may become idle before the ioctl is completed.
> -	 *
> -	 * Furthermore, if the object is busy, which engine is busy is only
> -	 * provided as a guide. There are race conditions which prevent the
> -	 * report of which engines are busy from being always accurate.
> -	 * However, the converse is not true. If the object is idle, the
> -	 * result of the ioctl, that all engines are idle, is accurate.
> -	 *
> -	 * The returned dword is split into two fields to indicate both
> -	 * the engines on which the object is being read, and the
> -	 * engine on which it is currently being written (if any).
> -	 *
> -	 * The low word (bits 0:15) indicate if the object is being written
> -	 * to by any engine (there can only be one, as the GEM implicit
> -	 * synchronisation rules force writes to be serialised). Only the
> -	 * engine for the last write is reported.
> -	 *
> -	 * The high word (bits 16:31) are a bitmask of which engines are
> -	 * currently reading from the object. Multiple engines may be
> -	 * reading from the object simultaneously.
> -	 *
> -	 * The value of each engine is the same as specified in the
> -	 * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
> -	 * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
> -	 * the I915_EXEC_RENDER engine for execution, and so it is never
> -	 * reported as active itself. Some hardware may have parallel
> -	 * execution engines, e.g. multiple media engines, which are
> -	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
> -	 * so are not separately reported for busyness.
> -	 *
> -	 * Caveat emptor:
> -	 * Only the boolean result of this query is reliable; that is whether
> -	 * the object is idle or busy. The report of which engines are busy
> -	 * should be only used as a heuristic.
>  	 */

Daniel to Ack this ABI change.

Reviewed-by: Joonas Lahtinen <joonas.lahtine@linux.intel.com>

Double check by somebody on the plane code would not hurt.

Regards, Joonas
Chris Wilson Sept. 14, 2016, 5:35 p.m. UTC | #2
On Wed, Sep 14, 2016 at 12:44:04PM +0300, Joonas Lahtinen wrote:
> On ke, 2016-09-14 at 07:52 +0100, Chris Wilson wrote:
> > -static inline bool
> > -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> > -				  int engine)
> > -{
> > -	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
> > +	return obj->active_count;
> 
> our type is bool, so !!obj->active_count;

That's the beauty of using bool, !! is implied on the (bool)integer
conversion.
-Chris
Dave Gordon Sept. 15, 2016, 9:38 a.m. UTC | #3
On 14/09/16 18:35, Chris Wilson wrote:
> On Wed, Sep 14, 2016 at 12:44:04PM +0300, Joonas Lahtinen wrote:
>> On ke, 2016-09-14 at 07:52 +0100, Chris Wilson wrote:
>>> -static inline bool
>>> -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
>>> -				  int engine)
>>> -{
>>> -	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
>>> +	return obj->active_count;
>>
>> our type is bool, so !!obj->active_count;
>
> That's the beauty of using bool, !! is implied on the (bool)integer
> conversion.
> -Chris

It's a gcc extension, though, so does it work on clang?

.Dave.
Jani Nikula Sept. 15, 2016, 9:55 a.m. UTC | #4
On Thu, 15 Sep 2016, Dave Gordon <david.s.gordon@intel.com> wrote:
> On 14/09/16 18:35, Chris Wilson wrote:
>> On Wed, Sep 14, 2016 at 12:44:04PM +0300, Joonas Lahtinen wrote:
>>> On ke, 2016-09-14 at 07:52 +0100, Chris Wilson wrote:
>>>> -static inline bool
>>>> -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
>>>> -				  int engine)
>>>> -{
>>>> -	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
>>>> +	return obj->active_count;
>>>
>>> our type is bool, so !!obj->active_count;
>>
>> That's the beauty of using bool, !! is implied on the (bool)integer
>> conversion.
>> -Chris
>
> It's a gcc extension, though, so does it work on clang?

It's in the standard.

"When any scalar value is converted to _Bool, the result is 0 if the
value compares equal to 0; otherwise, the result is 1."

BR,
Jani.


>
> .Dave.
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Sept. 16, 2016, 11:40 a.m. UTC | #5
On Wed, Sep 14, 2016 at 07:52:37AM +0100, Chris Wilson wrote:
> In preparation to support many distinct timelines, we need to expand the
> activity tracking on the GEM object to handle more than just a request
> per engine. We already use the struct reservation_object on the dma-buf
> to handle many fence contexts, so integrating that into the GEM object
> itself is the preferred solution. (For example, we can now share the same
> reservation_object between every consumer/producer using this buffer and
> skip the manual import/export via dma-buf.)
> 
> Caveats:
> 
>  * busy-ioctl: the modern interface is completely thrown away,
> regressing us back to before
> 
> commit e9808edd98679680804dfbc42c5ee8f1aa91f617 [v3.10]
> Author: Chris Wilson <chris@chris-wilson.co.uk>
> Date:   Wed Jul 4 12:25:08 2012 +0100
> 
>     drm/i915: Return a mask of the active rings in the high word of busy_ioctl

There is an alternative here, as we can walk through the obj->resv and
convert *i915* fences to their respective read/write engines.

However, this means that busy-ioctl and wait-ioctl(.timeout=0) are no
longer equivalent as that busy-ioctl may report an object as idle but
still has external rendering (and so wait-ioctl, set-to-domain-ioctl,
pread, pwrite may stall).

Walking the resv object is also more expensive than doing a simple test.
Though that can be mitigated later by opencoding the test (once we can
do the whole busy-ioctl under only RCU protection that makes more
sense). But it makes busy-ioctl slower than ideal, and busy-ioctl is
high frequency (so long as we keep avoiding struct_mutex, the difference
is likely only visible in the microbenchmarks).

Pro: we keep the finese of being able to report which engines are busy
Con: not equivalent to wait-ioctl, polling the dmabuf, etc.

That we have alternative mechanisms to check may be considered a good
thing, i.e. we can migrate to using wait-ioctl for the general am I
going to stall test, and using busy-ioctl for engine selection. I think
I am leaning towards keeping the ABI more or less intact for the time
being. Though we are getting pretty close to its limit on NUM_ENGINES.

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_gem_busy *args = data;
        struct drm_i915_gem_object *obj;
        struct fence **shared, *excl;
        unsigned int count, i;
        int ret;

        ret = -ENOENT;
        obj = i915_gem_object_lookup(file, args->handle);
        if (obj) {
                ret = reservation_object_get_fences_rcu(obj->resv,
                                                        &excl, &count, &shared);
                i915_gem_object_put_unlocked(obj);
        }
        if (unlikely(ret))
                return ret;

         /* A discrepancy here is that we do not report the status of
          * non-i915 fences, i.e. even though we may report the object as idle,
          * a call to set-domain may still stall waiting for foreign rendering.
          * This also means that wait-ioctl may report an object as busy,
          * where busy-ioctl considers it idle.
          *
          * We trade the ability to warn of foreign fences to report on which
          * i915 engines are active for the object.
          *
          * Alternatively, we can trade that extra information on read/write
          * activity with
          *     args->busy =
          *             !reservation_object_test_signaled_rcu(obj->resv, true);
          * to report the overall busyness. This is what the wait-ioctl does.
          */
        args->busy = 0;

        /* Translate shared fences to READ set of engines */
        for (i = 0; i < count; i++) {
                args->busy |= busy_check_reader(shared[i]);
                fence_put(shared[i]);
        }
        kfree(shared);

        /* Translate the exclusive fence to the READ *and* WRITE engine */
        if (excl) {
                args->busy |= busy_check_writer(excl);
                fence_put(excl);
        }

        return 0;
}

-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 64702cc68e3a..c4e7532c5b6a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -134,15 +134,13 @@  static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct intel_engine_cs *engine;
 	struct i915_vma *vma;
 	unsigned int frontbuffer_bits;
 	int pin_count = 0;
-	enum intel_engine_id id;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
+	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
 		   &obj->base,
 		   get_active_flag(obj),
 		   get_pin_flag(obj),
@@ -151,14 +149,7 @@  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   get_pin_mapped_flag(obj),
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
-		   obj->base.write_domain);
-	for_each_engine_id(engine, dev_priv, id)
-		seq_printf(m, "%x ",
-			   i915_gem_active_get_seqno(&obj->last_read[id],
-						     &obj->base.dev->struct_mutex));
-	seq_printf(m, "] %x %s%s%s",
-		   i915_gem_active_get_seqno(&obj->last_write,
-					     &obj->base.dev->struct_mutex),
+		   obj->base.write_domain,
 		   i915_cache_level_str(dev_priv, obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -198,11 +189,6 @@  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (%s mappable)", s);
 	}
 
-	engine = i915_gem_active_get_engine(&obj->last_write,
-					    &dev_priv->drm.struct_mutex);
-	if (engine)
-		seq_printf(m, " (%s)", engine->name);
-
 	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
 	if (frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index af0212032b64..2bcab3087e8c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -41,6 +41,7 @@ 
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 
 #include <drm/drmP.h>
@@ -2198,15 +2199,6 @@  struct drm_i915_gem_object {
 	struct list_head batch_pool_link;
 
 	unsigned long flags;
-	/**
-	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
-	 */
-#define I915_BO_ACTIVE_SHIFT 0
-#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
-#define __I915_BO_ACTIVE(bo) \
-	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -2245,6 +2237,7 @@  struct drm_i915_gem_object {
 
 	/** Count of VMA actually bound by this object */
 	unsigned int bind_count;
+	unsigned int active_count;
 	unsigned int pin_display;
 
 	struct sg_table *pages;
@@ -2265,8 +2258,7 @@  struct drm_i915_gem_object {
 	 * read request. This allows for the CPU to read from an active
 	 * buffer by only waiting for the write to complete.
 	 */
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
-	struct i915_gem_active last_write;
+	struct reservation_object *resv;
 
 	/** References from framebuffers, locks out tiling changes. */
 	unsigned long framebuffer_references;
@@ -2289,6 +2281,8 @@  struct drm_i915_gem_object {
 			struct work_struct *work;
 		} userptr;
 	};
+
+	struct reservation_object __builtin_resv;
 };
 
 static inline struct drm_i915_gem_object *
@@ -2347,35 +2341,10 @@  i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
 }
 
-static inline unsigned long
-i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
-{
-	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
-}
-
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
-	return i915_gem_object_get_active(obj);
-}
-
-static inline void
-i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
-{
-	obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
-}
-
-static inline void
-i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
-{
-	obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
-}
-
-static inline bool
-i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
-				  int engine)
-{
-	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
+	return obj->active_count;
 }
 
 static inline unsigned int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d9214c9d31d2..ab8d10388581 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,7 +29,6 @@ 
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -441,11 +440,6 @@  i915_gem_object_wait(struct drm_i915_gem_object *obj,
 		     long timeout,
 		     struct intel_rps_client *rps)
 {
-	struct reservation_object *resv;
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
 	might_sleep();
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	GEM_BUG_ON(!!lockdep_is_held(&obj->base.dev->struct_mutex) !=
@@ -453,33 +447,9 @@  i915_gem_object_wait(struct drm_i915_gem_object *obj,
 #endif
 	GEM_BUG_ON(timeout < 0);
 
-	if (flags & I915_WAIT_ALL) {
-		active = obj->last_read;
-		active_mask = i915_gem_object_get_active(obj);
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-
-		request = i915_gem_active_get_unlocked(&active[idx]);
-		if (request) {
-			timeout = i915_gem_object_wait_fence(&request->fence,
-							     flags, timeout,
-							     rps);
-			i915_gem_request_put(request);
-		}
-		if (timeout < 0)
-			return timeout;
-	}
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv)
-		timeout = i915_gem_object_wait_reservation(resv,
-							   flags, timeout,
-							   rps);
+	timeout = i915_gem_object_wait_reservation(obj->resv,
+						   flags, timeout,
+						   rps);
 	return timeout < 0 ? timeout : timeout > 0 ? 0 : -ETIME;
 }
 
@@ -2586,41 +2556,6 @@  err:
 	return ERR_PTR(ret);
 }
 
-static void
-i915_gem_object_retire__write(struct i915_gem_active *active,
-			      struct drm_i915_gem_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, struct drm_i915_gem_object, last_write);
-
-	intel_fb_obj_flush(obj, true, ORIGIN_CS);
-}
-
-static void
-i915_gem_object_retire__read(struct i915_gem_active *active,
-			     struct drm_i915_gem_request *request)
-{
-	int idx = request->engine->id;
-	struct drm_i915_gem_object *obj =
-		container_of(active, struct drm_i915_gem_object, last_read[idx]);
-
-	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
-
-	i915_gem_object_clear_active(obj, idx);
-	if (i915_gem_object_is_active(obj))
-		return;
-
-	/* Bump our place on the bound list to keep it roughly in LRU order
-	 * so that we don't steal from recently used but inactive objects
-	 * (unless we are forced to ofc!)
-	 */
-	if (obj->bind_count)
-		list_move_tail(&obj->global_list,
-			       &request->i915->mm.bound_list);
-
-	i915_gem_object_put(obj);
-}
-
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
 {
 	unsigned long elapsed;
@@ -2923,6 +2858,16 @@  i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (!obj)
 		return -ENOENT;
 
+	if (reservation_object_test_signaled_rcu(obj->resv, true)) {
+		ret = 0;
+		goto out;
+	}
+
+	if (!args->timeout_ns) {
+		ret = -ETIME;
+		goto out;
+	}
+
 	start = ktime_get();
 
 	ret = i915_gem_object_wait(obj,
@@ -2936,8 +2881,8 @@  i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			args->timeout_ns = 0;
 	}
 
+out:
 	i915_gem_object_put_unlocked(obj);
-
 	return ret;
 }
 
@@ -3956,173 +3901,18 @@  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-static __always_inline unsigned int __busy_read_flag(unsigned int id)
-{
-	/* Note that we could alias engines in the execbuf API, but
-	 * that would be very unwise as it prevents userspace from
-	 * fine control over engine selection. Ahem.
-	 *
-	 * This should be something like EXEC_MAX_ENGINE instead of
-	 * I915_NUM_ENGINES.
-	 */
-	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
-	return 0x10000 << id;
-}
-
-static __always_inline unsigned int __busy_write_id(unsigned int id)
-{
-	/* The uABI guarantees an active writer is also amongst the read
-	 * engines. This would be true if we accessed the activity tracking
-	 * under the lock, but as we perform the lookup of the object and
-	 * its activity locklessly we can not guarantee that the last_write
-	 * being active implies that we have set the same engine flag from
-	 * last_read - hence we always set both read and write busy for
-	 * last_write.
-	 */
-	return id | __busy_read_flag(id);
-}
-
-static __always_inline unsigned int
-__busy_set_if_active(const struct i915_gem_active *active,
-		     unsigned int (*flag)(unsigned int id))
-{
-	struct drm_i915_gem_request *request;
-
-	request = rcu_dereference(active->request);
-	if (!request || i915_gem_request_completed(request))
-		return 0;
-
-	/* This is racy. See __i915_gem_active_get_rcu() for an in detail
-	 * discussion of how to handle the race correctly, but for reporting
-	 * the busy state we err on the side of potentially reporting the
-	 * wrong engine as being busy (but we guarantee that the result
-	 * is at least self-consistent).
-	 *
-	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
-	 * whilst we are inspecting it, even under the RCU read lock as we are.
-	 * This means that there is a small window for the engine and/or the
-	 * seqno to have been overwritten. The seqno will always be in the
-	 * future compared to the intended, and so we know that if that
-	 * seqno is idle (on whatever engine) our request is idle and the
-	 * return 0 above is correct.
-	 *
-	 * The issue is that if the engine is switched, it is just as likely
-	 * to report that it is busy (but since the switch happened, we know
-	 * the request should be idle). So there is a small chance that a busy
-	 * result is actually the wrong engine.
-	 *
-	 * So why don't we care?
-	 *
-	 * For starters, the busy ioctl is a heuristic that is by definition
-	 * racy. Even with perfect serialisation in the driver, the hardware
-	 * state is constantly advancing - the state we report to the user
-	 * is stale.
-	 *
-	 * The critical information for the busy-ioctl is whether the object
-	 * is idle as userspace relies on that to detect whether its next
-	 * access will stall, or if it has missed submitting commands to
-	 * the hardware allowing the GPU to stall. We never generate a
-	 * false-positive for idleness, thus busy-ioctl is reliable at the
-	 * most fundamental level, and we maintain the guarantee that a
-	 * busy object left to itself will eventually become idle (and stay
-	 * idle!).
-	 *
-	 * We allow ourselves the leeway of potentially misreporting the busy
-	 * state because that is an optimisation heuristic that is constantly
-	 * in flux. Being quickly able to detect the busy/idle state is much
-	 * more important than accurate logging of exactly which engines were
-	 * busy.
-	 *
-	 * For accuracy in reporting the engine, we could use
-	 *
-	 *	result = 0;
-	 *	request = __i915_gem_active_get_rcu(active);
-	 *	if (request) {
-	 *		if (!i915_gem_request_completed(request))
-	 *			result = flag(request->engine->exec_id);
-	 *		i915_gem_request_put(request);
-	 *	}
-	 *
-	 * but that still remains susceptible to both hardware and userspace
-	 * races. So we accept making the result of that race slightly worse,
-	 * given the rarity of the race and its low impact on the result.
-	 */
-	return flag(READ_ONCE(request->engine->exec_id));
-}
-
-static __always_inline unsigned int
-busy_check_reader(const struct i915_gem_active *active)
-{
-	return __busy_set_if_active(active, __busy_read_flag);
-}
-
-static __always_inline unsigned int
-busy_check_writer(const struct i915_gem_active *active)
-{
-	return __busy_set_if_active(active, __busy_write_id);
-}
-
 int
 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	unsigned long active;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj)
 		return -ENOENT;
 
-	args->busy = 0;
-	active = __I915_BO_ACTIVE(obj);
-	if (active) {
-		int idx;
-
-		/* Yes, the lookups are intentionally racy.
-		 *
-		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
-		 * to regard the value as stale and as our ABI guarantees
-		 * forward progress, we confirm the status of each active
-		 * request with the hardware.
-		 *
-		 * Even though we guard the pointer lookup by RCU, that only
-		 * guarantees that the pointer and its contents remain
-		 * dereferencable and does *not* mean that the request we
-		 * have is the same as the one being tracked by the object.
-		 *
-		 * Consider that we lookup the request just as it is being
-		 * retired and freed. We take a local copy of the pointer,
-		 * but before we add its engine into the busy set, the other
-		 * thread reallocates it and assigns it to a task on another
-		 * engine with a fresh and incomplete seqno. Guarding against
-		 * that requires careful serialisation and reference counting,
-		 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
-		 * instead we expect that if the result is busy, which engines
-		 * are busy is not completely reliable - we only guarantee
-		 * that the object was busy.
-		 */
-		rcu_read_lock();
-
-		for_each_active(active, idx)
-			args->busy |= busy_check_reader(&obj->last_read[idx]);
-
-		/* For ABI sanity, we only care that the write engine is in
-		 * the set of read engines. This should be ensured by the
-		 * ordering of setting last_read/last_write in
-		 * i915_vma_move_to_active(), and then in reverse in retire.
-		 * However, for good measure, we always report the last_write
-		 * request as a busy read as well as being a busy write.
-		 *
-		 * We don't care that the set of active read/write engines
-		 * may change during construction of the result, as it is
-		 * equally liable to change before userspace can inspect
-		 * the result.
-		 */
-		args->busy |= busy_check_writer(&obj->last_write);
-
-		rcu_read_unlock();
-	}
+	args->busy = !reservation_object_test_signaled_rcu(obj->resv, true);
 
 	i915_gem_object_put_unlocked(obj);
 	return 0;
@@ -4189,20 +3979,16 @@  unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
-	int i;
-
 	INIT_LIST_HEAD(&obj->global_list);
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		init_request_active(&obj->last_read[i],
-				    i915_gem_object_retire__read);
-	init_request_active(&obj->last_write,
-			    i915_gem_object_retire__write);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
+	reservation_object_init(&obj->__builtin_resv);
+	obj->resv = &obj->__builtin_resv;
+
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 	obj->madv = I915_MADV_WILLNEED;
 
@@ -4349,6 +4135,8 @@  void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->ops->release)
 		obj->ops->release(obj);
 
+	reservation_object_fini(&obj->__builtin_resv);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index ed989596d9a3..bbfea965c593 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -114,8 +114,7 @@  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
-					     &tmp->base.dev->struct_mutex))
+		if (i915_gem_object_is_active(tmp))
 			break;
 
 		/* While we're looping, do some clean up */
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 10265bb35604..b0a429aeafd9 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -222,49 +222,6 @@  static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.end_cpu_access = i915_gem_end_cpu_access,
 };
 
-static void export_fences(struct drm_i915_gem_object *obj,
-			  struct dma_buf *dma_buf)
-{
-	struct reservation_object *resv = dma_buf->resv;
-	struct drm_i915_gem_request *req;
-	unsigned long active;
-	int idx;
-
-	active = __I915_BO_ACTIVE(obj);
-	if (!active)
-		return;
-
-	/* Serialise with execbuf to prevent concurrent fence-loops */
-	mutex_lock(&obj->base.dev->struct_mutex);
-
-	/* Mark the object for future fences before racily adding old fences */
-	obj->base.dma_buf = dma_buf;
-
-	ww_mutex_lock(&resv->lock, NULL);
-
-	for_each_active(active, idx) {
-		req = i915_gem_active_get(&obj->last_read[idx],
-					  &obj->base.dev->struct_mutex);
-		if (!req)
-			continue;
-
-		if (reservation_object_reserve_shared(resv) == 0)
-			reservation_object_add_shared_fence(resv, &req->fence);
-
-		i915_gem_request_put(req);
-	}
-
-	req = i915_gem_active_get(&obj->last_write,
-				  &obj->base.dev->struct_mutex);
-	if (req) {
-		reservation_object_add_excl_fence(resv, &req->fence);
-		i915_gem_request_put(req);
-	}
-
-	ww_mutex_unlock(&resv->lock);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
@@ -276,6 +233,7 @@  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 	exp_info.size = gem_obj->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem_obj;
+	exp_info.resv = obj->resv;
 
 	if (obj->ops->dmabuf_export) {
 		int ret = obj->ops->dmabuf_export(obj);
@@ -287,7 +245,6 @@  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 	if (IS_ERR(dma_buf))
 		return dma_buf;
 
-	export_fences(obj, dma_buf);
 	return dma_buf;
 }
 
@@ -350,6 +307,7 @@  struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
+	obj->resv = dma_buf->resv;
 
 	/* We use GTT as shorthand for a coherent domain, one that is
 	 * neither in the GPU cache nor in the CPU cache, where all
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/i915_gem_dmabuf.h
deleted file mode 100644
index 91315557e421..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.h
+++ /dev/null
@@ -1,45 +0,0 @@ 
-/*
- * Copyright 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef _I915_GEM_DMABUF_H_
-#define _I915_GEM_DMABUF_H_
-
-#include <linux/dma-buf.h>
-
-static inline struct reservation_object *
-i915_gem_object_get_dmabuf_resv(struct drm_i915_gem_object *obj)
-{
-	struct dma_buf *dma_buf;
-
-	if (obj->base.dma_buf)
-		dma_buf = obj->base.dma_buf;
-	else if (obj->base.import_attach)
-		dma_buf = obj->base.import_attach->dmabuf;
-	else
-		return NULL;
-
-	return dma_buf->resv;
-}
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 33c85227643d..6b5175ee824c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,7 +34,6 @@ 
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -552,20 +551,6 @@  repeat:
 	return 0;
 }
 
-static bool object_is_idle(struct drm_i915_gem_object *obj)
-{
-	unsigned long active = i915_gem_object_get_active(obj);
-	int idx;
-
-	for_each_active(active, idx) {
-		if (!i915_gem_active_is_idle(&obj->last_read[idx],
-					     &obj->base.dev->struct_mutex))
-			return false;
-	}
-
-	return true;
-}
-
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
@@ -650,7 +635,8 @@  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	}
 
 	/* We can't wait for rendering with pagefaults disabled */
-	if (pagefault_disabled() && !object_is_idle(obj))
+	if (pagefault_disabled() &&
+	    !reservation_object_test_signaled_rcu(obj->resv, true))
 		return -EFAULT;
 
 	ret = relocate_entry(obj, reloc, cache, target_offset);
@@ -1111,44 +1097,20 @@  err:
 	return ret;
 }
 
-static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
-{
-	unsigned int mask;
-
-	mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
-	mask <<= I915_BO_ACTIVE_SHIFT;
-
-	return mask;
-}
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
-	const unsigned int other_rings = eb_other_engines(req);
 	struct i915_vma *vma;
 	int ret;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		struct reservation_object *resv;
-
-		if (obj->flags & other_rings) {
-			ret = i915_gem_request_await_object
-				(req, obj, obj->base.pending_write_domain);
-			if (ret)
-				return ret;
-		}
 
-		resv = i915_gem_object_get_dmabuf_resv(obj);
-		if (resv) {
-			ret = i915_sw_fence_await_reservation
-				(&req->submit, resv, &i915_fence_ops,
-				 obj->base.pending_write_domain, 10*HZ,
-				 GFP_KERNEL | __GFP_NOWARN);
-			if (ret < 0)
-				return ret;
-		}
+		ret = i915_gem_request_await_object
+			(req, obj, obj->base.pending_write_domain);
+		if (ret)
+			return ret;
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
@@ -1290,8 +1252,6 @@  void i915_vma_move_to_active(struct i915_vma *vma,
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	obj->dirty = 1; /* be paranoid  */
-
 	/* Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
 	 * vital here: mark_active adds to the start of the callback list,
@@ -1299,13 +1259,14 @@  void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_object_is_active(obj))
+	if (!i915_vma_is_active(vma) && !obj->active_count++)
 		i915_gem_object_get(obj);
-	i915_gem_object_set_active(obj, idx);
-	i915_gem_active_set(&obj->last_read[idx], req);
+	i915_vma_set_active(vma, idx);
+	i915_gem_active_set(&vma->last_read[idx], req);
+	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 
 	if (flags & EXEC_OBJECT_WRITE) {
-		i915_gem_active_set(&obj->last_write, req);
+		i915_gem_active_set(&vma->last_write, req);
 
 		intel_fb_obj_invalidate(obj, ORIGIN_CS);
 
@@ -1315,21 +1276,13 @@  void i915_vma_move_to_active(struct i915_vma *vma,
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_gem_active_set(&vma->last_fence, req);
-
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], req);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
 static void eb_export_fence(struct drm_i915_gem_object *obj,
 			    struct drm_i915_gem_request *req,
 			    unsigned int flags)
 {
-	struct reservation_object *resv;
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (!resv)
-		return;
+	struct reservation_object *resv = obj->resv;
 
 	/* Ignore errors from failing to allocate the new fence, we can't
 	 * handle an error right now. Worst case should be missed
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0bb4232f66bc..92b36ab79771 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -31,6 +31,7 @@ 
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_frontbuffer.h"
 
 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
 
@@ -3303,6 +3304,7 @@  i915_vma_retire(struct i915_gem_active *active,
 	const unsigned int idx = rq->engine->id;
 	struct i915_vma *vma =
 		container_of(active, struct i915_vma, last_read[idx]);
+	struct drm_i915_gem_object *obj = vma->obj;
 
 	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
 
@@ -3313,6 +3315,31 @@  i915_vma_retire(struct i915_gem_active *active,
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
 		WARN_ON(i915_vma_unbind(vma));
+
+	GEM_BUG_ON(!i915_gem_object_is_active(obj));
+	if (--obj->active_count)
+		return;
+
+	/* Bump our place on the bound list to keep it roughly in LRU order
+	 * so that we don't steal from recently used but inactive objects
+	 * (unless we are forced to ofc!)
+	 */
+	if (obj->bind_count)
+		list_move_tail(&obj->global_list, &rq->i915->mm.bound_list);
+
+	obj->dirty = 1; /* be paranoid  */
+
+	i915_gem_object_put(obj);
+}
+
+static void
+i915_ggtt_retire__write(struct i915_gem_active *active,
+			struct drm_i915_gem_request *request)
+{
+	struct i915_vma *vma =
+		container_of(active, struct i915_vma, last_write);
+
+	intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
 }
 
 void i915_vma_destroy(struct i915_vma *vma)
@@ -3356,6 +3383,8 @@  __i915_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
+	init_request_active(&vma->last_write,
+			    i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
 	init_request_active(&vma->last_fence, NULL);
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ec78be2f8c77..21f5d9657271 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -211,6 +211,7 @@  struct i915_vma {
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	struct i915_gem_active last_write;
 	struct i915_gem_active last_fence;
 
 	/**
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 687537d91be8..bc27c80be1e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -193,6 +193,8 @@  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	}
 
 	i915_gem_context_put(request->ctx);
+
+	fence_signal(&request->fence);
 	i915_gem_request_put(request);
 }
 
@@ -538,33 +540,41 @@  i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write)
 {
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
+	struct fence *excl;
+	int ret = 0;
 
 	if (write) {
-		active_mask = i915_gem_object_get_active(obj);
-		active = obj->last_read;
+		struct fence **shared;
+		unsigned int count, i;
+
+		ret = reservation_object_get_fences_rcu(obj->resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			ret = i915_gem_request_await_fence(to, shared[i]);
+			if (ret)
+				break;
+
+			fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			fence_put(shared[i]);
+		kfree(shared);
 	} else {
-		active_mask = 1;
-		active = &obj->last_write;
+		excl = reservation_object_get_excl_rcu(obj->resv);
 	}
 
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
+	if (excl) {
+		if (ret == 0)
+			ret = i915_gem_request_await_fence(to, excl);
 
-		ret = i915_gem_request_await_request(to, request);
-		if (ret)
-			return ret;
+		fence_put(excl);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 45998eedda2c..fe75026c60e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -554,22 +554,7 @@  i915_gem_active_isset(const struct i915_gem_active *active)
 }
 
 /**
- * i915_gem_active_is_idle - report whether the active tracker is idle
- * @active - the active tracker
- *
- * i915_gem_active_is_idle() returns true if the active tracker is currently
- * unassigned or if the request is complete (but not yet retired). Requires
- * the caller to hold struct_mutex (but that can be relaxed if desired).
- */
-static inline bool
-i915_gem_active_is_idle(const struct i915_gem_active *active,
-			struct mutex *mutex)
-{
-	return !i915_gem_active_peek(active, mutex);
-}
-
-/**
- * i915_gem_active_wait- waits until the request is completed
+ * i915_gem_active_wait - waits until the request is completed
  * @active - the active request on which to wait
  * @flags - how to wait
  * @timeout - how long to wait at most
@@ -639,24 +624,6 @@  i915_gem_active_retire(struct i915_gem_active *active,
 	return 0;
 }
 
-/* Convenience functions for peeking at state inside active's request whilst
- * guarded by the struct_mutex.
- */
-
-static inline uint32_t
-i915_gem_active_get_seqno(const struct i915_gem_active *active,
-			  struct mutex *mutex)
-{
-	return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex));
-}
-
-static inline struct intel_engine_cs *
-i915_gem_active_get_engine(const struct i915_gem_active *active,
-			   struct mutex *mutex)
-{
-	return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex));
-}
-
 #define for_each_active(mask, idx) \
 	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 334f15df7c8d..3d3f410d9aa9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -795,9 +795,9 @@  static void capture_bo(struct drm_i915_error_buffer *err,
 	err->name = obj->base.name;
 
 	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&obj->last_read[i]);
-	err->wseqno = __active_get_seqno(&obj->last_write);
-	err->engine = __active_get_engine_id(&obj->last_write);
+		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
+	err->wseqno = __active_get_seqno(&vma->last_write);
+	err->engine = __active_get_engine_id(&vma->last_write);
 
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index b82de3072d4f..a8927929c740 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -84,7 +84,6 @@  intel_plane_duplicate_state(struct drm_plane *plane)
 	state = &intel_state->base;
 
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
-	intel_state->wait_req = NULL;
 
 	return state;
 }
@@ -101,7 +100,6 @@  void
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
-	WARN_ON(state && to_intel_plane_state(state)->wait_req);
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a5d61f0eea1d..a3dbeb50c41d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -37,7 +37,6 @@ 
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -11917,8 +11916,6 @@  static int intel_gen7_queue_flip(struct drm_device *dev,
 static bool use_mmio_flip(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_object *obj)
 {
-	struct reservation_object *resv;
-
 	/*
 	 * This is not being used for older platforms, because
 	 * non-availability of flip done interrupt forces us to use
@@ -11935,17 +11932,8 @@  static bool use_mmio_flip(struct intel_engine_cs *engine,
 
 	if (i915.use_mmio_flip < 0)
 		return false;
-	else if (i915.use_mmio_flip > 0)
-		return true;
-	else if (i915.enable_execlists)
-		return true;
 
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv && !reservation_object_test_signaled_rcu(resv, false))
-		return true;
-
-	return engine != i915_gem_active_get_engine(&obj->last_write,
-						    &obj->base.dev->struct_mutex);
+	return true;
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
@@ -12018,17 +12006,8 @@  static void intel_mmio_flip_work_func(struct work_struct *w)
 	struct intel_framebuffer *intel_fb =
 		to_intel_framebuffer(crtc->base.primary->fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
-	struct reservation_object *resv;
 
-	if (work->flip_queued_req)
-		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  0, MAX_SCHEDULE_TIMEOUT) < 0);
-
-	/* For framebuffer backed by dmabuf, wait for fence */
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv)
-		WARN_ON(reservation_object_wait_timeout_rcu(resv, false, false,
-							    MAX_SCHEDULE_TIMEOUT) < 0);
+	WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0);
 
 	intel_pipe_update_start(crtc);
 
@@ -12226,13 +12205,8 @@  static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		if (fb->modifier[0] != old_fb->modifier[0])
 			/* vlv: DISPLAY_FLIP fails to change tiling */
 			engine = NULL;
-	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
-		engine = &dev_priv->engine[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		engine = i915_gem_active_get_engine(&obj->last_write,
-						    &obj->base.dev->struct_mutex);
-		if (engine == NULL || engine->id != RCS)
-			engine = &dev_priv->engine[BCS];
+		engine = &dev_priv->engine[BCS];
 	} else {
 		engine = &dev_priv->engine[RCS];
 	}
@@ -12262,9 +12236,6 @@  static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	if (mmio_flip) {
 		INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
-
-		work->flip_queued_req = i915_gem_active_get(&obj->last_write,
-							    &obj->base.dev->struct_mutex);
 		schedule_work(&work->mmio_work);
 	} else {
 		request = i915_gem_request_alloc(engine, engine->last_context);
@@ -14036,13 +14007,10 @@  static int intel_atomic_check(struct drm_device *dev,
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
-				       struct drm_atomic_state *state,
-				       bool nonblock)
+				       struct drm_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_plane_state *plane_state;
 	struct drm_crtc_state *crtc_state;
-	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	int i, ret;
 
@@ -14065,30 +14033,6 @@  static int intel_atomic_prepare_commit(struct drm_device *dev,
 	ret = drm_atomic_helper_prepare_planes(dev, state);
 	mutex_unlock(&dev->struct_mutex);
 
-	if (!ret && !nonblock) {
-		for_each_plane_in_state(state, plane, plane_state, i) {
-			struct intel_plane_state *intel_plane_state =
-				to_intel_plane_state(plane_state);
-			long timeout;
-
-			if (!intel_plane_state->wait_req)
-				continue;
-
-			timeout = i915_wait_request(intel_plane_state->wait_req,
-						    I915_WAIT_INTERRUPTIBLE,
-						    MAX_SCHEDULE_TIMEOUT);
-			if (timeout < 0) {
-				/* Any hang should be swallowed by the wait */
-				WARN_ON(timeout == -EIO);
-				mutex_lock(&dev->struct_mutex);
-				drm_atomic_helper_cleanup_planes(dev, state);
-				mutex_unlock(&dev->struct_mutex);
-				ret = timeout;
-				break;
-			}
-		}
-	}
-
 	return ret;
 }
 
@@ -14280,26 +14224,11 @@  static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *intel_cstate;
-	struct drm_plane *plane;
-	struct drm_plane_state *plane_state;
 	bool hw_check = intel_state->modeset;
 	unsigned long put_domains[I915_MAX_PIPES] = {};
 	unsigned crtc_vblank_mask = 0;
 	int i;
 
-	for_each_plane_in_state(state, plane, plane_state, i) {
-		struct intel_plane_state *intel_plane_state =
-			to_intel_plane_state(plane_state);
-
-		if (!intel_plane_state->wait_req)
-			continue;
-
-		/* EIO should be eaten, and we can't get interrupted in the
-		 * worker, and blocking commits have waited already. */
-		WARN_ON(i915_wait_request(intel_plane_state->wait_req,
-					  0, MAX_SCHEDULE_TIMEOUT) < 0);
-	}
-
 	drm_atomic_helper_wait_for_dependencies(state);
 
 	if (intel_state->modeset) {
@@ -14507,7 +14436,7 @@  static int intel_atomic_commit(struct drm_device *dev,
 
 	INIT_WORK(&state->commit_work, intel_atomic_commit_work);
 
-	ret = intel_atomic_prepare_commit(dev, state, nonblock);
+	ret = intel_atomic_prepare_commit(dev, state);
 	if (ret) {
 		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
 		return ret;
@@ -14639,7 +14568,7 @@  intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_framebuffer *fb = new_state->fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-	struct reservation_object *resv;
+	long lret;
 	int ret = 0;
 
 	if (!obj && !old_obj)
@@ -14678,39 +14607,34 @@  intel_prepare_plane_fb(struct drm_plane *plane,
 		return 0;
 
 	/* For framebuffer backed by dmabuf, wait for fence */
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv) {
-		long lret;
-
-		lret = reservation_object_wait_timeout_rcu(resv, false, true,
-							   MAX_SCHEDULE_TIMEOUT);
-		if (lret == -ERESTARTSYS)
-			return lret;
+	lret = i915_gem_object_wait(obj,
+				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    MAX_SCHEDULE_TIMEOUT,
+				    NULL);
+	if (lret == -ERESTARTSYS)
+		return lret;
 
-		WARN(lret < 0, "waiting returns %li\n", lret);
-	}
+	WARN(lret < 0, "waiting returns %li\n", lret);
 
 	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret)
+		if (ret) {
 			DRM_DEBUG_KMS("failed to attach phys object\n");
+			return ret;
+		}
 	} else {
 		struct i915_vma *vma;
 
 		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma))
-			ret = PTR_ERR(vma);
-	}
-
-	if (ret == 0) {
-		to_intel_plane_state(new_state)->wait_req =
-			i915_gem_active_get(&obj->last_write,
-					    &obj->base.dev->struct_mutex);
+		if (IS_ERR(vma)) {
+			DRM_DEBUG_KMS("failed to pin object\n");
+			return PTR_ERR(vma);
+		}
 	}
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -14728,7 +14652,6 @@  intel_cleanup_plane_fb(struct drm_plane *plane,
 {
 	struct drm_device *dev = plane->dev;
 	struct intel_plane_state *old_intel_state;
-	struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
 	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
 
@@ -14740,9 +14663,6 @@  intel_cleanup_plane_fb(struct drm_plane *plane,
 	if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR ||
 	    !INTEL_INFO(dev)->cursor_needs_physical))
 		intel_unpin_fb_obj(old_state->fb, old_state->rotation);
-
-	i915_gem_request_assign(&intel_state->wait_req, NULL);
-	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
 }
 
 int
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index abe7a4df2e43..15e3050edeb9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -393,9 +393,6 @@  struct intel_plane_state {
 	int scaler_id;
 
 	struct drm_intel_sprite_colorkey ckey;
-
-	/* async flip related structures */
-	struct drm_i915_gem_request *wait_req;
 };
 
 struct intel_initial_plane_config {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03725fe89859..93ad6f9d2496 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -860,39 +860,6 @@  struct drm_i915_gem_busy {
 	 * long as no new GPU commands are executed upon it). Due to the
 	 * asynchronous nature of the hardware, an object reported
 	 * as busy may become idle before the ioctl is completed.
-	 *
-	 * Furthermore, if the object is busy, which engine is busy is only
-	 * provided as a guide. There are race conditions which prevent the
-	 * report of which engines are busy from being always accurate.
-	 * However, the converse is not true. If the object is idle, the
-	 * result of the ioctl, that all engines are idle, is accurate.
-	 *
-	 * The returned dword is split into two fields to indicate both
-	 * the engines on which the object is being read, and the
-	 * engine on which it is currently being written (if any).
-	 *
-	 * The low word (bits 0:15) indicate if the object is being written
-	 * to by any engine (there can only be one, as the GEM implicit
-	 * synchronisation rules force writes to be serialised). Only the
-	 * engine for the last write is reported.
-	 *
-	 * The high word (bits 16:31) are a bitmask of which engines are
-	 * currently reading from the object. Multiple engines may be
-	 * reading from the object simultaneously.
-	 *
-	 * The value of each engine is the same as specified in the
-	 * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
-	 * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
-	 * the I915_EXEC_RENDER engine for execution, and so it is never
-	 * reported as active itself. Some hardware may have parallel
-	 * execution engines, e.g. multiple media engines, which are
-	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
-	 * so are not separately reported for busyness.
-	 *
-	 * Caveat emptor:
-	 * Only the boolean result of this query is reliable; that is whether
-	 * the object is idle or busy. The report of which engines are busy
-	 * should be only used as a heuristic.
 	 */
 	__u32 busy;
 };