[27/66] drm/i915/gem: Pull execbuf dma resv under a single critical section

Message ID 20200715115147.11866-27-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/66] drm/i915: Reduce i915_request.lock contention for i915_request_wait

Commit Message

Chris Wilson July 15, 2020, 11:51 a.m. UTC
Acquire all the objects and their backing storage, and page directories,
as used by execbuf under a single common ww_mutex. Albeit we have to
restart the critical section a few times in order to handle various
restrictions (such as avoiding copy_(from|to)_user and mmap_sem).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 168 +++++++++---------
 .../i915/gem/selftests/i915_gem_execbuffer.c  |   8 +-
 2 files changed, 87 insertions(+), 89 deletions(-)
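
For orientation: the "single critical section" here is the kernel's wound/wait locking pattern, i.e. take every dma-resv lock under one acquire context, and on contention back off, sleep on the contended lock, and restart the whole sequence. A minimal sketch of that pattern using the stock ww_mutex API follows (struct obj and lock_all() are illustrative names, not i915 code; the removed eb_lock_vma() in the diff below open-codes the same dance):

    #include <linux/ww_mutex.h>

    struct obj {
    	struct ww_mutex lock;
    	/* backing storage, page directories, ... */
    };

    static int lock_all(struct ww_acquire_ctx *ctx, struct obj *objs, int n)
    {
    	int i, j, contended = -1;
    	int err;

    retry:
    	for (i = 0; i < n; i++) {
    		if (i == contended)	/* already held via the slow path */
    			continue;

    		err = ww_mutex_lock_interruptible(&objs[i].lock, ctx);
    		if (err) {
    			/* back off: release everything taken so far */
    			for (j = 0; j < i; j++)
    				ww_mutex_unlock(&objs[j].lock);
    			if (contended > i)
    				ww_mutex_unlock(&objs[contended].lock);
    			contended = -1;

    			if (err != -EDEADLK)
    				return err;

    			/* sleep on the contended lock, then restart */
    			err = ww_mutex_lock_slow_interruptible(&objs[i].lock, ctx);
    			if (err)
    				return err;
    			contended = i;
    			goto retry;
    		}
    	}

    	ww_acquire_done(ctx);	/* no further locks in this context */
    	return 0;
    }

The caller brackets this with ww_acquire_init(&ctx, &reservation_ww_class) beforehand and, once the locks are dropped again, ww_acquire_fini(&ctx); the i915_acquire_ctx introduced earlier in this series appears to wrap exactly this sequence for all objects reachable from the execbuf.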

Comments

Thomas Hellström (Intel) July 27, 2020, 6:08 p.m. UTC | #1
On 7/15/20 1:51 PM, Chris Wilson wrote:
> Acquire all the objects and their backing storage, and page directories,
> as used by execbuf under a single common ww_mutex. Albeit we have to
> restart the critical section a few times in order to handle various
> restrictions (such as avoiding copy_(from|to)_user and mmap_sem).
>
[snip]
> @@ -1416,10 +1410,15 @@ static int eb_reserve_vm(struct i915_execbuffer *eb)
>   	if (count == 0)
>   		return 0;
>   
> +	/* We need to reserve page directories, release all, start over */
> +	i915_acquire_ctx_fini(&eb->acquire);
> +
>   	pass = 0;
>   	do {
>   		struct eb_vm_work *work;
>   
> +		i915_acquire_ctx_init(&eb->acquire);

Couldn't we do a i915_acquire_ctx_rollback() here to avoid losing our 
ticket? That would mean deferring i915_acquire_ctx_done() until all 
potential rollbacks have been performed.

Or even better if we defer _ctx_done(), couldn't we just continue 
locking the pts here instead of dropping and re-acquiring everything?

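For reference, the two options differ in whether the wound/wait ticket survives the restart. Schematically (loop bodies elided; i915_acquire_ctx_rollback() is the operation being proposed here, not a function that exists in this series):

    /* As posted: each pass surrenders the ticket and draws a new one */
    i915_acquire_ctx_fini(&eb->acquire);	/* unlock all, drop ticket */
    do {
    	i915_acquire_ctx_init(&eb->acquire);	/* fresh (younger) ticket */
    	/* ... lock objects and pts, stage the binds, maybe wait ... */
    } while (1);

    /* Suggested: on restart, unlock everything but keep the ticket */
    do {
    	/* ... lock objects and pts, stage the binds, maybe wait ... */
    	i915_acquire_ctx_rollback(&eb->acquire);	/* hypothetical */
    } while (1);
    /* with i915_acquire_ctx_done() deferred until no rollback can follow */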
Chris Wilson July 28, 2020, 3:16 p.m. UTC | #2
Quoting Thomas Hellström (Intel) (2020-07-27 19:08:39)
> 
> On 7/15/20 1:51 PM, Chris Wilson wrote:
> > Acquire all the objects and their backing storage, and page directories,
> > as used by execbuf under a single common ww_mutex. Albeit we have to
> > restart the critical section a few times in order to handle various
> > restrictions (such as avoiding copy_(from|to)_user and mmap_sem).
[snip]
> > +             i915_acquire_ctx_init(&eb->acquire);
> 
> Couldn't we do a i915_acquire_ctx_rollback() here to avoid losing our 
> ticket? That would mean deferring i915_acquire_ctx_done() until all 
> potential rollbacks have been performed.

We need to completely drop the acquire-class for catching up with userptr
(and anything else deferred that doesn't meet the current fence semantics).

I thought it was sensible to drop all around the waits in this loop, and
tidier to always reacquire at the beginning of each loop.

> Or even better if we defer _ctx_done(), couldn't we just continue 
> locking the pts here instead of dropping and re-acquiring everything?

Userptr would like to have a word. If you just mean these do-loop lines,
then yes fini/init is overkill -- it just looked simpler than doing it at
the end of the loop. The steady-state load is not meant to get past the
optimistic fastpath.
-Chris
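The "optimistic fastpath" is the early return at the top of the patched eb_reserve_vm(): in steady state every vma is already bound, so the function never reaches the fini/init dance. Condensed from the diff below (number_of_unbound_vmas() is an illustrative stand-in for the counting loop, which is elided between hunks):

    err = eb_lock_mm(eb);			/* ww-lock every object */
    if (err)
    	return err;

    err = eb_acquire_mm(eb);		/* ticket marked done */
    if (err)
    	return err;

    count = number_of_unbound_vmas(eb);	/* illustrative stand-in */
    if (count == 0)
    	return 0;			/* fastpath: locks stay held */

    /* slowpath: drop everything, then reacquire per pass */
    i915_acquire_ctx_fini(&eb->acquire);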
Thomas Hellström (Intel) July 30, 2020, 12:57 p.m. UTC | #3
On 7/28/20 5:16 PM, Chris Wilson wrote:
> Quoting Thomas Hellström (Intel) (2020-07-27 19:08:39)
>> On 7/15/20 1:51 PM, Chris Wilson wrote:
>>> Acquire all the objects and their backing storage, and page directories,
>>> as used by execbuf under a single common ww_mutex. Albeit we have to
>>> restart the critical section a few times in order to handle various
>>> restrictions (such as avoiding copy_(from|to)_user and mmap_sem).
[snip]
>>> +             i915_acquire_ctx_init(&eb->acquire);
>> Couldn't we do a i915_acquire_ctx_rollback() here to avoid losing our
>> ticket? That would mean deferring i915_acquire_ctx_done() until all
>> potential rollbacks have been performed.
> We need to completely drop the acquire-class for catching up with userptr
> (and anything else deferred that doesn't meet the current fence semantics).
Hmm, what other deferred stuff are we doing that doesn't meet the 
current fence semantics? (Which I interpret as "everything deferred that 
we spawn during a CS needs to be either synced before we publish the 
fence or considered part of the fence signaling critical path")
>
> I thought it was sensible to drop all around the waits in this loop, and
> tidier to always reacquire at the beginning of each loop.
>
>> Or even better if we defer _ctx_done(), couldn't we just continue
>> locking the pts here instead of dropping and re-acquiring everything?
> Userptr would like to have a word. If you just mean these do-loop lines,
> then yes fini/init is overkill -- it just looked simpler than doing it at
> the end of the loop. The steady-state load is not meant to get past the
> optimistic fastpath.

Indeed, complicating the code to avoid dropping these locks outside of the 
fastpath doesn't sound like a good idea.

However, if we drop the acquire ctx we might be subject to starvation, and 
I figure waiting for userptr would still work as long as we drop all the 
locks.

> -Chris

/Thomas
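
The starvation concern follows from the wound/wait ticket semantics (a sketch of the usual ww_mutex behaviour, not something spelled out in this thread): ww_acquire_init() stamps the context with a ticket, and under contention the older ticket wins while the younger side is backed off with -EDEADLK. A fini/init pair per pass therefore re-enters the queue as the youngest contender every time:

    i915_acquire_ctx_fini(&eb->acquire);	/* ticket N discarded */
    /* ... sleep waiting for unbinds / userptr ... */
    i915_acquire_ctx_init(&eb->acquire);	/* new ticket M, younger than N:
    					 * under contention we are again the
    					 * first to be backed off */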

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index ebabc0746d50..db433f3f18ec 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -20,6 +20,7 @@ 
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_requests.h"
 #include "gt/intel_ring.h"
+#include "mm/i915_acquire_ctx.h"
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
@@ -244,6 +245,8 @@  struct i915_execbuffer {
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
 
+	struct i915_acquire_ctx acquire; /** lock for _all_ DMA reservations */
+
 	struct i915_request *request; /** our request to build */
 	struct eb_vma *batch; /** identity of the batch obj/vma */
 
@@ -389,42 +392,6 @@  static void eb_vma_array_put(struct eb_vma_array *arr)
 	kref_put(&arr->kref, eb_vma_array_destroy);
 }
 
-static int
-eb_lock_vma(struct i915_execbuffer *eb, struct ww_acquire_ctx *acquire)
-{
-	struct eb_vma *ev;
-	int err = 0;
-
-	list_for_each_entry(ev, &eb->submit_list, submit_link) {
-		struct i915_vma *vma = ev->vma;
-
-		err = ww_mutex_lock_interruptible(&vma->resv->lock, acquire);
-		if (err == -EDEADLK) {
-			struct eb_vma *unlock = ev, *en;
-
-			list_for_each_entry_safe_continue_reverse(unlock, en,
-								  &eb->submit_list,
-								  submit_link) {
-				ww_mutex_unlock(&unlock->vma->resv->lock);
-				list_move_tail(&unlock->submit_link, &eb->submit_list);
-			}
-
-			GEM_BUG_ON(!list_is_first(&ev->submit_link, &eb->submit_list));
-			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
-							       acquire);
-		}
-		if (err) {
-			list_for_each_entry_continue_reverse(ev,
-							     &eb->submit_list,
-							     submit_link)
-				ww_mutex_unlock(&ev->vma->resv->lock);
-			break;
-		}
-	}
-
-	return err;
-}
-
 static int eb_create(struct i915_execbuffer *eb)
 {
 	/* Allocate an extra slot for use by the sentinel */
@@ -668,6 +635,25 @@  eb_add_vma(struct i915_execbuffer *eb,
 	}
 }
 
+static int eb_lock_mm(struct i915_execbuffer *eb)
+{
+	struct eb_vma *ev;
+	int err;
+
+	list_for_each_entry(ev, &eb->bind_list, bind_link) {
+		err = i915_acquire_ctx_lock(&eb->acquire, ev->vma->obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int eb_acquire_mm(struct i915_execbuffer *eb)
+{
+	return i915_acquire_mm(&eb->acquire);
+}
+
 struct eb_vm_work {
 	struct dma_fence_work base;
 	struct eb_vma_array *array;
@@ -1390,7 +1376,15 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 	unsigned long count;
 	struct eb_vma *ev;
 	unsigned int pass;
-	int err = 0;
+	int err;
+
+	err = eb_lock_mm(eb);
+	if (err)
+		return err;
+
+	err = eb_acquire_mm(eb);
+	if (err)
+		return err;
 
 	count = 0;
 	INIT_LIST_HEAD(&unbound);
@@ -1416,10 +1410,15 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 	if (count == 0)
 		return 0;
 
+	/* We need to reserve page directories, release all, start over */
+	i915_acquire_ctx_fini(&eb->acquire);
+
 	pass = 0;
 	do {
 		struct eb_vm_work *work;
 
+		i915_acquire_ctx_init(&eb->acquire);
+
 		/*
 		 * We need to hold one lock as we bind all the vma so that
 		 * we have a consistent view of the entire vm and can plan
@@ -1436,6 +1435,11 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 		 * beneath it, so we have to stage and preallocate all the
 		 * resources we may require before taking the mutex.
 		 */
+
+		err = eb_lock_mm(eb);
+		if (err)
+			return err;
+
 		work = eb_vm_work(eb, count);
 		if (!work)
 			return -ENOMEM;
@@ -1453,6 +1457,10 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 			}
 		}
 
+		err = eb_acquire_mm(eb);
+		if (err)
+			return eb_vm_work_cancel(work, err);
+
 		err = i915_vm_pin_pt_stash(work->vm, &work->stash);
 		if (err)
 			return eb_vm_work_cancel(work, err);
@@ -1543,6 +1551,8 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 		if (signal_pending(current))
 			return -EINTR;
 
+		i915_acquire_ctx_fini(&eb->acquire);
+
 		/* Now safe to wait with no reservations held */
 
 		if (err == -EAGAIN) {
@@ -1566,8 +1576,10 @@  static int eb_reserve_vm(struct i915_execbuffer *eb)
 		 * total ownership of the vm.
 		 */
 		err = wait_for_unbinds(eb, &unbound, pass++);
-		if (err)
+		if (err) {
+			i915_acquire_ctx_init(&eb->acquire);
 			return err;
+		}
 	} while (1);
 }
 
@@ -1994,8 +2006,6 @@  static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	struct drm_i915_gem_object *obj = vma->obj;
 	int err;
 
-	i915_vma_lock(vma);
-
 	if (obj->cache_dirty & ~obj->cache_coherent)
 		i915_gem_clflush_object(obj, 0);
 	obj->write_domain = 0;
@@ -2004,8 +2014,6 @@  static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
-	i915_vma_unlock(vma);
-
 	return err;
 }
 
@@ -2334,11 +2342,9 @@  get_gpu_relocs(struct i915_execbuffer *eb,
 		int err;
 
 		GEM_BUG_ON(!vma);
-		i915_vma_lock(vma);
 		err = i915_request_await_object(rq, vma->obj, false);
 		if (err == 0)
 			err = i915_vma_move_to_active(vma, rq, 0);
-		i915_vma_unlock(vma);
 		if (err)
 			return ERR_PTR(err);
 
@@ -2470,6 +2476,7 @@  static int eb_relocate(struct i915_execbuffer *eb)
 	/* Drop everything before we copy_from_user */
 	list_for_each_entry(ev, &eb->bind_list, bind_link)
 		eb_unreserve_vma(ev);
+	i915_acquire_ctx_fini(&eb->acquire);
 
 	/* Pick a single buffer for all relocs, within reason */
 	c->bufsz *= sizeof(struct drm_i915_gem_relocation_entry);
@@ -2482,6 +2489,7 @@  static int eb_relocate(struct i915_execbuffer *eb)
 
 	/* Copy the user's relocations into plain system memory */
 	err = eb_relocs_copy_user(eb);
+	i915_acquire_ctx_init(&eb->acquire);
 	if (err)
 		return err;
 
@@ -2517,17 +2525,8 @@  static int eb_reserve(struct i915_execbuffer *eb)
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
-	struct ww_acquire_ctx acquire;
 	struct eb_vma *ev;
-	int err = 0;
-
-	ww_acquire_init(&acquire, &reservation_ww_class);
-
-	err = eb_lock_vma(eb, &acquire);
-	if (err)
-		goto err_fini;
-
-	ww_acquire_done(&acquire);
+	int err;
 
 	list_for_each_entry(ev, &eb->submit_list, submit_link) {
 		struct i915_vma *vma = ev->vma;
@@ -2566,27 +2565,22 @@  static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+		if (!(flags & EXEC_OBJECT_ASYNC)) {
 			err = i915_request_await_object
 				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+			if (unlikely(err))
+				goto err_skip;
 		}
 
-		if (err == 0)
-			err = i915_vma_move_to_active(vma, eb->request, flags);
-
-		i915_vma_unlock(vma);
+		err = i915_vma_move_to_active(vma, eb->request, flags);
+		if (unlikely(err))
+			goto err_skip;
 	}
-	ww_acquire_fini(&acquire);
-
-	if (unlikely(err))
-		goto err_skip;
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	intel_gt_chipset_flush(eb->engine->gt);
 	return 0;
 
-err_fini:
-	ww_acquire_fini(&acquire);
 err_skip:
 	i915_request_set_error_once(eb->request, err);
 	return err;
@@ -2749,39 +2743,27 @@  static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	/* Mark active refs early for this worker, in case we get interrupted */
 	err = parser_mark_active(pw, eb->context->timeline);
 	if (err)
-		goto err_commit;
-
-	err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
-	if (err)
-		goto err_commit;
+		goto out;
 
 	err = dma_resv_reserve_shared(pw->batch->resv, 1);
 	if (err)
-		goto err_commit_unlock;
+		goto out;
 
 	/* Wait for all writes (and relocs) into the batch to complete */
 	err = i915_sw_fence_await_reservation(&pw->base.chain,
 					      pw->batch->resv, NULL, false,
 					      0, I915_FENCE_GFP);
 	if (err < 0)
-		goto err_commit_unlock;
+		goto out;
 
 	/* Keep the batch alive and unwritten as we parse */
 	dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
 
-	dma_resv_unlock(pw->batch->resv);
-
 	/* Force execution to wait for completion of the parser */
-	dma_resv_lock(shadow->resv, NULL);
 	dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-	dma_resv_unlock(shadow->resv);
 
-	dma_fence_work_commit_imm(&pw->base);
-	return 0;
-
-err_commit_unlock:
-	dma_resv_unlock(pw->batch->resv);
-err_commit:
+	err = 0;
+out:
 	i915_sw_fence_set_error_once(&pw->base.chain, err);
 	dma_fence_work_commit_imm(&pw->base);
 	return err;
@@ -2833,10 +2815,6 @@  static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
 
-	err = eb_move_to_gpu(eb);
-	if (err)
-		return err;
-
 	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
 		err = i915_reset_gen7_sol_offsets(eb->request);
 		if (err)
@@ -3420,6 +3398,9 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_engine;
 	lockdep_assert_held(&eb.context->timeline->mutex);
 
+	/* *** DMA-RESV LOCK *** */
+	i915_acquire_ctx_init(&eb.acquire);
+
 	err = eb_reserve(&eb);
 	if (err) {
 		/*
@@ -3433,6 +3414,8 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
+	/* *** DMA-RESV SEALED *** */
+
 	err = eb_parse(&eb);
 	if (err)
 		goto err_vma;
@@ -3483,9 +3466,20 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 		intel_gt_buffer_pool_mark_active(eb.parser.shadow->vma->private,
 						 eb.request);
 
+	err = eb_move_to_gpu(&eb);
+	if (err)
+		goto err_request;
+
+	/* *** DMA-RESV PUBLISHED *** */
+
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb);
+
 err_request:
+	i915_acquire_ctx_fini(&eb.acquire);
+	eb.acquire.locked = ERR_PTR(-1);
+	/* *** DMA-RESV UNLOCK *** */
+
 	i915_request_get(eb.request);
 	eb_request_add(&eb);
 
@@ -3496,6 +3490,8 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_request_put(eb.request);
 
 err_vma:
+	if (eb.acquire.locked != ERR_PTR(-1))
+		i915_acquire_ctx_fini(&eb.acquire);
 	eb_unlock_engine(&eb);
 	/* *** TIMELINE UNLOCK *** */
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index 8776f2750fa7..57181718acb1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -101,11 +101,13 @@  static int __igt_gpu_reloc(struct i915_execbuffer *eb, struct eb_vma *ev)
 		return err;
 	ev->exec->relocation_count = err;
 
+	i915_acquire_ctx_init(&eb->acquire);
+
 	err = eb_reserve_vm(eb);
-	if (err)
-		return err;
+	if (err == 0)
+		err = eb_relocs_gpu(eb);
 
-	err = eb_relocs_gpu(eb);
+	i915_acquire_ctx_fini(&eb->acquire);
 	if (err)
 		return err;