
[14/24] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex

Message ID 20200810103103.303818-15-maarten.lankhorst@linux.intel.com (mailing list archive)
State New, archived
Series drm/i915: Correct the locking hierarchy in gem.

Commit Message

Maarten Lankhorst Aug. 10, 2020, 10:30 a.m. UTC
Instead of doing everything inside of pin_mutex, we move all pinning
outside. Because i915_active has its own reference counting, and
pinning suffers from the same lock-inversion issues vs mutexes, we
make sure everything is pinned first, so that the pinning in
i915_active only needs to bump refcounts. This allows us to take pin
refcounts correctly at all times.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 232 +++++++++++-------
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  34 ++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  13 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |  13 +-
 5 files changed, 190 insertions(+), 106 deletions(-)
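In outline, the patch reorders pinning so that every backing object is
pinned before ce->pin_mutex is taken. Below is a condensed sketch of the
new __intel_context_do_pin() (paraphrasing the patch that follows, not the
verbatim diff; the i915_active acquire/release, intel_context_active_acquire()
and the alloc/closed checks are elided for brevity):

int __intel_context_do_pin(struct intel_context *ce)
{
	bool handoff = false;
	void *vaddr;
	int err;

	/*
	 * Pin ring, timeline and context state with no locks held, so
	 * that __intel_context_active() only ever bumps refcounts.
	 */
	err = intel_context_pre_pin(ce);
	if (err)
		return err;

	/*
	 * Backend pre-pin (e.g. mapping the context image), still
	 * outside of pin_mutex.
	 */
	err = ce->ops->pre_pin(ce, &vaddr);
	if (err)
		goto out;

	/*
	 * Only now take pin_mutex; nothing under it can end up waiting
	 * on dma_resv_lock(), so the lock inversion is gone.
	 */
	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto out_post_unpin;

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		/* First pin: hand the pre-pinned state to the backend. */
		err = ce->ops->pin(ce, vaddr);
		if (!err) {
			handoff = true;
			smp_mb__before_atomic(); /* flush pin before it is visible */
			atomic_inc(&ce->pin_count);
		}
	}
	mutex_unlock(&ce->pin_mutex);

out_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
out:
	/* Drop the temporary pins; on success i915_active now holds its own. */
	intel_context_post_unpin(ce);
	return err;
}

The key point is that ce->ops->pin() under pin_mutex only consumes
already-pinned state, so nothing beneath the mutex can block on a reservation
lock.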

Comments

Thomas Hellström (Intel) Aug. 12, 2020, 7:14 p.m. UTC | #1
On 8/10/20 12:30 PM, Maarten Lankhorst wrote:
> [snip]
>
> +void intel_context_unpin(struct intel_context *ce)
> +{
> +	if (!atomic_dec_and_test(&ce->pin_count))
> +		return;
> +
> +	CE_TRACE(ce, "unpin\n");
> +	ce->ops->unpin(ce);
> +	ce->ops->post_unpin(ce);

What's protecting ops->unpin() here from running concurrently with
ops->pin() in __intel_context_do_pin()? Do the ops functions have to
implement their own locking if needed?

Otherwise LGTM

Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
Maarten Lankhorst Aug. 19, 2020, 10:38 a.m. UTC | #2
On 12-08-2020 at 21:14, Thomas Hellström (Intel) wrote:
>
> [snip]
>
> What's protecting ops->unpin() here from running concurrently with ops->pin() in __intel_context_do_pin()? Do the ops functions have to implement their own locking if needed?
>
> Otherwise LGTM
>
> Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
>
post_unpin() can safely run concurrently; unpin() for intel_lrc.c is just check_redzone(), and it's empty for legacy rings, so this should be fine. :)
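
For reference, the split in the execlists backend (taken from the patch
below) is what makes this safe: unpin() shrinks to the read-only redzone
check, and only the vunmap moves to post_unpin(), which is the part that may
run concurrently with a new pin cycle:

static void execlists_context_unpin(struct intel_context *ce)
{
	/* Read-only sanity check of the context image; safe to race. */
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

static void execlists_context_post_unpin(struct intel_context *ce)
{
	/* Drop the map taken in execlists_context_pre_pin(). */
	i915_gem_object_unpin_map(ce->state->obj);
}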

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 52db2bde44a3..efe9a7a89ede 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -93,79 +93,6 @@ static void intel_context_active_release(struct intel_context *ce)
 	i915_active_release(&ce->active);
 }
 
-int __intel_context_do_pin(struct intel_context *ce)
-{
-	int err;
-
-	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
-		err = intel_context_alloc_state(ce);
-		if (err)
-			return err;
-	}
-
-	err = i915_active_acquire(&ce->active);
-	if (err)
-		return err;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex)) {
-		err = -EINTR;
-		goto out_release;
-	}
-
-	if (unlikely(intel_context_is_closed(ce))) {
-		err = -ENOENT;
-		goto out_unlock;
-	}
-
-	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
-		err = intel_context_active_acquire(ce);
-		if (unlikely(err))
-			goto out_unlock;
-
-		err = ce->ops->pin(ce);
-		if (unlikely(err))
-			goto err_active;
-
-		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
-			 i915_ggtt_offset(ce->ring->vma),
-			 ce->ring->head, ce->ring->tail);
-
-		smp_mb__before_atomic(); /* flush pin before it is visible */
-		atomic_inc(&ce->pin_count);
-	}
-
-	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	goto out_unlock;
-
-err_active:
-	intel_context_active_release(ce);
-out_unlock:
-	mutex_unlock(&ce->pin_mutex);
-out_release:
-	i915_active_release(&ce->active);
-	return err;
-}
-
-void intel_context_unpin(struct intel_context *ce)
-{
-	if (!atomic_dec_and_test(&ce->pin_count))
-		return;
-
-	CE_TRACE(ce, "unpin\n");
-	ce->ops->unpin(ce);
-
-	/*
-	 * Once released, we may asynchronously drop the active reference.
-	 * As that may be the only reference keeping the context alive,
-	 * take an extra now so that it is not freed before we finish
-	 * dereferencing it.
-	 */
-	intel_context_get(ce);
-	intel_context_active_release(ce);
-	intel_context_put(ce);
-}
-
 static int __context_pin_state(struct i915_vma *vma)
 {
 	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
@@ -225,6 +152,138 @@ static void __ring_retire(struct intel_ring *ring)
 	intel_ring_unpin(ring);
 }
 
+static int intel_context_pre_pin(struct intel_context *ce)
+{
+	int err;
+
+	CE_TRACE(ce, "active\n");
+
+	err = __ring_active(ce->ring);
+	if (err)
+		return err;
+
+	err = intel_timeline_pin(ce->timeline);
+	if (err)
+		goto err_ring;
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state);
+	if (err)
+		goto err_timeline;
+
+
+	return 0;
+
+err_timeline:
+	intel_timeline_unpin(ce->timeline);
+err_ring:
+	__ring_retire(ce->ring);
+	return err;
+}
+
+static void intel_context_post_unpin(struct intel_context *ce)
+{
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_timeline_unpin(ce->timeline);
+	__ring_retire(ce->ring);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	bool handoff = false;
+	void *vaddr;
+	int err = 0;
+
+	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+		err = intel_context_alloc_state(ce);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * We always pin the context/ring/timeline here, to ensure a pin
+	 * refcount for __intel_context_active(), which prevents a lock
+	 * inversion of ce->pin_mutex vs dma_resv_lock().
+	 */
+	err = intel_context_pre_pin(ce);
+	if (err)
+		return err;
+
+	err = i915_active_acquire(&ce->active);
+	if (err)
+		goto err_ctx_unpin;
+
+	err = ce->ops->pre_pin(ce, &vaddr);
+	if (err)
+		goto err_release;
+
+	err = mutex_lock_interruptible(&ce->pin_mutex);
+	if (err)
+		goto err_post_unpin;
+
+	if (unlikely(intel_context_is_closed(ce))) {
+		err = -ENOENT;
+		goto err_unlock;
+	}
+
+	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
+		err = intel_context_active_acquire(ce);
+		if (unlikely(err))
+			goto err_unlock;
+
+		err = ce->ops->pin(ce, vaddr);
+		if (err) {
+			intel_context_active_release(ce);
+			goto err_unlock;
+		}
+
+		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
+			 i915_ggtt_offset(ce->ring->vma),
+			 ce->ring->head, ce->ring->tail);
+
+		handoff = true;
+		smp_mb__before_atomic(); /* flush pin before it is visible */
+		atomic_inc(&ce->pin_count);
+	}
+
+	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+
+err_unlock:
+	mutex_unlock(&ce->pin_mutex);
+err_post_unpin:
+	if (!handoff)
+		ce->ops->post_unpin(ce);
+err_release:
+	i915_active_release(&ce->active);
+err_ctx_unpin:
+	intel_context_post_unpin(ce);
+	return err;
+}
+
+void intel_context_unpin(struct intel_context *ce)
+{
+	if (!atomic_dec_and_test(&ce->pin_count))
+		return;
+
+	CE_TRACE(ce, "unpin\n");
+	ce->ops->unpin(ce);
+	ce->ops->post_unpin(ce);
+
+	/*
+	 * Once released, we may asynchronously drop the active reference.
+	 * As that may be the only reference keeping the context alive,
+	 * take an extra now so that it is not freed before we finish
+	 * dereferencing it.
+	 */
+	intel_context_get(ce);
+	intel_context_active_release(ce);
+	intel_context_put(ce);
+}
+
 __i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
@@ -235,12 +294,7 @@ static void __intel_context_retire(struct i915_active *active)
 		 intel_context_get_avg_runtime_ns(ce));
 
 	set_bit(CONTEXT_VALID_BIT, &ce->flags);
-	if (ce->state)
-		__context_unpin_state(ce->state);
-
-	intel_timeline_unpin(ce->timeline);
-	__ring_retire(ce->ring);
-
+	intel_context_post_unpin(ce);
 	intel_context_put(ce);
 }
 
@@ -249,29 +303,25 @@ static int __intel_context_active(struct i915_active *active)
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
 	int err;
 
-	CE_TRACE(ce, "active\n");
-
 	intel_context_get(ce);
 
+	/* everything should already be activated by intel_context_pre_pin() */
 	err = __ring_active(ce->ring);
-	if (err)
+	if (GEM_WARN_ON(err))
 		goto err_put;
 
 	err = intel_timeline_pin(ce->timeline);
-	if (err)
+	if (GEM_WARN_ON(err))
 		goto err_ring;
 
-	if (!ce->state)
-		return 0;
-
-	err = __context_pin_state(ce->state);
-	if (err)
-		goto err_timeline;
+	if (ce->state) {
+		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
+		__i915_vma_pin(ce->state);
+		i915_vma_make_unshrinkable(ce->state);
+	}
 
 	return 0;
 
-err_timeline:
-	intel_timeline_unpin(ce->timeline);
 err_ring:
 	__ring_retire(ce->ring);
 err_put:
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4954b0df4864..ca8e05b4d3ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -30,8 +30,10 @@ struct intel_ring;
 struct intel_context_ops {
 	int (*alloc)(struct intel_context *ce);
 
-	int (*pin)(struct intel_context *ce);
+	int (*pre_pin)(struct intel_context *ce, void **vaddr);
+	int (*pin)(struct intel_context *ce, void *vaddr);
 	void (*unpin)(struct intel_context *ce);
+	void (*post_unpin)(struct intel_context *ce);
 
 	void (*enter)(struct intel_context *ce);
 	void (*exit)(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 417f6b0c6c61..f4390559a3d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3303,7 +3303,10 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
 		      ce->engine);
+}
 
+static void execlists_context_post_unpin(struct intel_context *ce)
+{
 	i915_gem_object_unpin_map(ce->state->obj);
 }
 
@@ -3465,20 +3468,23 @@ __execlists_update_reg_state(const struct intel_context *ce,
 }
 
 static int
-__execlists_context_pin(struct intel_context *ce,
-			struct intel_engine_cs *engine)
+execlists_context_pre_pin(struct intel_context *ce, void **vaddr)
 {
-	void *vaddr;
-
 	GEM_BUG_ON(!ce->state);
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
-	vaddr = i915_gem_object_pin_map(ce->state->obj,
-					i915_coherent_map_type(engine->i915) |
+	*vaddr = i915_gem_object_pin_map(ce->state->obj,
+					i915_coherent_map_type(ce->engine->i915) |
 					I915_MAP_OVERRIDE);
-	if (IS_ERR(vaddr))
-		return PTR_ERR(vaddr);
 
+	return PTR_ERR_OR_ZERO(*vaddr);
+}
+
+static int
+__execlists_context_pin(struct intel_context *ce,
+			struct intel_engine_cs *engine,
+			void *vaddr)
+{
 	ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
@@ -3486,9 +3492,9 @@ __execlists_context_pin(struct intel_context *ce,
 	return 0;
 }
 
-static int execlists_context_pin(struct intel_context *ce)
+static int execlists_context_pin(struct intel_context *ce, void *vaddr)
 {
-	return __execlists_context_pin(ce, ce->engine);
+	return __execlists_context_pin(ce, ce->engine, vaddr);
 }
 
 static int execlists_context_alloc(struct intel_context *ce)
@@ -3514,8 +3520,10 @@ static void execlists_context_reset(struct intel_context *ce)
 static const struct intel_context_ops execlists_context_ops = {
 	.alloc = execlists_context_alloc,
 
+	.pre_pin = execlists_context_pre_pin,
 	.pin = execlists_context_pin,
 	.unpin = execlists_context_unpin,
+	.post_unpin = execlists_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
@@ -5454,12 +5462,12 @@ static int virtual_context_alloc(struct intel_context *ce)
 	return __execlists_context_alloc(ce, ve->siblings[0]);
 }
 
-static int virtual_context_pin(struct intel_context *ce)
+static int virtual_context_pin(struct intel_context *ce, void *vaddr)
 {
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 
 	/* Note: we must use a real engine class for setting up reg state */
-	return __execlists_context_pin(ce, ve->siblings[0]);
+	return __execlists_context_pin(ce, ve->siblings[0], vaddr);
 }
 
 static void virtual_context_enter(struct intel_context *ce)
@@ -5487,8 +5495,10 @@ static void virtual_context_exit(struct intel_context *ce)
 static const struct intel_context_ops virtual_context_ops = {
 	.alloc = virtual_context_alloc,
 
+	.pre_pin = execlists_context_pre_pin,
 	.pin = virtual_context_pin,
 	.unpin = execlists_context_unpin,
+	.post_unpin = execlists_context_post_unpin,
 
 	.enter = virtual_context_enter,
 	.exit = virtual_context_exit,
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index a3b10f3c83eb..93cf72cfd318 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -499,6 +499,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
 }
 
 static void ring_context_unpin(struct intel_context *ce)
+{
+}
+
+static void ring_context_post_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce);
 }
@@ -587,11 +591,16 @@ static int ring_context_alloc(struct intel_context *ce)
 	return 0;
 }
 
-static int ring_context_pin(struct intel_context *ce)
+static int ring_context_pre_pin(struct intel_context *ce, void **unused)
 {
 	return __context_pin_ppgtt(ce);
 }
 
+static int ring_context_pin(struct intel_context *ce, void *unused)
+{
+	return 0;
+}
+
 static void ring_context_reset(struct intel_context *ce)
 {
 	intel_ring_reset(ce->ring, ce->ring->emit);
@@ -600,8 +609,10 @@ static void ring_context_reset(struct intel_context *ce)
 static const struct intel_context_ops ring_context_ops = {
 	.alloc = ring_context_alloc,
 
+	.pre_pin = ring_context_pre_pin,
 	.pin = ring_context_pin,
 	.unpin = ring_context_unpin,
+	.post_unpin = ring_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 79764305b8ec..c8e631222f23 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce)
 {
 }
 
+static void mock_context_post_unpin(struct intel_context *ce)
+{
+}
+
 static void mock_context_destroy(struct kref *ref)
 {
 	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
@@ -163,7 +167,12 @@ static int mock_context_alloc(struct intel_context *ce)
 	return 0;
 }
 
-static int mock_context_pin(struct intel_context *ce)
+static int mock_context_pre_pin(struct intel_context *ce, void **unused)
+{
+	return 0;
+}
+
+static int mock_context_pin(struct intel_context *ce, void *unused)
 {
 	return 0;
 }
@@ -175,8 +184,10 @@ static void mock_context_reset(struct intel_context *ce)
 static const struct intel_context_ops mock_context_ops = {
 	.alloc = mock_context_alloc,
 
+	.pre_pin = mock_context_pre_pin,
 	.pin = mock_context_pin,
 	.unpin = mock_context_unpin,
+	.post_unpin = mock_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,