diff mbox series

[29/38] drm/i915: Move over to intel_context_lookup()

Message ID 20190301140404.26690-29-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/38] drm/i915/execlists: Suppress redundant preemption | expand

Commit Message

Chris Wilson March 1, 2019, 2:03 p.m. UTC
In preparation for an ever growing number of engines and so ever
increasing static array of HW contexts within the GEM context, move the
array over to an rbtree, allocated upon first use.

Unfortunately, this imposes an rbtree lookup at a few frequent callsites,
but we should be able to mitigate those by moving over to using the HW
context as our primary type and so only incur the lookup on the boundary
with the user GEM context and engines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gvt/mmio_context.c       |   3 +-
 drivers/gpu/drm/i915/i915_gem.c               |   9 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |  78 ++++------
 drivers/gpu/drm/i915/i915_gem_context.h       |   1 -
 drivers/gpu/drm/i915/i915_gem_context_types.h |   7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_perf.c              |   4 +-
 drivers/gpu/drm/i915/intel_context.c          | 139 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_context.h          |  37 ++++-
 drivers/gpu/drm/i915/intel_context_types.h    |   2 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   2 +-
 drivers/gpu/drm/i915/intel_engine_types.h     |   5 +
 drivers/gpu/drm/i915/intel_guc_ads.c          |   4 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c              |  29 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  23 ++-
 drivers/gpu/drm/i915/selftests/mock_context.c |   7 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |   6 +-
 19 files changed, 271 insertions(+), 94 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_context.c

Comments

Tvrtko Ursulin March 5, 2019, 5:01 p.m. UTC | #1
On 01/03/2019 14:03, Chris Wilson wrote:
> In preparation for an ever growing number of engines and so ever
> increasing static array of HW contexts within the GEM context, move the
> array over to an rbtree, allocated upon first use.
> 
> Unfortunately, this imposes an rbtree lookup at a few frequent callsites,
> but we should be able to mitigate those by moving over to using the HW
> context as our primary type and so only incur the lookup on the boundary
> with the user GEM context and engines.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   drivers/gpu/drm/i915/gvt/mmio_context.c       |   3 +-
>   drivers/gpu/drm/i915/i915_gem.c               |   9 +-
>   drivers/gpu/drm/i915/i915_gem_context.c       |  78 ++++------
>   drivers/gpu/drm/i915/i915_gem_context.h       |   1 -
>   drivers/gpu/drm/i915/i915_gem_context_types.h |   7 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   4 +-
>   drivers/gpu/drm/i915/i915_perf.c              |   4 +-
>   drivers/gpu/drm/i915/intel_context.c          | 139 ++++++++++++++++++
>   drivers/gpu/drm/i915/intel_context.h          |  37 ++++-
>   drivers/gpu/drm/i915/intel_context_types.h    |   2 +
>   drivers/gpu/drm/i915/intel_engine_cs.c        |   2 +-
>   drivers/gpu/drm/i915/intel_engine_types.h     |   5 +
>   drivers/gpu/drm/i915/intel_guc_ads.c          |   4 +-
>   drivers/gpu/drm/i915/intel_guc_submission.c   |   4 +-
>   drivers/gpu/drm/i915/intel_lrc.c              |  29 ++--
>   drivers/gpu/drm/i915/intel_ringbuffer.c       |  23 ++-
>   drivers/gpu/drm/i915/selftests/mock_context.c |   7 +-
>   drivers/gpu/drm/i915/selftests/mock_engine.c  |   6 +-
>   19 files changed, 271 insertions(+), 94 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_context.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 89105b1aaf12..d7292b349c0d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -86,6 +86,7 @@ i915-y += \
>   	  i915_trace_points.o \
>   	  i915_vma.o \
>   	  intel_breadcrumbs.o \
> +	  intel_context.o \
>   	  intel_engine_cs.o \
>   	  intel_hangcheck.o \
>   	  intel_lrc.o \
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index 7d84cfb9051a..442a74805129 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -492,7 +492,8 @@ static void switch_mmio(struct intel_vgpu *pre,
>   			 * itself.
>   			 */
>   			if (mmio->in_context &&
> -			    !is_inhibit_context(&s->shadow_ctx->__engine[ring_id]))
> +			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
> +								     dev_priv->engine[ring_id])))
>   				continue;
>   
>   			if (mmio->mask)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 90bb951b8c99..c6a25c3276ee 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4694,15 +4694,20 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   	}
>   
>   	for_each_engine(engine, i915, id) {
> +		struct intel_context *ce;
>   		struct i915_vma *state;
>   		void *vaddr;
>   
> -		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
> +		ce = intel_context_lookup(ctx, engine);
> +		if (!ce)
> +			continue;
>   
> -		state = to_intel_context(ctx, engine)->state;
> +		state = ce->state;
>   		if (!state)
>   			continue;
>   
> +		GEM_BUG_ON(ce->pin_count);
> +
>   		/*
>   		 * As we will hold a reference to the logical state, it will
>   		 * not be torn down with the context, and importantly the
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 04c24caf30d2..60b17f6a727d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -238,7 +238,7 @@ static void release_hw_id(struct i915_gem_context *ctx)
>   
>   static void i915_gem_context_free(struct i915_gem_context *ctx)
>   {
> -	unsigned int n;
> +	struct intel_context *it, *n;
>   
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
> @@ -249,12 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   
>   	kfree(ctx->engines);
>   
> -	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
> -		struct intel_context *ce = &ctx->__engine[n];
> -
> -		if (ce->ops)
> -			ce->ops->destroy(ce);
> -	}

/*
  * No need to take lock since no one can access the context at this
  * point.
  */

?

> +	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> +		it->ops->destroy(it);
>   
>   	if (ctx->timeline)
>   		i915_timeline_put(ctx->timeline);
> @@ -361,40 +357,11 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
>   	return desc;
>   }
>   
> -static void intel_context_retire(struct i915_active_request *active,
> -				 struct i915_request *rq)
> -{
> -	struct intel_context *ce =
> -		container_of(active, typeof(*ce), active_tracker);
> -
> -	intel_context_unpin(ce);
> -}
> -
> -void
> -intel_context_init(struct intel_context *ce,
> -		   struct i915_gem_context *ctx,
> -		   struct intel_engine_cs *engine)
> -{
> -	ce->gem_context = ctx;
> -	ce->engine = engine;
> -	ce->ops = engine->context;
> -
> -	INIT_LIST_HEAD(&ce->signal_link);
> -	INIT_LIST_HEAD(&ce->signals);
> -
> -	/* Use the whole device by default */
> -	ce->sseu = intel_device_default_sseu(ctx->i915);
> -
> -	i915_active_request_init(&ce->active_tracker,
> -				 NULL, intel_context_retire);
> -}
> -
>   static struct i915_gem_context *
>   __create_hw_context(struct drm_i915_private *dev_priv,
>   		    struct drm_i915_file_private *file_priv)
>   {
>   	struct i915_gem_context *ctx;
> -	unsigned int n;
>   	int ret;
>   	int i;
>   
> @@ -409,8 +376,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	INIT_LIST_HEAD(&ctx->active_engines);
>   	mutex_init(&ctx->mutex);
>   
> -	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
> -		intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]);
> +	ctx->hw_contexts = RB_ROOT;
> +	spin_lock_init(&ctx->hw_contexts_lock);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>   	INIT_LIST_HEAD(&ctx->handles_list);
> @@ -959,8 +926,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
>   		struct intel_ring *ring;
>   		struct i915_request *rq;
>   
> -		GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
> -
>   		rq = i915_request_alloc(engine, i915->kernel_context);
>   		if (IS_ERR(rq))
>   			return PTR_ERR(rq);
> @@ -1195,9 +1160,13 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
>   				    struct intel_engine_cs *engine,
>   				    struct intel_sseu sseu)
>   {
> -	struct intel_context *ce = to_intel_context(ctx, engine);
> +	struct intel_context *ce;
>   	int ret = 0;
>   
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
>   	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
>   	GEM_BUG_ON(engine->id != RCS);
>   
> @@ -1631,8 +1600,8 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
>   	return ctx_setparam(data, &local.setparam);
>   }
>   
> -static void clone_sseu(struct i915_gem_context *dst,
> -		       struct i915_gem_context *src)
> +static int clone_sseu(struct i915_gem_context *dst,
> +		      struct i915_gem_context *src)
>   {
>   	const struct intel_sseu default_sseu =
>   		intel_device_default_sseu(dst->i915);
> @@ -1643,7 +1612,7 @@ static void clone_sseu(struct i915_gem_context *dst,
>   		struct intel_context *ce;
>   		struct intel_sseu sseu;
>   
> -		ce = to_intel_context(src, engine);
> +		ce = intel_context_lookup(src, engine);
>   		if (!ce)
>   			continue;
>   
> @@ -1651,9 +1620,14 @@ static void clone_sseu(struct i915_gem_context *dst,
>   		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
>   			continue;
>   
> -		ce = to_intel_context(dst, engine);
> +		ce = intel_context_instance(dst, engine);
> +		if (IS_ERR(ce))
> +			return PTR_ERR(ce);
> +
>   		ce->sseu = sseu;
>   	}
> +
> +	return 0;
>   }
>   
>   static int create_clone(struct i915_user_extension __user *ext, void *data)
> @@ -1661,6 +1635,7 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
>   	struct drm_i915_gem_context_create_ext_clone local;
>   	struct i915_gem_context *dst = data;
>   	struct i915_gem_context *src;
> +	int err;
>   
>   	if (copy_from_user(&local, ext, sizeof(local)))
>   		return -EFAULT;
> @@ -1683,8 +1658,11 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
>   	if (local.flags & I915_CONTEXT_CLONE_SCHED)
>   		dst->sched = src->sched;
>   
> -	if (local.flags & I915_CONTEXT_CLONE_SSEU)
> -		clone_sseu(dst, src);
> +	if (local.flags & I915_CONTEXT_CLONE_SSEU) {
> +		err = clone_sseu(dst, src);
> +		if (err)
> +			return err;
> +	}
>   
>   	if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
>   		if (dst->timeline)
> @@ -1839,13 +1817,15 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	if (!engine)
>   		return -EINVAL;
>   
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);

Interesting! Nevermind..

> +
>   	/* Only use for mutex here is to serialize get_param and set_param. */
>   	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
>   	if (ret)
>   		return ret;
>   
> -	ce = to_intel_context(ctx, engine);
> -
>   	user_sseu.slice_mask = ce->sseu.slice_mask;
>   	user_sseu.subslice_mask = ce->sseu.subslice_mask;
>   	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 110d5881c9de..5b0e423edf86 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -31,7 +31,6 @@
>   #include "i915_scheduler.h"
>   #include "intel_context.h"
>   #include "intel_device_info.h"
> -#include "intel_ringbuffer.h"
>   
>   struct drm_device;
>   struct drm_file;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index 8baa7a5e7fdb..b1df720710f5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -13,10 +13,10 @@
>   #include <linux/kref.h>
>   #include <linux/mutex.h>
>   #include <linux/radix-tree.h>
> +#include <linux/rbtree.h>
>   #include <linux/rcupdate.h>
>   #include <linux/types.h>
>   
> -#include "i915_gem.h" /* I915_NUM_ENGINES */
>   #include "i915_scheduler.h"
>   #include "intel_context_types.h"
>   
> @@ -146,8 +146,9 @@ struct i915_gem_context {
>   
>   	struct i915_sched_attr sched;
>   
> -	/** engine: per-engine logical HW state */
> -	struct intel_context __engine[I915_NUM_ENGINES];
> +	/** hw_contexts: per-engine logical HW state */
> +	struct rb_root hw_contexts;
> +	spinlock_t hw_contexts_lock;
>   
>   	/** ring_size: size for allocating the per-engine ring buffer */
>   	u32 ring_size;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ff5810a932a8..5ea2e1c8b927 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -794,8 +794,8 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
>   	 * keeping all of their resources pinned.
>   	 */
>   
> -	ce = to_intel_context(eb->ctx, eb->engine);
> -	if (!ce->ring) /* first use, assume empty! */
> +	ce = intel_context_lookup(eb->ctx, eb->engine);
> +	if (!ce || !ce->ring) /* first use, assume empty! */
>   		return 0;
>   
>   	rq = __eb_wait_for_ring(ce->ring);
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index f969a0512465..ecca231ca83a 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1740,11 +1740,11 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>   
>   	/* Update all contexts now that we've stalled the submission. */
>   	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> -		struct intel_context *ce = to_intel_context(ctx, engine);
> +		struct intel_context *ce = intel_context_lookup(ctx, engine);
>   		u32 *regs;
>   
>   		/* OA settings will be set upon first use */
> -		if (!ce->state)
> +		if (!ce || !ce->state)
>   			continue;
>   
>   		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
> new file mode 100644
> index 000000000000..242b1b6ad253
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_context.c
> @@ -0,0 +1,139 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "i915_gem_context.h"
> +#include "intel_context.h"
> +#include "intel_ringbuffer.h"
> +
> +struct intel_context *
> +intel_context_lookup(struct i915_gem_context *ctx,
> +		     struct intel_engine_cs *engine)
> +{
> +	struct intel_context *ce = NULL;
> +	struct rb_node *p;
> +
> +	spin_lock(&ctx->hw_contexts_lock);
> +	p = ctx->hw_contexts.rb_node;
> +	while (p) {
> +		struct intel_context *this =
> +			rb_entry(p, struct intel_context, node);
> +
> +		if (this->engine == engine) {
> +			GEM_BUG_ON(this->gem_context != ctx);
> +			ce = this;
> +			break;
> +		}
> +
> +		if (this->engine < engine)
> +			p = p->rb_right;
> +		else
> +			p = p->rb_left;
> +	}
> +	spin_unlock(&ctx->hw_contexts_lock);
> +
> +	return ce;
> +}
> +
> +struct intel_context *
> +__intel_context_insert(struct i915_gem_context *ctx,
> +		       struct intel_engine_cs *engine,
> +		       struct intel_context *ce)
> +{
> +	struct rb_node **p, *parent;
> +	int err = 0;
> +
> +	spin_lock(&ctx->hw_contexts_lock);
> +
> +	parent = NULL;
> +	p = &ctx->hw_contexts.rb_node;
> +	while (*p) {
> +		struct intel_context *this;
> +
> +		parent = *p;
> +		this = rb_entry(parent, struct intel_context, node);
> +
> +		if (this->engine == engine) {
> +			err = -EEXIST;
> +			ce = this;
> +			break;
> +		}
> +
> +		if (this->engine < engine)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +	if (!err) {
> +		rb_link_node(&ce->node, parent, p);
> +		rb_insert_color(&ce->node, &ctx->hw_contexts);
> +	}
> +
> +	spin_unlock(&ctx->hw_contexts_lock);
> +
> +	return ce;
> +}
> +
> +void __intel_context_remove(struct intel_context *ce)
> +{
> +	struct i915_gem_context *ctx = ce->gem_context;
> +
> +	spin_lock(&ctx->hw_contexts_lock);
> +	rb_erase(&ce->node, &ctx->hw_contexts);
> +	spin_unlock(&ctx->hw_contexts_lock);
> +}

Gets used in a later patch?

> +
> +struct intel_context *
> +intel_context_instance(struct i915_gem_context *ctx,
> +		       struct intel_engine_cs *engine)
> +{
> +	struct intel_context *ce, *pos;
> +
> +	ce = intel_context_lookup(ctx, engine);
> +	if (likely(ce))
> +		return ce;
> +
> +	ce = kzalloc(sizeof(*ce), GFP_KERNEL);

At some point you'll move this to global slab?

> +	if (!ce)
> +		return ERR_PTR(-ENOMEM);
> +
> +	intel_context_init(ce, ctx, engine);
> +
> +	pos = __intel_context_insert(ctx, engine, ce);
> +	if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
> +		kfree(ce);
> +
> +	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
> +	return pos;
> +}
> +
> +static void intel_context_retire(struct i915_active_request *active,
> +				 struct i915_request *rq)
> +{
> +	struct intel_context *ce =
> +		container_of(active, typeof(*ce), active_tracker);
> +
> +	intel_context_unpin(ce);
> +}
> +
> +void
> +intel_context_init(struct intel_context *ce,
> +		   struct i915_gem_context *ctx,
> +		   struct intel_engine_cs *engine)
> +{
> +	ce->gem_context = ctx;
> +	ce->engine = engine;
> +	ce->ops = engine->context;
> +
> +	INIT_LIST_HEAD(&ce->signal_link);
> +	INIT_LIST_HEAD(&ce->signals);
> +
> +	/* Use the whole device by default */
> +	ce->sseu = intel_device_default_sseu(ctx->i915);
> +
> +	i915_active_request_init(&ce->active_tracker,
> +				 NULL, intel_context_retire);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
> index dd947692bb0b..c3fffd9b8ae4 100644
> --- a/drivers/gpu/drm/i915/intel_context.h
> +++ b/drivers/gpu/drm/i915/intel_context.h
> @@ -7,7 +7,6 @@
>   #ifndef __INTEL_CONTEXT_H__
>   #define __INTEL_CONTEXT_H__
>   
> -#include "i915_gem_context_types.h"
>   #include "intel_context_types.h"
>   #include "intel_engine_types.h"
>   
> @@ -15,12 +14,36 @@ void intel_context_init(struct intel_context *ce,
>   			struct i915_gem_context *ctx,
>   			struct intel_engine_cs *engine);
>   
> -static inline struct intel_context *
> -to_intel_context(struct i915_gem_context *ctx,
> -		 const struct intel_engine_cs *engine)
> -{
> -	return &ctx->__engine[engine->id];
> -}
> +/**
> + * intel_context_lookup - Find the matching HW context for this (ctx, engine)
> + * @ctx - the parent GEM context
> + * @engine - the target HW engine
> + *
> + * May return NULL if the HW context hasn't been instantiated (i.e. unused).
> + */
> +struct intel_context *
> +intel_context_lookup(struct i915_gem_context *ctx,
> +		     struct intel_engine_cs *engine);
> +
> +/**
> + * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
> + * @ctx - the parent GEM context
> + * @engine - the target HW engine
> + *
> + * Returns the existing HW context for this pair of (GEM context, engine), or
> + * allocates and initialises a fresh context. Once allocated, the HW context
> + * remains resident until the GEM context is destroyed.
> + */
> +struct intel_context *
> +intel_context_instance(struct i915_gem_context *ctx,
> +		       struct intel_engine_cs *engine);
> +
> +struct intel_context *
> +__intel_context_insert(struct i915_gem_context *ctx,
> +		       struct intel_engine_cs *engine,
> +		       struct intel_context *ce);
> +void
> +__intel_context_remove(struct intel_context *ce);
>   
>   static inline struct intel_context *
>   intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
> index 16e1306e9595..857f5c335324 100644
> --- a/drivers/gpu/drm/i915/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/intel_context_types.h
> @@ -8,6 +8,7 @@
>   #define __INTEL_CONTEXT_TYPES__
>   
>   #include <linux/list.h>
> +#include <linux/rbtree.h>
>   #include <linux/types.h>
>   
>   #include "i915_active_types.h"
> @@ -52,6 +53,7 @@ struct intel_context {
>   	struct i915_active_request active_tracker;
>   
>   	const struct intel_context_ops *ops;
> +	struct rb_node node;
>   
>   	/** sseu: Control eu/slice partitioning */
>   	struct intel_sseu sseu;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 2559dcc25a6a..112301560745 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -644,7 +644,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
>   static void __intel_context_unpin(struct i915_gem_context *ctx,
>   				  struct intel_engine_cs *engine)
>   {
> -	intel_context_unpin(to_intel_context(ctx, engine));
> +	intel_context_unpin(intel_context_lookup(ctx, engine));
>   }
>   
>   struct measure_breadcrumb {
> diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
> index 546b790871ad..5dac6b439f95 100644
> --- a/drivers/gpu/drm/i915/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/intel_engine_types.h
> @@ -231,6 +231,11 @@ struct intel_engine_execlists {
>   	 */
>   	u32 *csb_status;
>   
> +	/**
> +	 * @preempt_context: the HW context for injecting preempt-to-idle
> +	 */
> +	struct intel_context *preempt_context;
> +
>   	/**
>   	 * @preempt_complete_status: expected CSB upon completing preemption
>   	 */
> diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
> index f0db62887f50..da220561ac41 100644
> --- a/drivers/gpu/drm/i915/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/intel_guc_ads.c
> @@ -121,8 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc)
>   	 * to find it. Note that we have to skip our header (1 page),
>   	 * because our GuC shared data is there.
>   	 */
> -	kernel_ctx_vma = to_intel_context(dev_priv->kernel_context,
> -					  dev_priv->engine[RCS])->state;
> +	kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context,
> +					      dev_priv->engine[RCS])->state;
>   	blob->ads.golden_context_lrca =
>   		intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;

Scary moment but kernel context is perma pinned, ok.

>   
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 119d3326bb5e..4935e0555135 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -382,7 +382,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
>   	desc->db_id = client->doorbell_id;
>   
>   	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
> -		struct intel_context *ce = to_intel_context(ctx, engine);
> +		struct intel_context *ce = intel_context_lookup(ctx, engine);

This one seems to be handling !ce->state a bit below. So it feels you 
have to add the !ce test as well.

>   		u32 guc_engine_id = engine->guc_id;
>   		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
>   
> @@ -567,7 +567,7 @@ static void inject_preempt_context(struct work_struct *work)
>   					     preempt_work[engine->id]);
>   	struct intel_guc_client *client = guc->preempt_client;
>   	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
> -	struct intel_context *ce = to_intel_context(client->owner, engine);
> +	struct intel_context *ce = intel_context_lookup(client->owner, engine);
>   	u32 data[7];
>   
>   	if (!ce->ring->emit) { /* recreate upon load/resume */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4f6f09137662..0800f8edffeb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -622,8 +622,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   static void inject_preempt_context(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists *execlists = &engine->execlists;
> -	struct intel_context *ce =
> -		to_intel_context(engine->i915->preempt_context, engine);
> +	struct intel_context *ce = execlists->preempt_context;
>   	unsigned int n;
>   
>   	GEM_BUG_ON(execlists->preempt_complete_status !=
> @@ -1231,19 +1230,24 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
> -static void execlists_context_destroy(struct intel_context *ce)
> +static void __execlists_context_fini(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> -
> -	if (!ce->state)
> -		return;
> -
>   	intel_ring_free(ce->ring);
>   
>   	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
>   	i915_gem_object_put(ce->state->obj);
>   }
>   
> +static void execlists_context_destroy(struct intel_context *ce)
> +{
> +	GEM_BUG_ON(ce->pin_count);
> +
> +	if (ce->state)
> +		__execlists_context_fini(ce);
> +
> +	kfree(ce);
> +}
> +
>   static void execlists_context_unpin(struct intel_context *ce)
>   {
>   	struct intel_engine_cs *engine;
> @@ -1385,7 +1389,11 @@ static struct intel_context *
>   execlists_context_pin(struct intel_engine_cs *engine,
>   		      struct i915_gem_context *ctx)
>   {
> -	struct intel_context *ce = to_intel_context(ctx, engine);
> +	struct intel_context *ce;
> +
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return ce;
>   
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!ctx->ppgtt);
> @@ -2436,8 +2444,9 @@ static int logical_ring_init(struct intel_engine_cs *engine)
>   	execlists->preempt_complete_status = ~0u;
>   	if (i915->preempt_context) {
>   		struct intel_context *ce =
> -			to_intel_context(i915->preempt_context, engine);
> +			intel_context_lookup(i915->preempt_context, engine);
>   
> +		execlists->preempt_context = ce;
>   		execlists->preempt_complete_status =
>   			upper_32_bits(ce->lrc_desc);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 848b68e090d5..9002f7f6d32e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1348,15 +1348,20 @@ intel_ring_free(struct intel_ring *ring)
>   	kfree(ring);
>   }
>   
> +static void __ring_context_fini(struct intel_context *ce)
> +{
> +	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
> +	i915_gem_object_put(ce->state->obj);
> +}
> +
>   static void ring_context_destroy(struct intel_context *ce)
>   {
>   	GEM_BUG_ON(ce->pin_count);
>   
> -	if (!ce->state)
> -		return;
> +	if (ce->state)
> +		__ring_context_fini(ce);
>   
> -	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
> -	i915_gem_object_put(ce->state->obj);
> +	kfree(ce);
>   }
>   
>   static int __context_pin_ppgtt(struct i915_gem_context *ctx)
> @@ -1551,7 +1556,11 @@ __ring_context_pin(struct intel_engine_cs *engine,
>   static struct intel_context *
>   ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   {
> -	struct intel_context *ce = to_intel_context(ctx, engine);
> +	struct intel_context *ce;
> +
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return ce;
>   
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   
> @@ -1753,8 +1762,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>   		 * placeholder we use to flush other contexts.
>   		 */
>   		*cs++ = MI_SET_CONTEXT;
> -		*cs++ = i915_ggtt_offset(to_intel_context(i915->kernel_context,
> -							  engine)->state) |
> +		*cs++ = i915_ggtt_offset(intel_context_lookup(i915->kernel_context,
> +							      engine)->state) |
>   			MI_MM_SPACE_GTT |
>   			MI_RESTORE_INHIBIT;
>   	}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 5d0ff2293abc..1d6dc2fe36ab 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -30,7 +30,6 @@ mock_context(struct drm_i915_private *i915,
>   	     const char *name)
>   {
>   	struct i915_gem_context *ctx;
> -	unsigned int n;
>   	int ret;
>   
>   	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> @@ -41,15 +40,15 @@ mock_context(struct drm_i915_private *i915,
>   	INIT_LIST_HEAD(&ctx->link);
>   	ctx->i915 = i915;
>   
> +	ctx->hw_contexts = RB_ROOT;
> +	spin_lock_init(&ctx->hw_contexts_lock);
> +
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>   	INIT_LIST_HEAD(&ctx->handles_list);
>   	INIT_LIST_HEAD(&ctx->hw_id_link);
>   	INIT_LIST_HEAD(&ctx->active_engines);
>   	mutex_init(&ctx->mutex);
>   
> -	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
> -		intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]);
> -
>   	ret = i915_gem_context_pin_hw_id(ctx);
>   	if (ret < 0)
>   		goto err_handles;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 6c09e5162feb..f1a80006289d 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -141,9 +141,13 @@ static struct intel_context *
>   mock_context_pin(struct intel_engine_cs *engine,
>   		 struct i915_gem_context *ctx)
>   {
> -	struct intel_context *ce = to_intel_context(ctx, engine);
> +	struct intel_context *ce;
>   	int err = -ENOMEM;
>   
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return ce;
> +
>   	if (ce->pin_count++)
>   		return ce;
>   
> 

Regards,

Tvrtko
Chris Wilson March 5, 2019, 5:10 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-05 17:01:09)
> 
> On 01/03/2019 14:03, Chris Wilson wrote:
> > @@ -249,12 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> >   
> >       kfree(ctx->engines);
> >   
> > -     for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
> > -             struct intel_context *ce = &ctx->__engine[n];
> > -
> > -             if (ce->ops)
> > -                     ce->ops->destroy(ce);
> > -     }
> 
> /*
>   * No need to take lock since no one can access the context at this
>   * point.
>   */
> 
> ?

I hope that's self-evident from being the free function. Hmm, perhaps
not if we move to a HW context centric viewpoint. Maybe worth revisiting
in the future when GEM context withers away.

> > @@ -1839,13 +1817,15 @@ static int get_sseu(struct i915_gem_context *ctx,
> >       if (!engine)
> >               return -EINVAL;
> >   
> > +     ce = intel_context_instance(ctx, engine);
> > +     if (IS_ERR(ce))
> > +             return PTR_ERR(ce);
> 
> Interesting! Nevermind..

Wait for the next patch, that one is just for you :)

> > +void __intel_context_remove(struct intel_context *ce)
> > +{
> > +     struct i915_gem_context *ctx = ce->gem_context;
> > +
> > +     spin_lock(&ctx->hw_contexts_lock);
> > +     rb_erase(&ce->node, &ctx->hw_contexts);
> > +     spin_unlock(&ctx->hw_contexts_lock);
> > +}
> 
> Gets used in a later patch?

For the embedded context within virtual_engine.

> > +struct intel_context *
> > +intel_context_instance(struct i915_gem_context *ctx,
> > +                    struct intel_engine_cs *engine)
> > +{
> > +     struct intel_context *ce, *pos;
> > +
> > +     ce = intel_context_lookup(ctx, engine);
> > +     if (likely(ce))
> > +             return ce;
> > +
> > +     ce = kzalloc(sizeof(*ce), GFP_KERNEL);
> 
> At some point you'll move this to global slab?

Can do so now, I didn't think these would be frequent enough to justify
a slab. We expect a few hundred on a system; whereas we expect
thousands, tens of thousands of requests and objects.

Still slabs have secondary benefits of debugging allocation patterns.

> > diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
> > index f0db62887f50..da220561ac41 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_ads.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_ads.c
> > @@ -121,8 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc)
> >        * to find it. Note that we have to skip our header (1 page),
> >        * because our GuC shared data is there.
> >        */
> > -     kernel_ctx_vma = to_intel_context(dev_priv->kernel_context,
> > -                                       dev_priv->engine[RCS])->state;
> > +     kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context,
> > +                                           dev_priv->engine[RCS])->state;
> >       blob->ads.golden_context_lrca =
> >               intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;
> 
> Scary moment but kernel context is perma pinned, ok.

Scary moment, but its guc.

> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 119d3326bb5e..4935e0555135 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -382,7 +382,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
> >       desc->db_id = client->doorbell_id;
> >   
> >       for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
> > -             struct intel_context *ce = to_intel_context(ctx, engine);
> > +             struct intel_context *ce = intel_context_lookup(ctx, engine);
> 
> This one seems to be handling !ce->state a bit below. So it feels you 
> have to add the !ce test as well.

I'm not allowed to explode here, shame.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 89105b1aaf12..d7292b349c0d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -86,6 +86,7 @@  i915-y += \
 	  i915_trace_points.o \
 	  i915_vma.o \
 	  intel_breadcrumbs.o \
+	  intel_context.o \
 	  intel_engine_cs.o \
 	  intel_hangcheck.o \
 	  intel_lrc.o \
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 7d84cfb9051a..442a74805129 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -492,7 +492,8 @@  static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(&s->shadow_ctx->__engine[ring_id]))
+			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
+								     dev_priv->engine[ring_id])))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 90bb951b8c99..c6a25c3276ee 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4694,15 +4694,20 @@  static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
 		struct i915_vma *state;
 		void *vaddr;
 
-		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
+		ce = intel_context_lookup(ctx, engine);
+		if (!ce)
+			continue;
 
-		state = to_intel_context(ctx, engine)->state;
+		state = ce->state;
 		if (!state)
 			continue;
 
+		GEM_BUG_ON(ce->pin_count);
+
 		/*
 		 * As we will hold a reference to the logical state, it will
 		 * not be torn down with the context, and importantly the
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 04c24caf30d2..60b17f6a727d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,7 +238,7 @@  static void release_hw_id(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	unsigned int n;
+	struct intel_context *it, *n;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
@@ -249,12 +249,8 @@  static void i915_gem_context_free(struct i915_gem_context *ctx)
 
 	kfree(ctx->engines);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
-		struct intel_context *ce = &ctx->__engine[n];
-
-		if (ce->ops)
-			ce->ops->destroy(ce);
-	}
+	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
+		it->ops->destroy(it);
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -361,40 +357,11 @@  static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
-{
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
-
-	intel_context_unpin(ce);
-}
-
-void
-intel_context_init(struct intel_context *ce,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine)
-{
-	ce->gem_context = ctx;
-	ce->engine = engine;
-	ce->ops = engine->context;
-
-	INIT_LIST_HEAD(&ce->signal_link);
-	INIT_LIST_HEAD(&ce->signals);
-
-	/* Use the whole device by default */
-	ce->sseu = intel_device_default_sseu(ctx->i915);
-
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
-}
-
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
 {
 	struct i915_gem_context *ctx;
-	unsigned int n;
 	int ret;
 	int i;
 
@@ -409,8 +376,8 @@  __create_hw_context(struct drm_i915_private *dev_priv,
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
-		intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]);
+	ctx->hw_contexts = RB_ROOT;
+	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -959,8 +926,6 @@  int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
-		GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
-
 		rq = i915_request_alloc(engine, i915->kernel_context);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
@@ -1195,9 +1160,13 @@  __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 				    struct intel_engine_cs *engine,
 				    struct intel_sseu sseu)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
 	int ret = 0;
 
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
 	GEM_BUG_ON(engine->id != RCS);
 
@@ -1631,8 +1600,8 @@  static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(data, &local.setparam);
 }
 
-static void clone_sseu(struct i915_gem_context *dst,
-		       struct i915_gem_context *src)
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
 {
 	const struct intel_sseu default_sseu =
 		intel_device_default_sseu(dst->i915);
@@ -1643,7 +1612,7 @@  static void clone_sseu(struct i915_gem_context *dst,
 		struct intel_context *ce;
 		struct intel_sseu sseu;
 
-		ce = to_intel_context(src, engine);
+		ce = intel_context_lookup(src, engine);
 		if (!ce)
 			continue;
 
@@ -1651,9 +1620,14 @@  static void clone_sseu(struct i915_gem_context *dst,
 		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
 			continue;
 
-		ce = to_intel_context(dst, engine);
+		ce = intel_context_instance(dst, engine);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
+
 		ce->sseu = sseu;
 	}
+
+	return 0;
 }
 
 static int create_clone(struct i915_user_extension __user *ext, void *data)
@@ -1661,6 +1635,7 @@  static int create_clone(struct i915_user_extension __user *ext, void *data)
 	struct drm_i915_gem_context_create_ext_clone local;
 	struct i915_gem_context *dst = data;
 	struct i915_gem_context *src;
+	int err;
 
 	if (copy_from_user(&local, ext, sizeof(local)))
 		return -EFAULT;
@@ -1683,8 +1658,11 @@  static int create_clone(struct i915_user_extension __user *ext, void *data)
 	if (local.flags & I915_CONTEXT_CLONE_SCHED)
 		dst->sched = src->sched;
 
-	if (local.flags & I915_CONTEXT_CLONE_SSEU)
-		clone_sseu(dst, src);
+	if (local.flags & I915_CONTEXT_CLONE_SSEU) {
+		err = clone_sseu(dst, src);
+		if (err)
+			return err;
+	}
 
 	if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
 		if (dst->timeline)
@@ -1839,13 +1817,15 @@  static int get_sseu(struct i915_gem_context *ctx,
 	if (!engine)
 		return -EINVAL;
 
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/* Only use for mutex here is to serialize get_param and set_param. */
 	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	ce = to_intel_context(ctx, engine);
-
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 110d5881c9de..5b0e423edf86 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -31,7 +31,6 @@ 
 #include "i915_scheduler.h"
 #include "intel_context.h"
 #include "intel_device_info.h"
-#include "intel_ringbuffer.h"
 
 struct drm_device;
 struct drm_file;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 8baa7a5e7fdb..b1df720710f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -13,10 +13,10 @@ 
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 
-#include "i915_gem.h" /* I915_NUM_ENGINES */
 #include "i915_scheduler.h"
 #include "intel_context_types.h"
 
@@ -146,8 +146,9 @@  struct i915_gem_context {
 
 	struct i915_sched_attr sched;
 
-	/** engine: per-engine logical HW state */
-	struct intel_context __engine[I915_NUM_ENGINES];
+	/** hw_contexts: per-engine logical HW state */
+	struct rb_root hw_contexts;
+	spinlock_t hw_contexts_lock;
 
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ff5810a932a8..5ea2e1c8b927 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -794,8 +794,8 @@  static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 	 * keeping all of their resources pinned.
 	 */
 
-	ce = to_intel_context(eb->ctx, eb->engine);
-	if (!ce->ring) /* first use, assume empty! */
+	ce = intel_context_lookup(eb->ctx, eb->engine);
+	if (!ce || !ce->ring) /* first use, assume empty! */
 		return 0;
 
 	rq = __eb_wait_for_ring(ce->ring);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index f969a0512465..ecca231ca83a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1740,11 +1740,11 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+		struct intel_context *ce = intel_context_lookup(ctx, engine);
 		u32 *regs;
 
 		/* OA settings will be set upon first use */
-		if (!ce->state)
+		if (!ce || !ce->state)
 			continue;
 
 		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
new file mode 100644
index 000000000000..242b1b6ad253
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -0,0 +1,139 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_context.h"
+#include "intel_context.h"
+#include "intel_ringbuffer.h"
+
+struct intel_context *
+intel_context_lookup(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine)
+{
+	struct intel_context *ce = NULL;
+	struct rb_node *p;
+
+	spin_lock(&ctx->hw_contexts_lock);
+	p = ctx->hw_contexts.rb_node;
+	while (p) {
+		struct intel_context *this =
+			rb_entry(p, struct intel_context, node);
+
+		if (this->engine == engine) {
+			GEM_BUG_ON(this->gem_context != ctx);
+			ce = this;
+			break;
+		}
+
+		if (this->engine < engine)
+			p = p->rb_right;
+		else
+			p = p->rb_left;
+	}
+	spin_unlock(&ctx->hw_contexts_lock);
+
+	return ce;
+}
+
+struct intel_context *
+__intel_context_insert(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context *ce)
+{
+	struct rb_node **p, *parent;
+	int err = 0;
+
+	spin_lock(&ctx->hw_contexts_lock);
+
+	parent = NULL;
+	p = &ctx->hw_contexts.rb_node;
+	while (*p) {
+		struct intel_context *this;
+
+		parent = *p;
+		this = rb_entry(parent, struct intel_context, node);
+
+		if (this->engine == engine) {
+			err = -EEXIST;
+			ce = this;
+			break;
+		}
+
+		if (this->engine < engine)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	if (!err) {
+		rb_link_node(&ce->node, parent, p);
+		rb_insert_color(&ce->node, &ctx->hw_contexts);
+	}
+
+	spin_unlock(&ctx->hw_contexts_lock);
+
+	return ce;
+}
+
+void __intel_context_remove(struct intel_context *ce)
+{
+	struct i915_gem_context *ctx = ce->gem_context;
+
+	spin_lock(&ctx->hw_contexts_lock);
+	rb_erase(&ce->node, &ctx->hw_contexts);
+	spin_unlock(&ctx->hw_contexts_lock);
+}
+
+struct intel_context *
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine)
+{
+	struct intel_context *ce, *pos;
+
+	ce = intel_context_lookup(ctx, engine);
+	if (likely(ce))
+		return ce;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce)
+		return ERR_PTR(-ENOMEM);
+
+	intel_context_init(ce, ctx, engine);
+
+	pos = __intel_context_insert(ctx, engine, ce);
+	if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
+		kfree(ce);
+
+	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
+	return pos;
+}
+
+static void intel_context_retire(struct i915_active_request *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce =
+		container_of(active, typeof(*ce), active_tracker);
+
+	intel_context_unpin(ce);
+}
+
+void
+intel_context_init(struct intel_context *ce,
+		   struct i915_gem_context *ctx,
+		   struct intel_engine_cs *engine)
+{
+	ce->gem_context = ctx;
+	ce->engine = engine;
+	ce->ops = engine->context;
+
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
+
+	/* Use the whole device by default */
+	ce->sseu = intel_device_default_sseu(ctx->i915);
+
+	i915_active_request_init(&ce->active_tracker,
+				 NULL, intel_context_retire);
+}
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index dd947692bb0b..c3fffd9b8ae4 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -7,7 +7,6 @@ 
 #ifndef __INTEL_CONTEXT_H__
 #define __INTEL_CONTEXT_H__
 
-#include "i915_gem_context_types.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
@@ -15,12 +14,36 @@  void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
 			struct intel_engine_cs *engine);
 
-static inline struct intel_context *
-to_intel_context(struct i915_gem_context *ctx,
-		 const struct intel_engine_cs *engine)
-{
-	return &ctx->__engine[engine->id];
-}
+/**
+ * intel_context_lookup - Find the matching HW context for this (ctx, engine)
+ * @ctx - the parent GEM context
+ * @engine - the target HW engine
+ *
+ * May return NULL if the HW context hasn't been instantiated (i.e. unused).
+ */
+struct intel_context *
+intel_context_lookup(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine);
+
+/**
+ * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
+ * @ctx - the parent GEM context
+ * @engine - the target HW engine
+ *
+ * Returns the existing HW context for this pair of (GEM context, engine), or
+ * allocates and initialises a fresh context. Once allocated, the HW context
+ * remains resident until the GEM context is destroyed.
+ */
+struct intel_context *
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine);
+
+struct intel_context *
+__intel_context_insert(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context *ce);
+void
+__intel_context_remove(struct intel_context *ce);
 
 static inline struct intel_context *
 intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 16e1306e9595..857f5c335324 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -8,6 +8,7 @@ 
 #define __INTEL_CONTEXT_TYPES__
 
 #include <linux/list.h>
+#include <linux/rbtree.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
@@ -52,6 +53,7 @@  struct intel_context {
 	struct i915_active_request active_tracker;
 
 	const struct intel_context_ops *ops;
+	struct rb_node node;
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2559dcc25a6a..112301560745 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -644,7 +644,7 @@  void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
-	intel_context_unpin(to_intel_context(ctx, engine));
+	intel_context_unpin(intel_context_lookup(ctx, engine));
 }
 
 struct measure_breadcrumb {
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 546b790871ad..5dac6b439f95 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -231,6 +231,11 @@  struct intel_engine_execlists {
 	 */
 	u32 *csb_status;
 
+	/**
+	 * @preempt_context: the HW context for injecting preempt-to-idle
+	 */
+	struct intel_context *preempt_context;
+
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
index f0db62887f50..da220561ac41 100644
--- a/drivers/gpu/drm/i915/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/intel_guc_ads.c
@@ -121,8 +121,8 @@  int intel_guc_ads_create(struct intel_guc *guc)
 	 * to find it. Note that we have to skip our header (1 page),
 	 * because our GuC shared data is there.
 	 */
-	kernel_ctx_vma = to_intel_context(dev_priv->kernel_context,
-					  dev_priv->engine[RCS])->state;
+	kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context,
+					      dev_priv->engine[RCS])->state;
 	blob->ads.golden_context_lrca =
 		intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 119d3326bb5e..4935e0555135 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -382,7 +382,7 @@  static void guc_stage_desc_init(struct intel_guc_client *client)
 	desc->db_id = client->doorbell_id;
 
 	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+		struct intel_context *ce = intel_context_lookup(ctx, engine);
 		u32 guc_engine_id = engine->guc_id;
 		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
 
@@ -567,7 +567,7 @@  static void inject_preempt_context(struct work_struct *work)
 					     preempt_work[engine->id]);
 	struct intel_guc_client *client = guc->preempt_client;
 	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
-	struct intel_context *ce = to_intel_context(client->owner, engine);
+	struct intel_context *ce = intel_context_lookup(client->owner, engine);
 	u32 data[7];
 
 	if (!ce->ring->emit) { /* recreate upon load/resume */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4f6f09137662..0800f8edffeb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -622,8 +622,7 @@  static void port_assign(struct execlist_port *port, struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
+	struct intel_context *ce = execlists->preempt_context;
 	unsigned int n;
 
 	GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -1231,19 +1230,24 @@  static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static void execlists_context_destroy(struct intel_context *ce)
+static void __execlists_context_fini(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
-
-	if (!ce->state)
-		return;
-
 	intel_ring_free(ce->ring);
 
 	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
 	i915_gem_object_put(ce->state->obj);
 }
 
+static void execlists_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(ce->pin_count);
+
+	if (ce->state)
+		__execlists_context_fini(ce);
+
+	kfree(ce);
+}
+
 static void execlists_context_unpin(struct intel_context *ce)
 {
 	struct intel_engine_cs *engine;
@@ -1385,7 +1389,11 @@  static struct intel_context *
 execlists_context_pin(struct intel_engine_cs *engine,
 		      struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!ctx->ppgtt);
@@ -2436,8 +2444,9 @@  static int logical_ring_init(struct intel_engine_cs *engine)
 	execlists->preempt_complete_status = ~0u;
 	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(i915->preempt_context, engine);
+			intel_context_lookup(i915->preempt_context, engine);
 
+		execlists->preempt_context = ce;
 		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 848b68e090d5..9002f7f6d32e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1348,15 +1348,20 @@  intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
+static void __ring_context_fini(struct intel_context *ce)
+{
+	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
+	i915_gem_object_put(ce->state->obj);
+}
+
 static void ring_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
 
-	if (!ce->state)
-		return;
+	if (ce->state)
+		__ring_context_fini(ce);
 
-	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
-	i915_gem_object_put(ce->state->obj);
+	kfree(ce);
 }
 
 static int __context_pin_ppgtt(struct i915_gem_context *ctx)
@@ -1551,7 +1556,11 @@  __ring_context_pin(struct intel_engine_cs *engine,
 static struct intel_context *
 ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 
@@ -1753,8 +1762,8 @@  static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		 * placeholder we use to flush other contexts.
 		 */
 		*cs++ = MI_SET_CONTEXT;
-		*cs++ = i915_ggtt_offset(to_intel_context(i915->kernel_context,
-							  engine)->state) |
+		*cs++ = i915_ggtt_offset(intel_context_lookup(i915->kernel_context,
+							      engine)->state) |
 			MI_MM_SPACE_GTT |
 			MI_RESTORE_INHIBIT;
 	}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 5d0ff2293abc..1d6dc2fe36ab 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -30,7 +30,6 @@  mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
-	unsigned int n;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -41,15 +40,15 @@  mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	ctx->hw_contexts = RB_ROOT;
+	spin_lock_init(&ctx->hw_contexts_lock);
+
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
-		intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]);
-
 	ret = i915_gem_context_pin_hw_id(ctx);
 	if (ret < 0)
 		goto err_handles;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 6c09e5162feb..f1a80006289d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -141,9 +141,13 @@  static struct intel_context *
 mock_context_pin(struct intel_engine_cs *engine,
 		 struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
 	int err = -ENOMEM;
 
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
 	if (ce->pin_count++)
 		return ce;