diff mbox series

[06/18] drm/i915/guc: Implement GuC context operations for new interface

Message ID 20210720223921.56160-7-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Series to merge a subset of GuC submission | expand

Commit Message

Matthew Brost July 20, 2021, 10:39 p.m. UTC
Implement GuC context operations which includes GuC specific operations
alloc, pin, unpin, and destroy.

v2:
 (Daniel Vetter)
  - Use msleep_interruptible rather than cond_resched in busy loop
 (Michal)
  - Remove C++ style comment
v3:
 (Matthew Brost)
  - Drop GUC_ID_START
 (John Harrison)
  - Fix a bunch of typos
  - Use drm_err rather than drm_dbg for G2H errors
 (Daniele)
  - Fix ;; typo
  - Clean up sched state functions
  - Add lockdep for guc_id functions
  - Don't call __release_guc_id when guc_id is invalid
  - Use MISSING_CASE
  - Add comment in guc_context_pin
  - Use shorter path to rpm
 (Daniele / CI)
  - Don't call release_guc_id on an invalid guc_id in destroy

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       |   5 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  22 +-
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |   1 -
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  40 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c     |   4 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 667 ++++++++++++++++--
 drivers/gpu/drm/i915/i915_reg.h               |   1 +
 drivers/gpu/drm/i915/i915_request.c           |   1 +
 8 files changed, 686 insertions(+), 55 deletions(-)

Comments

John Harrison July 21, 2021, 1:51 a.m. UTC | #1
On 7/20/2021 15:39, Matthew Brost wrote:
> Implement GuC context operations which includes GuC specific operations
> alloc, pin, unpin, and destroy.
>
> v2:
>   (Daniel Vetter)
>    - Use msleep_interruptible rather than cond_resched in busy loop
>   (Michal)
>    - Remove C++ style comment
> v3:
>   (Matthew Brost)
>    - Drop GUC_ID_START
>   (John Harrison)
>    - Fix a bunch of typos
>    - Use drm_err rather than drm_dbg for G2H errors
>   (Daniele)
>    - Fix ;; typo
>    - Clean up sched state functions
>    - Add lockdep for guc_id functions
>    - Don't call __release_guc_id when guc_id is invalid
>    - Use MISSING_CASE
>    - Add comment in guc_context_pin
>    - Use shorter path to rpm
>   (Daniele / CI)
>    - Don't call release_guc_id on an invalid guc_id in destroy
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |   5 +
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  22 +-
>   drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |   1 -
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  40 ++
>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c     |   4 +
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 667 ++++++++++++++++--
>   drivers/gpu/drm/i915/i915_reg.h               |   1 +
>   drivers/gpu/drm/i915/i915_request.c           |   1 +
>   8 files changed, 686 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index bd63813c8a80..32fd6647154b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -384,6 +384,11 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>   
>   	mutex_init(&ce->pin_mutex);
>   
> +	spin_lock_init(&ce->guc_state.lock);
> +
> +	ce->guc_id = GUC_INVALID_LRC_ID;
> +	INIT_LIST_HEAD(&ce->guc_id_link);
> +
>   	i915_active_init(&ce->active,
>   			 __intel_context_active, __intel_context_retire, 0);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 6d99631d19b9..606c480aec26 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -96,6 +96,7 @@ struct intel_context {
>   #define CONTEXT_BANNED			6
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>   #define CONTEXT_NOPREEMPT		8
> +#define CONTEXT_LRCA_DIRTY		9
>   
>   	struct {
>   		u64 timeout_us;
> @@ -138,14 +139,29 @@ struct intel_context {
>   
>   	u8 wa_bb_page; /* if set, page num reserved for context workarounds */
>   
> +	struct {
> +		/** lock: protects everything in guc_state */
> +		spinlock_t lock;
> +		/**
> +		 * sched_state: scheduling state of this context using GuC
> +		 * submission
> +		 */
> +		u8 sched_state;
> +	} guc_state;
> +
>   	/* GuC scheduling state flags that do not require a lock. */
>   	atomic_t guc_sched_state_no_lock;
>   
> +	/* GuC LRC descriptor ID */
> +	u16 guc_id;
> +
> +	/* GuC LRC descriptor reference count */
> +	atomic_t guc_id_ref;
> +
>   	/*
> -	 * GuC LRC descriptor ID - Not assigned in this patch but future patches
> -	 * in the series will.
> +	 * GuC ID link - in list when unpinned but guc_id still valid in GuC
>   	 */
> -	u16 guc_id;
> +	struct list_head guc_id_link;
>   };
>   
>   #endif /* __INTEL_CONTEXT_TYPES__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index 41e5350a7a05..49d4857ad9b7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -87,7 +87,6 @@
>   #define GEN11_CSB_WRITE_PTR_MASK	(GEN11_CSB_PTR_MASK << 0)
>   
>   #define MAX_CONTEXT_HW_ID	(1 << 21) /* exclusive */
> -#define MAX_GUC_CONTEXT_HW_ID	(1 << 20) /* exclusive */
>   #define GEN11_MAX_CONTEXT_HW_ID	(1 << 11) /* exclusive */
>   /* in Gen12 ID 0x7FF is reserved to indicate idle */
>   #define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index 8c7b92f699f1..30773cd699f5 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -7,6 +7,7 @@
>   #define _INTEL_GUC_H_
>   
>   #include <linux/xarray.h>
> +#include <linux/delay.h>
>   
>   #include "intel_uncore.h"
>   #include "intel_guc_fw.h"
> @@ -44,6 +45,14 @@ struct intel_guc {
>   		void (*disable)(struct intel_guc *guc);
>   	} interrupts;
>   
> +	/*
> +	 * contexts_lock protects the pool of free guc ids and a linked list of
> +	 * guc ids available to be stolen
> +	 */
> +	spinlock_t contexts_lock;
> +	struct ida guc_ids;
> +	struct list_head guc_id_list;
> +
>   	bool submission_selected;
>   
>   	struct i915_vma *ads_vma;
> @@ -101,6 +110,34 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
>   				 response_buf, response_buf_size, 0);
>   }
>   
> +static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
> +					   const u32 *action,
> +					   u32 len,
> +					   bool loop)
> +{
> +	int err;
> +	unsigned int sleep_period_ms = 1;
> +	bool not_atomic = !in_atomic() && !irqs_disabled();
> +
> +	/* No sleeping with spin locks, just busy loop */
> +	might_sleep_if(loop && not_atomic);
> +
> +retry:
> +	err = intel_guc_send_nb(guc, action, len);
> +	if (unlikely(err == -EBUSY && loop)) {
> +		if (likely(not_atomic)) {
> +			if (msleep_interruptible(sleep_period_ms))
> +				return -EINTR;
> +			sleep_period_ms = sleep_period_ms << 1;
> +		} else {
> +			cpu_relax();
> +		}
> +		goto retry;
> +	}
> +
> +	return err;
> +}
> +
>   static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
>   {
>   	intel_guc_ct_event_handler(&guc->ct);
> @@ -202,6 +239,9 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
>   int intel_guc_reset_engine(struct intel_guc *guc,
>   			   struct intel_engine_cs *engine);
>   
> +int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
> +					  const u32 *msg, u32 len);
> +
>   void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index 83ec60ea3f89..28ff82c5be45 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -928,6 +928,10 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>   	case INTEL_GUC_ACTION_DEFAULT:
>   		ret = intel_guc_to_host_process_recv_msg(guc, payload, len);
>   		break;
> +	case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
> +		ret = intel_guc_deregister_done_process_msg(guc, payload,
> +							    len);
> +		break;
>   	default:
>   		ret = -EOPNOTSUPP;
>   		break;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 53b4a5eb4a85..6940b9d62118 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -13,7 +13,9 @@
>   #include "gt/intel_gt.h"
>   #include "gt/intel_gt_irq.h"
>   #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/intel_lrc.h"
> +#include "gt/intel_lrc_reg.h"
>   #include "gt/intel_mocs.h"
>   #include "gt/intel_ring.h"
>   
> @@ -85,6 +87,72 @@ static inline void clr_context_enabled(struct intel_context *ce)
>   		   &ce->guc_sched_state_no_lock);
>   }
>   
> +/*
> + * Below is a set of functions which control the GuC scheduling state which
> + * require a lock, aside from the special case where the functions are called
> + * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
> + * path to be executing on the context.
> + */
> +#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
> +#define SCHED_STATE_DESTROYED				BIT(1)
> +static inline void init_sched_state(struct intel_context *ce)
> +{
> +	/* Only should be called from guc_lrc_desc_pin() */
> +	atomic_set(&ce->guc_sched_state_no_lock, 0);
> +	ce->guc_state.sched_state = 0;
> +}
> +
> +static inline bool
> +context_wait_for_deregister_to_register(struct intel_context *ce)
> +{
> +	return ce->guc_state.sched_state &
> +		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
> +}
> +
> +static inline void
> +set_context_wait_for_deregister_to_register(struct intel_context *ce)
> +{
> +	/* Only should be called from guc_lrc_desc_pin() */
> +	ce->guc_state.sched_state |=
> +		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
> +}
> +
> +static inline void
> +clr_context_wait_for_deregister_to_register(struct intel_context *ce)
> +{
> +	lockdep_assert_held(&ce->guc_state.lock);
> +	ce->guc_state.sched_state &=
> +		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
> +}
> +
> +static inline bool
> +context_destroyed(struct intel_context *ce)
> +{
> +	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
> +}
> +
> +static inline void
> +set_context_destroyed(struct intel_context *ce)
> +{
> +	lockdep_assert_held(&ce->guc_state.lock);
> +	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
> +}
> +
> +static inline bool context_guc_id_invalid(struct intel_context *ce)
> +{
> +	return (ce->guc_id == GUC_INVALID_LRC_ID);
Could have dropped the brackets from this one too.

> +}
> +
> +static inline void set_context_guc_id_invalid(struct intel_context *ce)
> +{
> +	ce->guc_id = GUC_INVALID_LRC_ID;
> +}
> +
> +static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
> +{
> +	return &ce->engine->gt->uc.guc;
> +}
> +
>   static inline struct i915_priolist *to_priolist(struct rb_node *rb)
>   {
>   	return rb_entry(rb, struct i915_priolist, node);
> @@ -155,6 +223,9 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
>   	int len = 0;
>   	bool enabled = context_enabled(ce);
>   
> +	GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
> +	GEM_BUG_ON(context_guc_id_invalid(ce));
> +
>   	if (!enabled) {
>   		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
>   		action[len++] = ce->guc_id;
> @@ -417,6 +488,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
>   
>   	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
>   
> +	spin_lock_init(&guc->contexts_lock);
> +	INIT_LIST_HEAD(&guc->guc_id_list);
> +	ida_init(&guc->guc_ids);
> +
>   	return 0;
>   }
>   
> @@ -429,9 +504,305 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>   	i915_sched_engine_put(guc->sched_engine);
>   }
>   
> -static int guc_context_alloc(struct intel_context *ce)
> +static inline void queue_request(struct i915_sched_engine *sched_engine,
> +				 struct i915_request *rq,
> +				 int prio)
>   {
> -	return lrc_alloc(ce, ce->engine);
> +	GEM_BUG_ON(!list_empty(&rq->sched.link));
> +	list_add_tail(&rq->sched.link,
> +		      i915_sched_lookup_priolist(sched_engine, prio));
> +	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> +}
> +
> +static int guc_bypass_tasklet_submit(struct intel_guc *guc,
> +				     struct i915_request *rq)
> +{
> +	int ret;
> +
> +	__i915_request_submit(rq);
> +
> +	trace_i915_request_in(rq, 0);
> +
> +	guc_set_lrc_tail(rq);
> +	ret = guc_add_request(guc, rq);
> +	if (ret == -EBUSY)
> +		guc->stalled_request = rq;
> +
> +	return ret;
> +}
> +
> +static void guc_submit_request(struct i915_request *rq)
> +{
> +	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
> +	struct intel_guc *guc = &rq->engine->gt->uc.guc;
> +	unsigned long flags;
> +
> +	/* Will be called from irq-context when using foreign fences. */
> +	spin_lock_irqsave(&sched_engine->lock, flags);
> +
> +	if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
> +		queue_request(sched_engine, rq, rq_prio(rq));
> +	else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
> +		tasklet_hi_schedule(&sched_engine->tasklet);
> +
> +	spin_unlock_irqrestore(&sched_engine->lock, flags);
> +}
> +
> +static int new_guc_id(struct intel_guc *guc)
> +{
> +	return ida_simple_get(&guc->guc_ids, 0,
> +			      GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
> +			      __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> +}
> +
> +static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	if (!context_guc_id_invalid(ce)) {
> +		ida_simple_remove(&guc->guc_ids, ce->guc_id);
> +		reset_lrc_desc(guc, ce->guc_id);
> +		set_context_guc_id_invalid(ce);
> +	}
> +	if (!list_empty(&ce->guc_id_link))
> +		list_del_init(&ce->guc_id_link);
> +}
> +
> +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&guc->contexts_lock, flags);
> +	__release_guc_id(guc, ce);
> +	spin_unlock_irqrestore(&guc->contexts_lock, flags);
> +}
> +
> +static int steal_guc_id(struct intel_guc *guc)
> +{
> +	struct intel_context *ce;
> +	int guc_id;
> +
> +	lockdep_assert_held(&guc->contexts_lock);
> +
> +	if (!list_empty(&guc->guc_id_list)) {
> +		ce = list_first_entry(&guc->guc_id_list,
> +				      struct intel_context,
> +				      guc_id_link);
> +
> +		GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
> +		GEM_BUG_ON(context_guc_id_invalid(ce));
> +
> +		list_del_init(&ce->guc_id_link);
> +		guc_id = ce->guc_id;
> +		set_context_guc_id_invalid(ce);
> +		return guc_id;
> +	} else {
> +		return -EAGAIN;
> +	}
> +}
> +
> +static int assign_guc_id(struct intel_guc *guc, u16 *out)
> +{
> +	int ret;
> +
> +	lockdep_assert_held(&guc->contexts_lock);
> +
> +	ret = new_guc_id(guc);
> +	if (unlikely(ret < 0)) {
> +		ret = steal_guc_id(guc);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	*out = ret;
> +	return 0;
> +}
> +
> +#define PIN_GUC_ID_TRIES	4
> +static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	int ret = 0;
> +	unsigned long flags, tries = PIN_GUC_ID_TRIES;
> +
> +	GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
> +
> +try_again:
> +	spin_lock_irqsave(&guc->contexts_lock, flags);
> +
> +	if (context_guc_id_invalid(ce)) {
> +		ret = assign_guc_id(guc, &ce->guc_id);
> +		if (ret)
> +			goto out_unlock;
> +		ret = 1;	/* Indicates newly assigned guc_id */
> +	}
> +	if (!list_empty(&ce->guc_id_link))
> +		list_del_init(&ce->guc_id_link);
> +	atomic_inc(&ce->guc_id_ref);
> +
> +out_unlock:
> +	spin_unlock_irqrestore(&guc->contexts_lock, flags);
> +
> +	/*
> +	 * -EAGAIN indicates no guc_ids are available, let's retire any
> +	 * outstanding requests to see if that frees up a guc_id. If the first
> +	 * retire didn't help, insert a sleep with the timeslice duration before
> +	 * attempting to retire more requests. Double the sleep period each
> +	 * subsequent pass before finally giving up. The sleep period has max of
> +	 * 100ms and minimum of 1ms.
> +	 */
> +	if (ret == -EAGAIN && --tries) {
> +		if (PIN_GUC_ID_TRIES - tries > 1) {
> +			unsigned int timeslice_shifted =
> +				ce->engine->props.timeslice_duration_ms <<
> +				(PIN_GUC_ID_TRIES - tries - 2);
> +			unsigned int max = min_t(unsigned int, 100,
> +						 timeslice_shifted);
> +
> +			msleep(max_t(unsigned int, max, 1));
> +		}
> +		intel_gt_retire_requests(guc_to_gt(guc));
> +		goto try_again;
> +	}
> +
> +	return ret;
> +}
> +
> +static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	unsigned long flags;
> +
> +	GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
> +
> +	spin_lock_irqsave(&guc->contexts_lock, flags);
> +	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
> +	    !atomic_read(&ce->guc_id_ref))
> +		list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
> +	spin_unlock_irqrestore(&guc->contexts_lock, flags);
> +}
> +
> +static int __guc_action_register_context(struct intel_guc *guc,
> +					 u32 guc_id,
> +					 u32 offset)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_REGISTER_CONTEXT,
> +		guc_id,
> +		offset,
> +	};
> +
> +	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
> +}
> +
> +static int register_context(struct intel_context *ce)
> +{
> +	struct intel_guc *guc = ce_to_guc(ce);
> +	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
> +		ce->guc_id * sizeof(struct guc_lrc_desc);
> +
> +	return __guc_action_register_context(guc, ce->guc_id, offset);
> +}
> +
> +static int __guc_action_deregister_context(struct intel_guc *guc,
> +					   u32 guc_id)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
> +		guc_id,
> +	};
> +
> +	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
> +}
> +
> +static int deregister_context(struct intel_context *ce, u32 guc_id)
> +{
> +	struct intel_guc *guc = ce_to_guc(ce);
> +
> +	return __guc_action_deregister_context(guc, guc_id);
> +}
> +
> +static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
> +{
> +	switch (class) {
> +	case RENDER_CLASS:
> +		return mask >> RCS0;
> +	case VIDEO_ENHANCEMENT_CLASS:
> +		return mask >> VECS0;
> +	case VIDEO_DECODE_CLASS:
> +		return mask >> VCS0;
> +	case COPY_ENGINE_CLASS:
> +		return mask >> BCS0;
> +	default:
> +		MISSING_CASE(class);
> +		return 0;
> +	}
> +}
> +
> +static void guc_context_policy_init(struct intel_engine_cs *engine,
> +				    struct guc_lrc_desc *desc)
> +{
> +	desc->policy_flags = 0;
> +
> +	desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US;
> +	desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US;
> +}
> +
> +static int guc_lrc_desc_pin(struct intel_context *ce)
> +{
> +	struct intel_engine_cs *engine = ce->engine;
> +	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
> +	struct intel_guc *guc = &engine->gt->uc.guc;
> +	u32 desc_idx = ce->guc_id;
> +	struct guc_lrc_desc *desc;
> +	bool context_registered;
> +	intel_wakeref_t wakeref;
> +	int ret = 0;
> +
> +	GEM_BUG_ON(!engine->mask);
> +
> +	/*
> +	 * Ensure LRC + CT vmas are in same region as write barrier is done
> +	 * based on CT vma region.
> +	 */
> +	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
> +		   i915_gem_object_is_lmem(ce->ring->vma->obj));
> +
> +	context_registered = lrc_desc_registered(guc, desc_idx);
> +
> +	reset_lrc_desc(guc, desc_idx);
> +	set_lrc_desc_registered(guc, desc_idx, ce);
> +
> +	desc = __get_lrc_desc(guc, desc_idx);
> +	desc->engine_class = engine_class_to_guc_class(engine->class);
> +	desc->engine_submit_mask = adjust_engine_mask(engine->class,
> +						      engine->mask);
> +	desc->hw_context_desc = ce->lrc.lrca;
> +	desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
> +	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
> +	guc_context_policy_init(engine, desc);
> +	init_sched_state(ce);
> +
> +	/*
> +	 * The context_lookup xarray is used to determine if the hardware
> +	 * context is currently registered. There are two cases in which it
> +	 * could be registered either the guc_id has been stolen from
> +	 * another context or the lrc descriptor address of this context has
> +	 * changed. In either case the context needs to be deregistered with the
> +	 * GuC before registering this context.
> +	 */
> +	if (context_registered) {
> +		set_context_wait_for_deregister_to_register(ce);
> +		intel_context_get(ce);
> +
> +		/*
> +		 * If stealing the guc_id, this ce has the same guc_id as the
> +		 * context whose guc_id was stolen.
> +		 */
> +		with_intel_runtime_pm(runtime_pm, wakeref)
> +			ret = deregister_context(ce, ce->guc_id);
> +	} else {
> +		with_intel_runtime_pm(runtime_pm, wakeref)
> +			ret = register_context(ce);
> +	}
> +
> +	return ret;
>   }
>   
>   static int guc_context_pre_pin(struct intel_context *ce,
> @@ -443,36 +814,144 @@ static int guc_context_pre_pin(struct intel_context *ce,
>   
>   static int guc_context_pin(struct intel_context *ce, void *vaddr)
>   {
> +	if (i915_ggtt_offset(ce->state) !=
> +	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
> +		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
> +
> +	/*
> +	 * GuC context gets pinned in guc_request_alloc. See that function for
> +	 * explanation of why.
> +	 */
> +
>   	return lrc_pin(ce, ce->engine, vaddr);
>   }
>   
> +static void guc_context_unpin(struct intel_context *ce)
> +{
> +	struct intel_guc *guc = ce_to_guc(ce);
> +
> +	unpin_guc_id(guc, ce);
> +	lrc_unpin(ce);
> +}
> +
> +static void guc_context_post_unpin(struct intel_context *ce)
> +{
> +	lrc_post_unpin(ce);
> +}
> +
> +static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> +{
> +	struct intel_guc *guc = ce_to_guc(ce);
> +	unsigned long flags;
> +
> +	GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
> +	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
> +
> +	spin_lock_irqsave(&ce->guc_state.lock, flags);
> +	set_context_destroyed(ce);
> +	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> +
> +	deregister_context(ce, ce->guc_id);
> +}
> +
> +static void guc_context_destroy(struct kref *kref)
> +{
> +	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
> +	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
> +	struct intel_guc *guc = ce_to_guc(ce);
> +	intel_wakeref_t wakeref;
> +	unsigned long flags;
> +
> +	/*
> +	 * If the guc_id is invalid this context has been stolen and we can free
> +	 * it immediately. Also can be freed immediately if the context is not
> +	 * registered with the GuC.
> +	 */
> +	if (context_guc_id_invalid(ce)) {
> +		lrc_destroy(kref);
> +		return;
> +	} else if (!lrc_desc_registered(guc, ce->guc_id)) {
> +		release_guc_id(guc, ce);
> +		lrc_destroy(kref);
> +		return;
> +	}
> +
> +	/*
> +	 * We have to acquire the context spinlock and check guc_id again, if it
> +	 * is valid it hasn't been stolen and needs to be deregistered. We
> +	 * delete this context from the list of unpinned guc_ids available to
> +	 * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
> +	 * returns indicating this context has been deregistered the guc_id is
> +	 * returned to the pool of available guc_ids.
> +	 */
> +	spin_lock_irqsave(&guc->contexts_lock, flags);
> +	if (context_guc_id_invalid(ce)) {
> +		spin_unlock_irqrestore(&guc->contexts_lock, flags);
> +		lrc_destroy(kref);
> +		return;
> +	}
> +
> +	if (!list_empty(&ce->guc_id_link))
> +		list_del_init(&ce->guc_id_link);
> +	spin_unlock_irqrestore(&guc->contexts_lock, flags);
> +
> +	/*
> +	 * We defer GuC context deregistration until the context is destroyed
> +	 * in order to save on CTBs. With this optimization ideally we only need
> +	 * 1 CTB to register the context during the first pin and 1 CTB to
> +	 * deregister the context when the context is destroyed. Without this
> +	 * optimization, a CTB would be needed every pin & unpin.
> +	 *
> +	 * XXX: Need to acquire the runtime wakeref as this can be triggered
> +	 * from context_free_worker when runtime wakeref is not held.
> +	 * guc_lrc_desc_unpin requires the runtime as a GuC register is written
> +	 * in H2G CTB to deregister the context. A future patch may defer this
> +	 * H2G CTB if the runtime wakeref is zero.
> +	 */
> +	with_intel_runtime_pm(runtime_pm, wakeref)
> +		guc_lrc_desc_unpin(ce);
> +}
> +
> +static int guc_context_alloc(struct intel_context *ce)
> +{
> +	return lrc_alloc(ce, ce->engine);
> +}
> +
>   static const struct intel_context_ops guc_context_ops = {
>   	.alloc = guc_context_alloc,
>   
>   	.pre_pin = guc_context_pre_pin,
>   	.pin = guc_context_pin,
> -	.unpin = lrc_unpin,
> -	.post_unpin = lrc_post_unpin,
> +	.unpin = guc_context_unpin,
> +	.post_unpin = guc_context_post_unpin,
>   
>   	.enter = intel_context_enter_engine,
>   	.exit = intel_context_exit_engine,
>   
>   	.reset = lrc_reset,
> -	.destroy = lrc_destroy,
> +	.destroy = guc_context_destroy,
>   };
>   
> -static int guc_request_alloc(struct i915_request *request)
> +static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
> +{
> +	return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
> +		!lrc_desc_registered(ce_to_guc(ce), ce->guc_id);
> +}
> +
> +static int guc_request_alloc(struct i915_request *rq)
>   {
> +	struct intel_context *ce = rq->context;
> +	struct intel_guc *guc = ce_to_guc(ce);
>   	int ret;
>   
> -	GEM_BUG_ON(!intel_context_is_pinned(request->context));
> +	GEM_BUG_ON(!intel_context_is_pinned(rq->context));
>   
>   	/*
>   	 * Flush enough space to reduce the likelihood of waiting after
>   	 * we start building the request - in which case we will just
>   	 * have to repeat work.
>   	 */
> -	request->reserved_space += GUC_REQUEST_SIZE;
> +	rq->reserved_space += GUC_REQUEST_SIZE;
>   
>   	/*
>   	 * Note that after this point, we have committed to using
> @@ -483,56 +962,47 @@ static int guc_request_alloc(struct i915_request *request)
>   	 */
>   
>   	/* Unconditionally invalidate GPU caches and TLBs. */
> -	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
> +	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
>   	if (ret)
>   		return ret;
>   
> -	request->reserved_space -= GUC_REQUEST_SIZE;
> -	return 0;
> -}
> -
> -static inline void queue_request(struct i915_sched_engine *sched_engine,
> -				 struct i915_request *rq,
> -				 int prio)
> -{
> -	GEM_BUG_ON(!list_empty(&rq->sched.link));
> -	list_add_tail(&rq->sched.link,
> -		      i915_sched_lookup_priolist(sched_engine, prio));
> -	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> -}
> -
> -static int guc_bypass_tasklet_submit(struct intel_guc *guc,
> -				     struct i915_request *rq)
> -{
> -	int ret;
> -
> -	__i915_request_submit(rq);
> -
> -	trace_i915_request_in(rq, 0);
> -
> -	guc_set_lrc_tail(rq);
> -	ret = guc_add_request(guc, rq);
> -	if (ret == -EBUSY)
> -		guc->stalled_request = rq;
> -
> -	return ret;
> -}
> +	rq->reserved_space -= GUC_REQUEST_SIZE;
>   
> -static void guc_submit_request(struct i915_request *rq)
> -{
> -	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
> -	struct intel_guc *guc = &rq->engine->gt->uc.guc;
> -	unsigned long flags;
> +	/*
> +	 * Call pin_guc_id here rather than in the pinning step as with
> +	 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the
> +	 * guc_ids and creating horrible race conditions. This is especially bad
> +	 * when guc_ids are being stolen due to over subscription. By the time
> +	 * this function is reached, it is guaranteed that the guc_id will be
> +	 * persistent until the generated request is retired. Thus, sealing these
> +	 * race conditions. It is still safe to fail here if guc_ids are
> +	 * exhausted and return -EAGAIN to the user indicating that they can try
> +	 * again in the future.
> +	 *
> +	 * There is no need for a lock here as the timeline mutex ensures at
> +	 * most one context can be executing this code path at once. The
> +	 * guc_id_ref is incremented once for every request in flight and
> +	 * decremented on each retire. When it is zero, a lock around the
> +	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
> +	 */
> +	if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
> +		return 0;
>   
> -	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&sched_engine->lock, flags);
> +	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
> +	if (unlikely(ret < 0))
> +		return ret;
> +	if (context_needs_register(ce, !!ret)) {
> +		ret = guc_lrc_desc_pin(ce);
> +		if (unlikely(ret)) {	/* unwind */
> +			atomic_dec(&ce->guc_id_ref);
> +			unpin_guc_id(guc, ce);
> +			return ret;
> +		}
> +	}
>   
> -	if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
> -		queue_request(sched_engine, rq, rq_prio(rq));
> -	else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
> -		tasklet_hi_schedule(&sched_engine->tasklet);
> +	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
>   
> -	spin_unlock_irqrestore(&sched_engine->lock, flags);
> +	return 0;
>   }
>   
>   static void sanitize_hwsp(struct intel_engine_cs *engine)
> @@ -606,6 +1076,41 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
>   	engine->submit_request = guc_submit_request;
>   }
>   
> +static inline void guc_kernel_context_pin(struct intel_guc *guc,
> +					  struct intel_context *ce)
> +{
> +	if (context_guc_id_invalid(ce))
> +		pin_guc_id(guc, ce);
> +	guc_lrc_desc_pin(ce);
> +}
> +
> +static inline void guc_init_lrc_mapping(struct intel_guc *guc)
> +{
> +	struct intel_gt *gt = guc_to_gt(guc);
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	/* make sure all descriptors are clean... */
> +	xa_destroy(&guc->context_lookup);
> +
> +	/*
> +	 * Some contexts might have been pinned before we enabled GuC
> +	 * submission, so we need to add them to the GuC bookkeeping.
> +	 * Also, after a reset the of GuC we want to make sure that the
the of -> of the

> +	 * information shared with GuC is properly reset. The kernel LRCs are
> +	 * not attached to the gem_context, so they need to be added separately.
> +	 *
> +	 * Note: we purposely do not check the return of guc_lrc_desc_pin,
purposefully

Just a bunch of nits, so maybe not worth respinning. I think it needs an 
r-b from Daniele as well, given that he had a bunch of comments on the 
previous rev too. But apart from the nits, looks good to me.

Reviewed-by: John Harrison <John.C.Harrison@Intel.com>

> +	 * because that function can only fail if a reset is just starting. This
> +	 * is at the end of reset so presumably another reset isn't happening
> +	 * and even if it did, this code would be run again.
> +	 */
> +
> +	for_each_engine(engine, gt, id)
> +		if (engine->kernel_context)
> +			guc_kernel_context_pin(guc, engine->kernel_context);
> +}
> +
>   static void guc_release(struct intel_engine_cs *engine)
>   {
>   	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
> @@ -718,6 +1223,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
>   
>   void intel_guc_submission_enable(struct intel_guc *guc)
>   {
> +	guc_init_lrc_mapping(guc);
>   }
>   
>   void intel_guc_submission_disable(struct intel_guc *guc)
> @@ -743,3 +1249,62 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
>   {
>   	guc->submission_selected = __guc_submission_selected(guc);
>   }
> +
> +static inline struct intel_context *
> +g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
> +{
> +	struct intel_context *ce;
> +
> +	if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
> +		drm_err(&guc_to_gt(guc)->i915->drm,
> +			"Invalid desc_idx %u", desc_idx);
> +		return NULL;
> +	}
> +
> +	ce = __get_context(guc, desc_idx);
> +	if (unlikely(!ce)) {
> +		drm_err(&guc_to_gt(guc)->i915->drm,
> +			"Context is NULL, desc_idx %u", desc_idx);
> +		return NULL;
> +	}
> +
> +	return ce;
> +}
> +
> +int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
> +					  const u32 *msg,
> +					  u32 len)
> +{
> +	struct intel_context *ce;
> +	u32 desc_idx = msg[0];
> +
> +	if (unlikely(len < 1)) {
> +		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
> +		return -EPROTO;
> +	}
> +
> +	ce = g2h_context_lookup(guc, desc_idx);
> +	if (unlikely(!ce))
> +		return -EPROTO;
> +
> +	if (context_wait_for_deregister_to_register(ce)) {
> +		struct intel_runtime_pm *runtime_pm =
> +			&ce->engine->gt->i915->runtime_pm;
> +		intel_wakeref_t wakeref;
> +
> +		/*
> +		 * Previous owner of this guc_id has been deregistered, now safe
>> +         * to register this context.
> +		 */
> +		with_intel_runtime_pm(runtime_pm, wakeref)
> +			register_context(ce);
> +		clr_context_wait_for_deregister_to_register(ce);
> +		intel_context_put(ce);
> +	} else if (context_destroyed(ce)) {
> +		/* Context has been destroyed */
> +		release_guc_id(guc, ce);
> +		lrc_destroy(&ce->ref);
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 943fe485c662..204c95c39353 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -4142,6 +4142,7 @@ enum {
>   	FAULT_AND_CONTINUE /* Unsupported */
>   };
>   
> +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
>   #define GEN8_CTX_VALID (1 << 0)
>   #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
>   #define GEN8_CTX_FORCE_RESTORE (1 << 2)
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 09ebea9a0090..ef26724fe980 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -407,6 +407,7 @@ bool i915_request_retire(struct i915_request *rq)
>   	 */
>   	if (!list_empty(&rq->sched.link))
>   		remove_from_engine(rq);
> +	atomic_dec(&rq->context->guc_id_ref);
>   	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
>   
>   	__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
Daniele Ceraolo Spurio July 21, 2021, 11:57 p.m. UTC | #2
On 7/20/2021 6:51 PM, John Harrison wrote:
> On 7/20/2021 15:39, Matthew Brost wrote:
>> Implement GuC context operations which includes GuC specific operations
>> alloc, pin, unpin, and destroy.
>>
>> v2:
>>   (Daniel Vetter)
>>    - Use msleep_interruptible rather than cond_resched in busy loop
>>   (Michal)
>>    - Remove C++ style comment
>> v3:
>>   (Matthew Brost)
>>    - Drop GUC_ID_START
>>   (John Harrison)
>>    - Fix a bunch of typos
>>    - Use drm_err rather than drm_dbg for G2H errors
>>   (Daniele)
>>    - Fix ;; typo
>>    - Clean up sched state functions
>>    - Add lockdep for guc_id functions
>>    - Don't call __release_guc_id when guc_id is invalid
>>    - Use MISSING_CASE
>>    - Add comment in guc_context_pin
>>    - Use shorter path to rpm
>>   (Daniele / CI)
>>    - Don't call release_guc_id on an invalid guc_id in destroy
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_context.c       |   5 +
>>   drivers/gpu/drm/i915/gt/intel_context_types.h |  22 +-
>>   drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |   1 -
>>   drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  40 ++
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c     |   4 +
>>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 667 ++++++++++++++++--
>>   drivers/gpu/drm/i915/i915_reg.h               |   1 +
>>   drivers/gpu/drm/i915/i915_request.c           |   1 +
>>   8 files changed, 686 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
>> b/drivers/gpu/drm/i915/gt/intel_context.c
>> index bd63813c8a80..32fd6647154b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> @@ -384,6 +384,11 @@ intel_context_init(struct intel_context *ce, 
>> struct intel_engine_cs *engine)
>>         mutex_init(&ce->pin_mutex);
>>   +    spin_lock_init(&ce->guc_state.lock);
>> +
>> +    ce->guc_id = GUC_INVALID_LRC_ID;
>> +    INIT_LIST_HEAD(&ce->guc_id_link);
>> +
>>       i915_active_init(&ce->active,
>>                __intel_context_active, __intel_context_retire, 0);
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
>> b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index 6d99631d19b9..606c480aec26 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -96,6 +96,7 @@ struct intel_context {
>>   #define CONTEXT_BANNED            6
>>   #define CONTEXT_FORCE_SINGLE_SUBMISSION    7
>>   #define CONTEXT_NOPREEMPT        8
>> +#define CONTEXT_LRCA_DIRTY        9
>>         struct {
>>           u64 timeout_us;
>> @@ -138,14 +139,29 @@ struct intel_context {
>>         u8 wa_bb_page; /* if set, page num reserved for context 
>> workarounds */
>>   +    struct {
>> +        /** lock: protects everything in guc_state */
>> +        spinlock_t lock;
>> +        /**
>> +         * sched_state: scheduling state of this context using GuC
>> +         * submission
>> +         */
>> +        u8 sched_state;
>> +    } guc_state;
>> +
>>       /* GuC scheduling state flags that do not require a lock. */
>>       atomic_t guc_sched_state_no_lock;
>>   +    /* GuC LRC descriptor ID */
>> +    u16 guc_id;
>> +
>> +    /* GuC LRC descriptor reference count */
>> +    atomic_t guc_id_ref;
>> +
>>       /*
>> -     * GuC LRC descriptor ID - Not assigned in this patch but future 
>> patches
>> -     * in the series will.
>> +     * GuC ID link - in list when unpinned but guc_id still valid in 
>> GuC
>>        */
>> -    u16 guc_id;
>> +    struct list_head guc_id_link;
>>   };
>>     #endif /* __INTEL_CONTEXT_TYPES__ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h 
>> b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> index 41e5350a7a05..49d4857ad9b7 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> @@ -87,7 +87,6 @@
>>   #define GEN11_CSB_WRITE_PTR_MASK    (GEN11_CSB_PTR_MASK << 0)
>>     #define MAX_CONTEXT_HW_ID    (1 << 21) /* exclusive */
>> -#define MAX_GUC_CONTEXT_HW_ID    (1 << 20) /* exclusive */
>>   #define GEN11_MAX_CONTEXT_HW_ID    (1 << 11) /* exclusive */
>>   /* in Gen12 ID 0x7FF is reserved to indicate idle */
>>   #define GEN12_MAX_CONTEXT_HW_ID    (GEN11_MAX_CONTEXT_HW_ID - 1)
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>> index 8c7b92f699f1..30773cd699f5 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>> @@ -7,6 +7,7 @@
>>   #define _INTEL_GUC_H_
>>     #include <linux/xarray.h>
>> +#include <linux/delay.h>
>>     #include "intel_uncore.h"
>>   #include "intel_guc_fw.h"
>> @@ -44,6 +45,14 @@ struct intel_guc {
>>           void (*disable)(struct intel_guc *guc);
>>       } interrupts;
>>   +    /*
>> +     * contexts_lock protects the pool of free guc ids and a linked 
>> list of
>> +     * guc ids available to be stolen
>> +     */
>> +    spinlock_t contexts_lock;
>> +    struct ida guc_ids;
>> +    struct list_head guc_id_list;
>> +
>>       bool submission_selected;
>>         struct i915_vma *ads_vma;
>> @@ -101,6 +110,34 @@ intel_guc_send_and_receive(struct intel_guc 
>> *guc, const u32 *action, u32 len,
>>                    response_buf, response_buf_size, 0);
>>   }
>>   +static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
>> +                       const u32 *action,
>> +                       u32 len,
>> +                       bool loop)
>> +{
>> +    int err;
>> +    unsigned int sleep_period_ms = 1;
>> +    bool not_atomic = !in_atomic() && !irqs_disabled();
>> +
>> +    /* No sleeping with spin locks, just busy loop */
>> +    might_sleep_if(loop && not_atomic);
>> +
>> +retry:
>> +    err = intel_guc_send_nb(guc, action, len);
>> +    if (unlikely(err == -EBUSY && loop)) {
>> +        if (likely(not_atomic)) {
>> +            if (msleep_interruptible(sleep_period_ms))
>> +                return -EINTR;
>> +            sleep_period_ms = sleep_period_ms << 1;
>> +        } else {
>> +            cpu_relax();
>> +        }
>> +        goto retry;
>> +    }
>> +
>> +    return err;
>> +}
>> +
>>   static inline void intel_guc_to_host_event_handler(struct intel_guc 
>> *guc)
>>   {
>>       intel_guc_ct_event_handler(&guc->ct);
>> @@ -202,6 +239,9 @@ static inline void intel_guc_disable_msg(struct 
>> intel_guc *guc, u32 mask)
>>   int intel_guc_reset_engine(struct intel_guc *guc,
>>                  struct intel_engine_cs *engine);
>>   +int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>> +                      const u32 *msg, u32 len);
>> +
>>   void intel_guc_load_status(struct intel_guc *guc, struct 
>> drm_printer *p);
>>     #endif
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> index 83ec60ea3f89..28ff82c5be45 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> @@ -928,6 +928,10 @@ static int ct_process_request(struct 
>> intel_guc_ct *ct, struct ct_incoming_msg *r
>>       case INTEL_GUC_ACTION_DEFAULT:
>>           ret = intel_guc_to_host_process_recv_msg(guc, payload, len);
>>           break;
>> +    case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
>> +        ret = intel_guc_deregister_done_process_msg(guc, payload,
>> +                                len);
>> +        break;
>>       default:
>>           ret = -EOPNOTSUPP;
>>           break;
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> index 53b4a5eb4a85..6940b9d62118 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> @@ -13,7 +13,9 @@
>>   #include "gt/intel_gt.h"
>>   #include "gt/intel_gt_irq.h"
>>   #include "gt/intel_gt_pm.h"
>> +#include "gt/intel_gt_requests.h"
>>   #include "gt/intel_lrc.h"
>> +#include "gt/intel_lrc_reg.h"
>>   #include "gt/intel_mocs.h"
>>   #include "gt/intel_ring.h"
>>   @@ -85,6 +87,72 @@ static inline void clr_context_enabled(struct 
>> intel_context *ce)
>>              &ce->guc_sched_state_no_lock);
>>   }
>>   +/*
>> + * Below is a set of functions which control the GuC scheduling 
>> state which
>> + * require a lock, aside from the special case where the functions 
>> are called
>> + * from guc_lrc_desc_pin(). In that case it isn't possible for any 
>> other code
>> + * path to be executing on the context.
>> + */
>> +#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER    BIT(0)
>> +#define SCHED_STATE_DESTROYED                BIT(1)
>> +static inline void init_sched_state(struct intel_context *ce)
>> +{
>> +    /* Only should be called from guc_lrc_desc_pin() */
>> +    atomic_set(&ce->guc_sched_state_no_lock, 0);
>> +    ce->guc_state.sched_state = 0;
>> +}
>> +
>> +static inline bool
>> +context_wait_for_deregister_to_register(struct intel_context *ce)
>> +{
>> +    return ce->guc_state.sched_state &
>> +        SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
>> +}
>> +
>> +static inline void
>> +set_context_wait_for_deregister_to_register(struct intel_context *ce)
>> +{
>> +    /* Only should be called from guc_lrc_desc_pin() */
>> +    ce->guc_state.sched_state |=
>> +        SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
>> +}
>> +
>> +static inline void
>> +clr_context_wait_for_deregister_to_register(struct intel_context *ce)
>> +{
>> +    lockdep_assert_held(&ce->guc_state.lock);
>> +    ce->guc_state.sched_state &=
>> +        ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
>> +}
>> +
>> +static inline bool
>> +context_destroyed(struct intel_context *ce)
>> +{
>> +    return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
>> +}
>> +
>> +static inline void
>> +set_context_destroyed(struct intel_context *ce)
>> +{
>> +    lockdep_assert_held(&ce->guc_state.lock);
>> +    ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
>> +}
>> +
>> +static inline bool context_guc_id_invalid(struct intel_context *ce)
>> +{
>> +    return (ce->guc_id == GUC_INVALID_LRC_ID);
> Could have dropped the brackets from this one too.
>
>> +}
>> +
>> +static inline void set_context_guc_id_invalid(struct intel_context *ce)
>> +{
>> +    ce->guc_id = GUC_INVALID_LRC_ID;
>> +}
>> +
>> +static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
>> +{
>> +    return &ce->engine->gt->uc.guc;
>> +}
>> +
>>   static inline struct i915_priolist *to_priolist(struct rb_node *rb)
>>   {
>>       return rb_entry(rb, struct i915_priolist, node);
>> @@ -155,6 +223,9 @@ static int guc_add_request(struct intel_guc *guc, 
>> struct i915_request *rq)
>>       int len = 0;
>>       bool enabled = context_enabled(ce);
>>   +    GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
>> +    GEM_BUG_ON(context_guc_id_invalid(ce));
>> +
>>       if (!enabled) {
>>           action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
>>           action[len++] = ce->guc_id;
>> @@ -417,6 +488,10 @@ int intel_guc_submission_init(struct intel_guc 
>> *guc)
>>         xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
>>   +    spin_lock_init(&guc->contexts_lock);
>> +    INIT_LIST_HEAD(&guc->guc_id_list);
>> +    ida_init(&guc->guc_ids);
>> +
>>       return 0;
>>   }
>>   @@ -429,9 +504,305 @@ void intel_guc_submission_fini(struct 
>> intel_guc *guc)
>>       i915_sched_engine_put(guc->sched_engine);
>>   }
>>   -static int guc_context_alloc(struct intel_context *ce)
>> +static inline void queue_request(struct i915_sched_engine 
>> *sched_engine,
>> +                 struct i915_request *rq,
>> +                 int prio)
>>   {
>> -    return lrc_alloc(ce, ce->engine);
>> +    GEM_BUG_ON(!list_empty(&rq->sched.link));
>> +    list_add_tail(&rq->sched.link,
>> +              i915_sched_lookup_priolist(sched_engine, prio));
>> +    set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>> +}
>> +
>> +static int guc_bypass_tasklet_submit(struct intel_guc *guc,
>> +                     struct i915_request *rq)
>> +{
>> +    int ret;
>> +
>> +    __i915_request_submit(rq);
>> +
>> +    trace_i915_request_in(rq, 0);
>> +
>> +    guc_set_lrc_tail(rq);
>> +    ret = guc_add_request(guc, rq);
>> +    if (ret == -EBUSY)
>> +        guc->stalled_request = rq;
>> +
>> +    return ret;
>> +}
>> +
>> +static void guc_submit_request(struct i915_request *rq)
>> +{
>> +    struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
>> +    struct intel_guc *guc = &rq->engine->gt->uc.guc;
>> +    unsigned long flags;
>> +
>> +    /* Will be called from irq-context when using foreign fences. */
>> +    spin_lock_irqsave(&sched_engine->lock, flags);
>> +
>> +    if (guc->stalled_request || 
>> !i915_sched_engine_is_empty(sched_engine))
>> +        queue_request(sched_engine, rq, rq_prio(rq));
>> +    else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
>> +        tasklet_hi_schedule(&sched_engine->tasklet);
>> +
>> +    spin_unlock_irqrestore(&sched_engine->lock, flags);
>> +}
>> +
>> +static int new_guc_id(struct intel_guc *guc)
>> +{
>> +    return ida_simple_get(&guc->guc_ids, 0,
>> +                  GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
>> +                  __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
>> +}
>> +
>> +static void __release_guc_id(struct intel_guc *guc, struct 
>> intel_context *ce)
>> +{
>> +    if (!context_guc_id_invalid(ce)) {
>> +        ida_simple_remove(&guc->guc_ids, ce->guc_id);
>> +        reset_lrc_desc(guc, ce->guc_id);
>> +        set_context_guc_id_invalid(ce);
>> +    }
>> +    if (!list_empty(&ce->guc_id_link))
>> +        list_del_init(&ce->guc_id_link);
>> +}
>> +
>> +static void release_guc_id(struct intel_guc *guc, struct 
>> intel_context *ce)
>> +{
>> +    unsigned long flags;
>> +
>> +    spin_lock_irqsave(&guc->contexts_lock, flags);
>> +    __release_guc_id(guc, ce);
>> +    spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +}
>> +
>> +static int steal_guc_id(struct intel_guc *guc)
>> +{
>> +    struct intel_context *ce;
>> +    int guc_id;
>> +
>> +    lockdep_assert_held(&guc->contexts_lock);
>> +
>> +    if (!list_empty(&guc->guc_id_list)) {
>> +        ce = list_first_entry(&guc->guc_id_list,
>> +                      struct intel_context,
>> +                      guc_id_link);
>> +
>> +        GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
>> +        GEM_BUG_ON(context_guc_id_invalid(ce));
>> +
>> +        list_del_init(&ce->guc_id_link);
>> +        guc_id = ce->guc_id;
>> +        set_context_guc_id_invalid(ce);
>> +        return guc_id;
>> +    } else {
>> +        return -EAGAIN;
>> +    }
>> +}
>> +
>> +static int assign_guc_id(struct intel_guc *guc, u16 *out)
>> +{
>> +    int ret;
>> +
>> +    lockdep_assert_held(&guc->contexts_lock);
>> +
>> +    ret = new_guc_id(guc);
>> +    if (unlikely(ret < 0)) {
>> +        ret = steal_guc_id(guc);
>> +        if (ret < 0)
>> +            return ret;
>> +    }
>> +
>> +    *out = ret;
>> +    return 0;
>> +}
>> +
>> +#define PIN_GUC_ID_TRIES    4
>> +static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> +    int ret = 0;
>> +    unsigned long flags, tries = PIN_GUC_ID_TRIES;
>> +
>> +    GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
>> +
>> +try_again:
>> +    spin_lock_irqsave(&guc->contexts_lock, flags);
>> +
>> +    if (context_guc_id_invalid(ce)) {
>> +        ret = assign_guc_id(guc, &ce->guc_id);
>> +        if (ret)
>> +            goto out_unlock;
>> +        ret = 1;    /* Indicates newly assigned guc_id */
>> +    }
>> +    if (!list_empty(&ce->guc_id_link))
>> +        list_del_init(&ce->guc_id_link);
>> +    atomic_inc(&ce->guc_id_ref);
>> +
>> +out_unlock:
>> +    spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +
>> +    /*
>> +     * -EAGAIN indicates no guc_ids are available, let's retire any
>> +     * outstanding requests to see if that frees up a guc_id. If the 
>> first
>> +     * retire didn't help, insert a sleep with the timeslice 
>> duration before
>> +     * attempting to retire more requests. Double the sleep period each
>> +     * subsequent pass before finally giving up. The sleep period 
>> has max of
>> +     * 100ms and minimum of 1ms.
>> +     */
>> +    if (ret == -EAGAIN && --tries) {
>> +        if (PIN_GUC_ID_TRIES - tries > 1) {
>> +            unsigned int timeslice_shifted =
>> +                ce->engine->props.timeslice_duration_ms <<
>> +                (PIN_GUC_ID_TRIES - tries - 2);
>> +            unsigned int max = min_t(unsigned int, 100,
>> +                         timeslice_shifted);
>> +
>> +            msleep(max_t(unsigned int, max, 1));
>> +        }
>> +        intel_gt_retire_requests(guc_to_gt(guc));
>> +        goto try_again;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void unpin_guc_id(struct intel_guc *guc, struct intel_context 
>> *ce)
>> +{
>> +    unsigned long flags;
>> +
>> +    GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
>> +
>> +    spin_lock_irqsave(&guc->contexts_lock, flags);
>> +    if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
>> +        !atomic_read(&ce->guc_id_ref))
>> +        list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
>> +    spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +}
>> +
>> +static int __guc_action_register_context(struct intel_guc *guc,
>> +                     u32 guc_id,
>> +                     u32 offset)
>> +{
>> +    u32 action[] = {
>> +        INTEL_GUC_ACTION_REGISTER_CONTEXT,
>> +        guc_id,
>> +        offset,
>> +    };
>> +
>> +    return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 
>> true);
>> +}
>> +
>> +static int register_context(struct intel_context *ce)
>> +{
>> +    struct intel_guc *guc = ce_to_guc(ce);
>> +    u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
>> +        ce->guc_id * sizeof(struct guc_lrc_desc);
>> +
>> +    return __guc_action_register_context(guc, ce->guc_id, offset);
>> +}
>> +
>> +static int __guc_action_deregister_context(struct intel_guc *guc,
>> +                       u32 guc_id)
>> +{
>> +    u32 action[] = {
>> +        INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
>> +        guc_id,
>> +    };
>> +
>> +    return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 
>> true);
>> +}
>> +
>> +static int deregister_context(struct intel_context *ce, u32 guc_id)
>> +{
>> +    struct intel_guc *guc = ce_to_guc(ce);
>> +
>> +    return __guc_action_deregister_context(guc, guc_id);
>> +}
>> +
>> +static intel_engine_mask_t adjust_engine_mask(u8 class, 
>> intel_engine_mask_t mask)
>> +{
>> +    switch (class) {
>> +    case RENDER_CLASS:
>> +        return mask >> RCS0;
>> +    case VIDEO_ENHANCEMENT_CLASS:
>> +        return mask >> VECS0;
>> +    case VIDEO_DECODE_CLASS:
>> +        return mask >> VCS0;
>> +    case COPY_ENGINE_CLASS:
>> +        return mask >> BCS0;
>> +    default:
>> +        MISSING_CASE(class);
>> +        return 0;
>> +    }
>> +}
>> +
>> +static void guc_context_policy_init(struct intel_engine_cs *engine,
>> +                    struct guc_lrc_desc *desc)
>> +{
>> +    desc->policy_flags = 0;
>> +
>> +    desc->execution_quantum = 
>> CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US;
>> +    desc->preemption_timeout = 
>> CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US;
>> +}
>> +
>> +static int guc_lrc_desc_pin(struct intel_context *ce)
>> +{
>> +    struct intel_engine_cs *engine = ce->engine;
>> +    struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
>> +    struct intel_guc *guc = &engine->gt->uc.guc;
>> +    u32 desc_idx = ce->guc_id;
>> +    struct guc_lrc_desc *desc;
>> +    bool context_registered;
>> +    intel_wakeref_t wakeref;
>> +    int ret = 0;
>> +
>> +    GEM_BUG_ON(!engine->mask);
>> +
>> +    /*
>> +     * Ensure LRC + CT vmas are in same region as write barrier is done
>> +     * based on CT vma region.
>> +     */
>> +    GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
>> + i915_gem_object_is_lmem(ce->ring->vma->obj));
>> +
>> +    context_registered = lrc_desc_registered(guc, desc_idx);
>> +
>> +    reset_lrc_desc(guc, desc_idx);
>> +    set_lrc_desc_registered(guc, desc_idx, ce);
>> +
>> +    desc = __get_lrc_desc(guc, desc_idx);
>> +    desc->engine_class = engine_class_to_guc_class(engine->class);
>> +    desc->engine_submit_mask = adjust_engine_mask(engine->class,
>> +                              engine->mask);
>> +    desc->hw_context_desc = ce->lrc.lrca;
>> +    desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
>> +    desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
>> +    guc_context_policy_init(engine, desc);
>> +    init_sched_state(ce);
>> +
>> +    /*
>> +     * The context_lookup xarray is used to determine if the hardware
>> +     * context is currently registered. There are two cases in which it
>> +     * could be registered either the guc_id has been stolen from
>> +     * another context or the lrc descriptor address of this context 
>> has
>> +     * changed. In either case the context needs to be deregistered 
>> with the
>> +     * GuC before registering this context.
>> +     */
>> +    if (context_registered) {
>> +        set_context_wait_for_deregister_to_register(ce);
>> +        intel_context_get(ce);
>> +
>> +        /*
>> +         * If stealing the guc_id, this ce has the same guc_id as the
>> +         * context whose guc_id was stolen.
>> +         */
>> +        with_intel_runtime_pm(runtime_pm, wakeref)
>> +            ret = deregister_context(ce, ce->guc_id);
>> +    } else {
>> +        with_intel_runtime_pm(runtime_pm, wakeref)
>> +            ret = register_context(ce);
>> +    }
>> +
>> +    return ret;
>>   }
>>     static int guc_context_pre_pin(struct intel_context *ce,
>> @@ -443,36 +814,144 @@ static int guc_context_pre_pin(struct 
>> intel_context *ce,
>>     static int guc_context_pin(struct intel_context *ce, void *vaddr)
>>   {
>> +    if (i915_ggtt_offset(ce->state) !=
>> +        (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
>> +        set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
>> +
>> +    /*
>> +     * GuC context gets pinned in guc_request_alloc. See that 
>> function for
>> +     * explanation of why.
>> +     */
>> +
>>       return lrc_pin(ce, ce->engine, vaddr);
>>   }
>>   +static void guc_context_unpin(struct intel_context *ce)
>> +{
>> +    struct intel_guc *guc = ce_to_guc(ce);
>> +
>> +    unpin_guc_id(guc, ce);
>> +    lrc_unpin(ce);
>> +}
>> +
>> +static void guc_context_post_unpin(struct intel_context *ce)
>> +{
>> +    lrc_post_unpin(ce);
>> +}
>> +
>> +static inline void guc_lrc_desc_unpin(struct intel_context *ce)
>> +{
>> +    struct intel_guc *guc = ce_to_guc(ce);
>> +    unsigned long flags;
>> +
>> +    GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
>> +    GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
>> +
>> +    spin_lock_irqsave(&ce->guc_state.lock, flags);
>> +    set_context_destroyed(ce);
>> +    spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>> +
>> +    deregister_context(ce, ce->guc_id);
>> +}
>> +
>> +static void guc_context_destroy(struct kref *kref)
>> +{
>> +    struct intel_context *ce = container_of(kref, typeof(*ce), ref);
>> +    struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
>> +    struct intel_guc *guc = ce_to_guc(ce);
>> +    intel_wakeref_t wakeref;
>> +    unsigned long flags;
>> +
>> +    /*
>> +     * If the guc_id is invalid this context has been stolen and we 
>> can free
>> +     * it immediately. Also can be freed immediately if the context 
>> is not
>> +     * registered with the GuC.
>> +     */
>> +    if (context_guc_id_invalid(ce)) {
>> +        lrc_destroy(kref);
>> +        return;
>> +    } else if (!lrc_desc_registered(guc, ce->guc_id)) {
>> +        release_guc_id(guc, ce);
>> +        lrc_destroy(kref);
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * We have to acquire the context spinlock and check guc_id 
>> again, if it
>> +     * is valid it hasn't been stolen and needs to be deregistered. We
>> +     * delete this context from the list of unpinned guc_ids 
>> available to
>> +     * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
>> +     * returns indicating this context has been deregistered the 
>> guc_id is
>> +     * returned to the pool of available guc_ids.
>> +     */
>> +    spin_lock_irqsave(&guc->contexts_lock, flags);
>> +    if (context_guc_id_invalid(ce)) {
>> +        spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +        lrc_destroy(kref);
>> +        return;
>> +    }
>> +
>> +    if (!list_empty(&ce->guc_id_link))
>> +        list_del_init(&ce->guc_id_link);
>> +    spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +
>> +    /*
>> +     * We defer GuC context deregistration until the context is 
>> destroyed
>> +     * in order to save on CTBs. With this optimization ideally we 
>> only need
>> +     * 1 CTB to register the context during the first pin and 1 CTB to
>> +     * deregister the context when the context is destroyed. Without 
>> this
>> +     * optimization, a CTB would be needed every pin & unpin.
>> +     *
>> +     * XXX: Need to acquire the runtime wakeref as this can be 
>> triggered
>> +     * from context_free_worker when runtime wakeref is not held.
>> +     * guc_lrc_desc_unpin requires the runtime as a GuC register is 
>> written
>> +     * in H2G CTB to deregister the context. A future patch may 
>> defer this
>> +     * H2G CTB if the runtime wakeref is zero.
>> +     */
>> +    with_intel_runtime_pm(runtime_pm, wakeref)
>> +        guc_lrc_desc_unpin(ce);
>> +}
>> +
>> +static int guc_context_alloc(struct intel_context *ce)
>> +{
>> +    return lrc_alloc(ce, ce->engine);
>> +}
>> +
>>   static const struct intel_context_ops guc_context_ops = {
>>       .alloc = guc_context_alloc,
>>         .pre_pin = guc_context_pre_pin,
>>       .pin = guc_context_pin,
>> -    .unpin = lrc_unpin,
>> -    .post_unpin = lrc_post_unpin,
>> +    .unpin = guc_context_unpin,
>> +    .post_unpin = guc_context_post_unpin,
>>         .enter = intel_context_enter_engine,
>>       .exit = intel_context_exit_engine,
>>         .reset = lrc_reset,
>> -    .destroy = lrc_destroy,
>> +    .destroy = guc_context_destroy,
>>   };
>>   -static int guc_request_alloc(struct i915_request *request)
>> +static bool context_needs_register(struct intel_context *ce, bool 
>> new_guc_id)
>> +{
>> +    return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
>> +        !lrc_desc_registered(ce_to_guc(ce), ce->guc_id);
>> +}
>> +
>> +static int guc_request_alloc(struct i915_request *rq)
>>   {
>> +    struct intel_context *ce = rq->context;
>> +    struct intel_guc *guc = ce_to_guc(ce);
>>       int ret;
>>   - GEM_BUG_ON(!intel_context_is_pinned(request->context));
>> +    GEM_BUG_ON(!intel_context_is_pinned(rq->context));
>>         /*
>>        * Flush enough space to reduce the likelihood of waiting after
>>        * we start building the request - in which case we will just
>>        * have to repeat work.
>>        */
>> -    request->reserved_space += GUC_REQUEST_SIZE;
>> +    rq->reserved_space += GUC_REQUEST_SIZE;
>>         /*
>>        * Note that after this point, we have committed to using
>> @@ -483,56 +962,47 @@ static int guc_request_alloc(struct 
>> i915_request *request)
>>        */
>>         /* Unconditionally invalidate GPU caches and TLBs. */
>> -    ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
>> +    ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
>>       if (ret)
>>           return ret;
>>   -    request->reserved_space -= GUC_REQUEST_SIZE;
>> -    return 0;
>> -}
>> -
>> -static inline void queue_request(struct i915_sched_engine 
>> *sched_engine,
>> -                 struct i915_request *rq,
>> -                 int prio)
>> -{
>> -    GEM_BUG_ON(!list_empty(&rq->sched.link));
>> -    list_add_tail(&rq->sched.link,
>> -              i915_sched_lookup_priolist(sched_engine, prio));
>> -    set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>> -}
>> -
>> -static int guc_bypass_tasklet_submit(struct intel_guc *guc,
>> -                     struct i915_request *rq)
>> -{
>> -    int ret;
>> -
>> -    __i915_request_submit(rq);
>> -
>> -    trace_i915_request_in(rq, 0);
>> -
>> -    guc_set_lrc_tail(rq);
>> -    ret = guc_add_request(guc, rq);
>> -    if (ret == -EBUSY)
>> -        guc->stalled_request = rq;
>> -
>> -    return ret;
>> -}
>> +    rq->reserved_space -= GUC_REQUEST_SIZE;
>>   -static void guc_submit_request(struct i915_request *rq)
>> -{
>> -    struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
>> -    struct intel_guc *guc = &rq->engine->gt->uc.guc;
>> -    unsigned long flags;
>> +    /*
>> +     * Call pin_guc_id here rather than in the pinning step as with
>> +     * dma_resv, contexts can be repeatedly pinned / unpinned 
>> +thrashing the
>> +     * guc_ids and creating horrible race conditions. This is 
>> especially bad
>> +     * when guc_ids are being stolen due to over subscription. By 
>> the time
>> +     * this function is reached, it is guaranteed that the guc_id 
>> will be
>> +     * persistent until the generated request is retired. Thus, 
>> sealing these
>> +     * race conditions. It is still safe to fail here if guc_ids are
>> +     * exhausted and return -EAGAIN to the user indicating that they 
>> can try
>> +     * again in the future.
>> +     *
>> +     * There is no need for a lock here as the timeline mutex 
>> ensures at
>> +     * most one context can be executing this code path at once. The
>> +     * guc_id_ref is incremented once for every request in flight and
>> +     * decremented on each retire. When it is zero, a lock around the
>> +     * increment (in pin_guc_id) is needed to seal a race with 
>> unpin_guc_id.
>> +     */
>> +    if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
>> +        return 0;
>>   -    /* Will be called from irq-context when using foreign fences. */
>> -    spin_lock_irqsave(&sched_engine->lock, flags);
>> +    ret = pin_guc_id(guc, ce);    /* returns 1 if new guc_id 
>> assigned */
>> +    if (unlikely(ret < 0))
>> +        return ret;
>> +    if (context_needs_register(ce, !!ret)) {
>> +        ret = guc_lrc_desc_pin(ce);
>> +        if (unlikely(ret)) {    /* unwind */
>> +            atomic_dec(&ce->guc_id_ref);
>> +            unpin_guc_id(guc, ce);
>> +            return ret;
>> +        }
>> +    }
>>   -    if (guc->stalled_request || 
>> !i915_sched_engine_is_empty(sched_engine))
>> -        queue_request(sched_engine, rq, rq_prio(rq));
>> -    else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
>> -        tasklet_hi_schedule(&sched_engine->tasklet);
>> +    clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
>>   -    spin_unlock_irqrestore(&sched_engine->lock, flags);
>> +    return 0;
>>   }
>>     static void sanitize_hwsp(struct intel_engine_cs *engine)
>> @@ -606,6 +1076,41 @@ static void guc_set_default_submission(struct 
>> intel_engine_cs *engine)
>>       engine->submit_request = guc_submit_request;
>>   }
>>   +static inline void guc_kernel_context_pin(struct intel_guc *guc,
>> +                      struct intel_context *ce)
>> +{
>> +    if (context_guc_id_invalid(ce))
>> +        pin_guc_id(guc, ce);
>> +    guc_lrc_desc_pin(ce);
>> +}
>> +
>> +static inline void guc_init_lrc_mapping(struct intel_guc *guc)
>> +{
>> +    struct intel_gt *gt = guc_to_gt(guc);
>> +    struct intel_engine_cs *engine;
>> +    enum intel_engine_id id;
>> +
>> +    /* make sure all descriptors are clean... */
>> +    xa_destroy(&guc->context_lookup);
>> +
>> +    /*
>> +     * Some contexts might have been pinned before we enabled GuC
>> +     * submission, so we need to add them to the GuC bookeeping.
>> +     * Also, after a reset the of GuC we want to make sure that the
> the of -> of the
>
>> +     * information shared with GuC is properly reset. The kernel 
>> LRCs are
>> +     * not attached to the gem_context, so they need to be added 
>> separately.
>> +     *
>> +     * Note: we purposely do not check the return of guc_lrc_desc_pin,
> purposefully
>
> Just a bunch of nits, so maybe not worth respinning. I think it needs 
> an r-b from Daniele as well, given that he had a bunch of comments on 
> the previous rev too. But apart from the nits, looks good to me.
>

I didn't fully re-review the patch, but I've checked the things I had 
commented on and I'm happy with how they've been addressed. My only 
remaining concern is the potentially long wait in atomic context, but 
that can be addressed as a follow up.

Daniele

> Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
>
>> +     * because that function can only fail if a reset is just 
>> starting. This
>> +     * is at the end of reset so presumably another reset isn't 
>> happening
>> +     * and even it did this code would be run again.
>> +     */
>> +
>> +    for_each_engine(engine, gt, id)
>> +        if (engine->kernel_context)
>> +            guc_kernel_context_pin(guc, engine->kernel_context);
>> +}
>> +
>>   static void guc_release(struct intel_engine_cs *engine)
>>   {
>>       engine->sanitize = NULL; /* no longer in control, nothing to 
>> sanitize */
>> @@ -718,6 +1223,7 @@ int intel_guc_submission_setup(struct 
>> intel_engine_cs *engine)
>>     void intel_guc_submission_enable(struct intel_guc *guc)
>>   {
>> +    guc_init_lrc_mapping(guc);
>>   }
>>     void intel_guc_submission_disable(struct intel_guc *guc)
>> @@ -743,3 +1249,62 @@ void intel_guc_submission_init_early(struct 
>> intel_guc *guc)
>>   {
>>       guc->submission_selected = __guc_submission_selected(guc);
>>   }
>> +
>> +static inline struct intel_context *
>> +g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
>> +{
>> +    struct intel_context *ce;
>> +
>> +    if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
>> +        drm_err(&guc_to_gt(guc)->i915->drm,
>> +            "Invalid desc_idx %u", desc_idx);
>> +        return NULL;
>> +    }
>> +
>> +    ce = __get_context(guc, desc_idx);
>> +    if (unlikely(!ce)) {
>> +        drm_err(&guc_to_gt(guc)->i915->drm,
>> +            "Context is NULL, desc_idx %u", desc_idx);
>> +        return NULL;
>> +    }
>> +
>> +    return ce;
>> +}
>> +
>> +int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>> +                      const u32 *msg,
>> +                      u32 len)
>> +{
>> +    struct intel_context *ce;
>> +    u32 desc_idx = msg[0];
>> +
>> +    if (unlikely(len < 1)) {
>> +        drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
>> +        return -EPROTO;
>> +    }
>> +
>> +    ce = g2h_context_lookup(guc, desc_idx);
>> +    if (unlikely(!ce))
>> +        return -EPROTO;
>> +
>> +    if (context_wait_for_deregister_to_register(ce)) {
>> +        struct intel_runtime_pm *runtime_pm =
>> +            &ce->engine->gt->i915->runtime_pm;
>> +        intel_wakeref_t wakeref;
>> +
>> +        /*
>> +         * Previous owner of this guc_id has been deregistered, now 
>> safe
>> +         * register this context.
>> +         */
>> +        with_intel_runtime_pm(runtime_pm, wakeref)
>> +            register_context(ce);
>> +        clr_context_wait_for_deregister_to_register(ce);
>> +        intel_context_put(ce);
>> +    } else if (context_destroyed(ce)) {
>> +        /* Context has been destroyed */
>> +        release_guc_id(guc, ce);
>> +        lrc_destroy(&ce->ref);
>> +    }
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 943fe485c662..204c95c39353 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -4142,6 +4142,7 @@ enum {
>>       FAULT_AND_CONTINUE /* Unsupported */
>>   };
>>   +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
>>   #define GEN8_CTX_VALID (1 << 0)
>>   #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
>>   #define GEN8_CTX_FORCE_RESTORE (1 << 2)
>> diff --git a/drivers/gpu/drm/i915/i915_request.c 
>> b/drivers/gpu/drm/i915/i915_request.c
>> index 09ebea9a0090..ef26724fe980 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -407,6 +407,7 @@ bool i915_request_retire(struct i915_request *rq)
>>        */
>>       if (!list_empty(&rq->sched.link))
>>           remove_from_engine(rq);
>> +    atomic_dec(&rq->context->guc_id_ref);
>>       GEM_BUG_ON(!llist_empty(&rq->execute_cb));
>>         __list_del_entry(&rq->link); /* poison neither prev/next (RCU 
>> walks) */
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index bd63813c8a80..32fd6647154b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -384,6 +384,11 @@  intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 
 	mutex_init(&ce->pin_mutex);
 
+	spin_lock_init(&ce->guc_state.lock);
+
+	ce->guc_id = GUC_INVALID_LRC_ID;
+	INIT_LIST_HEAD(&ce->guc_id_link);
+
 	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire, 0);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 6d99631d19b9..606c480aec26 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -96,6 +96,7 @@  struct intel_context {
 #define CONTEXT_BANNED			6
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
+#define CONTEXT_LRCA_DIRTY		9
 
 	struct {
 		u64 timeout_us;
@@ -138,14 +139,29 @@  struct intel_context {
 
 	u8 wa_bb_page; /* if set, page num reserved for context workarounds */
 
+	struct {
+		/** lock: protects everything in guc_state */
+		spinlock_t lock;
+		/**
+		 * sched_state: scheduling state of this context using GuC
+		 * submission
+		 */
+		u8 sched_state;
+	} guc_state;
+
 	/* GuC scheduling state flags that do not require a lock. */
 	atomic_t guc_sched_state_no_lock;
 
+	/* GuC LRC descriptor ID */
+	u16 guc_id;
+
+	/* GuC LRC descriptor reference count */
+	atomic_t guc_id_ref;
+
 	/*
-	 * GuC LRC descriptor ID - Not assigned in this patch but future patches
-	 * in the series will.
+	 * GuC ID link - in list when unpinned but guc_id still valid in GuC
 	 */
-	u16 guc_id;
+	struct list_head guc_id_link;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 41e5350a7a05..49d4857ad9b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -87,7 +87,6 @@ 
 #define GEN11_CSB_WRITE_PTR_MASK	(GEN11_CSB_PTR_MASK << 0)
 
 #define MAX_CONTEXT_HW_ID	(1 << 21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID	(1 << 20) /* exclusive */
 #define GEN11_MAX_CONTEXT_HW_ID	(1 << 11) /* exclusive */
 /* in Gen12 ID 0x7FF is reserved to indicate idle */
 #define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 8c7b92f699f1..30773cd699f5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -7,6 +7,7 @@ 
 #define _INTEL_GUC_H_
 
 #include <linux/xarray.h>
+#include <linux/delay.h>
 
 #include "intel_uncore.h"
 #include "intel_guc_fw.h"
@@ -44,6 +45,14 @@  struct intel_guc {
 		void (*disable)(struct intel_guc *guc);
 	} interrupts;
 
+	/*
+	 * contexts_lock protects the pool of free guc ids and a linked list of
+	 * guc ids available to be stolen
+	 */
+	spinlock_t contexts_lock;
+	struct ida guc_ids;
+	struct list_head guc_id_list;
+
 	bool submission_selected;
 
 	struct i915_vma *ads_vma;
@@ -101,6 +110,34 @@  intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
 				 response_buf, response_buf_size, 0);
 }
 
+static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
+					   const u32 *action,
+					   u32 len,
+					   bool loop)
+{
+	int err;
+	unsigned int sleep_period_ms = 1;
+	bool not_atomic = !in_atomic() && !irqs_disabled();
+
+	/* No sleeping with spin locks, just busy loop */
+	might_sleep_if(loop && not_atomic);
+
+retry:
+	err = intel_guc_send_nb(guc, action, len);
+	if (unlikely(err == -EBUSY && loop)) {
+		if (likely(not_atomic)) {
+			if (msleep_interruptible(sleep_period_ms))
+				return -EINTR;
+			sleep_period_ms = sleep_period_ms << 1;
+		} else {
+			cpu_relax();
+		}
+		goto retry;
+	}
+
+	return err;
+}
+
 static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
 {
 	intel_guc_ct_event_handler(&guc->ct);
@@ -202,6 +239,9 @@  static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
 int intel_guc_reset_engine(struct intel_guc *guc,
 			   struct intel_engine_cs *engine);
 
+int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
+					  const u32 *msg, u32 len);
+
 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 83ec60ea3f89..28ff82c5be45 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -928,6 +928,10 @@  static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 	case INTEL_GUC_ACTION_DEFAULT:
 		ret = intel_guc_to_host_process_recv_msg(guc, payload, len);
 		break;
+	case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+		ret = intel_guc_deregister_done_process_msg(guc, payload,
+							    len);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 53b4a5eb4a85..6940b9d62118 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -13,7 +13,9 @@ 
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_irq.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_lrc.h"
+#include "gt/intel_lrc_reg.h"
 #include "gt/intel_mocs.h"
 #include "gt/intel_ring.h"
 
@@ -85,6 +87,72 @@  static inline void clr_context_enabled(struct intel_context *ce)
 		   &ce->guc_sched_state_no_lock);
 }
 
+/*
+ * Below is a set of functions which control the GuC scheduling state which
+ * require a lock, aside from the special case where the functions are called
+ * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
+ * path to be executing on the context.
+ */
+#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
+#define SCHED_STATE_DESTROYED				BIT(1)
+static inline void init_sched_state(struct intel_context *ce)
+{
+	/* Only should be called from guc_lrc_desc_pin() */
+	atomic_set(&ce->guc_sched_state_no_lock, 0);
+	ce->guc_state.sched_state = 0;
+}
+
+static inline bool
+context_wait_for_deregister_to_register(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state &
+		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
+}
+
+static inline void
+set_context_wait_for_deregister_to_register(struct intel_context *ce)
+{
+	/* Only should be called from guc_lrc_desc_pin() */
+	ce->guc_state.sched_state |=
+		SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
+}
+
+static inline void
+clr_context_wait_for_deregister_to_register(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &=
+		~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
+}
+
+static inline bool
+context_destroyed(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
+}
+
+static inline void
+set_context_destroyed(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
+}
+
+static inline bool context_guc_id_invalid(struct intel_context *ce)
+{
+	return (ce->guc_id == GUC_INVALID_LRC_ID);
+}
+
+static inline void set_context_guc_id_invalid(struct intel_context *ce)
+{
+	ce->guc_id = GUC_INVALID_LRC_ID;
+}
+
+static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
+{
+	return &ce->engine->gt->uc.guc;
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -155,6 +223,9 @@  static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 	int len = 0;
 	bool enabled = context_enabled(ce);
 
+	GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
+	GEM_BUG_ON(context_guc_id_invalid(ce));
+
 	if (!enabled) {
 		action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
 		action[len++] = ce->guc_id;
@@ -417,6 +488,10 @@  int intel_guc_submission_init(struct intel_guc *guc)
 
 	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
 
+	spin_lock_init(&guc->contexts_lock);
+	INIT_LIST_HEAD(&guc->guc_id_list);
+	ida_init(&guc->guc_ids);
+
 	return 0;
 }
 
@@ -429,9 +504,305 @@  void intel_guc_submission_fini(struct intel_guc *guc)
 	i915_sched_engine_put(guc->sched_engine);
 }
 
-static int guc_context_alloc(struct intel_context *ce)
+static inline void queue_request(struct i915_sched_engine *sched_engine,
+				 struct i915_request *rq,
+				 int prio)
 {
-	return lrc_alloc(ce, ce->engine);
+	GEM_BUG_ON(!list_empty(&rq->sched.link));
+	list_add_tail(&rq->sched.link,
+		      i915_sched_lookup_priolist(sched_engine, prio));
+	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+}
+
+static int guc_bypass_tasklet_submit(struct intel_guc *guc,
+				     struct i915_request *rq)
+{
+	int ret;
+
+	__i915_request_submit(rq);
+
+	trace_i915_request_in(rq, 0);
+
+	guc_set_lrc_tail(rq);
+	ret = guc_add_request(guc, rq);
+	if (ret == -EBUSY)
+		guc->stalled_request = rq;
+
+	return ret;
+}
+
+static void guc_submit_request(struct i915_request *rq)
+{
+	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
+	struct intel_guc *guc = &rq->engine->gt->uc.guc;
+	unsigned long flags;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&sched_engine->lock, flags);
+
+	if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
+		queue_request(sched_engine, rq, rq_prio(rq));
+	else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
+		tasklet_hi_schedule(&sched_engine->tasklet);
+
+	spin_unlock_irqrestore(&sched_engine->lock, flags);
+}
+
+static int new_guc_id(struct intel_guc *guc)
+{
+	return ida_simple_get(&guc->guc_ids, 0,
+			      GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
+			      __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+}
+
+static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	if (!context_guc_id_invalid(ce)) {
+		ida_simple_remove(&guc->guc_ids, ce->guc_id);
+		reset_lrc_desc(guc, ce->guc_id);
+		set_context_guc_id_invalid(ce);
+	}
+	if (!list_empty(&ce->guc_id_link))
+		list_del_init(&ce->guc_id_link);
+}
+
+static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->contexts_lock, flags);
+	__release_guc_id(guc, ce);
+	spin_unlock_irqrestore(&guc->contexts_lock, flags);
+}
+
+static int steal_guc_id(struct intel_guc *guc)
+{
+	struct intel_context *ce;
+	int guc_id;
+
+	lockdep_assert_held(&guc->contexts_lock);
+
+	if (!list_empty(&guc->guc_id_list)) {
+		ce = list_first_entry(&guc->guc_id_list,
+				      struct intel_context,
+				      guc_id_link);
+
+		GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+		GEM_BUG_ON(context_guc_id_invalid(ce));
+
+		list_del_init(&ce->guc_id_link);
+		guc_id = ce->guc_id;
+		set_context_guc_id_invalid(ce);
+		return guc_id;
+	} else {
+		return -EAGAIN;
+	}
+}
+
+static int assign_guc_id(struct intel_guc *guc, u16 *out)
+{
+	int ret;
+
+	lockdep_assert_held(&guc->contexts_lock);
+
+	ret = new_guc_id(guc);
+	if (unlikely(ret < 0)) {
+		ret = steal_guc_id(guc);
+		if (ret < 0)
+			return ret;
+	}
+
+	*out = ret;
+	return 0;
+}
+
+#define PIN_GUC_ID_TRIES	4
+static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	int ret = 0;
+	unsigned long flags, tries = PIN_GUC_ID_TRIES;
+
+	GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+
+try_again:
+	spin_lock_irqsave(&guc->contexts_lock, flags);
+
+	if (context_guc_id_invalid(ce)) {
+		ret = assign_guc_id(guc, &ce->guc_id);
+		if (ret)
+			goto out_unlock;
+		ret = 1;	/* Indicates newly assigned guc_id */
+	}
+	if (!list_empty(&ce->guc_id_link))
+		list_del_init(&ce->guc_id_link);
+	atomic_inc(&ce->guc_id_ref);
+
+out_unlock:
+	spin_unlock_irqrestore(&guc->contexts_lock, flags);
+
+	/*
+	 * -EAGAIN indicates no guc_ids are available, let's retire any
+	 * outstanding requests to see if that frees up a guc_id. If the first
+	 * retire didn't help, insert a sleep with the timeslice duration before
+	 * attempting to retire more requests. Double the sleep period each
+	 * subsequent pass before finally giving up. The sleep period has max of
+	 * 100ms and minimum of 1ms.
+	 */
+	if (ret == -EAGAIN && --tries) {
+		if (PIN_GUC_ID_TRIES - tries > 1) {
+			unsigned int timeslice_shifted =
+				ce->engine->props.timeslice_duration_ms <<
+				(PIN_GUC_ID_TRIES - tries - 2);
+			unsigned int max = min_t(unsigned int, 100,
+						 timeslice_shifted);
+
+			msleep(max_t(unsigned int, max, 1));
+		}
+		intel_gt_retire_requests(guc_to_gt(guc));
+		goto try_again;
+	}
+
+	return ret;
+}
+
+static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	unsigned long flags;
+
+	GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
+
+	spin_lock_irqsave(&guc->contexts_lock, flags);
+	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
+	    !atomic_read(&ce->guc_id_ref))
+		list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
+	spin_unlock_irqrestore(&guc->contexts_lock, flags);
+}
+
+static int __guc_action_register_context(struct intel_guc *guc,
+					 u32 guc_id,
+					 u32 offset)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_REGISTER_CONTEXT,
+		guc_id,
+		offset,
+	};
+
+	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
+}
+
+static int register_context(struct intel_context *ce)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
+		ce->guc_id * sizeof(struct guc_lrc_desc);
+
+	return __guc_action_register_context(guc, ce->guc_id, offset);
+}
+
+static int __guc_action_deregister_context(struct intel_guc *guc,
+					   u32 guc_id)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
+		guc_id,
+	};
+
+	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
+}
+
+static int deregister_context(struct intel_context *ce, u32 guc_id)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+
+	return __guc_action_deregister_context(guc, guc_id);
+}
+
+static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
+{
+	switch (class) {
+	case RENDER_CLASS:
+		return mask >> RCS0;
+	case VIDEO_ENHANCEMENT_CLASS:
+		return mask >> VECS0;
+	case VIDEO_DECODE_CLASS:
+		return mask >> VCS0;
+	case COPY_ENGINE_CLASS:
+		return mask >> BCS0;
+	default:
+		MISSING_CASE(class);
+		return 0;
+	}
+}
+
+static void guc_context_policy_init(struct intel_engine_cs *engine,
+				    struct guc_lrc_desc *desc)
+{
+	desc->policy_flags = 0;
+
+	desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US;
+	desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US;
+}
+
+static int guc_lrc_desc_pin(struct intel_context *ce)
+{
+	struct intel_engine_cs *engine = ce->engine;
+	struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
+	struct intel_guc *guc = &engine->gt->uc.guc;
+	u32 desc_idx = ce->guc_id;
+	struct guc_lrc_desc *desc;
+	bool context_registered;
+	intel_wakeref_t wakeref;
+	int ret = 0;
+
+	GEM_BUG_ON(!engine->mask);
+
+	/*
+	 * Ensure LRC + CT vmas are in same region as write barrier is done
+	 * based on CT vma region.
+	 */
+	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+		   i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+	context_registered = lrc_desc_registered(guc, desc_idx);
+
+	reset_lrc_desc(guc, desc_idx);
+	set_lrc_desc_registered(guc, desc_idx, ce);
+
+	desc = __get_lrc_desc(guc, desc_idx);
+	desc->engine_class = engine_class_to_guc_class(engine->class);
+	desc->engine_submit_mask = adjust_engine_mask(engine->class,
+						      engine->mask);
+	desc->hw_context_desc = ce->lrc.lrca;
+	desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
+	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+	guc_context_policy_init(engine, desc);
+	init_sched_state(ce);
+
+	/*
+	 * The context_lookup xarray is used to determine if the hardware
+	 * context is currently registered. There are two cases in which it
+	 * could be registered, either the guc_id has been stolen from
+	 * another context or the lrc descriptor address of this context has
+	 * changed. In either case the context needs to be deregistered with the
+	 * GuC before registering this context.
+	 */
+	if (context_registered) {
+		set_context_wait_for_deregister_to_register(ce);
+		intel_context_get(ce);
+
+		/*
+		 * If stealing the guc_id, this ce has the same guc_id as the
+		 * context whose guc_id was stolen.
+		 */
+		with_intel_runtime_pm(runtime_pm, wakeref)
+			ret = deregister_context(ce, ce->guc_id);
+	} else {
+		with_intel_runtime_pm(runtime_pm, wakeref)
+			ret = register_context(ce);
+	}
+
+	return ret;
 }
 
 static int guc_context_pre_pin(struct intel_context *ce,
@@ -443,36 +814,144 @@  static int guc_context_pre_pin(struct intel_context *ce,
 
 static int guc_context_pin(struct intel_context *ce, void *vaddr)
 {
+	if (i915_ggtt_offset(ce->state) !=
+	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
+		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
+
+	/*
+	 * GuC context gets pinned in guc_request_alloc. See that function for
+	 * explanation of why.
+	 */
+
 	return lrc_pin(ce, ce->engine, vaddr);
 }
 
+static void guc_context_unpin(struct intel_context *ce)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+
+	unpin_guc_id(guc, ce);
+	lrc_unpin(ce);
+}
+
+static void guc_context_post_unpin(struct intel_context *ce)
+{
+	lrc_post_unpin(ce);
+}
+
+static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+	unsigned long flags;
+
+	GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
+	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
+
+	spin_lock_irqsave(&ce->guc_state.lock, flags);
+	set_context_destroyed(ce);
+	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+	deregister_context(ce, ce->guc_id);
+}
+
+static void guc_context_destroy(struct kref *kref)
+{
+	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
+	struct intel_guc *guc = ce_to_guc(ce);
+	intel_wakeref_t wakeref;
+	unsigned long flags;
+
+	/*
+	 * If the guc_id is invalid this context has been stolen and we can free
+	 * it immediately. Also can be freed immediately if the context is not
+	 * registered with the GuC.
+	 */
+	if (context_guc_id_invalid(ce)) {
+		lrc_destroy(kref);
+		return;
+	} else if (!lrc_desc_registered(guc, ce->guc_id)) {
+		release_guc_id(guc, ce);
+		lrc_destroy(kref);
+		return;
+	}
+
+	/*
+	 * We have to acquire the context spinlock and check guc_id again, if it
+	 * is valid it hasn't been stolen and needs to be deregistered. We
+	 * delete this context from the list of unpinned guc_ids available to
+	 * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
+	 * returns indicating this context has been deregistered the guc_id is
+	 * returned to the pool of available guc_ids.
+	 */
+	spin_lock_irqsave(&guc->contexts_lock, flags);
+	if (context_guc_id_invalid(ce)) {
+		spin_unlock_irqrestore(&guc->contexts_lock, flags);
+		lrc_destroy(kref);
+		return;
+	}
+
+	if (!list_empty(&ce->guc_id_link))
+		list_del_init(&ce->guc_id_link);
+	spin_unlock_irqrestore(&guc->contexts_lock, flags);
+
+	/*
+	 * We defer GuC context deregistration until the context is destroyed
+	 * in order to save on CTBs. With this optimization ideally we only need
+	 * 1 CTB to register the context during the first pin and 1 CTB to
+	 * deregister the context when the context is destroyed. Without this
+	 * optimization, a CTB would be needed every pin & unpin.
+	 *
+	 * XXX: Need to acquire the runtime wakeref as this can be triggered
+	 * from context_free_worker when runtime wakeref is not held.
+	 * guc_lrc_desc_unpin requires the runtime as a GuC register is written
+	 * in H2G CTB to deregister the context. A future patch may defer this
+	 * H2G CTB if the runtime wakeref is zero.
+	 */
+	with_intel_runtime_pm(runtime_pm, wakeref)
+		guc_lrc_desc_unpin(ce);
+}
+
+static int guc_context_alloc(struct intel_context *ce)
+{
+	return lrc_alloc(ce, ce->engine);
+}
+
 static const struct intel_context_ops guc_context_ops = {
 	.alloc = guc_context_alloc,
 
 	.pre_pin = guc_context_pre_pin,
 	.pin = guc_context_pin,
-	.unpin = lrc_unpin,
-	.post_unpin = lrc_post_unpin,
+	.unpin = guc_context_unpin,
+	.post_unpin = guc_context_post_unpin,
 
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
 
 	.reset = lrc_reset,
-	.destroy = lrc_destroy,
+	.destroy = guc_context_destroy,
 };
 
-static int guc_request_alloc(struct i915_request *request)
+static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
+{
+	return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
+		!lrc_desc_registered(ce_to_guc(ce), ce->guc_id);
+}
+
+static int guc_request_alloc(struct i915_request *rq)
 {
+	struct intel_context *ce = rq->context;
+	struct intel_guc *guc = ce_to_guc(ce);
 	int ret;
 
-	GEM_BUG_ON(!intel_context_is_pinned(request->context));
+	GEM_BUG_ON(!intel_context_is_pinned(rq->context));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
 	 * have to repeat work.
 	 */
-	request->reserved_space += GUC_REQUEST_SIZE;
+	rq->reserved_space += GUC_REQUEST_SIZE;
 
 	/*
 	 * Note that after this point, we have committed to using
@@ -483,56 +962,47 @@  static int guc_request_alloc(struct i915_request *request)
 	 */
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+	ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)
 		return ret;
 
-	request->reserved_space -= GUC_REQUEST_SIZE;
-	return 0;
-}
-
-static inline void queue_request(struct i915_sched_engine *sched_engine,
-				 struct i915_request *rq,
-				 int prio)
-{
-	GEM_BUG_ON(!list_empty(&rq->sched.link));
-	list_add_tail(&rq->sched.link,
-		      i915_sched_lookup_priolist(sched_engine, prio));
-	set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-}
-
-static int guc_bypass_tasklet_submit(struct intel_guc *guc,
-				     struct i915_request *rq)
-{
-	int ret;
-
-	__i915_request_submit(rq);
-
-	trace_i915_request_in(rq, 0);
-
-	guc_set_lrc_tail(rq);
-	ret = guc_add_request(guc, rq);
-	if (ret == -EBUSY)
-		guc->stalled_request = rq;
-
-	return ret;
-}
+	rq->reserved_space -= GUC_REQUEST_SIZE;
 
-static void guc_submit_request(struct i915_request *rq)
-{
-	struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
-	struct intel_guc *guc = &rq->engine->gt->uc.guc;
-	unsigned long flags;
+	/*
+	 * Call pin_guc_id here rather than in the pinning step as with
+	 * dma_resv, contexts can be repeatedly pinned / unpinned thrashing the
+	 * guc_ids and creating horrible race conditions. This is especially bad
+	 * when guc_ids are being stolen due to over subscription. By the time
+	 * this function is reached, it is guaranteed that the guc_id will be
+	 * persistent until the generated request is retired. Thus, sealing these
+	 * race conditions. It is still safe to fail here if guc_ids are
+	 * exhausted and return -EAGAIN to the user indicating that they can try
+	 * again in the future.
+	 *
+	 * There is no need for a lock here as the timeline mutex ensures at
+	 * most one context can be executing this code path at once. The
+	 * guc_id_ref is incremented once for every request in flight and
+	 * decremented on each retire. When it is zero, a lock around the
+	 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
+	 */
+	if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
+		return 0;
 
-	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&sched_engine->lock, flags);
+	ret = pin_guc_id(guc, ce);	/* returns 1 if new guc_id assigned */
+	if (unlikely(ret < 0))
+		return ret;
+	if (context_needs_register(ce, !!ret)) {
+		ret = guc_lrc_desc_pin(ce);
+		if (unlikely(ret)) {	/* unwind */
+			atomic_dec(&ce->guc_id_ref);
+			unpin_guc_id(guc, ce);
+			return ret;
+		}
+	}
 
-	if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
-		queue_request(sched_engine, rq, rq_prio(rq));
-	else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
-		tasklet_hi_schedule(&sched_engine->tasklet);
+	clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
 
-	spin_unlock_irqrestore(&sched_engine->lock, flags);
+	return 0;
 }
 
 static void sanitize_hwsp(struct intel_engine_cs *engine)
@@ -606,6 +1076,41 @@  static void guc_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = guc_submit_request;
 }
 
+static inline void guc_kernel_context_pin(struct intel_guc *guc,
+					  struct intel_context *ce)
+{
+	if (context_guc_id_invalid(ce))
+		pin_guc_id(guc, ce);
+	guc_lrc_desc_pin(ce);
+}
+
+static inline void guc_init_lrc_mapping(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* make sure all descriptors are clean... */
+	xa_destroy(&guc->context_lookup);
+
+	/*
+	 * Some contexts might have been pinned before we enabled GuC
+	 * submission, so we need to add them to the GuC bookkeeping.
+	 * Also, after a reset of the GuC we want to make sure that the
+	 * information shared with GuC is properly reset. The kernel LRCs are
+	 * not attached to the gem_context, so they need to be added separately.
+	 *
+	 * Note: we purposely do not check the return of guc_lrc_desc_pin,
+	 * because that function can only fail if a reset is just starting. This
+	 * is at the end of reset so presumably another reset isn't happening
+	 * and even if it did this code would be run again.
+	 */
+
+	for_each_engine(engine, gt, id)
+		if (engine->kernel_context)
+			guc_kernel_context_pin(guc, engine->kernel_context);
+}
+
 static void guc_release(struct intel_engine_cs *engine)
 {
 	engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
@@ -718,6 +1223,7 @@  int intel_guc_submission_setup(struct intel_engine_cs *engine)
 
 void intel_guc_submission_enable(struct intel_guc *guc)
 {
+	guc_init_lrc_mapping(guc);
 }
 
 void intel_guc_submission_disable(struct intel_guc *guc)
@@ -743,3 +1249,62 @@  void intel_guc_submission_init_early(struct intel_guc *guc)
 {
 	guc->submission_selected = __guc_submission_selected(guc);
 }
+
+static inline struct intel_context *
+g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
+{
+	struct intel_context *ce;
+
+	if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
+		drm_err(&guc_to_gt(guc)->i915->drm,
+			"Invalid desc_idx %u", desc_idx);
+		return NULL;
+	}
+
+	ce = __get_context(guc, desc_idx);
+	if (unlikely(!ce)) {
+		drm_err(&guc_to_gt(guc)->i915->drm,
+			"Context is NULL, desc_idx %u", desc_idx);
+		return NULL;
+	}
+
+	return ce;
+}
+
+int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
+					  const u32 *msg,
+					  u32 len)
+{
+	struct intel_context *ce;
+	u32 desc_idx = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	ce = g2h_context_lookup(guc, desc_idx);
+	if (unlikely(!ce))
+		return -EPROTO;
+
+	if (context_wait_for_deregister_to_register(ce)) {
+		struct intel_runtime_pm *runtime_pm =
+			&ce->engine->gt->i915->runtime_pm;
+		intel_wakeref_t wakeref;
+
+		/*
+		 * Previous owner of this guc_id has been deregistered, now safe
+		 * to register this context.
+		 */
+		with_intel_runtime_pm(runtime_pm, wakeref)
+			register_context(ce);
+		clr_context_wait_for_deregister_to_register(ce);
+		intel_context_put(ce);
+	} else if (context_destroyed(ce)) {
+		/* Context has been destroyed */
+		release_guc_id(guc, ce);
+		lrc_destroy(&ce->ref);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 943fe485c662..204c95c39353 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4142,6 +4142,7 @@  enum {
 	FAULT_AND_CONTINUE /* Unsupported */
 };
 
+#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
 #define GEN8_CTX_VALID (1 << 0)
 #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
 #define GEN8_CTX_FORCE_RESTORE (1 << 2)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 09ebea9a0090..ef26724fe980 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -407,6 +407,7 @@  bool i915_request_retire(struct i915_request *rq)
 	 */
 	if (!list_empty(&rq->sched.link))
 		remove_from_engine(rq);
+	atomic_dec(&rq->context->guc_id_ref);
 	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
 
 	__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */