Message ID | 20220412225955.1802543-2-John.C.Harrison@Intel.com (mailing list archive)
State | New, archived
Series | Update to GuC v70
On Tue, Apr 12, 2022 at 03:59:55PM -0700, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The latest GuC firmware drops the context descriptor pool in favour of
> passing all creation data in the create H2G. It also greatly simplifies
> the work queue and removes the process descriptor used for multi-LRC
> submission. So, remove all mention of LRC and process descriptors and
> update the registration code accordingly.
>
> Unfortunately, the new API also removes the ability to set default
> values for the scheduling policies at context registration time.
> Instead, a follow up H2G must be sent. The individual scheduling
> policy update H2G commands are also dropped in favour of a single KLV
> based H2G. So, change the update wrappers accordingly and call this
> during context registration.
>
> Of course, this second H2G per registration might fail due to being
> backed up. The registration code has a complicated state machine to
> cope with the actual registration call failing. However, if that works
> then there is no support for unwinding if a further call should fail.
> Unwinding would require sending a H2G to de-register - but that can't
> be done because the CTB is already backed up.
>
> So instead, add a new flag to say whether the context has a pending
> policy update. This is set if the policy H2G fails at registration
> time. The submission code checks for this flag and retries the policy
> update if set. If that call fails, the submission path early exits
> with a retry error. This is something that is already supported for
> other reasons.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Applied to drm-intel-gt-next. Thanks for the patch and review.
Matt > --- > .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 4 +- > drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 15 + > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 5 - > drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 67 ++-- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 375 +++++++++++------- > drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 32 +- > 6 files changed, 294 insertions(+), 204 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h > index 9ad6df1b6fbc..be9ac47fa9d0 100644 > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h > @@ -122,11 +122,9 @@ enum intel_guc_action { > INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, > INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, > INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, > - INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, > - INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, > - INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, > INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, > INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, > + INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, > INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, > INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, > INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100, > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > index f0814a57c191..4a59478c3b5c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h > @@ -6,6 +6,8 @@ > #ifndef _ABI_GUC_KLVS_ABI_H > #define _ABI_GUC_KLVS_ABI_H > > +#include <linux/types.h> > + > /** > * DOC: GuC KLV > * > @@ -79,4 +81,17 @@ > #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907 > #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u > > +/* > + * Per context scheduling policy update keys. 
> + */ > +enum { > + GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001, > + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002, > + GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003, > + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004, > + GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005, > + > + GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, > +}; > + > #endif /* _ABI_GUC_KLVS_ABI_H */ > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index 4e431c14b118..3f3373f68123 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -170,11 +170,6 @@ struct intel_guc { > /** @ads_engine_usage_size: size of engine usage in the ADS */ > u32 ads_engine_usage_size; > > - /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ > - struct i915_vma *lrc_desc_pool; > - /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ > - void *lrc_desc_pool_vaddr; > - > /** > * @context_lookup: used to resolve intel_context from guc_id, if a > * context is present in this structure it is registered with the GuC > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > index f21b6de46a99..c154b5efccde 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h > @@ -197,54 +197,45 @@ struct guc_wq_item { > u32 fence_id; > } __packed; > > -struct guc_process_desc { > - u32 stage_id; > - u64 db_base_addr; > +struct guc_sched_wq_desc { > u32 head; > u32 tail; > u32 error_offset; > - u64 wq_base_addr; > - u32 wq_size_bytes; > u32 wq_status; > - u32 engine_presence; > - u32 priority; > - u32 reserved[36]; > + u32 reserved[28]; > } __packed; > > +/* Helper for context registration H2G */ > +struct guc_ctxt_registration_info { > + u32 flags; > + u32 context_idx; > + u32 engine_class; > + u32 engine_submit_mask; > + u32 wq_desc_lo; > + u32 wq_desc_hi; > + u32 wq_base_lo; > + u32 wq_base_hi; > + u32 wq_size; > + u32 hwlrca_lo; > + u32 hwlrca_hi; > +}; > #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) > > -#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 > -#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000 > +/* 32-bit KLV structure as used by policy updates and others */ > +struct guc_klv_generic_dw_t { > + u32 kl; > + u32 value; > +} __packed; > > -/* Preempt to idle on quantum expiry */ > -#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0) > +/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ > +struct guc_update_context_policy_header { > + u32 action; > + u32 ctx_id; > +} __packed; > > -/* > - * GuC Context registration descriptor. > - * FIXME: This is only required to exist during context registration. > - * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC > - * is not required. > - */ > -struct guc_lrc_desc { > - u32 hw_context_desc; > - u32 slpm_perf_mode_hint; /* SPLC v1 only */ > - u32 slpm_freq_hint; > - u32 engine_submit_mask; /* In logical space */ > - u8 engine_class; > - u8 reserved0[3]; > - u32 priority; > - u32 process_desc; > - u32 wq_addr; > - u32 wq_size; > - u32 context_flags; /* CONTEXT_REGISTRATION_* */ > - /* Time for one workload to execute. (in micro seconds) */ > - u32 execution_quantum; > - /* Time to wait for a preemption request to complete before issuing a > - * reset. (in micro seconds). 
> - */ > - u32 preemption_timeout; > - u32 policy_flags; /* CONTEXT_POLICY_* */ > - u32 reserved1[19]; > +struct guc_update_context_policy { > + struct guc_update_context_policy_header header; > + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; > } __packed; > > #define GUC_POWER_UNSPECIFIED 0 > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index e1612c393781..ee45fdb67f32 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -162,7 +162,8 @@ guc_create_parallel(struct intel_engine_cs **engines, > #define SCHED_STATE_ENABLED BIT(4) > #define SCHED_STATE_PENDING_ENABLE BIT(5) > #define SCHED_STATE_REGISTERED BIT(6) > -#define SCHED_STATE_BLOCKED_SHIFT 7 > +#define SCHED_STATE_POLICY_REQUIRED BIT(7) > +#define SCHED_STATE_BLOCKED_SHIFT 8 > #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) > #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) > > @@ -301,6 +302,23 @@ static inline void clr_context_registered(struct intel_context *ce) > ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; > } > > +static inline bool context_policy_required(struct intel_context *ce) > +{ > + return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; > +} > + > +static inline void set_context_policy_required(struct intel_context *ce) > +{ > + lockdep_assert_held(&ce->guc_state.lock); > + ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; > +} > + > +static inline void clr_context_policy_required(struct intel_context *ce) > +{ > + lockdep_assert_held(&ce->guc_state.lock); > + ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; > +} > + > static inline u32 context_blocked(struct intel_context *ce) > { > return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> > @@ -396,12 +414,12 @@ struct sync_semaphore { > }; > > struct parent_scratch { > - struct guc_process_desc pdesc; > + struct guc_sched_wq_desc wq_desc; > > struct sync_semaphore go; > struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; > > - u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - > + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - > sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; > > u32 wq[WQ_SIZE / sizeof(u32)]; > @@ -438,15 +456,15 @@ __get_parent_scratch(struct intel_context *ce) > LRC_STATE_OFFSET) / sizeof(u32))); > } > > -static struct guc_process_desc * > -__get_process_desc(struct intel_context *ce) > +static struct guc_sched_wq_desc * > +__get_wq_desc(struct intel_context *ce) > { > struct parent_scratch *ps = __get_parent_scratch(ce); > > - return &ps->pdesc; > + return &ps->wq_desc; > } > > -static u32 *get_wq_pointer(struct guc_process_desc *desc, > +static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, > struct intel_context *ce, > u32 wqi_size) > { > @@ -458,7 +476,7 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc, > #define AVAILABLE_SPACE \ > CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) > if (wqi_size > AVAILABLE_SPACE) { > - ce->parallel.guc.wqi_head = READ_ONCE(desc->head); > + ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); > > if (wqi_size > AVAILABLE_SPACE) > return NULL; > @@ -468,15 +486,6 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc, > return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; > } > > -static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) > -{ > - 
struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; > - > - GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); > - > - return &base[index]; > -} > - > static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) > { > struct intel_context *ce = xa_load(&guc->context_lookup, id); > @@ -486,39 +495,11 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) > return ce; > } > > -static int guc_lrc_desc_pool_create(struct intel_guc *guc) > -{ > - u32 size; > - int ret; > - > - size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * > - GUC_MAX_CONTEXT_ID); > - ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, > - (void **)&guc->lrc_desc_pool_vaddr); > - if (ret) > - return ret; > - > - return 0; > -} > - > -static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) > -{ > - guc->lrc_desc_pool_vaddr = NULL; > - i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); > -} > - > static inline bool guc_submission_initialized(struct intel_guc *guc) > { > return guc->submission_initialized; > } > > -static inline void _reset_lrc_desc(struct intel_guc *guc, u32 id) > -{ > - struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); > - > - memset(desc, 0, sizeof(*desc)); > -} > - > static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) > { > return __get_context(guc, id); > @@ -545,8 +526,6 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) > if (unlikely(!guc_submission_initialized(guc))) > return; > > - _reset_lrc_desc(guc, id); > - > /* > * xarray API doesn't have xa_erase_irqsave wrapper, so calling > * the lower level functions directly. > @@ -632,6 +611,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) > true, timeout); > } > > +static int guc_context_policy_init(struct intel_context *ce, bool loop); > static int try_context_registration(struct intel_context *ce, bool loop); > > static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) > @@ -658,6 +638,12 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) > GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); > GEM_BUG_ON(context_guc_id_invalid(ce)); > > + if (context_policy_required(ce)) { > + err = guc_context_policy_init(ce, false); > + if (err) > + return err; > + } > + > spin_lock(&ce->guc_state.lock); > > /* > @@ -751,7 +737,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) > return (WQ_SIZE - ce->parallel.guc.wqi_tail); > } > > -static void write_wqi(struct guc_process_desc *desc, > +static void write_wqi(struct guc_sched_wq_desc *wq_desc, > struct intel_context *ce, > u32 wqi_size) > { > @@ -764,13 +750,13 @@ static void write_wqi(struct guc_process_desc *desc, > > ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & > (WQ_SIZE - 1); > - WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); > + WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); > } > > static int guc_wq_noop_append(struct intel_context *ce) > { > - struct guc_process_desc *desc = __get_process_desc(ce); > - u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); > + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); > + u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); > u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; > > if (!wqi) > @@ -789,7 +775,7 @@ static int __guc_wq_item_append(struct i915_request *rq) > { > struct intel_context *ce = request_to_scheduling_context(rq); > struct intel_context *child; > - struct guc_process_desc *desc = 
__get_process_desc(ce); > + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); > unsigned int wqi_size = (ce->parallel.number_children + 4) * > sizeof(u32); > u32 *wqi; > @@ -809,7 +795,7 @@ static int __guc_wq_item_append(struct i915_request *rq) > return ret; > } > > - wqi = get_wq_pointer(desc, ce, wqi_size); > + wqi = get_wq_pointer(wq_desc, ce, wqi_size); > if (!wqi) > return -EBUSY; > > @@ -824,7 +810,7 @@ static int __guc_wq_item_append(struct i915_request *rq) > for_each_child(ce, child) > *wqi++ = child->ring->tail / sizeof(u64); > > - write_wqi(desc, ce, wqi_size); > + write_wqi(wq_desc, ce, wqi_size); > > return 0; > } > @@ -1812,20 +1798,10 @@ static void reset_fail_worker_func(struct work_struct *w); > int intel_guc_submission_init(struct intel_guc *guc) > { > struct intel_gt *gt = guc_to_gt(guc); > - int ret; > > if (guc->submission_initialized) > return 0; > > - ret = guc_lrc_desc_pool_create(guc); > - if (ret) > - return ret; > - /* > - * Keep static analysers happy, let them know that we allocated the > - * vma after testing that it didn't exist earlier. > - */ > - GEM_BUG_ON(!guc->lrc_desc_pool); > - > guc->submission_state.guc_ids_bitmap = > bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > if (!guc->submission_state.guc_ids_bitmap) > @@ -1844,7 +1820,6 @@ void intel_guc_submission_fini(struct intel_guc *guc) > return; > > guc_flush_destroyed_contexts(guc); > - guc_lrc_desc_pool_destroy(guc); > i915_sched_engine_put(guc->sched_engine); > bitmap_free(guc->submission_state.guc_ids_bitmap); > guc->submission_initialized = false; > @@ -2104,69 +2079,96 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) > > static int __guc_action_register_multi_lrc(struct intel_guc *guc, > struct intel_context *ce, > - u32 guc_id, > - u32 offset, > + struct guc_ctxt_registration_info *info, > bool loop) > { > struct intel_context *child; > - u32 action[4 + MAX_ENGINE_INSTANCE]; > + u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; > int len = 0; > + u32 next_id; > > GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); > > action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; > - action[len++] = guc_id; > + action[len++] = info->flags; > + action[len++] = info->context_idx; > + action[len++] = info->engine_class; > + action[len++] = info->engine_submit_mask; > + action[len++] = info->wq_desc_lo; > + action[len++] = info->wq_desc_hi; > + action[len++] = info->wq_base_lo; > + action[len++] = info->wq_base_hi; > + action[len++] = info->wq_size; > action[len++] = ce->parallel.number_children + 1; > - action[len++] = offset; > + action[len++] = info->hwlrca_lo; > + action[len++] = info->hwlrca_hi; > + > + next_id = info->context_idx + 1; > for_each_child(ce, child) { > - offset += sizeof(struct guc_lrc_desc); > - action[len++] = offset; > + GEM_BUG_ON(next_id++ != child->guc_id.id); > + > + /* > + * NB: GuC interface supports 64 bit LRCA even though i915/HW > + * only supports 32 bit currently. 
> + */ > + action[len++] = lower_32_bits(child->lrc.lrca); > + action[len++] = upper_32_bits(child->lrc.lrca); > } > > + GEM_BUG_ON(len > ARRAY_SIZE(action)); > + > return guc_submission_send_busy_loop(guc, action, len, 0, loop); > } > > static int __guc_action_register_context(struct intel_guc *guc, > - u32 guc_id, > - u32 offset, > + struct guc_ctxt_registration_info *info, > bool loop) > { > u32 action[] = { > INTEL_GUC_ACTION_REGISTER_CONTEXT, > - guc_id, > - offset, > + info->flags, > + info->context_idx, > + info->engine_class, > + info->engine_submit_mask, > + info->wq_desc_lo, > + info->wq_desc_hi, > + info->wq_base_lo, > + info->wq_base_hi, > + info->wq_size, > + info->hwlrca_lo, > + info->hwlrca_hi, > }; > > return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), > 0, loop); > } > > -static void prepare_context_registration_info(struct intel_context *ce); > +static void prepare_context_registration_info(struct intel_context *ce, > + struct guc_ctxt_registration_info *info); > > static int register_context(struct intel_context *ce, bool loop) > { > + struct guc_ctxt_registration_info info; > struct intel_guc *guc = ce_to_guc(ce); > - u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + > - ce->guc_id.id * sizeof(struct guc_lrc_desc); > int ret; > > GEM_BUG_ON(intel_context_is_child(ce)); > trace_intel_context_register(ce); > > - prepare_context_registration_info(ce); > + prepare_context_registration_info(ce, &info); > > if (intel_context_is_parent(ce)) > - ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id, > - offset, loop); > + ret = __guc_action_register_multi_lrc(guc, ce, &info, loop); > else > - ret = __guc_action_register_context(guc, ce->guc_id.id, offset, > - loop); > + ret = __guc_action_register_context(guc, &info, loop); > if (likely(!ret)) { > unsigned long flags; > > spin_lock_irqsave(&ce->guc_state.lock, flags); > set_context_registered(ce); > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > + > + guc_context_policy_init(ce, loop); > } > > return ret; > @@ -2216,26 +2218,118 @@ static inline u32 get_children_join_value(struct intel_context *ce, > return __get_parent_scratch(ce)->join[child_index].semaphore; > } > > -static void guc_context_policy_init(struct intel_engine_cs *engine, > - struct guc_lrc_desc *desc) > +struct context_policy { > + u32 count; > + struct guc_update_context_policy h2g; > +}; > + > +static u32 __guc_context_policy_action_size(struct context_policy *policy) > { > - desc->policy_flags = 0; > + size_t bytes = sizeof(policy->h2g.header) + > + (sizeof(policy->h2g.klv[0]) * policy->count); > > - if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) > - desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; > + return bytes / sizeof(u32); > +} > + > +static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) > +{ > + policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; > + policy->h2g.header.ctx_id = guc_id; > + policy->count = 0; > +} > + > +#define MAKE_CONTEXT_POLICY_ADD(func, id) \ > +static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ > +{ \ > + GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ > + policy->h2g.klv[policy->count].kl = \ > + FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ > + FIELD_PREP(GUC_KLV_0_LEN, 1); \ > + policy->h2g.klv[policy->count].value = data; \ > + policy->count++; \ > +} > + > +MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) > 
+MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) > +MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) > +MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) > + > +#undef MAKE_CONTEXT_POLICY_ADD > + > +static int __guc_context_set_context_policies(struct intel_guc *guc, > + struct context_policy *policy, > + bool loop) > +{ > + return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, > + __guc_context_policy_action_size(policy), > + 0, loop); > +} > + > +static int guc_context_policy_init(struct intel_context *ce, bool loop) > +{ > + struct intel_engine_cs *engine = ce->engine; > + struct intel_guc *guc = &engine->gt->uc.guc; > + struct context_policy policy; > + u32 execution_quantum; > + u32 preemption_timeout; > + bool missing = false; > + unsigned long flags; > + int ret; > > /* NB: For both of these, zero means disabled. */ > - desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; > - desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; > + execution_quantum = engine->props.timeslice_duration_ms * 1000; > + preemption_timeout = engine->props.preempt_timeout_ms * 1000; > + > + __guc_context_policy_start_klv(&policy, ce->guc_id.id); > + > + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); > + __guc_context_policy_add_execution_quantum(&policy, execution_quantum); > + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); > + > + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) > + __guc_context_policy_add_preempt_to_idle(&policy, 1); > + > + ret = __guc_context_set_context_policies(guc, &policy, loop); > + missing = ret != 0; > + > + if (!missing && intel_context_is_parent(ce)) { > + struct intel_context *child; > + > + for_each_child(ce, child) { > + __guc_context_policy_start_klv(&policy, child->guc_id.id); > + > + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) > + __guc_context_policy_add_preempt_to_idle(&policy, 1); > + > + child->guc_state.prio = ce->guc_state.prio; > + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); > + __guc_context_policy_add_execution_quantum(&policy, execution_quantum); > + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); > + > + ret = __guc_context_set_context_policies(guc, &policy, loop); > + if (ret) { > + missing = true; > + break; > + } > + } > + } > + > + spin_lock_irqsave(&ce->guc_state.lock, flags); > + if (missing) > + set_context_policy_required(ce); > + else > + clr_context_policy_required(ce); > + spin_unlock_irqrestore(&ce->guc_state.lock, flags); > + > + return ret; > } > > -static void prepare_context_registration_info(struct intel_context *ce) > +static void prepare_context_registration_info(struct intel_context *ce, > + struct guc_ctxt_registration_info *info) > { > struct intel_engine_cs *engine = ce->engine; > struct intel_guc *guc = &engine->gt->uc.guc; > u32 ctx_id = ce->guc_id.id; > - struct guc_lrc_desc *desc; > - struct intel_context *child; > > GEM_BUG_ON(!engine->mask); > > @@ -2246,47 +2340,42 @@ static void prepare_context_registration_info(struct intel_context *ce) > GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != > i915_gem_object_is_lmem(ce->ring->vma->obj)); > > - desc = __get_lrc_desc(guc, ctx_id); > - desc->engine_class = engine_class_to_guc_class(engine->class); > - desc->engine_submit_mask = engine->logical_mask; > - desc->hw_context_desc = ce->lrc.lrca; > - desc->priority = ce->guc_state.prio; > - desc->context_flags = 
CONTEXT_REGISTRATION_FLAG_KMD; > - guc_context_policy_init(engine, desc); > + memset(info, 0, sizeof(*info)); > + info->context_idx = ctx_id; > + info->engine_class = engine_class_to_guc_class(engine->class); > + info->engine_submit_mask = engine->logical_mask; > + /* > + * NB: GuC interface supports 64 bit LRCA even though i915/HW > + * only supports 32 bit currently. > + */ > + info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); > + info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); > + info->flags = CONTEXT_REGISTRATION_FLAG_KMD; > > /* > * If context is a parent, we need to register a process descriptor > * describing a work queue and register all child contexts. > */ > if (intel_context_is_parent(ce)) { > - struct guc_process_desc *pdesc; > + struct guc_sched_wq_desc *wq_desc; > + u64 wq_desc_offset, wq_base_offset; > > ce->parallel.guc.wqi_tail = 0; > ce->parallel.guc.wqi_head = 0; > > - desc->process_desc = i915_ggtt_offset(ce->state) + > - __get_parent_scratch_offset(ce); > - desc->wq_addr = i915_ggtt_offset(ce->state) + > - __get_wq_offset(ce); > - desc->wq_size = WQ_SIZE; > + wq_desc_offset = i915_ggtt_offset(ce->state) + > + __get_parent_scratch_offset(ce); > + wq_base_offset = i915_ggtt_offset(ce->state) + > + __get_wq_offset(ce); > + info->wq_desc_lo = lower_32_bits(wq_desc_offset); > + info->wq_desc_hi = upper_32_bits(wq_desc_offset); > + info->wq_base_lo = lower_32_bits(wq_base_offset); > + info->wq_base_hi = upper_32_bits(wq_base_offset); > + info->wq_size = WQ_SIZE; > > - pdesc = __get_process_desc(ce); > - memset(pdesc, 0, sizeof(*(pdesc))); > - pdesc->stage_id = ce->guc_id.id; > - pdesc->wq_base_addr = desc->wq_addr; > - pdesc->wq_size_bytes = desc->wq_size; > - pdesc->wq_status = WQ_STATUS_ACTIVE; > - > - for_each_child(ce, child) { > - desc = __get_lrc_desc(guc, child->guc_id.id); > - > - desc->engine_class = > - engine_class_to_guc_class(engine->class); > - desc->hw_context_desc = child->lrc.lrca; > - desc->priority = ce->guc_state.prio; > - desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; > - guc_context_policy_init(engine, desc); > - } > + wq_desc = __get_wq_desc(ce); > + memset(wq_desc, 0, sizeof(*wq_desc)); > + wq_desc->wq_status = WQ_STATUS_ACTIVE; > > clear_children_join_go_memory(ce); > } > @@ -2602,13 +2691,11 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, > u16 guc_id, > u32 preemption_timeout) > { > - u32 action[] = { > - INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT, > - guc_id, > - preemption_timeout > - }; > + struct context_policy policy; > > - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > + __guc_context_policy_start_klv(&policy, guc_id); > + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); > + __guc_context_set_context_policies(guc, &policy, true); > } > > static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) > @@ -2853,16 +2940,20 @@ static int guc_context_alloc(struct intel_context *ce) > return lrc_alloc(ce, ce->engine); > } > > +static void __guc_context_set_prio(struct intel_guc *guc, > + struct intel_context *ce) > +{ > + struct context_policy policy; > + > + __guc_context_policy_start_klv(&policy, ce->guc_id.id); > + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); > + __guc_context_set_context_policies(guc, &policy, true); > +} > + > static void guc_context_set_prio(struct intel_guc *guc, > struct intel_context *ce, > u8 prio) > { > - u32 action[] = { > - INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, > - ce->guc_id.id, > - prio, > - }; > - 
> GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || > prio > GUC_CLIENT_PRIORITY_NORMAL); > lockdep_assert_held(&ce->guc_state.lock); > @@ -2873,9 +2964,9 @@ static void guc_context_set_prio(struct intel_guc *guc, > return; > } > > - guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > - > ce->guc_state.prio = prio; > + __guc_context_set_prio(guc, ce); > + > trace_intel_context_set_prio(ce); > } > > @@ -4360,17 +4451,17 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, > guc_log_context_priority(p, ce); > > if (intel_context_is_parent(ce)) { > - struct guc_process_desc *desc = __get_process_desc(ce); > + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); > struct intel_context *child; > > drm_printf(p, "\t\tNumber children: %u\n", > ce->parallel.number_children); > drm_printf(p, "\t\tWQI Head: %u\n", > - READ_ONCE(desc->head)); > + READ_ONCE(wq_desc->head)); > drm_printf(p, "\t\tWQI Tail: %u\n", > - READ_ONCE(desc->tail)); > + READ_ONCE(wq_desc->tail)); > drm_printf(p, "\t\tWQI Status: %u\n\n", > - READ_ONCE(desc->wq_status)); > + READ_ONCE(wq_desc->wq_status)); > > if (ce->engine->emit_bb_start == > emit_bb_start_parent_no_preempt_mid_batch) { > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > index bb864655c495..cb5dd16421d0 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c > @@ -53,22 +53,22 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, > * firmware as TGL. > */ > #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ > - fw_def(DG2, 0, guc_def(dg2, 69, 0, 3)) \ > - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ > - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) \ > - fw_def(DG1, 0, guc_def(dg1, 69, 0, 3)) \ > - fw_def(ROCKETLAKE, 0, guc_def(tgl, 69, 0, 3)) \ > - fw_def(TIGERLAKE, 0, guc_def(tgl, 69, 0, 3)) \ > - fw_def(JASPERLAKE, 0, guc_def(ehl, 69, 0, 3)) \ > - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 69, 0, 3)) \ > - fw_def(ICELAKE, 0, guc_def(icl, 69, 0, 3)) \ > - fw_def(COMETLAKE, 5, guc_def(cml, 69, 0, 3)) \ > - fw_def(COMETLAKE, 0, guc_def(kbl, 69, 0, 3)) \ > - fw_def(COFFEELAKE, 0, guc_def(kbl, 69, 0, 3)) \ > - fw_def(GEMINILAKE, 0, guc_def(glk, 69, 0, 3)) \ > - fw_def(KABYLAKE, 0, guc_def(kbl, 69, 0, 3)) \ > - fw_def(BROXTON, 0, guc_def(bxt, 69, 0, 3)) \ > - fw_def(SKYLAKE, 0, guc_def(skl, 69, 0, 3)) > + fw_def(DG2, 0, guc_def(dg2, 70, 1, 1)) \ > + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ > + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ > + fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ > + fw_def(ROCKETLAKE, 0, guc_def(tgl, 70, 1, 1)) \ > + fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 1, 1)) \ > + fw_def(JASPERLAKE, 0, guc_def(ehl, 70, 1, 1)) \ > + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 70, 1, 1)) \ > + fw_def(ICELAKE, 0, guc_def(icl, 70, 1, 1)) \ > + fw_def(COMETLAKE, 5, guc_def(cml, 70, 1, 1)) \ > + fw_def(COMETLAKE, 0, guc_def(kbl, 70, 1, 1)) \ > + fw_def(COFFEELAKE, 0, guc_def(kbl, 70, 1, 1)) \ > + fw_def(GEMINILAKE, 0, guc_def(glk, 70, 1, 1)) \ > + fw_def(KABYLAKE, 0, guc_def(kbl, 70, 1, 1)) \ > + fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ > + fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) > > #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ > fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ > -- > 2.25.1 >
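For readers who have not met the KLV scheme the commit message relies on, below is a small, self-contained mock-up of the 32-bit key/length/value layout carried by the new UPDATE_CONTEXT_POLICIES H2G: the key sits in bits 31:16 of the first dword, the value length (in dwords) in bits 15:0, and the value dwords follow. The key IDs mirror the guc_klvs_abi.h additions in the patch above, but make_klv() and the userspace framing are invented stand-ins for the driver's FIELD_PREP()-based MAKE_CONTEXT_POLICY_ADD() helpers, not real i915 code.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: mimic the 32-bit KLV layout used by the
 * UPDATE_CONTEXT_POLICIES H2G.  Key IDs are taken from the patch above;
 * make_klv() stands in for the driver's FIELD_PREP()-based helpers.
 */
#define GUC_KLV_KEY_SHIFT				16
#define GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM	0x2001
#define GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT	0x2002
#define GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY	0x2003

struct klv_dw {
	uint32_t kl;		/* key (bits 31:16) | length in dwords (bits 15:0) */
	uint32_t value;
};

static struct klv_dw make_klv(uint16_t key, uint32_t value)
{
	struct klv_dw klv = {
		.kl    = ((uint32_t)key << GUC_KLV_KEY_SHIFT) | 1u,	/* one value dword */
		.value = value,
	};

	return klv;
}

int main(void)
{
	/* e.g. a 1 ms execution quantum, expressed in microseconds */
	struct klv_dw quantum =
		make_klv(GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM, 1000);

	printf("kl=0x%08x value=%u\n", quantum.kl, quantum.value);
	return 0;
}

A full policy update is then just the action/ctx_id header followed by one such key/value pair per policy, which is what the patch's guc_update_context_policy structure encodes.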
On Fri, 15 Apr 2022 at 10:15, Matt Roper <matthew.d.roper@intel.com> wrote:
>
> On Tue, Apr 12, 2022 at 03:59:55PM -0700, John.C.Harrison@Intel.com wrote:
> > From: John Harrison <John.C.Harrison@Intel.com>
> >
> > The latest GuC firmware drops the context descriptor pool in favour of
> > passing all creation data in the create H2G. It also greatly simplifies
> > the work queue and removes the process descriptor used for multi-LRC
> > submission. So, remove all mention of LRC and process descriptors and
> > update the registration code accordingly.
> >
> > Unfortunately, the new API also removes the ability to set default
> > values for the scheduling policies at context registration time.
> > Instead, a follow up H2G must be sent. The individual scheduling
> > policy update H2G commands are also dropped in favour of a single KLV
> > based H2G. So, change the update wrappers accordingly and call this
> > during context registration.
> >
> > Of course, this second H2G per registration might fail due to being
> > backed up. The registration code has a complicated state machine to
> > cope with the actual registration call failing. However, if that works
> > then there is no support for unwinding if a further call should fail.
> > Unwinding would require sending a H2G to de-register - but that can't
> > be done because the CTB is already backed up.
> >
> > So instead, add a new flag to say whether the context has a pending
> > policy update. This is set if the policy H2G fails at registration
> > time. The submission code checks for this flag and retries the policy
> > update if set. If that call fails, the submission path early exits
> > with a retry error. This is something that is already supported for
> > other reasons.
> >
> > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> > Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> Applied to drm-intel-gt-next. Thanks for the patch and review.
>

(cc'ing Linus and danvet, as a headsup, there is also a phoronix
article where this was discovered).

Okay WTF.

This is in no way acceptable. This needs to be fixed in 5.19-rc ASAP.

Once hardware is released and we remove the gate flag by default, you
cannot just bump firmware versions blindly.

The kernel needs to retain compatibility with all released firmwares
since a device was declared supported.

This needs to be reverted, and then 70 should be introduced with a
fallback to 69 versions.

Very disappointing, I expect this to get dealt with v.quickly.

Dave.
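Purely as a sketch of the fallback being asked for here, and not the fix that eventually landed: try the newest GuC blob whose interface the driver speaks, then fall back to the older released one instead of refusing to load. The file names follow i915's existing <platform>_guc_<major>.<minor>.<patch>.bin convention; the function name and the bare request_firmware() loop are invented for illustration, and a real implementation would also have to keep both H2G ABIs supported in the driver.

#include <linux/device.h>
#include <linux/firmware.h>
#include <linux/kernel.h>

/* Hypothetical: candidate GuC blobs in preference order. */
static const char * const guc_fw_fallback[] = {
	"i915/adlp_guc_70.1.1.bin",	/* preferred: new v70 interface */
	"i915/adlp_guc_69.0.3.bin",	/* fallback: keep released v69 blobs working */
};

/* Hypothetical helper: return the first blob that can be loaded, or NULL. */
static const struct firmware *guc_request_fw_with_fallback(struct device *dev)
{
	const struct firmware *fw;
	int i;

	for (i = 0; i < ARRAY_SIZE(guc_fw_fallback); i++) {
		if (!request_firmware(&fw, guc_fw_fallback[i], dev))
			return fw;	/* caller must also select the matching ABI */
	}

	return NULL;
}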
[TLDR: I'm adding this regression report to the list of tracked
regressions; all text from me you find below is based on a few template
paragraphs you might have encountered already in similar form.]

Hi, this is your Linux kernel regression tracker.

On 15.07.22 01:08, Dave Airlie wrote:
> On Fri, 15 Apr 2022 at 10:15, Matt Roper <matthew.d.roper@intel.com> wrote:
>>
>> On Tue, Apr 12, 2022 at 03:59:55PM -0700, John.C.Harrison@Intel.com wrote:
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> The latest GuC firmware drops the context descriptor pool in favour of
>>> passing all creation data in the create H2G. It also greatly simplifies
>>> the work queue and removes the process descriptor used for multi-LRC
>>> submission. So, remove all mention of LRC and process descriptors and
>>> update the registration code accordingly.
>>>
>>> Unfortunately, the new API also removes the ability to set default
>>> values for the scheduling policies at context registration time.
>>> Instead, a follow up H2G must be sent. The individual scheduling
>>> policy update H2G commands are also dropped in favour of a single KLV
>>> based H2G. So, change the update wrappers accordingly and call this
>>> during context registration.
>>>
>>> Of course, this second H2G per registration might fail due to being
>>> backed up. The registration code has a complicated state machine to
>>> cope with the actual registration call failing. However, if that works
>>> then there is no support for unwinding if a further call should fail.
>>> Unwinding would require sending a H2G to de-register - but that can't
>>> be done because the CTB is already backed up.
>>>
>>> So instead, add a new flag to say whether the context has a pending
>>> policy update. This is set if the policy H2G fails at registration
>>> time. The submission code checks for this flag and retries the policy
>>> update if set. If that call fails, the submission path early exits
>>> with a retry error. This is something that is already supported for
>>> other reasons.
>>>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>
>> Applied to drm-intel-gt-next. Thanks for the patch and review.
>>
>
> (cc'ing Linus and danvet, as a headsup, there is also a phoronix
> article where this was discovered).
>
> Okay WTF.
>
> This is in no way acceptable. This needs to be fixed in 5.19-rc ASAP.
>
> Once hardware is released and we remove the gate flag by default, you
> cannot just bump firmware versions blindly.
>
> The kernel needs to retain compatibility with all released firmwares
> since a device was declared supported.
>
> This needs to be reverted, and then 70 should be introduced with a
> fallback to 69 versions.
>
> Very disappointing, I expect this to get dealt with v.quickly.

To be sure below issue doesn't fall through the cracks unnoticed, I'm
adding it to regzbot, my Linux kernel regression tracking bot:

#regzbot ^introduced 2584b3549f4c4081
#regzbot title
#regzbot ignore-activity

This isn't a regression? This issue or a fix for it are already
discussed somewhere else? It was fixed already? You want to clarify
when the regression started to happen? Or point out I got the title or
something else totally wrong? Then just reply -- ideally with also
telling regzbot about it, as explained here:
https://linux-regtracking.leemhuis.info/tracked-regression/

Reminder for developers: When fixing the issue, add 'Link:' tags
pointing to the report (the mail this one replies to), as explained in
the Linux kernel's documentation; above webpage explains why this is
important for tracked regressions.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)

P.S.: As the Linux kernel's regression tracker I deal with a lot of
reports and sometimes miss something important when writing mails like
this. If that's the case here, don't hesitate to tell me in a public
reply, it's in everyone's interest to set the public record straight.
On 2022-07-14 16:08:51, Dave Airlie wrote:
> On Fri, 15 Apr 2022 at 10:15, Matt Roper <matthew.d.roper@intel.com> wrote:
> >
> > On Tue, Apr 12, 2022 at 03:59:55PM -0700, John.C.Harrison@Intel.com wrote:
> > > From: John Harrison <John.C.Harrison@Intel.com>
> > >
> > > The latest GuC firmware drops the context descriptor pool in favour of
> > > passing all creation data in the create H2G. It also greatly simplifies
> > > the work queue and removes the process descriptor used for multi-LRC
> > > submission. So, remove all mention of LRC and process descriptors and
> > > update the registration code accordingly.
> > >
> > > Unfortunately, the new API also removes the ability to set default
> > > values for the scheduling policies at context registration time.
> > > Instead, a follow up H2G must be sent. The individual scheduling
> > > policy update H2G commands are also dropped in favour of a single KLV
> > > based H2G. So, change the update wrappers accordingly and call this
> > > during context registration.
> > >
> > > Of course, this second H2G per registration might fail due to being
> > > backed up. The registration code has a complicated state machine to
> > > cope with the actual registration call failing. However, if that works
> > > then there is no support for unwinding if a further call should fail.
> > > Unwinding would require sending a H2G to de-register - but that can't
> > > be done because the CTB is already backed up.
> > >
> > > So instead, add a new flag to say whether the context has a pending
> > > policy update. This is set if the policy H2G fails at registration
> > > time. The submission code checks for this flag and retries the policy
> > > update if set. If that call fails, the submission path early exits
> > > with a retry error. This is something that is already supported for
> > > other reasons.
> > >
> > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> > > Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> >
> > Applied to drm-intel-gt-next. Thanks for the patch and review.
> >
>
> (cc'ing Linus and danvet, as a headsup, there is also a phoronix
> article where this was discovered).
>
> Okay WTF.
>
> This is in no way acceptable. This needs to be fixed in 5.19-rc ASAP.
>
> Once hardware is released and we remove the gate flag by default, you
> cannot just bump firmware versions blindly.
>
> The kernel needs to retain compatibility with all released firmwares
> since a device was declared supported.
>
> This needs to be reverted, and then 70 should be introduced with a
> fallback to 69 versions.
>
> Very disappointing, I expect this to get dealt with v.quickly.

This reminds me of something. A distant memory, really. But, if you can
believe it, i915 used to actually be able to *do something* without the
*closed source* guc. Crazy, right? Anyway, that's all ancient history
now. I mean, you have to go back pretty far for that. Let me check my
notes. Yeah, you'd probably have to go all the way back to 2021 for
that. I guess a lot of things were much simpler back then though.
Anyway...

Always fun to reminisce.

-Jordan
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 9ad6df1b6fbc..be9ac47fa9d0 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -122,11 +122,9 @@ enum intel_guc_action { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, - INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, - INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, - INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index f0814a57c191..4a59478c3b5c 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -6,6 +6,8 @@ #ifndef _ABI_GUC_KLVS_ABI_H #define _ABI_GUC_KLVS_ABI_H +#include <linux/types.h> + /** * DOC: GuC KLV * @@ -79,4 +81,17 @@ #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907 #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u +/* + * Per context scheduling policy update keys. + */ +enum { + GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001, + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002, + GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003, + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004, + GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005, + + GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, +}; + #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4e431c14b118..3f3373f68123 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -170,11 +170,6 @@ struct intel_guc { /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; - /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ - struct i915_vma *lrc_desc_pool; - /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ - void *lrc_desc_pool_vaddr; - /** * @context_lookup: used to resolve intel_context from guc_id, if a * context is present in this structure it is registered with the GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index f21b6de46a99..c154b5efccde 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -197,54 +197,45 @@ struct guc_wq_item { u32 fence_id; } __packed; -struct guc_process_desc { - u32 stage_id; - u64 db_base_addr; +struct guc_sched_wq_desc { u32 head; u32 tail; u32 error_offset; - u64 wq_base_addr; - u32 wq_size_bytes; u32 wq_status; - u32 engine_presence; - u32 priority; - u32 reserved[36]; + u32 reserved[28]; } __packed; +/* Helper for context registration H2G */ +struct guc_ctxt_registration_info { + u32 flags; + u32 context_idx; + u32 engine_class; + u32 engine_submit_mask; + u32 wq_desc_lo; + u32 wq_desc_hi; + u32 wq_base_lo; + u32 wq_base_hi; + u32 wq_size; + u32 hwlrca_lo; + u32 hwlrca_hi; +}; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) -#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 
-#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000 +/* 32-bit KLV structure as used by policy updates and others */ +struct guc_klv_generic_dw_t { + u32 kl; + u32 value; +} __packed; -/* Preempt to idle on quantum expiry */ -#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0) +/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ +struct guc_update_context_policy_header { + u32 action; + u32 ctx_id; +} __packed; -/* - * GuC Context registration descriptor. - * FIXME: This is only required to exist during context registration. - * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC - * is not required. - */ -struct guc_lrc_desc { - u32 hw_context_desc; - u32 slpm_perf_mode_hint; /* SPLC v1 only */ - u32 slpm_freq_hint; - u32 engine_submit_mask; /* In logical space */ - u8 engine_class; - u8 reserved0[3]; - u32 priority; - u32 process_desc; - u32 wq_addr; - u32 wq_size; - u32 context_flags; /* CONTEXT_REGISTRATION_* */ - /* Time for one workload to execute. (in micro seconds) */ - u32 execution_quantum; - /* Time to wait for a preemption request to complete before issuing a - * reset. (in micro seconds). - */ - u32 preemption_timeout; - u32 policy_flags; /* CONTEXT_POLICY_* */ - u32 reserved1[19]; +struct guc_update_context_policy { + struct guc_update_context_policy_header header; + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; } __packed; #define GUC_POWER_UNSPECIFIED 0 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e1612c393781..ee45fdb67f32 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -162,7 +162,8 @@ guc_create_parallel(struct intel_engine_cs **engines, #define SCHED_STATE_ENABLED BIT(4) #define SCHED_STATE_PENDING_ENABLE BIT(5) #define SCHED_STATE_REGISTERED BIT(6) -#define SCHED_STATE_BLOCKED_SHIFT 7 +#define SCHED_STATE_POLICY_REQUIRED BIT(7) +#define SCHED_STATE_BLOCKED_SHIFT 8 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) @@ -301,6 +302,23 @@ static inline void clr_context_registered(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; } +static inline bool context_policy_required(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; +} + +static inline void set_context_policy_required(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; +} + +static inline void clr_context_policy_required(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -396,12 +414,12 @@ struct sync_semaphore { }; struct parent_scratch { - struct guc_process_desc pdesc; + struct guc_sched_wq_desc wq_desc; struct sync_semaphore go; struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; - u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; u32 wq[WQ_SIZE / sizeof(u32)]; @@ -438,15 +456,15 @@ __get_parent_scratch(struct intel_context *ce) LRC_STATE_OFFSET) / sizeof(u32))); } -static struct guc_process_desc * -__get_process_desc(struct intel_context *ce) 
+static struct guc_sched_wq_desc * +__get_wq_desc(struct intel_context *ce) { struct parent_scratch *ps = __get_parent_scratch(ce); - return &ps->pdesc; + return &ps->wq_desc; } -static u32 *get_wq_pointer(struct guc_process_desc *desc, +static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, struct intel_context *ce, u32 wqi_size) { @@ -458,7 +476,7 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc, #define AVAILABLE_SPACE \ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { - ce->parallel.guc.wqi_head = READ_ONCE(desc->head); + ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); if (wqi_size > AVAILABLE_SPACE) return NULL; @@ -468,15 +486,6 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc, return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; } -static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) -{ - struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; - - GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); - - return &base[index]; -} - static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) { struct intel_context *ce = xa_load(&guc->context_lookup, id); @@ -486,39 +495,11 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) return ce; } -static int guc_lrc_desc_pool_create(struct intel_guc *guc) -{ - u32 size; - int ret; - - size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * - GUC_MAX_CONTEXT_ID); - ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, - (void **)&guc->lrc_desc_pool_vaddr); - if (ret) - return ret; - - return 0; -} - -static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) -{ - guc->lrc_desc_pool_vaddr = NULL; - i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); -} - static inline bool guc_submission_initialized(struct intel_guc *guc) { return guc->submission_initialized; } -static inline void _reset_lrc_desc(struct intel_guc *guc, u32 id) -{ - struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); - - memset(desc, 0, sizeof(*desc)); -} - static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) { return __get_context(guc, id); @@ -545,8 +526,6 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) if (unlikely(!guc_submission_initialized(guc))) return; - _reset_lrc_desc(guc, id); - /* * xarray API doesn't have xa_erase_irqsave wrapper, so calling * the lower level functions directly. 
@@ -632,6 +611,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } +static int guc_context_policy_init(struct intel_context *ce, bool loop); static int try_context_registration(struct intel_context *ce, bool loop); static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) @@ -658,6 +638,12 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); + if (context_policy_required(ce)) { + err = guc_context_policy_init(ce, false); + if (err) + return err; + } + spin_lock(&ce->guc_state.lock); /* @@ -751,7 +737,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) return (WQ_SIZE - ce->parallel.guc.wqi_tail); } -static void write_wqi(struct guc_process_desc *desc, +static void write_wqi(struct guc_sched_wq_desc *wq_desc, struct intel_context *ce, u32 wqi_size) { @@ -764,13 +750,13 @@ static void write_wqi(struct guc_process_desc *desc, ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1); - WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); + WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); } static int guc_wq_noop_append(struct intel_context *ce) { - struct guc_process_desc *desc = __get_process_desc(ce); - u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); + u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; if (!wqi) @@ -789,7 +775,7 @@ static int __guc_wq_item_append(struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); struct intel_context *child; - struct guc_process_desc *desc = __get_process_desc(ce); + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); unsigned int wqi_size = (ce->parallel.number_children + 4) * sizeof(u32); u32 *wqi; @@ -809,7 +795,7 @@ static int __guc_wq_item_append(struct i915_request *rq) return ret; } - wqi = get_wq_pointer(desc, ce, wqi_size); + wqi = get_wq_pointer(wq_desc, ce, wqi_size); if (!wqi) return -EBUSY; @@ -824,7 +810,7 @@ static int __guc_wq_item_append(struct i915_request *rq) for_each_child(ce, child) *wqi++ = child->ring->tail / sizeof(u64); - write_wqi(desc, ce, wqi_size); + write_wqi(wq_desc, ce, wqi_size); return 0; } @@ -1812,20 +1798,10 @@ static void reset_fail_worker_func(struct work_struct *w); int intel_guc_submission_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - int ret; if (guc->submission_initialized) return 0; - ret = guc_lrc_desc_pool_create(guc); - if (ret) - return ret; - /* - * Keep static analysers happy, let them know that we allocated the - * vma after testing that it didn't exist earlier. 
- */ - GEM_BUG_ON(!guc->lrc_desc_pool); - guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); if (!guc->submission_state.guc_ids_bitmap) @@ -1844,7 +1820,6 @@ void intel_guc_submission_fini(struct intel_guc *guc) return; guc_flush_destroyed_contexts(guc); - guc_lrc_desc_pool_destroy(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); guc->submission_initialized = false; @@ -2104,69 +2079,96 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) static int __guc_action_register_multi_lrc(struct intel_guc *guc, struct intel_context *ce, - u32 guc_id, - u32 offset, + struct guc_ctxt_registration_info *info, bool loop) { struct intel_context *child; - u32 action[4 + MAX_ENGINE_INSTANCE]; + u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; int len = 0; + u32 next_id; GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; - action[len++] = guc_id; + action[len++] = info->flags; + action[len++] = info->context_idx; + action[len++] = info->engine_class; + action[len++] = info->engine_submit_mask; + action[len++] = info->wq_desc_lo; + action[len++] = info->wq_desc_hi; + action[len++] = info->wq_base_lo; + action[len++] = info->wq_base_hi; + action[len++] = info->wq_size; action[len++] = ce->parallel.number_children + 1; - action[len++] = offset; + action[len++] = info->hwlrca_lo; + action[len++] = info->hwlrca_hi; + + next_id = info->context_idx + 1; for_each_child(ce, child) { - offset += sizeof(struct guc_lrc_desc); - action[len++] = offset; + GEM_BUG_ON(next_id++ != child->guc_id.id); + + /* + * NB: GuC interface supports 64 bit LRCA even though i915/HW + * only supports 32 bit currently. 
+		 */
+		action[len++] = lower_32_bits(child->lrc.lrca);
+		action[len++] = upper_32_bits(child->lrc.lrca);
 	}
 
+	GEM_BUG_ON(len > ARRAY_SIZE(action));
+
 	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
 }
 
 static int __guc_action_register_context(struct intel_guc *guc,
-					 u32 guc_id,
-					 u32 offset,
+					 struct guc_ctxt_registration_info *info,
 					 bool loop)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_REGISTER_CONTEXT,
-		guc_id,
-		offset,
+		info->flags,
+		info->context_idx,
+		info->engine_class,
+		info->engine_submit_mask,
+		info->wq_desc_lo,
+		info->wq_desc_hi,
+		info->wq_base_lo,
+		info->wq_base_hi,
+		info->wq_size,
+		info->hwlrca_lo,
+		info->hwlrca_hi,
 	};
 
 	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
 					     0, loop);
 }
 
-static void prepare_context_registration_info(struct intel_context *ce);
+static void prepare_context_registration_info(struct intel_context *ce,
+					      struct guc_ctxt_registration_info *info);
 
 static int register_context(struct intel_context *ce, bool loop)
 {
+	struct guc_ctxt_registration_info info;
 	struct intel_guc *guc = ce_to_guc(ce);
-	u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
-		ce->guc_id.id * sizeof(struct guc_lrc_desc);
 	int ret;
 
 	GEM_BUG_ON(intel_context_is_child(ce));
 	trace_intel_context_register(ce);
 
-	prepare_context_registration_info(ce);
+	prepare_context_registration_info(ce, &info);
 
 	if (intel_context_is_parent(ce))
-		ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
-						      offset, loop);
+		ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
 	else
-		ret = __guc_action_register_context(guc, ce->guc_id.id, offset,
-						    loop);
+		ret = __guc_action_register_context(guc, &info, loop);
 	if (likely(!ret)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&ce->guc_state.lock, flags);
 		set_context_registered(ce);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+		guc_context_policy_init(ce, loop);
 	}
 
 	return ret;
@@ -2216,26 +2218,118 @@ static inline u32 get_children_join_value(struct intel_context *ce,
 	return __get_parent_scratch(ce)->join[child_index].semaphore;
 }
 
-static void guc_context_policy_init(struct intel_engine_cs *engine,
-				    struct guc_lrc_desc *desc)
+struct context_policy {
+	u32 count;
+	struct guc_update_context_policy h2g;
+};
+
+static u32 __guc_context_policy_action_size(struct context_policy *policy)
 {
-	desc->policy_flags = 0;
+	size_t bytes = sizeof(policy->h2g.header) +
+		       (sizeof(policy->h2g.klv[0]) * policy->count);
 
-	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
-		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
+	return bytes / sizeof(u32);
+}
+
+static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
+{
+	policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
+	policy->h2g.header.ctx_id = guc_id;
+	policy->count = 0;
+}
+
+#define MAKE_CONTEXT_POLICY_ADD(func, id) \
+static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
+{ \
+	GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
+	policy->h2g.klv[policy->count].kl = \
+		FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+		FIELD_PREP(GUC_KLV_0_LEN, 1); \
+	policy->h2g.klv[policy->count].value = data; \
+	policy->count++; \
+}
+
+MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
+MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
+MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
+MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+
+#undef MAKE_CONTEXT_POLICY_ADD
+
+static int __guc_context_set_context_policies(struct intel_guc *guc,
+					      struct context_policy *policy,
+					      bool loop)
+{
+	return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
+					     __guc_context_policy_action_size(policy),
+					     0, loop);
+}
+
+static int guc_context_policy_init(struct intel_context *ce, bool loop)
+{
+	struct intel_engine_cs *engine = ce->engine;
+	struct intel_guc *guc = &engine->gt->uc.guc;
+	struct context_policy policy;
+	u32 execution_quantum;
+	u32 preemption_timeout;
+	bool missing = false;
+	unsigned long flags;
+	int ret;
 
 	/* NB: For both of these, zero means disabled. */
-	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
-	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+	execution_quantum = engine->props.timeslice_duration_ms * 1000;
+	preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+
+	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
+
+	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+	__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
+	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+
+	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+		__guc_context_policy_add_preempt_to_idle(&policy, 1);
+
+	ret = __guc_context_set_context_policies(guc, &policy, loop);
+	missing = ret != 0;
+
+	if (!missing && intel_context_is_parent(ce)) {
+		struct intel_context *child;
+
+		for_each_child(ce, child) {
+			__guc_context_policy_start_klv(&policy, child->guc_id.id);
+
+			if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+				__guc_context_policy_add_preempt_to_idle(&policy, 1);
+
+			child->guc_state.prio = ce->guc_state.prio;
+			__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+			__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
+			__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+
+			ret = __guc_context_set_context_policies(guc, &policy, loop);
+			if (ret) {
+				missing = true;
+				break;
+			}
+		}
+	}
+
+	spin_lock_irqsave(&ce->guc_state.lock, flags);
+	if (missing)
+		set_context_policy_required(ce);
+	else
+		clr_context_policy_required(ce);
+	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+	return ret;
 }
 
-static void prepare_context_registration_info(struct intel_context *ce)
+static void prepare_context_registration_info(struct intel_context *ce,
+					      struct guc_ctxt_registration_info *info)
 {
 	struct intel_engine_cs *engine = ce->engine;
 	struct intel_guc *guc = &engine->gt->uc.guc;
 	u32 ctx_id = ce->guc_id.id;
-	struct guc_lrc_desc *desc;
-	struct intel_context *child;
 
 	GEM_BUG_ON(!engine->mask);
 
@@ -2246,47 +2340,42 @@ static void prepare_context_registration_info(struct intel_context *ce)
 	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
 		   i915_gem_object_is_lmem(ce->ring->vma->obj));
 
-	desc = __get_lrc_desc(guc, ctx_id);
-	desc->engine_class = engine_class_to_guc_class(engine->class);
-	desc->engine_submit_mask = engine->logical_mask;
-	desc->hw_context_desc = ce->lrc.lrca;
-	desc->priority = ce->guc_state.prio;
-	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
-	guc_context_policy_init(engine, desc);
+	memset(info, 0, sizeof(*info));
+	info->context_idx = ctx_id;
+	info->engine_class = engine_class_to_guc_class(engine->class);
+	info->engine_submit_mask = engine->logical_mask;
+	/*
+	 * NB: GuC interface supports 64 bit LRCA even though i915/HW
+	 * only supports 32 bit currently.
+	 */
+	info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
+	info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
+	info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
 
 	/*
 	 * If context is a parent, we need to register a process descriptor
 	 * describing a work queue and register all child contexts.
 	 */
 	if (intel_context_is_parent(ce)) {
-		struct guc_process_desc *pdesc;
+		struct guc_sched_wq_desc *wq_desc;
+		u64 wq_desc_offset, wq_base_offset;
 
 		ce->parallel.guc.wqi_tail = 0;
 		ce->parallel.guc.wqi_head = 0;
 
-		desc->process_desc = i915_ggtt_offset(ce->state) +
-			__get_parent_scratch_offset(ce);
-		desc->wq_addr = i915_ggtt_offset(ce->state) +
-			__get_wq_offset(ce);
-		desc->wq_size = WQ_SIZE;
+		wq_desc_offset = i915_ggtt_offset(ce->state) +
+				 __get_parent_scratch_offset(ce);
+		wq_base_offset = i915_ggtt_offset(ce->state) +
+				 __get_wq_offset(ce);
+		info->wq_desc_lo = lower_32_bits(wq_desc_offset);
+		info->wq_desc_hi = upper_32_bits(wq_desc_offset);
+		info->wq_base_lo = lower_32_bits(wq_base_offset);
+		info->wq_base_hi = upper_32_bits(wq_base_offset);
+		info->wq_size = WQ_SIZE;
 
-		pdesc = __get_process_desc(ce);
-		memset(pdesc, 0, sizeof(*(pdesc)));
-		pdesc->stage_id = ce->guc_id.id;
-		pdesc->wq_base_addr = desc->wq_addr;
-		pdesc->wq_size_bytes = desc->wq_size;
-		pdesc->wq_status = WQ_STATUS_ACTIVE;
-
-		for_each_child(ce, child) {
-			desc = __get_lrc_desc(guc, child->guc_id.id);
-
-			desc->engine_class =
-				engine_class_to_guc_class(engine->class);
-			desc->hw_context_desc = child->lrc.lrca;
-			desc->priority = ce->guc_state.prio;
-			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
-			guc_context_policy_init(engine, desc);
-		}
+		wq_desc = __get_wq_desc(ce);
+		memset(wq_desc, 0, sizeof(*wq_desc));
+		wq_desc->wq_status = WQ_STATUS_ACTIVE;
 
 		clear_children_join_go_memory(ce);
 	}
@@ -2602,13 +2691,11 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
 						 u16 guc_id,
 						 u32 preemption_timeout)
 {
-	u32 action[] = {
-		INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT,
-		guc_id,
-		preemption_timeout
-	};
+	struct context_policy policy;
 
-	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+	__guc_context_policy_start_klv(&policy, guc_id);
+	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+	__guc_context_set_context_policies(guc, &policy, true);
 }
 
 static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
@@ -2853,16 +2940,20 @@ static int guc_context_alloc(struct intel_context *ce)
 	return lrc_alloc(ce, ce->engine);
 }
 
+static void __guc_context_set_prio(struct intel_guc *guc,
+				   struct intel_context *ce)
+{
+	struct context_policy policy;
+
+	__guc_context_policy_start_klv(&policy, ce->guc_id.id);
+	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+	__guc_context_set_context_policies(guc, &policy, true);
+}
+
 static void guc_context_set_prio(struct intel_guc *guc,
 				 struct intel_context *ce,
 				 u8 prio)
 {
-	u32 action[] = {
-		INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
-		ce->guc_id.id,
-		prio,
-	};
-
 	GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
 		   prio > GUC_CLIENT_PRIORITY_NORMAL);
 	lockdep_assert_held(&ce->guc_state.lock);
@@ -2873,9 +2964,9 @@ static void guc_context_set_prio(struct intel_guc *guc,
 		return;
 	}
 
-	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
-
 	ce->guc_state.prio = prio;
+	__guc_context_set_prio(guc, ce);
+
 	trace_intel_context_set_prio(ce);
 }
 
@@ -4360,17 +4451,17 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
 	guc_log_context_priority(p, ce);
 
 	if (intel_context_is_parent(ce)) {
-		struct guc_process_desc *desc = __get_process_desc(ce);
+		struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
 		struct intel_context *child;
 
 		drm_printf(p, "\t\tNumber children: %u\n",
 			   ce->parallel.number_children);
 		drm_printf(p, "\t\tWQI Head: %u\n",
-			   READ_ONCE(desc->head));
+			   READ_ONCE(wq_desc->head));
 		drm_printf(p, "\t\tWQI Tail: %u\n",
-			   READ_ONCE(desc->tail));
+			   READ_ONCE(wq_desc->tail));
 		drm_printf(p, "\t\tWQI Status: %u\n\n",
-			   READ_ONCE(desc->wq_status));
+			   READ_ONCE(wq_desc->wq_status));
 
 		if (ce->engine->emit_bb_start ==
 		    emit_bb_start_parent_no_preempt_mid_batch) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bb864655c495..cb5dd16421d0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,22 +53,22 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * firmware as TGL.
  */
 #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
-	fw_def(DG2,          0, guc_def(dg2,  69, 0, 3)) \
-	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 69, 0, 3)) \
-	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  69, 0, 3)) \
-	fw_def(DG1,          0, guc_def(dg1,  69, 0, 3)) \
-	fw_def(ROCKETLAKE,   0, guc_def(tgl,  69, 0, 3)) \
-	fw_def(TIGERLAKE,    0, guc_def(tgl,  69, 0, 3)) \
-	fw_def(JASPERLAKE,   0, guc_def(ehl,  69, 0, 3)) \
-	fw_def(ELKHARTLAKE,  0, guc_def(ehl,  69, 0, 3)) \
-	fw_def(ICELAKE,      0, guc_def(icl,  69, 0, 3)) \
-	fw_def(COMETLAKE,    5, guc_def(cml,  69, 0, 3)) \
-	fw_def(COMETLAKE,    0, guc_def(kbl,  69, 0, 3)) \
-	fw_def(COFFEELAKE,   0, guc_def(kbl,  69, 0, 3)) \
-	fw_def(GEMINILAKE,   0, guc_def(glk,  69, 0, 3)) \
-	fw_def(KABYLAKE,     0, guc_def(kbl,  69, 0, 3)) \
-	fw_def(BROXTON,      0, guc_def(bxt,  69, 0, 3)) \
-	fw_def(SKYLAKE,      0, guc_def(skl,  69, 0, 3))
+	fw_def(DG2,          0, guc_def(dg2,  70, 1, 1)) \
+	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 70, 1, 1)) \
+	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  70, 1, 1)) \
+	fw_def(DG1,          0, guc_def(dg1,  70, 1, 1)) \
+	fw_def(ROCKETLAKE,   0, guc_def(tgl,  70, 1, 1)) \
+	fw_def(TIGERLAKE,    0, guc_def(tgl,  70, 1, 1)) \
+	fw_def(JASPERLAKE,   0, guc_def(ehl,  70, 1, 1)) \
+	fw_def(ELKHARTLAKE,  0, guc_def(ehl,  70, 1, 1)) \
+	fw_def(ICELAKE,      0, guc_def(icl,  70, 1, 1)) \
+	fw_def(COMETLAKE,    5, guc_def(cml,  70, 1, 1)) \
+	fw_def(COMETLAKE,    0, guc_def(kbl,  70, 1, 1)) \
+	fw_def(COFFEELAKE,   0, guc_def(kbl,  70, 1, 1)) \
+	fw_def(GEMINILAKE,   0, guc_def(glk,  70, 1, 1)) \
+	fw_def(KABYLAKE,     0, guc_def(kbl,  70, 1, 1)) \
+	fw_def(BROXTON,      0, guc_def(bxt,  70, 1, 1)) \
+	fw_def(SKYLAKE,      0, guc_def(skl,  70, 1, 1))
 
 #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
 	fw_def(ALDERLAKE_P,  0, huc_def(tgl,  7, 9, 3)) \