Message ID | 20210624070516.21893-35-matthew.brost@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | GuC submission support | expand |
On 6/24/2021 00:05, Matthew Brost wrote: > The new GuC interface introduces an MMIO H2G command, > INTEL_GUC_ACTION_RESET_CLIENT, which is used to implement suspend. This > MMIO tears down any active contexts generating a context reset G2H CTB > for each. Once that step completes the GuC tears down the CTB > channels. It is safe to suspend once this MMIO H2G command completes > and all G2H CTBs have been processed. In practice the i915 will likely > never receive a G2H as suspend should only be called after the GPU is > idle. > > Resume is implemented in the same manner as before - simply reload the > GuC firmware and reinitialize everything (e.g. CTB channels, contexts, > etc..). > > Cc: John Harrison <john.c.harrison@intel.com> > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: John Harrison <John.C.Harrison@Intel.com? > --- > .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 + > drivers/gpu/drm/i915/gt/uc/intel_guc.c | 64 ++++++++----------- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 14 ++-- > .../gpu/drm/i915/gt/uc/intel_guc_submission.h | 5 ++ > drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++-- > 5 files changed, 53 insertions(+), 51 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h > index 57e18babdf4b..596cf4b818e5 100644 > --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h > +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h > @@ -142,6 +142,7 @@ enum intel_guc_action { > INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, > INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, > INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, > + INTEL_GUC_ACTION_RESET_CLIENT = 0x5B01, > INTEL_GUC_ACTION_LIMIT > }; > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c > index 9b09395b998f..68266cbffd1f 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c > @@ -524,51 +524,34 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) > */ > int intel_guc_suspend(struct intel_guc *guc) > { > - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; > int ret; > - u32 status; > u32 action[] = { > - INTEL_GUC_ACTION_ENTER_S_STATE, > - GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ > + INTEL_GUC_ACTION_RESET_CLIENT, > }; > > - /* > - * If GuC communication is enabled but submission is not supported, > - * we do not need to suspend the GuC. > - */ > - if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) > + if (!intel_guc_is_ready(guc)) > return 0; > > - /* > - * The ENTER_S_STATE action queues the save/restore operation in GuC FW > - * and then returns, so waiting on the H2G is not enough to guarantee > - * GuC is done. When all the processing is done, GuC writes > - * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll > - * on that. Note that GuC does not ensure that the value in the register > - * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is > - * in progress so we need to take care of that ourselves as well. > - */ > - > - intel_uncore_write(uncore, SOFT_SCRATCH(14), > - INTEL_GUC_SLEEP_STATE_INVALID_MASK); > - > - ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); > - if (ret) > - return ret; > - > - ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), > - INTEL_GUC_SLEEP_STATE_INVALID_MASK, > - 0, 0, 10, &status); > - if (ret) > - return ret; > - > - if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) { > - DRM_ERROR("GuC failed to change sleep state. " > - "action=0x%x, err=%u\n", > - action[0], status); > - return -EIO; > + if (intel_guc_submission_is_used(guc)) { > + /* > + * This H2G MMIO command tears down the GuC in two steps. First it will > + * generate a G2H CTB for every active context indicating a reset. In > + * practice the i915 shouldn't ever get a G2H as suspend should only be > + * called when the GPU is idle. Next, it tears down the CTBs and this > + * H2G MMIO command completes. > + * > + * Don't abort on a failure code from the GuC. Keep going and do the > + * clean up in santize() and re-initialisation on resume and hopefully > + * the error here won't be problematic. > + */ > + ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); > + if (ret) > + DRM_ERROR("GuC suspend: RESET_CLIENT action failed with error %d!\n", ret); > } > > + /* Signal that the GuC isn't running. */ > + intel_guc_sanitize(guc); > + > return 0; > } > > @@ -578,7 +561,12 @@ int intel_guc_suspend(struct intel_guc *guc) > */ > int intel_guc_resume(struct intel_guc *guc) > { > - /* XXX: to be implemented with submission interface rework */ > + /* > + * NB: This function can still be called even if GuC submission is > + * disabled, e.g. if GuC is enabled for HuC authentication only. Thus, > + * if any code is later added here, it must be support doing nothing > + * if submission is disabled (as per intel_guc_suspend). > + */ > return 0; > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 59fca9748c15..16b61fe71b07 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -304,10 +304,10 @@ static int guc_submission_busy_loop(struct intel_guc* guc, > return err; > } > > -static int guc_wait_for_pending_msg(struct intel_guc *guc, > - atomic_t *wait_var, > - bool interruptible, > - long timeout) > +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, > + atomic_t *wait_var, > + bool interruptible, > + long timeout) > { > const int state = interruptible ? > TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > @@ -352,8 +352,8 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) > if (unlikely(timeout < 0)) > timeout = -timeout, interruptible = false; > > - return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > - interruptible, timeout); > + return intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > + interruptible, timeout); > } > > static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); > @@ -625,7 +625,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) > for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { > intel_guc_to_host_event_handler(guc); > #define wait_for_reset(guc, wait_var) \ > - guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) > + intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) > do { > wait_for_reset(guc, &guc->outstanding_submission_g2h); > } while (!list_empty(&guc->ct.requests.incoming)); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h > index 95df5ab06031..b9b9f0f60f91 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h > @@ -27,6 +27,11 @@ void intel_guc_log_context_info(struct intel_guc *guc, struct drm_printer *p); > > bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); > > +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, > + atomic_t *wait_var, > + bool interruptible, > + long timeout); > + > static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) > { > /* XXX: GuC submission is unavailable for now */ > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > index ab11fe731ee7..b523a8521351 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > @@ -596,14 +596,18 @@ void intel_uc_cancel_requests(struct intel_uc *uc) > void intel_uc_runtime_suspend(struct intel_uc *uc) > { > struct intel_guc *guc = &uc->guc; > - int err; > > if (!intel_guc_is_ready(guc)) > return; > > - err = intel_guc_suspend(guc); > - if (err) > - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); > + /* > + * Wait for any outstanding CTB before tearing down communication /w the > + * GuC. > + */ > +#define OUTSTANDING_CTB_TIMEOUT_PERIOD (HZ / 5) > + intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > + false, OUTSTANDING_CTB_TIMEOUT_PERIOD); > + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); > > guc_disable_communication(guc); > } > @@ -612,12 +616,16 @@ void intel_uc_suspend(struct intel_uc *uc) > { > struct intel_guc *guc = &uc->guc; > intel_wakeref_t wakeref; > + int err; > > if (!intel_guc_is_ready(guc)) > return; > > - with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) > - intel_uc_runtime_suspend(uc); > + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { > + err = intel_guc_suspend(guc); > + if (err) > + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); > + } > } > > static int __uc_resume(struct intel_uc *uc, bool enable_communication)
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 57e18babdf4b..596cf4b818e5 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, + INTEL_GUC_ACTION_RESET_CLIENT = 0x5B01, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 9b09395b998f..68266cbffd1f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -524,51 +524,34 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) */ int intel_guc_suspend(struct intel_guc *guc) { - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; int ret; - u32 status; u32 action[] = { - INTEL_GUC_ACTION_ENTER_S_STATE, - GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + INTEL_GUC_ACTION_RESET_CLIENT, }; - /* - * If GuC communication is enabled but submission is not supported, - * we do not need to suspend the GuC. - */ - if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) + if (!intel_guc_is_ready(guc)) return 0; - /* - * The ENTER_S_STATE action queues the save/restore operation in GuC FW - * and then returns, so waiting on the H2G is not enough to guarantee - * GuC is done. When all the processing is done, GuC writes - * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll - * on that. Note that GuC does not ensure that the value in the register - * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is - * in progress so we need to take care of that ourselves as well. - */ - - intel_uncore_write(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK); - - ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); - if (ret) - return ret; - - ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK, - 0, 0, 10, &status); - if (ret) - return ret; - - if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) { - DRM_ERROR("GuC failed to change sleep state. " - "action=0x%x, err=%u\n", - action[0], status); - return -EIO; + if (intel_guc_submission_is_used(guc)) { + /* + * This H2G MMIO command tears down the GuC in two steps. First it will + * generate a G2H CTB for every active context indicating a reset. In + * practice the i915 shouldn't ever get a G2H as suspend should only be + * called when the GPU is idle. Next, it tears down the CTBs and this + * H2G MMIO command completes. + * + * Don't abort on a failure code from the GuC. Keep going and do the + * clean up in santize() and re-initialisation on resume and hopefully + * the error here won't be problematic. + */ + ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (ret) + DRM_ERROR("GuC suspend: RESET_CLIENT action failed with error %d!\n", ret); } + /* Signal that the GuC isn't running. */ + intel_guc_sanitize(guc); + return 0; } @@ -578,7 +561,12 @@ int intel_guc_suspend(struct intel_guc *guc) */ int intel_guc_resume(struct intel_guc *guc) { - /* XXX: to be implemented with submission interface rework */ + /* + * NB: This function can still be called even if GuC submission is + * disabled, e.g. if GuC is enabled for HuC authentication only. Thus, + * if any code is later added here, it must be support doing nothing + * if submission is disabled (as per intel_guc_suspend). + */ return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 59fca9748c15..16b61fe71b07 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -304,10 +304,10 @@ static int guc_submission_busy_loop(struct intel_guc* guc, return err; } -static int guc_wait_for_pending_msg(struct intel_guc *guc, - atomic_t *wait_var, - bool interruptible, - long timeout) +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout) { const int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; @@ -352,8 +352,8 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) if (unlikely(timeout < 0)) timeout = -timeout, interruptible = false; - return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, - interruptible, timeout); + return intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + interruptible, timeout); } static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); @@ -625,7 +625,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { intel_guc_to_host_event_handler(guc); #define wait_for_reset(guc, wait_var) \ - guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) + intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) do { wait_for_reset(guc, &guc->outstanding_submission_g2h); } while (!list_empty(&guc->ct.requests.incoming)); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 95df5ab06031..b9b9f0f60f91 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -27,6 +27,11 @@ void intel_guc_log_context_info(struct intel_guc *guc, struct drm_printer *p); bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout); + static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { /* XXX: GuC submission is unavailable for now */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index ab11fe731ee7..b523a8521351 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -596,14 +596,18 @@ void intel_uc_cancel_requests(struct intel_uc *uc) void intel_uc_runtime_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - int err; if (!intel_guc_is_ready(guc)) return; - err = intel_guc_suspend(guc); - if (err) - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + /* + * Wait for any outstanding CTB before tearing down communication /w the + * GuC. + */ +#define OUTSTANDING_CTB_TIMEOUT_PERIOD (HZ / 5) + intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + false, OUTSTANDING_CTB_TIMEOUT_PERIOD); + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); guc_disable_communication(guc); } @@ -612,12 +616,16 @@ void intel_uc_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; intel_wakeref_t wakeref; + int err; if (!intel_guc_is_ready(guc)) return; - with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) - intel_uc_runtime_suspend(uc); + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + } } static int __uc_resume(struct intel_uc *uc, bool enable_communication)