Message ID | 20230113011850.1463965-6-alan.previn.teres.alexis@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/pxp: Add missing cleanup steps for PXP global-teardown | expand |
On Thu, Jan 12, 2023 at 05:18:49PM -0800, Alan Previn wrote: > A driver bug was recently discovered where the security firmware was > receiving internal HW signals indicating that session key expirations > had occurred. Architecturally, the firmware was expecting a response > from the GuC to acknowledge the event with the firmware side. > However the OS was in a suspended state and GuC had been reset. > > Internal specifications actually required the driver to ensure > that all active sessions be properly cleaned up in such cases where > the system is suspended and the GuC potentially unable to respond. > > This patch adds the global teardown code in i915's suspend_prepare > code path. > > Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> > Reviewed-by: Juston Li <justonli@chromium.org> > --- > drivers/gpu/drm/i915/pxp/intel_pxp.c | 60 +++++++++++++++++--- > drivers/gpu/drm/i915/pxp/intel_pxp.h | 1 + > drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 2 +- > drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 9 ++- > drivers/gpu/drm/i915/pxp/intel_pxp_session.h | 5 ++ > 5 files changed, 64 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c > index cfc9af8b3d21..f3d9e7989eb7 100644 > --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c > @@ -270,6 +270,55 @@ static bool pxp_component_bound(struct intel_pxp *pxp) > return bound; > } > > +static int __pxp_global_teardown_locked(struct intel_pxp *pxp, bool terminate_for_cleanup) > +{ > + if (terminate_for_cleanup) { > + if (!pxp->arb_is_valid) > + return 0; > + /* > + * To ensure synchronous and coherent session teardown completion > + * in response to suspend or shutdown triggers, don't use a worker. > + */ > + intel_pxp_mark_termination_in_progress(pxp); > + intel_pxp_terminate(pxp, false); > + } else { > + if (pxp->arb_is_valid) > + return 0; > + /* > + * If we are not in final termination, and the arb-session is currently > + * inactive, we are doing a reset and restart due to some runtime event. > + * Use the worker that was designed for this. > + */ > + pxp_queue_termination(pxp); > + } I really don't see why you need 1 function for totally 2 different cases. Why not 2 functions then? > + > + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(250))) > + return -ETIMEDOUT; > + > + return 0; > +} > + > +void intel_pxp_end(struct intel_pxp *pxp) > +{ > + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; > + intel_wakeref_t wakeref; > + > + if (!intel_pxp_is_enabled(pxp)) > + return; > + > + wakeref = intel_runtime_pm_get(&i915->runtime_pm); > + > + mutex_lock(&pxp->arb_mutex); > + > + if (__pxp_global_teardown_locked(pxp, true)) > + drm_dbg(&i915->drm, "PXP end timed out\n"); > + > + mutex_unlock(&pxp->arb_mutex); > + > + intel_pxp_fini_hw(pxp); > + intel_runtime_pm_put(&i915->runtime_pm, wakeref); > +} > + > /* > * the arb session is restarted from the irq work when we receive the > * termination completion interrupt > @@ -286,16 +335,9 @@ int intel_pxp_start(struct intel_pxp *pxp) > > mutex_lock(&pxp->arb_mutex); > > - if (pxp->arb_is_valid) > - goto unlock; > - > - pxp_queue_termination(pxp); > - > - if (!wait_for_completion_timeout(&pxp->termination, > - msecs_to_jiffies(250))) { > - ret = -ETIMEDOUT; > + ret = __pxp_global_teardown_locked(pxp, false); > + if (ret) > goto unlock; > - } > > /* make sure the compiler doesn't optimize the double access */ > barrier(); > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h > index 9658d3005222..3ded0890cd27 100644 > --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h > @@ -27,6 +27,7 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); > void intel_pxp_tee_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); > > int intel_pxp_start(struct intel_pxp *pxp); > +void intel_pxp_end(struct intel_pxp *pxp); > > int intel_pxp_key_check(struct intel_pxp *pxp, > struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c > index 892d39cc61c1..e427464aa131 100644 > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c > @@ -16,7 +16,7 @@ void intel_pxp_suspend_prepare(struct intel_pxp *pxp) > if (!intel_pxp_is_enabled(pxp)) > return; > > - pxp->arb_is_valid = false; > + intel_pxp_end(pxp); > > intel_pxp_invalidate(pxp); > } > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c > index 74ed7e16e481..d8278c4002e3 100644 > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c > @@ -115,11 +115,14 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) > return ret; > } > > -static void pxp_terminate(struct intel_pxp *pxp) > +void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb) > { > int ret; > > - pxp->hw_state_invalidated = true; > + if (restart_arb) > + pxp->hw_state_invalidated = true; > + else > + pxp->hw_state_invalidated = false; o.O pxp->hw_state_invalidate = restart_arb; ? or even a better name for the restart_arb to already indicate that is the hw_state_invalidate ? > > /* > * if we fail to submit the termination there is no point in waiting for > @@ -167,7 +170,7 @@ static void pxp_session_work(struct work_struct *work) > > if (events & PXP_TERMINATION_REQUEST) { > events &= ~PXP_TERMINATION_COMPLETE; > - pxp_terminate(pxp); > + intel_pxp_terminate(pxp, true); > } > > if (events & PXP_TERMINATION_COMPLETE) > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h > index 903ac52cffa1..4f944b63b5b6 100644 > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h > @@ -12,9 +12,14 @@ struct intel_pxp; > > #ifdef CONFIG_DRM_I915_PXP > void intel_pxp_session_management_init(struct intel_pxp *pxp); > +void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb); > #else > static inline void intel_pxp_session_management_init(struct intel_pxp *pxp) > { > } > + > +static inline void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb) > +{ > +} > #endif > #endif /* __INTEL_PXP_SESSION_H__ */ > -- > 2.39.0 >
Thanks for reviewing - responses below. On Thu, 2023-01-19 at 14:35 -0500, Vivi, Rodrigo wrote: > On Thu, Jan 12, 2023 at 05:18:49PM -0800, Alan Previn wrote: > > A driver bug was recently discovered where the security firmware was > > receiving internal HW signals indicating that session key expirations > > had occurred. Architecturally, the firmware was expecting a response > > from the GuC to acknowledge the event with the firmware side. > > However the OS was in a suspended state and GuC had been reset. > > > > Internal specifications actually required the driver to ensure > > that all active sessions be properly cleaned up in such cases where > > the system is suspended and the GuC potentially unable to respond. > > > > This patch adds the global teardown code in i915's suspend_prepare > > code path. > > > > Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com> > > Reviewed-by: Juston Li <justonli@chromium.org> > > Alan: [snip] > > > > +static int __pxp_global_teardown_locked(struct intel_pxp *pxp, bool terminate_for_cleanup) > > +{ > > + if (terminate_for_cleanup) { > > + if (!pxp->arb_is_valid) > > + return 0; > > + /* > > + * To ensure synchronous and coherent session teardown completion > > + * in response to suspend or shutdown triggers, don't use a worker. > > + */ > > + intel_pxp_mark_termination_in_progress(pxp); > > + intel_pxp_terminate(pxp, false); > > + } else { > > + if (pxp->arb_is_valid) > > + return 0; > > + /* > > + * If we are not in final termination, and the arb-session is currently > > + * inactive, we are doing a reset and restart due to some runtime event. > > + * Use the worker that was designed for this. > > + */ > > + pxp_queue_termination(pxp); > > + } > > I really don't see why you need 1 function for totally 2 different cases. > Why not 2 functions then? > Alan: I don't see why not ;) My goal with above method was was to concentrate the teardown steps in a single function so if future changes are required, we can keep it in this single function entry point. For now i will assume that was a nack so i shall split it on next rev. > > + > > + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(250))) > > + return -ETIMEDOUT; > > + > > + return 0; > > +} > > + > > Alan: [snip] > > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h > > index 9658d3005222..3ded0890cd27 100644 > > --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h > > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h > > @@ -27,6 +27,7 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); > > void intel_pxp_tee_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); > > > > int intel_pxp_start(struct intel_pxp *pxp); > > +void intel_pxp_end(struct intel_pxp *pxp); > > > > int intel_pxp_key_check(struct intel_pxp *pxp, > > struct drm_i915_gem_object *obj, > > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c > > index 892d39cc61c1..e427464aa131 100644 > > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c > > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c > > @@ -16,7 +16,7 @@ void intel_pxp_suspend_prepare(struct intel_pxp *pxp) > > if (!intel_pxp_is_enabled(pxp)) > > return; > > > > - pxp->arb_is_valid = false; > > + intel_pxp_end(pxp); > > > > intel_pxp_invalidate(pxp); > > } > > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c > > index 74ed7e16e481..d8278c4002e3 100644 > > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c > > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c > > @@ -115,11 +115,14 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) > > return ret; > > } > > > > -static void pxp_terminate(struct intel_pxp *pxp) > > +void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb) > > { > > int ret; > > > > - pxp->hw_state_invalidated = true; > > + if (restart_arb) > > + pxp->hw_state_invalidated = true; > > + else > > + pxp->hw_state_invalidated = false; > > o.O > > pxp->hw_state_invalidate = restart_arb; Alan: duhhhh... (my bad) > > ? > > or even a better name for the restart_arb to already indicate that is > the hw_state_invalidate ? > Alan: hmmm... you something mean like: hw_state_invalidated = post_invalidation_needs_restart; Alan: actually i wish we couold redo "hw_state_invalidate" which is currently defined as a boolean that only means one thing -> teardown and restart. It would be more scalable if we can replace it with a bitmask of "current + (infered)pending state" with a documented state-machine with a fixed set of state-transition paths. INACTIVE----> STARTING----> ACTIVE ----> TEARDOWN_RESTART--->| ^ ^ | | | | | V | |<--------------)----------<---------------| | | | |-----> TEARDOWN_END---->--| | V |<-----------------<----------------<------------------| However, I didn't do this initially because it would mean a wider set of changes that might take more time to test and review (downstream customers impacts) but for only 5 states but where only 2 of em are impacted by this change. For now i shall go with the simpler name change as you hint above - unless you request this instead. Alan: [snip]
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index cfc9af8b3d21..f3d9e7989eb7 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -270,6 +270,55 @@ static bool pxp_component_bound(struct intel_pxp *pxp) return bound; } +static int __pxp_global_teardown_locked(struct intel_pxp *pxp, bool terminate_for_cleanup) +{ + if (terminate_for_cleanup) { + if (!pxp->arb_is_valid) + return 0; + /* + * To ensure synchronous and coherent session teardown completion + * in response to suspend or shutdown triggers, don't use a worker. + */ + intel_pxp_mark_termination_in_progress(pxp); + intel_pxp_terminate(pxp, false); + } else { + if (pxp->arb_is_valid) + return 0; + /* + * If we are not in final termination, and the arb-session is currently + * inactive, we are doing a reset and restart due to some runtime event. + * Use the worker that was designed for this. + */ + pxp_queue_termination(pxp); + } + + if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(250))) + return -ETIMEDOUT; + + return 0; +} + +void intel_pxp_end(struct intel_pxp *pxp) +{ + struct drm_i915_private *i915 = pxp->ctrl_gt->i915; + intel_wakeref_t wakeref; + + if (!intel_pxp_is_enabled(pxp)) + return; + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + mutex_lock(&pxp->arb_mutex); + + if (__pxp_global_teardown_locked(pxp, true)) + drm_dbg(&i915->drm, "PXP end timed out\n"); + + mutex_unlock(&pxp->arb_mutex); + + intel_pxp_fini_hw(pxp); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); +} + /* * the arb session is restarted from the irq work when we receive the * termination completion interrupt @@ -286,16 +335,9 @@ int intel_pxp_start(struct intel_pxp *pxp) mutex_lock(&pxp->arb_mutex); - if (pxp->arb_is_valid) - goto unlock; - - pxp_queue_termination(pxp); - - if (!wait_for_completion_timeout(&pxp->termination, - msecs_to_jiffies(250))) { - ret = -ETIMEDOUT; + ret = __pxp_global_teardown_locked(pxp, false); + if (ret) goto unlock; - } /* make sure the compiler doesn't optimize the double access */ barrier(); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h index 9658d3005222..3ded0890cd27 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h @@ -27,6 +27,7 @@ void intel_pxp_mark_termination_in_progress(struct intel_pxp *pxp); void intel_pxp_tee_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); int intel_pxp_start(struct intel_pxp *pxp); +void intel_pxp_end(struct intel_pxp *pxp); int intel_pxp_key_check(struct intel_pxp *pxp, struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c index 892d39cc61c1..e427464aa131 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c @@ -16,7 +16,7 @@ void intel_pxp_suspend_prepare(struct intel_pxp *pxp) if (!intel_pxp_is_enabled(pxp)) return; - pxp->arb_is_valid = false; + intel_pxp_end(pxp); intel_pxp_invalidate(pxp); } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c index 74ed7e16e481..d8278c4002e3 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c @@ -115,11 +115,14 @@ static int pxp_terminate_arb_session_and_global(struct intel_pxp *pxp) return ret; } -static void pxp_terminate(struct intel_pxp *pxp) +void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb) { int ret; - pxp->hw_state_invalidated = true; + if (restart_arb) + pxp->hw_state_invalidated = true; + else + pxp->hw_state_invalidated = false; /* * if we fail to submit the termination there is no point in waiting for @@ -167,7 +170,7 @@ static void pxp_session_work(struct work_struct *work) if (events & PXP_TERMINATION_REQUEST) { events &= ~PXP_TERMINATION_COMPLETE; - pxp_terminate(pxp); + intel_pxp_terminate(pxp, true); } if (events & PXP_TERMINATION_COMPLETE) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h index 903ac52cffa1..4f944b63b5b6 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h @@ -12,9 +12,14 @@ struct intel_pxp; #ifdef CONFIG_DRM_I915_PXP void intel_pxp_session_management_init(struct intel_pxp *pxp); +void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb); #else static inline void intel_pxp_session_management_init(struct intel_pxp *pxp) { } + +static inline void intel_pxp_terminate(struct intel_pxp *pxp, bool restart_arb) +{ +} #endif #endif /* __INTEL_PXP_SESSION_H__ */