diff mbox series

[v2,4/6] drm/i915: Disable PSR when a PSR aux error happen

Message ID 20181011004124.4110-4-jose.souza@intel.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/6] drm/i915/psr: Use intel_psr_exit() in intel_psr_disable_source() | expand

Commit Message

Souza, Jose Oct. 11, 2018, 12:41 a.m. UTC
While PSR is active hardware will do aux transactions by it self to
wakeup sink to receive a new frame when necessary. If that
transaction is not acked by sink, hardware will trigger this
interruption.

So let's disable PSR as it is a hint that there is problem with this
sink.

The removed FIXME was asking to manually train the link but we don't
need to do that as by spec sink should do a short pulse when it is
out of sync with source, we just need to make sure it is awaken and
the SDP header with PSR disable will trigger this condition.

Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_psr.c | 39 ++++++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 4 deletions(-)

Comments

Dhinakaran Pandiyan Oct. 19, 2018, 11:14 p.m. UTC | #1
On Wed, 2018-10-10 at 17:41 -0700, José Roberto de Souza wrote:
> While PSR is active hardware will do aux transactions by it self to
> wakeup sink to receive a new frame when necessary. If that
> transaction is not acked by sink, hardware will trigger this
> interruption.
> 
> So let's disable PSR as it is a hint that there is problem with this
> sink.
> 
> The removed FIXME was asking to manually train the link but we don't
> need to do that as by spec sink should do a short pulse when it is
> out of sync with source, we just need to make sure it is awaken and
> the SDP header with PSR disable will trigger this condition.
> 
> Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
> Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  1 +
>  drivers/gpu/drm/i915/intel_psr.c | 39 ++++++++++++++++++++++++++++
> ----
>  2 files changed, 36 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 3017ef037fed..e8ba00dd2c51 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -638,6 +638,7 @@ struct i915_psr {
>  	u8 sink_sync_latency;
>  	ktime_t last_entry_attempt;
>  	ktime_t last_exit;
> +	u32 irq_aux_error;
>  };
>  
>  enum intel_pch {
> diff --git a/drivers/gpu/drm/i915/intel_psr.c
> b/drivers/gpu/drm/i915/intel_psr.c
> index 70d4e26e17b5..ad09130cb4ad 100644
> --- a/drivers/gpu/drm/i915/intel_psr.c
> +++ b/drivers/gpu/drm/i915/intel_psr.c
> @@ -159,10 +159,16 @@ void intel_psr_irq_handler(struct
> drm_i915_private *dev_priv, u32 psr_iir)
>  			       BIT(TRANSCODER_C);
>  
>  	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder,
> transcoders) {
> -		/* FIXME: Exit PSR and link train manually when this
> happens. */
> -		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
> -			DRM_DEBUG_KMS("[transcoder %s] PSR aux
> error\n",
> -				      transcoder_name(cpu_transcoder));
> +		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) {
> +			DRM_WARN("[transcoder %s] PSR aux error\n",
> +				 transcoder_name(cpu_transcoder));
Downgrade this to debug since the error is handled in the driver? 

> +
> +			spin_lock(&dev_priv->irq_lock);
> +			dev_priv->psr.irq_aux_error |=
> BIT(cpu_transcoder);
Just ignore the non eDP bits, I don't think we want to do anything with
the information that some other bit was set.

> +			spin_unlock(&dev_priv->irq_lock);
> +
> +			schedule_work(&dev_priv->psr.work);
> +		}
>  
>  		if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
>  			dev_priv->psr.last_entry_attempt = time_ns;
> @@ -893,11 +899,36 @@ int intel_psr_set_debugfs_mode(struct
> drm_i915_private *dev_priv,
>  	return ret;
>  }
>  
> +static void intel_psr_handle_irq(struct drm_i915_private *dev_priv)
> +{
> +	struct i915_psr *psr = &dev_priv->psr;
> +	u32 irq_aux_error;
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	irq_aux_error = psr->irq_aux_error;
> +	psr->irq_aux_error = 0;
A subsequent modeset will enable PSR again. I don't expect a modeset to
to be able to fix an AUX wake up failure, so might as well disable it
for good.

> +	spin_unlock_irq(&dev_priv->irq_lock);
> +
> +	/* right now PSR is only enabled in eDP */
"right now" implies that PSR could be enabled for non eDP ports, but
that's not the case.


> +	WARN_ON(irq_aux_error & ~BIT(TRANSCODER_EDP));
This should go away if you ignore non-EDP bits, and a stack trace isn't
particularly useful anyway.

> +
> +	mutex_lock(&psr->lock);
Is this sufficient? Don't we have to serialize against ongoing modesets
like we do for debugfs enable/disable. The disable sequence in bspec
calls out a running pipe and port as pre-requisites.

Ccing Ville and Maarten to get their opinion on this.
 
> +
> +	intel_psr_disable_locked(psr->dp);
> +	/* let's make sure that sink is awaken */
> +	drm_dp_dpcd_writeb(&psr->dp->aux, DP_SET_POWER,
> DP_SET_POWER_D0);
Given that the hardware initiated AUX write failed, I would recommend
reading back the sink PSR status to make sure disable worked.

> +
> +	mutex_unlock(&dev_priv->psr.lock);
> +}
> +
>  static void intel_psr_work(struct work_struct *work)
>  {
>  	struct drm_i915_private *dev_priv =
>  		container_of(work, typeof(*dev_priv), psr.work);
>  
> +	if (READ_ONCE(dev_priv->psr.irq_aux_error))
> +		intel_psr_handle_irq(dev_priv);
If psr_work() was already executing and past this check,
schedule_work() in intel_psr_irq_handler will return a failure and
disable PSR would now depend on getting an invalidate and flush
operation. We should disable PSR without any dependency on flush or
invalidate.


> +
>  	mutex_lock(&dev_priv->psr.lock);
>  
>  	if (!dev_priv->psr.enabled)
Souza, Jose Oct. 20, 2018, 12:12 a.m. UTC | #2
On Fri, 2018-10-19 at 16:14 -0700, Dhinakaran Pandiyan wrote:
> On Wed, 2018-10-10 at 17:41 -0700, José Roberto de Souza wrote:
> > While PSR is active hardware will do aux transactions by it self to
> > wakeup sink to receive a new frame when necessary. If that
> > transaction is not acked by sink, hardware will trigger this
> > interruption.
> > 
> > So let's disable PSR as it is a hint that there is problem with
> > this
> > sink.
> > 
> > The removed FIXME was asking to manually train the link but we
> > don't
> > need to do that as by spec sink should do a short pulse when it is
> > out of sync with source, we just need to make sure it is awaken and
> > the SDP header with PSR disable will trigger this condition.
> > 
> > Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
> > Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |  1 +
> >  drivers/gpu/drm/i915/intel_psr.c | 39 ++++++++++++++++++++++++++++
> > ----
> >  2 files changed, 36 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index 3017ef037fed..e8ba00dd2c51 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -638,6 +638,7 @@ struct i915_psr {
> >  	u8 sink_sync_latency;
> >  	ktime_t last_entry_attempt;
> >  	ktime_t last_exit;
> > +	u32 irq_aux_error;
> >  };
> >  
> >  enum intel_pch {
> > diff --git a/drivers/gpu/drm/i915/intel_psr.c
> > b/drivers/gpu/drm/i915/intel_psr.c
> > index 70d4e26e17b5..ad09130cb4ad 100644
> > --- a/drivers/gpu/drm/i915/intel_psr.c
> > +++ b/drivers/gpu/drm/i915/intel_psr.c
> > @@ -159,10 +159,16 @@ void intel_psr_irq_handler(struct
> > drm_i915_private *dev_priv, u32 psr_iir)
> >  			       BIT(TRANSCODER_C);
> >  
> >  	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder,
> > transcoders) {
> > -		/* FIXME: Exit PSR and link train manually when this
> > happens. */
> > -		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
> > -			DRM_DEBUG_KMS("[transcoder %s] PSR aux
> > error\n",
> > -				      transcoder_name(cpu_transcoder));
> > +		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) {
> > +			DRM_WARN("[transcoder %s] PSR aux error\n",
> > +				 transcoder_name(cpu_transcoder));
> Downgrade this to debug since the error is handled in the driver? 

I think is better keep as DRM_WARN so it is shown in regular kernel
logs this way if a user opens a bug complaning why PSR is disabled we
can check that is because of PSR aux error.

> 
> > +
> > +			spin_lock(&dev_priv->irq_lock);
> > +			dev_priv->psr.irq_aux_error |=
> > BIT(cpu_transcoder);
> Just ignore the non eDP bits, I don't think we want to do anything
> with
> the information that some other bit was set.
> 
> > +			spin_unlock(&dev_priv->irq_lock);
> > +
> > +			schedule_work(&dev_priv->psr.work);
> > +		}
> >  
> >  		if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
> >  			dev_priv->psr.last_entry_attempt = time_ns;
> > @@ -893,11 +899,36 @@ int intel_psr_set_debugfs_mode(struct
> > drm_i915_private *dev_priv,
> >  	return ret;
> >  }
> >  
> > +static void intel_psr_handle_irq(struct drm_i915_private
> > *dev_priv)
> > +{
> > +	struct i915_psr *psr = &dev_priv->psr;
> > +	u32 irq_aux_error;
> > +
> > +	spin_lock_irq(&dev_priv->irq_lock);
> > +	irq_aux_error = psr->irq_aux_error;
> > +	psr->irq_aux_error = 0;
> A subsequent modeset will enable PSR again. I don't expect a modeset
> to
> to be able to fix an AUX wake up failure, so might as well disable it
> for good.

Add another field to do that or set sink_support=false? I guess PSR
short pulses errors should also disable it good too?

> 
> > +	spin_unlock_irq(&dev_priv->irq_lock);
> > +
> > +	/* right now PSR is only enabled in eDP */
> "right now" implies that PSR could be enabled for non eDP ports, but
> that's not the case.
> 
> 
> > +	WARN_ON(irq_aux_error & ~BIT(TRANSCODER_EDP));
> This should go away if you ignore non-EDP bits, and a stack trace
> isn't
> particularly useful anyway.

Okay I will remove this handlings for other transcoders.

> 
> > +
> > +	mutex_lock(&psr->lock);
> Is this sufficient? Don't we have to serialize against ongoing
> modesets
> like we do for debugfs enable/disable. The disable sequence in bspec
> calls out a running pipe and port as pre-requisites.

HW will only send a aux transaction when exiting PSR, in this cases
pipe will always be running:
- exiting because of changes in the screen
- exiting because pipe will be disabled
- exiting because of PSR error

> 
> Ccing Ville and Maarten to get their opinion on this.
>  
> > +
> > +	intel_psr_disable_locked(psr->dp);
> > +	/* let's make sure that sink is awaken */
> > +	drm_dp_dpcd_writeb(&psr->dp->aux, DP_SET_POWER,
> > DP_SET_POWER_D0);
> Given that the hardware initiated AUX write failed, I would recommend
> reading back the sink PSR status to make sure disable worked.

And in case of reading error or the value is not set try again? This
could fall into a infite loop. intel_dp_aux_xfer() already try to do
the transaction 5 times I guess if if failed the sink crashed and there
is no recover.

> 
> > +
> > +	mutex_unlock(&dev_priv->psr.lock);
> > +}
> > +
> >  static void intel_psr_work(struct work_struct *work)
> >  {
> >  	struct drm_i915_private *dev_priv =
> >  		container_of(work, typeof(*dev_priv), psr.work);
> >  
> > +	if (READ_ONCE(dev_priv->psr.irq_aux_error))
> > +		intel_psr_handle_irq(dev_priv);
> If psr_work() was already executing and past this check,
> schedule_work() in intel_psr_irq_handler will return a failure and
> disable PSR would now depend on getting an invalidate and flush
> operation. We should disable PSR without any dependency on flush or
> invalidate.

For what I checked in the schedule_work() code if the work is running
and there is a call to schedule_work() it will be schedule again.

> 
> 
> > +
> >  	mutex_lock(&dev_priv->psr.lock);
> >  
> >  	if (!dev_priv->psr.enabled)
Dhinakaran Pandiyan Oct. 24, 2018, 10:08 p.m. UTC | #3
On Sat, 2018-10-20 at 00:12 +0000, Souza, Jose wrote:
> On Fri, 2018-10-19 at 16:14 -0700, Dhinakaran Pandiyan wrote:
> > On Wed, 2018-10-10 at 17:41 -0700, José Roberto de Souza wrote:
> > > While PSR is active hardware will do aux transactions by it self
> > > to
> > > wakeup sink to receive a new frame when necessary. If that
> > > transaction is not acked by sink, hardware will trigger this
> > > interruption.
> > > 
> > > So let's disable PSR as it is a hint that there is problem with
> > > this
> > > sink.
> > > 
> > > The removed FIXME was asking to manually train the link but we
> > > don't
> > > need to do that as by spec sink should do a short pulse when it
> > > is
> > > out of sync with source, we just need to make sure it is awaken
> > > and
> > > the SDP header with PSR disable will trigger this condition.
> > > 
> > > Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
> > > Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h  |  1 +
> > >  drivers/gpu/drm/i915/intel_psr.c | 39
> > > ++++++++++++++++++++++++++++
> > > ----
> > >  2 files changed, 36 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > b/drivers/gpu/drm/i915/i915_drv.h
> > > index 3017ef037fed..e8ba00dd2c51 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -638,6 +638,7 @@ struct i915_psr {
> > >  	u8 sink_sync_latency;
> > >  	ktime_t last_entry_attempt;
> > >  	ktime_t last_exit;
> > > +	u32 irq_aux_error;
> > >  };
> > >  
> > >  enum intel_pch {
> > > diff --git a/drivers/gpu/drm/i915/intel_psr.c
> > > b/drivers/gpu/drm/i915/intel_psr.c
> > > index 70d4e26e17b5..ad09130cb4ad 100644
> > > --- a/drivers/gpu/drm/i915/intel_psr.c
> > > +++ b/drivers/gpu/drm/i915/intel_psr.c
> > > @@ -159,10 +159,16 @@ void intel_psr_irq_handler(struct
> > > drm_i915_private *dev_priv, u32 psr_iir)
> > >  			       BIT(TRANSCODER_C);
> > >  
> > >  	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder,
> > > transcoders) {
> > > -		/* FIXME: Exit PSR and link train manually when this
> > > happens. */
> > > -		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
> > > -			DRM_DEBUG_KMS("[transcoder %s] PSR aux
> > > error\n",
> > > -				      transcoder_name(cpu_transcoder));
> > > +		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) {
> > > +			DRM_WARN("[transcoder %s] PSR aux error\n",
> > > +				 transcoder_name(cpu_transcoder));
> > 
> > Downgrade this to debug since the error is handled in the driver? 
> 
> I think is better keep as DRM_WARN so it is shown in regular kernel
> logs this way if a user opens a bug complaning why PSR is disabled we
> can check that is because of PSR aux error.
> 
> > 
> > > +
> > > +			spin_lock(&dev_priv->irq_lock);
This lock isn't needed either. How about setting a bool only if the
transcoder is eDP and then scheduling a disable.

> > > +			dev_priv->psr.irq_aux_error |=
> > > BIT(cpu_transcoder);
> > 
> > Just ignore the non eDP bits, I don't think we want to do anything
> > with
> > the information that some other bit was set.
> > 
> > > +			spin_unlock(&dev_priv->irq_lock);
> > > +
> > > +			schedule_work(&dev_priv->psr.work);
> > > +		}
> > >  
> > >  		if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
> > >  			dev_priv->psr.last_entry_attempt = time_ns;
> > > @@ -893,11 +899,36 @@ int intel_psr_set_debugfs_mode(struct
> > > drm_i915_private *dev_priv,
> > >  	return ret;
> > >  }
> > >  
> > > +static void intel_psr_handle_irq(struct drm_i915_private
> > > *dev_priv)
> > > +{
> > > +	struct i915_psr *psr = &dev_priv->psr;
> > > +	u32 irq_aux_error;
> > > +
> > > +	spin_lock_irq(&dev_priv->irq_lock);
> > > +	irq_aux_error = psr->irq_aux_error;
> > > +	psr->irq_aux_error = 0;
> > 
> > A subsequent modeset will enable PSR again. I don't expect a
> > modeset
> > to
> > to be able to fix an AUX wake up failure, so might as well disable
> > it
> > for good.
> 
> Add another field to do that or set sink_support=false? I guess PSR
> short pulses errors should also disable it good too?
Reusing sink_support will get confusing, particularly because it is
exposed in debugfs.

> 
> > 
> > > +	spin_unlock_irq(&dev_priv->irq_lock);
> > > +
> > > +	/* right now PSR is only enabled in eDP */
> > 
> > "right now" implies that PSR could be enabled for non eDP ports,
> > but
> > that's not the case.
> > 
> > 
> > > +	WARN_ON(irq_aux_error & ~BIT(TRANSCODER_EDP));
> > 
> > This should go away if you ignore non-EDP bits, and a stack trace
> > isn't
> > particularly useful anyway.
> 
> Okay I will remove this handlings for other transcoders.
> 
> > 
> > > +
> > > +	mutex_lock(&psr->lock);
> > 
> > Is this sufficient? Don't we have to serialize against ongoing
> > modesets
> > like we do for debugfs enable/disable. The disable sequence in
> > bspec
> > calls out a running pipe and port as pre-requisites.
> 
> HW will only send a aux transaction when exiting PSR, in this cases
> pipe will always be running:
Sure, but psr_work() can run after the pipe is disabled.

However, psr.enabled should take care of not writing to PSR_CTL if the
pipe was already disabled. The question now is if we were in the middle
of a modeset, disabling PSR here would have no effect if encoders are
enabled after this point.


> - exiting because of changes in the screen
> - exiting because pipe will be disabled
> - exiting because of PSR error
> 
> > 
> > Ccing Ville and Maarten to get their opinion on this.
> >  
> > > +
> > > +	intel_psr_disable_locked(psr->dp);
> > > +	/* let's make sure that sink is awaken */
> > > +	drm_dp_dpcd_writeb(&psr->dp->aux, DP_SET_POWER,
> > > DP_SET_POWER_D0);
> > 
> > Given that the hardware initiated AUX write failed, I would
> > recommend
> > reading back the sink PSR status to make sure disable worked.
> 
> And in case of reading error or the value is not set try again? This
> could fall into a infite loop. intel_dp_aux_xfer() already try to do
> the transaction 5 times I guess if if failed the sink crashed and
> there
> is no recover.
> 
I was thinking of printing an error here so that we know error recovery
did not work.


> > 
> > > +
> > > +	mutex_unlock(&dev_priv->psr.lock);
> > > +}
> > > +
> > >  static void intel_psr_work(struct work_struct *work)
> > >  {
> > >  	struct drm_i915_private *dev_priv =
> > >  		container_of(work, typeof(*dev_priv), psr.work);
> > >  
> > > +	if (READ_ONCE(dev_priv->psr.irq_aux_error))
> > > +		intel_psr_handle_irq(dev_priv);

Why not create a new work item for disable? I don't see why
intel_psr_work() needs to be reused for a completely different reason.

> > 
> > If psr_work() was already executing and past this check,
> > schedule_work() in intel_psr_irq_handler will return a failure and
> > disable PSR would now depend on getting an invalidate and flush
> > operation. We should disable PSR without any dependency on flush or
> > invalidate.
> 
> For what I checked in the schedule_work() code if the work is running
> and there is a call to schedule_work() it will be schedule again.
> 
From the documentation, 
/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns %false if @work was already on the kernel-global workqueue
and
 * %true otherwise.
 *

> > 
> > 
> > > +
> > >  	mutex_lock(&dev_priv->psr.lock);
> > >  
> > >  	if (!dev_priv->psr.enabled)
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Dhinakaran Pandiyan Oct. 24, 2018, 11:22 p.m. UTC | #4
On Wed, 2018-10-24 at 15:08 -0700, Dhinakaran Pandiyan wrote:
> On Sat, 2018-10-20 at 00:12 +0000, Souza, Jose wrote:
> > On Fri, 2018-10-19 at 16:14 -0700, Dhinakaran Pandiyan wrote:
> > > On Wed, 2018-10-10 at 17:41 -0700, José Roberto de Souza wrote:
> > > > While PSR is active hardware will do aux transactions by it
> > > > self
> > > > to
> > > > wakeup sink to receive a new frame when necessary. If that
> > > > transaction is not acked by sink, hardware will trigger this
> > > > interruption.
> > > > 
> > > > So let's disable PSR as it is a hint that there is problem with
> > > > this
> > > > sink.
> > > > 
> > > > The removed FIXME was asking to manually train the link but we
> > > > don't
> > > > need to do that as by spec sink should do a short pulse when it
> > > > is
> > > > out of sync with source, we just need to make sure it is awaken
> > > > and
> > > > the SDP header with PSR disable will trigger this condition.
> > > > 
> > > > Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
> > > > Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_drv.h  |  1 +
> > > >  drivers/gpu/drm/i915/intel_psr.c | 39
> > > > ++++++++++++++++++++++++++++
> > > > ----
> > > >  2 files changed, 36 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > b/drivers/gpu/drm/i915/i915_drv.h
> > > > index 3017ef037fed..e8ba00dd2c51 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -638,6 +638,7 @@ struct i915_psr {
> > > >  	u8 sink_sync_latency;
> > > >  	ktime_t last_entry_attempt;
> > > >  	ktime_t last_exit;
> > > > +	u32 irq_aux_error;
> > > >  };
> > > >  
> > > >  enum intel_pch {
> > > > diff --git a/drivers/gpu/drm/i915/intel_psr.c
> > > > b/drivers/gpu/drm/i915/intel_psr.c
> > > > index 70d4e26e17b5..ad09130cb4ad 100644
> > > > --- a/drivers/gpu/drm/i915/intel_psr.c
> > > > +++ b/drivers/gpu/drm/i915/intel_psr.c
> > > > @@ -159,10 +159,16 @@ void intel_psr_irq_handler(struct
> > > > drm_i915_private *dev_priv, u32 psr_iir)
> > > >  			       BIT(TRANSCODER_C);
> > > >  
> > > >  	for_each_cpu_transcoder_masked(dev_priv,
> > > > cpu_transcoder,
> > > > transcoders) {
> > > > -		/* FIXME: Exit PSR and link train manually when
> > > > this
> > > > happens. */
> > > > -		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
> > > > -			DRM_DEBUG_KMS("[transcoder %s] PSR aux
> > > > error\n",
> > > > -				      transcoder_name(cpu_trans
> > > > coder));
> > > > +		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) {
> > > > +			DRM_WARN("[transcoder %s] PSR aux
> > > > error\n",
> > > > +				 transcoder_name(cpu_transcoder
> > > > ));
> > > 
> > > Downgrade this to debug since the error is handled in the
> > > driver? 
> > 
> > I think is better keep as DRM_WARN so it is shown in regular kernel
> > logs this way if a user opens a bug complaning why PSR is disabled
> > we
> > can check that is because of PSR aux error.
> > 
> > > 
> > > > +
> > > > +			spin_lock(&dev_priv->irq_lock);
> 
> This lock isn't needed either. How about setting a bool only if the
> transcoder is eDP and then scheduling a disable.
> 
> > > > +			dev_priv->psr.irq_aux_error |=
> > > > BIT(cpu_transcoder);
> > > 
> > > Just ignore the non eDP bits, I don't think we want to do
> > > anything
> > > with
> > > the information that some other bit was set.
> > > 
> > > > +			spin_unlock(&dev_priv->irq_lock);
> > > > +
> > > > +			schedule_work(&dev_priv->psr.work);
> > > > +		}
> > > >  
> > > >  		if (psr_iir &
> > > > EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
> > > >  			dev_priv->psr.last_entry_attempt =
> > > > time_ns;
> > > > @@ -893,11 +899,36 @@ int intel_psr_set_debugfs_mode(struct
> > > > drm_i915_private *dev_priv,
> > > >  	return ret;
> > > >  }
> > > >  
> > > > +static void intel_psr_handle_irq(struct drm_i915_private
> > > > *dev_priv)
> > > > +{
> > > > +	struct i915_psr *psr = &dev_priv->psr;
> > > > +	u32 irq_aux_error;
> > > > +
> > > > +	spin_lock_irq(&dev_priv->irq_lock);
> > > > +	irq_aux_error = psr->irq_aux_error;
> > > > +	psr->irq_aux_error = 0;
> > > 
> > > A subsequent modeset will enable PSR again. I don't expect a
> > > modeset
> > > to
> > > to be able to fix an AUX wake up failure, so might as well
> > > disable
> > > it
> > > for good.
> > 
> > Add another field to do that or set sink_support=false? I guess PSR
> > short pulses errors should also disable it good too?
> 
> Reusing sink_support will get confusing, particularly because it is
> exposed in debugfs.
> 
> > 
> > > 
> > > > +	spin_unlock_irq(&dev_priv->irq_lock);
> > > > +
> > > > +	/* right now PSR is only enabled in eDP */
> > > 
> > > "right now" implies that PSR could be enabled for non eDP ports,
> > > but
> > > that's not the case.
> > > 
> > > 
> > > > +	WARN_ON(irq_aux_error & ~BIT(TRANSCODER_EDP));
> > > 
> > > This should go away if you ignore non-EDP bits, and a stack trace
> > > isn't
> > > particularly useful anyway.
> > 
> > Okay I will remove this handlings for other transcoders.
> > 
> > > 
> > > > +
> > > > +	mutex_lock(&psr->lock);
> > > 
> > > Is this sufficient? Don't we have to serialize against ongoing
> > > modesets
> > > like we do for debugfs enable/disable. The disable sequence in
> > > bspec
> > > calls out a running pipe and port as pre-requisites.
> > 
> > HW will only send a aux transaction when exiting PSR, in this cases
> > pipe will always be running:
> 
> Sure, but psr_work() can run after the pipe is disabled.
> 
> However, psr.enabled should take care of not writing to PSR_CTL if
> the
> pipe was already disabled. The question now is if we were in the
> middle
> of a modeset, disabling PSR here would have no effect if encoders are
> enabled after this point.
> 
Another issue is the wait for idle under psr.lock, not a good idea to
have modesets waiting for that lock.

> 
> > - exiting because of changes in the screen
> > - exiting because pipe will be disabled
> > - exiting because of PSR error
> > 
> > > 
> > > Ccing Ville and Maarten to get their opinion on this.
> > >  
> > > > +
> > > > +	intel_psr_disable_locked(psr->dp);
> > > > +	/* let's make sure that sink is awaken */
> > > > +	drm_dp_dpcd_writeb(&psr->dp->aux, DP_SET_POWER,
> > > > DP_SET_POWER_D0);
> > > 
> > > Given that the hardware initiated AUX write failed, I would
> > > recommend
> > > reading back the sink PSR status to make sure disable worked.
> > 
> > And in case of reading error or the value is not set try again?
> > This
> > could fall into a infite loop. intel_dp_aux_xfer() already try to
> > do
> > the transaction 5 times I guess if if failed the sink crashed and
> > there
> > is no recover.
> > 
> 
> I was thinking of printing an error here so that we know error
> recovery
> did not work.
> 
> 
> > > 
> > > > +
> > > > +	mutex_unlock(&dev_priv->psr.lock);
> > > > +}
> > > > +
> > > >  static void intel_psr_work(struct work_struct *work)
> > > >  {
> > > >  	struct drm_i915_private *dev_priv =
> > > >  		container_of(work, typeof(*dev_priv),
> > > > psr.work);
> > > >  
> > > > +	if (READ_ONCE(dev_priv->psr.irq_aux_error))
> > > > +		intel_psr_handle_irq(dev_priv);
> 
> Why not create a new work item for disable? I don't see why
> intel_psr_work() needs to be reused for a completely different
> reason.
> 
> > > 
> > > If psr_work() was already executing and past this check,
> > > schedule_work() in intel_psr_irq_handler will return a failure
> > > and
> > > disable PSR would now depend on getting an invalidate and flush
> > > operation. We should disable PSR without any dependency on flush
> > > or
> > > invalidate.
> > 
> > For what I checked in the schedule_work() code if the work is
> > running
> > and there is a call to schedule_work() it will be schedule again.
> > 
> 
> From the documentation, 
> /**
>  * schedule_work - put work task in global workqueue
>  * @work: job to be done
>  *
>  * Returns %false if @work was already on the kernel-global workqueue
> and
>  * %true otherwise.
>  *
> 
> > > 
> > > 
> > > > +
> > > >  	mutex_lock(&dev_priv->psr.lock);
> > > >  
> > > >  	if (!dev_priv->psr.enabled)
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Souza, Jose Oct. 24, 2018, 11:31 p.m. UTC | #5
On Wed, 2018-10-24 at 16:22 -0700, Dhinakaran Pandiyan wrote:
> On Wed, 2018-10-24 at 15:08 -0700, Dhinakaran Pandiyan wrote:
> > On Sat, 2018-10-20 at 00:12 +0000, Souza, Jose wrote:
> > > On Fri, 2018-10-19 at 16:14 -0700, Dhinakaran Pandiyan wrote:
> > > > On Wed, 2018-10-10 at 17:41 -0700, José Roberto de Souza wrote:
> > > > > While PSR is active hardware will do aux transactions by it
> > > > > self
> > > > > to
> > > > > wakeup sink to receive a new frame when necessary. If that
> > > > > transaction is not acked by sink, hardware will trigger this
> > > > > interruption.
> > > > > 
> > > > > So let's disable PSR as it is a hint that there is problem
> > > > > with
> > > > > this
> > > > > sink.
> > > > > 
> > > > > The removed FIXME was asking to manually train the link but
> > > > > we
> > > > > don't
> > > > > need to do that as by spec sink should do a short pulse when
> > > > > it
> > > > > is
> > > > > out of sync with source, we just need to make sure it is
> > > > > awaken
> > > > > and
> > > > > the SDP header with PSR disable will trigger this condition.
> > > > > 
> > > > > Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
> > > > > Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/i915_drv.h  |  1 +
> > > > >  drivers/gpu/drm/i915/intel_psr.c | 39
> > > > > ++++++++++++++++++++++++++++
> > > > > ----
> > > > >  2 files changed, 36 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > b/drivers/gpu/drm/i915/i915_drv.h
> > > > > index 3017ef037fed..e8ba00dd2c51 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > @@ -638,6 +638,7 @@ struct i915_psr {
> > > > >  	u8 sink_sync_latency;
> > > > >  	ktime_t last_entry_attempt;
> > > > >  	ktime_t last_exit;
> > > > > +	u32 irq_aux_error;
> > > > >  };
> > > > >  
> > > > >  enum intel_pch {
> > > > > diff --git a/drivers/gpu/drm/i915/intel_psr.c
> > > > > b/drivers/gpu/drm/i915/intel_psr.c
> > > > > index 70d4e26e17b5..ad09130cb4ad 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_psr.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_psr.c
> > > > > @@ -159,10 +159,16 @@ void intel_psr_irq_handler(struct
> > > > > drm_i915_private *dev_priv, u32 psr_iir)
> > > > >  			       BIT(TRANSCODER_C);
> > > > >  
> > > > >  	for_each_cpu_transcoder_masked(dev_priv,
> > > > > cpu_transcoder,
> > > > > transcoders) {
> > > > > -		/* FIXME: Exit PSR and link train manually when
> > > > > this
> > > > > happens. */
> > > > > -		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
> > > > > -			DRM_DEBUG_KMS("[transcoder %s] PSR aux
> > > > > error\n",
> > > > > -				      transcoder_name(cpu_trans
> > > > > coder));
> > > > > +		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) {
> > > > > +			DRM_WARN("[transcoder %s] PSR aux
> > > > > error\n",
> > > > > +				 transcoder_name(cpu_transcoder
> > > > > ));
> > > > 
> > > > Downgrade this to debug since the error is handled in the
> > > > driver? 
> > > 
> > > I think is better keep as DRM_WARN so it is shown in regular
> > > kernel
> > > logs this way if a user opens a bug complaning why PSR is
> > > disabled
> > > we
> > > can check that is because of PSR aux error.
> > > 
> > > > > +
> > > > > +			spin_lock(&dev_priv->irq_lock);
> > 
> > This lock isn't needed either. How about setting a bool only if the
> > transcoder is eDP and then scheduling a disable.
> > 
> > > > > +			dev_priv->psr.irq_aux_error |=
> > > > > BIT(cpu_transcoder);
> > > > 
> > > > Just ignore the non eDP bits, I don't think we want to do
> > > > anything
> > > > with
> > > > the information that some other bit was set.
> > > > 
> > > > > +			spin_unlock(&dev_priv->irq_lock);
> > > > > +
> > > > > +			schedule_work(&dev_priv->psr.work);
> > > > > +		}
> > > > >  
> > > > >  		if (psr_iir &
> > > > > EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
> > > > >  			dev_priv->psr.last_entry_attempt =
> > > > > time_ns;
> > > > > @@ -893,11 +899,36 @@ int intel_psr_set_debugfs_mode(struct
> > > > > drm_i915_private *dev_priv,
> > > > >  	return ret;
> > > > >  }
> > > > >  
> > > > > +static void intel_psr_handle_irq(struct drm_i915_private
> > > > > *dev_priv)
> > > > > +{
> > > > > +	struct i915_psr *psr = &dev_priv->psr;
> > > > > +	u32 irq_aux_error;
> > > > > +
> > > > > +	spin_lock_irq(&dev_priv->irq_lock);
> > > > > +	irq_aux_error = psr->irq_aux_error;
> > > > > +	psr->irq_aux_error = 0;
> > > > 
> > > > A subsequent modeset will enable PSR again. I don't expect a
> > > > modeset
> > > > to
> > > > to be able to fix an AUX wake up failure, so might as well
> > > > disable
> > > > it
> > > > for good.
> > > 
> > > Add another field to do that or set sink_support=false? I guess
> > > PSR
> > > short pulses errors should also disable it good too?
> > 
> > Reusing sink_support will get confusing, particularly because it is
> > exposed in debugfs.
> > 
> > > > > +	spin_unlock_irq(&dev_priv->irq_lock);
> > > > > +
> > > > > +	/* right now PSR is only enabled in eDP */
> > > > 
> > > > "right now" implies that PSR could be enabled for non eDP
> > > > ports,
> > > > but
> > > > that's not the case.
> > > > 
> > > > 
> > > > > +	WARN_ON(irq_aux_error & ~BIT(TRANSCODER_EDP));
> > > > 
> > > > This should go away if you ignore non-EDP bits, and a stack
> > > > trace
> > > > isn't
> > > > particularly useful anyway.
> > > 
> > > Okay I will remove this handlings for other transcoders.
> > > 
> > > > > +
> > > > > +	mutex_lock(&psr->lock);
> > > > 
> > > > Is this sufficient? Don't we have to serialize against ongoing
> > > > modesets
> > > > like we do for debugfs enable/disable. The disable sequence in
> > > > bspec
> > > > calls out a running pipe and port as pre-requisites.
> > > 
> > > HW will only send a aux transaction when exiting PSR, in this
> > > cases
> > > pipe will always be running:
> > 
> > Sure, but psr_work() can run after the pipe is disabled.
> > 
> > However, psr.enabled should take care of not writing to PSR_CTL if
> > the
> > pipe was already disabled. The question now is if we were in the
> > middle
> > of a modeset, disabling PSR here would have no effect if encoders
> > are
> > enabled after this point.
> > 
> Another issue is the wait for idle under psr.lock, not a good idea to
> have modesets waiting for that lock.

When doing a modeset it would wait for PSR idle anyways as we can only
disable the pipe if PSR is idle. But waiting here allow us to send the
wake command after hardware already tried to exit PSR.

> 
> > > - exiting because of changes in the screen
> > > - exiting because pipe will be disabled
> > > - exiting because of PSR error
> > > 
> > > > Ccing Ville and Maarten to get their opinion on this.
> > > >  
> > > > > +
> > > > > +	intel_psr_disable_locked(psr->dp);
> > > > > +	/* let's make sure that sink is awaken */
> > > > > +	drm_dp_dpcd_writeb(&psr->dp->aux, DP_SET_POWER,
> > > > > DP_SET_POWER_D0);
> > > > 
> > > > Given that the hardware initiated AUX write failed, I would
> > > > recommend
> > > > reading back the sink PSR status to make sure disable worked.
> > > 
> > > And in case of reading error or the value is not set try again?
> > > This
> > > could fall into a infite loop. intel_dp_aux_xfer() already try to
> > > do
> > > the transaction 5 times I guess if if failed the sink crashed and
> > > there
> > > is no recover.
> > > 
> > 
> > I was thinking of printing an error here so that we know error
> > recovery
> > did not work.
> > 
> > 
> > > > > +
> > > > > +	mutex_unlock(&dev_priv->psr.lock);
> > > > > +}
> > > > > +
> > > > >  static void intel_psr_work(struct work_struct *work)
> > > > >  {
> > > > >  	struct drm_i915_private *dev_priv =
> > > > >  		container_of(work, typeof(*dev_priv),
> > > > > psr.work);
> > > > >  
> > > > > +	if (READ_ONCE(dev_priv->psr.irq_aux_error))
> > > > > +		intel_psr_handle_irq(dev_priv);
> > 
> > Why not create a new work item for disable? I don't see why
> > intel_psr_work() needs to be reused for a completely different
> > reason.
> > 
> > > > If psr_work() was already executing and past this check,
> > > > schedule_work() in intel_psr_irq_handler will return a failure
> > > > and
> > > > disable PSR would now depend on getting an invalidate and flush
> > > > operation. We should disable PSR without any dependency on
> > > > flush
> > > > or
> > > > invalidate.
> > > 
> > > For what I checked in the schedule_work() code if the work is
> > > running
> > > and there is a call to schedule_work() it will be schedule again.
> > > 
> > 
> > From the documentation, 
> > /**
> >  * schedule_work - put work task in global workqueue
> >  * @work: job to be done
> >  *
> >  * Returns %false if @work was already on the kernel-global
> > workqueue
> > and
> >  * %true otherwise.
> >  *
> > 
> > > > 
> > > > > +
> > > > >  	mutex_lock(&dev_priv->psr.lock);
> > > > >  
> > > > >  	if (!dev_priv->psr.enabled)
> > > 
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3017ef037fed..e8ba00dd2c51 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -638,6 +638,7 @@  struct i915_psr {
 	u8 sink_sync_latency;
 	ktime_t last_entry_attempt;
 	ktime_t last_exit;
+	u32 irq_aux_error;
 };
 
 enum intel_pch {
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 70d4e26e17b5..ad09130cb4ad 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -159,10 +159,16 @@  void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir)
 			       BIT(TRANSCODER_C);
 
 	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
-		/* FIXME: Exit PSR and link train manually when this happens. */
-		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
-			DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n",
-				      transcoder_name(cpu_transcoder));
+		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) {
+			DRM_WARN("[transcoder %s] PSR aux error\n",
+				 transcoder_name(cpu_transcoder));
+
+			spin_lock(&dev_priv->irq_lock);
+			dev_priv->psr.irq_aux_error |= BIT(cpu_transcoder);
+			spin_unlock(&dev_priv->irq_lock);
+
+			schedule_work(&dev_priv->psr.work);
+		}
 
 		if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
 			dev_priv->psr.last_entry_attempt = time_ns;
@@ -893,11 +899,36 @@  int intel_psr_set_debugfs_mode(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
+static void intel_psr_handle_irq(struct drm_i915_private *dev_priv)
+{
+	struct i915_psr *psr = &dev_priv->psr;
+	u32 irq_aux_error;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	irq_aux_error = psr->irq_aux_error;
+	psr->irq_aux_error = 0;
+	spin_unlock_irq(&dev_priv->irq_lock);
+
+	/* right now PSR is only enabled in eDP */
+	WARN_ON(irq_aux_error & ~BIT(TRANSCODER_EDP));
+
+	mutex_lock(&psr->lock);
+
+	intel_psr_disable_locked(psr->dp);
+	/* let's make sure that sink is awaken */
+	drm_dp_dpcd_writeb(&psr->dp->aux, DP_SET_POWER, DP_SET_POWER_D0);
+
+	mutex_unlock(&dev_priv->psr.lock);
+}
+
 static void intel_psr_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), psr.work);
 
+	if (READ_ONCE(dev_priv->psr.irq_aux_error))
+		intel_psr_handle_irq(dev_priv);
+
 	mutex_lock(&dev_priv->psr.lock);
 
 	if (!dev_priv->psr.enabled)