diff mbox

[4/4] drm/i915/psr: Account for sink CRC raciness on some panels

Message ID 1494018126-30190-5-git-send-email-jim.bride@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

jim.bride@linux.intel.com May 5, 2017, 9:02 p.m. UTC
According to the eDP spec, when the count field in TEST_SINK_MISC
increments then the six bytes of sink CRC information in the DPCD
should be valid.  Unfortunately, this doesn't seem to be the case
on some panels, and as a result we get some incorrect and inconsistent
values from the sink CRC DPCD locations at times.  This problem exhibits
itself more on faster processors (relative failure rates HSW < SKL < KBL.)
In order to try and account for this, we try a lot harder to read the sink
CRC until we get consistent values twice in a row before returning what we
read and delay for a time before trying to read.  We still see some
occasional failures, but reading the sink CRC is much more reliable,
particularly on SKL and KBL, with these changes than without.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Jim Bride <jim.bride@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++--
 drivers/gpu/drm/i915/intel_dp.c     | 57 ++++++++++++++++++++++++++++++++-----
 2 files changed, 61 insertions(+), 10 deletions(-)

Comments

Jani Nikula May 8, 2017, 9:12 a.m. UTC | #1
On Sat, 06 May 2017, Jim Bride <jim.bride@linux.intel.com> wrote:
> According to the eDP spec, when the count field in TEST_SINK_MISC
> increments then the six bytes of sink CRC information in the DPCD
> should be valid.  Unfortunately, this doesn't seem to be the case
> on some panels, and as a result we get some incorrect and inconsistent
> values from the sink CRC DPCD locations at times.  This problem exhibits
> itself more on faster processors (relative failure rates HSW < SKL < KBL.)
> In order to try and account for this, we try a lot harder to read the sink
> CRC until we get consistent values twice in a row before returning what we
> read and delay for a time before trying to read.  We still see some
> occasional failures, but reading the sink CRC is much more reliable,
> particularly on SKL and KBL, with these changes than without.

Retrying seems like a good idea. But this patch retries up to 36 times
if the two consecutive reads don't match, which is excessive.

The sleeps should probably be a separate change. Also not happy about
the magic numbers there.

Also, there are plenty of what seem like unrelated changes. Whitespace,
debug logging, error checking, etc.


BR,
Jani.


>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Signed-off-by: Jim Bride <jim.bride@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++--
>  drivers/gpu/drm/i915/intel_dp.c     | 57 ++++++++++++++++++++++++++++++++-----
>  2 files changed, 61 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 870c470..4902473 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2718,7 +2718,7 @@ static int i915_sink_crc(struct seq_file *m, void *data)
>  	struct intel_connector *connector;
>  	struct drm_connector_list_iter conn_iter;
>  	struct intel_dp *intel_dp = NULL;
> -	int ret;
> +	int ret, tries = 6;
>  	u8 crc[6];
>  
>  	drm_modeset_lock_all(dev);
> @@ -2738,9 +2738,17 @@ static int i915_sink_crc(struct seq_file *m, void *data)
>  
>  		intel_dp = enc_to_intel_dp(connector->base.state->best_encoder);
>  
> -		ret = intel_dp_sink_crc(intel_dp, crc);
> -		if (ret)
> +		memset(crc, 0, 6);
> +		do {
> +			ret = intel_dp_sink_crc(intel_dp, crc);
> +			if (ret == -ETIMEDOUT)
> +				usleep_range(500, 700);
> +		} while ((ret == -ETIMEDOUT) && --tries);
> +
> +		if (ret != 0) {
> +			seq_printf(m, "000000000000\n");
>  			goto out;
> +		}
>  
>  		seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
>  			   crc[0], crc[1], crc[2],
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index 06b8bd4..217bc06 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -3877,13 +3877,15 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
>  
>  	do {
>  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> -
> +		usleep_range(16700, 17000);
>  		if (drm_dp_dpcd_readb(&intel_dp->aux,
>  				      DP_TEST_SINK_MISC, &buf) < 0) {
> +			DRM_DEBUG_KMS("Could not read TEST_SINK_MISC\n");
>  			ret = -EIO;
>  			goto out;
>  		}
>  		count = buf & DP_TEST_COUNT_MASK;
> +		DRM_DEBUG_KMS("PSR count is %d\n", count);
>  	} while (--attempts && count);
>  
>  	if (attempts == 0) {
> @@ -3928,6 +3930,8 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
>  	}
>  
>  	intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> +	usleep_range(16700, 17000);
> +	DRM_DEBUG_KMS("PSR Successfully started sink CRC\n");
>  	return 0;
>  }
>  
> @@ -3939,21 +3943,30 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
>  	u8 buf;
>  	int count, ret;
>  	int attempts = 6;
> +	u8 old_crc[6];
> +
> +	if (crc != NULL)
> +		memset(crc, 0, 6);
> +	else
> +		return -ENOMEM;
>  
>  	ret = intel_dp_sink_crc_start(intel_dp);
> -	if (ret)
> +	if (ret) {
> +		DRM_DEBUG_KMS("Could not start sink crc; ret %d\n", ret);
>  		return ret;
> +	}
>  
>  	do {
>  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> +		usleep_range(16700, 17000);
>  
>  		if (drm_dp_dpcd_readb(&intel_dp->aux,
>  				      DP_TEST_SINK_MISC, &buf) < 0) {
> +			DRM_DEBUG_KMS("Cound not read TEST_SINK_MISC\n");
>  			ret = -EIO;
>  			goto stop;
>  		}
>  		count = buf & DP_TEST_COUNT_MASK;
> -
>  	} while (--attempts && count == 0);
>  
>  	if (attempts == 0) {
> @@ -3962,11 +3975,41 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
>  		goto stop;
>  	}
>  
> -	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
> -		ret = -EIO;
> -		goto stop;
> -	}
> +	attempts = 6;
> +	memset(old_crc, 0xFF, 6);
> +	do {
> +		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> +		usleep_range(16500, 17000);
> +		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR,
> +				     crc, 6) < 0) {
> +			DRM_DEBUG_KMS("Could not read sink crc\n");
> +			ret = -EIO;
> +			goto stop;
> +		}
> +
> +		if (memcmp(old_crc, crc, 6) == 0) {
> +			DRM_DEBUG_KMS("PSR Active: %3s Matching crc values found:"
> +				      "%02x%02x%02x%02x%02x%02x\n",
> +				      yesno(dev_priv->psr.active),
> +				      crc[0], crc[1], crc[2],
> +				      crc[3], crc[4], crc[5]);
> +			ret = 0;
> +			goto stop;
> +		} else {
> +			DRM_DEBUG_KMS("PSR Active: %3s crc %02x%02x%02x%02x%02x%02x "
> +				      "doesn't match "
> +				      "previous %02x%02x%02x%02x%02x%02x\n",
> +				      yesno(dev_priv->psr.active),
> +				      crc[0], crc[1], crc[2],
> +				      crc[3], crc[4], crc[5],
> +				      old_crc[0], old_crc[1], old_crc[2],
> +				      old_crc[3], old_crc[4], old_crc[5]);
> +			memcpy(old_crc, crc, 6);
> +		}
> +	} while (--attempts);
>  
> +	DRM_DEBUG_KMS("Failed to get CRC after 6 attempts.\n");
> +	ret = -ETIMEDOUT;
>  stop:
>  	intel_dp_sink_crc_stop(intel_dp);
>  	return ret;
jim.bride@linux.intel.com May 8, 2017, 5:29 p.m. UTC | #2
On Mon, May 08, 2017 at 12:12:47PM +0300, Jani Nikula wrote:
> On Sat, 06 May 2017, Jim Bride <jim.bride@linux.intel.com> wrote:
> > According to the eDP spec, when the count field in TEST_SINK_MISC
> > increments then the six bytes of sink CRC information in the DPCD
> > should be valid.  Unfortunately, this doesn't seem to be the case
> > on some panels, and as a result we get some incorrect and inconsistent
> > values from the sink CRC DPCD locations at times.  This problem exhibits
> > itself more on faster processors (relative failure rates HSW < SKL < KBL.)
> > In order to try and account for this, we try a lot harder to read the sink
> > CRC until we get consistent values twice in a row before returning what we
> > read and delay for a time before trying to read.  We still see some
> > occasional failures, but reading the sink CRC is much more reliable,
> > particularly on SKL and KBL, with these changes than without.
> 
> Retrying seems like a good idea. But this patch retries up to 36 times
> if the two consecutive reads don't match, which is excessive.

I don't see where you're getting 36 from; the loop runs through six times.

> The sleeps should probably be a separate change. Also not happy about
> the magic numbers there.

Looping w/o the sleeps doesn't make a ton of sense IMHO.  The numbers
were found experimentally on KBL, which seemed to more easily
exercise the race that this patch is meant to address.  Ideally the
panels would not increment the counter until they were sure that the
CRC data in the DPCD actually reflected reality.  Unfortunately, this
isn't what we're seeing.  I will create a couple of #defines for the
sleep values, at least, with some comments about them.  

> Also, there are plenty of what seem like unrelated changes. Whitespace,
> debug logging, error checking, etc.

I looked at this as refactoring the existing implementation, so I
went ahead and added some debugging info; the error checking is
part of the refactoring.  Given the history of problems with this
functionality that seemed warranted.  I did notice one line of
whitespace change that snuck in (where I missed that I removed a
blank line.)  I can take out the debug prints for the success
cases if they seem excessive (and I'll restore the deleted
blank line for the next version of the patch.)

Jim


> 
> BR,
> Jani.
> 
> 
> >
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > Signed-off-by: Jim Bride <jim.bride@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++--
> >  drivers/gpu/drm/i915/intel_dp.c     | 57 ++++++++++++++++++++++++++++++++-----
> >  2 files changed, 61 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 870c470..4902473 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2718,7 +2718,7 @@ static int i915_sink_crc(struct seq_file *m, void *data)
> >  	struct intel_connector *connector;
> >  	struct drm_connector_list_iter conn_iter;
> >  	struct intel_dp *intel_dp = NULL;
> > -	int ret;
> > +	int ret, tries = 6;
> >  	u8 crc[6];
> >  
> >  	drm_modeset_lock_all(dev);
> > @@ -2738,9 +2738,17 @@ static int i915_sink_crc(struct seq_file *m, void *data)
> >  
> >  		intel_dp = enc_to_intel_dp(connector->base.state->best_encoder);
> >  
> > -		ret = intel_dp_sink_crc(intel_dp, crc);
> > -		if (ret)
> > +		memset(crc, 0, 6);
> > +		do {
> > +			ret = intel_dp_sink_crc(intel_dp, crc);
> > +			if (ret == -ETIMEDOUT)
> > +				usleep_range(500, 700);
> > +		} while ((ret == -ETIMEDOUT) && --tries);
> > +
> > +		if (ret != 0) {
> > +			seq_printf(m, "000000000000\n");
> >  			goto out;
> > +		}
> >  
> >  		seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
> >  			   crc[0], crc[1], crc[2],
> > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > index 06b8bd4..217bc06 100644
> > --- a/drivers/gpu/drm/i915/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > @@ -3877,13 +3877,15 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
> >  
> >  	do {
> >  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> > -
> > +		usleep_range(16700, 17000);
> >  		if (drm_dp_dpcd_readb(&intel_dp->aux,
> >  				      DP_TEST_SINK_MISC, &buf) < 0) {
> > +			DRM_DEBUG_KMS("Could not read TEST_SINK_MISC\n");
> >  			ret = -EIO;
> >  			goto out;
> >  		}
> >  		count = buf & DP_TEST_COUNT_MASK;
> > +		DRM_DEBUG_KMS("PSR count is %d\n", count);
> >  	} while (--attempts && count);
> >  
> >  	if (attempts == 0) {
> > @@ -3928,6 +3930,8 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
> >  	}
> >  
> >  	intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> > +	usleep_range(16700, 17000);
> > +	DRM_DEBUG_KMS("PSR Successfully started sink CRC\n");
> >  	return 0;
> >  }
> >  
> > @@ -3939,21 +3943,30 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
> >  	u8 buf;
> >  	int count, ret;
> >  	int attempts = 6;
> > +	u8 old_crc[6];
> > +
> > +	if (crc != NULL)
> > +		memset(crc, 0, 6);
> > +	else
> > +		return -ENOMEM;
> >  
> >  	ret = intel_dp_sink_crc_start(intel_dp);
> > -	if (ret)
> > +	if (ret) {
> > +		DRM_DEBUG_KMS("Could not start sink crc; ret %d\n", ret);
> >  		return ret;
> > +	}
> >  
> >  	do {
> >  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> > +		usleep_range(16700, 17000);
> >  
> >  		if (drm_dp_dpcd_readb(&intel_dp->aux,
> >  				      DP_TEST_SINK_MISC, &buf) < 0) {
> > +			DRM_DEBUG_KMS("Cound not read TEST_SINK_MISC\n");
> >  			ret = -EIO;
> >  			goto stop;
> >  		}
> >  		count = buf & DP_TEST_COUNT_MASK;
> > -
> >  	} while (--attempts && count == 0);
> >  
> >  	if (attempts == 0) {
> > @@ -3962,11 +3975,41 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
> >  		goto stop;
> >  	}
> >  
> > -	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
> > -		ret = -EIO;
> > -		goto stop;
> > -	}
> > +	attempts = 6;
> > +	memset(old_crc, 0xFF, 6);
> > +	do {
> > +		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> > +		usleep_range(16500, 17000);
> > +		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR,
> > +				     crc, 6) < 0) {
> > +			DRM_DEBUG_KMS("Could not read sink crc\n");
> > +			ret = -EIO;
> > +			goto stop;
> > +		}
> > +
> > +		if (memcmp(old_crc, crc, 6) == 0) {
> > +			DRM_DEBUG_KMS("PSR Active: %3s Matching crc values found:"
> > +				      "%02x%02x%02x%02x%02x%02x\n",
> > +				      yesno(dev_priv->psr.active),
> > +				      crc[0], crc[1], crc[2],
> > +				      crc[3], crc[4], crc[5]);
> > +			ret = 0;
> > +			goto stop;
> > +		} else {
> > +			DRM_DEBUG_KMS("PSR Active: %3s crc %02x%02x%02x%02x%02x%02x "
> > +				      "doesn't match "
> > +				      "previous %02x%02x%02x%02x%02x%02x\n",
> > +				      yesno(dev_priv->psr.active),
> > +				      crc[0], crc[1], crc[2],
> > +				      crc[3], crc[4], crc[5],
> > +				      old_crc[0], old_crc[1], old_crc[2],
> > +				      old_crc[3], old_crc[4], old_crc[5]);
> > +			memcpy(old_crc, crc, 6);
> > +		}
> > +	} while (--attempts);
> >  
> > +	DRM_DEBUG_KMS("Failed to get CRC after 6 attempts.\n");
> > +	ret = -ETIMEDOUT;
> >  stop:
> >  	intel_dp_sink_crc_stop(intel_dp);
> >  	return ret;
> 
> -- 
> Jani Nikula, Intel Open Source Technology Center
Jani Nikula May 8, 2017, 6:05 p.m. UTC | #3
On Mon, 08 May 2017, Jim Bride <jim.bride@linux.intel.com> wrote:
> On Mon, May 08, 2017 at 12:12:47PM +0300, Jani Nikula wrote:
>> On Sat, 06 May 2017, Jim Bride <jim.bride@linux.intel.com> wrote:
>> > According to the eDP spec, when the count field in TEST_SINK_MISC
>> > increments then the six bytes of sink CRC information in the DPCD
>> > should be valid.  Unfortunately, this doesn't seem to be the case
>> > on some panels, and as a result we get some incorrect and inconsistent
>> > values from the sink CRC DPCD locations at times.  This problem exhibits
>> > itself more on faster processors (relative failure rates HSW < SKL < KBL.)
>> > In order to try and account for this, we try a lot harder to read the sink
>> > CRC until we get consistent values twice in a row before returning what we
>> > read and delay for a time before trying to read.  We still see some
>> > occasional failures, but reading the sink CRC is much more reliable,
>> > particularly on SKL and KBL, with these changes than without.
>> 
>> Retrying seems like a good idea. But this patch retries up to 36 times
>> if the two consecutive reads don't match, which is excessive.
>
> I don't see where you're getting 36 from; the loop runs through six times.

i915_sink_crc() calls intel_dp_sink_crc(), retrying on ETIMEDOUT. In
turn, intel_dp_sink_crc() reads CRC, retrying on mismatched CRC and
returning ETIMEDOUT to i915_sink_crc() on errors. Admittedly the loop in
intel_dp_sink_crc() will always run at least twice to get two results to
compare, so I guess it would be fair to say the combo tries CRC read up
to 6 * 5 = 30 times.

>> The sleeps should probably be a separate change. Also not happy about
>> the magic numbers there.
>
> Looping w/o the sleeps doesn't make a ton of sense IMHO.  The numbers
> were found experimentally on KBL, which seemed to more easily
> exercise the race that this patch is meant to address.  Ideally the
> panels would not increment the counter until they were sure that the
> CRC data in the DPCD actually reflected reality.  Unfortunately, this
> isn't what we're seeing.  I will create a couple of #defines for the
> sleep values, at least, with some comments about them.  

It's just that the magic sleeps are eerily close to a frame at 60
Hz. Did you try another vblank wait instead? Or maybe it's the sync to
vblank that is the issue.

>> Also, there are plenty of what seem like unrelated changes. Whitespace,
>> debug logging, error checking, etc.
>
> I looked at this as refactoring the existing implementation, so I
> went ahead and added some debugging info; the error checking is
> part of the refactoring.  Given the history of problems with this
> functionality that seemed warranted.  I did notice one line of
> whitespace change that snuck in (where I missed that I removed a
> blank line.)  I can take out the debug prints for the success
> cases if they seem excessive (and I'll restore the deleted
> blank line for the next version of the patch.)

The thing is, by now I'm looking at basically any sink visible
functional changes to eDP, and PSR in particular, with the thought that
this may have to be reverted due to regressions later. That has nothing
to do with these patches, but rather our track record with eDP and
PSR. (And that also says a lot about eDP sink quality in general.)

So I'd rather see the other changes split out so we have a smaller diff
to revert, in case we have to revert. The pessimist does not get
disappointed. And so far PSR hasn't given us any reasons for optimism.

BR,
Jani.



>
> Jim
>
>
>> 
>> BR,
>> Jani.
>> 
>> 
>> >
>> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> > Signed-off-by: Jim Bride <jim.bride@linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++--
>> >  drivers/gpu/drm/i915/intel_dp.c     | 57 ++++++++++++++++++++++++++++++++-----
>> >  2 files changed, 61 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> > index 870c470..4902473 100644
>> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> > @@ -2718,7 +2718,7 @@ static int i915_sink_crc(struct seq_file *m, void *data)
>> >  	struct intel_connector *connector;
>> >  	struct drm_connector_list_iter conn_iter;
>> >  	struct intel_dp *intel_dp = NULL;
>> > -	int ret;
>> > +	int ret, tries = 6;
>> >  	u8 crc[6];
>> >  
>> >  	drm_modeset_lock_all(dev);
>> > @@ -2738,9 +2738,17 @@ static int i915_sink_crc(struct seq_file *m, void *data)
>> >  
>> >  		intel_dp = enc_to_intel_dp(connector->base.state->best_encoder);
>> >  
>> > -		ret = intel_dp_sink_crc(intel_dp, crc);
>> > -		if (ret)
>> > +		memset(crc, 0, 6);
>> > +		do {
>> > +			ret = intel_dp_sink_crc(intel_dp, crc);
>> > +			if (ret == -ETIMEDOUT)
>> > +				usleep_range(500, 700);
>> > +		} while ((ret == -ETIMEDOUT) && --tries);
>> > +
>> > +		if (ret != 0) {
>> > +			seq_printf(m, "000000000000\n");
>> >  			goto out;
>> > +		}
>> >  
>> >  		seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
>> >  			   crc[0], crc[1], crc[2],
>> > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
>> > index 06b8bd4..217bc06 100644
>> > --- a/drivers/gpu/drm/i915/intel_dp.c
>> > +++ b/drivers/gpu/drm/i915/intel_dp.c
>> > @@ -3877,13 +3877,15 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
>> >  
>> >  	do {
>> >  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
>> > -
>> > +		usleep_range(16700, 17000);
>> >  		if (drm_dp_dpcd_readb(&intel_dp->aux,
>> >  				      DP_TEST_SINK_MISC, &buf) < 0) {
>> > +			DRM_DEBUG_KMS("Could not read TEST_SINK_MISC\n");
>> >  			ret = -EIO;
>> >  			goto out;
>> >  		}
>> >  		count = buf & DP_TEST_COUNT_MASK;
>> > +		DRM_DEBUG_KMS("PSR count is %d\n", count);
>> >  	} while (--attempts && count);
>> >  
>> >  	if (attempts == 0) {
>> > @@ -3928,6 +3930,8 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
>> >  	}
>> >  
>> >  	intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
>> > +	usleep_range(16700, 17000);
>> > +	DRM_DEBUG_KMS("PSR Successfully started sink CRC\n");
>> >  	return 0;
>> >  }
>> >  
>> > @@ -3939,21 +3943,30 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
>> >  	u8 buf;
>> >  	int count, ret;
>> >  	int attempts = 6;
>> > +	u8 old_crc[6];
>> > +
>> > +	if (crc != NULL)
>> > +		memset(crc, 0, 6);
>> > +	else
>> > +		return -ENOMEM;
>> >  
>> >  	ret = intel_dp_sink_crc_start(intel_dp);
>> > -	if (ret)
>> > +	if (ret) {
>> > +		DRM_DEBUG_KMS("Could not start sink crc; ret %d\n", ret);
>> >  		return ret;
>> > +	}
>> >  
>> >  	do {
>> >  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
>> > +		usleep_range(16700, 17000);
>> >  
>> >  		if (drm_dp_dpcd_readb(&intel_dp->aux,
>> >  				      DP_TEST_SINK_MISC, &buf) < 0) {
>> > +			DRM_DEBUG_KMS("Cound not read TEST_SINK_MISC\n");
>> >  			ret = -EIO;
>> >  			goto stop;
>> >  		}
>> >  		count = buf & DP_TEST_COUNT_MASK;
>> > -
>> >  	} while (--attempts && count == 0);
>> >  
>> >  	if (attempts == 0) {
>> > @@ -3962,11 +3975,41 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
>> >  		goto stop;
>> >  	}
>> >  
>> > -	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
>> > -		ret = -EIO;
>> > -		goto stop;
>> > -	}
>> > +	attempts = 6;
>> > +	memset(old_crc, 0xFF, 6);
>> > +	do {
>> > +		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
>> > +		usleep_range(16500, 17000);
>> > +		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR,
>> > +				     crc, 6) < 0) {
>> > +			DRM_DEBUG_KMS("Could not read sink crc\n");
>> > +			ret = -EIO;
>> > +			goto stop;
>> > +		}
>> > +
>> > +		if (memcmp(old_crc, crc, 6) == 0) {
>> > +			DRM_DEBUG_KMS("PSR Active: %3s Matching crc values found:"
>> > +				      "%02x%02x%02x%02x%02x%02x\n",
>> > +				      yesno(dev_priv->psr.active),
>> > +				      crc[0], crc[1], crc[2],
>> > +				      crc[3], crc[4], crc[5]);
>> > +			ret = 0;
>> > +			goto stop;
>> > +		} else {
>> > +			DRM_DEBUG_KMS("PSR Active: %3s crc %02x%02x%02x%02x%02x%02x "
>> > +				      "doesn't match "
>> > +				      "previous %02x%02x%02x%02x%02x%02x\n",
>> > +				      yesno(dev_priv->psr.active),
>> > +				      crc[0], crc[1], crc[2],
>> > +				      crc[3], crc[4], crc[5],
>> > +				      old_crc[0], old_crc[1], old_crc[2],
>> > +				      old_crc[3], old_crc[4], old_crc[5]);
>> > +			memcpy(old_crc, crc, 6);
>> > +		}
>> > +	} while (--attempts);
>> >  
>> > +	DRM_DEBUG_KMS("Failed to get CRC after 6 attempts.\n");
>> > +	ret = -ETIMEDOUT;
>> >  stop:
>> >  	intel_dp_sink_crc_stop(intel_dp);
>> >  	return ret;
>> 
>> -- 
>> Jani Nikula, Intel Open Source Technology Center
jim.bride@linux.intel.com May 8, 2017, 7:26 p.m. UTC | #4
On Mon, May 08, 2017 at 09:05:08PM +0300, Jani Nikula wrote:
> On Mon, 08 May 2017, Jim Bride <jim.bride@linux.intel.com> wrote:
> > On Mon, May 08, 2017 at 12:12:47PM +0300, Jani Nikula wrote:
> >> On Sat, 06 May 2017, Jim Bride <jim.bride@linux.intel.com> wrote:
> >> > According to the eDP spec, when the count field in TEST_SINK_MISC
> >> > increments then the six bytes of sink CRC information in the DPCD
> >> > should be valid.  Unfortunately, this doesn't seem to be the case
> >> > on some panels, and as a result we get some incorrect and inconsistent
> >> > values from the sink CRC DPCD locations at times.  This problem exhibits
> >> > itself more on faster processors (relative failure rates HSW < SKL < KBL.)
> >> > In order to try and account for this, we try a lot harder to read the sink
> >> > CRC until we get consistent values twice in a row before returning what we
> >> > read and delay for a time before trying to read.  We still see some
> >> > occasional failures, but reading the sink CRC is much more reliable,
> >> > particularly on SKL and KBL, with these changes than without.
> >> 
> >> Retrying seems like a good idea. But this patch retries up to 36 times
> >> if the two consecutive reads don't match, which is excessive.
> >
> > I don't see where you're getting 36 from; the loop runs through six times.
> 
> i915_sink_crc() calls intel_dp_sink_crc(), retrying on ETIMEDOUT. In
> turn, intel_dp_sink_crc() reads CRC, retrying on mismatched CRC and
> returning ETIMEDOUT to i915_sink_crc() on errors. Admittedly the loop in
> intel_dp_sink_crc() will always run at least twice to get two results to
> compare, so I guess it would be fair to say the combo tries CRC read up
> to 6 * 5 = 30 times.

I'll play around with making i915_sink_crc() a little less
persistent and see what it does.  The loop in intel_dp_sink_crc()
frequently goes 3+ tries, so I'd prefer to keep that one as it is.


> >> The sleeps should probably be a separate change. Also not happy about
> >> the magic numbers there.
> >
> > Looping w/o the sleeps doesn't make a ton of sense IMHO.  The numbers
> > were found experimentally on KBL, which seemed to more easily
> > exercise the race that this patch is meant to address.  Ideally the
> > panels would not increment the counter until they were sure that the
> > CRC data in the DPCD actually reflected reality.  Unfortunately, this
> > isn't what we're seeing.  I will create a couple of #defines for the
> > sleep values, at least, with some comments about them.  
> 
> It's just that the magic sleeps are eerily close to a frame at 60
> Hz. Did you try another vblank wait instead? Or maybe it's the sync to
> vblank that is the issue.

This brings up an interesting point.  IMHO relying on vblanks for PSR
related stuff is a bad idea, at least while PSR is active.  The
reason I say this is that if PSR is active, then the link itself may
or may not be active.  We certainly shouldn't be sending frames of
data, vblanks,  or anything of that ilk.  There's a patch series that
Rodrigo wrote last August that I plan to write a IGT test case for (I
wanted to see IGT working right for PSR tests in general before doing
this) that basically makes sure that vblanks are disabled if PSR is
active, and that enabling vblanks causes PSR to exit.  Review
feedback on that series was that it needed an IGT test.  When thinking
about this series, I've been toying with the idea of doing a sleep
based on the time needed to render a frame + the appropriate blanking
intervals instead of doing all of the vblank waits anyhow, at least
for the sink CRC stuff since we need to get that data out while PSR
is active.  Now I'm wondering if we should go ahead and do that for
all but the PSR exit case now, perhaps.  Thoughts?

> >> Also, there are plenty of what seem like unrelated changes. Whitespace,
> >> debug logging, error checking, etc.
> >
> > I looked at this as refactoring the existing implementation, so I
> > went ahead and added some debugging info; the error checking is
> > part of the refactoring.  Given the history of problems with this
> > functionality that seemed warranted.  I did notice one line of
> > whitespace change that snuck in (where I missed that I removed a
> > blank line.)  I can take out the debug prints for the success
> > cases if they seem excessive (and I'll restore the deleted
> > blank line for the next version of the patch.)
> 
> The thing is, by now I'm looking at basically any sink visible
> functional changes to eDP, and PSR in particular, with the thought that
> this may have to be reverted due to regressions later. That has nothing
> to do with these patches, but rather our track record with eDP and
> PSR. (And that also says a lot about eDP sink quality in general.)
> 
> So I'd rather see the other changes split out so we have a smaller diff
> to revert, in case we have to revert. The pessimist does not get
> disappointed. And so far PSR hasn't given us any reasons for optimism.

Let me see what I can do.  I might be able to at least split out some
of the debug / error checking into a separate patch.

Jim

> BR,
> Jani.
> 
> 
> 
> >
> > Jim
> >
> >
> >> 
> >> BR,
> >> Jani.
> >> 
> >> 
> >> >
> >> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> >> > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >> > Signed-off-by: Jim Bride <jim.bride@linux.intel.com>
> >> > ---
> >> >  drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++--
> >> >  drivers/gpu/drm/i915/intel_dp.c     | 57 ++++++++++++++++++++++++++++++++-----
> >> >  2 files changed, 61 insertions(+), 10 deletions(-)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >> > index 870c470..4902473 100644
> >> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >> > @@ -2718,7 +2718,7 @@ static int i915_sink_crc(struct seq_file *m, void *data)
> >> >  	struct intel_connector *connector;
> >> >  	struct drm_connector_list_iter conn_iter;
> >> >  	struct intel_dp *intel_dp = NULL;
> >> > -	int ret;
> >> > +	int ret, tries = 6;
> >> >  	u8 crc[6];
> >> >  
> >> >  	drm_modeset_lock_all(dev);
> >> > @@ -2738,9 +2738,17 @@ static int i915_sink_crc(struct seq_file *m, void *data)
> >> >  
> >> >  		intel_dp = enc_to_intel_dp(connector->base.state->best_encoder);
> >> >  
> >> > -		ret = intel_dp_sink_crc(intel_dp, crc);
> >> > -		if (ret)
> >> > +		memset(crc, 0, 6);
> >> > +		do {
> >> > +			ret = intel_dp_sink_crc(intel_dp, crc);
> >> > +			if (ret == -ETIMEDOUT)
> >> > +				usleep_range(500, 700);
> >> > +		} while ((ret == -ETIMEDOUT) && --tries);
> >> > +
> >> > +		if (ret != 0) {
> >> > +			seq_printf(m, "000000000000\n");
> >> >  			goto out;
> >> > +		}
> >> >  
> >> >  		seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
> >> >  			   crc[0], crc[1], crc[2],
> >> > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> >> > index 06b8bd4..217bc06 100644
> >> > --- a/drivers/gpu/drm/i915/intel_dp.c
> >> > +++ b/drivers/gpu/drm/i915/intel_dp.c
> >> > @@ -3877,13 +3877,15 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
> >> >  
> >> >  	do {
> >> >  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> >> > -
> >> > +		usleep_range(16700, 17000);
> >> >  		if (drm_dp_dpcd_readb(&intel_dp->aux,
> >> >  				      DP_TEST_SINK_MISC, &buf) < 0) {
> >> > +			DRM_DEBUG_KMS("Could not read TEST_SINK_MISC\n");
> >> >  			ret = -EIO;
> >> >  			goto out;
> >> >  		}
> >> >  		count = buf & DP_TEST_COUNT_MASK;
> >> > +		DRM_DEBUG_KMS("PSR count is %d\n", count);
> >> >  	} while (--attempts && count);
> >> >  
> >> >  	if (attempts == 0) {
> >> > @@ -3928,6 +3930,8 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
> >> >  	}
> >> >  
> >> >  	intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> >> > +	usleep_range(16700, 17000);
> >> > +	DRM_DEBUG_KMS("PSR Successfully started sink CRC\n");
> >> >  	return 0;
> >> >  }
> >> >  
> >> > @@ -3939,21 +3943,30 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
> >> >  	u8 buf;
> >> >  	int count, ret;
> >> >  	int attempts = 6;
> >> > +	u8 old_crc[6];
> >> > +
> >> > +	if (crc != NULL)
> >> > +		memset(crc, 0, 6);
> >> > +	else
> >> > +		return -ENOMEM;
> >> >  
> >> >  	ret = intel_dp_sink_crc_start(intel_dp);
> >> > -	if (ret)
> >> > +	if (ret) {
> >> > +		DRM_DEBUG_KMS("Could not start sink crc; ret %d\n", ret);
> >> >  		return ret;
> >> > +	}
> >> >  
> >> >  	do {
> >> >  		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> >> > +		usleep_range(16700, 17000);
> >> >  
> >> >  		if (drm_dp_dpcd_readb(&intel_dp->aux,
> >> >  				      DP_TEST_SINK_MISC, &buf) < 0) {
> >> > +			DRM_DEBUG_KMS("Cound not read TEST_SINK_MISC\n");
> >> >  			ret = -EIO;
> >> >  			goto stop;
> >> >  		}
> >> >  		count = buf & DP_TEST_COUNT_MASK;
> >> > -
> >> >  	} while (--attempts && count == 0);
> >> >  
> >> >  	if (attempts == 0) {
> >> > @@ -3962,11 +3975,41 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
> >> >  		goto stop;
> >> >  	}
> >> >  
> >> > -	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
> >> > -		ret = -EIO;
> >> > -		goto stop;
> >> > -	}
> >> > +	attempts = 6;
> >> > +	memset(old_crc, 0xFF, 6);
> >> > +	do {
> >> > +		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
> >> > +		usleep_range(16500, 17000);
> >> > +		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR,
> >> > +				     crc, 6) < 0) {
> >> > +			DRM_DEBUG_KMS("Could not read sink crc\n");
> >> > +			ret = -EIO;
> >> > +			goto stop;
> >> > +		}
> >> > +
> >> > +		if (memcmp(old_crc, crc, 6) == 0) {
> >> > +			DRM_DEBUG_KMS("PSR Active: %3s Matching crc values found:"
> >> > +				      "%02x%02x%02x%02x%02x%02x\n",
> >> > +				      yesno(dev_priv->psr.active),
> >> > +				      crc[0], crc[1], crc[2],
> >> > +				      crc[3], crc[4], crc[5]);
> >> > +			ret = 0;
> >> > +			goto stop;
> >> > +		} else {
> >> > +			DRM_DEBUG_KMS("PSR Active: %3s crc %02x%02x%02x%02x%02x%02x "
> >> > +				      "doesn't match "
> >> > +				      "previous %02x%02x%02x%02x%02x%02x\n",
> >> > +				      yesno(dev_priv->psr.active),
> >> > +				      crc[0], crc[1], crc[2],
> >> > +				      crc[3], crc[4], crc[5],
> >> > +				      old_crc[0], old_crc[1], old_crc[2],
> >> > +				      old_crc[3], old_crc[4], old_crc[5]);
> >> > +			memcpy(old_crc, crc, 6);
> >> > +		}
> >> > +	} while (--attempts);
> >> >  
> >> > +	DRM_DEBUG_KMS("Failed to get CRC after 6 attempts.\n");
> >> > +	ret = -ETIMEDOUT;
> >> >  stop:
> >> >  	intel_dp_sink_crc_stop(intel_dp);
> >> >  	return ret;
> >> 
> >> -- 
> >> Jani Nikula, Intel Open Source Technology Center
> 
> -- 
> Jani Nikula, Intel Open Source Technology Center
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 870c470..4902473 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2718,7 +2718,7 @@  static int i915_sink_crc(struct seq_file *m, void *data)
 	struct intel_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct intel_dp *intel_dp = NULL;
-	int ret;
+	int ret, tries = 6;
 	u8 crc[6];
 
 	drm_modeset_lock_all(dev);
@@ -2738,9 +2738,17 @@  static int i915_sink_crc(struct seq_file *m, void *data)
 
 		intel_dp = enc_to_intel_dp(connector->base.state->best_encoder);
 
-		ret = intel_dp_sink_crc(intel_dp, crc);
-		if (ret)
+		memset(crc, 0, 6);
+		do {
+			ret = intel_dp_sink_crc(intel_dp, crc);
+			if (ret == -ETIMEDOUT)
+				usleep_range(500, 700);
+		} while ((ret == -ETIMEDOUT) && --tries);
+
+		if (ret != 0) {
+			seq_printf(m, "000000000000\n");
 			goto out;
+		}
 
 		seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
 			   crc[0], crc[1], crc[2],
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 06b8bd4..217bc06 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3877,13 +3877,15 @@  static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
 
 	do {
 		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
-
+		usleep_range(16700, 17000);
 		if (drm_dp_dpcd_readb(&intel_dp->aux,
 				      DP_TEST_SINK_MISC, &buf) < 0) {
+			DRM_DEBUG_KMS("Could not read TEST_SINK_MISC\n");
 			ret = -EIO;
 			goto out;
 		}
 		count = buf & DP_TEST_COUNT_MASK;
+		DRM_DEBUG_KMS("PSR count is %d\n", count);
 	} while (--attempts && count);
 
 	if (attempts == 0) {
@@ -3928,6 +3930,8 @@  static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
 	}
 
 	intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
+	usleep_range(16700, 17000);
+	DRM_DEBUG_KMS("PSR Successfully started sink CRC\n");
 	return 0;
 }
 
@@ -3939,21 +3943,30 @@  int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
 	u8 buf;
 	int count, ret;
 	int attempts = 6;
+	u8 old_crc[6];
+
+	if (crc != NULL)
+		memset(crc, 0, 6);
+	else
+		return -ENOMEM;
 
 	ret = intel_dp_sink_crc_start(intel_dp);
-	if (ret)
+	if (ret) {
+		DRM_DEBUG_KMS("Could not start sink crc; ret %d\n", ret);
 		return ret;
+	}
 
 	do {
 		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
+		usleep_range(16700, 17000);
 
 		if (drm_dp_dpcd_readb(&intel_dp->aux,
 				      DP_TEST_SINK_MISC, &buf) < 0) {
+			DRM_DEBUG_KMS("Cound not read TEST_SINK_MISC\n");
 			ret = -EIO;
 			goto stop;
 		}
 		count = buf & DP_TEST_COUNT_MASK;
-
 	} while (--attempts && count == 0);
 
 	if (attempts == 0) {
@@ -3962,11 +3975,41 @@  int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
 		goto stop;
 	}
 
-	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
-		ret = -EIO;
-		goto stop;
-	}
+	attempts = 6;
+	memset(old_crc, 0xFF, 6);
+	do {
+		intel_wait_for_vblank(dev_priv, intel_crtc->pipe);
+		usleep_range(16500, 17000);
+		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR,
+				     crc, 6) < 0) {
+			DRM_DEBUG_KMS("Could not read sink crc\n");
+			ret = -EIO;
+			goto stop;
+		}
+
+		if (memcmp(old_crc, crc, 6) == 0) {
+			DRM_DEBUG_KMS("PSR Active: %3s Matching crc values found:"
+				      "%02x%02x%02x%02x%02x%02x\n",
+				      yesno(dev_priv->psr.active),
+				      crc[0], crc[1], crc[2],
+				      crc[3], crc[4], crc[5]);
+			ret = 0;
+			goto stop;
+		} else {
+			DRM_DEBUG_KMS("PSR Active: %3s crc %02x%02x%02x%02x%02x%02x "
+				      "doesn't match "
+				      "previous %02x%02x%02x%02x%02x%02x\n",
+				      yesno(dev_priv->psr.active),
+				      crc[0], crc[1], crc[2],
+				      crc[3], crc[4], crc[5],
+				      old_crc[0], old_crc[1], old_crc[2],
+				      old_crc[3], old_crc[4], old_crc[5]);
+			memcpy(old_crc, crc, 6);
+		}
+	} while (--attempts);
 
+	DRM_DEBUG_KMS("Failed to get CRC after 6 attempts.\n");
+	ret = -ETIMEDOUT;
 stop:
 	intel_dp_sink_crc_stop(intel_dp);
 	return ret;