diff mbox

[2/2] drm/i915/lspcon: Add workaround for resuming in PCON mode

Message ID 1476983187-27216-2-git-send-email-imre.deak@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Imre Deak Oct. 20, 2016, 5:06 p.m. UTC
On my APL the LSPCON firmware resumes in PCON mode as opposed to the
expected LS mode. It also appears to be in a state where AUX DPCD reads
will succeed but return garbage recovering only after a few hundreds of
milliseconds. After the recovery time DPCD reads will result in the
correct values and things will continue to work. If I2C over AUX is
attempted during this recovery time (implying an AUX write transaction)
the firmware won't recover and will stay in this broken state.

As a workaround check if the firmware is in PCON state after resume and
if so wait until the correct DPCD values are returned. For this we
compare the branch descriptor with the one we cached during init time.
If the firmware was in the LS state, we skip the w/a and continue as
before.

Cc: Shashank Sharma <shashank.sharma@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c     |  2 +-
 drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
 drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
 3 files changed, 48 insertions(+), 12 deletions(-)

Comments

Jani Nikula Oct. 20, 2016, 6:20 p.m. UTC | #1
On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> expected LS mode. It also appears to be in a state where AUX DPCD reads
> will succeed but return garbage recovering only after a few hundreds of
> milliseconds. After the recovery time DPCD reads will result in the
> correct values and things will continue to work. If I2C over AUX is
> attempted during this recovery time (implying an AUX write transaction)
> the firmware won't recover and will stay in this broken state.
>
> As a workaround check if the firmware is in PCON state after resume and
> if so wait until the correct DPCD values are returned. For this we
> compare the branch descriptor with the one we cached during init time.
> If the firmware was in the LS state, we skip the w/a and continue as
> before.
>
> Cc: Shashank Sharma <shashank.sharma@intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
>  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
>  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
>  3 files changed, 48 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index e90211e..ec031db 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>  	intel_dp->DP = DP;
>  }
>  
> -static bool
> +bool
>  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>  {
>  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index a35e241..9a2366e 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -972,7 +972,9 @@ struct intel_dp {
>  struct intel_lspcon {
>  	bool active;
>  	enum drm_lspcon_mode mode;
> -	struct drm_dp_aux *aux;
> +	struct intel_dp *intel_dp;
> +	bool desc_valid;
> +	struct intel_dp_desc desc;

I guess we could cache the desc in intel_dp directly. Independent of
this patch.

Also, I'm wondering if we could stick with just aux here, and read
something else from dpcd instead.

>  };
>  
>  struct intel_digital_port {
> @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
>  }
>  
>  bool
> +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> +bool
>  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>  void
>  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
> index d2c8cb2..54c6173 100644
> --- a/drivers/gpu/drm/i915/intel_lspcon.c
> +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> @@ -30,7 +30,7 @@
>  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
>  {
>  	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>  
>  	if (drm_lspcon_get_mode(adapter, &current_mode))
>  		DRM_ERROR("Error reading LSPCON mode\n");
> @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>  {
>  	int err;
>  	enum drm_lspcon_mode current_mode;
> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>  
>  	err = drm_lspcon_get_mode(adapter, &current_mode);
>  	if (err) {
> @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>  static bool lspcon_probe(struct intel_lspcon *lspcon)
>  {
>  	enum drm_dp_dual_mode_type adaptor_type;
> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>  
>  	/* Lets probe the adaptor and check its type */
>  	adaptor_type = drm_dp_dual_mode_detect(adapter);
> @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
>  	return true;
>  }
>  
> +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> +{
> +	unsigned long start = jiffies;
> +
> +	if (!lspcon->desc_valid)
> +		return;
> +
> +	while (1) {
> +		struct intel_dp_desc desc;
> +
> +		/*
> +		 * The w/a only applies in PCON mode and we don't expect any
> +		 * AUX errors.
> +		 */
> +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
> +			return;
> +
> +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
> +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
> +				      jiffies_to_msecs(jiffies - start));
> +			return;
> +		}
> +
> +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
> +			break;
> +
> +		msleep(10);
> +	}
> +
> +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
> +}

I think we need to go through the LSPCON spec once more before we merge
this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
the resume properly. Maybe this isn't a "workaround" after all.

BR,
Jani.

> +
>  void lspcon_resume(struct intel_lspcon *lspcon)
>  {
> +	lspcon_resume_in_pcon_wa(lspcon);
> +
>  	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
>  		DRM_ERROR("LSPCON resume failed\n");
>  	else
> @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>  
>  	lspcon->active = false;
>  	lspcon->mode = DRM_LSPCON_MODE_INVALID;
> -	lspcon->aux = &dp->aux;
> +	lspcon->intel_dp = dp;
>  
>  	if (!lspcon_probe(lspcon)) {
>  		DRM_ERROR("Failed to probe lspcon\n");
> @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>  		}
>  	}
>  
> -	if (drm_debug & DRM_UT_KMS) {
> -		struct intel_dp_desc desc;
> -
> -		if (intel_dp_read_desc(dp, &desc))
> -			intel_dp_print_desc(dp, &desc);
> -	}
> +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
> +			     intel_dp_read_desc(dp, &lspcon->desc);
> +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
> +		intel_dp_print_desc(dp, &lspcon->desc);
>  
>  	DRM_DEBUG_KMS("Success: LSPCON init\n");
>  	return true;
Imre Deak Oct. 20, 2016, 7:20 p.m. UTC | #2
On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > expected LS mode. It also appears to be in a state where AUX DPCD reads
> > will succeed but return garbage recovering only after a few hundreds of
> > milliseconds. After the recovery time DPCD reads will result in the
> > correct values and things will continue to work. If I2C over AUX is
> > attempted during this recovery time (implying an AUX write transaction)
> > the firmware won't recover and will stay in this broken state.
> > 
> > As a workaround check if the firmware is in PCON state after resume and
> > if so wait until the correct DPCD values are returned. For this we
> > compare the branch descriptor with the one we cached during init time.
> > If the firmware was in the LS state, we skip the w/a and continue as
> > before.
> > 
> > Cc: Shashank Sharma <shashank.sharma@intel.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Cc: Jani Nikula <jani.nikula@intel.com>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
> >  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
> >  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
> >  3 files changed, 48 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > index e90211e..ec031db 100644
> > --- a/drivers/gpu/drm/i915/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> >  	intel_dp->DP = DP;
> >  }
> >  
> > -static bool
> > +bool
> >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> >  {
> >  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index a35e241..9a2366e 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -972,7 +972,9 @@ struct intel_dp {
> >  struct intel_lspcon {
> >  	bool active;
> >  	enum drm_lspcon_mode mode;
> > -	struct drm_dp_aux *aux;
> > +	struct intel_dp *intel_dp;
> > +	bool desc_valid;
> > +	struct intel_dp_desc desc;
> 
> I guess we could cache the desc in intel_dp directly. Independent of
> this patch.

It's not used anywhere else, but I can move it to intel_dp.

> 
> Also, I'm wondering if we could stick with just aux here, and read
> something else from dpcd instead.

Not sure either, I picked desc since we read it out anyway during init.

> 
> >  };
> >  
> >  struct intel_digital_port {
> > @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
> >  }
> >  
> >  bool
> > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > +bool
> >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> >  void
> >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
> > index d2c8cb2..54c6173 100644
> > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > @@ -30,7 +30,7 @@
> >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
> >  {
> >  	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> >  
> >  	if (drm_lspcon_get_mode(adapter, ¤t_mode))
> >  		DRM_ERROR("Error reading LSPCON mode\n");
> > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> >  {
> >  	int err;
> >  	enum drm_lspcon_mode current_mode;
> > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> >  
> >  	err = drm_lspcon_get_mode(adapter, ¤t_mode);
> >  	if (err) {
> > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> >  {
> >  	enum drm_dp_dual_mode_type adaptor_type;
> > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> >  
> >  	/* Lets probe the adaptor and check its type */
> >  	adaptor_type = drm_dp_dual_mode_detect(adapter);
> > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
> >  	return true;
> >  }
> >  
> > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> > +{
> > +	unsigned long start = jiffies;
> > +
> > +	if (!lspcon->desc_valid)
> > +		return;
> > +
> > +	while (1) {
> > +		struct intel_dp_desc desc;
> > +
> > +		/*
> > +		 * The w/a only applies in PCON mode and we don't expect any
> > +		 * AUX errors.
> > +		 */
> > +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
> > +			return;
> > +
> > +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
> > +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
> > +				      jiffies_to_msecs(jiffies - start));
> > +			return;
> > +		}
> > +
> > +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
> > +			break;
> > +
> > +		msleep(10);
> > +	}
> > +
> > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
> > +}
> 
> I think we need to go through the LSPCON spec once more before we merge
> this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
> the resume properly.

I haven't found at least any way to reset things. I also tried to
change to LS mode when suspending or switch here to LS mode first and
only then to PCON mode but these didn't help.

--Imre

> Maybe this isn't a "workaround" after all.
> 
> BR,
> Jani.
> 
> > +
> >  void lspcon_resume(struct intel_lspcon *lspcon)
> >  {
> > +	lspcon_resume_in_pcon_wa(lspcon);
> > +
> >  	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
> >  		DRM_ERROR("LSPCON resume failed\n");
> >  	else
> > @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
> >  
> >  	lspcon->active = false;
> >  	lspcon->mode = DRM_LSPCON_MODE_INVALID;
> > -	lspcon->aux = &dp->aux;
> > +	lspcon->intel_dp = dp;
> >  
> >  	if (!lspcon_probe(lspcon)) {
> >  		DRM_ERROR("Failed to probe lspcon\n");
> > @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
> >  		}
> >  	}
> >  
> > -	if (drm_debug & DRM_UT_KMS) {
> > -		struct intel_dp_desc desc;
> > -
> > -		if (intel_dp_read_desc(dp, &desc))
> > -			intel_dp_print_desc(dp, &desc);
> > -	}
> > +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
> > +			     intel_dp_read_desc(dp, &lspcon->desc);
> > +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
> > +		intel_dp_print_desc(dp, &lspcon->desc);
> >  
> >  	DRM_DEBUG_KMS("Success: LSPCON init\n");
> >  	return true;
>
Jani Nikula Oct. 20, 2016, 7:24 p.m. UTC | #3
On Thu, 20 Oct 2016, Jani Nikula <jani.nikula@intel.com> wrote:
> On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
>> On my APL the LSPCON firmware resumes in PCON mode as opposed to the
>> expected LS mode. It also appears to be in a state where AUX DPCD reads
>> will succeed but return garbage recovering only after a few hundreds of
>> milliseconds. After the recovery time DPCD reads will result in the
>> correct values and things will continue to work. If I2C over AUX is
>> attempted during this recovery time (implying an AUX write transaction)
>> the firmware won't recover and will stay in this broken state.
>>
>> As a workaround check if the firmware is in PCON state after resume and
>> if so wait until the correct DPCD values are returned. For this we
>> compare the branch descriptor with the one we cached during init time.
>> If the firmware was in the LS state, we skip the w/a and continue as
>> before.
>>
>> Cc: Shashank Sharma <shashank.sharma@intel.com>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Cc: Jani Nikula <jani.nikula@intel.com>
>> Signed-off-by: Imre Deak <imre.deak@intel.com>
>> ---
>>  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
>>  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
>>  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
>>  3 files changed, 48 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
>> index e90211e..ec031db 100644
>> --- a/drivers/gpu/drm/i915/intel_dp.c
>> +++ b/drivers/gpu/drm/i915/intel_dp.c
>> @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>>  	intel_dp->DP = DP;
>>  }
>>  
>> -static bool
>> +bool
>>  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>>  {
>>  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index a35e241..9a2366e 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -972,7 +972,9 @@ struct intel_dp {
>>  struct intel_lspcon {
>>  	bool active;
>>  	enum drm_lspcon_mode mode;
>> -	struct drm_dp_aux *aux;
>> +	struct intel_dp *intel_dp;
>> +	bool desc_valid;
>> +	struct intel_dp_desc desc;
>
> I guess we could cache the desc in intel_dp directly. Independent of
> this patch.
>
> Also, I'm wondering if we could stick with just aux here, and read
> something else from dpcd instead.
>
>>  };
>>  
>>  struct intel_digital_port {
>> @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
>>  }
>>  
>>  bool
>> +intel_dp_read_dpcd(struct intel_dp *intel_dp);
>> +bool
>>  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>>  void
>>  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>> diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
>> index d2c8cb2..54c6173 100644
>> --- a/drivers/gpu/drm/i915/intel_lspcon.c
>> +++ b/drivers/gpu/drm/i915/intel_lspcon.c
>> @@ -30,7 +30,7 @@
>>  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
>>  {
>>  	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
>> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>  
>>  	if (drm_lspcon_get_mode(adapter, &current_mode))
>>  		DRM_ERROR("Error reading LSPCON mode\n");
>> @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>>  {
>>  	int err;
>>  	enum drm_lspcon_mode current_mode;
>> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>  
>>  	err = drm_lspcon_get_mode(adapter, &current_mode);
>>  	if (err) {
>> @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>>  static bool lspcon_probe(struct intel_lspcon *lspcon)
>>  {
>>  	enum drm_dp_dual_mode_type adaptor_type;
>> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>  
>>  	/* Lets probe the adaptor and check its type */
>>  	adaptor_type = drm_dp_dual_mode_detect(adapter);
>> @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
>>  	return true;
>>  }
>>  
>> +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
>> +{
>> +	unsigned long start = jiffies;
>> +
>> +	if (!lspcon->desc_valid)
>> +		return;
>> +
>> +	while (1) {
>> +		struct intel_dp_desc desc;
>> +
>> +		/*
>> +		 * The w/a only applies in PCON mode and we don't expect any
>> +		 * AUX errors.
>> +		 */
>> +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
>> +			return;
>> +
>> +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
>> +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
>> +				      jiffies_to_msecs(jiffies - start));
>> +			return;
>> +		}
>> +
>> +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
>> +			break;
>> +
>> +		msleep(10);
>> +	}
>> +
>> +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
>> +}
>
> I think we need to go through the LSPCON spec once more before we merge
> this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
> the resume properly. Maybe this isn't a "workaround" after all.

Looks like this stuff is normal behaviour if the LSPCON has been in low
power state, and we need to wake it up. In PCON mode, this is done using
DP_SET_POWER. And while waking up, it'll issue AUX DEFER, as we can see
from the logs.

We need to do something, but I don't think this patch is the right fix.

BR,
Jani.



>
> BR,
> Jani.
>
>> +
>>  void lspcon_resume(struct intel_lspcon *lspcon)
>>  {
>> +	lspcon_resume_in_pcon_wa(lspcon);
>> +
>>  	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
>>  		DRM_ERROR("LSPCON resume failed\n");
>>  	else
>> @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>>  
>>  	lspcon->active = false;
>>  	lspcon->mode = DRM_LSPCON_MODE_INVALID;
>> -	lspcon->aux = &dp->aux;
>> +	lspcon->intel_dp = dp;
>>  
>>  	if (!lspcon_probe(lspcon)) {
>>  		DRM_ERROR("Failed to probe lspcon\n");
>> @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>>  		}
>>  	}
>>  
>> -	if (drm_debug & DRM_UT_KMS) {
>> -		struct intel_dp_desc desc;
>> -
>> -		if (intel_dp_read_desc(dp, &desc))
>> -			intel_dp_print_desc(dp, &desc);
>> -	}
>> +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
>> +			     intel_dp_read_desc(dp, &lspcon->desc);
>> +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
>> +		intel_dp_print_desc(dp, &lspcon->desc);
>>  
>>  	DRM_DEBUG_KMS("Success: LSPCON init\n");
>>  	return true;
Imre Deak Oct. 20, 2016, 7:43 p.m. UTC | #4
On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Jani Nikula <jani.nikula@intel.com> wrote:
> > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > > expected LS mode. It also appears to be in a state where AUX DPCD reads
> > > will succeed but return garbage recovering only after a few hundreds of
> > > milliseconds. After the recovery time DPCD reads will result in the
> > > correct values and things will continue to work. If I2C over AUX is
> > > attempted during this recovery time (implying an AUX write transaction)
> > > the firmware won't recover and will stay in this broken state.
> > > 
> > > As a workaround check if the firmware is in PCON state after resume and
> > > if so wait until the correct DPCD values are returned. For this we
> > > compare the branch descriptor with the one we cached during init time.
> > > If the firmware was in the LS state, we skip the w/a and continue as
> > > before.
> > > 
> > > Cc: Shashank Sharma <shashank.sharma@intel.com>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
> > >  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
> > >  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
> > >  3 files changed, 48 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > > index e90211e..ec031db 100644
> > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> > >  	intel_dp->DP = DP;
> > >  }
> > >  
> > > -static bool
> > > +bool
> > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> > >  {
> > >  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > > index a35e241..9a2366e 100644
> > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > @@ -972,7 +972,9 @@ struct intel_dp {
> > >  struct intel_lspcon {
> > >  	bool active;
> > >  	enum drm_lspcon_mode mode;
> > > -	struct drm_dp_aux *aux;
> > > +	struct intel_dp *intel_dp;
> > > +	bool desc_valid;
> > > +	struct intel_dp_desc desc;
> > 
> > I guess we could cache the desc in intel_dp directly. Independent of
> > this patch.
> > 
> > Also, I'm wondering if we could stick with just aux here, and read
> > something else from dpcd instead.
> > 
> > >  };
> > >  
> > >  struct intel_digital_port {
> > > @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
> > >  }
> > >  
> > >  bool
> > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > > +bool
> > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > >  void
> > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
> > > index d2c8cb2..54c6173 100644
> > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > > @@ -30,7 +30,7 @@
> > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
> > >  {
> > >  	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > >  
> > >  	if (drm_lspcon_get_mode(adapter, ¤t_mode))
> > >  		DRM_ERROR("Error reading LSPCON mode\n");
> > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> > >  {
> > >  	int err;
> > >  	enum drm_lspcon_mode current_mode;
> > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > >  
> > >  	err = drm_lspcon_get_mode(adapter, ¤t_mode);
> > >  	if (err) {
> > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> > >  {
> > >  	enum drm_dp_dual_mode_type adaptor_type;
> > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > >  
> > >  	/* Lets probe the adaptor and check its type */
> > >  	adaptor_type = drm_dp_dual_mode_detect(adapter);
> > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
> > >  	return true;
> > >  }
> > >  
> > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> > > +{
> > > +	unsigned long start = jiffies;
> > > +
> > > +	if (!lspcon->desc_valid)
> > > +		return;
> > > +
> > > +	while (1) {
> > > +		struct intel_dp_desc desc;
> > > +
> > > +		/*
> > > +		 * The w/a only applies in PCON mode and we don't expect any
> > > +		 * AUX errors.
> > > +		 */
> > > +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
> > > +			return;
> > > +
> > > +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
> > > +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
> > > +				      jiffies_to_msecs(jiffies - start));
> > > +			return;
> > > +		}
> > > +
> > > +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
> > > +			break;
> > > +
> > > +		msleep(10);
> > > +	}
> > > +
> > > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
> > > +}
> > 
> > I think we need to go through the LSPCON spec once more before we merge
> > this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
> > the resume properly. Maybe this isn't a "workaround" after all.
> 
> Looks like this stuff is normal behaviour if the LSPCON has been in low
> power state, and we need to wake it up. In PCON mode, this is done using
> DP_SET_POWER. And while waking up, it'll issue AUX DEFER, as we can see
> from the logs.

I did think about that, but waking the device via DP_SET_POWER(D0)
didn't help. Doing any AUX write transactions actually gets the device
into a stuck state for good (until next reboot on APL).

More over, we don't get AUX DEFERs the AUX transactions succeed, it's
only that reads will return corrupted values for the given time period.

--Imre

> 
> We need to do something, but I don't think this patch is the right fix.
> 
> BR,
> Jani.
> 
> 
> 
> > 
> > BR,
> > Jani.
> > 
> > > +
> > >  void lspcon_resume(struct intel_lspcon *lspcon)
> > >  {
> > > +	lspcon_resume_in_pcon_wa(lspcon);
> > > +
> > >  	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
> > >  		DRM_ERROR("LSPCON resume failed\n");
> > >  	else
> > > @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
> > >  
> > >  	lspcon->active = false;
> > >  	lspcon->mode = DRM_LSPCON_MODE_INVALID;
> > > -	lspcon->aux = &dp->aux;
> > > +	lspcon->intel_dp = dp;
> > >  
> > >  	if (!lspcon_probe(lspcon)) {
> > >  		DRM_ERROR("Failed to probe lspcon\n");
> > > @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
> > >  		}
> > >  	}
> > >  
> > > -	if (drm_debug & DRM_UT_KMS) {
> > > -		struct intel_dp_desc desc;
> > > -
> > > -		if (intel_dp_read_desc(dp, &desc))
> > > -			intel_dp_print_desc(dp, &desc);
> > > -	}
> > > +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
> > > +			     intel_dp_read_desc(dp, &lspcon->desc);
> > > +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
> > > +		intel_dp_print_desc(dp, &lspcon->desc);
> > >  
> > >  	DRM_DEBUG_KMS("Success: LSPCON init\n");
> > >  	return true;
>
Jani Nikula Oct. 20, 2016, 8:50 p.m. UTC | #5
On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
>> On Thu, 20 Oct 2016, Jani Nikula <jani.nikula@intel.com> wrote:
>> > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
>> > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
>> > > expected LS mode. It also appears to be in a state where AUX DPCD reads
>> > > will succeed but return garbage recovering only after a few hundreds of
>> > > milliseconds. After the recovery time DPCD reads will result in the
>> > > correct values and things will continue to work. If I2C over AUX is
>> > > attempted during this recovery time (implying an AUX write transaction)
>> > > the firmware won't recover and will stay in this broken state.
>> > > 
>> > > As a workaround check if the firmware is in PCON state after resume and
>> > > if so wait until the correct DPCD values are returned. For this we
>> > > compare the branch descriptor with the one we cached during init time.
>> > > If the firmware was in the LS state, we skip the w/a and continue as
>> > > before.
>> > > 
>> > > Cc: Shashank Sharma <shashank.sharma@intel.com>
>> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> > > Cc: Jani Nikula <jani.nikula@intel.com>
>> > > Signed-off-by: Imre Deak <imre.deak@intel.com>
>> > > ---
>> > >  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
>> > >  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
>> > >  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
>> > >  3 files changed, 48 insertions(+), 12 deletions(-)
>> > > 
>> > > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
>> > > index e90211e..ec031db 100644
>> > > --- a/drivers/gpu/drm/i915/intel_dp.c
>> > > +++ b/drivers/gpu/drm/i915/intel_dp.c
>> > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>> > >  	intel_dp->DP = DP;
>> > >  }
>> > >  
>> > > -static bool
>> > > +bool
>> > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>> > >  {
>> > >  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
>> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> > > index a35e241..9a2366e 100644
>> > > --- a/drivers/gpu/drm/i915/intel_drv.h
>> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
>> > > @@ -972,7 +972,9 @@ struct intel_dp {
>> > >  struct intel_lspcon {
>> > >  	bool active;
>> > >  	enum drm_lspcon_mode mode;
>> > > -	struct drm_dp_aux *aux;
>> > > +	struct intel_dp *intel_dp;
>> > > +	bool desc_valid;
>> > > +	struct intel_dp_desc desc;
>> > 
>> > I guess we could cache the desc in intel_dp directly. Independent of
>> > this patch.
>> > 
>> > Also, I'm wondering if we could stick with just aux here, and read
>> > something else from dpcd instead.
>> > 
>> > >  };
>> > >  
>> > >  struct intel_digital_port {
>> > > @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
>> > >  }
>> > >  
>> > >  bool
>> > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
>> > > +bool
>> > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>> > >  void
>> > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>> > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
>> > > index d2c8cb2..54c6173 100644
>> > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
>> > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
>> > > @@ -30,7 +30,7 @@
>> > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
>> > >  {
>> > >  	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
>> > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>> > >  
>> > >  	if (drm_lspcon_get_mode(adapter, ¤t_mode))
>> > >  		DRM_ERROR("Error reading LSPCON mode\n");
>> > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>> > >  {
>> > >  	int err;
>> > >  	enum drm_lspcon_mode current_mode;
>> > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>> > >  
>> > >  	err = drm_lspcon_get_mode(adapter, ¤t_mode);
>> > >  	if (err) {
>> > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>> > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
>> > >  {
>> > >  	enum drm_dp_dual_mode_type adaptor_type;
>> > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>> > >  
>> > >  	/* Lets probe the adaptor and check its type */
>> > >  	adaptor_type = drm_dp_dual_mode_detect(adapter);
>> > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
>> > >  	return true;
>> > >  }
>> > >  
>> > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
>> > > +{
>> > > +	unsigned long start = jiffies;
>> > > +
>> > > +	if (!lspcon->desc_valid)
>> > > +		return;
>> > > +
>> > > +	while (1) {
>> > > +		struct intel_dp_desc desc;
>> > > +
>> > > +		/*
>> > > +		 * The w/a only applies in PCON mode and we don't expect any
>> > > +		 * AUX errors.
>> > > +		 */
>> > > +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
>> > > +			return;
>> > > +
>> > > +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
>> > > +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
>> > > +				      jiffies_to_msecs(jiffies - start));
>> > > +			return;
>> > > +		}
>> > > +
>> > > +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
>> > > +			break;
>> > > +
>> > > +		msleep(10);
>> > > +	}
>> > > +
>> > > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
>> > > +}
>> > 
>> > I think we need to go through the LSPCON spec once more before we merge
>> > this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
>> > the resume properly. Maybe this isn't a "workaround" after all.
>> 
>> Looks like this stuff is normal behaviour if the LSPCON has been in low
>> power state, and we need to wake it up. In PCON mode, this is done using
>> DP_SET_POWER. And while waking up, it'll issue AUX DEFER, as we can see
>> from the logs.
>
> I did think about that, but waking the device via DP_SET_POWER(D0)
> didn't help. Doing any AUX write transactions actually gets the device
> into a stuck state for good (until next reboot on APL).
>
> More over, we don't get AUX DEFERs the AUX transactions succeed, it's
> only that reads will return corrupted values for the given time period.

The bug at hand has native defers
https://bugs.freedesktop.org/show_bug.cgi?id=98353

BR,
Jani.


>
> --Imre
>
>> 
>> We need to do something, but I don't think this patch is the right fix.
>> 
>> BR,
>> Jani.
>> 
>> 
>> 
>> > 
>> > BR,
>> > Jani.
>> > 
>> > > +
>> > >  void lspcon_resume(struct intel_lspcon *lspcon)
>> > >  {
>> > > +	lspcon_resume_in_pcon_wa(lspcon);
>> > > +
>> > >  	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
>> > >  		DRM_ERROR("LSPCON resume failed\n");
>> > >  	else
>> > > @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>> > >  
>> > >  	lspcon->active = false;
>> > >  	lspcon->mode = DRM_LSPCON_MODE_INVALID;
>> > > -	lspcon->aux = &dp->aux;
>> > > +	lspcon->intel_dp = dp;
>> > >  
>> > >  	if (!lspcon_probe(lspcon)) {
>> > >  		DRM_ERROR("Failed to probe lspcon\n");
>> > > @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>> > >  		}
>> > >  	}
>> > >  
>> > > -	if (drm_debug & DRM_UT_KMS) {
>> > > -		struct intel_dp_desc desc;
>> > > -
>> > > -		if (intel_dp_read_desc(dp, &desc))
>> > > -			intel_dp_print_desc(dp, &desc);
>> > > -	}
>> > > +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
>> > > +			     intel_dp_read_desc(dp, &lspcon->desc);
>> > > +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
>> > > +		intel_dp_print_desc(dp, &lspcon->desc);
>> > >  
>> > >  	DRM_DEBUG_KMS("Success: LSPCON init\n");
>> > >  	return true;
>>
Jani Nikula Oct. 20, 2016, 9:07 p.m. UTC | #6
On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:
>> On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
>> > +	bool desc_valid;
>> > +	struct intel_dp_desc desc;
>> 
>> I guess we could cache the desc in intel_dp directly. Independent of
>> this patch.
>
> It's not used anywhere else, but I can move it to intel_dp.
>
>> 
>> Also, I'm wondering if we could stick with just aux here, and read
>> something else from dpcd instead.
>
> Not sure either, I picked desc since we read it out anyway during init.

That was my point with putting it in intel_dp. If it's read out anyway,
lspcon or not, just cache it in intel_dp.

BR,
Jani.
Imre Deak Oct. 20, 2016, 9:40 p.m. UTC | #7
On Thu, 2016-10-20 at 23:50 +0300, Jani Nikula wrote:
> On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
> > > On Thu, 20 Oct 2016, Jani Nikula <jani.nikula@intel.com> wrote:
> > > > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > > > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > > > > expected LS mode. It also appears to be in a state where AUX DPCD reads
> > > > > will succeed but return garbage recovering only after a few hundreds of
> > > > > milliseconds. After the recovery time DPCD reads will result in the
> > > > > correct values and things will continue to work. If I2C over AUX is
> > > > > attempted during this recovery time (implying an AUX write transaction)
> > > > > the firmware won't recover and will stay in this broken state.
> > > > > 
> > > > > As a workaround check if the firmware is in PCON state after resume and
> > > > > if so wait until the correct DPCD values are returned. For this we
> > > > > compare the branch descriptor with the one we cached during init time.
> > > > > If the firmware was in the LS state, we skip the w/a and continue as
> > > > > before.
> > > > > 
> > > > > Cc: Shashank Sharma <shashank.sharma@intel.com>
> > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
> > > > >  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
> > > > >  drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
> > > > >  3 files changed, 48 insertions(+), 12 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > > > > index e90211e..ec031db 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> > > > >  	intel_dp->DP = DP;
> > > > >  }
> > > > >  
> > > > > -static bool
> > > > > +bool
> > > > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> > > > >  {
> > > > >  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > > > > index a35e241..9a2366e 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > @@ -972,7 +972,9 @@ struct intel_dp {
> > > > >  struct intel_lspcon {
> > > > >  	bool active;
> > > > >  	enum drm_lspcon_mode mode;
> > > > > -	struct drm_dp_aux *aux;
> > > > > +	struct intel_dp *intel_dp;
> > > > > +	bool desc_valid;
> > > > > +	struct intel_dp_desc desc;
> > > > 
> > > > I guess we could cache the desc in intel_dp directly. Independent of
> > > > this patch.
> > > > 
> > > > Also, I'm wondering if we could stick with just aux here, and read
> > > > something else from dpcd instead.
> > > > 
> > > > >  };
> > > > >  
> > > > >  struct intel_digital_port {
> > > > > @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
> > > > >  }
> > > > >  
> > > > >  bool
> > > > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > > > > +bool
> > > > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > > > >  void
> > > > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > > > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > index d2c8cb2..54c6173 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > @@ -30,7 +30,7 @@
> > > > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
> > > > >  {
> > > > >  	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > > >  
> > > > >  	if (drm_lspcon_get_mode(adapter, ¤t_mode))
> > > > >  		DRM_ERROR("Error reading LSPCON mode\n");
> > > > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> > > > >  {
> > > > >  	int err;
> > > > >  	enum drm_lspcon_mode current_mode;
> > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > > >  
> > > > >  	err = drm_lspcon_get_mode(adapter, ¤t_mode);
> > > > >  	if (err) {
> > > > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> > > > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> > > > >  {
> > > > >  	enum drm_dp_dual_mode_type adaptor_type;
> > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > > >  
> > > > >  	/* Lets probe the adaptor and check its type */
> > > > >  	adaptor_type = drm_dp_dual_mode_detect(adapter);
> > > > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
> > > > >  	return true;
> > > > >  }
> > > > >  
> > > > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> > > > > +{
> > > > > +	unsigned long start = jiffies;
> > > > > +
> > > > > +	if (!lspcon->desc_valid)
> > > > > +		return;
> > > > > +
> > > > > +	while (1) {
> > > > > +		struct intel_dp_desc desc;
> > > > > +
> > > > > +		/*
> > > > > +		 * The w/a only applies in PCON mode and we don't expect any
> > > > > +		 * AUX errors.
> > > > > +		 */
> > > > > +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
> > > > > +			return;
> > > > > +
> > > > > +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
> > > > > +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
> > > > > +				      jiffies_to_msecs(jiffies - start));
> > > > > +			return;
> > > > > +		}
> > > > > +
> > > > > +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
> > > > > +			break;
> > > > > +
> > > > > +		msleep(10);
> > > > > +	}
> > > > > +
> > > > > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
> > > > > +}
> > > > 
> > > > I think we need to go through the LSPCON spec once more before we merge
> > > > this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
> > > > the resume properly. Maybe this isn't a "workaround" after all.
> > > 
> > > Looks like this stuff is normal behaviour if the LSPCON has been in low
> > > power state, and we need to wake it up. In PCON mode, this is done using
> > > DP_SET_POWER. And while waking up, it'll issue AUX DEFER, as we can see
> > > from the logs.
> > 
> > I did think about that, but waking the device via DP_SET_POWER(D0)
> > didn't help. Doing any AUX write transactions actually gets the device
> > into a stuck state for good (until next reboot on APL).
> > 
> > More over, we don't get AUX DEFERs the AUX transactions succeed, it's
> > only that reads will return corrupted values for the given time period.
> 
> The bug at hand has native defers
> https://bugs.freedesktop.org/show_bug.cgi?id=98353

For I2C-over-AUX I also get defers, but not for DPCD transactions for
native AUX registers for instance DP_SET_POWER or the OUI, device-ID
registers. I also tried the LS mode wake-up sequence via
I2C-over-AUX (offset 0x20), but didn't help either.

The bug above could be a different issue, although I did see this
particular scenario getting fixed on the same machine see
/archive/results/CI_IGT_test/Patchwork_2778/

--Imre
Sharma, Shashank Oct. 21, 2016, 5:46 a.m. UTC | #8
Regards

Shashank


On 10/21/2016 12:50 AM, Imre Deak wrote:
> On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:
>> On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
>>> On my APL the LSPCON firmware resumes in PCON mode as opposed to the
>>> expected LS mode. It also appears to be in a state where AUX DPCD reads
>>> will succeed but return garbage recovering only after a few hundreds of
>>> milliseconds. After the recovery time DPCD reads will result in the
>>> correct values and things will continue to work. If I2C over AUX is
>>> attempted during this recovery time (implying an AUX write transaction)
>>> the firmware won't recover and will stay in this broken state.
>>>
>>> As a workaround check if the firmware is in PCON state after resume and
>>> if so wait until the correct DPCD values are returned. For this we
>>> compare the branch descriptor with the one we cached during init time.
>>> If the firmware was in the LS state, we skip the w/a and continue as
>>> before.
>>>
>>> Cc: Shashank Sharma <shashank.sharma@intel.com>
>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>> Cc: Jani Nikula <jani.nikula@intel.com>
>>> Signed-off-by: Imre Deak <imre.deak@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/intel_dp.c     |  2 +-
>>>   drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
>>>   drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
>>>   3 files changed, 48 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
>>> index e90211e..ec031db 100644
>>> --- a/drivers/gpu/drm/i915/intel_dp.c
>>> +++ b/drivers/gpu/drm/i915/intel_dp.c
>>> @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
>>>   	intel_dp->DP = DP;
>>>   }
>>>   
>>> -static bool
>>> +bool
>>>   intel_dp_read_dpcd(struct intel_dp *intel_dp)
>>>   {
>>>   	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
>>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>>> index a35e241..9a2366e 100644
>>> --- a/drivers/gpu/drm/i915/intel_drv.h
>>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>>> @@ -972,7 +972,9 @@ struct intel_dp {
>>>   struct intel_lspcon {
>>>   	bool active;
>>>   	enum drm_lspcon_mode mode;
>>> -	struct drm_dp_aux *aux;
>>> +	struct intel_dp *intel_dp;
IMHO, Its not required to have the intel_dp inside lspcon. we can always 
get intel_dig_port from lspcon, and intel_dp from intel_dig_port
The reason why I kept aux here was thats the only thing required to 
read/write from/to lspcon.
>>> +	bool desc_valid;
>>> +	struct intel_dp_desc desc;
>> I guess we could cache the desc in intel_dp directly. Independent of
>> this patch.
> It's not used anywhere else, but I can move it to intel_dp.
>
>> Also, I'm wondering if we could stick with just aux here, and read
>> something else from dpcd instead.
> Not sure either, I picked desc since we read it out anyway during init.
>
>>>   };
>>>   
>>>   struct intel_digital_port {
>>> @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
>>>   }
>>>   
>>>   bool
>>> +intel_dp_read_dpcd(struct intel_dp *intel_dp);
>>> +bool
>>>   intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>>>   void
>>>   intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
>>> diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
>>> index d2c8cb2..54c6173 100644
>>> --- a/drivers/gpu/drm/i915/intel_lspcon.c
>>> +++ b/drivers/gpu/drm/i915/intel_lspcon.c
>>> @@ -30,7 +30,7 @@
>>>   static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
>>>   {
>>>   	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
>>> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>>> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>>   
>>>   	if (drm_lspcon_get_mode(adapter, ¤t_mode))
>>>   		DRM_ERROR("Error reading LSPCON mode\n");
>>> @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>>>   {
>>>   	int err;
>>>   	enum drm_lspcon_mode current_mode;
>>> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>>> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>>   
>>>   	err = drm_lspcon_get_mode(adapter, ¤t_mode);
>>>   	if (err) {
>>> @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
>>>   static bool lspcon_probe(struct intel_lspcon *lspcon)
>>>   {
>>>   	enum drm_dp_dual_mode_type adaptor_type;
>>> -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>>> +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
>>>   
>>>   	/* Lets probe the adaptor and check its type */
>>>   	adaptor_type = drm_dp_dual_mode_detect(adapter);
>>> @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
>>>   	return true;
>>>   }
>>>   
>>> +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
>>> +{
>>> +	unsigned long start = jiffies;
>>> +
>>> +	if (!lspcon->desc_valid)
>>> +		return;
>>> +
>>> +	while (1) {
>>> +		struct intel_dp_desc desc;
>>> +
>>> +		/*
>>> +		 * The w/a only applies in PCON mode and we don't expect any
>>> +		 * AUX errors.
>>> +		 */
>>> +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
>>> +			return;
>>> +
>>> +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
>>> +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
>>> +				      jiffies_to_msecs(jiffies - start));
>>> +			return;
>>> +		}
>>> +
>>> +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
>>> +			break;
>>> +
>>> +		msleep(10);
>>> +	}
>>> +
>>> +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
>>> +}
>> I think we need to go through the LSPCON spec once more before we merge
>> this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
>> the resume properly.
> I haven't found at least any way to reset things. I also tried to
> change to LS mode when suspending or switch here to LS mode first and
> only then to PCON mode but these didn't help.
>
> --Imre
I agree with Imre. There is nothing like that in specs as such, and even 
If this was something missing during the reset/power down time, we 
should have seen this on all/most of the boards.

Regards
Shashank
>
>> Maybe this isn't a "workaround" after all.
>>
>> BR,
>> Jani.
>>
>>> +
>>>   void lspcon_resume(struct intel_lspcon *lspcon)
>>>   {
>>> +	lspcon_resume_in_pcon_wa(lspcon);
>>> +
>>>   	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
>>>   		DRM_ERROR("LSPCON resume failed\n");
>>>   	else
>>> @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>>>   
>>>   	lspcon->active = false;
>>>   	lspcon->mode = DRM_LSPCON_MODE_INVALID;
>>> -	lspcon->aux = &dp->aux;
>>> +	lspcon->intel_dp = dp;
>>>   
>>>   	if (!lspcon_probe(lspcon)) {
>>>   		DRM_ERROR("Failed to probe lspcon\n");
>>> @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
>>>   		}
>>>   	}
>>>   
>>> -	if (drm_debug & DRM_UT_KMS) {
>>> -		struct intel_dp_desc desc;
>>> -
>>> -		if (intel_dp_read_desc(dp, &desc))
>>> -			intel_dp_print_desc(dp, &desc);
>>> -	}
>>> +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
>>> +			     intel_dp_read_desc(dp, &lspcon->desc);
>>> +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
>>> +		intel_dp_print_desc(dp, &lspcon->desc);
>>>   
>>>   	DRM_DEBUG_KMS("Success: LSPCON init\n");
>>>   	return true;
Imre Deak Oct. 21, 2016, 7:20 a.m. UTC | #9
On Fri, 2016-10-21 at 00:40 +0300, Imre Deak wrote:
> On Thu, 2016-10-20 at 23:50 +0300, Jani Nikula wrote:
> > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > > On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
> > > > On Thu, 20 Oct 2016, Jani Nikula <jani.nikula@intel.com> wrote:
> > > > > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > > > > > On my APL the LSPCON firmware resumes in PCON mode as
> > > > > > opposed to the
> > > > > > expected LS mode. It also appears to be in a state where
> > > > > > AUX DPCD reads
> > > > > > will succeed but return garbage recovering only after a few
> > > > > > hundreds of
> > > > > > milliseconds. After the recovery time DPCD reads will
> > > > > > result in the
> > > > > > correct values and things will continue to work. If I2C
> > > > > > over AUX is
> > > > > > attempted during this recovery time (implying an AUX write
> > > > > > transaction)
> > > > > > the firmware won't recover and will stay in this broken
> > > > > > state.
> > > > > > 
> > > > > > As a workaround check if the firmware is in PCON state
> > > > > > after resume and
> > > > > > if so wait until the correct DPCD values are returned. For
> > > > > > this we
> > > > > > compare the branch descriptor with the one we cached during
> > > > > > init time.
> > > > > > If the firmware was in the LS state, we skip the w/a and
> > > > > > continue as
> > > > > > before.
> > > > > > 
> > > > > > Cc: Shashank Sharma <shashank.sharma@intel.com>
> > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
> > > > > >  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
> > > > > >  drivers/gpu/drm/i915/intel_lspcon.c | 52
> > > > > > ++++++++++++++++++++++++++++++-------
> > > > > >  3 files changed, 48 insertions(+), 12 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > index e90211e..ec031db 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp
> > > > > > *intel_dp)
> > > > > >  	intel_dp->DP = DP;
> > > > > >  }
> > > > > >  
> > > > > > -static bool
> > > > > > +bool
> > > > > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
> > > > > >  {
> > > > > >  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000,
> > > > > > intel_dp->dpcd,
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > index a35e241..9a2366e 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > @@ -972,7 +972,9 @@ struct intel_dp {
> > > > > >  struct intel_lspcon {
> > > > > >  	bool active;
> > > > > >  	enum drm_lspcon_mode mode;
> > > > > > -	struct drm_dp_aux *aux;
> > > > > > +	struct intel_dp *intel_dp;
> > > > > > +	bool desc_valid;
> > > > > > +	struct intel_dp_desc desc;
> > > > > 
> > > > > I guess we could cache the desc in intel_dp directly.
> > > > > Independent of
> > > > > this patch.
> > > > > 
> > > > > Also, I'm wondering if we could stick with just aux here, and
> > > > > read
> > > > > something else from dpcd instead.
> > > > > 
> > > > > >  };
> > > > > >  
> > > > > >  struct intel_digital_port {
> > > > > > @@ -1469,6 +1471,8 @@ static inline unsigned int
> > > > > > intel_dp_unused_lane_mask(int lane_count)
> > > > > >  }
> > > > > >  
> > > > > >  bool
> > > > > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > > > > > +bool
> > > > > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct
> > > > > > intel_dp_desc *desc);
> > > > > >  void
> > > > > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct
> > > > > > intel_dp_desc *desc);
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > > b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > > index d2c8cb2..54c6173 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > > > @@ -30,7 +30,7 @@
> > > > > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct
> > > > > > intel_lspcon *lspcon)
> > > > > >  {
> > > > > >  	enum drm_lspcon_mode current_mode =
> > > > > > DRM_LSPCON_MODE_INVALID;
> > > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp-
> > > > > > >aux.ddc;
> > > > > >  
> > > > > >  	if (drm_lspcon_get_mode(adapter, ¤t_mode))
> > > > > >  		DRM_ERROR("Error reading LSPCON mode\n");
> > > > > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct
> > > > > > intel_lspcon *lspcon,
> > > > > >  {
> > > > > >  	int err;
> > > > > >  	enum drm_lspcon_mode current_mode;
> > > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp-
> > > > > > >aux.ddc;
> > > > > >  
> > > > > >  	err = drm_lspcon_get_mode(adapter, ¤t_mode);
> > > > > >  	if (err) {
> > > > > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct
> > > > > > intel_lspcon *lspcon,
> > > > > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
> > > > > >  {
> > > > > >  	enum drm_dp_dual_mode_type adaptor_type;
> > > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp-
> > > > > > >aux.ddc;
> > > > > >  
> > > > > >  	/* Lets probe the adaptor and check its type */
> > > > > >  	adaptor_type = drm_dp_dual_mode_detect(adapter);
> > > > > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct
> > > > > > intel_lspcon *lspcon)
> > > > > >  	return true;
> > > > > >  }
> > > > > >  
> > > > > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon
> > > > > > *lspcon)
> > > > > > +{
> > > > > > +	unsigned long start = jiffies;
> > > > > > +
> > > > > > +	if (!lspcon->desc_valid)
> > > > > > +		return;
> > > > > > +
> > > > > > +	while (1) {
> > > > > > +		struct intel_dp_desc desc;
> > > > > > +
> > > > > > +		/*
> > > > > > +		 * The w/a only applies in PCON mode and
> > > > > > we don't expect any
> > > > > > +		 * AUX errors.
> > > > > > +		 */
> > > > > > +		if (!intel_dp_read_desc(lspcon->intel_dp,
> > > > > > &desc))
> > > > > > +			return;
> > > > > > +
> > > > > > +		if (!memcmp(&lspcon->desc, &desc,
> > > > > > sizeof(desc))) {
> > > > > > +			DRM_DEBUG_KMS("LSPCON recovering
> > > > > > in PCON mode after %u ms\n",
> > > > > > +				      jiffies_to_msecs(jif
> > > > > > fies - start));
> > > > > > +			return;
> > > > > > +		}
> > > > > > +
> > > > > > +		if (time_after(jiffies, start +
> > > > > > msecs_to_jiffies(1000)))
> > > > > > +			break;
> > > > > > +
> > > > > > +		msleep(10);
> > > > > > +	}
> > > > > > +
> > > > > > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after
> > > > > > resume\n");
> > > > > > +}
> > > > > 
> > > > > I think we need to go through the LSPCON spec once more
> > > > > before we merge
> > > > > this. Maybe we don't reset the LSPCON properly. Maybe we
> > > > > don't handle
> > > > > the resume properly. Maybe this isn't a "workaround" after
> > > > > all.
> > > > 
> > > > Looks like this stuff is normal behaviour if the LSPCON has
> > > > been in low
> > > > power state, and we need to wake it up. In PCON mode, this is
> > > > done using
> > > > DP_SET_POWER. And while waking up, it'll issue AUX DEFER, as we
> > > > can see
> > > > from the logs.
> > > 
> > > I did think about that, but waking the device via
> > > DP_SET_POWER(D0)
> > > didn't help. Doing any AUX write transactions actually gets the
> > > device
> > > into a stuck state for good (until next reboot on APL).
> > > 
> > > More over, we don't get AUX DEFERs the AUX transactions succeed,
> > > it's
> > > only that reads will return corrupted values for the given time
> > > period.
> > 
> > The bug at hand has native defers
> > https://bugs.freedesktop.org/show_bug.cgi?id=98353
> 
> For I2C-over-AUX I also get defers, but not for DPCD transactions for
> native AUX registers for instance DP_SET_POWER or the OUI, device-ID
> registers. I also tried the LS mode wake-up sequence via
> I2C-over-AUX (offset 0x20), but didn't help either.
> 
> The bug above could be a different issue, although I did see this
> particular scenario getting fixed on the same machine see
> /archive/results/CI_IGT_test/Patchwork_2778/

I tried this now on the fi-skl-6700hq machine in the bug report and it
seems to be the same failure mode as the APL one. There is one
difference in that on the SKL machine once the firmware is in the stuck
state after suspend/resume, a warm reboot may not recover it, so during
the next boot the LSPCON probe will fail. I guess that during warm
reboot LSPCON may continue to be powered, so to fully reset it a power
cycle is needed. In any case this patch fixes resume on that machine
too, so I couldn't reproduce the problem with it even across multiple
reboots, whereas without it it's 100% reproducible.

--Imre
Imre Deak Oct. 21, 2016, 7:40 a.m. UTC | #10
On Fri, 2016-10-21 at 11:16 +0530, Sharma, Shashank wrote:
> Regards
> 
> Shashank
> 
> 
> On 10/21/2016 12:50 AM, Imre Deak wrote:
> > On Thu, 2016-10-20 at 21:20 +0300, Jani Nikula wrote:
> > > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > > > On my APL the LSPCON firmware resumes in PCON mode as opposed to the
> > > > expected LS mode. It also appears to be in a state where AUX DPCD reads
> > > > will succeed but return garbage recovering only after a few hundreds of
> > > > milliseconds. After the recovery time DPCD reads will result in the
> > > > correct values and things will continue to work. If I2C over AUX is
> > > > attempted during this recovery time (implying an AUX write transaction)
> > > > the firmware won't recover and will stay in this broken state.
> > > > 
> > > > As a workaround check if the firmware is in PCON state after resume and
> > > > if so wait until the correct DPCD values are returned. For this we
> > > > compare the branch descriptor with the one we cached during init time.
> > > > If the firmware was in the LS state, we skip the w/a and continue as
> > > > before.
> > > > 
> > > > Cc: Shashank Sharma <shashank.sharma@intel.com>
> > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > ---
> > > >   drivers/gpu/drm/i915/intel_dp.c     |  2 +-
> > > >   drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
> > > >   drivers/gpu/drm/i915/intel_lspcon.c | 52 ++++++++++++++++++++++++++++++-------
> > > >   3 files changed, 48 insertions(+), 12 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > > > index e90211e..ec031db 100644
> > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
> > > >   	intel_dp->DP = DP;
> > > >   }
> > > >   
> > > > -static bool
> > > > +bool
> > > >   intel_dp_read_dpcd(struct intel_dp *intel_dp)
> > > >   {
> > > >   	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
> > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > > > index a35e241..9a2366e 100644
> > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > @@ -972,7 +972,9 @@ struct intel_dp {
> > > >   struct intel_lspcon {
> > > >   	bool active;
> > > >   	enum drm_lspcon_mode mode;
> > > > -	struct drm_dp_aux *aux;
> > > > +	struct intel_dp *intel_dp;
> IMHO, Its not required to have the intel_dp inside lspcon. we can always 
> get intel_dig_port from lspcon, and intel_dp from intel_dig_port

Yea I missed that, so the above change isn't needed, I'll fix this.

> The reason why I kept aux here was thats the only thing required to 
> read/write from/to lspcon.
> > > > +	bool desc_valid;
> > > > +	struct intel_dp_desc desc;
> > > I guess we could cache the desc in intel_dp directly. Independent of
> > > this patch.
> > It's not used anywhere else, but I can move it to intel_dp.
> > 
> > > Also, I'm wondering if we could stick with just aux here, and read
> > > something else from dpcd instead.
> > Not sure either, I picked desc since we read it out anyway during init.
> > 
> > > >   };
> > > >   
> > > >   struct intel_digital_port {
> > > > @@ -1469,6 +1471,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
> > > >   }
> > > >   
> > > >   bool
> > > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
> > > > +bool
> > > >   intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > > >   void
> > > >   intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
> > > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > index d2c8cb2..54c6173 100644
> > > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
> > > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
> > > > @@ -30,7 +30,7 @@
> > > >   static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
> > > >   {
> > > >   	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
> > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > >   
> > > >   	if (drm_lspcon_get_mode(adapter, ¤t_mode))
> > > >   		DRM_ERROR("Error reading LSPCON mode\n");
> > > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> > > >   {
> > > >   	int err;
> > > >   	enum drm_lspcon_mode current_mode;
> > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > >   
> > > >   	err = drm_lspcon_get_mode(adapter, ¤t_mode);
> > > >   	if (err) {
> > > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
> > > >   static bool lspcon_probe(struct intel_lspcon *lspcon)
> > > >   {
> > > >   	enum drm_dp_dual_mode_type adaptor_type;
> > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
> > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
> > > >   
> > > >   	/* Lets probe the adaptor and check its type */
> > > >   	adaptor_type = drm_dp_dual_mode_detect(adapter);
> > > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
> > > >   	return true;
> > > >   }
> > > >   
> > > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
> > > > +{
> > > > +	unsigned long start = jiffies;
> > > > +
> > > > +	if (!lspcon->desc_valid)
> > > > +		return;
> > > > +
> > > > +	while (1) {
> > > > +		struct intel_dp_desc desc;
> > > > +
> > > > +		/*
> > > > +		 * The w/a only applies in PCON mode and we don't expect any
> > > > +		 * AUX errors.
> > > > +		 */
> > > > +		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
> > > > +			return;
> > > > +
> > > > +		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
> > > > +			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
> > > > +				      jiffies_to_msecs(jiffies - start));
> > > > +			return;
> > > > +		}
> > > > +
> > > > +		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
> > > > +			break;
> > > > +
> > > > +		msleep(10);
> > > > +	}
> > > > +
> > > > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
> > > > +}
> > > I think we need to go through the LSPCON spec once more before we merge
> > > this. Maybe we don't reset the LSPCON properly. Maybe we don't handle
> > > the resume properly.
> > I haven't found at least any way to reset things. I also tried to
> > change to LS mode when suspending or switch here to LS mode first and
> > only then to PCON mode but these didn't help.
> > 
> > --Imre
> I agree with Imre. There is nothing like that in specs as such, and even 
> If this was something missing during the reset/power down time, we 
> should have seen this on all/most of the boards.
> 
> Regards
> Shashank
> > 
> > > Maybe this isn't a "workaround" after all.
> > > 
> > > BR,
> > > Jani.
> > > 
> > > > +
> > > >   void lspcon_resume(struct intel_lspcon *lspcon)
> > > >   {
> > > > +	lspcon_resume_in_pcon_wa(lspcon);
> > > > +
> > > >   	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
> > > >   		DRM_ERROR("LSPCON resume failed\n");
> > > >   	else
> > > > @@ -111,7 +145,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
> > > >   
> > > >   	lspcon->active = false;
> > > >   	lspcon->mode = DRM_LSPCON_MODE_INVALID;
> > > > -	lspcon->aux = &dp->aux;
> > > > +	lspcon->intel_dp = dp;
> > > >   
> > > >   	if (!lspcon_probe(lspcon)) {
> > > >   		DRM_ERROR("Failed to probe lspcon\n");
> > > > @@ -131,12 +165,10 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
> > > >   		}
> > > >   	}
> > > >   
> > > > -	if (drm_debug & DRM_UT_KMS) {
> > > > -		struct intel_dp_desc desc;
> > > > -
> > > > -		if (intel_dp_read_desc(dp, &desc))
> > > > -			intel_dp_print_desc(dp, &desc);
> > > > -	}
> > > > +	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
> > > > +			     intel_dp_read_desc(dp, &lspcon->desc);
> > > > +	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
> > > > +		intel_dp_print_desc(dp, &lspcon->desc);
> > > >   
> > > >   	DRM_DEBUG_KMS("Success: LSPCON init\n");
> > > >   	return true;
>
Jani Nikula Oct. 21, 2016, 8:51 a.m. UTC | #11
On Fri, 21 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> On Fri, 2016-10-21 at 00:40 +0300, Imre Deak wrote:
>> On Thu, 2016-10-20 at 23:50 +0300, Jani Nikula wrote:
>> > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
>> > > On Thu, 2016-10-20 at 22:24 +0300, Jani Nikula wrote:
>> > > > On Thu, 20 Oct 2016, Jani Nikula <jani.nikula@intel.com> wrote:
>> > > > > On Thu, 20 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
>> > > > > > On my APL the LSPCON firmware resumes in PCON mode as
>> > > > > > opposed to the
>> > > > > > expected LS mode. It also appears to be in a state where
>> > > > > > AUX DPCD reads
>> > > > > > will succeed but return garbage recovering only after a few
>> > > > > > hundreds of
>> > > > > > milliseconds. After the recovery time DPCD reads will
>> > > > > > result in the
>> > > > > > correct values and things will continue to work. If I2C
>> > > > > > over AUX is
>> > > > > > attempted during this recovery time (implying an AUX write
>> > > > > > transaction)
>> > > > > > the firmware won't recover and will stay in this broken
>> > > > > > state.
>> > > > > > 
>> > > > > > As a workaround check if the firmware is in PCON state
>> > > > > > after resume and
>> > > > > > if so wait until the correct DPCD values are returned. For
>> > > > > > this we
>> > > > > > compare the branch descriptor with the one we cached during
>> > > > > > init time.
>> > > > > > If the firmware was in the LS state, we skip the w/a and
>> > > > > > continue as
>> > > > > > before.
>> > > > > > 
>> > > > > > Cc: Shashank Sharma <shashank.sharma@intel.com>
>> > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> > > > > > Cc: Jani Nikula <jani.nikula@intel.com>
>> > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
>> > > > > > ---
>> > > > > >  drivers/gpu/drm/i915/intel_dp.c     |  2 +-
>> > > > > >  drivers/gpu/drm/i915/intel_drv.h    |  6 ++++-
>> > > > > >  drivers/gpu/drm/i915/intel_lspcon.c | 52
>> > > > > > ++++++++++++++++++++++++++++++-------
>> > > > > >  3 files changed, 48 insertions(+), 12 deletions(-)
>> > > > > > 
>> > > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
>> > > > > > b/drivers/gpu/drm/i915/intel_dp.c
>> > > > > > index e90211e..ec031db 100644
>> > > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
>> > > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
>> > > > > > @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp
>> > > > > > *intel_dp)
>> > > > > >  	intel_dp->DP = DP;
>> > > > > >  }
>> > > > > >  
>> > > > > > -static bool
>> > > > > > +bool
>> > > > > >  intel_dp_read_dpcd(struct intel_dp *intel_dp)
>> > > > > >  {
>> > > > > >  	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000,
>> > > > > > intel_dp->dpcd,
>> > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
>> > > > > > b/drivers/gpu/drm/i915/intel_drv.h
>> > > > > > index a35e241..9a2366e 100644
>> > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
>> > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
>> > > > > > @@ -972,7 +972,9 @@ struct intel_dp {
>> > > > > >  struct intel_lspcon {
>> > > > > >  	bool active;
>> > > > > >  	enum drm_lspcon_mode mode;
>> > > > > > -	struct drm_dp_aux *aux;
>> > > > > > +	struct intel_dp *intel_dp;
>> > > > > > +	bool desc_valid;
>> > > > > > +	struct intel_dp_desc desc;
>> > > > > 
>> > > > > I guess we could cache the desc in intel_dp directly.
>> > > > > Independent of
>> > > > > this patch.
>> > > > > 
>> > > > > Also, I'm wondering if we could stick with just aux here, and
>> > > > > read
>> > > > > something else from dpcd instead.
>> > > > > 
>> > > > > >  };
>> > > > > >  
>> > > > > >  struct intel_digital_port {
>> > > > > > @@ -1469,6 +1471,8 @@ static inline unsigned int
>> > > > > > intel_dp_unused_lane_mask(int lane_count)
>> > > > > >  }
>> > > > > >  
>> > > > > >  bool
>> > > > > > +intel_dp_read_dpcd(struct intel_dp *intel_dp);
>> > > > > > +bool
>> > > > > >  intel_dp_read_desc(struct intel_dp *intel_dp, struct
>> > > > > > intel_dp_desc *desc);
>> > > > > >  void
>> > > > > >  intel_dp_print_desc(struct intel_dp *intel_dp, struct
>> > > > > > intel_dp_desc *desc);
>> > > > > > diff --git a/drivers/gpu/drm/i915/intel_lspcon.c
>> > > > > > b/drivers/gpu/drm/i915/intel_lspcon.c
>> > > > > > index d2c8cb2..54c6173 100644
>> > > > > > --- a/drivers/gpu/drm/i915/intel_lspcon.c
>> > > > > > +++ b/drivers/gpu/drm/i915/intel_lspcon.c
>> > > > > > @@ -30,7 +30,7 @@
>> > > > > >  static enum drm_lspcon_mode lspcon_get_current_mode(struct
>> > > > > > intel_lspcon *lspcon)
>> > > > > >  {
>> > > > > >  	enum drm_lspcon_mode current_mode =
>> > > > > > DRM_LSPCON_MODE_INVALID;
>> > > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> > > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp-
>> > > > > > >aux.ddc;
>> > > > > >  
>> > > > > >  	if (drm_lspcon_get_mode(adapter, ¤t_mode))
>> > > > > >  		DRM_ERROR("Error reading LSPCON mode\n");
>> > > > > > @@ -45,7 +45,7 @@ static int lspcon_change_mode(struct
>> > > > > > intel_lspcon *lspcon,
>> > > > > >  {
>> > > > > >  	int err;
>> > > > > >  	enum drm_lspcon_mode current_mode;
>> > > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> > > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp-
>> > > > > > >aux.ddc;
>> > > > > >  
>> > > > > >  	err = drm_lspcon_get_mode(adapter, ¤t_mode);
>> > > > > >  	if (err) {
>> > > > > > @@ -72,7 +72,7 @@ static int lspcon_change_mode(struct
>> > > > > > intel_lspcon *lspcon,
>> > > > > >  static bool lspcon_probe(struct intel_lspcon *lspcon)
>> > > > > >  {
>> > > > > >  	enum drm_dp_dual_mode_type adaptor_type;
>> > > > > > -	struct i2c_adapter *adapter = &lspcon->aux->ddc;
>> > > > > > +	struct i2c_adapter *adapter = &lspcon->intel_dp-
>> > > > > > >aux.ddc;
>> > > > > >  
>> > > > > >  	/* Lets probe the adaptor and check its type */
>> > > > > >  	adaptor_type = drm_dp_dual_mode_detect(adapter);
>> > > > > > @@ -89,8 +89,42 @@ static bool lspcon_probe(struct
>> > > > > > intel_lspcon *lspcon)
>> > > > > >  	return true;
>> > > > > >  }
>> > > > > >  
>> > > > > > +static void lspcon_resume_in_pcon_wa(struct intel_lspcon
>> > > > > > *lspcon)
>> > > > > > +{
>> > > > > > +	unsigned long start = jiffies;
>> > > > > > +
>> > > > > > +	if (!lspcon->desc_valid)
>> > > > > > +		return;
>> > > > > > +
>> > > > > > +	while (1) {
>> > > > > > +		struct intel_dp_desc desc;
>> > > > > > +
>> > > > > > +		/*
>> > > > > > +		 * The w/a only applies in PCON mode and
>> > > > > > we don't expect any
>> > > > > > +		 * AUX errors.
>> > > > > > +		 */
>> > > > > > +		if (!intel_dp_read_desc(lspcon->intel_dp,
>> > > > > > &desc))
>> > > > > > +			return;
>> > > > > > +
>> > > > > > +		if (!memcmp(&lspcon->desc, &desc,
>> > > > > > sizeof(desc))) {
>> > > > > > +			DRM_DEBUG_KMS("LSPCON recovering
>> > > > > > in PCON mode after %u ms\n",
>> > > > > > +				      jiffies_to_msecs(jif
>> > > > > > fies - start));
>> > > > > > +			return;
>> > > > > > +		}
>> > > > > > +
>> > > > > > +		if (time_after(jiffies, start +
>> > > > > > msecs_to_jiffies(1000)))
>> > > > > > +			break;
>> > > > > > +
>> > > > > > +		msleep(10);
>> > > > > > +	}
>> > > > > > +
>> > > > > > +	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after
>> > > > > > resume\n");
>> > > > > > +}
>> > > > > 
>> > > > > I think we need to go through the LSPCON spec once more
>> > > > > before we merge
>> > > > > this. Maybe we don't reset the LSPCON properly. Maybe we
>> > > > > don't handle
>> > > > > the resume properly. Maybe this isn't a "workaround" after
>> > > > > all.
>> > > > 
>> > > > Looks like this stuff is normal behaviour if the LSPCON has
>> > > > been in low
>> > > > power state, and we need to wake it up. In PCON mode, this is
>> > > > done using
>> > > > DP_SET_POWER. And while waking up, it'll issue AUX DEFER, as we
>> > > > can see
>> > > > from the logs.
>> > > 
>> > > I did think about that, but waking the device via
>> > > DP_SET_POWER(D0)
>> > > didn't help. Doing any AUX write transactions actually gets the
>> > > device
>> > > into a stuck state for good (until next reboot on APL).
>> > > 
>> > > More over, we don't get AUX DEFERs the AUX transactions succeed,
>> > > it's
>> > > only that reads will return corrupted values for the given time
>> > > period.
>> > 
>> > The bug at hand has native defers
>> > https://bugs.freedesktop.org/show_bug.cgi?id=98353
>> 
>> For I2C-over-AUX I also get defers, but not for DPCD transactions for
>> native AUX registers for instance DP_SET_POWER or the OUI, device-ID
>> registers. I also tried the LS mode wake-up sequence via
>> I2C-over-AUX (offset 0x20), but didn't help either.
>> 
>> The bug above could be a different issue, although I did see this
>> particular scenario getting fixed on the same machine see
>> /archive/results/CI_IGT_test/Patchwork_2778/
>
> I tried this now on the fi-skl-6700hq machine in the bug report and it
> seems to be the same failure mode as the APL one. There is one
> difference in that on the SKL machine once the firmware is in the stuck
> state after suspend/resume, a warm reboot may not recover it, so during
> the next boot the LSPCON probe will fail. I guess that during warm
> reboot LSPCON may continue to be powered, so to fully reset it a power
> cycle is needed. In any case this patch fixes resume on that machine
> too, so I couldn't reproduce the problem with it even across multiple
> reboots, whereas without it it's 100% reproducible.

A quick thought, when we do module unload or suspend, perhaps we should
put the LSPCON to the state in which we expect to find it on
probe/resume? In case it doesn't get reset.

BR,
Jani.


>
> --Imre
Imre Deak Oct. 21, 2016, 9:07 a.m. UTC | #12
On pe, 2016-10-21 at 11:51 +0300, Jani Nikula wrote:
> On Fri, 21 Oct 2016, Imre Deak <imre.deak@intel.com> wrote:
> > I tried this now on the fi-skl-6700hq machine in the bug report and it
> > seems to be the same failure mode as the APL one. There is one
> > difference in that on the SKL machine once the firmware is in the stuck
> > state after suspend/resume, a warm reboot may not recover it, so during
> > the next boot the LSPCON probe will fail. I guess that during warm
> > reboot LSPCON may continue to be powered, so to fully reset it a power
> > cycle is needed. In any case this patch fixes resume on that machine
> > too, so I couldn't reproduce the problem with it even across multiple
> > reboots, whereas without it it's 100% reproducible.
> 
> A quick thought, when we do module unload or suspend, perhaps we should
> put the LSPCON to the state in which we expect to find it on
> probe/resume? In case it doesn't get reset.

The probe and resume functions handle both PCON and LS modes, switching
only if necessary. But in any case I tried the followings without
success:
- switch to LS mode during suspend
- power down the TMDS output during suspend after switching to LS mode
(DP_DUAL_MODE_TMDS_OEN) and then back on during resume
- while in PCON mode force DP_SET_POWER(D3) during suspend and then
DP_SET_POWER(D0) during resume

--Imre
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index e90211e..ec031db 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3487,7 +3487,7 @@  intel_dp_link_down(struct intel_dp *intel_dp)
 	intel_dp->DP = DP;
 }
 
-static bool
+bool
 intel_dp_read_dpcd(struct intel_dp *intel_dp)
 {
 	if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a35e241..9a2366e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -972,7 +972,9 @@  struct intel_dp {
 struct intel_lspcon {
 	bool active;
 	enum drm_lspcon_mode mode;
-	struct drm_dp_aux *aux;
+	struct intel_dp *intel_dp;
+	bool desc_valid;
+	struct intel_dp_desc desc;
 };
 
 struct intel_digital_port {
@@ -1469,6 +1471,8 @@  static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 }
 
 bool
+intel_dp_read_dpcd(struct intel_dp *intel_dp);
+bool
 intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
 void
 intel_dp_print_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc);
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index d2c8cb2..54c6173 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -30,7 +30,7 @@ 
 static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
 {
 	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
-	struct i2c_adapter *adapter = &lspcon->aux->ddc;
+	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
 
 	if (drm_lspcon_get_mode(adapter, &current_mode))
 		DRM_ERROR("Error reading LSPCON mode\n");
@@ -45,7 +45,7 @@  static int lspcon_change_mode(struct intel_lspcon *lspcon,
 {
 	int err;
 	enum drm_lspcon_mode current_mode;
-	struct i2c_adapter *adapter = &lspcon->aux->ddc;
+	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
 
 	err = drm_lspcon_get_mode(adapter, &current_mode);
 	if (err) {
@@ -72,7 +72,7 @@  static int lspcon_change_mode(struct intel_lspcon *lspcon,
 static bool lspcon_probe(struct intel_lspcon *lspcon)
 {
 	enum drm_dp_dual_mode_type adaptor_type;
-	struct i2c_adapter *adapter = &lspcon->aux->ddc;
+	struct i2c_adapter *adapter = &lspcon->intel_dp->aux.ddc;
 
 	/* Lets probe the adaptor and check its type */
 	adaptor_type = drm_dp_dual_mode_detect(adapter);
@@ -89,8 +89,42 @@  static bool lspcon_probe(struct intel_lspcon *lspcon)
 	return true;
 }
 
+static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
+{
+	unsigned long start = jiffies;
+
+	if (!lspcon->desc_valid)
+		return;
+
+	while (1) {
+		struct intel_dp_desc desc;
+
+		/*
+		 * The w/a only applies in PCON mode and we don't expect any
+		 * AUX errors.
+		 */
+		if (!intel_dp_read_desc(lspcon->intel_dp, &desc))
+			return;
+
+		if (!memcmp(&lspcon->desc, &desc, sizeof(desc))) {
+			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
+				      jiffies_to_msecs(jiffies - start));
+			return;
+		}
+
+		if (time_after(jiffies, start + msecs_to_jiffies(1000)))
+			break;
+
+		msleep(10);
+	}
+
+	DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n");
+}
+
 void lspcon_resume(struct intel_lspcon *lspcon)
 {
+	lspcon_resume_in_pcon_wa(lspcon);
+
 	if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true))
 		DRM_ERROR("LSPCON resume failed\n");
 	else
@@ -111,7 +145,7 @@  bool lspcon_init(struct intel_digital_port *intel_dig_port)
 
 	lspcon->active = false;
 	lspcon->mode = DRM_LSPCON_MODE_INVALID;
-	lspcon->aux = &dp->aux;
+	lspcon->intel_dp = dp;
 
 	if (!lspcon_probe(lspcon)) {
 		DRM_ERROR("Failed to probe lspcon\n");
@@ -131,12 +165,10 @@  bool lspcon_init(struct intel_digital_port *intel_dig_port)
 		}
 	}
 
-	if (drm_debug & DRM_UT_KMS) {
-		struct intel_dp_desc desc;
-
-		if (intel_dp_read_desc(dp, &desc))
-			intel_dp_print_desc(dp, &desc);
-	}
+	lspcon->desc_valid = intel_dp_read_dpcd(dp) &&
+			     intel_dp_read_desc(dp, &lspcon->desc);
+	if ((drm_debug & DRM_UT_KMS) && lspcon->desc_valid)
+		intel_dp_print_desc(dp, &lspcon->desc);
 
 	DRM_DEBUG_KMS("Success: LSPCON init\n");
 	return true;