diff mbox series

[v2,1/3] drm/i915/ilk-glk: Fix link training on links with LTTPRs

Message ID 20210317184901.4029798-2-imre.deak@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Fix DP LTTPR link training mode initialization | expand

Commit Message

Imre Deak March 17, 2021, 6:48 p.m. UTC
The spec requires using at least 3.2ms for the AUX timeout period if
there are LT-tunable PHY Repeaters on the link (2.11.2). An upcoming
spec update makes this more specific, by requiring a 3.2ms minimum
timeout period for the LTTPR detection reading the 0xF0000-0xF0007
range (3.6.5.1).

Accordingly, disable LTTPR detection on platforms up to and including
GLK, where the maximum timeout we can set is only 1.6ms.

Link training in the non-transparent mode is known to fail at least on
some SKL systems with a WD19 dock on the link, which exposes an LTTPR
(see the References below). While this could have different reasons
besides the too short AUX timeout used, not detecting LTTPRs (and so not
using the non-transparent LT mode) fixes link training on these systems.

While at it, add code comments about the platform-specific maximum
timeout values.

v2: Add a comment about the g4x maximum timeout as well. (Ville)

Reported-by: Takashi Iwai <tiwai@suse.de>
Reported-and-tested-by: Santiago Zarate <santiago.zarate@suse.com>
Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3166
Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent mode link training")
Cc: <stable@vger.kernel.org> # v5.11
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
 .../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

Comments

Ville Syrjälä March 18, 2021, 5:33 p.m. UTC | #1
On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> The spec requires to use at least 3.2ms for the AUX timeout period if
> there are LT-tunable PHY Repeaters on the link (2.11.2). An upcoming
> spec update makes this more specific, by requiring a 3.2ms minimum
> timeout period for the LTTPR detection reading the 0xF0000-0xF0007
> range (3.6.5.1).

I'm pondering if we could reduce the timeout after having determined
whether LTTPRs are present or not? But maybe that wouldn't really speed
up anything since we can't reduce the timeout until after detecting
*something*. And once there is something there we shouldn't really get
any more timeouts I guess. So probably a totally stupid idea.

Anyways, this seems about the only thing we can do given the limited
hw capabilities.
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> 
> Accordingly disable LTTPR detection until GLK, where the maximum timeout
> we can set is only 1.6ms.
> 
> Link training in the non-transparent mode is known to fail at least on
> some SKL systems with a WD19 dock on the link, which exposes an LTTPR
> (see the References below). While this could have different reasons
> besides the too short AUX timeout used, not detecting LTTPRs (and so not
> using the non-transparent LT mode) fixes link training on these systems.
> 
> While at it add a code comment about the platform specific maximum
> timeout values.
> 
> v2: Add a comment about the g4x maximum timeout as well. (Ville)
> 
> Reported-by: Takashi Iwai <tiwai@suse.de>
> Reported-and-tested-by: Santiago Zarate <santiago.zarate@suse.com>
> Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent mode link training")
> Cc: <stable@vger.kernel.org> # v5.11
> Cc: Takashi Iwai <tiwai@suse.de>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
>  .../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++---
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> index eaebf123310a..10fe17b7280d 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
>  	else
>  		precharge = 5;
>  
> +	/* Max timeout value on G4x-BDW: 1.6ms */
>  	if (IS_BROADWELL(dev_priv))
>  		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
>  	else
> @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
>  	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
>  	u32 ret;
>  
> +	/*
> +	 * Max timeout values:
> +	 * SKL-GLK: 1.6ms
> +	 * CNL: 3.2ms
> +	 * ICL+: 4ms
> +	 */
>  	ret = DP_AUX_CH_CTL_SEND_BUSY |
>  	      DP_AUX_CH_CTL_DONE |
>  	      DP_AUX_CH_CTL_INTERRUPT |
> diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> index 19ba7c7cbaab..c0e25c75c105 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> @@ -82,6 +82,18 @@ static void intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
>  
>  static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp)
>  {
> +	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> +
> +	if (intel_dp_is_edp(intel_dp))
> +		return false;
> +
> +	/*
> +	 * Detecting LTTPRs must be avoided on platforms with an AUX timeout
> +	 * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1).
> +	 */
> +	if (INTEL_GEN(i915) < 10)
> +		return false;
> +
>  	if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
>  					  intel_dp->lttpr_common_caps) < 0) {
>  		memset(intel_dp->lttpr_common_caps, 0,
> @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp *intel_dp)
>  	bool ret;
>  	int i;
>  
> -	if (intel_dp_is_edp(intel_dp))
> -		return 0;
> -
>  	ret = intel_dp_read_lttpr_common_caps(intel_dp);
>  	if (!ret)
>  		return 0;
> -- 
> 2.25.1
Imre Deak March 18, 2021, 5:49 p.m. UTC | #2
On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > The spec requires to use at least 3.2ms for the AUX timeout period if
> > there are LT-tunable PHY Repeaters on the link (2.11.2). An upcoming
> > spec update makes this more specific, by requiring a 3.2ms minimum
> > timeout period for the LTTPR detection reading the 0xF0000-0xF0007
> > range (3.6.5.1).
> 
> I'm pondering if we could reduce the timeout after having determined
> wherther LTTPRs are present or not? But maybe that wouldn't really speed
> up anything since we can't reduce the timeout until after detecting
> *something*. And once there is something there we shouldn't really get
> any more timeouts I guess. So probably a totally stupid idea.

Right, if something is connected, it would anyway take as long as the
sink needs to reply, whether or not we decreased the timeout.

However if nothing is connected, we have the excessive timeout Khaled
already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve that
we could scale the total number of retries by making it
total_timeout/platform_specific_timeout (letting total_timeout=2sec for
instance) or just changing the drm retry logic to be time based instead
of the number of retries we use atm. 

> Anyways, this seems about the only thing we can do given the limited
> hw capabilities.
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> > Accordingly disable LTTPR detection until GLK, where the maximum timeout
> > we can set is only 1.6ms.
> > 
> > Link training in the non-transparent mode is known to fail at least on
> > some SKL systems with a WD19 dock on the link, which exposes an LTTPR
> > (see the References below). While this could have different reasons
> > besides the too short AUX timeout used, not detecting LTTPRs (and so not
> > using the non-transparent LT mode) fixes link training on these systems.
> > 
> > While at it add a code comment about the platform specific maximum
> > timeout values.
> > 
> > v2: Add a comment about the g4x maximum timeout as well. (Ville)
> > 
> > Reported-by: Takashi Iwai <tiwai@suse.de>
> > Reported-and-tested-by: Santiago Zarate <santiago.zarate@suse.com>
> > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > References: https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent mode link training")
> > Cc: <stable@vger.kernel.org> # v5.11
> > Cc: Takashi Iwai <tiwai@suse.de>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++---
> >  2 files changed, 19 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > index eaebf123310a..10fe17b7280d 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
> >  	else
> >  		precharge = 5;
> >  
> > +	/* Max timeout value on G4x-BDW: 1.6ms */
> >  	if (IS_BROADWELL(dev_priv))
> >  		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> >  	else
> > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
> >  	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
> >  	u32 ret;
> >  
> > +	/*
> > +	 * Max timeout values:
> > +	 * SKL-GLK: 1.6ms
> > +	 * CNL: 3.2ms
> > +	 * ICL+: 4ms
> > +	 */
> >  	ret = DP_AUX_CH_CTL_SEND_BUSY |
> >  	      DP_AUX_CH_CTL_DONE |
> >  	      DP_AUX_CH_CTL_INTERRUPT |
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > index 19ba7c7cbaab..c0e25c75c105 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > @@ -82,6 +82,18 @@ static void intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> >  
> >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp)
> >  {
> > +	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > +
> > +	if (intel_dp_is_edp(intel_dp))
> > +		return false;
> > +
> > +	/*
> > +	 * Detecting LTTPRs must be avoided on platforms with an AUX timeout
> > +	 * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1).
> > +	 */
> > +	if (INTEL_GEN(i915) < 10)
> > +		return false;
> > +
> >  	if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> >  					  intel_dp->lttpr_common_caps) < 0) {
> >  		memset(intel_dp->lttpr_common_caps, 0,
> > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp *intel_dp)
> >  	bool ret;
> >  	int i;
> >  
> > -	if (intel_dp_is_edp(intel_dp))
> > -		return 0;
> > -
> >  	ret = intel_dp_read_lttpr_common_caps(intel_dp);
> >  	if (!ret)
> >  		return 0;
> > -- 
> > 2.25.1
> 
> -- 
> Ville Syrjälä
> Intel
Imre Deak March 18, 2021, 6:06 p.m. UTC | #3
On Thu, Mar 18, 2021 at 07:49:13PM +0200, Imre Deak wrote:
> On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> > On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > > The spec requires to use at least 3.2ms for the AUX timeout period if
> > > there are LT-tunable PHY Repeaters on the link (2.11.2). An upcoming
> > > spec update makes this more specific, by requiring a 3.2ms minimum
> > > timeout period for the LTTPR detection reading the 0xF0000-0xF0007
> > > range (3.6.5.1).
> > 
> > I'm pondering if we could reduce the timeout after having determined
> > wherther LTTPRs are present or not? But maybe that wouldn't really speed
> > up anything since we can't reduce the timeout until after detecting
> > *something*. And once there is something there we shouldn't really get
> > any more timeouts I guess. So probably a totally stupid idea.
> 
> Right, if something is connected it would take anyway as much time as it
> takes for the sink to reply whether or not we decreased the timeout.
> 
> However if nothing is connected, we have the excessive timeout Khaled
> already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve that
> we could scale the total number of retries by making it
> total_timeout/platform_specific_timeout (letting total_timeout=2sec for
> instance) or just changing the drm retry logic to be time based instead
> of the number of retries we use atm. 

Doh, simply reducing the HW timeouts would be enough to fix this.

> > Anyways, this seems about the only thing we can do given the limited
> > hw capabilities.
> > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > > Accordingly disable LTTPR detection until GLK, where the maximum timeout
> > > we can set is only 1.6ms.
> > > 
> > > Link training in the non-transparent mode is known to fail at least on
> > > some SKL systems with a WD19 dock on the link, which exposes an LTTPR
> > > (see the References below). While this could have different reasons
> > > besides the too short AUX timeout used, not detecting LTTPRs (and so not
> > > using the non-transparent LT mode) fixes link training on these systems.
> > > 
> > > While at it add a code comment about the platform specific maximum
> > > timeout values.
> > > 
> > > v2: Add a comment about the g4x maximum timeout as well. (Ville)
> > > 
> > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > Reported-and-tested-by: Santiago Zarate <santiago.zarate@suse.com>
> > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > References: https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent mode link training")
> > > Cc: <stable@vger.kernel.org> # v5.11
> > > Cc: Takashi Iwai <tiwai@suse.de>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++---
> > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > index eaebf123310a..10fe17b7280d 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
> > >  	else
> > >  		precharge = 5;
> > >  
> > > +	/* Max timeout value on G4x-BDW: 1.6ms */
> > >  	if (IS_BROADWELL(dev_priv))
> > >  		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > >  	else
> > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
> > >  	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
> > >  	u32 ret;
> > >  
> > > +	/*
> > > +	 * Max timeout values:
> > > +	 * SKL-GLK: 1.6ms
> > > +	 * CNL: 3.2ms
> > > +	 * ICL+: 4ms
> > > +	 */
> > >  	ret = DP_AUX_CH_CTL_SEND_BUSY |
> > >  	      DP_AUX_CH_CTL_DONE |
> > >  	      DP_AUX_CH_CTL_INTERRUPT |
> > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > @@ -82,6 +82,18 @@ static void intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > >  
> > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp)
> > >  {
> > > +	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > +
> > > +	if (intel_dp_is_edp(intel_dp))
> > > +		return false;
> > > +
> > > +	/*
> > > +	 * Detecting LTTPRs must be avoided on platforms with an AUX timeout
> > > +	 * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1).
> > > +	 */
> > > +	if (INTEL_GEN(i915) < 10)
> > > +		return false;
> > > +
> > >  	if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > >  					  intel_dp->lttpr_common_caps) < 0) {
> > >  		memset(intel_dp->lttpr_common_caps, 0,
> > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp *intel_dp)
> > >  	bool ret;
> > >  	int i;
> > >  
> > > -	if (intel_dp_is_edp(intel_dp))
> > > -		return 0;
> > > -
> > >  	ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > >  	if (!ret)
> > >  		return 0;
> > > -- 
> > > 2.25.1
> > 
> > -- 
> > Ville Syrjälä
> > Intel
Almahallawy, Khaled March 18, 2021, 10:04 p.m. UTC | #4
On Thu, 2021-03-18 at 20:06 +0200, Imre Deak wrote:
> On Thu, Mar 18, 2021 at 07:49:13PM +0200, Imre Deak wrote:
> > On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> > > On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > > > The spec requires to use at least 3.2ms for the AUX timeout
> > > > period if
> > > > there are LT-tunable PHY Repeaters on the link (2.11.2). An
> > > > upcoming
> > > > spec update makes this more specific, by requiring a 3.2ms
> > > > minimum
> > > > timeout period for the LTTPR detection reading the 0xF0000-
> > > > 0xF0007
> > > > range (3.6.5.1).
> > > 
> > > I'm pondering if we could reduce the timeout after having
> > > determined
> > > wherther LTTPRs are present or not? But maybe that wouldn't
> > > really speed
> > > up anything since we can't reduce the timeout until after
> > > detecting
> > > *something*. And once there is something there we shouldn't
> > > really get
> > > any more timeouts I guess. So probably a totally stupid idea.
> > 
> > Right, if something is connected it would take anyway as much time
> > as it
> > takes for the sink to reply whether or not we decreased the
> > timeout.
> > 
> > However if nothing is connected, we have the excessive timeout
> > Khaled
> > already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve
> > that
> > we could scale the total number of retries by making it
> > total_timeout/platform_specific_timeout (letting total_timeout=2sec
> > for
> > instance) or just changing the drm retry logic to be time based
> > instead
> > of the number of retries we use atm. 
> 
> Doh, reducing simply the HW timeouts would be enough to fix this.

What about Lyude's suggestion ( 
https://patchwork.freedesktop.org/patch/420369/#comment_756572) 
to drop the retries in intel_dp_aux_xfer()
		/* Must try at least 3 times according to DP spec */
		for (try = 0; try < 5; try++) {
 
 
And use only the retries in drm_dpcd_access?

Thanks
Khaled

> 
> > > Anyways, this seems about the only thing we can do given the
> > > limited
> > > hw capabilities.
> > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > > Accordingly disable LTTPR detection until GLK, where the
> > > > maximum timeout
> > > > we can set is only 1.6ms.
> > > > 
> > > > Link training in the non-transparent mode is known to fail at
> > > > least on
> > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > LTTPR
> > > > (see the References below). While this could have different
> > > > reasons
> > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > (and so not
> > > > using the non-transparent LT mode) fixes link training on these
> > > > systems.
> > > > 
> > > > While at it add a code comment about the platform specific
> > > > maximum
> > > > timeout values.
> > > > 
> > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > (Ville)
> > > > 
> > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > Reported-and-tested-by: Santiago Zarate <
> > > > santiago.zarate@suse.com>
> > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > References: 
> > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent 
> > > > mode link training")
> > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > ++++++++++++---
> > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > index eaebf123310a..10fe17b7280d 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > intel_dp *intel_dp,
> > > >  	else
> > > >  		precharge = 5;
> > > >  
> > > > +	/* Max timeout value on G4x-BDW: 1.6ms */
> > > >  	if (IS_BROADWELL(dev_priv))
> > > >  		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > >  	else
> > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > intel_dp *intel_dp,
> > > >  	enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > >base.port);
> > > >  	u32 ret;
> > > >  
> > > > +	/*
> > > > +	 * Max timeout values:
> > > > +	 * SKL-GLK: 1.6ms
> > > > +	 * CNL: 3.2ms
> > > > +	 * ICL+: 4ms
> > > > +	 */
> > > >  	ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > >  	      DP_AUX_CH_CTL_DONE |
> > > >  	      DP_AUX_CH_CTL_INTERRUPT |
> > > > diff --git
> > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > @@ -82,6 +82,18 @@ static void
> > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > >  
> > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > *intel_dp)
> > > >  {
> > > > +	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > +
> > > > +	if (intel_dp_is_edp(intel_dp))
> > > > +		return false;
> > > > +
> > > > +	/*
> > > > +	 * Detecting LTTPRs must be avoided on platforms with
> > > > an AUX timeout
> > > > +	 * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > 3.6.6.1).
> > > > +	 */
> > > > +	if (INTEL_GEN(i915) < 10)
> > > > +		return false;
> > > > +
> > > >  	if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > >  					  intel_dp-
> > > > >lttpr_common_caps) < 0) {
> > > >  		memset(intel_dp->lttpr_common_caps, 0,
> > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > *intel_dp)
> > > >  	bool ret;
> > > >  	int i;
> > > >  
> > > > -	if (intel_dp_is_edp(intel_dp))
> > > > -		return 0;
> > > > -
> > > >  	ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > >  	if (!ret)
> > > >  		return 0;
> > > > -- 
> > > > 2.25.1
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel
Imre Deak March 18, 2021, 11:17 p.m. UTC | #5
On Fri, Mar 19, 2021 at 12:04:54AM +0200, Almahallawy, Khaled wrote:
> On Thu, 2021-03-18 at 20:06 +0200, Imre Deak wrote:
> > On Thu, Mar 18, 2021 at 07:49:13PM +0200, Imre Deak wrote:
> > > On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> > > > On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > > > > The spec requires to use at least 3.2ms for the AUX timeout
> > > > > period if
> > > > > there are LT-tunable PHY Repeaters on the link (2.11.2). An
> > > > > upcoming
> > > > > spec update makes this more specific, by requiring a 3.2ms
> > > > > minimum
> > > > > timeout period for the LTTPR detection reading the 0xF0000-
> > > > > 0xF0007
> > > > > range (3.6.5.1).
> > > >
> > > > I'm pondering if we could reduce the timeout after having
> > > > determined
> > > > wherther LTTPRs are present or not? But maybe that wouldn't
> > > > really speed
> > > > up anything since we can't reduce the timeout until after
> > > > detecting
> > > > *something*. And once there is something there we shouldn't
> > > > really get
> > > > any more timeouts I guess. So probably a totally stupid idea.
> > >
> > > Right, if something is connected it would take anyway as much time
> > > as it
> > > takes for the sink to reply whether or not we decreased the
> > > timeout.
> > >
> > > However if nothing is connected, we have the excessive timeout
> > > Khaled
> > > already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve
> > > that
> > > we could scale the total number of retries by making it
> > > total_timeout/platform_specific_timeout (letting total_timeout=2sec
> > > for
> > > instance) or just changing the drm retry logic to be time based
> > > instead
> > > of the number of retries we use atm.
> >
> > Doh, reducing simply the HW timeouts would be enough to fix this.
> 
> What about Lyude's suggestion (
> https://patchwork.freedesktop.org/patch/420369/#comment_756572)
> to drop the retries in intel_dp_aux_xfer()
> /* Must try at least 3 times according to DP spec */
> for (try = 0; try < 5; try++) {
> 
> And use only the retries in drm_dpcd_access?

I think it would work if we can make the retries configurable and set it
to
	retries = total_timeout / platform_specific_timeout_per_retry

where total_timeout would be something reasonable like 1 sec.

> 
> Thanks
> Khaled
> 
> >
> > > > Anyways, this seems about the only thing we can do given the
> > > > limited
> > > > hw capabilities.
> > > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > >
> > > > > Accordingly disable LTTPR detection until GLK, where the
> > > > > maximum timeout
> > > > > we can set is only 1.6ms.
> > > > >
> > > > > Link training in the non-transparent mode is known to fail at
> > > > > least on
> > > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > > LTTPR
> > > > > (see the References below). While this could have different
> > > > > reasons
> > > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > > (and so not
> > > > > using the non-transparent LT mode) fixes link training on these
> > > > > systems.
> > > > >
> > > > > While at it add a code comment about the platform specific
> > > > > maximum
> > > > > timeout values.
> > > > >
> > > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > > (Ville)
> > > > >
> > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > santiago.zarate@suse.com>
> > > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > > References:
> > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent
> > > > > mode link training")
> > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > > ++++++++++++---
> > > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > > intel_dp *intel_dp,
> > > > >  else
> > > > >  precharge = 5;
> > > > >
> > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > >  if (IS_BROADWELL(dev_priv))
> > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > >  else
> > > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > > intel_dp *intel_dp,
> > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > >base.port);
> > > > >  u32 ret;
> > > > >
> > > > > +/*
> > > > > + * Max timeout values:
> > > > > + * SKL-GLK: 1.6ms
> > > > > + * CNL: 3.2ms
> > > > > + * ICL+: 4ms
> > > > > + */
> > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > >        DP_AUX_CH_CTL_DONE |
> > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > diff --git
> > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > @@ -82,6 +82,18 @@ static void
> > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > > >
> > > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > *intel_dp)
> > > > >  {
> > > > > +struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > > +
> > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > +return false;
> > > > > +
> > > > > +/*
> > > > > + * Detecting LTTPRs must be avoided on platforms with
> > > > > an AUX timeout
> > > > > + * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > > 3.6.6.1).
> > > > > + */
> > > > > +if (INTEL_GEN(i915) < 10)
> > > > > +return false;
> > > > > +
> > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > >    intel_dp-
> > > > > >lttpr_common_caps) < 0) {
> > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > > *intel_dp)
> > > > >  bool ret;
> > > > >  int i;
> > > > >
> > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > -return 0;
> > > > > -
> > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > >  if (!ret)
> > > > >  return 0;
> > > > > --
> > > > > 2.25.1
> > > >
> > > > --
> > > > Ville Syrjälä
> > > > Intel
Lyude Paul March 19, 2021, 5:25 p.m. UTC | #6
On Fri, 2021-03-19 at 01:17 +0200, Imre Deak wrote:
> On Fri, Mar 19, 2021 at 12:04:54AM +0200, Almahallawy, Khaled wrote:
> > On Thu, 2021-03-18 at 20:06 +0200, Imre Deak wrote:
> > > On Thu, Mar 18, 2021 at 07:49:13PM +0200, Imre Deak wrote:
> > > > On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> > > > > On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > > > > > The spec requires to use at least 3.2ms for the AUX timeout
> > > > > > period if
> > > > > > there are LT-tunable PHY Repeaters on the link (2.11.2). An
> > > > > > upcoming
> > > > > > spec update makes this more specific, by requiring a 3.2ms
> > > > > > minimum
> > > > > > timeout period for the LTTPR detection reading the 0xF0000-
> > > > > > 0xF0007
> > > > > > range (3.6.5.1).
> > > > > 
> > > > > I'm pondering if we could reduce the timeout after having
> > > > > determined
> > > > > wherther LTTPRs are present or not? But maybe that wouldn't
> > > > > really speed
> > > > > up anything since we can't reduce the timeout until after
> > > > > detecting
> > > > > *something*. And once there is something there we shouldn't
> > > > > really get
> > > > > any more timeouts I guess. So probably a totally stupid idea.
> > > > 
> > > > Right, if something is connected it would take anyway as much time
> > > > as it
> > > > takes for the sink to reply whether or not we decreased the
> > > > timeout.
> > > > 
> > > > However if nothing is connected, we have the excessive timeout
> > > > Khaled
> > > > already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve
> > > > that
> > > > we could scale the total number of retries by making it
> > > > total_timeout/platform_specific_timeout (letting total_timeout=2sec
> > > > for
> > > > instance) or just changing the drm retry logic to be time based
> > > > instead
> > > > of the number of retries we use atm.
> > > 
> > > Doh, reducing simply the HW timeouts would be enough to fix this.
> > 
> > What about Lyude's suggestion (
> > https://patchwork.freedesktop.org/patch/420369/#comment_756572)
> > to drop the retries in intel_dp_aux_xfer()
> > /* Must try at least 3 times according to DP spec */
> > for (try = 0; try < 5; try++) {
> > 
> > And use only the retries in drm_dpcd_access?
> 
> I think it would work if we can make the retries configurable and set it
> to
>         retries = total_timeout / platform_specific_timeout_per_retry
> 
> where total_timeout would be something reasonable like 1 sec.

I actually think I'm more open to the idea of configurable retries after
learning that apparently this is a thing that the i2c subsystem does - so
there's more precedent for it in the rest of the kernel than I originally
thought.

I'm still curious if we need these extra retries in here though - there seems to
be one set of retries that is actually platform specific, and then just a random
set of 5 retries that don't seem to have anything to do with platform specific
behavior - so I think it'd still be worth giving a shot at getting rid of that.

> 
> > 
> > Thanks
> > Khaled
> > 
> > > 
> > > > > Anyways, this seems about the only thing we can do given the
> > > > > limited
> > > > > hw capabilities.
> > > > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > 
> > > > > > Accordingly disable LTTPR detection until GLK, where the
> > > > > > maximum timeout
> > > > > > we can set is only 1.6ms.
> > > > > > 
> > > > > > Link training in the non-transparent mode is known to fail at
> > > > > > least on
> > > > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > > > LTTPR
> > > > > > (see the References below). While this could have different
> > > > > > reasons
> > > > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > > > (and so not
> > > > > > using the non-transparent LT mode) fixes link training on these
> > > > > > systems.
> > > > > > 
> > > > > > While at it add a code comment about the platform specific
> > > > > > maximum
> > > > > > timeout values.
> > > > > > 
> > > > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > > > (Ville)
> > > > > > 
> > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > santiago.zarate@suse.com>
> > > > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > > > References:
> > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent
> > > > > > mode link training")
> > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > > > ++++++++++++---
> > > > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > > > intel_dp *intel_dp,
> > > > > >  else
> > > > > >  precharge = 5;
> > > > > > 
> > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > >  else
> > > > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > > > intel_dp *intel_dp,
> > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > > > base.port);
> > > > > >  u32 ret;
> > > > > > 
> > > > > > +/*
> > > > > > + * Max timeout values:
> > > > > > + * SKL-GLK: 1.6ms
> > > > > > + * CNL: 3.2ms
> > > > > > + * ICL+: 4ms
> > > > > > + */
> > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > diff --git
> > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > > > > 
> > > > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > *intel_dp)
> > > > > >  {
> > > > > > +struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > > > +
> > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > +return false;
> > > > > > +
> > > > > > +/*
> > > > > > + * Detecting LTTPRs must be avoided on platforms with
> > > > > > an AUX timeout
> > > > > > + * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > > > 3.6.6.1).
> > > > > > + */
> > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > +return false;
> > > > > > +
> > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > > >    intel_dp-
> > > > > > > lttpr_common_caps) < 0) {
> > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > > > *intel_dp)
> > > > > >  bool ret;
> > > > > >  int i;
> > > > > > 
> > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > -return 0;
> > > > > > -
> > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > >  if (!ret)
> > > > > >  return 0;
> > > > > > --
> > > > > > 2.25.1
> > > > > 
> > > > > --
> > > > > Ville Syrjälä
> > > > > Intel
>
Imre Deak March 19, 2021, 5:29 p.m. UTC | #7
On Fri, Mar 19, 2021 at 01:25:08PM -0400, Lyude Paul wrote:
> On Fri, 2021-03-19 at 01:17 +0200, Imre Deak wrote:
> > On Fri, Mar 19, 2021 at 12:04:54AM +0200, Almahallawy, Khaled wrote:
> > > On Thu, 2021-03-18 at 20:06 +0200, Imre Deak wrote:
> > > > On Thu, Mar 18, 2021 at 07:49:13PM +0200, Imre Deak wrote:
> > > > > On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> > > > > > On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > > > > > > The spec requires to use at least 3.2ms for the AUX timeout
> > > > > > > period if
> > > > > > > there are LT-tunable PHY Repeaters on the link (2.11.2). An
> > > > > > > upcoming
> > > > > > > spec update makes this more specific, by requiring a 3.2ms
> > > > > > > minimum
> > > > > > > timeout period for the LTTPR detection reading the 0xF0000-
> > > > > > > 0xF0007
> > > > > > > range (3.6.5.1).
> > > > > > 
> > > > > > I'm pondering if we could reduce the timeout after having
> > > > > > determined
> > > > > > > whether LTTPRs are present or not? But maybe that wouldn't
> > > > > > really speed
> > > > > > up anything since we can't reduce the timeout until after
> > > > > > detecting
> > > > > > *something*. And once there is something there we shouldn't
> > > > > > really get
> > > > > > any more timeouts I guess. So probably a totally stupid idea.
> > > > > 
> > > > > Right, if something is connected it would take anyway as much time
> > > > > as it
> > > > > takes for the sink to reply whether or not we decreased the
> > > > > timeout.
> > > > > 
> > > > > However if nothing is connected, we have the excessive timeout
> > > > > Khaled
> > > > > already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve
> > > > > that
> > > > > we could scale the total number of retries by making it
> > > > > total_timeout/platform_specific_timeout (letting total_timeout=2sec
> > > > > for
> > > > > instance) or just changing the drm retry logic to be time based
> > > > > instead
> > > > > of the number of retries we use atm.
> > > > 
> > > > Doh, reducing simply the HW timeouts would be enough to fix this.
> > > 
> > > What about Lyude's suggestion (
> > > https://patchwork.freedesktop.org/patch/420369/#comment_756572)
> > > to drop the retries in intel_dp_aux_xfer()
> > > /* Must try at least 3 times according to DP spec */
> > > for (try = 0; try < 5; try++) {
> > > 
> > > And use only the retries in drm_dpcd_access?
> > 
> > I think it would work if we can make the retries configurable and set it
> > to
> >         retries = total_timeout / platform_specific_timeout_per_retry
> > 
> > where total_timeout would be something reasonable like 1 sec.
> 
> I actually think I'm more open to the idea of configurable retries after
> learning that apparently this is a thing that the i2c subsystem does - so
> there's more precedent for it in the rest of the kernel than I originally
> thought.
> 
> I'm still curious if we need these extra retries in here though - there seems to
> be one set of retries that is actually platform specific, and then just a random
> set of 5 retries that don't seem to have anything to do with platform specific
> behavior - so I think it'd still be worth giving a shot at getting rid of that

The platform specific part of the timeout is the one described in the
maximum timeout values comments.

> > > Thanks
> > > Khaled
> > > 
> > > > 
> > > > > > Anyways, this seems about the only thing we can do given the
> > > > > > limited
> > > > > > hw capabilities.
> > > > > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > 
> > > > > > > Accordingly disable LTTPR detection until GLK, where the
> > > > > > > maximum timeout
> > > > > > > we can set is only 1.6ms.
> > > > > > > 
> > > > > > > Link training in the non-transparent mode is known to fail at
> > > > > > > least on
> > > > > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > > > > LTTPR
> > > > > > > (see the References below). While this could have different
> > > > > > > reasons
> > > > > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > > > > (and so not
> > > > > > > using the non-transparent LT mode) fixes link training on these
> > > > > > > systems.
> > > > > > > 
> > > > > > > While at it add a code comment about the platform specific
> > > > > > > maximum
> > > > > > > timeout values.
> > > > > > > 
> > > > > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > > > > (Ville)
> > > > > > > 
> > > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > > santiago.zarate@suse.com>
> > > > > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > > > > References:
> > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent
> > > > > > > mode link training")
> > > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > > ---
> > > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > > > > ++++++++++++---
> > > > > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > > > > intel_dp *intel_dp,
> > > > > > >  else
> > > > > > >  precharge = 5;
> > > > > > > 
> > > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > > >  else
> > > > > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > > > > intel_dp *intel_dp,
> > > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > > > > base.port);
> > > > > > >  u32 ret;
> > > > > > > 
> > > > > > > +/*
> > > > > > > + * Max timeout values:
> > > > > > > + * SKL-GLK: 1.6ms
> > > > > > > + * CNL: 3.2ms
> > > > > > > + * ICL+: 4ms
> > > > > > > + */
> > > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > > diff --git
> > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > > > > > 
> > > > > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > > *intel_dp)
> > > > > > >  {
> > > > > > > +struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > > > > +
> > > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > > +return false;
> > > > > > > +
> > > > > > > +/*
> > > > > > > + * Detecting LTTPRs must be avoided on platforms with
> > > > > > > an AUX timeout
> > > > > > > + * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > > > > 3.6.6.1).
> > > > > > > + */
> > > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > > +return false;
> > > > > > > +
> > > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > > > >    intel_dp-
> > > > > > > > lttpr_common_caps) < 0) {
> > > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > > > > *intel_dp)
> > > > > > >  bool ret;
> > > > > > >  int i;
> > > > > > > 
> > > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > > -return 0;
> > > > > > > -
> > > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > > >  if (!ret)
> > > > > > >  return 0;
> > > > > > > --
> > > > > > > 2.25.1
> > > > > > 
> > > > > > --
> > > > > > Ville Syrjälä
> > > > > > Intel
> > 
> 
> -- 
> Sincerely,
>    Lyude Paul (she/her)
>    Software Engineer at Red Hat
>    
> Note: I deal with a lot of emails and have a lot of bugs on my plate. If you've
> asked me a question, are waiting for a review/merge on a patch, etc. and I
> haven't responded in a while, please feel free to send me another email to check
> on my status. I don't bite!
>
Lyude Paul March 19, 2021, 8:44 p.m. UTC | #8
On Fri, 2021-03-19 at 19:29 +0200, Imre Deak wrote:
> On Fri, Mar 19, 2021 at 01:25:08PM -0400, Lyude Paul wrote:
> > On Fri, 2021-03-19 at 01:17 +0200, Imre Deak wrote:
> > > On Fri, Mar 19, 2021 at 12:04:54AM +0200, Almahallawy, Khaled wrote:
> > > > On Thu, 2021-03-18 at 20:06 +0200, Imre Deak wrote:
> > > > > On Thu, Mar 18, 2021 at 07:49:13PM +0200, Imre Deak wrote:
> > > > > > On Thu, Mar 18, 2021 at 07:33:20PM +0200, Ville Syrjälä wrote:
> > > > > > > On Wed, Mar 17, 2021 at 08:48:59PM +0200, Imre Deak wrote:
> > > > > > > > The spec requires to use at least 3.2ms for the AUX timeout
> > > > > > > > period if
> > > > > > > > there are LT-tunable PHY Repeaters on the link (2.11.2). An
> > > > > > > > upcoming
> > > > > > > > spec update makes this more specific, by requiring a 3.2ms
> > > > > > > > minimum
> > > > > > > > timeout period for the LTTPR detection reading the 0xF0000-
> > > > > > > > 0xF0007
> > > > > > > > range (3.6.5.1).
> > > > > > > 
> > > > > > > I'm pondering if we could reduce the timeout after having
> > > > > > > determined
> > > > > > > > whether LTTPRs are present or not? But maybe that wouldn't
> > > > > > > really speed
> > > > > > > up anything since we can't reduce the timeout until after
> > > > > > > detecting
> > > > > > > *something*. And once there is something there we shouldn't
> > > > > > > really get
> > > > > > > any more timeouts I guess. So probably a totally stupid idea.
> > > > > > 
> > > > > > Right, if something is connected it would take anyway as much time
> > > > > > as it
> > > > > > takes for the sink to reply whether or not we decreased the
> > > > > > timeout.
> > > > > > 
> > > > > > However if nothing is connected, we have the excessive timeout
> > > > > > Khaled
> > > > > > already noticed (160 * 4ms = 6.4 sec on ICL+). I think to improve
> > > > > > that
> > > > > > we could scale the total number of retries by making it
> > > > > > total_timeout/platform_specific_timeout (letting total_timeout=2sec
> > > > > > for
> > > > > > instance) or just changing the drm retry logic to be time based
> > > > > > instead
> > > > > > of the number of retries we use atm.
> > > > > 
> > > > > Doh, reducing simply the HW timeouts would be enough to fix this.
> > > > 
> > > > What about Lyude's suggestion (
> > > > https://patchwork.freedesktop.org/patch/420369/#comment_756572)
> > > > to drop the retries in intel_dp_aux_xfer()
> > > > /* Must try at least 3 times according to DP spec */
> > > > for (try = 0; try < 5; try++) {
> > > > 
> > > > And use only the retries in drm_dpcd_access?
> > > 
> > > I think it would work if we can make the retries configurable and set it
> > > to
> > >         retries = total_timeout / platform_specific_timeout_per_retry
> > > 
> > > where total_timeout would be something reasonable like 1 sec.
> > 
> > I actually think I'm more open to the idea of configurable retries after
> > learning that apparently this is a thing that the i2c subsystem does - so
> > there's more precedent for it in the rest of the kernel than I originally
> > thought.
> > 
> > I'm still curious if we need these extra retries in here though - there seems
> > to
> > be one set of retries that is actually platform specific, and then just a
> > random
> > set of 5 retries that don't seem to have anything to do with platform specific
> > behavior - so I think it'd still be worth giving a shot at getting rid of that
> 
> The platform specific part of the timeout is the one described in the
> maximum timeout values comments.

You mean the

		/* Must try at least 3 times according to DP spec */
		for (try = 0; try < 5; try++) {

bit? I thought that wasn't related to platform specific retries at all, since
the code in that loop seems to only reference parts of the DP spec, and that the

	while ((aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, clock++))) {

loop was the portion that was platform specific, since it prompts the driver to
retry the transaction with different aux clock divider rates depending on the
platform in use. Feel free to correct me if I'm wrong though.

Also - with the timeouts we're seeing, does the LTTPR return NAKs at all? That's
still another thing I had suggested alternate workarounds for so that we could
terminate transactions immediately on NAKs, so I wonder if that could save time
here as well. 

> 
> > > > Thanks
> > > > Khaled
> > > > 
> > > > > 
> > > > > > > Anyways, this seems about the only thing we can do given the
> > > > > > > limited
> > > > > > > hw capabilities.
> > > > > > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > 
> > > > > > > > Accordingly disable LTTPR detection until GLK, where the
> > > > > > > > maximum timeout
> > > > > > > > we can set is only 1.6ms.
> > > > > > > > 
> > > > > > > > Link training in the non-transparent mode is known to fail at
> > > > > > > > least on
> > > > > > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > > > > > LTTPR
> > > > > > > > (see the References below). While this could have different
> > > > > > > > reasons
> > > > > > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > > > > > (and so not
> > > > > > > > using the non-transparent LT mode) fixes link training on these
> > > > > > > > systems.
> > > > > > > > 
> > > > > > > > While at it add a code comment about the platform specific
> > > > > > > > maximum
> > > > > > > > timeout values.
> > > > > > > > 
> > > > > > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > > > > > (Ville)
> > > > > > > > 
> > > > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > > > santiago.zarate@suse.com>
> > > > > > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > > > > > References:
> > > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent
> > > > > > > > mode link training")
> > > > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > > > ---
> > > > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > > > > > ++++++++++++---
> > > > > > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > > > > > intel_dp *intel_dp,
> > > > > > > >  else
> > > > > > > >  precharge = 5;
> > > > > > > > 
> > > > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > > > >  else
> > > > > > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > > > > > intel_dp *intel_dp,
> > > > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > > > > > base.port);
> > > > > > > >  u32 ret;
> > > > > > > > 
> > > > > > > > +/*
> > > > > > > > + * Max timeout values:
> > > > > > > > + * SKL-GLK: 1.6ms
> > > > > > > > + * CNL: 3.2ms
> > > > > > > > + * ICL+: 4ms
> > > > > > > > + */
> > > > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > > > diff --git
> > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > > > > > > 
> > > > > > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > > > *intel_dp)
> > > > > > > >  {
> > > > > > > > +struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > > > > > +
> > > > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > > > +return false;
> > > > > > > > +
> > > > > > > > +/*
> > > > > > > > + * Detecting LTTPRs must be avoided on platforms with
> > > > > > > > an AUX timeout
> > > > > > > > + * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > > > > > 3.6.6.1).
> > > > > > > > + */
> > > > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > > > +return false;
> > > > > > > > +
> > > > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > > > > >    intel_dp-
> > > > > > > > > lttpr_common_caps) < 0) {
> > > > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > > > > > *intel_dp)
> > > > > > > >  bool ret;
> > > > > > > >  int i;
> > > > > > > > 
> > > > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > > > -return 0;
> > > > > > > > -
> > > > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > > > >  if (!ret)
> > > > > > > >  return 0;
> > > > > > > > --
> > > > > > > > 2.25.1
> > > > > > > 
> > > > > > > --
> > > > > > > Ville Syrjälä
> > > > > > > Intel
> > > 
> > 
> > -- 
> > Sincerely,
> >    Lyude Paul (she/her)
> >    Software Engineer at Red Hat
> >    
> > Note: I deal with a lot of emails and have a lot of bugs on my plate. If
> > you've
> > asked me a question, are waiting for a review/merge on a patch, etc. and I
> > haven't responded in a while, please feel free to send me another email to
> > check
> > on my status. I don't bite!
> > 
>
Imre Deak March 19, 2021, 9:07 p.m. UTC | #9
On Fri, Mar 19, 2021 at 04:44:26PM -0400, Lyude Paul wrote:
> > > > [...]
> > > > I think it would work if we can make the retries configurable and set it
> > > > to
> > > >         retries = total_timeout / platform_specific_timeout_per_retry
> > > > 
> > > > where total_timeout would be something reasonable like 1 sec.
> > > 
> > > I actually think I'm more open to the idea of configurable retries after
> > > learning that apparently this is a thing that the i2c subsystem does - so
> > > > there's more precedent for it in the rest of the kernel than I originally
> > > thought.
> > > 
> > > I'm still curious if we need these extra retries in here though - there seems
> > > to
> > > be one set of retries that is actually platform specific, and then just a
> > > random
> > > set of 5 retries that don't seem to have anything to do with platform specific
> > > behavior - so I think it'd still be worth giving a shot at getting rid of that
> > 
> > > The platform specific part of the timeout is the one described in the
> > maximum timeout values comments.
> 
> You mean the
> 
> 		/* Must try at least 3 times according to DP spec */
> 		for (try = 0; try < 5; try++) {
> 
> bit? I thought that wasn't related to platform specific retries at all, since
> the code in that loop seems to only reference parts of the DP spec, and that the
> 
> 	while ((aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, clock++))) {
> 
> loop was the portion that was platform specific, since it prompts the driver to
> retry the transaction with different aux clock divider rates depending on the
> platform in use. Feel free to correct me if I'm wrong though.

Nope. I meant every HW transaction will have a platform specific
timeout. For instance it's 1.6ms on SKL, but 4ms on ICL. So now since
the overall retry count is 32 * 5 = 160, on SKL we'll retry for ~2.6
seconds, on ICL we'll retry for ~6.4 seconds (disregarding now the extra
400usec delay inserted by drm_dp_dpcd_access(), which adds a fixed
~1.3ms delay).

This is what I think should be normalized, so that we have the same
amount of overall maximum timeout period on all platforms.

> Also - with the timeouts we're seeing, does the LTTPR return NAKs at all? That's
> still another thing I had suggested alternate workarounds for so that we could
> terminate transactions immediately on NAKs, so I wonder if that could save time
> here as well.

There's not much LTTPR specific in that wrt. what sinks would do
normally (no NAKs for read, only for writes) except LTTPRs may rewrite
NAKs to ACKs to account for buggy monitors returning NAKs when reading
the 0xf0000 -> range. But I'd suggest not dealing with this aspect now,
just sanitize the above retry thing, as you suggested, remove the i915
retry loop and make the drm retry loop configurable.

(In any case I also had the idea to stop transactions early when HPD
 gets deasserted, but not sure if that's completely robust.)

> > > > > Thanks
> > > > > Khaled
> > > > > 
> > > > > > 
> > > > > > > > Anyways, this seems about the only thing we can do given the
> > > > > > > > limited
> > > > > > > > hw capabilities.
> > > > > > > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > 
> > > > > > > > > Accordingly disable LTTPR detection until GLK, where the
> > > > > > > > > maximum timeout
> > > > > > > > > we can set is only 1.6ms.
> > > > > > > > > 
> > > > > > > > > Link training in the non-transparent mode is known to fail at
> > > > > > > > > least on
> > > > > > > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > > > > > > LTTPR
> > > > > > > > > (see the References below). While this could have different
> > > > > > > > > reasons
> > > > > > > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > > > > > > (and so not
> > > > > > > > > using the non-transparent LT mode) fixes link training on these
> > > > > > > > > systems.
> > > > > > > > > 
> > > > > > > > > While at it add a code comment about the platform specific
> > > > > > > > > maximum
> > > > > > > > > timeout values.
> > > > > > > > > 
> > > > > > > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > > > > > > (Ville)
> > > > > > > > > 
> > > > > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > > > > santiago.zarate@suse.com>
> > > > > > > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > > > > > > References:
> > > > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent
> > > > > > > > > mode link training")
> > > > > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > > > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > > > > > > ++++++++++++---
> > > > > > > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > > > > > > intel_dp *intel_dp,
> > > > > > > > >  else
> > > > > > > > >  precharge = 5;
> > > > > > > > > 
> > > > > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > > > > >  else
> > > > > > > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > > > > > > intel_dp *intel_dp,
> > > > > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > > > > > > base.port);
> > > > > > > > >  u32 ret;
> > > > > > > > > 
> > > > > > > > > +/*
> > > > > > > > > + * Max timeout values:
> > > > > > > > > + * SKL-GLK: 1.6ms
> > > > > > > > > + * CNL: 3.2ms
> > > > > > > > > + * ICL+: 4ms
> > > > > > > > > + */
> > > > > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > > > > diff --git
> > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > > > > > > > 
> > > > > > > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > > > > *intel_dp)
> > > > > > > > >  {
> > > > > > > > > +struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > > > > > > +
> > > > > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > > > > +return false;
> > > > > > > > > +
> > > > > > > > > +/*
> > > > > > > > > + * Detecting LTTPRs must be avoided on platforms with
> > > > > > > > > an AUX timeout
> > > > > > > > > + * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > > > > > > 3.6.6.1).
> > > > > > > > > + */
> > > > > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > > > > +return false;
> > > > > > > > > +
> > > > > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > > > > > >    intel_dp-
> > > > > > > > > > lttpr_common_caps) < 0) {
> > > > > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > > > > > > *intel_dp)
> > > > > > > > >  bool ret;
> > > > > > > > >  int i;
> > > > > > > > > 
> > > > > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > > > > -return 0;
> > > > > > > > > -
> > > > > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > > > > >  if (!ret)
> > > > > > > > >  return 0;
> > > > > > > > > --
> > > > > > > > > 2.25.1
> > > > > > > > 
> > > > > > > > --
> > > > > > > > Ville Syrjälä
> > > > > > > > Intel
> > > > 
> > > 
> > > -- 
> > > Sincerely,
> > >    Lyude Paul (she/her)
> > >    Software Engineer at Red Hat
> > >    
> > > Note: I deal with a lot of emails and have a lot of bugs on my plate. If
> > > you've
> > > asked me a question, are waiting for a review/merge on a patch, etc. and I
> > > haven't responded in a while, please feel free to send me another email to
> > > check
> > > on my status. I don't bite!
> > > 
> > 
> 
> -- 
> Sincerely,
>    Lyude Paul (she/her)
>    Software Engineer at Red Hat
>    
> Note: I deal with a lot of emails and have a lot of bugs on my plate. If you've
> asked me a question, are waiting for a review/merge on a patch, etc. and I
> haven't responded in a while, please feel free to send me another email to check
> on my status. I don't bite!
>
Imre Deak March 20, 2021, 7:15 a.m. UTC | #10
On Fri, Mar 19, 2021 at 11:07:21PM +0200, Imre Deak wrote:
> On Fri, Mar 19, 2021 at 04:44:26PM -0400, Lyude Paul wrote:
> > > > > [...]
> > > > > I think it would work if we can make the retries configurable and set it
> > > > > to
> > > > >         retries = total_timeout / platform_specific_timeout_per_retry
> > > > > 
> > > > > where total_timeout would be something reasonable like 1 sec.
> > > > 
> > > > I actually think I'm more open to the idea of configurable retries after
> > > > learning that apparently this is a thing that the i2c subsystem does - so
> > > > there's more precedence for it in the rest of the kernel than I originally
> > > > thought.
> > > > 
> > > > I'm still curious if we need these extra retries in here though - there seems
> > > > to
> > > > be one set of retries that is actually platform specific, and then just a
> > > > random
> > > > set of 5 retries that don't seem to have anything to do with platform specific
> > > > behavior - so I think it'd still be worth giving a shot at getting rid of that
> > > 
> > > The platform specific part of the timeout is the one desctibed in the
> > > maximum timeout values comments.
> > 
> > You mean the
> > 
> > 		/* Must try at least 3 times according to DP spec */
> > 		for (try = 0; try < 5; try++) {
> > 
> > bit? I thought that wasn't related to platform specific retries at all, since
> > the code in that loop seems to only reference parts of the DP spec, and that the
> > 
> > 	while ((aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, clock++))) {
> > 
> > Loop was the portion that was platform specific, since it prompts the driver to
> > retry the transaction with different aux clock divider rates depending on the
> > platform in use. Feel free to correct me if I'm wrong though.
> 
> Nope. I meant every HW transaction will have a platform specific
> timeout. For instance it's 1.6ms on SKL, but 4ms on ICL. So now since
> the overall retry count is 32 * 5 = 160, on SKL we'll retry for ~2.6
> seconds, on ICL we'll retry for ~6.4 seconds (disregarding now the extra
> 400usec delay inserted by drm_dp_dpcd_access(), which adds a fixed
> ~1.3ms delay).

Err, looks like I missed some coffee. These are the current max total
timeouts, which we would need to make the same on all platforms:

	g4x-glk: 5 * 32 * 1.6ms + 32 * 400us = 268.8ms
	cnl    : 5 * 32 * 3.2ms + 32 * 400us = 524.8ms
	icl+   : 5 * 32 * 4ms   + 32 * 400us = 652.8ms

> This is what I think should be normalized, so that we have the same
> amount of overall maximum timeout period on all platforms.
> 
> > Also - with the timeouts we're seeing, does the LTTPR return NAKs at all? That's
> > still another thing I had suggested alternate workarounds for so that we could
> > terminate transactions immediately on NAKs, so I wonder if that could save time
> > here as well.
> 
> There's not much LTTPR specific in that wrt. what sinks would do
> normally (no NAKs for read, only for writes) except LTTPRs may rewrite
> NAKs to ACKs to account for buggy monitors returning NAKs when reading
> the 0xf0000 -> range. But I'd suggest not dealing with this aspect now,
> just sanitize the above retry thing, as you suggested, remove the i915
> retry loop and make the drm retry loop configurable.
> 
> (In any case I also had the idea to stop transactions early when HPD
>  gets deasserted, but not sure if that's completely robust.)
> 
> > > > > > Thanks
> > > > > > Khaled
> > > > > > 
> > > > > > > 
> > > > > > > > > Anyways, this seems about the only thing we can do given the
> > > > > > > > > limited
> > > > > > > > > hw capabilities.
> > > > > > > > > Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > 
> > > > > > > > > > Accordingly disable LTTPR detection until GLK, where the
> > > > > > > > > > maximum timeout
> > > > > > > > > > we can set is only 1.6ms.
> > > > > > > > > > 
> > > > > > > > > > Link training in the non-transparent mode is known to fail at
> > > > > > > > > > least on
> > > > > > > > > > some SKL systems with a WD19 dock on the link, which exposes an
> > > > > > > > > > LTTPR
> > > > > > > > > > (see the References below). While this could have different
> > > > > > > > > > reasons
> > > > > > > > > > besides the too short AUX timeout used, not detecting LTTPRs
> > > > > > > > > > (and so not
> > > > > > > > > > using the non-transparent LT mode) fixes link training on these
> > > > > > > > > > systems.
> > > > > > > > > > 
> > > > > > > > > > While at it add a code comment about the platform specific
> > > > > > > > > > maximum
> > > > > > > > > > timeout values.
> > > > > > > > > > 
> > > > > > > > > > v2: Add a comment about the g4x maximum timeout as well.
> > > > > > > > > > (Ville)
> > > > > > > > > > 
> > > > > > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > > > > > santiago.zarate@suse.com>
> > > > > > > > > > Reported-and-tested-by: Bodo Graumann <mail@bodograumann.de>
> > > > > > > > > > References:
> > > > > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent
> > > > > > > > > > mode link training")
> > > > > > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c       |  7 +++++++
> > > > > > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.c | 15
> > > > > > > > > > ++++++++++++---
> > > > > > > > > >  2 files changed, 19 insertions(+), 3 deletions(-)
> > > > > > > > > > 
> > > > > > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > @@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct
> > > > > > > > > > intel_dp *intel_dp,
> > > > > > > > > >  else
> > > > > > > > > >  precharge = 5;
> > > > > > > > > > 
> > > > > > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > > > > > >  else
> > > > > > > > > > @@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct
> > > > > > > > > > intel_dp *intel_dp,
> > > > > > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > > > > > > > base.port);
> > > > > > > > > >  u32 ret;
> > > > > > > > > > 
> > > > > > > > > > +/*
> > > > > > > > > > + * Max timeout values:
> > > > > > > > > > + * SKL-GLK: 1.6ms
> > > > > > > > > > + * CNL: 3.2ms
> > > > > > > > > > + * ICL+: 4ms
> > > > > > > > > > + */
> > > > > > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > > > > > diff --git
> > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
> > > > > > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
> > > > > > > > > > 
> > > > > > > > > >  static bool intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > > > > > *intel_dp)
> > > > > > > > > >  {
> > > > > > > > > > +struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > > > > > > > > > +
> > > > > > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > > > > > +return false;
> > > > > > > > > > +
> > > > > > > > > > +/*
> > > > > > > > > > + * Detecting LTTPRs must be avoided on platforms with
> > > > > > > > > > an AUX timeout
> > > > > > > > > > + * period < 3.2ms. (see DP Standard v2.0, 2.11.2,
> > > > > > > > > > 3.6.6.1).
> > > > > > > > > > + */
> > > > > > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > > > > > +return false;
> > > > > > > > > > +
> > > > > > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > > > > > > >    intel_dp-
> > > > > > > > > > > lttpr_common_caps) < 0) {
> > > > > > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > > > > > @@ -127,9 +139,6 @@ int intel_dp_lttpr_init(struct intel_dp
> > > > > > > > > > *intel_dp)
> > > > > > > > > >  bool ret;
> > > > > > > > > >  int i;
> > > > > > > > > > 
> > > > > > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > > > > > -return 0;
> > > > > > > > > > -
> > > > > > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > > > > > >  if (!ret)
> > > > > > > > > >  return 0;
> > > > > > > > > > --
> > > > > > > > > > 2.25.1
> > > > > > > > > 
> > > > > > > > > --
> > > > > > > > > Ville Syrjälä
> > > > > > > > > Intel
> > > > > 
> > > > 
> > > > -- 
> > > > Sincerely,
> > > >    Lyude Paul (she/her)
> > > >    Software Engineer at Red Hat
> > > >    
> > > > Note: I deal with a lot of emails and have a lot of bugs on my plate. If
> > > > you've
> > > > asked me a question, are waiting for a review/merge on a patch, etc. and I
> > > > haven't responded in a while, please feel free to send me another email to
> > > > check
> > > > on my status. I don't bite!
> > > > 
> > > 
> > 
> > -- 
> > Sincerely,
> >    Lyude Paul (she/her)
> >    Software Engineer at Red Hat
> >    
> > Note: I deal with a lot of emails and have a lot of bugs on my plate. If you've
> > asked me a question, are waiting for a review/merge on a patch, etc. and I
> > haven't responded in a while, please feel free to send me another email to check
> > on my status. I don't bite!
> >
Almahallawy, Khaled March 20, 2021, 7:40 a.m. UTC | #11
On Sat, 2021-03-20 at 09:15 +0200, Imre Deak wrote:
> On Fri, Mar 19, 2021 at 11:07:21PM +0200, Imre Deak wrote:
> > On Fri, Mar 19, 2021 at 04:44:26PM -0400, Lyude Paul wrote:
> > > > > > [...]
> > > > > > I think it would work if we can make the retries
> > > > > > configurable and set it
> > > > > > to
> > > > > >         retries = total_timeout /
> > > > > > platform_specific_timeout_per_retry
> > > > > > 
> > > > > > where total_timeout would be something reasonable like 1
> > > > > > sec.
> > > > > 
> > > > > I actually think I'm more open to the idea of configurable
> > > > > retries after
> > > > > learning that apparently this is a thing that the i2c
> > > > > subsystem does - so
> > > > > there's more precedence for it in the rest of the kernel than
> > > > > I originally
> > > > > thought.
> > > > > 
> > > > > I'm still curious if we need these extra retries in here
> > > > > though - there seems
> > > > > to
> > > > > be one set of retries that is actually platform specific, and
> > > > > then just a
> > > > > random
> > > > > set of 5 retries that don't seem to have anything to do with
> > > > > platform specific
> > > > > behavior - so I think it'd still be worth giving a shot at
> > > > > getting rid of that
> > > > 
> > > > The platform specific part of the timeout is the one desctibed
> > > > in the
> > > > maximum timeout values comments.
> > > 
> > > You mean the
> > > 
> > > 		/* Must try at least 3 times according to DP spec */
> > > 		for (try = 0; try < 5; try++) {
> > > 
> > > bit? I thought that wasn't related to platform specific retries
> > > at all, since
> > > the code in that loop seems to only reference parts of the DP
> > > spec, and that the
> > > 
> > > 	while ((aux_clock_divider = intel_dp-
> > > >get_aux_clock_divider(intel_dp, clock++))) {
> > > 
> > > Loop was the portion that was platform specific, since it prompts
> > > the driver to
> > > retry the transaction with different aux clock divider rates
> > > depending on the
> > > platform in use. Feel free to correct me if I'm wrong though.
> > 
> > Nope. I meant every HW transaction will have a platform specific
> > timeout. For instance it's 1.6ms on SKL, but 4ms on ICL. So now
> > since
> > the overall retry count is 32 * 5 = 160, on SKL we'll retry for
> > ~2.6
> > seconds, on ICL we'll retry for ~6.4 seconds (disregarding now the
> > extra
> > 400usec delay inserted by drm_dp_dpcd_access(), which adds a fixed
> > ~1.3ms delay).
> 
> Err, looks like I missed some coffee. Max total timeouts atm, which
> we
> would need to make the same on all platforms:
> 
> 	g4x-glk: 5 * 32 * 1.6ms + 32 * 400us = 268.8ms
> 	cnl    : 5 * 32 * 3.2ms + 32 * 400us = 524.8ms
> 	icl+   : 5 * 32 * 4ms   + 32 * 400us = 652.8ms
> 


Apologies if I'm missing something, but in drm_dp_dpcd_access() I think it
is 500us, not 400us?

		#define AUX_RETRY_INTERVAL 500 /* us */

		if (ret != 0 && ret != -ETIMEDOUT) {
			usleep_range(AUX_RETRY_INTERVAL,
				     AUX_RETRY_INTERVAL + 100);
		}

Thanks
Khaled

> > This is what I think should be normalized, so that we have the same
> > amount of overall maximum timeout period on all platforms.
> > 
> > > Also - with the timeouts we're seeing, does the LTTPR return NAKs
> > > at all? That's
> > > still another thing I had suggested alternate workarounds for so
> > > that we could
> > > terminate transactions immediately on NAKs, so I wonder if that
> > > could save time
> > > here as well.
> > 
> > There's not much LTTPR specific in that wrt. what sinks would do
> > normally (no NAKs for read, only for writes) except LTTPRs may
> > rewrite
> > NAKs to ACKs to account for buggy monitors returning NAKs when
> > reading
> > the 0xf0000 -> range. But I'd suggest not dealing with this aspect
> > now,
> > just sanitize the above retry thing, as you suggested, remove the
> > i915
> > retry loop and make the drm retry loop configurable.
> > 
> > (In any case I also had the idea to stop transactions early when
> > HPD
> >  gets deasserted, but not sure if that's completely robust.)
> > 
> > > > > > > Thanks
> > > > > > > Khaled
> > > > > > > 
> > > > > > > > > > Anyways, this seems about the only thing we can do
> > > > > > > > > > given the
> > > > > > > > > > limited
> > > > > > > > > > hw capabilities.
> > > > > > > > > > Reviewed-by: Ville Syrjälä <
> > > > > > > > > > ville.syrjala@linux.intel.com>
> > > > > > > > > > 
> > > > > > > > > > > Accordingly disable LTTPR detection until GLK,
> > > > > > > > > > > where the
> > > > > > > > > > > maximum timeout
> > > > > > > > > > > we can set is only 1.6ms.
> > > > > > > > > > > 
> > > > > > > > > > > Link training in the non-transparent mode is
> > > > > > > > > > > known to fail at
> > > > > > > > > > > least on
> > > > > > > > > > > some SKL systems with a WD19 dock on the link,
> > > > > > > > > > > which exposes an
> > > > > > > > > > > LTTPR
> > > > > > > > > > > (see the References below). While this could have
> > > > > > > > > > > different
> > > > > > > > > > > reasons
> > > > > > > > > > > besides the too short AUX timeout used, not
> > > > > > > > > > > detecting LTTPRs
> > > > > > > > > > > (and so not
> > > > > > > > > > > using the non-transparent LT mode) fixes link
> > > > > > > > > > > training on these
> > > > > > > > > > > systems.
> > > > > > > > > > > 
> > > > > > > > > > > While at it add a code comment about the platform
> > > > > > > > > > > specific
> > > > > > > > > > > maximum
> > > > > > > > > > > timeout values.
> > > > > > > > > > > 
> > > > > > > > > > > v2: Add a comment about the g4x maximum timeout
> > > > > > > > > > > as well.
> > > > > > > > > > > (Ville)
> > > > > > > > > > > 
> > > > > > > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > > > > > > santiago.zarate@suse.com>
> > > > > > > > > > > Reported-and-tested-by: Bodo Graumann <
> > > > > > > > > > > mail@bodograumann.de>
> > > > > > > > > > > References:
> > > > > > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR
> > > > > > > > > > > non-transparent
> > > > > > > > > > > mode link training")
> > > > > > > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c     
> > > > > > > > > > >   |  7 +++++++
> > > > > > > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.
> > > > > > > > > > > c | 15
> > > > > > > > > > > ++++++++++++---
> > > > > > > > > > >  2 files changed, 19 insertions(+), 3 deletions(-
> > > > > > > > > > > )
> > > > > > > > > > > 
> > > > > > > > > > > diff --git
> > > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > @@ -133,6 +133,7 @@ static u32
> > > > > > > > > > > g4x_get_aux_send_ctl(struct
> > > > > > > > > > > intel_dp *intel_dp,
> > > > > > > > > > >  else
> > > > > > > > > > >  precharge = 5;
> > > > > > > > > > > 
> > > > > > > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > > > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > > > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > > > > > > >  else
> > > > > > > > > > > @@ -159,6 +160,12 @@ static u32
> > > > > > > > > > > skl_get_aux_send_ctl(struct
> > > > > > > > > > > intel_dp *intel_dp,
> > > > > > > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port-
> > > > > > > > > > > > base.port);
> > > > > > > > > > >  u32 ret;
> > > > > > > > > > > 
> > > > > > > > > > > +/*
> > > > > > > > > > > + * Max timeout values:
> > > > > > > > > > > + * SKL-GLK: 1.6ms
> > > > > > > > > > > + * CNL: 3.2ms
> > > > > > > > > > > + * ICL+: 4ms
> > > > > > > > > > > + */
> > > > > > > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > > > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > > > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > > > > > > diff --git
> > > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > ning.c
> > > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > ning.c
> > > > > > > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > > > > > > ---
> > > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > ning.c
> > > > > > > > > > > +++
> > > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > ning.c
> > > > > > > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp
> > > > > > > > > > > *intel_dp,
> > > > > > > > > > > 
> > > > > > > > > > >  static bool
> > > > > > > > > > > intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > > > > > > *intel_dp)
> > > > > > > > > > >  {
> > > > > > > > > > > +struct drm_i915_private *i915 =
> > > > > > > > > > > dp_to_i915(intel_dp);
> > > > > > > > > > > +
> > > > > > > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > > > > > > +return false;
> > > > > > > > > > > +
> > > > > > > > > > > +/*
> > > > > > > > > > > + * Detecting LTTPRs must be avoided on platforms
> > > > > > > > > > > with
> > > > > > > > > > > an AUX timeout
> > > > > > > > > > > + * period < 3.2ms. (see DP Standard v2.0,
> > > > > > > > > > > 2.11.2,
> > > > > > > > > > > 3.6.6.1).
> > > > > > > > > > > + */
> > > > > > > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > > > > > > +return false;
> > > > > > > > > > > +
> > > > > > > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp-
> > > > > > > > > > > >aux,
> > > > > > > > > > >    intel_dp-
> > > > > > > > > > > > lttpr_common_caps) < 0) {
> > > > > > > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > > > > > > @@ -127,9 +139,6 @@ int
> > > > > > > > > > > intel_dp_lttpr_init(struct intel_dp
> > > > > > > > > > > *intel_dp)
> > > > > > > > > > >  bool ret;
> > > > > > > > > > >  int i;
> > > > > > > > > > > 
> > > > > > > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > > > > > > -return 0;
> > > > > > > > > > > -
> > > > > > > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > > > > > > >  if (!ret)
> > > > > > > > > > >  return 0;
> > > > > > > > > > > --
> > > > > > > > > > > 2.25.1
> > > > > > > > > > 
> > > > > > > > > > --
> > > > > > > > > > Ville Syrjälä
> > > > > > > > > > Intel
> > > > > 
> > > > > -- 
> > > > > Sincerely,
> > > > >    Lyude Paul (she/her)
> > > > >    Software Engineer at Red Hat
> > > > >    
> > > > > Note: I deal with a lot of emails and have a lot of bugs on
> > > > > my plate. If
> > > > > you've
> > > > > asked me a question, are waiting for a review/merge on a
> > > > > patch, etc. and I
> > > > > haven't responded in a while, please feel free to send me
> > > > > another email to
> > > > > check
> > > > > on my status. I don't bite!
> > > > > 
> > > 
> > > -- 
> > > Sincerely,
> > >    Lyude Paul (she/her)
> > >    Software Engineer at Red Hat
> > >    
> > > Note: I deal with a lot of emails and have a lot of bugs on my
> > > plate. If you've
> > > asked me a question, are waiting for a review/merge on a patch,
> > > etc. and I
> > > haven't responded in a while, please feel free to send me another
> > > email to check
> > > on my status. I don't bite!
> > >
Imre Deak March 20, 2021, 7:45 a.m. UTC | #12
On Sat, Mar 20, 2021 at 09:40:52AM +0200, Almahallawy, Khaled wrote:
> On Sat, 2021-03-20 at 09:15 +0200, Imre Deak wrote:
> > On Fri, Mar 19, 2021 at 11:07:21PM +0200, Imre Deak wrote:
> > > On Fri, Mar 19, 2021 at 04:44:26PM -0400, Lyude Paul wrote:
> > > > > > > [...]
> > > > > > > I think it would work if we can make the retries
> > > > > > > configurable and set it
> > > > > > > to
> > > > > > >         retries = total_timeout /
> > > > > > > platform_specific_timeout_per_retry
> > > > > > >
> > > > > > > where total_timeout would be something reasonable like 1
> > > > > > > sec.
> > > > > >
> > > > > > I actually think I'm more open to the idea of configurable
> > > > > > retries after
> > > > > > learning that apparently this is a thing that the i2c
> > > > > > subsystem does - so
> > > > > > there's more precedence for it in the rest of the kernel than
> > > > > > I originally
> > > > > > thought.
> > > > > >
> > > > > > I'm still curious if we need these extra retries in here
> > > > > > though - there seems
> > > > > > to
> > > > > > be one set of retries that is actually platform specific, and
> > > > > > then just a
> > > > > > random
> > > > > > set of 5 retries that don't seem to have anything to do with
> > > > > > platform specific
> > > > > > behavior - so I think it'd still be worth giving a shot at
> > > > > > getting rid of that
> > > > >
> > > > > The platform specific part of the timeout is the one desctibed
> > > > > in the
> > > > > maximum timeout values comments.
> > > >
> > > > You mean the
> > > >
> > > > /* Must try at least 3 times according to DP spec */
> > > > for (try = 0; try < 5; try++) {
> > > >
> > > > bit? I thought that wasn't related to platform specific retries
> > > > at all, since
> > > > the code in that loop seems to only reference parts of the DP
> > > > spec, and that the
> > > >
> > > > while ((aux_clock_divider = intel_dp-
> > > > >get_aux_clock_divider(intel_dp, clock++))) {
> > > >
> > > > Loop was the portion that was platform specific, since it prompts
> > > > the driver to
> > > > retry the transaction with different aux clock divider rates
> > > > depending on the
> > > > platform in use. Feel free to correct me if I'm wrong though.
> > >
> > > Nope. I meant every HW transaction will have a platform specific
> > > timeout. For instance it's 1.6ms on SKL, but 4ms on ICL. So now
> > > since
> > > the overall retry count is 32 * 5 = 160, on SKL we'll retry for
> > > ~2.6
> > > seconds, on ICL we'll retry for ~6.4 seconds (disregarding now the
> > > extra
> > > 400usec delay inserted by drm_dp_dpcd_access(), which adds a fixed
> > > ~1.3ms delay).
> >
> > Err, looks like I missed some coffee. Max total timeouts atm, which
> > we
> > would need to make the same on all platforms:
> >
> > g4x-glk: 5 * 32 * 1.6ms + 32 * 400us = 268.8ms
> > cnl    : 5 * 32 * 3.2ms + 32 * 400us = 524.8ms
> > icl+   : 5 * 32 * 4ms   + 32 * 400us = 652.8ms
> >
> 
> 
> Apology if I'm missing something. but in drm_dpcd_access() I think it
> is 500us not 400us?!

Ah, yes, or more like 600us (the usleep_range() upper bound), so we need to
add 32 * 200us = 6.4ms to all of the above figures.

> #define AUX_RETRY_INTERVAL 500 /* us */
> 
> if (ret != 0 && ret != -ETIMEDOUT) {
> usleep_range(AUX_RETRY_INTERVAL,
>      AUX_RETRY_INTERVAL + 100);
> }
> 
> Thanks
> Khaled
> 
> > > This is what I think should be normalized, so that we have the same
> > > amount of overall maximum timeout period on all platforms.
> > >
> > > > Also - with the timeouts we're seeing, does the LTTPR return NAKs
> > > > at all? That's
> > > > still another thing I had suggested alternate workarounds for so
> > > > that we could
> > > > terminate transactions immediately on NAKs, so I wonder if that
> > > > could save time
> > > > here as well.
> > >
> > > There's not much LTTPR specific in that wrt. what sinks would do
> > > normally (no NAKs for read, only for writes) except LTTPRs may
> > > rewrite
> > > NAKs to ACKs to account for buggy monitors returning NAKs when
> > > reading
> > > the 0xf0000 -> range. But I'd suggest not dealing with this aspect
> > > now,
> > > just sanitize the above retry thing, as you suggested, remove the
> > > i915
> > > retry loop and make the drm retry loop configurable.
> > >
> > > (In any case I also had the idea to stop transactions early when
> > > HPD
> > >  gets deasserted, but not sure if that's completely robust.)
> > >
> > > > > > > > Thanks
> > > > > > > > Khaled
> > > > > > > >
> > > > > > > > > > > Anyways, this seems about the only thing we can do
> > > > > > > > > > > given the
> > > > > > > > > > > limited
> > > > > > > > > > > hw capabilities.
> > > > > > > > > > > Reviewed-by: Ville Syrjälä <
> > > > > > > > > > > ville.syrjala@linux.intel.com>
> > > > > > > > > > >
> > > > > > > > > > > > Accordingly disable LTTPR detection until GLK,
> > > > > > > > > > > > where the
> > > > > > > > > > > > maximum timeout
> > > > > > > > > > > > we can set is only 1.6ms.
> > > > > > > > > > > >
> > > > > > > > > > > > Link training in the non-transparent mode is
> > > > > > > > > > > > known to fail at
> > > > > > > > > > > > least on
> > > > > > > > > > > > some SKL systems with a WD19 dock on the link,
> > > > > > > > > > > > which exposes an
> > > > > > > > > > > > LTTPR
> > > > > > > > > > > > (see the References below). While this could have
> > > > > > > > > > > > different
> > > > > > > > > > > > reasons
> > > > > > > > > > > > besides the too short AUX timeout used, not
> > > > > > > > > > > > detecting LTTPRs
> > > > > > > > > > > > (and so not
> > > > > > > > > > > > using the non-transparent LT mode) fixes link
> > > > > > > > > > > > training on these
> > > > > > > > > > > > systems.
> > > > > > > > > > > >
> > > > > > > > > > > > While at it add a code comment about the platform
> > > > > > > > > > > > specific
> > > > > > > > > > > > maximum
> > > > > > > > > > > > timeout values.
> > > > > > > > > > > >
> > > > > > > > > > > > v2: Add a comment about the g4x maximum timeout
> > > > > > > > > > > > as well.
> > > > > > > > > > > > (Ville)
> > > > > > > > > > > >
> > > > > > > > > > > > Reported-by: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > > > > Reported-and-tested-by: Santiago Zarate <
> > > > > > > > > > > > santiago.zarate@suse.com>
> > > > > > > > > > > > Reported-and-tested-by: Bodo Graumann <
> > > > > > > > > > > > mail@bodograumann.de>
> > > > > > > > > > > > References:
> > > > > > > > > > > > https://gitlab.freedesktop.org/drm/intel/-/issues/3166
> > > > > > > > > > > > Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR
> > > > > > > > > > > > non-transparent
> > > > > > > > > > > > mode link training")
> > > > > > > > > > > > Cc: <stable@vger.kernel.org> # v5.11
> > > > > > > > > > > > Cc: Takashi Iwai <tiwai@suse.de>
> > > > > > > > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > > > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > > > > > > > > > > ---
> > > > > > > > > > > >  drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > >   |  7 +++++++
> > > > > > > > > > > >  .../gpu/drm/i915/display/intel_dp_link_training.
> > > > > > > > > > > > c | 15
> > > > > > > > > > > > ++++++++++++---
> > > > > > > > > > > >  2 files changed, 19 insertions(+), 3 deletions(-
> > > > > > > > > > > > )
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git
> > > > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > > index eaebf123310a..10fe17b7280d 100644
> > > > > > > > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
> > > > > > > > > > > > @@ -133,6 +133,7 @@ static u32
> > > > > > > > > > > > g4x_get_aux_send_ctl(struct
> > > > > > > > > > > > intel_dp *intel_dp,
> > > > > > > > > > > >  else
> > > > > > > > > > > >  precharge = 5;
> > > > > > > > > > > >
> > > > > > > > > > > > +/* Max timeout value on G4x-BDW: 1.6ms */
> > > > > > > > > > > >  if (IS_BROADWELL(dev_priv))
> > > > > > > > > > > >  timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
> > > > > > > > > > > >  else
> > > > > > > > > > > > @@ -159,6 +160,12 @@ static u32
> > > > > > > > > > > > skl_get_aux_send_ctl(struct
> > > > > > > > > > > > intel_dp *intel_dp,
> > > > > > > > > > > > >  enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
> > > > > > > > > > > >  u32 ret;
> > > > > > > > > > > >
> > > > > > > > > > > > +/*
> > > > > > > > > > > > + * Max timeout values:
> > > > > > > > > > > > + * SKL-GLK: 1.6ms
> > > > > > > > > > > > + * CNL: 3.2ms
> > > > > > > > > > > > + * ICL+: 4ms
> > > > > > > > > > > > + */
> > > > > > > > > > > >  ret = DP_AUX_CH_CTL_SEND_BUSY |
> > > > > > > > > > > >        DP_AUX_CH_CTL_DONE |
> > > > > > > > > > > >        DP_AUX_CH_CTL_INTERRUPT |
> > > > > > > > > > > > diff --git
> > > > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > > ning.c
> > > > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > > ning.c
> > > > > > > > > > > > index 19ba7c7cbaab..c0e25c75c105 100644
> > > > > > > > > > > > ---
> > > > > > > > > > > > a/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > > ning.c
> > > > > > > > > > > > +++
> > > > > > > > > > > > b/drivers/gpu/drm/i915/display/intel_dp_link_trai
> > > > > > > > > > > > ning.c
> > > > > > > > > > > > @@ -82,6 +82,18 @@ static void
> > > > > > > > > > > > intel_dp_read_lttpr_phy_caps(struct intel_dp
> > > > > > > > > > > > *intel_dp,
> > > > > > > > > > > >
> > > > > > > > > > > >  static bool
> > > > > > > > > > > > intel_dp_read_lttpr_common_caps(struct intel_dp
> > > > > > > > > > > > *intel_dp)
> > > > > > > > > > > >  {
> > > > > > > > > > > > +struct drm_i915_private *i915 =
> > > > > > > > > > > > dp_to_i915(intel_dp);
> > > > > > > > > > > > +
> > > > > > > > > > > > +if (intel_dp_is_edp(intel_dp))
> > > > > > > > > > > > +return false;
> > > > > > > > > > > > +
> > > > > > > > > > > > +/*
> > > > > > > > > > > > + * Detecting LTTPRs must be avoided on platforms
> > > > > > > > > > > > with
> > > > > > > > > > > > an AUX timeout
> > > > > > > > > > > > + * period < 3.2ms. (see DP Standard v2.0,
> > > > > > > > > > > > 2.11.2,
> > > > > > > > > > > > 3.6.6.1).
> > > > > > > > > > > > + */
> > > > > > > > > > > > +if (INTEL_GEN(i915) < 10)
> > > > > > > > > > > > +return false;
> > > > > > > > > > > > +
> > > > > > > > > > > > >  if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
> > > > > > > > > > > > >    intel_dp->lttpr_common_caps) < 0) {
> > > > > > > > > > > >  memset(intel_dp->lttpr_common_caps, 0,
> > > > > > > > > > > > @@ -127,9 +139,6 @@ int
> > > > > > > > > > > > intel_dp_lttpr_init(struct intel_dp
> > > > > > > > > > > > *intel_dp)
> > > > > > > > > > > >  bool ret;
> > > > > > > > > > > >  int i;
> > > > > > > > > > > >
> > > > > > > > > > > > -if (intel_dp_is_edp(intel_dp))
> > > > > > > > > > > > -return 0;
> > > > > > > > > > > > -
> > > > > > > > > > > >  ret = intel_dp_read_lttpr_common_caps(intel_dp);
> > > > > > > > > > > >  if (!ret)
> > > > > > > > > > > >  return 0;
> > > > > > > > > > > > --
> > > > > > > > > > > > 2.25.1
> > > > > > > > > > >
> > > > > > > > > > > --
> > > > > > > > > > > Ville Syrjälä
> > > > > > > > > > > Intel
> > > > > >
> > > > > > --
> > > > > > Sincerely,
> > > > > >    Lyude Paul (she/her)
> > > > > >    Software Engineer at Red Hat
> > > > > >
> > > > > > Note: I deal with a lot of emails and have a lot of bugs on
> > > > > > my plate. If
> > > > > > you've
> > > > > > asked me a question, are waiting for a review/merge on a
> > > > > > patch, etc. and I
> > > > > > haven't responded in a while, please feel free to send me
> > > > > > another email to
> > > > > > check
> > > > > > on my status. I don't bite!
> > > > > >
> > > >
> > > > --
> > > > Sincerely,
> > > >    Lyude Paul (she/her)
> > > >    Software Engineer at Red Hat
> > > >
> > > > Note: I deal with a lot of emails and have a lot of bugs on my
> > > > plate. If you've
> > > > asked me a question, are waiting for a review/merge on a patch,
> > > > etc. and I
> > > > haven't responded in a while, please feel free to send me another
> > > > email to check
> > > > on my status. I don't bite!
> > > >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
index eaebf123310a..10fe17b7280d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
@@ -133,6 +133,7 @@  static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
 	else
 		precharge = 5;
 
+	/* Max timeout value on G4x-BDW: 1.6ms */
 	if (IS_BROADWELL(dev_priv))
 		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
 	else
@@ -159,6 +160,12 @@  static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
 	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
 	u32 ret;
 
+	/*
+	 * Max timeout values:
+	 * SKL-GLK: 1.6ms
+	 * CNL: 3.2ms
+	 * ICL+: 4ms
+	 */
 	ret = DP_AUX_CH_CTL_SEND_BUSY |
 	      DP_AUX_CH_CTL_DONE |
 	      DP_AUX_CH_CTL_INTERRUPT |
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 19ba7c7cbaab..c0e25c75c105 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -82,6 +82,18 @@  static void intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
 
 static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp)
 {
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+	if (intel_dp_is_edp(intel_dp))
+		return false;
+
+	/*
+	 * Detecting LTTPRs must be avoided on platforms with an AUX timeout
+	 * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1).
+	 */
+	if (INTEL_GEN(i915) < 10)
+		return false;
+
 	if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
 					  intel_dp->lttpr_common_caps) < 0) {
 		memset(intel_dp->lttpr_common_caps, 0,
@@ -127,9 +139,6 @@  int intel_dp_lttpr_init(struct intel_dp *intel_dp)
 	bool ret;
 	int i;
 
-	if (intel_dp_is_edp(intel_dp))
-		return 0;
-
 	ret = intel_dp_read_lttpr_common_caps(intel_dp);
 	if (!ret)
 		return 0;