diff mbox

[v2,3/5] drm/i915: Move SST DP link retraining into the ->post_hotplug() hook

Message ID 20180117192149.17760-3-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ville Syrjälä Jan. 17, 2018, 7:21 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

Doing link retraining from the short pulse handler is problematic since
that might introduce deadlocks with MST sideband processing. Currently
we don't retrain MST links from this code, but we want to change that.
So better to move the entire thing to the hotplug work. We can utilize
the new encoder->hotplug() hook for this.

The only thing we leave in the short pulse handler is the link status
check. That one still depends on the link parameters stored under
intel_dp, so no locking around that but races should be mostly harmless
as the actual retraining code will recheck the link state if we
end up there by mistake.

v2: Rebase due to ->post_hotplug() now being just ->hotplug()
    Check the connector type to figure out if we should do
    the HDMI thing or the DP think for DDI

Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_ddi.c |  10 +-
 drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++-----------------
 drivers/gpu/drm/i915/intel_drv.h |   2 +
 3 files changed, 120 insertions(+), 88 deletions(-)

Comments

Lyude Paul Jan. 30, 2018, 11:16 p.m. UTC | #1
On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> Doing link retraining from the short pulse handler is problematic since
> that might introduce deadlocks with MST sideband processing. Currently
> we don't retrain MST links from this code, but we want to change that.
> So better to move the entire thing to the hotplug work. We can utilize
> the new encoder->hotplug() hook for this.
> 
> The only thing we leave in the short pulse handler is the link status
> check. That one still depends on the link parameters stored under
> intel_dp, so no locking around that but races should be mostly harmless
> as the actual retraining code will recheck the link state if we
> end up there by mistake.
> 
> v2: Rebase due to ->post_hotplug() now being just ->hotplug()
>     Check the connector type to figure out if we should do
>     the HDMI thing or the DP think for DDI
> 
> Cc: Manasi Navare <manasi.d.navare@intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
>  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++--------------
> ---
>  drivers/gpu/drm/i915/intel_drv.h |   2 +
>  3 files changed, 120 insertions(+), 88 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> b/drivers/gpu/drm/i915/intel_ddi.c
> index 25793bdc692f..5f3d58f1ae6e 100644
> --- a/drivers/gpu/drm/i915/intel_ddi.c
> +++ b/drivers/gpu/drm/i915/intel_ddi.c
> @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct intel_encoder
> *encoder,
>  	drm_modeset_acquire_init(&ctx, 0);
>  
>  	for (;;) {
> -		ret = intel_hdmi_reset_link(encoder, &ctx);
> +		if (connector->base.connector_type ==
> DRM_MODE_CONNECTOR_HDMIA)
> +			ret = intel_hdmi_reset_link(encoder, &ctx);
> +		else
> +			ret = intel_dp_retrain_link(encoder, &ctx);
>  
>  		if (ret == -EDEADLK) {
>  			drm_modeset_backoff(&ctx);
> @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv,
> enum port port)
>  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
>  			 DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
>  
> -	if (init_hdmi)
> -		intel_encoder->hotplug = intel_ddi_hotplug;
> -	else
> -		intel_encoder->hotplug = intel_encoder_hotplug;
> +	intel_encoder->hotplug = intel_ddi_hotplug;
>  	intel_encoder->compute_output_type = intel_ddi_compute_output_type;
>  	intel_encoder->compute_config = intel_ddi_compute_config;
>  	intel_encoder->enable = intel_enable_ddi;
> diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> index 6bbf14410c2a..152016e09a11 100644
> --- a/drivers/gpu/drm/i915/intel_dp.c
> +++ b/drivers/gpu/drm/i915/intel_dp.c
> @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp)
>  	return -EINVAL;
>  }
>  
> -static void
> -intel_dp_retrain_link(struct intel_dp *intel_dp)
> +static bool
> +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> +{
> +	u8 link_status[DP_LINK_STATUS_SIZE];
> +
> +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> +		DRM_ERROR("Failed to get link status\n");
> +		return false;
> +	}
> +
> +	/*
> +	 * Validate the cached values of intel_dp->link_rate and
> +	 * intel_dp->lane_count before attempting to retrain.
> +	 */
> +	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> +					intel_dp->lane_count))
> +		return false;
> +
> +	/* Retrain if Channel EQ or CR not ok */
> +	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
> +}
> +
> +/*
> + * If display is now connected check links status,
> + * there has been known issues of link loss triggering
> + * long pulse.
> + *
> + * Some sinks (eg. ASUS PB287Q) seem to perform some
> + * weird HPD ping pong during modesets. So we can apparently
> + * end up with HPD going low during a modeset, and then
> + * going back up soon after. And once that happens we must
> + * retrain the link to get a picture. That's in case no
> + * userspace component reacted to intermittent HPD dip.
> + */
> +int intel_dp_retrain_link(struct intel_encoder *encoder,
> +			  struct drm_modeset_acquire_ctx *ctx)
>  {
> -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
>  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
> -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> +	struct intel_connector *connector = intel_dp->attached_connector;
> +	struct drm_connector_state *conn_state;
> +	struct intel_crtc_state *crtc_state;
> +	struct intel_crtc *crtc;
> +	int ret;
> +
> +	/* FIXME handle the MST connectors as well */
> +
> +	if (!connector || connector->base.status !=
> connector_status_connected)
> +		return 0;
> +
> +	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
> ctx);
> +	if (ret)
> +		return ret;
> +
> +	conn_state = connector->base.state;
> +
> +	crtc = to_intel_crtc(conn_state->crtc);
> +	if (!crtc)
> +		return 0;
> +
> +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> +	if (ret)
> +		return ret;
> +
> +	crtc_state = to_intel_crtc_state(crtc->base.state);
> +
> +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> +
> +	if (!crtc_state->base.active)
> +		return 0;
> +
> +	if (conn_state->commit &&
> +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> +		return 0;
> +
> +	if (!intel_dp_needs_link_retrain(intel_dp))
> +		return 0;
NAK, this definitely won't work for implementing MST retraining. There's some
pretty huge differences with how retraining needs to be handled on SST vs. MST.
An example with some normal SST sink vs. what happens on my caldigit TS3

SST:
    1. commit modeset, everything is OK
    2. something happens, sink sends shortpulse and changes link status registers
    in dpcd
    3. Source receives short pulse, tries retraining five times
    4. if this succeeds:
        5. we're done here
    6. if this fails:
        7. mark link status as bad
        8. get fallback parameters
        9. hotplug event

MST (i915 doesn't do this yet, but this is generally how it needs to be
handled):
    1. commit modeset, everything is OK
    2. something happens (in my case, the MST hub discovers it had the wrong max
    link rate/lane count), sink sends ESI indicating channel EQ has failed
    3. retraining commences with five retries.
    4. if this succeeds:
       5. continue
    6. if this fails (I actually haven't seen this once yet)
        7. mark link status as bad on all downstream connectors
        8. get fallback parameters
        9. hotplug event
    10. the retrain didn't actually work (despite what the SST link status
    registers told us). go back to step 3 five more times
    11. if this fails:
        12. mark link status as bad on all downstream connectors
        13. get fallback parameters
        14. hotplug event

simply put: we really should keep the "do we need to retrain?" logic out of the
actual retraining helpers so that SST/MST codepaths can do their own checks to
figure this out.

>  
>  	/* Suppress underruns caused by re-training */
>  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
> @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp)
>  	if (crtc->config->has_pch_encoder)
>  		intel_set_pch_fifo_underrun_reporting(dev_priv,
>  						      intel_crtc_pch_transcod
> er(crtc), true);
> +
> +	return 0;
>  }
>  
> -static void
> -intel_dp_check_link_status(struct intel_dp *intel_dp)
> +/*
> + * If display is now connected check links status,
> + * there has been known issues of link loss triggering
> + * long pulse.
> + *
> + * Some sinks (eg. ASUS PB287Q) seem to perform some
> + * weird HPD ping pong during modesets. So we can apparently
> + * end up with HPD going low during a modeset, and then
> + * going back up soon after. And once that happens we must
> + * retrain the link to get a picture. That's in case no
> + * userspace component reacted to intermittent HPD dip.
> + */
> +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> +			     struct intel_connector *connector)
>  {
> -	struct drm_i915_private *dev_priv =
> to_i915(intel_dp_to_dev(intel_dp));
> -	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)-
> >base;
> -	struct drm_connector_state *conn_state =
> -		intel_dp->attached_connector->base.state;
> -	u8 link_status[DP_LINK_STATUS_SIZE];
> -
> -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> >drm.mode_config.connection_mutex));
> -
> -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> -		DRM_ERROR("Failed to get link status\n");
> -		return;
> -	}
> +	struct drm_modeset_acquire_ctx ctx;
> +	bool changed;
> +	int ret;
>  
> -	if (!conn_state->crtc)
> -		return;
> +	changed = intel_encoder_hotplug(encoder, connector);
>  
> -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> +	drm_modeset_acquire_init(&ctx, 0);
>  
> -	if (!conn_state->crtc->state->active)
> -		return;
> +	for (;;) {
> +		ret = intel_dp_retrain_link(encoder, &ctx);
>  
> -	if (conn_state->commit &&
> -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> -		return;
> +		if (ret == -EDEADLK) {
> +			drm_modeset_backoff(&ctx);
> +			continue;
> +		}
>  
> -	/*
> -	 * Validate the cached values of intel_dp->link_rate and
> -	 * intel_dp->lane_count before attempting to retrain.
> -	 */
> -	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> -					intel_dp->lane_count))
> -		return;
> +		break;
> +	}
>  
> -	/* Retrain if Channel EQ or CR not ok */
> -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
> -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> -			      intel_encoder->base.name);
> +	drm_modeset_drop_locks(&ctx);
> +	drm_modeset_acquire_fini(&ctx);
> +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
>  
> -		intel_dp_retrain_link(intel_dp);
> -	}
> +	return changed;
>  }
>  
>  /*
> @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
>  			DRM_DEBUG_DRIVER("CP or sink specific irq
> unhandled\n");
>  	}
>  
> -	intel_dp_check_link_status(intel_dp);
> +	/* defer to the hotplug work for link retraining if needed */
> +	if (intel_dp_needs_link_retrain(intel_dp))
> +		return false;
>  
>  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
>  		DRM_DEBUG_KMS("Link Training Compliance Test requested\n");
> @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector *connector)
>  		 */
>  		status = connector_status_disconnected;
>  		goto out;
> -	} else {
> -		/*
> -		 * If display is now connected check links status,
> -		 * there has been known issues of link loss triggerring
> -		 * long pulse.
> -		 *
> -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> -		 * weird HPD ping pong during modesets. So we can apparently
> -		 * end up with HPD going low during a modeset, and then
> -		 * going back up soon after. And once that happens we must
> -		 * retrain the link to get a picture. That's in case no
> -		 * userspace component reacted to intermittent HPD dip.
> -		 */
> -		intel_dp_check_link_status(intel_dp);
>  	}
>  
>  	/*
> @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> *intel_dig_port, bool long_hpd)
>  	}
>  
>  	if (!intel_dp->is_mst) {
> -		struct drm_modeset_acquire_ctx ctx;
> -		struct drm_connector *connector = &intel_dp-
> >attached_connector->base;
> -		struct drm_crtc *crtc;
> -		int iret;
> -		bool handled = false;
> -
> -		drm_modeset_acquire_init(&ctx, 0);
> -retry:
> -		iret = drm_modeset_lock(&dev_priv-
> >drm.mode_config.connection_mutex, &ctx);
> -		if (iret)
> -			goto err;
> -
> -		crtc = connector->state->crtc;
> -		if (crtc) {
> -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> -			if (iret)
> -				goto err;
> -		}
> +		bool handled;
>  
>  		handled = intel_dp_short_pulse(intel_dp);
>  
> -err:
> -		if (iret == -EDEADLK) {
> -			drm_modeset_backoff(&ctx);
> -			goto retry;
> -		}
> -
> -		drm_modeset_drop_locks(&ctx);
> -		drm_modeset_acquire_fini(&ctx);
> -		WARN(iret, "Acquiring modeset locks failed with %i\n", iret);
> -
>  		/* Short pulse can signify loss of hdcp authentication */
>  		intel_hdcp_check_link(intel_dp->attached_connector);
>  
> @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
>  			     "DP %c", port_name(port)))
>  		goto err_encoder_init;
>  
> -	intel_encoder->hotplug = intel_encoder_hotplug;
> +	intel_encoder->hotplug = intel_dp_hotplug;
>  	intel_encoder->compute_config = intel_dp_compute_config;
>  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
>  	intel_encoder->get_config = intel_dp_get_config;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index 5ea1dc3f63bf..ddf28a442cd7 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1611,6 +1611,8 @@ int intel_dp_get_link_train_fallback_values(struct
> intel_dp *intel_dp,
>  					    int link_rate, uint8_t
> lane_count);
>  void intel_dp_start_link_train(struct intel_dp *intel_dp);
>  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> +int intel_dp_retrain_link(struct intel_encoder *encoder,
> +			  struct drm_modeset_acquire_ctx *ctx);
>  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
>  void intel_dp_encoder_reset(struct drm_encoder *encoder);
>  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
Ville Syrjälä Jan. 31, 2018, 1:27 p.m. UTC | #2
On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > Doing link retraining from the short pulse handler is problematic since
> > that might introduce deadlocks with MST sideband processing. Currently
> > we don't retrain MST links from this code, but we want to change that.
> > So better to move the entire thing to the hotplug work. We can utilize
> > the new encoder->hotplug() hook for this.
> > 
> > The only thing we leave in the short pulse handler is the link status
> > check. That one still depends on the link parameters stored under
> > intel_dp, so no locking around that but races should be mostly harmless
> > as the actual retraining code will recheck the link state if we
> > end up there by mistake.
> > 
> > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> >     Check the connector type to figure out if we should do
> >     the HDMI thing or the DP think for DDI
> > 
> > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++--------------
> > ---
> >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> >  3 files changed, 120 insertions(+), 88 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > b/drivers/gpu/drm/i915/intel_ddi.c
> > index 25793bdc692f..5f3d58f1ae6e 100644
> > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct intel_encoder
> > *encoder,
> >  	drm_modeset_acquire_init(&ctx, 0);
> >  
> >  	for (;;) {
> > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > +		if (connector->base.connector_type ==
> > DRM_MODE_CONNECTOR_HDMIA)
> > +			ret = intel_hdmi_reset_link(encoder, &ctx);
> > +		else
> > +			ret = intel_dp_retrain_link(encoder, &ctx);
> >  
> >  		if (ret == -EDEADLK) {
> >  			drm_modeset_backoff(&ctx);
> > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv,
> > enum port port)
> >  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
> >  			 DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
> >  
> > -	if (init_hdmi)
> > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > -	else
> > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > +	intel_encoder->hotplug = intel_ddi_hotplug;
> >  	intel_encoder->compute_output_type = intel_ddi_compute_output_type;
> >  	intel_encoder->compute_config = intel_ddi_compute_config;
> >  	intel_encoder->enable = intel_enable_ddi;
> > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > index 6bbf14410c2a..152016e09a11 100644
> > --- a/drivers/gpu/drm/i915/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp)
> >  	return -EINVAL;
> >  }
> >  
> > -static void
> > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > +static bool
> > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > +{
> > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > +
> > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > +		DRM_ERROR("Failed to get link status\n");
> > +		return false;
> > +	}
> > +
> > +	/*
> > +	 * Validate the cached values of intel_dp->link_rate and
> > +	 * intel_dp->lane_count before attempting to retrain.
> > +	 */
> > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> > +					intel_dp->lane_count))
> > +		return false;
> > +
> > +	/* Retrain if Channel EQ or CR not ok */
> > +	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
> > +}
> > +
> > +/*
> > + * If display is now connected check links status,
> > + * there has been known issues of link loss triggering
> > + * long pulse.
> > + *
> > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > + * weird HPD ping pong during modesets. So we can apparently
> > + * end up with HPD going low during a modeset, and then
> > + * going back up soon after. And once that happens we must
> > + * retrain the link to get a picture. That's in case no
> > + * userspace component reacted to intermittent HPD dip.
> > + */
> > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > +			  struct drm_modeset_acquire_ctx *ctx)
> >  {
> > -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
> >  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
> > -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> > +	struct intel_connector *connector = intel_dp->attached_connector;
> > +	struct drm_connector_state *conn_state;
> > +	struct intel_crtc_state *crtc_state;
> > +	struct intel_crtc *crtc;
> > +	int ret;
> > +
> > +	/* FIXME handle the MST connectors as well */
> > +
> > +	if (!connector || connector->base.status !=
> > connector_status_connected)
> > +		return 0;
> > +
> > +	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
> > ctx);
> > +	if (ret)
> > +		return ret;
> > +
> > +	conn_state = connector->base.state;
> > +
> > +	crtc = to_intel_crtc(conn_state->crtc);
> > +	if (!crtc)
> > +		return 0;
> > +
> > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > +	if (ret)
> > +		return ret;
> > +
> > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > +
> > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > +
> > +	if (!crtc_state->base.active)
> > +		return 0;
> > +
> > +	if (conn_state->commit &&
> > +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > +		return 0;
> > +
> > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > +		return 0;
> NAK, this definitely won't work for implementing MST retraining. There's some
> pretty huge differences with how retraining needs to be handled on SST vs. MST.
> An example with some normal SST sink vs. what happens on my caldigit TS3
> 
> SST:
>     1. commit modeset, everything is OK
>     2. something happens, sink sends shortpulse and changes link status registers
>     in dpcd
>     3. Source receives short pulse, tries retraining five times
>     4. if this succeeds:
>         5. we're done here
>     6. if this fails:
>         7. mark link status as bad
>         8. get fallback parameters
>         9. hotplug event
> 
> MST (i915 doesn't do this yet, but this is generally how it needs to be
> handled):
>     1. commit modeset, everything is OK
>     2. something happens (in my case, the MST hub discovers it had the wrong max
>     link rate/lane count), sink sends ESI indicating channel EQ has failed
>     3. retraining commences with five retries.
>     4. if this succeeds:
>        5. continue
>     6. if this fails (I actually haven't seen this once yet)
>         7. mark link status as bad on all downstream connectors
>         8. get fallback parameters
>         9. hotplug event
>     10. the retrain didn't actually work (despite what the SST link status
>     registers told us). go back to step 3 five more times
>     11. if this fails:
>         12. mark link status as bad on all downstream connectors
>         13. get fallback parameters
>         14. hotplug event
> 
> simply put: we really should keep the "do we need to retrain?" logic out of the
> actual retraining helpers so that SST/MST codepaths can do their own checks to
> figure this out.

No, we need it since we want to check it *after* any modeset has
finished. With MST I think what we'll want to do is find all the pipes
affected by the link failure, lock them, and wait until they're done
with their modesets, then we check the link state. If it's bad we
proceed to retrain the link.

So basically just walking over the MST encoders in addition to the
SST encoder, and repeating most of the steps in this code for each.
Hence the the MST FIXME I left in there ;)

> 
> >  
> >  	/* Suppress underruns caused by re-training */
> >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
> > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp)
> >  	if (crtc->config->has_pch_encoder)
> >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> >  						      intel_crtc_pch_transcod
> > er(crtc), true);
> > +
> > +	return 0;
> >  }
> >  
> > -static void
> > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > +/*
> > + * If display is now connected check links status,
> > + * there has been known issues of link loss triggering
> > + * long pulse.
> > + *
> > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > + * weird HPD ping pong during modesets. So we can apparently
> > + * end up with HPD going low during a modeset, and then
> > + * going back up soon after. And once that happens we must
> > + * retrain the link to get a picture. That's in case no
> > + * userspace component reacted to intermittent HPD dip.
> > + */
> > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > +			     struct intel_connector *connector)
> >  {
> > -	struct drm_i915_private *dev_priv =
> > to_i915(intel_dp_to_dev(intel_dp));
> > -	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)-
> > >base;
> > -	struct drm_connector_state *conn_state =
> > -		intel_dp->attached_connector->base.state;
> > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > -
> > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > >drm.mode_config.connection_mutex));
> > -
> > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > -		DRM_ERROR("Failed to get link status\n");
> > -		return;
> > -	}
> > +	struct drm_modeset_acquire_ctx ctx;
> > +	bool changed;
> > +	int ret;
> >  
> > -	if (!conn_state->crtc)
> > -		return;
> > +	changed = intel_encoder_hotplug(encoder, connector);
> >  
> > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> > +	drm_modeset_acquire_init(&ctx, 0);
> >  
> > -	if (!conn_state->crtc->state->active)
> > -		return;
> > +	for (;;) {
> > +		ret = intel_dp_retrain_link(encoder, &ctx);
> >  
> > -	if (conn_state->commit &&
> > -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > -		return;
> > +		if (ret == -EDEADLK) {
> > +			drm_modeset_backoff(&ctx);
> > +			continue;
> > +		}
> >  
> > -	/*
> > -	 * Validate the cached values of intel_dp->link_rate and
> > -	 * intel_dp->lane_count before attempting to retrain.
> > -	 */
> > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> > -					intel_dp->lane_count))
> > -		return;
> > +		break;
> > +	}
> >  
> > -	/* Retrain if Channel EQ or CR not ok */
> > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
> > -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> > -			      intel_encoder->base.name);
> > +	drm_modeset_drop_locks(&ctx);
> > +	drm_modeset_acquire_fini(&ctx);
> > +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
> >  
> > -		intel_dp_retrain_link(intel_dp);
> > -	}
> > +	return changed;
> >  }
> >  
> >  /*
> > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
> >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > unhandled\n");
> >  	}
> >  
> > -	intel_dp_check_link_status(intel_dp);
> > +	/* defer to the hotplug work for link retraining if needed */
> > +	if (intel_dp_needs_link_retrain(intel_dp))
> > +		return false;
> >  
> >  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
> >  		DRM_DEBUG_KMS("Link Training Compliance Test requested\n");
> > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector *connector)
> >  		 */
> >  		status = connector_status_disconnected;
> >  		goto out;
> > -	} else {
> > -		/*
> > -		 * If display is now connected check links status,
> > -		 * there has been known issues of link loss triggerring
> > -		 * long pulse.
> > -		 *
> > -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> > -		 * weird HPD ping pong during modesets. So we can apparently
> > -		 * end up with HPD going low during a modeset, and then
> > -		 * going back up soon after. And once that happens we must
> > -		 * retrain the link to get a picture. That's in case no
> > -		 * userspace component reacted to intermittent HPD dip.
> > -		 */
> > -		intel_dp_check_link_status(intel_dp);
> >  	}
> >  
> >  	/*
> > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> > *intel_dig_port, bool long_hpd)
> >  	}
> >  
> >  	if (!intel_dp->is_mst) {
> > -		struct drm_modeset_acquire_ctx ctx;
> > -		struct drm_connector *connector = &intel_dp-
> > >attached_connector->base;
> > -		struct drm_crtc *crtc;
> > -		int iret;
> > -		bool handled = false;
> > -
> > -		drm_modeset_acquire_init(&ctx, 0);
> > -retry:
> > -		iret = drm_modeset_lock(&dev_priv-
> > >drm.mode_config.connection_mutex, &ctx);
> > -		if (iret)
> > -			goto err;
> > -
> > -		crtc = connector->state->crtc;
> > -		if (crtc) {
> > -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> > -			if (iret)
> > -				goto err;
> > -		}
> > +		bool handled;
> >  
> >  		handled = intel_dp_short_pulse(intel_dp);
> >  
> > -err:
> > -		if (iret == -EDEADLK) {
> > -			drm_modeset_backoff(&ctx);
> > -			goto retry;
> > -		}
> > -
> > -		drm_modeset_drop_locks(&ctx);
> > -		drm_modeset_acquire_fini(&ctx);
> > -		WARN(iret, "Acquiring modeset locks failed with %i\n", iret);
> > -
> >  		/* Short pulse can signify loss of hdcp authentication */
> >  		intel_hdcp_check_link(intel_dp->attached_connector);
> >  
> > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
> >  			     "DP %c", port_name(port)))
> >  		goto err_encoder_init;
> >  
> > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > +	intel_encoder->hotplug = intel_dp_hotplug;
> >  	intel_encoder->compute_config = intel_dp_compute_config;
> >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> >  	intel_encoder->get_config = intel_dp_get_config;
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > b/drivers/gpu/drm/i915/intel_drv.h
> > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -1611,6 +1611,8 @@ int intel_dp_get_link_train_fallback_values(struct
> > intel_dp *intel_dp,
> >  					    int link_rate, uint8_t
> > lane_count);
> >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > +			  struct drm_modeset_acquire_ctx *ctx);
> >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> >  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
Navare, Manasi Feb. 28, 2018, 7:17 a.m. UTC | #3
Ville,  thanks for the patch and
Sorry for not being able to review this earlier.
Please find some comments below:

On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > Doing link retraining from the short pulse handler is problematic since
> > > that might introduce deadlocks with MST sideband processing. Currently
> > > we don't retrain MST links from this code, but we want to change that.
> > > So better to move the entire thing to the hotplug work. We can utilize
> > > the new encoder->hotplug() hook for this.
> > > 
> > > The only thing we leave in the short pulse handler is the link status
> > > check. That one still depends on the link parameters stored under
> > > intel_dp, so no locking around that but races should be mostly harmless
> > > as the actual retraining code will recheck the link state if we
> > > end up there by mistake.
> > > 
> > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > >     Check the connector type to figure out if we should do
> > >     the HDMI thing or the DP think for DDI
> > > 
> > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++--------------
> > > ---
> > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct intel_encoder
> > > *encoder,
> > >  	drm_modeset_acquire_init(&ctx, 0);
> > >  
> > >  	for (;;) {
> > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > +		if (connector->base.connector_type ==
> > > DRM_MODE_CONNECTOR_HDMIA)
> > > +			ret = intel_hdmi_reset_link(encoder, &ctx);
> > > +		else
> > > +			ret = intel_dp_retrain_link(encoder, &ctx);
> > >  
> > >  		if (ret == -EDEADLK) {
> > >  			drm_modeset_backoff(&ctx);
> > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv,
> > > enum port port)
> > >  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
> > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
> > >  
> > > -	if (init_hdmi)
> > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > -	else
> > > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > >  	intel_encoder->compute_output_type = intel_ddi_compute_output_type;
> > >  	intel_encoder->compute_config = intel_ddi_compute_config;
> > >  	intel_encoder->enable = intel_enable_ddi;
> > > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
> > > index 6bbf14410c2a..152016e09a11 100644
> > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp)
> > >  	return -EINVAL;
> > >  }
> > >  
> > > -static void
> > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > +static bool
> > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > +{
> > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > +
> > > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > +		DRM_ERROR("Failed to get link status\n");
> > > +		return false;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Validate the cached values of intel_dp->link_rate and
> > > +	 * intel_dp->lane_count before attempting to retrain.
> > > +	 */
> > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> > > +					intel_dp->lane_count))
> > > +		return false;
> > > +
> > > +	/* Retrain if Channel EQ or CR not ok */
> > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
> > > +}
> > > +
> > > +/*
> > > + * If display is now connected check links status,
> > > + * there has been known issues of link loss triggering
> > > + * long pulse.
> > > + *
> > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > + * weird HPD ping pong during modesets. So we can apparently
> > > + * end up with HPD going low during a modeset, and then
> > > + * going back up soon after. And once that happens we must
> > > + * retrain the link to get a picture. That's in case no
> > > + * userspace component reacted to intermittent HPD dip.
> > > + */
> > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > +			  struct drm_modeset_acquire_ctx *ctx)
> > >  {
> > > -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
> > >  	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
> > > -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> > > +	struct intel_connector *connector = intel_dp->attached_connector;
> > > +	struct drm_connector_state *conn_state;
> > > +	struct intel_crtc_state *crtc_state;
> > > +	struct intel_crtc *crtc;
> > > +	int ret;
> > > +
> > > +	/* FIXME handle the MST connectors as well */
> > > +
> > > +	if (!connector || connector->base.status !=
> > > connector_status_connected)
> > > +		return 0;
> > > +
> > > +	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
> > > ctx);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	conn_state = connector->base.state;
> > > +
> > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > +	if (!crtc)
> > > +		return 0;
> > > +
> > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > +
> > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > +
> > > +	if (!crtc_state->base.active)
> > > +		return 0;
> > > +
> > > +	if (conn_state->commit &&
> > > +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > +		return 0;
> > > +
> > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > +		return 0;
> > NAK, this definitely won't work for implementing MST retraining. There's some
> > pretty huge differences with how retraining needs to be handled on SST vs. MST.
> > An example with some normal SST sink vs. what happens on my caldigit TS3
> > 
> > SST:
> >     1. commit modeset, everything is OK
> >     2. something happens, sink sends shortpulse and changes link status registers
> >     in dpcd
> >     3. Source receives short pulse, tries retraining five times
> >     4. if this succeeds:
> >         5. we're done here
> >     6. if this fails:
> >         7. mark link status as bad
> >         8. get fallback parameters
> >         9. hotplug event
> > 
> > MST (i915 doesn't do this yet, but this is generally how it needs to be
> > handled):
> >     1. commit modeset, everything is OK
> >     2. something happens (in my case, the MST hub discovers it had the wrong max
> >     link rate/lane count), sink sends ESI indicating channel EQ has failed
> >     3. retraining commences with five retries.
> >     4. if this succeeds:
> >        5. continue
> >     6. if this fails (I actually haven't seen this once yet)
> >         7. mark link status as bad on all downstream connectors
> >         8. get fallback parameters
> >         9. hotplug event
> >     10. the retrain didn't actually work (despite what the SST link status
> >     registers told us). go back to step 3 five more times
> >     11. if this fails:
> >         12. mark link status as bad on all downstream connectors
> >         13. get fallback parameters
> >         14. hotplug event
> > 
> > simply put: we really should keep the "do we need to retrain?" logic out of the
> > actual retraining helpers so that SST/MST codepaths can do their own checks to
> > figure this out.
> 
> No, we need it since we want to check it *after* any modeset has
> finished. With MST I think what we'll want to do is find all the pipes
> affected by the link failure, lock them, and wait until they're done
> with their modesets, then we check the link state. If it's bad we
> proceed to retrain the link.
> 
> So basically just walking over the MST encoders in addition to the
> SST encoder, and repeating most of the steps in this code for each.
> Hence the the MST FIXME I left in there ;)
> 

Lyude,

I agree with Ville here, can we add the MST retrain required check from
within the intel_dp_retrain_link()? So for now the FIXME should be left there
and MST retraining check can be added from your patches.

Manasi

> > 
> > >  
> > >  	/* Suppress underruns caused by re-training */
> > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
> > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp *intel_dp)
> > >  	if (crtc->config->has_pch_encoder)
> > >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> > >  						      intel_crtc_pch_transcod
> > > er(crtc), true);
> > > +
> > > +	return 0;
> > >  }
> > >  
> > > -static void
> > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > +/*
> > > + * If display is now connected check links status,
> > > + * there has been known issues of link loss triggering
> > > + * long pulse.
> > > + *
> > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > + * weird HPD ping pong during modesets. So we can apparently
> > > + * end up with HPD going low during a modeset, and then
> > > + * going back up soon after. And once that happens we must
> > > + * retrain the link to get a picture. That's in case no
> > > + * userspace component reacted to intermittent HPD dip.
> > > + */
> > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > +			     struct intel_connector *connector)
> > >  {
> > > -	struct drm_i915_private *dev_priv =
> > > to_i915(intel_dp_to_dev(intel_dp));
> > > -	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)-
> > > >base;
> > > -	struct drm_connector_state *conn_state =
> > > -		intel_dp->attached_connector->base.state;
> > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > -
> > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > >drm.mode_config.connection_mutex));
> > > -
> > > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > -		DRM_ERROR("Failed to get link status\n");
> > > -		return;
> > > -	}
> > > +	struct drm_modeset_acquire_ctx ctx;
> > > +	bool changed;
> > > +	int ret;
> > >  
> > > -	if (!conn_state->crtc)
> > > -		return;
> > > +	changed = intel_encoder_hotplug(encoder, connector);
> > >  
> > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> > > +	drm_modeset_acquire_init(&ctx, 0);
> > >  
> > > -	if (!conn_state->crtc->state->active)
> > > -		return;
> > > +	for (;;) {

Here if this is getting executed due to hpd ping pong during the modeset
and that modeset is already happening at a link fallback parameter then
while we call retrain link, we should also validate the link parameters
so that it doesnt try to retrain with stale values.

I think we need to call intel_dp_link_params_valid() before retrain.

> > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > >  
> > > -	if (conn_state->commit &&
> > > -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > -		return;
> > > +		if (ret == -EDEADLK) {
> > > +			drm_modeset_backoff(&ctx);
> > > +			continue;
> > > +		}
> > >  
> > > -	/*
> > > -	 * Validate the cached values of intel_dp->link_rate and
> > > -	 * intel_dp->lane_count before attempting to retrain.
> > > -	 */
> > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
> > > -					intel_dp->lane_count))
> > > -		return;
> > > +		break;
> > > +	}
> > >  
> > > -	/* Retrain if Channel EQ or CR not ok */
> > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
> > > -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> > > -			      intel_encoder->base.name);
> > > +	drm_modeset_drop_locks(&ctx);
> > > +	drm_modeset_acquire_fini(&ctx);
> > > +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
> > >  
> > > -		intel_dp_retrain_link(intel_dp);
> > > -	}
> > > +	return changed;
> > >  }
> > >  
> > >  /*
> > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
> > >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > > unhandled\n");
> > >  	}
> > >  
> > > -	intel_dp_check_link_status(intel_dp);
> > > +	/* defer to the hotplug work for link retraining if needed */
> > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > +		return false;
> > >  
> > >  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
> > >  		DRM_DEBUG_KMS("Link Training Compliance Test requested\n");
> > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector *connector)
> > >  		 */
> > >  		status = connector_status_disconnected;
> > >  		goto out;
> > > -	} else {
> > > -		/*
> > > -		 * If display is now connected check links status,
> > > -		 * there has been known issues of link loss triggerring
> > > -		 * long pulse.
> > > -		 *
> > > -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > -		 * weird HPD ping pong during modesets. So we can apparently
> > > -		 * end up with HPD going low during a modeset, and then
> > > -		 * going back up soon after. And once that happens we must
> > > -		 * retrain the link to get a picture. That's in case no
> > > -		 * userspace component reacted to intermittent HPD dip.
> > > -		 */
> > > -		intel_dp_check_link_status(intel_dp);
> > >  	}
> > >  
> > >  	/*
> > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> > > *intel_dig_port, bool long_hpd)
> > >  	}
> > >  
> > >  	if (!intel_dp->is_mst) {
> > > -		struct drm_modeset_acquire_ctx ctx;
> > > -		struct drm_connector *connector = &intel_dp-
> > > >attached_connector->base;
> > > -		struct drm_crtc *crtc;
> > > -		int iret;
> > > -		bool handled = false;
> > > -
> > > -		drm_modeset_acquire_init(&ctx, 0);
> > > -retry:
> > > -		iret = drm_modeset_lock(&dev_priv-
> > > >drm.mode_config.connection_mutex, &ctx);
> > > -		if (iret)
> > > -			goto err;
> > > -
> > > -		crtc = connector->state->crtc;
> > > -		if (crtc) {
> > > -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> > > -			if (iret)
> > > -				goto err;
> > > -		}
> > > +		bool handled;
> > >  
> > >  		handled = intel_dp_short_pulse(intel_dp);
> > >  
> > > -err:
> > > -		if (iret == -EDEADLK) {
> > > -			drm_modeset_backoff(&ctx);
> > > -			goto retry;
> > > -		}
> > > -
> > > -		drm_modeset_drop_locks(&ctx);
> > > -		drm_modeset_acquire_fini(&ctx);
> > > -		WARN(iret, "Acquiring modeset locks failed with %i\n", iret);
> > > -
> > >  		/* Short pulse can signify loss of hdcp authentication */
> > >  		intel_hdcp_check_link(intel_dp->attached_connector);
> > >  
> > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
> > >  			     "DP %c", port_name(port)))
> > >  		goto err_encoder_init;
> > >  
> > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > >  	intel_encoder->compute_config = intel_dp_compute_config;
> > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > >  	intel_encoder->get_config = intel_dp_get_config;
> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > b/drivers/gpu/drm/i915/intel_drv.h
> > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > @@ -1611,6 +1611,8 @@ int intel_dp_get_link_train_fallback_values(struct
> > > intel_dp *intel_dp,
> > >  					    int link_rate, uint8_t
> > > lane_count);
> > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > +			  struct drm_modeset_acquire_ctx *ctx);
> > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > >  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lyude Paul Feb. 28, 2018, 7:07 p.m. UTC | #4
On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> Ville,  thanks for the patch and
> Sorry for not being able to review this earlier.
> Please find some comments below:
> 
> On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > 
> > > > Doing link retraining from the short pulse handler is problematic
> > > > since
> > > > that might introduce deadlocks with MST sideband processing. Currently
> > > > we don't retrain MST links from this code, but we want to change that.
> > > > So better to move the entire thing to the hotplug work. We can utilize
> > > > the new encoder->hotplug() hook for this.
> > > > 
> > > > The only thing we leave in the short pulse handler is the link status
> > > > check. That one still depends on the link parameters stored under
> > > > intel_dp, so no locking around that but races should be mostly
> > > > harmless
> > > > as the actual retraining code will recheck the link state if we
> > > > end up there by mistake.
> > > > 
> > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > >     Check the connector type to figure out if we should do
> > > >     the HDMI thing or the DP think for DDI
> > > > 
> > > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++------
> > > > --------
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > intel_encoder
> > > > *encoder,
> > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > >  
> > > >  	for (;;) {
> > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > +		if (connector->base.connector_type ==
> > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > +			ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > +		else
> > > > +			ret = intel_dp_retrain_link(encoder, &ctx);
> > > >  
> > > >  		if (ret == -EDEADLK) {
> > > >  			drm_modeset_backoff(&ctx);
> > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private
> > > > *dev_priv,
> > > > enum port port)
> > > >  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
> > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > port_name(port));
> > > >  
> > > > -	if (init_hdmi)
> > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > -	else
> > > > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > >  	intel_encoder->compute_output_type =
> > > > intel_ddi_compute_output_type;
> > > >  	intel_encoder->compute_config = intel_ddi_compute_config;
> > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > index 6bbf14410c2a..152016e09a11 100644
> > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp
> > > > *intel_dp)
> > > >  	return -EINVAL;
> > > >  }
> > > >  
> > > > -static void
> > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > +static bool
> > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > +{
> > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > +
> > > > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > +		DRM_ERROR("Failed to get link status\n");
> > > > +		return false;
> > > > +	}
> > > > +
> > > > +	/*
> > > > +	 * Validate the cached values of intel_dp->link_rate and
> > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > +	 */
> > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > >link_rate,
> > > > +					intel_dp->lane_count))
> > > > +		return false;
> > > > +
> > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > >lane_count);
> > > > +}
> > > > +
> > > > +/*
> > > > + * If display is now connected check links status,
> > > > + * there has been known issues of link loss triggering
> > > > + * long pulse.
> > > > + *
> > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > + * end up with HPD going low during a modeset, and then
> > > > + * going back up soon after. And once that happens we must
> > > > + * retrain the link to get a picture. That's in case no
> > > > + * userspace component reacted to intermittent HPD dip.
> > > > + */
> > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > +			  struct drm_modeset_acquire_ctx *ctx)
> > > >  {
> > > > -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)-
> > > > >base;
> > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > >base.dev);
> > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> > > > +	struct intel_connector *connector = intel_dp-
> > > > >attached_connector;
> > > > +	struct drm_connector_state *conn_state;
> > > > +	struct intel_crtc_state *crtc_state;
> > > > +	struct intel_crtc *crtc;
> > > > +	int ret;
> > > > +
> > > > +	/* FIXME handle the MST connectors as well */
> > > > +
> > > > +	if (!connector || connector->base.status !=
> > > > connector_status_connected)
> > > > +		return 0;
> > > > +
> > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > >drm.mode_config.connection_mutex,
> > > > ctx);
> > > > +	if (ret)
> > > > +		return ret;
> > > > +
> > > > +	conn_state = connector->base.state;
> > > > +
> > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > +	if (!crtc)
> > > > +		return 0;
> > > > +
> > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > +	if (ret)
> > > > +		return ret;
> > > > +
> > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > +
> > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > +
> > > > +	if (!crtc_state->base.active)
> > > > +		return 0;
> > > > +
> > > > +	if (conn_state->commit &&
> > > > +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > > +		return 0;
> > > > +
> > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > +		return 0;
> > > 
> > > NAK, this definitely won't work for implementing MST retraining. There's
> > > some
> > > pretty huge differences with how retraining needs to be handled on SST
> > > vs. MST.
> > > An example with some normal SST sink vs. what happens on my caldigit TS3
> > > 
> > > SST:
> > >     1. commit modeset, everything is OK
> > >     2. something happens, sink sends shortpulse and changes link status
> > > registers
> > >     in dpcd
> > >     3. Source receives short pulse, tries retraining five times
> > >     4. if this succeeds:
> > >         5. we're done here
> > >     6. if this fails:
> > >         7. mark link status as bad
> > >         8. get fallback parameters
> > >         9. hotplug event
> > > 
> > > MST (i915 doesn't do this yet, but this is generally how it needs to be
> > > handled):
> > >     1. commit modeset, everything is OK
> > >     2. something happens (in my case, the MST hub discovers it had the
> > > wrong max
> > >     link rate/lane count), sink sends ESI indicating channel EQ has
> > > failed
> > >     3. retraining commences with five retries.
> > >     4. if this succeeds:
> > >        5. continue
> > >     6. if this fails (I actually haven't seen this once yet)
> > >         7. mark link status as bad on all downstream connectors
> > >         8. get fallback parameters
> > >         9. hotplug event
> > >     10. the retrain didn't actually work (despite what the SST link
> > > status
> > >     registers told us). go back to step 3 five more times
> > >     11. if this fails:
> > >         12. mark link status as bad on all downstream connectors
> > >         13. get fallback parameters
> > >         14. hotplug event
> > > 
> > > simply put: we really should keep the "do we need to retrain?" logic out
> > > of the
> > > actual retraining helpers so that SST/MST codepaths can do their own
> > > checks to
> > > figure this out.
> > 
> > No, we need it since we want to check it *after* any modeset has
> > finished. With MST I think what we'll want to do is find all the pipes
> > affected by the link failure, lock them, and wait until they're done
> > with their modesets, then we check the link state. If it's bad we
> > proceed to retrain the link.
> > 
> > So basically just walking over the MST encoders in addition to the
> > SST encoder, and repeating most of the steps in this code for each.
> > Hence the the MST FIXME I left in there ;)
> > 
> 
> Lyude,
> 
> I agree with Ville here, can we add the MST retrain required check from
> within the intel_dp_retrain_link()? So for now the FIXME should be left
> there
> and MST retraining check can be added from your patches.
> 
> Manasi

Yep! Sorry I probably should have reiterated this part over email since I only
mentioned it to ville directly: this part should definitely work fine, and
intel_dp_retrain_link() is actually where we handle MST retraining in the
latest version of the retraining series:

jfyi, you can find that here:
https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
It's almost ready for the ML, just want to get the other patches this is going
to rely on in first and fix some small concerns I've got about that series'
current implementation of handling modesets after a failed retrain sequence
> 
> > > 
> > > >  
> > > >  	/* Suppress underruns caused by re-training */
> > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe,
> > > > false);
> > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > *intel_dp)
> > > >  	if (crtc->config->has_pch_encoder)
> > > >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> > > >  						      intel_crtc_pch_
> > > > transcod
> > > > er(crtc), true);
> > > > +
> > > > +	return 0;
> > > >  }
> > > >  
> > > > -static void
> > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > +/*
> > > > + * If display is now connected check links status,
> > > > + * there has been known issues of link loss triggering
> > > > + * long pulse.
> > > > + *
> > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > + * end up with HPD going low during a modeset, and then
> > > > + * going back up soon after. And once that happens we must
> > > > + * retrain the link to get a picture. That's in case no
> > > > + * userspace component reacted to intermittent HPD dip.
> > > > + */
> > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > +			     struct intel_connector *connector)
> > > >  {
> > > > -	struct drm_i915_private *dev_priv =
> > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > -	struct intel_encoder *intel_encoder =
> > > > &dp_to_dig_port(intel_dp)-
> > > > > base;
> > > > 
> > > > -	struct drm_connector_state *conn_state =
> > > > -		intel_dp->attached_connector->base.state;
> > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > -
> > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > drm.mode_config.connection_mutex));
> > > > 
> > > > -
> > > > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > -		DRM_ERROR("Failed to get link status\n");
> > > > -		return;
> > > > -	}
> > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > +	bool changed;
> > > > +	int ret;
> > > >  
> > > > -	if (!conn_state->crtc)
> > > > -		return;
> > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > >  
> > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > >  
> > > > -	if (!conn_state->crtc->state->active)
> > > > -		return;
> > > > +	for (;;) {
> 
> Here if this is getting executed due to hpd ping pong during the modeset
> and that modeset is already happening at a link fallback parameter then
> while we call retrain link, we should also validate the link parameters
> so that it doesnt try to retrain with stale values.
> 
> I think we need to call intel_dp_link_params_valid() before retrain.
> 
> > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > >  
> > > > -	if (conn_state->commit &&
> > > > -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > > -		return;
> > > > +		if (ret == -EDEADLK) {
> > > > +			drm_modeset_backoff(&ctx);
> > > > +			continue;
> > > > +		}
> > > >  
> > > > -	/*
> > > > -	 * Validate the cached values of intel_dp->link_rate and
> > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > -	 */
> > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > >link_rate,
> > > > -					intel_dp->lane_count))
> > > > -		return;
> > > > +		break;
> > > > +	}
> > > >  
> > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) 
> > > > {
> > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> > > > -			      intel_encoder->base.name);
> > > > +	drm_modeset_drop_locks(&ctx);
> > > > +	drm_modeset_acquire_fini(&ctx);
> > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
> > > >  
> > > > -		intel_dp_retrain_link(intel_dp);
> > > > -	}
> > > > +	return changed;
> > > >  }
> > > >  
> > > >  /*
> > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
> > > >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > > > unhandled\n");
> > > >  	}
> > > >  
> > > > -	intel_dp_check_link_status(intel_dp);
> > > > +	/* defer to the hotplug work for link retraining if needed */
> > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > +		return false;
> > > >  
> > > >  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING)
> > > > {
> > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > requested\n");
> > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector
> > > > *connector)
> > > >  		 */
> > > >  		status = connector_status_disconnected;
> > > >  		goto out;
> > > > -	} else {
> > > > -		/*
> > > > -		 * If display is now connected check links status,
> > > > -		 * there has been known issues of link loss
> > > > triggerring
> > > > -		 * long pulse.
> > > > -		 *
> > > > -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > -		 * weird HPD ping pong during modesets. So we can
> > > > apparently
> > > > -		 * end up with HPD going low during a modeset, and
> > > > then
> > > > -		 * going back up soon after. And once that happens we
> > > > must
> > > > -		 * retrain the link to get a picture. That's in case
> > > > no
> > > > -		 * userspace component reacted to intermittent HPD
> > > > dip.
> > > > -		 */
> > > > -		intel_dp_check_link_status(intel_dp);
> > > >  	}
> > > >  
> > > >  	/*
> > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> > > > *intel_dig_port, bool long_hpd)
> > > >  	}
> > > >  
> > > >  	if (!intel_dp->is_mst) {
> > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > -		struct drm_connector *connector = &intel_dp-
> > > > > attached_connector->base;
> > > > 
> > > > -		struct drm_crtc *crtc;
> > > > -		int iret;
> > > > -		bool handled = false;
> > > > -
> > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > -retry:
> > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > drm.mode_config.connection_mutex, &ctx);
> > > > 
> > > > -		if (iret)
> > > > -			goto err;
> > > > -
> > > > -		crtc = connector->state->crtc;
> > > > -		if (crtc) {
> > > > -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> > > > -			if (iret)
> > > > -				goto err;
> > > > -		}
> > > > +		bool handled;
> > > >  
> > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > >  
> > > > -err:
> > > > -		if (iret == -EDEADLK) {
> > > > -			drm_modeset_backoff(&ctx);
> > > > -			goto retry;
> > > > -		}
> > > > -
> > > > -		drm_modeset_drop_locks(&ctx);
> > > > -		drm_modeset_acquire_fini(&ctx);
> > > > -		WARN(iret, "Acquiring modeset locks failed with
> > > > %i\n", iret);
> > > > -
> > > >  		/* Short pulse can signify loss of hdcp
> > > > authentication */
> > > >  		intel_hdcp_check_link(intel_dp->attached_connector);
> > > >  
> > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private
> > > > *dev_priv,
> > > >  			     "DP %c", port_name(port)))
> > > >  		goto err_encoder_init;
> > > >  
> > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > >  	intel_encoder->compute_config = intel_dp_compute_config;
> > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > @@ -1611,6 +1611,8 @@ int
> > > > intel_dp_get_link_train_fallback_values(struct
> > > > intel_dp *intel_dp,
> > > >  					    int link_rate, uint8_t
> > > > lane_count);
> > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > +			  struct drm_modeset_acquire_ctx *ctx);
> > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > >  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
> > 
> > -- 
> > Ville Syrjälä
> > Intel OTC
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Navare, Manasi Feb. 28, 2018, 7:27 p.m. UTC | #5
On Wed, Feb 28, 2018 at 02:07:34PM -0500, Lyude Paul wrote:
> 
> On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> > Ville,  thanks for the patch and
> > Sorry for not being able to review this earlier.
> > Please find some comments below:
> > 
> > On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > 
> > > > > Doing link retraining from the short pulse handler is problematic
> > > > > since
> > > > > that might introduce deadlocks with MST sideband processing. Currently
> > > > > we don't retrain MST links from this code, but we want to change that.
> > > > > So better to move the entire thing to the hotplug work. We can utilize
> > > > > the new encoder->hotplug() hook for this.
> > > > > 
> > > > > The only thing we leave in the short pulse handler is the link status
> > > > > check. That one still depends on the link parameters stored under
> > > > > intel_dp, so no locking around that but races should be mostly
> > > > > harmless
> > > > > as the actual retraining code will recheck the link state if we
> > > > > end up there by mistake.
> > > > > 
> > > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > > >     Check the connector type to figure out if we should do
> > > > >     the HDMI thing or the DP think for DDI
> > > > > 
> > > > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > > >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++------
> > > > > --------
> > > > > ---
> > > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > > intel_encoder
> > > > > *encoder,
> > > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > > >  
> > > > >  	for (;;) {
> > > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > > +		if (connector->base.connector_type ==
> > > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > > +			ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > > +		else
> > > > > +			ret = intel_dp_retrain_link(encoder, &ctx);
> > > > >  
> > > > >  		if (ret == -EDEADLK) {
> > > > >  			drm_modeset_backoff(&ctx);
> > > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private
> > > > > *dev_priv,
> > > > > enum port port)
> > > > >  	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
> > > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > > port_name(port));
> > > > >  
> > > > > -	if (init_hdmi)
> > > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > -	else
> > > > > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > > >  	intel_encoder->compute_output_type =
> > > > > intel_ddi_compute_output_type;
> > > > >  	intel_encoder->compute_config = intel_ddi_compute_config;
> > > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > index 6bbf14410c2a..152016e09a11 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp
> > > > > *intel_dp)
> > > > >  	return -EINVAL;
> > > > >  }
> > > > >  
> > > > > -static void
> > > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > > +static bool
> > > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > > +{
> > > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > +
> > > > > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > > +		DRM_ERROR("Failed to get link status\n");
> > > > > +		return false;
> > > > > +	}
> > > > > +
> > > > > +	/*
> > > > > +	 * Validate the cached values of intel_dp->link_rate and
> > > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > > +	 */
> > > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > >link_rate,
> > > > > +					intel_dp->lane_count))
> > > > > +		return false;
> > > > > +
> > > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > >lane_count);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * If display is now connected check links status,
> > > > > + * there has been known issues of link loss triggering
> > > > > + * long pulse.
> > > > > + *
> > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > + * end up with HPD going low during a modeset, and then
> > > > > + * going back up soon after. And once that happens we must
> > > > > + * retrain the link to get a picture. That's in case no
> > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > + */
> > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > +			  struct drm_modeset_acquire_ctx *ctx)
> > > > >  {
> > > > > -	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)-
> > > > > >base;
> > > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > > >base.dev);
> > > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
> > > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
> > > > > +	struct intel_connector *connector = intel_dp-
> > > > > >attached_connector;
> > > > > +	struct drm_connector_state *conn_state;
> > > > > +	struct intel_crtc_state *crtc_state;
> > > > > +	struct intel_crtc *crtc;
> > > > > +	int ret;
> > > > > +
> > > > > +	/* FIXME handle the MST connectors as well */
> > > > > +
> > > > > +	if (!connector || connector->base.status !=
> > > > > connector_status_connected)
> > > > > +		return 0;
> > > > > +
> > > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > > >drm.mode_config.connection_mutex,
> > > > > ctx);
> > > > > +	if (ret)
> > > > > +		return ret;
> > > > > +
> > > > > +	conn_state = connector->base.state;
> > > > > +
> > > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > > +	if (!crtc)
> > > > > +		return 0;
> > > > > +
> > > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > > +	if (ret)
> > > > > +		return ret;
> > > > > +
> > > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > > +
> > > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > > +
> > > > > +	if (!crtc_state->base.active)
> > > > > +		return 0;
> > > > > +
> > > > > +	if (conn_state->commit &&
> > > > > +	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > > > +		return 0;
> > > > > +
> > > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > > +		return 0;
> > > > 
> > > > NAK, this definitely won't work for implementing MST retraining. There's
> > > > some
> > > > pretty huge differences with how retraining needs to be handled on SST
> > > > vs. MST.
> > > > An example with some normal SST sink vs. what happens on my caldigit TS3
> > > > 
> > > > SST:
> > > >     1. commit modeset, everything is OK
> > > >     2. something happens, sink sends shortpulse and changes link status
> > > > registers
> > > >     in dpcd
> > > >     3. Source receives short pulse, tries retraining five times
> > > >     4. if this succeeds:
> > > >         5. we're done here
> > > >     6. if this fails:
> > > >         7. mark link status as bad
> > > >         8. get fallback parameters
> > > >         9. hotplug event
> > > > 
> > > > MST (i915 doesn't do this yet, but this is generally how it needs to be
> > > > handled):
> > > >     1. commit modeset, everything is OK
> > > >     2. something happens (in my case, the MST hub discovers it had the
> > > > wrong max
> > > >     link rate/lane count), sink sends ESI indicating channel EQ has
> > > > failed
> > > >     3. retraining commences with five retries.
> > > >     4. if this succeeds:
> > > >        5. continue
> > > >     6. if this fails (I actually haven't seen this once yet)
> > > >         7. mark link status as bad on all downstream connectors
> > > >         8. get fallback parameters
> > > >         9. hotplug event
> > > >     10. the retrain didn't actually work (despite what the SST link
> > > > status
> > > >     registers told us). go back to step 3 five more times
> > > >     11. if this fails:
> > > >         12. mark link status as bad on all downstream connectors
> > > >         13. get fallback parameters
> > > >         14. hotplug event
> > > > 
> > > > simply put: we really should keep the "do we need to retrain?" logic out
> > > > of the
> > > > actual retraining helpers so that SST/MST codepaths can do their own
> > > > checks to
> > > > figure this out.
> > > 
> > > No, we need it since we want to check it *after* any modeset has
> > > finished. With MST I think what we'll want to do is find all the pipes
> > > affected by the link failure, lock them, and wait until they're done
> > > with their modesets, then we check the link state. If it's bad we
> > > proceed to retrain the link.
> > > 
> > > So basically just walking over the MST encoders in addition to the
> > > SST encoder, and repeating most of the steps in this code for each.
> > > Hence the the MST FIXME I left in there ;)
> > > 
> > 
> > Lyude,
> > 
> > I agree with Ville here, can we add the MST retrain required check from
> > within the intel_dp_retrain_link()? So for now the FIXME should be left
> > there
> > and MST retraining check can be added from your patches.
> > 
> > Manasi
> 
> Yep! Sorry I probably should have reiterated this part over email since I only
> mentioned it to ville directly: this part should definitely work fine, and
> intel_dp_retrain_link() is actually where we handle MST retraining in the
> latest version of the retraining series:
> 
> jfyi, you can find that here:
> https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
> It's almost ready for the ML, just want to get the other patches this is going
> to rely on in first and fix some small concerns I've got about that series'
> current implementation of handling modesets after a failed retrain sequence
> >

Great!
After the failed retrain sequence, it will keep on retrying until has tried all
the link rate/lane count combinations until RBR 1 lane and then fail with a ERROR message.
What was the concern on the current implementation of modeste after failed retrain?

Manasi
 
> > > > 
> > > > >  
> > > > >  	/* Suppress underruns caused by re-training */
> > > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe,
> > > > > false);
> > > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > > *intel_dp)
> > > > >  	if (crtc->config->has_pch_encoder)
> > > > >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> > > > >  						      intel_crtc_pch_
> > > > > transcod
> > > > > er(crtc), true);
> > > > > +
> > > > > +	return 0;
> > > > >  }
> > > > >  
> > > > > -static void
> > > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > > +/*
> > > > > + * If display is now connected check links status,
> > > > > + * there has been known issues of link loss triggering
> > > > > + * long pulse.
> > > > > + *
> > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > + * end up with HPD going low during a modeset, and then
> > > > > + * going back up soon after. And once that happens we must
> > > > > + * retrain the link to get a picture. That's in case no
> > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > + */
> > > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > > +			     struct intel_connector *connector)
> > > > >  {
> > > > > -	struct drm_i915_private *dev_priv =
> > > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > > -	struct intel_encoder *intel_encoder =
> > > > > &dp_to_dig_port(intel_dp)-
> > > > > > base;
> > > > > 
> > > > > -	struct drm_connector_state *conn_state =
> > > > > -		intel_dp->attached_connector->base.state;
> > > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > -
> > > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > > drm.mode_config.connection_mutex));
> > > > > 
> > > > > -
> > > > > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > > -		DRM_ERROR("Failed to get link status\n");
> > > > > -		return;
> > > > > -	}
> > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > +	bool changed;
> > > > > +	int ret;
> > > > >  
> > > > > -	if (!conn_state->crtc)
> > > > > -		return;
> > > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > > >  
> > > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
> > > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > > >  
> > > > > -	if (!conn_state->crtc->state->active)
> > > > > -		return;
> > > > > +	for (;;) {
> > 
> > Here if this is getting executed due to hpd ping pong during the modeset
> > and that modeset is already happening at a link fallback parameter then
> > while we call retrain link, we should also validate the link parameters
> > so that it doesnt try to retrain with stale values.
> > 
> > I think we need to call intel_dp_link_params_valid() before retrain.
> > 
> > > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > > >  
> > > > > -	if (conn_state->commit &&
> > > > > -	    !try_wait_for_completion(&conn_state->commit->hw_done))
> > > > > -		return;
> > > > > +		if (ret == -EDEADLK) {
> > > > > +			drm_modeset_backoff(&ctx);
> > > > > +			continue;
> > > > > +		}
> > > > >  
> > > > > -	/*
> > > > > -	 * Validate the cached values of intel_dp->link_rate and
> > > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > > -	 */
> > > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > >link_rate,
> > > > > -					intel_dp->lane_count))
> > > > > -		return;
> > > > > +		break;
> > > > > +	}
> > > > >  
> > > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) 
> > > > > {
> > > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
> > > > > -			      intel_encoder->base.name);
> > > > > +	drm_modeset_drop_locks(&ctx);
> > > > > +	drm_modeset_acquire_fini(&ctx);
> > > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
> > > > >  
> > > > > -		intel_dp_retrain_link(intel_dp);
> > > > > -	}
> > > > > +	return changed;
> > > > >  }
> > > > >  
> > > > >  /*
> > > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp *intel_dp)
> > > > >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > > > > unhandled\n");
> > > > >  	}
> > > > >  
> > > > > -	intel_dp_check_link_status(intel_dp);
> > > > > +	/* defer to the hotplug work for link retraining if needed */
> > > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > > +		return false;
> > > > >  
> > > > >  	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING)
> > > > > {
> > > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > > requested\n");
> > > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector
> > > > > *connector)
> > > > >  		 */
> > > > >  		status = connector_status_disconnected;
> > > > >  		goto out;
> > > > > -	} else {
> > > > > -		/*
> > > > > -		 * If display is now connected check links status,
> > > > > -		 * there has been known issues of link loss
> > > > > triggerring
> > > > > -		 * long pulse.
> > > > > -		 *
> > > > > -		 * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > -		 * weird HPD ping pong during modesets. So we can
> > > > > apparently
> > > > > -		 * end up with HPD going low during a modeset, and
> > > > > then
> > > > > -		 * going back up soon after. And once that happens we
> > > > > must
> > > > > -		 * retrain the link to get a picture. That's in case
> > > > > no
> > > > > -		 * userspace component reacted to intermittent HPD
> > > > > dip.
> > > > > -		 */
> > > > > -		intel_dp_check_link_status(intel_dp);
> > > > >  	}
> > > > >  
> > > > >  	/*
> > > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct intel_digital_port
> > > > > *intel_dig_port, bool long_hpd)
> > > > >  	}
> > > > >  
> > > > >  	if (!intel_dp->is_mst) {
> > > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > > -		struct drm_connector *connector = &intel_dp-
> > > > > > attached_connector->base;
> > > > > 
> > > > > -		struct drm_crtc *crtc;
> > > > > -		int iret;
> > > > > -		bool handled = false;
> > > > > -
> > > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > > -retry:
> > > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > > drm.mode_config.connection_mutex, &ctx);
> > > > > 
> > > > > -		if (iret)
> > > > > -			goto err;
> > > > > -
> > > > > -		crtc = connector->state->crtc;
> > > > > -		if (crtc) {
> > > > > -			iret = drm_modeset_lock(&crtc->mutex, &ctx);
> > > > > -			if (iret)
> > > > > -				goto err;
> > > > > -		}
> > > > > +		bool handled;
> > > > >  
> > > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > > >  
> > > > > -err:
> > > > > -		if (iret == -EDEADLK) {
> > > > > -			drm_modeset_backoff(&ctx);
> > > > > -			goto retry;
> > > > > -		}
> > > > > -
> > > > > -		drm_modeset_drop_locks(&ctx);
> > > > > -		drm_modeset_acquire_fini(&ctx);
> > > > > -		WARN(iret, "Acquiring modeset locks failed with
> > > > > %i\n", iret);
> > > > > -
> > > > >  		/* Short pulse can signify loss of hdcp
> > > > > authentication */
> > > > >  		intel_hdcp_check_link(intel_dp->attached_connector);
> > > > >  
> > > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private
> > > > > *dev_priv,
> > > > >  			     "DP %c", port_name(port)))
> > > > >  		goto err_encoder_init;
> > > > >  
> > > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > > >  	intel_encoder->compute_config = intel_dp_compute_config;
> > > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > @@ -1611,6 +1611,8 @@ int
> > > > > intel_dp_get_link_train_fallback_values(struct
> > > > > intel_dp *intel_dp,
> > > > >  					    int link_rate, uint8_t
> > > > > lane_count);
> > > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > +			  struct drm_modeset_acquire_ctx *ctx);
> > > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > > >  void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel OTC
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> -- 
> Cheers,
> 	Lyude Paul
Lyude Paul Feb. 28, 2018, 7:41 p.m. UTC | #6
On Wed, 2018-02-28 at 11:27 -0800, Manasi Navare wrote:
> On Wed, Feb 28, 2018 at 02:07:34PM -0500, Lyude Paul wrote:
> > 
> > On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> > > Ville,  thanks for the patch and
> > > Sorry for not being able to review this earlier.
> > > Please find some comments below:
> > > 
> > > On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > > > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > 
> > > > > > Doing link retraining from the short pulse handler is problematic
> > > > > > since
> > > > > > that might introduce deadlocks with MST sideband processing.
> > > > > > Currently
> > > > > > we don't retrain MST links from this code, but we want to change
> > > > > > that.
> > > > > > So better to move the entire thing to the hotplug work. We can
> > > > > > utilize
> > > > > > the new encoder->hotplug() hook for this.
> > > > > > 
> > > > > > The only thing we leave in the short pulse handler is the link
> > > > > > status
> > > > > > check. That one still depends on the link parameters stored under
> > > > > > intel_dp, so no locking around that but races should be mostly
> > > > > > harmless
> > > > > > as the actual retraining code will recheck the link state if we
> > > > > > end up there by mistake.
> > > > > > 
> > > > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > > > >     Check the connector type to figure out if we should do
> > > > > >     the HDMI thing or the DP think for DDI
> > > > > > 
> > > > > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > > > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > > > >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++--
> > > > > > ----
> > > > > > --------
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > > > intel_encoder
> > > > > > *encoder,
> > > > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > > > >  
> > > > > >  	for (;;) {
> > > > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > > > +		if (connector->base.connector_type ==
> > > > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > > > +			ret = intel_hdmi_reset_link(encoder,
> > > > > > &ctx);
> > > > > > +		else
> > > > > > +			ret = intel_dp_retrain_link(encoder,
> > > > > > &ctx);
> > > > > >  
> > > > > >  		if (ret == -EDEADLK) {
> > > > > >  			drm_modeset_backoff(&ctx);
> > > > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private
> > > > > > *dev_priv,
> > > > > > enum port port)
> > > > > >  	drm_encoder_init(&dev_priv->drm, encoder,
> > > > > > &intel_ddi_funcs,
> > > > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > > > port_name(port));
> > > > > >  
> > > > > > -	if (init_hdmi)
> > > > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > -	else
> > > > > > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > >  	intel_encoder->compute_output_type =
> > > > > > intel_ddi_compute_output_type;
> > > > > >  	intel_encoder->compute_config = intel_ddi_compute_config;
> > > > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > index 6bbf14410c2a..152016e09a11 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp
> > > > > > *intel_dp)
> > > > > >  	return -EINVAL;
> > > > > >  }
> > > > > >  
> > > > > > -static void
> > > > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > > > +static bool
> > > > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > > > +{
> > > > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > +
> > > > > > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > > > +		DRM_ERROR("Failed to get link status\n");
> > > > > > +		return false;
> > > > > > +	}
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Validate the cached values of intel_dp->link_rate and
> > > > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > > > +	 */
> > > > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > link_rate,
> > > > > > 
> > > > > > +					intel_dp->lane_count))
> > > > > > +		return false;
> > > > > > +
> > > > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > lane_count);
> > > > > > 
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > > + * If display is now connected check links status,
> > > > > > + * there has been known issues of link loss triggering
> > > > > > + * long pulse.
> > > > > > + *
> > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > + * going back up soon after. And once that happens we must
> > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > + */
> > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > +			  struct drm_modeset_acquire_ctx *ctx)
> > > > > >  {
> > > > > > -	struct intel_encoder *encoder =
> > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > base;
> > > > > > 
> > > > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > > > > base.dev);
> > > > > > 
> > > > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder-
> > > > > > >base.crtc);
> > > > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder-
> > > > > > >base);
> > > > > > +	struct intel_connector *connector = intel_dp-
> > > > > > > attached_connector;
> > > > > > 
> > > > > > +	struct drm_connector_state *conn_state;
> > > > > > +	struct intel_crtc_state *crtc_state;
> > > > > > +	struct intel_crtc *crtc;
> > > > > > +	int ret;
> > > > > > +
> > > > > > +	/* FIXME handle the MST connectors as well */
> > > > > > +
> > > > > > +	if (!connector || connector->base.status !=
> > > > > > connector_status_connected)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > > > > drm.mode_config.connection_mutex,
> > > > > > 
> > > > > > ctx);
> > > > > > +	if (ret)
> > > > > > +		return ret;
> > > > > > +
> > > > > > +	conn_state = connector->base.state;
> > > > > > +
> > > > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > > > +	if (!crtc)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > > > +	if (ret)
> > > > > > +		return ret;
> > > > > > +
> > > > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > > > +
> > > > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > > > +
> > > > > > +	if (!crtc_state->base.active)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	if (conn_state->commit &&
> > > > > > +	    !try_wait_for_completion(&conn_state->commit-
> > > > > > >hw_done))
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > > > +		return 0;
> > > > > 
> > > > > NAK, this definitely won't work for implementing MST retraining.
> > > > > There's
> > > > > some
> > > > > pretty huge differences with how retraining needs to be handled on
> > > > > SST
> > > > > vs. MST.
> > > > > An example with some normal SST sink vs. what happens on my caldigit
> > > > > TS3
> > > > > 
> > > > > SST:
> > > > >     1. commit modeset, everything is OK
> > > > >     2. something happens, sink sends shortpulse and changes link
> > > > > status
> > > > > registers
> > > > >     in dpcd
> > > > >     3. Source receives short pulse, tries retraining five times
> > > > >     4. if this succeeds:
> > > > >         5. we're done here
> > > > >     6. if this fails:
> > > > >         7. mark link status as bad
> > > > >         8. get fallback parameters
> > > > >         9. hotplug event
> > > > > 
> > > > > MST (i915 doesn't do this yet, but this is generally how it needs to
> > > > > be
> > > > > handled):
> > > > >     1. commit modeset, everything is OK
> > > > >     2. something happens (in my case, the MST hub discovers it had
> > > > > the
> > > > > wrong max
> > > > >     link rate/lane count), sink sends ESI indicating channel EQ has
> > > > > failed
> > > > >     3. retraining commences with five retries.
> > > > >     4. if this succeeds:
> > > > >        5. continue
> > > > >     6. if this fails (I actually haven't seen this once yet)
> > > > >         7. mark link status as bad on all downstream connectors
> > > > >         8. get fallback parameters
> > > > >         9. hotplug event
> > > > >     10. the retrain didn't actually work (despite what the SST link
> > > > > status
> > > > >     registers told us). go back to step 3 five more times
> > > > >     11. if this fails:
> > > > >         12. mark link status as bad on all downstream connectors
> > > > >         13. get fallback parameters
> > > > >         14. hotplug event
> > > > > 
> > > > > simply put: we really should keep the "do we need to retrain?" logic
> > > > > out
> > > > > of the
> > > > > actual retraining helpers so that SST/MST codepaths can do their own
> > > > > checks to
> > > > > figure this out.
> > > > 
> > > > No, we need it since we want to check it *after* any modeset has
> > > > finished. With MST I think what we'll want to do is find all the pipes
> > > > affected by the link failure, lock them, and wait until they're done
> > > > with their modesets, then we check the link state. If it's bad we
> > > > proceed to retrain the link.
> > > > 
> > > > So basically just walking over the MST encoders in addition to the
> > > > SST encoder, and repeating most of the steps in this code for each.
> > > > Hence the the MST FIXME I left in there ;)
> > > > 
> > > 
> > > Lyude,
> > > 
> > > I agree with Ville here, can we add the MST retrain required check from
> > > within the intel_dp_retrain_link()? So for now the FIXME should be left
> > > there
> > > and MST retraining check can be added from your patches.
> > > 
> > > Manasi
> > 
> > Yep! Sorry I probably should have reiterated this part over email since I
> > only
> > mentioned it to ville directly: this part should definitely work fine, and
> > intel_dp_retrain_link() is actually where we handle MST retraining in the
> > latest version of the retraining series:
> > 
> > jfyi, you can find that here:
> > https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
> > It's almost ready for the ML, just want to get the other patches this is
> > going
> > to rely on in first and fix some small concerns I've got about that
> > series'
> > current implementation of handling modesets after a failed retrain
> > sequence
> > > 
> 
> Great!
> After the failed retrain sequence, it will keep on retrying until has tried
> all
> the link rate/lane count combinations until RBR 1 lane and then fail with a
> ERROR message.
> What was the concern on the current implementation of modeste after failed
> retrain?
I actually take that back. Was worried that not freeing existing VCPI
allocations during the first atomic check phase for the first modeset after a
retrain failure might cause atomic checks to fail from not having enough VCPI 
space, but then I realized that wouldn't matter since if we're not able to fit
one display into the VCPI with a reduced pbn_div, that just means fitting both
displays with a reduced pbn_div would also be impossible :P.

> 
> Manasi
>  
> > > > > 
> > > > > >  
> > > > > >  	/* Suppress underruns caused by re-training */
> > > > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc-
> > > > > > >pipe,
> > > > > > false);
> > > > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > > > *intel_dp)
> > > > > >  	if (crtc->config->has_pch_encoder)
> > > > > >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> > > > > >  						      intel_crtc_
> > > > > > pch_
> > > > > > transcod
> > > > > > er(crtc), true);
> > > > > > +
> > > > > > +	return 0;
> > > > > >  }
> > > > > >  
> > > > > > -static void
> > > > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > > > +/*
> > > > > > + * If display is now connected check links status,
> > > > > > + * there has been known issues of link loss triggering
> > > > > > + * long pulse.
> > > > > > + *
> > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > + * going back up soon after. And once that happens we must
> > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > + */
> > > > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > > > +			     struct intel_connector *connector)
> > > > > >  {
> > > > > > -	struct drm_i915_private *dev_priv =
> > > > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > > > -	struct intel_encoder *intel_encoder =
> > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > base;
> > > > > > 
> > > > > > -	struct drm_connector_state *conn_state =
> > > > > > -		intel_dp->attached_connector->base.state;
> > > > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > -
> > > > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > > > drm.mode_config.connection_mutex));
> > > > > > 
> > > > > > -
> > > > > > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > > > -		DRM_ERROR("Failed to get link status\n");
> > > > > > -		return;
> > > > > > -	}
> > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > +	bool changed;
> > > > > > +	int ret;
> > > > > >  
> > > > > > -	if (!conn_state->crtc)
> > > > > > -		return;
> > > > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > > > >  
> > > > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc-
> > > > > > >mutex));
> > > > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > > > >  
> > > > > > -	if (!conn_state->crtc->state->active)
> > > > > > -		return;
> > > > > > +	for (;;) {
> > > 
> > > Here if this is getting executed due to hpd ping pong during the modeset
> > > and that modeset is already happening at a link fallback parameter then
> > > while we call retrain link, we should also validate the link parameters
> > > so that it doesnt try to retrain with stale values.
> > > 
> > > I think we need to call intel_dp_link_params_valid() before retrain.
> > > 
> > > > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > > > >  
> > > > > > -	if (conn_state->commit &&
> > > > > > -	    !try_wait_for_completion(&conn_state->commit-
> > > > > > >hw_done))
> > > > > > -		return;
> > > > > > +		if (ret == -EDEADLK) {
> > > > > > +			drm_modeset_backoff(&ctx);
> > > > > > +			continue;
> > > > > > +		}
> > > > > >  
> > > > > > -	/*
> > > > > > -	 * Validate the cached values of intel_dp->link_rate and
> > > > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > > > -	 */
> > > > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > link_rate,
> > > > > > 
> > > > > > -					intel_dp->lane_count))
> > > > > > -		return;
> > > > > > +		break;
> > > > > > +	}
> > > > > >  
> > > > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > >lane_count)) 
> > > > > > {
> > > > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok,
> > > > > > retraining\n",
> > > > > > -			      intel_encoder->base.name);
> > > > > > +	drm_modeset_drop_locks(&ctx);
> > > > > > +	drm_modeset_acquire_fini(&ctx);
> > > > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n",
> > > > > > ret);
> > > > > >  
> > > > > > -		intel_dp_retrain_link(intel_dp);
> > > > > > -	}
> > > > > > +	return changed;
> > > > > >  }
> > > > > >  
> > > > > >  /*
> > > > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp
> > > > > > *intel_dp)
> > > > > >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > > > > > unhandled\n");
> > > > > >  	}
> > > > > >  
> > > > > > -	intel_dp_check_link_status(intel_dp);
> > > > > > +	/* defer to the hotplug work for link retraining if
> > > > > > needed */
> > > > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > > > +		return false;
> > > > > >  
> > > > > >  	if (intel_dp->compliance.test_type ==
> > > > > > DP_TEST_LINK_TRAINING)
> > > > > > {
> > > > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > > > requested\n");
> > > > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector
> > > > > > *connector)
> > > > > >  		 */
> > > > > >  		status = connector_status_disconnected;
> > > > > >  		goto out;
> > > > > > -	} else {
> > > > > > -		/*
> > > > > > -		 * If display is now connected check links
> > > > > > status,
> > > > > > -		 * there has been known issues of link loss
> > > > > > triggerring
> > > > > > -		 * long pulse.
> > > > > > -		 *
> > > > > > -		 * Some sinks (eg. ASUS PB287Q) seem to perform
> > > > > > some
> > > > > > -		 * weird HPD ping pong during modesets. So we can
> > > > > > apparently
> > > > > > -		 * end up with HPD going low during a modeset,
> > > > > > and
> > > > > > then
> > > > > > -		 * going back up soon after. And once that
> > > > > > happens we
> > > > > > must
> > > > > > -		 * retrain the link to get a picture. That's in
> > > > > > case
> > > > > > no
> > > > > > -		 * userspace component reacted to intermittent
> > > > > > HPD
> > > > > > dip.
> > > > > > -		 */
> > > > > > -		intel_dp_check_link_status(intel_dp);
> > > > > >  	}
> > > > > >  
> > > > > >  	/*
> > > > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct
> > > > > > intel_digital_port
> > > > > > *intel_dig_port, bool long_hpd)
> > > > > >  	}
> > > > > >  
> > > > > >  	if (!intel_dp->is_mst) {
> > > > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > > > -		struct drm_connector *connector = &intel_dp-
> > > > > > > attached_connector->base;
> > > > > > 
> > > > > > -		struct drm_crtc *crtc;
> > > > > > -		int iret;
> > > > > > -		bool handled = false;
> > > > > > -
> > > > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > > > -retry:
> > > > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > > > drm.mode_config.connection_mutex, &ctx);
> > > > > > 
> > > > > > -		if (iret)
> > > > > > -			goto err;
> > > > > > -
> > > > > > -		crtc = connector->state->crtc;
> > > > > > -		if (crtc) {
> > > > > > -			iret = drm_modeset_lock(&crtc->mutex,
> > > > > > &ctx);
> > > > > > -			if (iret)
> > > > > > -				goto err;
> > > > > > -		}
> > > > > > +		bool handled;
> > > > > >  
> > > > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > > > >  
> > > > > > -err:
> > > > > > -		if (iret == -EDEADLK) {
> > > > > > -			drm_modeset_backoff(&ctx);
> > > > > > -			goto retry;
> > > > > > -		}
> > > > > > -
> > > > > > -		drm_modeset_drop_locks(&ctx);
> > > > > > -		drm_modeset_acquire_fini(&ctx);
> > > > > > -		WARN(iret, "Acquiring modeset locks failed with
> > > > > > %i\n", iret);
> > > > > > -
> > > > > >  		/* Short pulse can signify loss of hdcp
> > > > > > authentication */
> > > > > >  		intel_hdcp_check_link(intel_dp-
> > > > > > >attached_connector);
> > > > > >  
> > > > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private
> > > > > > *dev_priv,
> > > > > >  			     "DP %c", port_name(port)))
> > > > > >  		goto err_encoder_init;
> > > > > >  
> > > > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > > > >  	intel_encoder->compute_config = intel_dp_compute_config;
> > > > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > @@ -1611,6 +1611,8 @@ int
> > > > > > intel_dp_get_link_train_fallback_values(struct
> > > > > > intel_dp *intel_dp,
> > > > > >  					    int link_rate,
> > > > > > uint8_t
> > > > > > lane_count);
> > > > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > +			  struct drm_modeset_acquire_ctx *ctx);
> > > > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > > > >  void intel_dp_encoder_suspend(struct intel_encoder
> > > > > > *intel_encoder);
> > > > 
> > > > -- 
> > > > Ville Syrjälä
> > > > Intel OTC
> > > > _______________________________________________
> > > > Intel-gfx mailing list
> > > > Intel-gfx@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > 
> > -- 
> > Cheers,
> > 	Lyude Paul
Navare, Manasi Feb. 28, 2018, 7:57 p.m. UTC | #7
On Wed, Feb 28, 2018 at 02:41:06PM -0500, Lyude Paul wrote:
> On Wed, 2018-02-28 at 11:27 -0800, Manasi Navare wrote:
> > On Wed, Feb 28, 2018 at 02:07:34PM -0500, Lyude Paul wrote:
> > > 
> > > On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> > > > Ville,  thanks for the patch and
> > > > Sorry for not being able to review this earlier.
> > > > Please find some comments below:
> > > > 
> > > > On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > > > > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > > > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > > > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > 
> > > > > > > Doing link retraining from the short pulse handler is problematic
> > > > > > > since
> > > > > > > that might introduce deadlocks with MST sideband processing.
> > > > > > > Currently
> > > > > > > we don't retrain MST links from this code, but we want to change
> > > > > > > that.
> > > > > > > So better to move the entire thing to the hotplug work. We can
> > > > > > > utilize
> > > > > > > the new encoder->hotplug() hook for this.
> > > > > > > 
> > > > > > > The only thing we leave in the short pulse handler is the link
> > > > > > > status
> > > > > > > check. That one still depends on the link parameters stored under
> > > > > > > intel_dp, so no locking around that but races should be mostly
> > > > > > > harmless
> > > > > > > as the actual retraining code will recheck the link state if we
> > > > > > > end up there by mistake.
> > > > > > > 
> > > > > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > > > > >     Check the connector type to figure out if we should do
> > > > > > >     the HDMI thing or the DP think for DDI
> > > > > > > 
> > > > > > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > > > > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > ---
> > > > > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > > > > >  drivers/gpu/drm/i915/intel_dp.c  | 196 ++++++++++++++++++++++--
> > > > > > > ----
> > > > > > > --------
> > > > > > > ---
> > > > > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > > > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > > > > intel_encoder
> > > > > > > *encoder,
> > > > > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > > > > >  
> > > > > > >  	for (;;) {
> > > > > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > > > > +		if (connector->base.connector_type ==
> > > > > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > > > > +			ret = intel_hdmi_reset_link(encoder,
> > > > > > > &ctx);
> > > > > > > +		else
> > > > > > > +			ret = intel_dp_retrain_link(encoder,
> > > > > > > &ctx);
> > > > > > >  
> > > > > > >  		if (ret == -EDEADLK) {
> > > > > > >  			drm_modeset_backoff(&ctx);
> > > > > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct drm_i915_private
> > > > > > > *dev_priv,
> > > > > > > enum port port)
> > > > > > >  	drm_encoder_init(&dev_priv->drm, encoder,
> > > > > > > &intel_ddi_funcs,
> > > > > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > > > > port_name(port));
> > > > > > >  
> > > > > > > -	if (init_hdmi)
> > > > > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > > -	else
> > > > > > > -		intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > >  	intel_encoder->compute_output_type =
> > > > > > > intel_ddi_compute_output_type;
> > > > > > >  	intel_encoder->compute_config = intel_ddi_compute_config;
> > > > > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > index 6bbf14410c2a..152016e09a11 100644
> > > > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct intel_dp
> > > > > > > *intel_dp)
> > > > > > >  	return -EINVAL;
> > > > > > >  }
> > > > > > >  
> > > > > > > -static void
> > > > > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > > > > +static bool
> > > > > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > > > > +{
> > > > > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > > +
> > > > > > > +	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > > > > +		DRM_ERROR("Failed to get link status\n");
> > > > > > > +		return false;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * Validate the cached values of intel_dp->link_rate and
> > > > > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > > > > +	 */
> > > > > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > > link_rate,
> > > > > > > 
> > > > > > > +					intel_dp->lane_count))
> > > > > > > +		return false;
> > > > > > > +
> > > > > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > > lane_count);
> > > > > > > 
> > > > > > > +}
> > > > > > > +
> > > > > > > +/*
> > > > > > > + * If display is now connected check links status,
> > > > > > > + * there has been known issues of link loss triggering
> > > > > > > + * long pulse.
> > > > > > > + *
> > > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > > + * going back up soon after. And once that happens we must
> > > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > > + */
> > > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > > +			  struct drm_modeset_acquire_ctx *ctx)
> > > > > > >  {
> > > > > > > -	struct intel_encoder *encoder =
> > > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > > base;
> > > > > > > 
> > > > > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > > > > > base.dev);
> > > > > > > 
> > > > > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder-
> > > > > > > >base.crtc);
> > > > > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder-
> > > > > > > >base);
> > > > > > > +	struct intel_connector *connector = intel_dp-
> > > > > > > > attached_connector;
> > > > > > > 
> > > > > > > +	struct drm_connector_state *conn_state;
> > > > > > > +	struct intel_crtc_state *crtc_state;
> > > > > > > +	struct intel_crtc *crtc;
> > > > > > > +	int ret;
> > > > > > > +
> > > > > > > +	/* FIXME handle the MST connectors as well */
> > > > > > > +
> > > > > > > +	if (!connector || connector->base.status !=
> > > > > > > connector_status_connected)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > > > > > drm.mode_config.connection_mutex,
> > > > > > > 
> > > > > > > ctx);
> > > > > > > +	if (ret)
> > > > > > > +		return ret;
> > > > > > > +
> > > > > > > +	conn_state = connector->base.state;
> > > > > > > +
> > > > > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > > > > +	if (!crtc)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > > > > +	if (ret)
> > > > > > > +		return ret;
> > > > > > > +
> > > > > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > > > > +
> > > > > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > > > > +
> > > > > > > +	if (!crtc_state->base.active)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	if (conn_state->commit &&
> > > > > > > +	    !try_wait_for_completion(&conn_state->commit-
> > > > > > > >hw_done))
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > > > > +		return 0;
> > > > > > 
> > > > > > NAK, this definitely won't work for implementing MST retraining.
> > > > > > There's
> > > > > > some
> > > > > > pretty huge differences with how retraining needs to be handled on
> > > > > > SST
> > > > > > vs. MST.
> > > > > > An example with some normal SST sink vs. what happens on my caldigit
> > > > > > TS3
> > > > > > 
> > > > > > SST:
> > > > > >     1. commit modeset, everything is OK
> > > > > >     2. something happens, sink sends shortpulse and changes link
> > > > > > status
> > > > > > registers
> > > > > >     in dpcd
> > > > > >     3. Source receives short pulse, tries retraining five times
> > > > > >     4. if this succeeds:
> > > > > >         5. we're done here
> > > > > >     6. if this fails:
> > > > > >         7. mark link status as bad
> > > > > >         8. get fallback parameters
> > > > > >         9. hotplug event
> > > > > > 
> > > > > > MST (i915 doesn't do this yet, but this is generally how it needs to
> > > > > > be
> > > > > > handled):
> > > > > >     1. commit modeset, everything is OK
> > > > > >     2. something happens (in my case, the MST hub discovers it had
> > > > > > the
> > > > > > wrong max
> > > > > >     link rate/lane count), sink sends ESI indicating channel EQ has
> > > > > > failed
> > > > > >     3. retraining commences with five retries.
> > > > > >     4. if this succeeds:
> > > > > >        5. continue
> > > > > >     6. if this fails (I actually haven't seen this once yet)
> > > > > >         7. mark link status as bad on all downstream connectors
> > > > > >         8. get fallback parameters
> > > > > >         9. hotplug event
> > > > > >     10. the retrain didn't actually work (despite what the SST link
> > > > > > status
> > > > > >     registers told us). go back to step 3 five more times
> > > > > >     11. if this fails:
> > > > > >         12. mark link status as bad on all downstream connectors
> > > > > >         13. get fallback parameters
> > > > > >         14. hotplug event
> > > > > > 
> > > > > > simply put: we really should keep the "do we need to retrain?" logic
> > > > > > out
> > > > > > of the
> > > > > > actual retraining helpers so that SST/MST codepaths can do their own
> > > > > > checks to
> > > > > > figure this out.
> > > > > 
> > > > > No, we need it since we want to check it *after* any modeset has
> > > > > finished. With MST I think what we'll want to do is find all the pipes
> > > > > affected by the link failure, lock them, and wait until they're done
> > > > > with their modesets, then we check the link state. If it's bad we
> > > > > proceed to retrain the link.
> > > > > 
> > > > > So basically just walking over the MST encoders in addition to the
> > > > > SST encoder, and repeating most of the steps in this code for each.
> > > > > Hence the the MST FIXME I left in there ;)
> > > > > 
> > > > 
> > > > Lyude,
> > > > 
> > > > I agree with Ville here, can we add the MST retrain required check from
> > > > within the intel_dp_retrain_link()? So for now the FIXME should be left
> > > > there
> > > > and MST retraining check can be added from your patches.
> > > > 
> > > > Manasi
> > > 
> > > Yep! Sorry I probably should have reiterated this part over email since I
> > > only
> > > mentioned it to ville directly: this part should definitely work fine, and
> > > intel_dp_retrain_link() is actually where we handle MST retraining in the
> > > latest version of the retraining series:
> > > 
> > > jfyi, you can find that here:
> > > https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
> > > It's almost ready for the ML, just want to get the other patches this is
> > > going
> > > to rely on in first and fix some small concerns I've got about that
> > > series'
> > > current implementation of handling modesets after a failed retrain
> > > sequence
> > > > 
> > 
> > Great!
> > After the failed retrain sequence, it will keep on retrying until has tried
> > all
> > the link rate/lane count combinations until RBR 1 lane and then fail with a
> > ERROR message.
> > What was the concern on the current implementation of modeste after failed
> > retrain?
> I actually take that back. Was worried that not freeing existing VCPI
> allocations during the first atomic check phase for the first modeset after a
> retrain failure might cause atomic checks to fail from not having enough VCPI 
> space, but then I realized that wouldn't matter since if we're not able to fit
> one display into the VCPI with a reduced pbn_div, that just means fitting both
> displays with a reduced pbn_div would also be impossible :P.
>

But then in that case, in oredr to retry at lower link rates, both these monitors will
have to drop to the next lower resolutions to fit the reduced pbn_div.

Manasi
 
> > 
> > Manasi
> >  
> > > > > > 
> > > > > > >  
> > > > > > >  	/* Suppress underruns caused by re-training */
> > > > > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc-
> > > > > > > >pipe,
> > > > > > > false);
> > > > > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > > > > *intel_dp)
> > > > > > >  	if (crtc->config->has_pch_encoder)
> > > > > > >  		intel_set_pch_fifo_underrun_reporting(dev_priv,
> > > > > > >  						      intel_crtc_
> > > > > > > pch_
> > > > > > > transcod
> > > > > > > er(crtc), true);
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > -static void
> > > > > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > > > > +/*
> > > > > > > + * If display is now connected check links status,
> > > > > > > + * there has been known issues of link loss triggering
> > > > > > > + * long pulse.
> > > > > > > + *
> > > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > > + * going back up soon after. And once that happens we must
> > > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > > + */
> > > > > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > > > > +			     struct intel_connector *connector)
> > > > > > >  {
> > > > > > > -	struct drm_i915_private *dev_priv =
> > > > > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > > > > -	struct intel_encoder *intel_encoder =
> > > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > > base;
> > > > > > > 
> > > > > > > -	struct drm_connector_state *conn_state =
> > > > > > > -		intel_dp->attached_connector->base.state;
> > > > > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > > -
> > > > > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > > > > drm.mode_config.connection_mutex));
> > > > > > > 
> > > > > > > -
> > > > > > > -	if (!intel_dp_get_link_status(intel_dp, link_status)) {
> > > > > > > -		DRM_ERROR("Failed to get link status\n");
> > > > > > > -		return;
> > > > > > > -	}
> > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > +	bool changed;
> > > > > > > +	int ret;
> > > > > > >  
> > > > > > > -	if (!conn_state->crtc)
> > > > > > > -		return;
> > > > > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > > > > >  
> > > > > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc-
> > > > > > > >mutex));
> > > > > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > > > > >  
> > > > > > > -	if (!conn_state->crtc->state->active)
> > > > > > > -		return;
> > > > > > > +	for (;;) {
> > > > 
> > > > Here if this is getting executed due to hpd ping pong during the modeset
> > > > and that modeset is already happening at a link fallback parameter then
> > > > while we call retrain link, we should also validate the link parameters
> > > > so that it doesnt try to retrain with stale values.
> > > > 
> > > > I think we need to call intel_dp_link_params_valid() before retrain.
> > > > 
> > > > > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > > > > >  
> > > > > > > -	if (conn_state->commit &&
> > > > > > > -	    !try_wait_for_completion(&conn_state->commit-
> > > > > > > >hw_done))
> > > > > > > -		return;
> > > > > > > +		if (ret == -EDEADLK) {
> > > > > > > +			drm_modeset_backoff(&ctx);
> > > > > > > +			continue;
> > > > > > > +		}
> > > > > > >  
> > > > > > > -	/*
> > > > > > > -	 * Validate the cached values of intel_dp->link_rate and
> > > > > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > > > > -	 */
> > > > > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > > link_rate,
> > > > > > > 
> > > > > > > -					intel_dp->lane_count))
> > > > > > > -		return;
> > > > > > > +		break;
> > > > > > > +	}
> > > > > > >  
> > > > > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > >lane_count)) 
> > > > > > > {
> > > > > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok,
> > > > > > > retraining\n",
> > > > > > > -			      intel_encoder->base.name);
> > > > > > > +	drm_modeset_drop_locks(&ctx);
> > > > > > > +	drm_modeset_acquire_fini(&ctx);
> > > > > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n",
> > > > > > > ret);
> > > > > > >  
> > > > > > > -		intel_dp_retrain_link(intel_dp);
> > > > > > > -	}
> > > > > > > +	return changed;
> > > > > > >  }
> > > > > > >  
> > > > > > >  /*
> > > > > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp
> > > > > > > *intel_dp)
> > > > > > >  			DRM_DEBUG_DRIVER("CP or sink specific irq
> > > > > > > unhandled\n");
> > > > > > >  	}
> > > > > > >  
> > > > > > > -	intel_dp_check_link_status(intel_dp);
> > > > > > > +	/* defer to the hotplug work for link retraining if
> > > > > > > needed */
> > > > > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > > > > +		return false;
> > > > > > >  
> > > > > > >  	if (intel_dp->compliance.test_type ==
> > > > > > > DP_TEST_LINK_TRAINING)
> > > > > > > {
> > > > > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > > > > requested\n");
> > > > > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct intel_connector
> > > > > > > *connector)
> > > > > > >  		 */
> > > > > > >  		status = connector_status_disconnected;
> > > > > > >  		goto out;
> > > > > > > -	} else {
> > > > > > > -		/*
> > > > > > > -		 * If display is now connected check links
> > > > > > > status,
> > > > > > > -		 * there has been known issues of link loss
> > > > > > > triggerring
> > > > > > > -		 * long pulse.
> > > > > > > -		 *
> > > > > > > -		 * Some sinks (eg. ASUS PB287Q) seem to perform
> > > > > > > some
> > > > > > > -		 * weird HPD ping pong during modesets. So we can
> > > > > > > apparently
> > > > > > > -		 * end up with HPD going low during a modeset,
> > > > > > > and
> > > > > > > then
> > > > > > > -		 * going back up soon after. And once that
> > > > > > > happens we
> > > > > > > must
> > > > > > > -		 * retrain the link to get a picture. That's in
> > > > > > > case
> > > > > > > no
> > > > > > > -		 * userspace component reacted to intermittent
> > > > > > > HPD
> > > > > > > dip.
> > > > > > > -		 */
> > > > > > > -		intel_dp_check_link_status(intel_dp);
> > > > > > >  	}
> > > > > > >  
> > > > > > >  	/*
> > > > > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct
> > > > > > > intel_digital_port
> > > > > > > *intel_dig_port, bool long_hpd)
> > > > > > >  	}
> > > > > > >  
> > > > > > >  	if (!intel_dp->is_mst) {
> > > > > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > > > > -		struct drm_connector *connector = &intel_dp-
> > > > > > > > attached_connector->base;
> > > > > > > 
> > > > > > > -		struct drm_crtc *crtc;
> > > > > > > -		int iret;
> > > > > > > -		bool handled = false;
> > > > > > > -
> > > > > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > > > > -retry:
> > > > > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > > > > drm.mode_config.connection_mutex, &ctx);
> > > > > > > 
> > > > > > > -		if (iret)
> > > > > > > -			goto err;
> > > > > > > -
> > > > > > > -		crtc = connector->state->crtc;
> > > > > > > -		if (crtc) {
> > > > > > > -			iret = drm_modeset_lock(&crtc->mutex,
> > > > > > > &ctx);
> > > > > > > -			if (iret)
> > > > > > > -				goto err;
> > > > > > > -		}
> > > > > > > +		bool handled;
> > > > > > >  
> > > > > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > > > > >  
> > > > > > > -err:
> > > > > > > -		if (iret == -EDEADLK) {
> > > > > > > -			drm_modeset_backoff(&ctx);
> > > > > > > -			goto retry;
> > > > > > > -		}
> > > > > > > -
> > > > > > > -		drm_modeset_drop_locks(&ctx);
> > > > > > > -		drm_modeset_acquire_fini(&ctx);
> > > > > > > -		WARN(iret, "Acquiring modeset locks failed with
> > > > > > > %i\n", iret);
> > > > > > > -
> > > > > > >  		/* Short pulse can signify loss of hdcp
> > > > > > > authentication */
> > > > > > >  		intel_hdcp_check_link(intel_dp-
> > > > > > > >attached_connector);
> > > > > > >  
> > > > > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct drm_i915_private
> > > > > > > *dev_priv,
> > > > > > >  			     "DP %c", port_name(port)))
> > > > > > >  		goto err_encoder_init;
> > > > > > >  
> > > > > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > > > > >  	intel_encoder->compute_config = intel_dp_compute_config;
> > > > > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > > > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > @@ -1611,6 +1611,8 @@ int
> > > > > > > intel_dp_get_link_train_fallback_values(struct
> > > > > > > intel_dp *intel_dp,
> > > > > > >  					    int link_rate,
> > > > > > > uint8_t
> > > > > > > lane_count);
> > > > > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > > > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > > +			  struct drm_modeset_acquire_ctx *ctx);
> > > > > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > > > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > > > > >  void intel_dp_encoder_suspend(struct intel_encoder
> > > > > > > *intel_encoder);
> > > > > 
> > > > > -- 
> > > > > Ville Syrjälä
> > > > > Intel OTC
> > > > > _______________________________________________
> > > > > Intel-gfx mailing list
> > > > > Intel-gfx@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > 
> > > -- 
> > > Cheers,
> > > 	Lyude Paul
> -- 
> Cheers,
> 	Lyude Paul
Lyude Paul Feb. 28, 2018, 8:10 p.m. UTC | #8
On Wed, 2018-02-28 at 11:57 -0800, Manasi Navare wrote:
> On Wed, Feb 28, 2018 at 02:41:06PM -0500, Lyude Paul wrote:
> > On Wed, 2018-02-28 at 11:27 -0800, Manasi Navare wrote:
> > > On Wed, Feb 28, 2018 at 02:07:34PM -0500, Lyude Paul wrote:
> > > > 
> > > > On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> > > > > Ville,  thanks for the patch and
> > > > > Sorry for not being able to review this earlier.
> > > > > Please find some comments below:
> > > > > 
> > > > > On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > > > > > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > > > > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > > > > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > 
> > > > > > > > Doing link retraining from the short pulse handler is
> > > > > > > > problematic
> > > > > > > > since
> > > > > > > > that might introduce deadlocks with MST sideband processing.
> > > > > > > > Currently
> > > > > > > > we don't retrain MST links from this code, but we want to
> > > > > > > > change
> > > > > > > > that.
> > > > > > > > So better to move the entire thing to the hotplug work. We can
> > > > > > > > utilize
> > > > > > > > the new encoder->hotplug() hook for this.
> > > > > > > > 
> > > > > > > > The only thing we leave in the short pulse handler is the link
> > > > > > > > status
> > > > > > > > check. That one still depends on the link parameters stored
> > > > > > > > under
> > > > > > > > intel_dp, so no locking around that but races should be mostly
> > > > > > > > harmless
> > > > > > > > as the actual retraining code will recheck the link state if
> > > > > > > > we
> > > > > > > > end up there by mistake.
> > > > > > > > 
> > > > > > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > > > > > >     Check the connector type to figure out if we should do
> > > > > > > >     the HDMI thing or the DP think for DDI
> > > > > > > > 
> > > > > > > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > > > > > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > ---
> > > > > > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > > > > > >  drivers/gpu/drm/i915/intel_dp.c  | 196
> > > > > > > > ++++++++++++++++++++++--
> > > > > > > > ----
> > > > > > > > --------
> > > > > > > > ---
> > > > > > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > > > > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > > > > > intel_encoder
> > > > > > > > *encoder,
> > > > > > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > > > > > >  
> > > > > > > >  	for (;;) {
> > > > > > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > > > > > +		if (connector->base.connector_type ==
> > > > > > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > > > > > +			ret = intel_hdmi_reset_link(encoder,
> > > > > > > > &ctx);
> > > > > > > > +		else
> > > > > > > > +			ret = intel_dp_retrain_link(encoder,
> > > > > > > > &ctx);
> > > > > > > >  
> > > > > > > >  		if (ret == -EDEADLK) {
> > > > > > > >  			drm_modeset_backoff(&ctx);
> > > > > > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct
> > > > > > > > drm_i915_private
> > > > > > > > *dev_priv,
> > > > > > > > enum port port)
> > > > > > > >  	drm_encoder_init(&dev_priv->drm, encoder,
> > > > > > > > &intel_ddi_funcs,
> > > > > > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > > > > > port_name(port));
> > > > > > > >  
> > > > > > > > -	if (init_hdmi)
> > > > > > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > > > -	else
> > > > > > > > -		intel_encoder->hotplug =
> > > > > > > > intel_encoder_hotplug;
> > > > > > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > > >  	intel_encoder->compute_output_type =
> > > > > > > > intel_ddi_compute_output_type;
> > > > > > > >  	intel_encoder->compute_config =
> > > > > > > > intel_ddi_compute_config;
> > > > > > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > index 6bbf14410c2a..152016e09a11 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct
> > > > > > > > intel_dp
> > > > > > > > *intel_dp)
> > > > > > > >  	return -EINVAL;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > -static void
> > > > > > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > > > > > +static bool
> > > > > > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > > > > > +{
> > > > > > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > > > +
> > > > > > > > +	if (!intel_dp_get_link_status(intel_dp, link_status))
> > > > > > > > {
> > > > > > > > +		DRM_ERROR("Failed to get link status\n");
> > > > > > > > +		return false;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	/*
> > > > > > > > +	 * Validate the cached values of intel_dp->link_rate
> > > > > > > > and
> > > > > > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > > > > > +	 */
> > > > > > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > > > link_rate,
> > > > > > > > 
> > > > > > > > +					intel_dp-
> > > > > > > > >lane_count))
> > > > > > > > +		return false;
> > > > > > > > +
> > > > > > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > > > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > > > lane_count);
> > > > > > > > 
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +/*
> > > > > > > > + * If display is now connected check links status,
> > > > > > > > + * there has been known issues of link loss triggering
> > > > > > > > + * long pulse.
> > > > > > > > + *
> > > > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > > > + * going back up soon after. And once that happens we must
> > > > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > > > + */
> > > > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > > > +			  struct drm_modeset_acquire_ctx
> > > > > > > > *ctx)
> > > > > > > >  {
> > > > > > > > -	struct intel_encoder *encoder =
> > > > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > > > base;
> > > > > > > > 
> > > > > > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > > > > > > base.dev);
> > > > > > > > 
> > > > > > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder-
> > > > > > > > > base.crtc);
> > > > > > > > 
> > > > > > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder-
> > > > > > > > > base);
> > > > > > > > 
> > > > > > > > +	struct intel_connector *connector = intel_dp-
> > > > > > > > > attached_connector;
> > > > > > > > 
> > > > > > > > +	struct drm_connector_state *conn_state;
> > > > > > > > +	struct intel_crtc_state *crtc_state;
> > > > > > > > +	struct intel_crtc *crtc;
> > > > > > > > +	int ret;
> > > > > > > > +
> > > > > > > > +	/* FIXME handle the MST connectors as well */
> > > > > > > > +
> > > > > > > > +	if (!connector || connector->base.status !=
> > > > > > > > connector_status_connected)
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > > > > > > drm.mode_config.connection_mutex,
> > > > > > > > 
> > > > > > > > ctx);
> > > > > > > > +	if (ret)
> > > > > > > > +		return ret;
> > > > > > > > +
> > > > > > > > +	conn_state = connector->base.state;
> > > > > > > > +
> > > > > > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > > > > > +	if (!crtc)
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > > > > > +	if (ret)
> > > > > > > > +		return ret;
> > > > > > > > +
> > > > > > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > > > > > +
> > > > > > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > > > > > +
> > > > > > > > +	if (!crtc_state->base.active)
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	if (conn_state->commit &&
> > > > > > > > +	    !try_wait_for_completion(&conn_state->commit-
> > > > > > > > > hw_done))
> > > > > > > > 
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > > > > > +		return 0;
> > > > > > > 
> > > > > > > NAK, this definitely won't work for implementing MST retraining.
> > > > > > > There's
> > > > > > > some
> > > > > > > pretty huge differences with how retraining needs to be handled
> > > > > > > on
> > > > > > > SST
> > > > > > > vs. MST.
> > > > > > > An example with some normal SST sink vs. what happens on my
> > > > > > > caldigit
> > > > > > > TS3
> > > > > > > 
> > > > > > > SST:
> > > > > > >     1. commit modeset, everything is OK
> > > > > > >     2. something happens, sink sends shortpulse and changes link
> > > > > > > status
> > > > > > > registers
> > > > > > >     in dpcd
> > > > > > >     3. Source receives short pulse, tries retraining five times
> > > > > > >     4. if this succeeds:
> > > > > > >         5. we're done here
> > > > > > >     6. if this fails:
> > > > > > >         7. mark link status as bad
> > > > > > >         8. get fallback parameters
> > > > > > >         9. hotplug event
> > > > > > > 
> > > > > > > MST (i915 doesn't do this yet, but this is generally how it
> > > > > > > needs to
> > > > > > > be
> > > > > > > handled):
> > > > > > >     1. commit modeset, everything is OK
> > > > > > >     2. something happens (in my case, the MST hub discovers it
> > > > > > > had
> > > > > > > the
> > > > > > > wrong max
> > > > > > >     link rate/lane count), sink sends ESI indicating channel EQ
> > > > > > > has
> > > > > > > failed
> > > > > > >     3. retraining commences with five retries.
> > > > > > >     4. if this succeeds:
> > > > > > >        5. continue
> > > > > > >     6. if this fails (I actually haven't seen this once yet)
> > > > > > >         7. mark link status as bad on all downstream connectors
> > > > > > >         8. get fallback parameters
> > > > > > >         9. hotplug event
> > > > > > >     10. the retrain didn't actually work (despite what the SST
> > > > > > > link
> > > > > > > status
> > > > > > >     registers told us). go back to step 3 five more times
> > > > > > >     11. if this fails:
> > > > > > >         12. mark link status as bad on all downstream connectors
> > > > > > >         13. get fallback parameters
> > > > > > >         14. hotplug event
> > > > > > > 
> > > > > > > simply put: we really should keep the "do we need to retrain?"
> > > > > > > logic
> > > > > > > out
> > > > > > > of the
> > > > > > > actual retraining helpers so that SST/MST codepaths can do their
> > > > > > > own
> > > > > > > checks to
> > > > > > > figure this out.
> > > > > > 
> > > > > > No, we need it since we want to check it *after* any modeset has
> > > > > > finished. With MST I think what we'll want to do is find all the
> > > > > > pipes
> > > > > > affected by the link failure, lock them, and wait until they're
> > > > > > done
> > > > > > with their modesets, then we check the link state. If it's bad we
> > > > > > proceed to retrain the link.
> > > > > > 
> > > > > > So basically just walking over the MST encoders in addition to the
> > > > > > SST encoder, and repeating most of the steps in this code for
> > > > > > each.
> > > > > > Hence the the MST FIXME I left in there ;)
> > > > > > 
> > > > > 
> > > > > Lyude,
> > > > > 
> > > > > I agree with Ville here, can we add the MST retrain required check
> > > > > from
> > > > > within the intel_dp_retrain_link()? So for now the FIXME should be
> > > > > left
> > > > > there
> > > > > and MST retraining check can be added from your patches.
> > > > > 
> > > > > Manasi
> > > > 
> > > > Yep! Sorry I probably should have reiterated this part over email
> > > > since I
> > > > only
> > > > mentioned it to ville directly: this part should definitely work fine,
> > > > and
> > > > intel_dp_retrain_link() is actually where we handle MST retraining in
> > > > the
> > > > latest version of the retraining series:
> > > > 
> > > > jfyi, you can find that here:
> > > > https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
> > > > It's almost ready for the ML, just want to get the other patches this
> > > > is
> > > > going
> > > > to rely on in first and fix some small concerns I've got about that
> > > > series'
> > > > current implementation of handling modesets after a failed retrain
> > > > sequence
> > > > > 
> > > 
> > > Great!
> > > After the failed retrain sequence, it will keep on retrying until has
> > > tried
> > > all
> > > the link rate/lane count combinations until RBR 1 lane and then fail
> > > with a
> > > ERROR message.
> > > What was the concern on the current implementation of modeste after
> > > failed
> > > retrain?
> > 
> > I actually take that back. Was worried that not freeing existing VCPI
> > allocations during the first atomic check phase for the first modeset
> > after a
> > retrain failure might cause atomic checks to fail from not having enough
> > VCPI 
> > space, but then I realized that wouldn't matter since if we're not able to
> > fit
> > one display into the VCPI with a reduced pbn_div, that just means fitting
> > both
> > displays with a reduced pbn_div would also be impossible :P.
> > 
> 
> But then in that case, in oredr to retry at lower link rates, both these
> monitors will
> have to drop to the next lower resolutions to fit the reduced pbn_div.

Yep, that's the idea! In which case the atomic check should fail, which should
cause userspace to try at a reduced resolution or take some other reparative
action
> 
> Manasi
>  
> > > 
> > > Manasi
> > >  
> > > > > > > 
> > > > > > > >  
> > > > > > > >  	/* Suppress underruns caused by re-training */
> > > > > > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc-
> > > > > > > > > pipe,
> > > > > > > > 
> > > > > > > > false);
> > > > > > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > > > > > *intel_dp)
> > > > > > > >  	if (crtc->config->has_pch_encoder)
> > > > > > > >  		intel_set_pch_fifo_underrun_reporting(dev_pri
> > > > > > > > v,
> > > > > > > >  						      intel_c
> > > > > > > > rtc_
> > > > > > > > pch_
> > > > > > > > transcod
> > > > > > > > er(crtc), true);
> > > > > > > > +
> > > > > > > > +	return 0;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > -static void
> > > > > > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > > > > > +/*
> > > > > > > > + * If display is now connected check links status,
> > > > > > > > + * there has been known issues of link loss triggering
> > > > > > > > + * long pulse.
> > > > > > > > + *
> > > > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > > > + * going back up soon after. And once that happens we must
> > > > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > > > + */
> > > > > > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > > > > > +			     struct intel_connector
> > > > > > > > *connector)
> > > > > > > >  {
> > > > > > > > -	struct drm_i915_private *dev_priv =
> > > > > > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > > > > > -	struct intel_encoder *intel_encoder =
> > > > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > > > base;
> > > > > > > > 
> > > > > > > > -	struct drm_connector_state *conn_state =
> > > > > > > > -		intel_dp->attached_connector->base.state;
> > > > > > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > > > -
> > > > > > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > > > > > drm.mode_config.connection_mutex));
> > > > > > > > 
> > > > > > > > -
> > > > > > > > -	if (!intel_dp_get_link_status(intel_dp, link_status))
> > > > > > > > {
> > > > > > > > -		DRM_ERROR("Failed to get link status\n");
> > > > > > > > -		return;
> > > > > > > > -	}
> > > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > > +	bool changed;
> > > > > > > > +	int ret;
> > > > > > > >  
> > > > > > > > -	if (!conn_state->crtc)
> > > > > > > > -		return;
> > > > > > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > > > > > >  
> > > > > > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc-
> > > > > > > > > mutex));
> > > > > > > > 
> > > > > > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > > > > > >  
> > > > > > > > -	if (!conn_state->crtc->state->active)
> > > > > > > > -		return;
> > > > > > > > +	for (;;) {
> > > > > 
> > > > > Here if this is getting executed due to hpd ping pong during the
> > > > > modeset
> > > > > and that modeset is already happening at a link fallback parameter
> > > > > then
> > > > > while we call retrain link, we should also validate the link
> > > > > parameters
> > > > > so that it doesnt try to retrain with stale values.
> > > > > 
> > > > > I think we need to call intel_dp_link_params_valid() before retrain.
> > > > > 
> > > > > > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > > > > > >  
> > > > > > > > -	if (conn_state->commit &&
> > > > > > > > -	    !try_wait_for_completion(&conn_state->commit-
> > > > > > > > > hw_done))
> > > > > > > > 
> > > > > > > > -		return;
> > > > > > > > +		if (ret == -EDEADLK) {
> > > > > > > > +			drm_modeset_backoff(&ctx);
> > > > > > > > +			continue;
> > > > > > > > +		}
> > > > > > > >  
> > > > > > > > -	/*
> > > > > > > > -	 * Validate the cached values of intel_dp->link_rate
> > > > > > > > and
> > > > > > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > > > > > -	 */
> > > > > > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > > > link_rate,
> > > > > > > > 
> > > > > > > > -					intel_dp-
> > > > > > > > >lane_count))
> > > > > > > > -		return;
> > > > > > > > +		break;
> > > > > > > > +	}
> > > > > > > >  
> > > > > > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > > > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > > > lane_count)) 
> > > > > > > > 
> > > > > > > > {
> > > > > > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok,
> > > > > > > > retraining\n",
> > > > > > > > -			      intel_encoder->base.name);
> > > > > > > > +	drm_modeset_drop_locks(&ctx);
> > > > > > > > +	drm_modeset_acquire_fini(&ctx);
> > > > > > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n",
> > > > > > > > ret);
> > > > > > > >  
> > > > > > > > -		intel_dp_retrain_link(intel_dp);
> > > > > > > > -	}
> > > > > > > > +	return changed;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > >  /*
> > > > > > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp
> > > > > > > > *intel_dp)
> > > > > > > >  			DRM_DEBUG_DRIVER("CP or sink specific
> > > > > > > > irq
> > > > > > > > unhandled\n");
> > > > > > > >  	}
> > > > > > > >  
> > > > > > > > -	intel_dp_check_link_status(intel_dp);
> > > > > > > > +	/* defer to the hotplug work for link retraining if
> > > > > > > > needed */
> > > > > > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > > > > > +		return false;
> > > > > > > >  
> > > > > > > >  	if (intel_dp->compliance.test_type ==
> > > > > > > > DP_TEST_LINK_TRAINING)
> > > > > > > > {
> > > > > > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > > > > > requested\n");
> > > > > > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct
> > > > > > > > intel_connector
> > > > > > > > *connector)
> > > > > > > >  		 */
> > > > > > > >  		status = connector_status_disconnected;
> > > > > > > >  		goto out;
> > > > > > > > -	} else {
> > > > > > > > -		/*
> > > > > > > > -		 * If display is now connected check links
> > > > > > > > status,
> > > > > > > > -		 * there has been known issues of link loss
> > > > > > > > triggerring
> > > > > > > > -		 * long pulse.
> > > > > > > > -		 *
> > > > > > > > -		 * Some sinks (eg. ASUS PB287Q) seem to
> > > > > > > > perform
> > > > > > > > some
> > > > > > > > -		 * weird HPD ping pong during modesets. So we
> > > > > > > > can
> > > > > > > > apparently
> > > > > > > > -		 * end up with HPD going low during a
> > > > > > > > modeset,
> > > > > > > > and
> > > > > > > > then
> > > > > > > > -		 * going back up soon after. And once that
> > > > > > > > happens we
> > > > > > > > must
> > > > > > > > -		 * retrain the link to get a picture. That's
> > > > > > > > in
> > > > > > > > case
> > > > > > > > no
> > > > > > > > -		 * userspace component reacted to
> > > > > > > > intermittent
> > > > > > > > HPD
> > > > > > > > dip.
> > > > > > > > -		 */
> > > > > > > > -		intel_dp_check_link_status(intel_dp);
> > > > > > > >  	}
> > > > > > > >  
> > > > > > > >  	/*
> > > > > > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct
> > > > > > > > intel_digital_port
> > > > > > > > *intel_dig_port, bool long_hpd)
> > > > > > > >  	}
> > > > > > > >  
> > > > > > > >  	if (!intel_dp->is_mst) {
> > > > > > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > > > > > -		struct drm_connector *connector = &intel_dp-
> > > > > > > > > attached_connector->base;
> > > > > > > > 
> > > > > > > > -		struct drm_crtc *crtc;
> > > > > > > > -		int iret;
> > > > > > > > -		bool handled = false;
> > > > > > > > -
> > > > > > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > > > > > -retry:
> > > > > > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > > > > > drm.mode_config.connection_mutex, &ctx);
> > > > > > > > 
> > > > > > > > -		if (iret)
> > > > > > > > -			goto err;
> > > > > > > > -
> > > > > > > > -		crtc = connector->state->crtc;
> > > > > > > > -		if (crtc) {
> > > > > > > > -			iret = drm_modeset_lock(&crtc->mutex,
> > > > > > > > &ctx);
> > > > > > > > -			if (iret)
> > > > > > > > -				goto err;
> > > > > > > > -		}
> > > > > > > > +		bool handled;
> > > > > > > >  
> > > > > > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > > > > > >  
> > > > > > > > -err:
> > > > > > > > -		if (iret == -EDEADLK) {
> > > > > > > > -			drm_modeset_backoff(&ctx);
> > > > > > > > -			goto retry;
> > > > > > > > -		}
> > > > > > > > -
> > > > > > > > -		drm_modeset_drop_locks(&ctx);
> > > > > > > > -		drm_modeset_acquire_fini(&ctx);
> > > > > > > > -		WARN(iret, "Acquiring modeset locks failed
> > > > > > > > with
> > > > > > > > %i\n", iret);
> > > > > > > > -
> > > > > > > >  		/* Short pulse can signify loss of hdcp
> > > > > > > > authentication */
> > > > > > > >  		intel_hdcp_check_link(intel_dp-
> > > > > > > > > attached_connector);
> > > > > > > > 
> > > > > > > >  
> > > > > > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct
> > > > > > > > drm_i915_private
> > > > > > > > *dev_priv,
> > > > > > > >  			     "DP %c", port_name(port)))
> > > > > > > >  		goto err_encoder_init;
> > > > > > > >  
> > > > > > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > > > > > >  	intel_encoder->compute_config =
> > > > > > > > intel_dp_compute_config;
> > > > > > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > > > > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > @@ -1611,6 +1611,8 @@ int
> > > > > > > > intel_dp_get_link_train_fallback_values(struct
> > > > > > > > intel_dp *intel_dp,
> > > > > > > >  					    int link_rate,
> > > > > > > > uint8_t
> > > > > > > > lane_count);
> > > > > > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > > > > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > > > +			  struct drm_modeset_acquire_ctx
> > > > > > > > *ctx);
> > > > > > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > > > > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > > > > > >  void intel_dp_encoder_suspend(struct intel_encoder
> > > > > > > > *intel_encoder);
> > > > > > 
> > > > > > -- 
> > > > > > Ville Syrjälä
> > > > > > Intel OTC
> > > > > > _______________________________________________
> > > > > > Intel-gfx mailing list
> > > > > > Intel-gfx@lists.freedesktop.org
> > > > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > > 
> > > > -- 
> > > > Cheers,
> > > > 	Lyude Paul
> > 
> > -- 
> > Cheers,
> > 	Lyude Paul
Navare, Manasi March 5, 2018, 11:41 p.m. UTC | #9
On Wed, Feb 28, 2018 at 03:10:24PM -0500, Lyude Paul wrote:
> On Wed, 2018-02-28 at 11:57 -0800, Manasi Navare wrote:
> > On Wed, Feb 28, 2018 at 02:41:06PM -0500, Lyude Paul wrote:
> > > On Wed, 2018-02-28 at 11:27 -0800, Manasi Navare wrote:
> > > > On Wed, Feb 28, 2018 at 02:07:34PM -0500, Lyude Paul wrote:
> > > > > 
> > > > > On Tue, 2018-02-27 at 23:17 -0800, Manasi Navare wrote:
> > > > > > Ville,  thanks for the patch and
> > > > > > Sorry for not being able to review this earlier.
> > > > > > Please find some comments below:
> > > > > > 
> > > > > > On Wed, Jan 31, 2018 at 03:27:10PM +0200, Ville Syrjälä wrote:
> > > > > > > On Tue, Jan 30, 2018 at 06:16:59PM -0500, Lyude Paul wrote:
> > > > > > > > On Wed, 2018-01-17 at 21:21 +0200, Ville Syrjala wrote:
> > > > > > > > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > 
> > > > > > > > > Doing link retraining from the short pulse handler is
> > > > > > > > > problematic
> > > > > > > > > since
> > > > > > > > > that might introduce deadlocks with MST sideband processing.
> > > > > > > > > Currently
> > > > > > > > > we don't retrain MST links from this code, but we want to
> > > > > > > > > change
> > > > > > > > > that.
> > > > > > > > > So better to move the entire thing to the hotplug work. We can
> > > > > > > > > utilize
> > > > > > > > > the new encoder->hotplug() hook for this.
> > > > > > > > > 
> > > > > > > > > The only thing we leave in the short pulse handler is the link
> > > > > > > > > status
> > > > > > > > > check. That one still depends on the link parameters stored
> > > > > > > > > under
> > > > > > > > > intel_dp, so no locking around that but races should be mostly
> > > > > > > > > harmless
> > > > > > > > > as the actual retraining code will recheck the link state if
> > > > > > > > > we
> > > > > > > > > end up there by mistake.
> > > > > > > > > 
> > > > > > > > > v2: Rebase due to ->post_hotplug() now being just ->hotplug()
> > > > > > > > >     Check the connector type to figure out if we should do
> > > > > > > > >     the HDMI thing or the DP think for DDI
> > > > > > > > > 
> > > > > > > > > Cc: Manasi Navare <manasi.d.navare@intel.com>
> > > > > > > > > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > > > > > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/gpu/drm/i915/intel_ddi.c |  10 +-
> > > > > > > > >  drivers/gpu/drm/i915/intel_dp.c  | 196
> > > > > > > > > ++++++++++++++++++++++--
> > > > > > > > > ----
> > > > > > > > > --------
> > > > > > > > > ---
> > > > > > > > >  drivers/gpu/drm/i915/intel_drv.h |   2 +
> > > > > > > > >  3 files changed, 120 insertions(+), 88 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > > b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > > index 25793bdc692f..5f3d58f1ae6e 100644
> > > > > > > > > --- a/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > > +++ b/drivers/gpu/drm/i915/intel_ddi.c
> > > > > > > > > @@ -2880,7 +2880,10 @@ static bool intel_ddi_hotplug(struct
> > > > > > > > > intel_encoder
> > > > > > > > > *encoder,
> > > > > > > > >  	drm_modeset_acquire_init(&ctx, 0);
> > > > > > > > >  
> > > > > > > > >  	for (;;) {
> > > > > > > > > -		ret = intel_hdmi_reset_link(encoder, &ctx);
> > > > > > > > > +		if (connector->base.connector_type ==
> > > > > > > > > DRM_MODE_CONNECTOR_HDMIA)
> > > > > > > > > +			ret = intel_hdmi_reset_link(encoder,
> > > > > > > > > &ctx);
> > > > > > > > > +		else
> > > > > > > > > +			ret = intel_dp_retrain_link(encoder,
> > > > > > > > > &ctx);
> > > > > > > > >  
> > > > > > > > >  		if (ret == -EDEADLK) {
> > > > > > > > >  			drm_modeset_backoff(&ctx);
> > > > > > > > > @@ -3007,10 +3010,7 @@ void intel_ddi_init(struct
> > > > > > > > > drm_i915_private
> > > > > > > > > *dev_priv,
> > > > > > > > > enum port port)
> > > > > > > > >  	drm_encoder_init(&dev_priv->drm, encoder,
> > > > > > > > > &intel_ddi_funcs,
> > > > > > > > >  			 DRM_MODE_ENCODER_TMDS, "DDI %c",
> > > > > > > > > port_name(port));
> > > > > > > > >  
> > > > > > > > > -	if (init_hdmi)
> > > > > > > > > -		intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > > > > -	else
> > > > > > > > > -		intel_encoder->hotplug =
> > > > > > > > > intel_encoder_hotplug;
> > > > > > > > > +	intel_encoder->hotplug = intel_ddi_hotplug;
> > > > > > > > >  	intel_encoder->compute_output_type =
> > > > > > > > > intel_ddi_compute_output_type;
> > > > > > > > >  	intel_encoder->compute_config =
> > > > > > > > > intel_ddi_compute_config;
> > > > > > > > >  	intel_encoder->enable = intel_enable_ddi;
> > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > > b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > > index 6bbf14410c2a..152016e09a11 100644
> > > > > > > > > --- a/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > > +++ b/drivers/gpu/drm/i915/intel_dp.c
> > > > > > > > > @@ -4275,12 +4275,83 @@ intel_dp_check_mst_status(struct
> > > > > > > > > intel_dp
> > > > > > > > > *intel_dp)
> > > > > > > > >  	return -EINVAL;
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > > -static void
> > > > > > > > > -intel_dp_retrain_link(struct intel_dp *intel_dp)
> > > > > > > > > +static bool
> > > > > > > > > +intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
> > > > > > > > > +{
> > > > > > > > > +	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > > > > +
> > > > > > > > > +	if (!intel_dp_get_link_status(intel_dp, link_status))
> > > > > > > > > {
> > > > > > > > > +		DRM_ERROR("Failed to get link status\n");
> > > > > > > > > +		return false;
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	/*
> > > > > > > > > +	 * Validate the cached values of intel_dp->link_rate
> > > > > > > > > and
> > > > > > > > > +	 * intel_dp->lane_count before attempting to retrain.
> > > > > > > > > +	 */
> > > > > > > > > +	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > > > > link_rate,
> > > > > > > > > 
> > > > > > > > > +					intel_dp-
> > > > > > > > > >lane_count))
> > > > > > > > > +		return false;
> > > > > > > > > +
> > > > > > > > > +	/* Retrain if Channel EQ or CR not ok */
> > > > > > > > > +	return !drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > > > > lane_count);
> > > > > > > > > 
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +/*
> > > > > > > > > + * If display is now connected check links status,
> > > > > > > > > + * there has been known issues of link loss triggering
> > > > > > > > > + * long pulse.
> > > > > > > > > + *
> > > > > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > > > > + * going back up soon after. And once that happens we must
> > > > > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > > > > + */
> > > > > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > > > > +			  struct drm_modeset_acquire_ctx
> > > > > > > > > *ctx)
> > > > > > > > >  {
> > > > > > > > > -	struct intel_encoder *encoder =
> > > > > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > > > > base;
> > > > > > > > > 
> > > > > > > > >  	struct drm_i915_private *dev_priv = to_i915(encoder-
> > > > > > > > > > base.dev);
> > > > > > > > > 
> > > > > > > > > -	struct intel_crtc *crtc = to_intel_crtc(encoder-
> > > > > > > > > > base.crtc);
> > > > > > > > > 
> > > > > > > > > +	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder-
> > > > > > > > > > base);
> > > > > > > > > 
> > > > > > > > > +	struct intel_connector *connector = intel_dp-
> > > > > > > > > > attached_connector;
> > > > > > > > > 
> > > > > > > > > +	struct drm_connector_state *conn_state;
> > > > > > > > > +	struct intel_crtc_state *crtc_state;
> > > > > > > > > +	struct intel_crtc *crtc;
> > > > > > > > > +	int ret;
> > > > > > > > > +
> > > > > > > > > +	/* FIXME handle the MST connectors as well */
> > > > > > > > > +
> > > > > > > > > +	if (!connector || connector->base.status !=
> > > > > > > > > connector_status_connected)
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	ret = drm_modeset_lock(&dev_priv-
> > > > > > > > > > drm.mode_config.connection_mutex,
> > > > > > > > > 
> > > > > > > > > ctx);
> > > > > > > > > +	if (ret)
> > > > > > > > > +		return ret;
> > > > > > > > > +
> > > > > > > > > +	conn_state = connector->base.state;
> > > > > > > > > +
> > > > > > > > > +	crtc = to_intel_crtc(conn_state->crtc);
> > > > > > > > > +	if (!crtc)
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
> > > > > > > > > +	if (ret)
> > > > > > > > > +		return ret;
> > > > > > > > > +
> > > > > > > > > +	crtc_state = to_intel_crtc_state(crtc->base.state);
> > > > > > > > > +
> > > > > > > > > +	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
> > > > > > > > > +
> > > > > > > > > +	if (!crtc_state->base.active)
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	if (conn_state->commit &&
> > > > > > > > > +	    !try_wait_for_completion(&conn_state->commit-
> > > > > > > > > > hw_done))
> > > > > > > > > 
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	if (!intel_dp_needs_link_retrain(intel_dp))
> > > > > > > > > +		return 0;
> > > > > > > > 
> > > > > > > > NAK, this definitely won't work for implementing MST retraining.
> > > > > > > > There's
> > > > > > > > some
> > > > > > > > pretty huge differences with how retraining needs to be handled
> > > > > > > > on
> > > > > > > > SST
> > > > > > > > vs. MST.
> > > > > > > > An example with some normal SST sink vs. what happens on my
> > > > > > > > caldigit
> > > > > > > > TS3
> > > > > > > > 
> > > > > > > > SST:
> > > > > > > >     1. commit modeset, everything is OK
> > > > > > > >     2. something happens, sink sends shortpulse and changes link
> > > > > > > > status
> > > > > > > > registers
> > > > > > > >     in dpcd
> > > > > > > >     3. Source receives short pulse, tries retraining five times
> > > > > > > >     4. if this succeeds:
> > > > > > > >         5. we're done here
> > > > > > > >     6. if this fails:
> > > > > > > >         7. mark link status as bad
> > > > > > > >         8. get fallback parameters
> > > > > > > >         9. hotplug event
> > > > > > > > 
> > > > > > > > MST (i915 doesn't do this yet, but this is generally how it
> > > > > > > > needs to
> > > > > > > > be
> > > > > > > > handled):
> > > > > > > >     1. commit modeset, everything is OK
> > > > > > > >     2. something happens (in my case, the MST hub discovers it
> > > > > > > > had
> > > > > > > > the
> > > > > > > > wrong max
> > > > > > > >     link rate/lane count), sink sends ESI indicating channel EQ
> > > > > > > > has
> > > > > > > > failed
> > > > > > > >     3. retraining commences with five retries.
> > > > > > > >     4. if this succeeds:
> > > > > > > >        5. continue
> > > > > > > >     6. if this fails (I actually haven't seen this once yet)
> > > > > > > >         7. mark link status as bad on all downstream connectors
> > > > > > > >         8. get fallback parameters
> > > > > > > >         9. hotplug event
> > > > > > > >     10. the retrain didn't actually work (despite what the SST
> > > > > > > > link
> > > > > > > > status
> > > > > > > >     registers told us). go back to step 3 five more times
> > > > > > > >     11. if this fails:
> > > > > > > >         12. mark link status as bad on all downstream connectors
> > > > > > > >         13. get fallback parameters
> > > > > > > >         14. hotplug event
> > > > > > > > 
> > > > > > > > simply put: we really should keep the "do we need to retrain?"
> > > > > > > > logic
> > > > > > > > out
> > > > > > > > of the
> > > > > > > > actual retraining helpers so that SST/MST codepaths can do their
> > > > > > > > own
> > > > > > > > checks to
> > > > > > > > figure this out.
> > > > > > > 
> > > > > > > No, we need it since we want to check it *after* any modeset has
> > > > > > > finished. With MST I think what we'll want to do is find all the
> > > > > > > pipes
> > > > > > > affected by the link failure, lock them, and wait until they're
> > > > > > > done
> > > > > > > with their modesets, then we check the link state. If it's bad we
> > > > > > > proceed to retrain the link.
> > > > > > > 
> > > > > > > So basically just walking over the MST encoders in addition to the
> > > > > > > SST encoder, and repeating most of the steps in this code for
> > > > > > > each.
> > > > > > > Hence the the MST FIXME I left in there ;)
> > > > > > > 
> > > > > > 
> > > > > > Lyude,
> > > > > > 
> > > > > > I agree with Ville here, can we add the MST retrain required check
> > > > > > from
> > > > > > within the intel_dp_retrain_link()? So for now the FIXME should be
> > > > > > left
> > > > > > there
> > > > > > and MST retraining check can be added from your patches.
> > > > > > 
> > > > > > Manasi
> > > > > 
> > > > > Yep! Sorry I probably should have reiterated this part over email
> > > > > since I
> > > > > only
> > > > > mentioned it to ville directly: this part should definitely work fine,
> > > > > and
> > > > > intel_dp_retrain_link() is actually where we handle MST retraining in
> > > > > the
> > > > > latest version of the retraining series:
> > > > > 
> > > > > jfyi, you can find that here:
> > > > > https://github.com/Lyude/linux/tree/wip/i915-mst-fallback-a4
> > > > > It's almost ready for the ML, just want to get the other patches this
> > > > > is
> > > > > going
> > > > > to rely on in first and fix some small concerns I've got about that
> > > > > series'
> > > > > current implementation of handling modesets after a failed retrain
> > > > > sequence
> > > > > > 
> > > > 
> > > > Great!
> > > > After the failed retrain sequence, it will keep on retrying until has
> > > > tried
> > > > all
> > > > the link rate/lane count combinations until RBR 1 lane and then fail
> > > > with a
> > > > ERROR message.
> > > > What was the concern on the current implementation of modeste after
> > > > failed
> > > > retrain?
> > > 
> > > I actually take that back. Was worried that not freeing existing VCPI
> > > allocations during the first atomic check phase for the first modeset
> > > after a
> > > retrain failure might cause atomic checks to fail from not having enough
> > > VCPI 
> > > space, but then I realized that wouldn't matter since if we're not able to
> > > fit
> > > one display into the VCPI with a reduced pbn_div, that just means fitting
> > > both
> > > displays with a reduced pbn_div would also be impossible :P.
> > > 
> > 
> > But then in that case, in oredr to retry at lower link rates, both these
> > monitors will
> > have to drop to the next lower resolutions to fit the reduced pbn_div.
> 
> Yep, that's the idea! In which case the atomic check should fail, which should
> cause userspace to try at a reduced resolution or take some other reparative
> action
> > 
> > Manasi
> >  
> > > > 
> > > > Manasi
> > > >  
> > > > > > > > 
> > > > > > > > >  
> > > > > > > > >  	/* Suppress underruns caused by re-training */
> > > > > > > > >  	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc-
> > > > > > > > > > pipe,
> > > > > > > > > 
> > > > > > > > > false);
> > > > > > > > > @@ -4298,51 +4369,49 @@ intel_dp_retrain_link(struct intel_dp
> > > > > > > > > *intel_dp)
> > > > > > > > >  	if (crtc->config->has_pch_encoder)
> > > > > > > > >  		intel_set_pch_fifo_underrun_reporting(dev_pri
> > > > > > > > > v,
> > > > > > > > >  						      intel_c
> > > > > > > > > rtc_
> > > > > > > > > pch_
> > > > > > > > > transcod
> > > > > > > > > er(crtc), true);
> > > > > > > > > +
> > > > > > > > > +	return 0;
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > > -static void
> > > > > > > > > -intel_dp_check_link_status(struct intel_dp *intel_dp)
> > > > > > > > > +/*
> > > > > > > > > + * If display is now connected check links status,
> > > > > > > > > + * there has been known issues of link loss triggering
> > > > > > > > > + * long pulse.
> > > > > > > > > + *
> > > > > > > > > + * Some sinks (eg. ASUS PB287Q) seem to perform some
> > > > > > > > > + * weird HPD ping pong during modesets. So we can apparently
> > > > > > > > > + * end up with HPD going low during a modeset, and then
> > > > > > > > > + * going back up soon after. And once that happens we must
> > > > > > > > > + * retrain the link to get a picture. That's in case no
> > > > > > > > > + * userspace component reacted to intermittent HPD dip.
> > > > > > > > > + */
> > > > > > > > > +static bool intel_dp_hotplug(struct intel_encoder *encoder,
> > > > > > > > > +			     struct intel_connector
> > > > > > > > > *connector)
> > > > > > > > >  {
> > > > > > > > > -	struct drm_i915_private *dev_priv =
> > > > > > > > > to_i915(intel_dp_to_dev(intel_dp));
> > > > > > > > > -	struct intel_encoder *intel_encoder =
> > > > > > > > > &dp_to_dig_port(intel_dp)-
> > > > > > > > > > base;
> > > > > > > > > 
> > > > > > > > > -	struct drm_connector_state *conn_state =
> > > > > > > > > -		intel_dp->attached_connector->base.state;
> > > > > > > > > -	u8 link_status[DP_LINK_STATUS_SIZE];
> > > > > > > > > -
> > > > > > > > > -	WARN_ON(!drm_modeset_is_locked(&dev_priv-
> > > > > > > > > > drm.mode_config.connection_mutex));
> > > > > > > > > 
> > > > > > > > > -
> > > > > > > > > -	if (!intel_dp_get_link_status(intel_dp, link_status))
> > > > > > > > > {
> > > > > > > > > -		DRM_ERROR("Failed to get link status\n");
> > > > > > > > > -		return;
> > > > > > > > > -	}
> > > > > > > > > +	struct drm_modeset_acquire_ctx ctx;
> > > > > > > > > +	bool changed;
> > > > > > > > > +	int ret;
> > > > > > > > >  
> > > > > > > > > -	if (!conn_state->crtc)
> > > > > > > > > -		return;
> > > > > > > > > +	changed = intel_encoder_hotplug(encoder, connector);
> > > > > > > > >  
> > > > > > > > > -	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc-
> > > > > > > > > > mutex));
> > > > > > > > > 
> > > > > > > > > +	drm_modeset_acquire_init(&ctx, 0);
> > > > > > > > >  
> > > > > > > > > -	if (!conn_state->crtc->state->active)
> > > > > > > > > -		return;
> > > > > > > > > +	for (;;) {
> > > > > > 
> > > > > > Here if this is getting executed due to hpd ping pong during the
> > > > > > modeset
> > > > > > and that modeset is already happening at a link fallback parameter
> > > > > > then
> > > > > > while we call retrain link, we should also validate the link
> > > > > > parameters
> > > > > > so that it doesnt try to retrain with stale values.
> > > > > > 
> > > > > > I think we need to call intel_dp_link_params_valid() before retrain.

I missed that intel_dp_retrain_link() does call intel_dp_link_params_valid() through
intel_dp_needs_retrain(). So ignore my concern above.
With that,

Acked-by: Manasi Navare <manasi.d.navare@intel.com>

Manasi
> > > > > > 
> > > > > > > > > +		ret = intel_dp_retrain_link(encoder, &ctx);
> > > > > > > > >  
> > > > > > > > > -	if (conn_state->commit &&
> > > > > > > > > -	    !try_wait_for_completion(&conn_state->commit-
> > > > > > > > > > hw_done))
> > > > > > > > > 
> > > > > > > > > -		return;
> > > > > > > > > +		if (ret == -EDEADLK) {
> > > > > > > > > +			drm_modeset_backoff(&ctx);
> > > > > > > > > +			continue;
> > > > > > > > > +		}
> > > > > > > > >  
> > > > > > > > > -	/*
> > > > > > > > > -	 * Validate the cached values of intel_dp->link_rate
> > > > > > > > > and
> > > > > > > > > -	 * intel_dp->lane_count before attempting to retrain.
> > > > > > > > > -	 */
> > > > > > > > > -	if (!intel_dp_link_params_valid(intel_dp, intel_dp-
> > > > > > > > > > link_rate,
> > > > > > > > > 
> > > > > > > > > -					intel_dp-
> > > > > > > > > >lane_count))
> > > > > > > > > -		return;
> > > > > > > > > +		break;
> > > > > > > > > +	}
> > > > > > > > >  
> > > > > > > > > -	/* Retrain if Channel EQ or CR not ok */
> > > > > > > > > -	if (!drm_dp_channel_eq_ok(link_status, intel_dp-
> > > > > > > > > > lane_count)) 
> > > > > > > > > 
> > > > > > > > > {
> > > > > > > > > -		DRM_DEBUG_KMS("%s: channel EQ not ok,
> > > > > > > > > retraining\n",
> > > > > > > > > -			      intel_encoder->base.name);
> > > > > > > > > +	drm_modeset_drop_locks(&ctx);
> > > > > > > > > +	drm_modeset_acquire_fini(&ctx);
> > > > > > > > > +	WARN(ret, "Acquiring modeset locks failed with %i\n",
> > > > > > > > > ret);
> > > > > > > > >  
> > > > > > > > > -		intel_dp_retrain_link(intel_dp);
> > > > > > > > > -	}
> > > > > > > > > +	return changed;
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > >  /*
> > > > > > > > > @@ -4400,7 +4469,9 @@ intel_dp_short_pulse(struct intel_dp
> > > > > > > > > *intel_dp)
> > > > > > > > >  			DRM_DEBUG_DRIVER("CP or sink specific
> > > > > > > > > irq
> > > > > > > > > unhandled\n");
> > > > > > > > >  	}
> > > > > > > > >  
> > > > > > > > > -	intel_dp_check_link_status(intel_dp);
> > > > > > > > > +	/* defer to the hotplug work for link retraining if
> > > > > > > > > needed */
> > > > > > > > > +	if (intel_dp_needs_link_retrain(intel_dp))
> > > > > > > > > +		return false;
> > > > > > > > >  
> > > > > > > > >  	if (intel_dp->compliance.test_type ==
> > > > > > > > > DP_TEST_LINK_TRAINING)
> > > > > > > > > {
> > > > > > > > >  		DRM_DEBUG_KMS("Link Training Compliance Test
> > > > > > > > > requested\n");
> > > > > > > > > @@ -4785,20 +4856,6 @@ intel_dp_long_pulse(struct
> > > > > > > > > intel_connector
> > > > > > > > > *connector)
> > > > > > > > >  		 */
> > > > > > > > >  		status = connector_status_disconnected;
> > > > > > > > >  		goto out;
> > > > > > > > > -	} else {
> > > > > > > > > -		/*
> > > > > > > > > -		 * If display is now connected check links
> > > > > > > > > status,
> > > > > > > > > -		 * there has been known issues of link loss
> > > > > > > > > triggerring
> > > > > > > > > -		 * long pulse.
> > > > > > > > > -		 *
> > > > > > > > > -		 * Some sinks (eg. ASUS PB287Q) seem to
> > > > > > > > > perform
> > > > > > > > > some
> > > > > > > > > -		 * weird HPD ping pong during modesets. So we
> > > > > > > > > can
> > > > > > > > > apparently
> > > > > > > > > -		 * end up with HPD going low during a
> > > > > > > > > modeset,
> > > > > > > > > and
> > > > > > > > > then
> > > > > > > > > -		 * going back up soon after. And once that
> > > > > > > > > happens we
> > > > > > > > > must
> > > > > > > > > -		 * retrain the link to get a picture. That's
> > > > > > > > > in
> > > > > > > > > case
> > > > > > > > > no
> > > > > > > > > -		 * userspace component reacted to
> > > > > > > > > intermittent
> > > > > > > > > HPD
> > > > > > > > > dip.
> > > > > > > > > -		 */
> > > > > > > > > -		intel_dp_check_link_status(intel_dp);
> > > > > > > > >  	}
> > > > > > > > >  
> > > > > > > > >  	/*
> > > > > > > > > @@ -5340,37 +5397,10 @@ intel_dp_hpd_pulse(struct
> > > > > > > > > intel_digital_port
> > > > > > > > > *intel_dig_port, bool long_hpd)
> > > > > > > > >  	}
> > > > > > > > >  
> > > > > > > > >  	if (!intel_dp->is_mst) {
> > > > > > > > > -		struct drm_modeset_acquire_ctx ctx;
> > > > > > > > > -		struct drm_connector *connector = &intel_dp-
> > > > > > > > > > attached_connector->base;
> > > > > > > > > 
> > > > > > > > > -		struct drm_crtc *crtc;
> > > > > > > > > -		int iret;
> > > > > > > > > -		bool handled = false;
> > > > > > > > > -
> > > > > > > > > -		drm_modeset_acquire_init(&ctx, 0);
> > > > > > > > > -retry:
> > > > > > > > > -		iret = drm_modeset_lock(&dev_priv-
> > > > > > > > > > drm.mode_config.connection_mutex, &ctx);
> > > > > > > > > 
> > > > > > > > > -		if (iret)
> > > > > > > > > -			goto err;
> > > > > > > > > -
> > > > > > > > > -		crtc = connector->state->crtc;
> > > > > > > > > -		if (crtc) {
> > > > > > > > > -			iret = drm_modeset_lock(&crtc->mutex,
> > > > > > > > > &ctx);
> > > > > > > > > -			if (iret)
> > > > > > > > > -				goto err;
> > > > > > > > > -		}
> > > > > > > > > +		bool handled;
> > > > > > > > >  
> > > > > > > > >  		handled = intel_dp_short_pulse(intel_dp);
> > > > > > > > >  
> > > > > > > > > -err:
> > > > > > > > > -		if (iret == -EDEADLK) {
> > > > > > > > > -			drm_modeset_backoff(&ctx);
> > > > > > > > > -			goto retry;
> > > > > > > > > -		}
> > > > > > > > > -
> > > > > > > > > -		drm_modeset_drop_locks(&ctx);
> > > > > > > > > -		drm_modeset_acquire_fini(&ctx);
> > > > > > > > > -		WARN(iret, "Acquiring modeset locks failed
> > > > > > > > > with
> > > > > > > > > %i\n", iret);
> > > > > > > > > -
> > > > > > > > >  		/* Short pulse can signify loss of hdcp
> > > > > > > > > authentication */
> > > > > > > > >  		intel_hdcp_check_link(intel_dp-
> > > > > > > > > > attached_connector);
> > > > > > > > > 
> > > > > > > > >  
> > > > > > > > > @@ -6400,7 +6430,7 @@ bool intel_dp_init(struct
> > > > > > > > > drm_i915_private
> > > > > > > > > *dev_priv,
> > > > > > > > >  			     "DP %c", port_name(port)))
> > > > > > > > >  		goto err_encoder_init;
> > > > > > > > >  
> > > > > > > > > -	intel_encoder->hotplug = intel_encoder_hotplug;
> > > > > > > > > +	intel_encoder->hotplug = intel_dp_hotplug;
> > > > > > > > >  	intel_encoder->compute_config =
> > > > > > > > > intel_dp_compute_config;
> > > > > > > > >  	intel_encoder->get_hw_state = intel_dp_get_hw_state;
> > > > > > > > >  	intel_encoder->get_config = intel_dp_get_config;
> > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > > index 5ea1dc3f63bf..ddf28a442cd7 100644
> > > > > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > > > > @@ -1611,6 +1611,8 @@ int
> > > > > > > > > intel_dp_get_link_train_fallback_values(struct
> > > > > > > > > intel_dp *intel_dp,
> > > > > > > > >  					    int link_rate,
> > > > > > > > > uint8_t
> > > > > > > > > lane_count);
> > > > > > > > >  void intel_dp_start_link_train(struct intel_dp *intel_dp);
> > > > > > > > >  void intel_dp_stop_link_train(struct intel_dp *intel_dp);
> > > > > > > > > +int intel_dp_retrain_link(struct intel_encoder *encoder,
> > > > > > > > > +			  struct drm_modeset_acquire_ctx
> > > > > > > > > *ctx);
> > > > > > > > >  void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
> > > > > > > > >  void intel_dp_encoder_reset(struct drm_encoder *encoder);
> > > > > > > > >  void intel_dp_encoder_suspend(struct intel_encoder
> > > > > > > > > *intel_encoder);
> > > > > > > 
> > > > > > > -- 
> > > > > > > Ville Syrjälä
> > > > > > > Intel OTC
> > > > > > > _______________________________________________
> > > > > > > Intel-gfx mailing list
> > > > > > > Intel-gfx@lists.freedesktop.org
> > > > > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > > > 
> > > > > -- 
> > > > > Cheers,
> > > > > 	Lyude Paul
> > > 
> > > -- 
> > > Cheers,
> > > 	Lyude Paul
> -- 
> Cheers,
> 	Lyude Paul
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 25793bdc692f..5f3d58f1ae6e 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2880,7 +2880,10 @@  static bool intel_ddi_hotplug(struct intel_encoder *encoder,
 	drm_modeset_acquire_init(&ctx, 0);
 
 	for (;;) {
-		ret = intel_hdmi_reset_link(encoder, &ctx);
+		if (connector->base.connector_type == DRM_MODE_CONNECTOR_HDMIA)
+			ret = intel_hdmi_reset_link(encoder, &ctx);
+		else
+			ret = intel_dp_retrain_link(encoder, &ctx);
 
 		if (ret == -EDEADLK) {
 			drm_modeset_backoff(&ctx);
@@ -3007,10 +3010,7 @@  void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs,
 			 DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port));
 
-	if (init_hdmi)
-		intel_encoder->hotplug = intel_ddi_hotplug;
-	else
-		intel_encoder->hotplug = intel_encoder_hotplug;
+	intel_encoder->hotplug = intel_ddi_hotplug;
 	intel_encoder->compute_output_type = intel_ddi_compute_output_type;
 	intel_encoder->compute_config = intel_ddi_compute_config;
 	intel_encoder->enable = intel_enable_ddi;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 6bbf14410c2a..152016e09a11 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4275,12 +4275,83 @@  intel_dp_check_mst_status(struct intel_dp *intel_dp)
 	return -EINVAL;
 }
 
-static void
-intel_dp_retrain_link(struct intel_dp *intel_dp)
+static bool
+intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
+{
+	u8 link_status[DP_LINK_STATUS_SIZE];
+
+	if (!intel_dp_get_link_status(intel_dp, link_status)) {
+		DRM_ERROR("Failed to get link status\n");
+		return false;
+	}
+
+	/*
+	 * Validate the cached values of intel_dp->link_rate and
+	 * intel_dp->lane_count before attempting to retrain.
+	 */
+	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
+					intel_dp->lane_count))
+		return false;
+
+	/* Retrain if Channel EQ or CR not ok */
+	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
+}
+
+/*
+ * If display is now connected check links status,
+ * there has been known issues of link loss triggering
+ * long pulse.
+ *
+ * Some sinks (eg. ASUS PB287Q) seem to perform some
+ * weird HPD ping pong during modesets. So we can apparently
+ * end up with HPD going low during a modeset, and then
+ * going back up soon after. And once that happens we must
+ * retrain the link to get a picture. That's in case no
+ * userspace component reacted to intermittent HPD dip.
+ */
+int intel_dp_retrain_link(struct intel_encoder *encoder,
+			  struct drm_modeset_acquire_ctx *ctx)
 {
-	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_connector *connector = intel_dp->attached_connector;
+	struct drm_connector_state *conn_state;
+	struct intel_crtc_state *crtc_state;
+	struct intel_crtc *crtc;
+	int ret;
+
+	/* FIXME handle the MST connectors as well */
+
+	if (!connector || connector->base.status != connector_status_connected)
+		return 0;
+
+	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, ctx);
+	if (ret)
+		return ret;
+
+	conn_state = connector->base.state;
+
+	crtc = to_intel_crtc(conn_state->crtc);
+	if (!crtc)
+		return 0;
+
+	ret = drm_modeset_lock(&crtc->base.mutex, ctx);
+	if (ret)
+		return ret;
+
+	crtc_state = to_intel_crtc_state(crtc->base.state);
+
+	WARN_ON(!intel_crtc_has_dp_encoder(crtc_state));
+
+	if (!crtc_state->base.active)
+		return 0;
+
+	if (conn_state->commit &&
+	    !try_wait_for_completion(&conn_state->commit->hw_done))
+		return 0;
+
+	if (!intel_dp_needs_link_retrain(intel_dp))
+		return 0;
 
 	/* Suppress underruns caused by re-training */
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
@@ -4298,51 +4369,49 @@  intel_dp_retrain_link(struct intel_dp *intel_dp)
 	if (crtc->config->has_pch_encoder)
 		intel_set_pch_fifo_underrun_reporting(dev_priv,
 						      intel_crtc_pch_transcoder(crtc), true);
+
+	return 0;
 }
 
-static void
-intel_dp_check_link_status(struct intel_dp *intel_dp)
+/*
+ * If display is now connected check links status,
+ * there has been known issues of link loss triggering
+ * long pulse.
+ *
+ * Some sinks (eg. ASUS PB287Q) seem to perform some
+ * weird HPD ping pong during modesets. So we can apparently
+ * end up with HPD going low during a modeset, and then
+ * going back up soon after. And once that happens we must
+ * retrain the link to get a picture. That's in case no
+ * userspace component reacted to intermittent HPD dip.
+ */
+static bool intel_dp_hotplug(struct intel_encoder *encoder,
+			     struct intel_connector *connector)
 {
-	struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
-	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
-	struct drm_connector_state *conn_state =
-		intel_dp->attached_connector->base.state;
-	u8 link_status[DP_LINK_STATUS_SIZE];
-
-	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
-
-	if (!intel_dp_get_link_status(intel_dp, link_status)) {
-		DRM_ERROR("Failed to get link status\n");
-		return;
-	}
+	struct drm_modeset_acquire_ctx ctx;
+	bool changed;
+	int ret;
 
-	if (!conn_state->crtc)
-		return;
+	changed = intel_encoder_hotplug(encoder, connector);
 
-	WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex));
+	drm_modeset_acquire_init(&ctx, 0);
 
-	if (!conn_state->crtc->state->active)
-		return;
+	for (;;) {
+		ret = intel_dp_retrain_link(encoder, &ctx);
 
-	if (conn_state->commit &&
-	    !try_wait_for_completion(&conn_state->commit->hw_done))
-		return;
+		if (ret == -EDEADLK) {
+			drm_modeset_backoff(&ctx);
+			continue;
+		}
 
-	/*
-	 * Validate the cached values of intel_dp->link_rate and
-	 * intel_dp->lane_count before attempting to retrain.
-	 */
-	if (!intel_dp_link_params_valid(intel_dp, intel_dp->link_rate,
-					intel_dp->lane_count))
-		return;
+		break;
+	}
 
-	/* Retrain if Channel EQ or CR not ok */
-	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
-		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
-			      intel_encoder->base.name);
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+	WARN(ret, "Acquiring modeset locks failed with %i\n", ret);
 
-		intel_dp_retrain_link(intel_dp);
-	}
+	return changed;
 }
 
 /*
@@ -4400,7 +4469,9 @@  intel_dp_short_pulse(struct intel_dp *intel_dp)
 			DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n");
 	}
 
-	intel_dp_check_link_status(intel_dp);
+	/* defer to the hotplug work for link retraining if needed */
+	if (intel_dp_needs_link_retrain(intel_dp))
+		return false;
 
 	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
 		DRM_DEBUG_KMS("Link Training Compliance Test requested\n");
@@ -4785,20 +4856,6 @@  intel_dp_long_pulse(struct intel_connector *connector)
 		 */
 		status = connector_status_disconnected;
 		goto out;
-	} else {
-		/*
-		 * If display is now connected check links status,
-		 * there has been known issues of link loss triggerring
-		 * long pulse.
-		 *
-		 * Some sinks (eg. ASUS PB287Q) seem to perform some
-		 * weird HPD ping pong during modesets. So we can apparently
-		 * end up with HPD going low during a modeset, and then
-		 * going back up soon after. And once that happens we must
-		 * retrain the link to get a picture. That's in case no
-		 * userspace component reacted to intermittent HPD dip.
-		 */
-		intel_dp_check_link_status(intel_dp);
 	}
 
 	/*
@@ -5340,37 +5397,10 @@  intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	}
 
 	if (!intel_dp->is_mst) {
-		struct drm_modeset_acquire_ctx ctx;
-		struct drm_connector *connector = &intel_dp->attached_connector->base;
-		struct drm_crtc *crtc;
-		int iret;
-		bool handled = false;
-
-		drm_modeset_acquire_init(&ctx, 0);
-retry:
-		iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx);
-		if (iret)
-			goto err;
-
-		crtc = connector->state->crtc;
-		if (crtc) {
-			iret = drm_modeset_lock(&crtc->mutex, &ctx);
-			if (iret)
-				goto err;
-		}
+		bool handled;
 
 		handled = intel_dp_short_pulse(intel_dp);
 
-err:
-		if (iret == -EDEADLK) {
-			drm_modeset_backoff(&ctx);
-			goto retry;
-		}
-
-		drm_modeset_drop_locks(&ctx);
-		drm_modeset_acquire_fini(&ctx);
-		WARN(iret, "Acquiring modeset locks failed with %i\n", iret);
-
 		/* Short pulse can signify loss of hdcp authentication */
 		intel_hdcp_check_link(intel_dp->attached_connector);
 
@@ -6400,7 +6430,7 @@  bool intel_dp_init(struct drm_i915_private *dev_priv,
 			     "DP %c", port_name(port)))
 		goto err_encoder_init;
 
-	intel_encoder->hotplug = intel_encoder_hotplug;
+	intel_encoder->hotplug = intel_dp_hotplug;
 	intel_encoder->compute_config = intel_dp_compute_config;
 	intel_encoder->get_hw_state = intel_dp_get_hw_state;
 	intel_encoder->get_config = intel_dp_get_config;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 5ea1dc3f63bf..ddf28a442cd7 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1611,6 +1611,8 @@  int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp,
 					    int link_rate, uint8_t lane_count);
 void intel_dp_start_link_train(struct intel_dp *intel_dp);
 void intel_dp_stop_link_train(struct intel_dp *intel_dp);
+int intel_dp_retrain_link(struct intel_encoder *encoder,
+			  struct drm_modeset_acquire_ctx *ctx);
 void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
 void intel_dp_encoder_reset(struct drm_encoder *encoder);
 void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);