diff mbox

[RFC,8/8] drm/i915: Add two-stage ILK-style watermark programming (v2)

Message ID 1435803961-30079-9-git-send-email-matthew.d.roper@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Matt Roper July 2, 2015, 2:26 a.m. UTC
From: Matt Roper <matt@mattrope.com>

In addition to calculating final watermarks, let's also pre-calculate a
set of intermediate watermark values at atomic check time.  These
intermediate watermarks are a combination of the watermarks for the old
state and the new state; they should satisfy the requirements of both
states which means they can be programmed immediately when we commit the
atomic state (without waiting for a vblank).  Once the vblank does
happen, we can then re-program watermarks to the more optimal final
value.

v2: Significant rebasing/rewriting.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |  9 +++++
 drivers/gpu/drm/i915/i915_irq.c      |  7 ++++
 drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++-
 drivers/gpu/drm/i915/intel_drv.h     | 26 +++++++++----
 drivers/gpu/drm/i915/intel_pm.c      | 75 ++++++++++++++++++++++++++++++------
 5 files changed, 130 insertions(+), 21 deletions(-)

Comments

Daniel Vetter July 6, 2015, 9:11 a.m. UTC | #1
On Wed, Jul 01, 2015 at 07:26:01PM -0700, Matt Roper wrote:
> From: Matt Roper <matt@mattrope.com>
> 
> In addition to calculating final watermarks, let's also pre-calculate a
> set of intermediate watermark values at atomic check time.  These
> intermediate watermarks are a combination of the watermarks for the old
> state and the new state; they should satisfy the requirements of both
> states which means they can be programmed immediately when we commit the
> atomic state (without waiting for a vblank).  Once the vblank does
> happen, we can then re-program watermarks to the more optimal final
> value.
> 
> v2: Significant rebasing/rewriting.
> 
> Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h      |  9 +++++
>  drivers/gpu/drm/i915/i915_irq.c      |  7 ++++
>  drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++-
>  drivers/gpu/drm/i915/intel_drv.h     | 26 +++++++++----
>  drivers/gpu/drm/i915/intel_pm.c      | 75 ++++++++++++++++++++++++++++++------
>  5 files changed, 130 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5ad942e..42397e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -623,6 +623,9 @@ struct drm_i915_display_funcs {
>  			  struct dpll *best_clock);
>  	int (*compute_pipe_wm)(struct drm_crtc *crtc,
>  			       struct drm_atomic_state *state);
> +	void (*compute_intermediate_wm)(struct drm_device *dev,
> +					struct intel_crtc_state *newstate,
> +					const struct intel_crtc_state *oldstate);
>  	void (*update_wm)(struct drm_crtc *crtc);
>  	void (*update_sprite_wm)(struct drm_plane *plane,
>  				 struct drm_crtc *crtc,
> @@ -2574,6 +2577,12 @@ struct drm_i915_cmd_table {
>   */
>  #define HAS_ATOMIC_WM(dev_priv) (dev_priv->display.program_watermarks != NULL)
>  
> +/*
> + * Newer platforms have doublebuffered watermark registers and do not need
> + * the two-step watermark programming used by older platforms.
> + */
> +#define HAS_DBLBUF_WM(dev_priv) (INTEL_INFO(dev_priv)->gen >= 9)

Just move need_vblank_wm_update into crtc_state and compute it in the
compute_pipe_wm functions?

> +
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 20c7260..627c56f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1455,8 +1455,15 @@ static bool intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe)
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
>  
>  	if (intel_crtc->need_vblank_wm_update) {
> +		WARN_ON(HAS_DBLBUF_WM(dev_priv));
> +
> +		/* Latch final watermarks now that vblank is past */
> +		cstate->wm.active = cstate->wm.target;
> +
> +		/* Queue work to actually program them asynchronously */
>  		queue_work(dev_priv->wq, &intel_crtc->wm_work);
>  		intel_crtc->need_vblank_wm_update = false;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index fa4373e..1616d7f 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -4737,7 +4737,7 @@ static void intel_post_plane_update(struct intel_crtc *crtc)
>  	struct drm_device *dev = crtc->base.dev;
>  	struct drm_plane *plane;
>  
> -	if (HAS_ATOMIC_WM(to_i915(dev)))
> +	if (HAS_ATOMIC_WM(to_i915(dev)) && !HAS_DBLBUF_WM(to_i915(dev)))
>  		/* vblank handler will kick off workqueue task to update wm's */
>  		crtc->need_vblank_wm_update = true;
>  
> @@ -11833,6 +11833,8 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct intel_crtc_state *pipe_config =
>  		to_intel_crtc_state(crtc_state);
> +	struct intel_crtc_state *old_pipe_config =
> +		to_intel_crtc_state(crtc->state);
>  	struct drm_atomic_state *state = crtc_state->state;
>  	int ret, idx = crtc->base.id;
>  	bool mode_changed = needs_modeset(crtc_state);
> @@ -11863,9 +11865,20 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
>  	}
>  
>  	if (dev_priv->display.compute_pipe_wm) {
> +		if (WARN_ON(!dev_priv->display.compute_intermediate_wm))
> +			return 0;

skl won't need this, right?

> +
>  		ret = dev_priv->display.compute_pipe_wm(crtc, state);
>  		if (ret)
>  			return ret;
> +
> +		/*
> +		 * Calculate 'intermediate' watermarks that satisfy both the old state
> +		 * and the new state.  We can program these immediately.
> +		 */
> +		dev_priv->display.compute_intermediate_wm(crtc->dev,
> +							  pipe_config,
> +							  old_pipe_config);
>  	}
>  
>  	return intel_atomic_setup_scalers(dev, intel_crtc, pipe_config);
> @@ -13789,8 +13802,25 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc)
>  	if (!needs_modeset(crtc->state))
>  		intel_pre_plane_update(intel_crtc);
>  
> -	if (intel_crtc->atomic.update_wm_pre)
> +	/*
> +	 * For platforms that support atomic watermarks, program the 'pending'
> +	 * watermarks immediately.  On pre-gen9 platforms, these will be the
> +	 * intermediate values that are safe for both pre- and post- vblank;
> +	 * when vblank happens, the 'pending' values will be set to the final
> +	 * 'target' values and we'll do this again to get the optimal
> +	 * watermarks.  For gen9+ platforms, the values we program here will be
> +	 * the final target values which will get automatically latched at
> +	 * vblank time; no further programming will be necessary.
> +	 *
> +	 * If a platform hasn't been transitioned to atomic watermarks yet,
> +	 * we'll continue to update watermarks the old way, if flags tell
> +	 * us to.
> +	 */
> +	if (HAS_ATOMIC_WM(dev_priv)) {
> +		dev_priv->display.program_watermarks(dev_priv);
> +	} else if (intel_crtc->atomic.update_wm_pre) {
>  		intel_update_watermarks(crtc);
> +	}
>  
>  	intel_runtime_pm_get(dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 775e3d0..7bc4e81 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -343,6 +343,11 @@ struct skl_pipe_wm {
>  	uint32_t linetime;
>  };
>  
> +union pipe_wm {
> +	struct intel_pipe_wm ilk;
> +	struct skl_pipe_wm skl;
> +};
> +
>  struct intel_crtc_state {
>  	struct drm_crtc_state base;
>  
> @@ -481,16 +486,21 @@ struct intel_crtc_state {
>  
>  	struct {
>  		/*
> -		 * final watermarks, programmed post-vblank when this state
> -		 * is committed
> +		 * final, target watermarks for state; on pre-gen9 platforms,
> +		 * this might not have been programmed yet if a vblank hasn't
> +		 * happened since this state was committed
> +		 */
> +		union pipe_wm target;
> +
> +		/*
> +		 * currently programmed watermark; on pre-gen9, this will be
> +		 * the intermediate values before vblank then switch to match
> +		 * 'target' after vblank fires)
>  		 */
> -		union {
> -			struct intel_pipe_wm ilk;
> -			struct skl_pipe_wm skl;
> -		} active;
> +		union pipe_wm active;
>  
> -		/* allow CxSR on this pipe */
> -		bool cxsr_allowed;
> +               /* allow CxSR on this pipe */
> +               bool cxsr_allowed;
>  	} wm;
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index c6e735f..7942bb0 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2338,6 +2338,24 @@ static void ilk_compute_wm_config(struct drm_device *dev,
>  	}
>  }
>  
> +static bool ilk_validate_pipe_wm(struct drm_device *dev,
> +				 struct intel_wm_config *config,
> +				 struct intel_pipe_wm *pipe_wm)
> +{
> +	struct ilk_wm_maximums max;
> +
> +	/* LP0 watermarks always use 1/2 DDB partitioning */
> +	ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, &max);
> +
> +	/* At least LP0 must be valid */
> +	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
> +		DRM_DEBUG_KMS("LP0 watermark invalid\n");
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
>  /* Compute new watermarks for the pipe */
>  static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
>  			       struct drm_atomic_state *state)
> @@ -2366,7 +2384,7 @@ static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
>  	else
>  		cstate = to_intel_crtc_state(cs);
>  
> -	pipe_wm = &cstate->wm.active.ilk;
> +	pipe_wm = &cstate->wm.target.ilk;
>  
>  	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
>  		ps = drm_atomic_get_plane_state(state,
> @@ -2405,12 +2423,8 @@ static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
>  	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
>  		pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
>  
> -	/* LP0 watermarks always use 1/2 DDB partitioning */
> -	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
> -
> -	/* At least LP0 must be valid */
> -	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
> -		return -EINVAL;
> +	if (!ilk_validate_pipe_wm(dev, &config, pipe_wm))
> +		return false;
>  
>  	ilk_compute_wm_reg_maximums(dev, 1, &max);
>  
> @@ -2435,6 +2449,40 @@ static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
>  }
>  
>  /*
> + * Build a set of 'intermediate' watermark values that satisfy both the old
> + * state and the new state.  These can be programmed to the hardware
> + * immediately.
> + */
> +void ilk_compute_intermediate_wm(struct drm_device *dev,
> +				 struct intel_crtc_state *newstate,
> +				 const struct intel_crtc_state *oldstate)
> +{
> +	struct intel_pipe_wm *a = &newstate->wm.active.ilk;
> +	const struct intel_pipe_wm *b = &oldstate->wm.active.ilk;
> +	int level, max_level = ilk_wm_max_level(dev);
> +
> +	/*
> +	 * Start with the final, target watermarks, then combine with the
> +	 * current state's watermarks.
> +	 */
> +	*a = newstate->wm.target.ilk;
> +	a->pipe_enabled |= b->pipe_enabled;
> +	a->sprites_enabled |= b->sprites_enabled;
> +	a->sprites_scaled |= b->sprites_scaled;
> +
> +	for (level = 0; level <= max_level; level++) {
> +		struct intel_wm_level *a_wm = &a->wm[level];
> +		const struct intel_wm_level *b_wm = &b->wm[level];
> +
> +		a_wm->enable &= b_wm->enable;
> +		a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
> +		a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
> +		a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
> +		a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
> +	}
> +}
> +
> +/*
>   * Merge the watermarks from all active pipes for a specific level.
>   */
>  static void ilk_merge_wm_level(struct drm_device *dev,
> @@ -3601,10 +3649,10 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
>  	skl_allocate_pipe_ddb(crtc, config, params, ddb);
>  	skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
>  
> -	if (!memcmp(&cstate->wm.active.skl, pipe_wm, sizeof(*pipe_wm)))
> +	if (!memcmp(&cstate->wm.target.skl, pipe_wm, sizeof(*pipe_wm)))
>  		return false;
>  
> -	cstate->wm.active.skl = *pipe_wm;
> +	cstate->wm.target.skl = *pipe_wm;
>  
>  	return true;
>  }
> @@ -3825,7 +3873,7 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
>  	struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
> -	struct skl_pipe_wm *active = &cstate->wm.active.skl;
> +	struct skl_pipe_wm *active = &cstate->wm.target.skl;
>  	enum pipe pipe = intel_crtc->pipe;
>  	int level, i, max_level;
>  	uint32_t temp;
> @@ -3889,7 +3937,7 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
>  	struct ilk_wm_values *hw = &dev_priv->wm.hw;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
> -	struct intel_pipe_wm *active = &cstate->wm.active.ilk;
> +	struct intel_pipe_wm *active = &cstate->wm.target.ilk;
>  	enum pipe pipe = intel_crtc->pipe;
>  	static const unsigned int wm0_pipe_reg[] = {
>  		[PIPE_A] = WM0_PIPEA_ILK,
> @@ -3928,6 +3976,8 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
>  		for (level = 0; level <= max_level; level++)
>  			active->wm[level].enable = true;
>  	}
> +
> +	cstate->wm.active.ilk = cstate->wm.target.ilk = *active;
>  }
>  
>  #define _FW_WM(value, plane) \
> @@ -7048,6 +7098,9 @@ void intel_init_pm(struct drm_device *dev)
>  		     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
>  			dev_priv->display.update_wm = ilk_update_wm;
>  			dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
> +			dev_priv->display.compute_intermediate_wm =
> +				ilk_compute_intermediate_wm;
> +			dev_priv->display.program_watermarks = ilk_program_watermarks;
>  		} else {
>  			DRM_DEBUG_KMS("Failed to read display plane latency. "
>  				      "Disable CxSR\n");
> -- 
> 2.1.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Maarten Lankhorst July 6, 2015, 12:20 p.m. UTC | #2
Op 02-07-15 om 04:26 schreef Matt Roper:
> From: Matt Roper <matt@mattrope.com>
>
> In addition to calculating final watermarks, let's also pre-calculate a
> set of intermediate watermark values at atomic check time.  These
> intermediate watermarks are a combination of the watermarks for the old
> state and the new state; they should satisfy the requirements of both
> states which means they can be programmed immediately when we commit the
> atomic state (without waiting for a vblank).  Once the vblank does
> happen, we can then re-program watermarks to the more optimal final
> value.
>
> v2: Significant rebasing/rewriting.
>
> Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h      |  9 +++++
>  drivers/gpu/drm/i915/i915_irq.c      |  7 ++++
>  drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++-
>  drivers/gpu/drm/i915/intel_drv.h     | 26 +++++++++----
>  drivers/gpu/drm/i915/intel_pm.c      | 75 ++++++++++++++++++++++++++++++------
>  5 files changed, 130 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5ad942e..42397e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -623,6 +623,9 @@ struct drm_i915_display_funcs {
>  			  struct dpll *best_clock);
>  	int (*compute_pipe_wm)(struct drm_crtc *crtc,
>  			       struct drm_atomic_state *state);
> +	void (*compute_intermediate_wm)(struct drm_device *dev,
> +					struct intel_crtc_state *newstate,
> +					const struct intel_crtc_state *oldstate);
>
If this is can't fail anyway could we please do this at runtime instead of precalculating?

Something like this:

commit_intermediate_wm
vblank_evade
plane_update
vblank_end
(wait for vblank)
commit_final_wm

Of course for skylake just hammer in the final wm during vblank evasion. :-)
Daniel Vetter July 6, 2015, 12:26 p.m. UTC | #3
On Mon, Jul 06, 2015 at 02:20:00PM +0200, Maarten Lankhorst wrote:
> Op 02-07-15 om 04:26 schreef Matt Roper:
> > From: Matt Roper <matt@mattrope.com>
> >
> > In addition to calculating final watermarks, let's also pre-calculate a
> > set of intermediate watermark values at atomic check time.  These
> > intermediate watermarks are a combination of the watermarks for the old
> > state and the new state; they should satisfy the requirements of both
> > states which means they can be programmed immediately when we commit the
> > atomic state (without waiting for a vblank).  Once the vblank does
> > happen, we can then re-program watermarks to the more optimal final
> > value.
> >
> > v2: Significant rebasing/rewriting.
> >
> > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h      |  9 +++++
> >  drivers/gpu/drm/i915/i915_irq.c      |  7 ++++
> >  drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++-
> >  drivers/gpu/drm/i915/intel_drv.h     | 26 +++++++++----
> >  drivers/gpu/drm/i915/intel_pm.c      | 75 ++++++++++++++++++++++++++++++------
> >  5 files changed, 130 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 5ad942e..42397e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -623,6 +623,9 @@ struct drm_i915_display_funcs {
> >  			  struct dpll *best_clock);
> >  	int (*compute_pipe_wm)(struct drm_crtc *crtc,
> >  			       struct drm_atomic_state *state);
> > +	void (*compute_intermediate_wm)(struct drm_device *dev,
> > +					struct intel_crtc_state *newstate,
> > +					const struct intel_crtc_state *oldstate);
> >
> If this is can't fail anyway could we please do this at runtime instead of precalculating?

Hm, this is possible to fail, we might not be able to get valid
intermediate wms. And therefore it needs to be in the compute phase.
-Daniel

> 
> Something like this:
> 
> commit_intermediate_wm
> vblank_evade
> plane_update
> vblank_end
> (wait for vblank)
> commit_final_wm
> 
> Of course for skylake just hammer in the final wm during vblank evasion. :-)
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5ad942e..42397e2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -623,6 +623,9 @@  struct drm_i915_display_funcs {
 			  struct dpll *best_clock);
 	int (*compute_pipe_wm)(struct drm_crtc *crtc,
 			       struct drm_atomic_state *state);
+	void (*compute_intermediate_wm)(struct drm_device *dev,
+					struct intel_crtc_state *newstate,
+					const struct intel_crtc_state *oldstate);
 	void (*update_wm)(struct drm_crtc *crtc);
 	void (*update_sprite_wm)(struct drm_plane *plane,
 				 struct drm_crtc *crtc,
@@ -2574,6 +2577,12 @@  struct drm_i915_cmd_table {
  */
 #define HAS_ATOMIC_WM(dev_priv) (dev_priv->display.program_watermarks != NULL)
 
+/*
+ * Newer platforms have doublebuffered watermark registers and do not need
+ * the two-step watermark programming used by older platforms.
+ */
+#define HAS_DBLBUF_WM(dev_priv) (INTEL_INFO(dev_priv)->gen >= 9)
+
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 20c7260..627c56f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1455,8 +1455,15 @@  static bool intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
 
 	if (intel_crtc->need_vblank_wm_update) {
+		WARN_ON(HAS_DBLBUF_WM(dev_priv));
+
+		/* Latch final watermarks now that vblank is past */
+		cstate->wm.active = cstate->wm.target;
+
+		/* Queue work to actually program them asynchronously */
 		queue_work(dev_priv->wq, &intel_crtc->wm_work);
 		intel_crtc->need_vblank_wm_update = false;
 	}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fa4373e..1616d7f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4737,7 +4737,7 @@  static void intel_post_plane_update(struct intel_crtc *crtc)
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_plane *plane;
 
-	if (HAS_ATOMIC_WM(to_i915(dev)))
+	if (HAS_ATOMIC_WM(to_i915(dev)) && !HAS_DBLBUF_WM(to_i915(dev)))
 		/* vblank handler will kick off workqueue task to update wm's */
 		crtc->need_vblank_wm_update = true;
 
@@ -11833,6 +11833,8 @@  static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_crtc_state *pipe_config =
 		to_intel_crtc_state(crtc_state);
+	struct intel_crtc_state *old_pipe_config =
+		to_intel_crtc_state(crtc->state);
 	struct drm_atomic_state *state = crtc_state->state;
 	int ret, idx = crtc->base.id;
 	bool mode_changed = needs_modeset(crtc_state);
@@ -11863,9 +11865,20 @@  static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 	}
 
 	if (dev_priv->display.compute_pipe_wm) {
+		if (WARN_ON(!dev_priv->display.compute_intermediate_wm))
+			return 0;
+
 		ret = dev_priv->display.compute_pipe_wm(crtc, state);
 		if (ret)
 			return ret;
+
+		/*
+		 * Calculate 'intermediate' watermarks that satisfy both the old state
+		 * and the new state.  We can program these immediately.
+		 */
+		dev_priv->display.compute_intermediate_wm(crtc->dev,
+							  pipe_config,
+							  old_pipe_config);
 	}
 
 	return intel_atomic_setup_scalers(dev, intel_crtc, pipe_config);
@@ -13789,8 +13802,25 @@  static void intel_begin_crtc_commit(struct drm_crtc *crtc)
 	if (!needs_modeset(crtc->state))
 		intel_pre_plane_update(intel_crtc);
 
-	if (intel_crtc->atomic.update_wm_pre)
+	/*
+	 * For platforms that support atomic watermarks, program the 'pending'
+	 * watermarks immediately.  On pre-gen9 platforms, these will be the
+	 * intermediate values that are safe for both pre- and post- vblank;
+	 * when vblank happens, the 'pending' values will be set to the final
+	 * 'target' values and we'll do this again to get the optimal
+	 * watermarks.  For gen9+ platforms, the values we program here will be
+	 * the final target values which will get automatically latched at
+	 * vblank time; no further programming will be necessary.
+	 *
+	 * If a platform hasn't been transitioned to atomic watermarks yet,
+	 * we'll continue to update watermarks the old way, if flags tell
+	 * us to.
+	 */
+	if (HAS_ATOMIC_WM(dev_priv)) {
+		dev_priv->display.program_watermarks(dev_priv);
+	} else if (intel_crtc->atomic.update_wm_pre) {
 		intel_update_watermarks(crtc);
+	}
 
 	intel_runtime_pm_get(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 775e3d0..7bc4e81 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -343,6 +343,11 @@  struct skl_pipe_wm {
 	uint32_t linetime;
 };
 
+union pipe_wm {
+	struct intel_pipe_wm ilk;
+	struct skl_pipe_wm skl;
+};
+
 struct intel_crtc_state {
 	struct drm_crtc_state base;
 
@@ -481,16 +486,21 @@  struct intel_crtc_state {
 
 	struct {
 		/*
-		 * final watermarks, programmed post-vblank when this state
-		 * is committed
+		 * final, target watermarks for state; on pre-gen9 platforms,
+		 * this might not have been programmed yet if a vblank hasn't
+		 * happened since this state was committed
+		 */
+		union pipe_wm target;
+
+		/*
+		 * currently programmed watermark; on pre-gen9, this will be
+		 * the intermediate values before vblank then switch to match
+		 * 'target' after vblank fires)
 		 */
-		union {
-			struct intel_pipe_wm ilk;
-			struct skl_pipe_wm skl;
-		} active;
+		union pipe_wm active;
 
-		/* allow CxSR on this pipe */
-		bool cxsr_allowed;
+               /* allow CxSR on this pipe */
+               bool cxsr_allowed;
 	} wm;
 };
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c6e735f..7942bb0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2338,6 +2338,24 @@  static void ilk_compute_wm_config(struct drm_device *dev,
 	}
 }
 
+static bool ilk_validate_pipe_wm(struct drm_device *dev,
+				 struct intel_wm_config *config,
+				 struct intel_pipe_wm *pipe_wm)
+{
+	struct ilk_wm_maximums max;
+
+	/* LP0 watermarks always use 1/2 DDB partitioning */
+	ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, &max);
+
+	/* At least LP0 must be valid */
+	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
+		DRM_DEBUG_KMS("LP0 watermark invalid\n");
+		return false;
+	}
+
+	return true;
+}
+
 /* Compute new watermarks for the pipe */
 static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
 			       struct drm_atomic_state *state)
@@ -2366,7 +2384,7 @@  static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
 	else
 		cstate = to_intel_crtc_state(cs);
 
-	pipe_wm = &cstate->wm.active.ilk;
+	pipe_wm = &cstate->wm.target.ilk;
 
 	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
 		ps = drm_atomic_get_plane_state(state,
@@ -2405,12 +2423,8 @@  static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
 
-	/* LP0 watermarks always use 1/2 DDB partitioning */
-	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
-
-	/* At least LP0 must be valid */
-	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
-		return -EINVAL;
+	if (!ilk_validate_pipe_wm(dev, &config, pipe_wm))
+		return false;
 
 	ilk_compute_wm_reg_maximums(dev, 1, &max);
 
@@ -2435,6 +2449,40 @@  static int ilk_compute_pipe_wm(struct drm_crtc *crtc,
 }
 
 /*
+ * Build a set of 'intermediate' watermark values that satisfy both the old
+ * state and the new state.  These can be programmed to the hardware
+ * immediately.
+ */
+void ilk_compute_intermediate_wm(struct drm_device *dev,
+				 struct intel_crtc_state *newstate,
+				 const struct intel_crtc_state *oldstate)
+{
+	struct intel_pipe_wm *a = &newstate->wm.active.ilk;
+	const struct intel_pipe_wm *b = &oldstate->wm.active.ilk;
+	int level, max_level = ilk_wm_max_level(dev);
+
+	/*
+	 * Start with the final, target watermarks, then combine with the
+	 * current state's watermarks.
+	 */
+	*a = newstate->wm.target.ilk;
+	a->pipe_enabled |= b->pipe_enabled;
+	a->sprites_enabled |= b->sprites_enabled;
+	a->sprites_scaled |= b->sprites_scaled;
+
+	for (level = 0; level <= max_level; level++) {
+		struct intel_wm_level *a_wm = &a->wm[level];
+		const struct intel_wm_level *b_wm = &b->wm[level];
+
+		a_wm->enable &= b_wm->enable;
+		a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
+		a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
+		a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
+		a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
+	}
+}
+
+/*
  * Merge the watermarks from all active pipes for a specific level.
  */
 static void ilk_merge_wm_level(struct drm_device *dev,
@@ -3601,10 +3649,10 @@  static bool skl_update_pipe_wm(struct drm_crtc *crtc,
 	skl_allocate_pipe_ddb(crtc, config, params, ddb);
 	skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
 
-	if (!memcmp(&cstate->wm.active.skl, pipe_wm, sizeof(*pipe_wm)))
+	if (!memcmp(&cstate->wm.target.skl, pipe_wm, sizeof(*pipe_wm)))
 		return false;
 
-	cstate->wm.active.skl = *pipe_wm;
+	cstate->wm.target.skl = *pipe_wm;
 
 	return true;
 }
@@ -3825,7 +3873,7 @@  static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
 	struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
-	struct skl_pipe_wm *active = &cstate->wm.active.skl;
+	struct skl_pipe_wm *active = &cstate->wm.target.skl;
 	enum pipe pipe = intel_crtc->pipe;
 	int level, i, max_level;
 	uint32_t temp;
@@ -3889,7 +3937,7 @@  static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
 	struct ilk_wm_values *hw = &dev_priv->wm.hw;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
-	struct intel_pipe_wm *active = &cstate->wm.active.ilk;
+	struct intel_pipe_wm *active = &cstate->wm.target.ilk;
 	enum pipe pipe = intel_crtc->pipe;
 	static const unsigned int wm0_pipe_reg[] = {
 		[PIPE_A] = WM0_PIPEA_ILK,
@@ -3928,6 +3976,8 @@  static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
 		for (level = 0; level <= max_level; level++)
 			active->wm[level].enable = true;
 	}
+
+	cstate->wm.active.ilk = cstate->wm.target.ilk = *active;
 }
 
 #define _FW_WM(value, plane) \
@@ -7048,6 +7098,9 @@  void intel_init_pm(struct drm_device *dev)
 		     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
 			dev_priv->display.update_wm = ilk_update_wm;
 			dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
+			dev_priv->display.compute_intermediate_wm =
+				ilk_compute_intermediate_wm;
+			dev_priv->display.program_watermarks = ilk_program_watermarks;
 		} else {
 			DRM_DEBUG_KMS("Failed to read display plane latency. "
 				      "Disable CxSR\n");